/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992-2015 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "stringpool.h"
#include "varasm.h"
#include "calls.h"
#include "output.h"
#include "dbxout.h"
#include "except.h"
#include "hashtab.h"
#include "function.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "reload.h"
#include "diagnostic-core.h"
#include "ggc.h"
#include "recog.h"
#include "predict.h"
#include "tm_p.h"
#include "target.h"
#include "common/common-target.h"
#include "target-def.h"
#include "langhooks.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "df.h"
#include "opts.h"
#include "builtins.h"

/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  machine_mode store_mode;
  machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
	  && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}
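
/* For instance (an illustrative reading of the check above, not an
   exhaustive rule): a DFmode result can feed a DFmode fpstore through
   this bypass, but an SFmode result cannot, since the bypass only
   applies when the two modes have the same size.  */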

#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif

static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (machine_mode mode, reg_class_t,
				    reg_class_t);
static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, int, int, int, int *, bool);
static inline rtx force_mode (machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
			     rtx, rtx);
static bool forward_branch_p (rtx_insn *);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx_insn *);
static int compute_clrmem_length (rtx_insn *);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int pa_adjust_priority (rtx_insn *, int);
static int pa_issue_rate (void);
static int pa_reloc_rw_mask (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (machine_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
static void pa_hpux_file_end (void);
#endif
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t, machine_mode,
				  const_tree, bool);
static int pa_arg_partial_bytes (cumulative_args_t, machine_mode,
				 tree, bool);
static void pa_function_arg_advance (cumulative_args_t, machine_mode,
				     const_tree, bool);
static rtx pa_function_arg (cumulative_args_t, machine_mode,
			    const_tree, bool);
static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
					machine_mode,
					secondary_reload_info *);
static void pa_extra_live_on_entry (bitmap);
static machine_mode pa_promote_function_mode (const_tree,
						   machine_mode, int *,
						   const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (machine_mode, rtx);
static bool pa_legitimate_constant_p (machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);
static bool pa_legitimate_address_p (machine_mode, rtx, bool);

/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;

/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#ifdef ASM_OUTPUT_EXTERNAL_REAL
#define TARGET_ASM_FILE_END pa_hpux_file_end
#else
#define TARGET_ASM_FILE_END output_deferred_plabels
#endif

#undef TARGET_ASM_RELOC_RW_MASK
#define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p

struct gcc_target targetm = TARGET_INITIALIZER;

/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of -mfixed-range must have form REG1-REG2");
	  return;
	}
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}
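
/* Usage sketch (illustrative; the accepted register-name syntax is
   whatever decode_reg_name supports on this target):

     -mfixed-range=fr4-fr31

   marks fr4 through fr31 as fixed, and, if that ends up fixing every
   FP register, also sets MASK_DISABLE_FPREGS via the check above.  */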

/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
	switch (opt->opt_index)
	  {
	  case OPT_mfixed_range_:
	    fix_range (opt->arg);
	    break;

	  default:
	    gcc_unreachable ();
	  }
      }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64-bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "-freorder-blocks-and-partition does not work "
              "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}

enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];

static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
		      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
						 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
				   PA_BUILTIN_FABSQ, BUILT_IN_MD,
				   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
				   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
				   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
				   PA_BUILTIN_INFQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
                                   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}
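
/* Illustrative use of the builtins registered above, assuming an HP-UX
   long-double library (where __float128 is long double):

     long double x = __builtin_infq ();		    expanded inline below
     long double y = __builtin_fabsq (x);	    becomes a _U_Qfabs call
     long double z = __builtin_copysignq (y, x);    becomes _U_Qfcopysign  */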

static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		   machine_mode mode ATTRIBUTE_UNUSED,
		   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
	machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);

	tmp = validize_mem (force_const_mem (target_mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (target_mode);

	emit_move_insn (target, tmp);
	return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}

/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
pa_symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return symbolic_operand (x, VOIDmode);
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
pa_cint_ok_for_move (HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
	  || pa_ldil_cint_p (ival)
	  || pa_zdepi_cint_p (ival));
}
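
/* For example (a sketch): 3 fits in 14 bits, so ldo handles it;
   0x12345800 has its low 11 bits clear, so ldil handles it; and
   0x1f0 is a short run of ones, so zdepi handles it.  */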

/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
pa_ldil_cint_p (HOST_WIDE_INT ival)
{
  HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);

  return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
}
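
/* Examples (a sketch): 0x12345800 qualifies, since its low 11 bits are
   zero and bit 31 is clear.  0x12345801 does not (low bits set), and
   neither does 0x80000000 as a positive 64-bit value; it would have to
   appear sign-extended as 0xffffffff80000000 to qualify.  */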

/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
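
/* A worked example (a sketch): for x = 0x1f0, lsb_mask = 0x10 and
   t = (0x1f + 0x10) & ~0xf = 0x20, a power of two, so zdepi can
   generate 0x1f0 (a 5-bit field of ones deposited at bit 4).  For
   x = 0x21, t = 3, which is not a power of two, so it is rejected.  */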

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit patterns like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
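
/* Examples (a sketch): 0x3f (0....01....1) and ~0x3f (1....10....0)
   are accepted, while 0x5 is rejected because ~0x5 plus its lowest
   set bit still has more than one bit set.  */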

/* True iff depi can be used to compute (reg | MASK).  */
int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
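
/* For example (a sketch): 0x7f0 is accepted, since 0x7f0 + 0x10 = 0x800
   is a power of two, while 0x505 is rejected because its set bits are
   not contiguous.  */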

/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

static rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx_insn *insn;

      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 multiple word sizes.

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (VOIDmode, reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
	 and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
	{
	  /* Extract LABEL_REF.  */
	  if (GET_CODE (orig) == CONST)
	    orig = XEXP (XEXP (orig, 0), 0);
	  /* Extract CODE_LABEL.  */
	  orig = XEXP (orig, 0);
	  add_reg_note (insn, REG_LABEL_OPERAND, orig);
	  /* Make sure we have a label and not a note.  */
	  if (LABEL_P (orig))
	    LABEL_NUSES (orig)++;
	}
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx_insn *insn;
      rtx tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
	 result.  This allows the sequence to be deleted when the final
	 result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
		 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
	{
	  /* Force function label into memory in word mode.  */
	  orig = XEXP (force_const_mem (word_mode, orig), 0);
	  /* Load plabel address from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
						         gen_rtvec (1, orig),
						         UNSPEC_DLTIND14R)));
	  emit_move_insn (reg, pic_ref);
	  /* Now load address of function descriptor.  */
	  pic_ref = gen_rtx_MEM (Pmode, reg);
	}
      else
	{
	  /* Load symbol reference from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
						         gen_rtvec (1, orig),
						         UNSPEC_DLTIND14R)));
	}

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
	{
	  if (INT_14_BITS (orig))
	    return plus_constant (Pmode, base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
			   LCT_CONST, Pmode, 1, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, tmp, t1, t2, tp;
  rtx_insn *insn;

  /* Currently, we can't handle anything but a SYMBOL_REF.  */
  if (GET_CODE (addr) != SYMBOL_REF)
    return addr;

  switch (SYMBOL_REF_TLS_MODEL (addr))
    {
      case TLS_MODEL_GLOBAL_DYNAMIC:
	tmp = gen_reg_rtx (Pmode);
	if (flag_pic)
	  emit_insn (gen_tgd_load_pic (tmp, addr));
	else
	  emit_insn (gen_tgd_load (tmp, addr));
	ret = hppa_tls_call (tmp);
	break;

      case TLS_MODEL_LOCAL_DYNAMIC:
	ret = gen_reg_rtx (Pmode);
	tmp = gen_reg_rtx (Pmode);
	start_sequence ();
	if (flag_pic)
	  emit_insn (gen_tld_load_pic (tmp, addr));
	else
	  emit_insn (gen_tld_load (tmp, addr));
	t1 = hppa_tls_call (tmp);
	insn = get_insns ();
	end_sequence ();
	t2 = gen_reg_rtx (Pmode);
	emit_libcall_block (insn, t2, t1,
			    gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				            UNSPEC_TLSLDBASE));
	emit_insn (gen_tld_offset_load (ret, addr, t2));
	break;

      case TLS_MODEL_INITIAL_EXEC:
	tp = gen_reg_rtx (Pmode);
	tmp = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	emit_insn (gen_tp_load (tp));
	if (flag_pic)
	  emit_insn (gen_tie_load_pic (tmp, addr));
	else
	  emit_insn (gen_tie_load (tmp, addr));
	emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
	break;

      case TLS_MODEL_LOCAL_EXEC:
	tp = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	emit_insn (gen_tp_load (tp));
	emit_insn (gen_tle_load (ret, addr, tp));
	break;

      default:
	gcc_unreachable ();
    }

  return ret;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= 16
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)

   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (This allows more shadd insns and shifted
   indexed addressing modes to be used.)

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
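
/* A worked example of the transformation above (a sketch): for a
   MODE_INT reference, mask is 0x3fff.  Given memory (X + 0x12345),
   0x12345 & 0x3fff = 0x2345 is >= 0x2000 (halfway), so we round up and
   pick Y = 0x14000.  Z = X + Y can then be shared by CSE, and the
   residual displacement 0x12345 - 0x14000 = -0x1cbb fits in 14 bits.  */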

rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (tls_referenced_p (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      HOST_WIDE_INT newoffset;
      HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      && !INT14_OK_STRICT ? 0x1f : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF).  Combine cannot
	 handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && pa_shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
      && (OBJECT_P (XEXP (x, 1))
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx_PLUS (Pmode,
					     gen_rtx_MULT (Pmode,
							   reg2,
							   GEN_INT (val)),
					     reg1));
    }

  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
      && pa_shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
      && (mode == SFmode || mode == DFmode))
    {
      /* First, try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_MULT (Pmode,
					    XEXP (XEXP (XEXP (x, 0), 0), 0),
					    XEXP (XEXP (XEXP (x, 0), 0), 1)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  idx = XEXP (x, 0);
	}

      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  HOST_WIDE_INT val = INTVAL (XEXP (idx, 1));

	  val /= INTVAL (XEXP (XEXP (idx, 0), 1));
	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_MULT (Pmode, reg1,
						  XEXP (XEXP (idx, 0), 1)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  HOST_WIDE_INT val = INTVAL (XEXP (XEXP (idx, 0), 1));
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode, gen_rtx_PLUS (Pmode,
						 gen_rtx_MULT (Pmode,
							       reg2,
							       GEN_INT (val)),
						 reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_MULT (Pmode, reg1,
						    XEXP (XEXP (idx, 0), 1)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try to rearrange
     the terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     Where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is big but can be divided evenly by shadd_const,
	     the quotient can instead be added to (reg).  This allows more
	     scaled indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && GET_CODE (XEXP (x, 0)) == MULT
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095
	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	      && pa_shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
	        reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return force_reg (Pmode,
				gen_rtx_PLUS (Pmode,
					      gen_rtx_MULT (Pmode,
							    reg2,
							    GEN_INT (val)),
					      reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && GET_CODE (XEXP (x, 0)) == MULT
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
		   && pa_shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_MULT (Pmode, regx2,
						       XEXP (XEXP (x, 0), 1)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}

/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			 reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
           || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}

/* For the HPPA, REG, REG+CONST and LO_SUM addresses cost 1, addresses
   requiring a HIGH cost 2, and all other addresses (including PIC
   references, which are very expensive) cost 4.

   It is no coincidence that this has the same structure
   as pa_legitimate_address_p.  */

static int
hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
		   addr_space_t as ATTRIBUTE_UNUSED,
		   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
		int *total, bool speed ATTRIBUTE_UNUSED)
{
  int factor;

  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
	*total = 0;
      else if (INT_14_BITS (x))
	*total = 1;
      else
	*total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
	  && outer_code != SET)
	*total = 0;
      else
        *total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (3);
	  return true;
	}

      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (GET_MODE (x)) / 4;
      if (factor == 0)
	factor = 1;

      if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
	*total = factor * factor * COSTS_N_INSNS (8);
      else
	*total = factor * factor * COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (14);
	  return true;
	}
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (GET_MODE (x)) / 4;
      if (factor == 0)
	factor = 1;

      *total = factor * factor * COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (3);
	  return true;
	}

      /* A size N times larger than UNITS_PER_WORD needs N times as
	 many insns, taking N times as long.  */
      factor = GET_MODE_SIZE (GET_MODE (x)) / UNITS_PER_WORD;
      if (factor == 0)
	factor = 1;
      *total = factor * COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}
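
/* For example (a sketch): an SImode multiply (factor 1) on a PA 1.1
   target with FP enabled costs COSTS_N_INSNS (8) above, while a DImode
   multiply (factor 2) costs four times that, matching the O(N*N)
   growth noted in the comments.  */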

/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return tls_referenced_p (x);
}

/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */

int
pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];
  register rtx tem;

  /* We can only handle indexed addresses in the destination operand
     of floating point stores.  Thus, we need to break out indexed
     addresses from the destination operand.  */
  if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
    {
      gcc_assert (can_create_pseudo_p ());

      tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
      operand0 = replace_equiv_address (operand0, tem);
    }

  /* On targets with non-equivalent space registers, break out unscaled
     indexed addresses from the source operand before the final CSE.
     We have to do this because the REG_POINTER flag is not correctly
     carried through various optimization passes and CSE may substitute
     a pseudo without the pointer set for one with the pointer set.  As
     a result, we lose various opportunities to create insns with
     unscaled indexed addresses.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (operand1) == MEM
      && GET_CODE (XEXP (operand1, 0)) == PLUS
      && REG_P (XEXP (XEXP (operand1, 0), 0))
      && REG_P (XEXP (XEXP (operand1, 0), 1)))
    operand1
      = replace_equiv_address (operand1,
			       copy_to_mode_reg (Pmode, XEXP (operand1, 0)));

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem (REGNO (operand0));
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand0) == SUBREG
	   && GET_CODE (SUBREG_REG (operand0)) == REG
	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
     /* We must not alter SUBREG_BYTE (operand0) since that would confuse
	the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
				 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
				 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp, true);
    }

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem (REGNO (operand1));
1645   else if (scratch_reg
1646 	   && reload_in_progress && GET_CODE (operand1) == SUBREG
1647 	   && GET_CODE (SUBREG_REG (operand1)) == REG
1648 	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1649     {
1650      /* We must not alter SUBREG_BYTE (operand1) since that would confuse
1651 	the code which tracks sets/uses for delete_output_reload.  */
1652       rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1653 				 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
1654 				 SUBREG_BYTE (operand1));
1655       operand1 = alter_subreg (&temp, true);
1656     }
1657 
1658   if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1659       && ((tem = find_replacement (&XEXP (operand0, 0)))
1660 	  != XEXP (operand0, 0)))
1661     operand0 = replace_equiv_address (operand0, tem);
1662 
1663   if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1664       && ((tem = find_replacement (&XEXP (operand1, 0)))
1665 	  != XEXP (operand1, 0)))
1666     operand1 = replace_equiv_address (operand1, tem);
1667 
1668   /* Handle secondary reloads for loads/stores of FP registers from
1669      REG+D addresses where D does not fit in 5 or 14 bits, including
1670      (subreg (mem (addr))) cases, and reloads for other unsupported
1671      memory operands.  */
1672   if (scratch_reg
1673       && FP_REG_P (operand0)
1674       && (MEM_P (operand1)
1675 	  || (GET_CODE (operand1) == SUBREG
1676 	      && MEM_P (XEXP (operand1, 0)))))
1677     {
1678       rtx op1 = operand1;
1679 
1680       if (GET_CODE (op1) == SUBREG)
1681 	op1 = XEXP (op1, 0);
1682 
1683       if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
1684 	{
1685 	  if (!(TARGET_PA_20
1686 		&& !TARGET_ELF32
1687 		&& INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1688 	      && !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
1689 	    {
1690 	      /* SCRATCH_REG will hold an address and maybe the actual data.
1691 		 We want it in WORD_MODE regardless of what mode it was
1692 		 originally given to us.  */
1693 	      scratch_reg = force_mode (word_mode, scratch_reg);
1694 
1695 	      /* D might not fit in 14 bits either; for such cases load D
1696 		 into scratch reg.  */
1697 	      if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1698 		{
1699 		  emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
1700 		  emit_move_insn (scratch_reg,
1701 				  gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
1702 						  Pmode,
1703 						  XEXP (XEXP (op1, 0), 0),
1704 						  scratch_reg));
1705 		}
1706 	      else
1707 		emit_move_insn (scratch_reg, XEXP (op1, 0));
1708 	      emit_insn (gen_rtx_SET (VOIDmode, operand0,
1709 				  replace_equiv_address (op1, scratch_reg)));
1710 	      return 1;
1711 	    }
1712 	}
1713       else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
1714 	       || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
1715 	       || IS_INDEX_ADDR_P (XEXP (op1, 0)))
1716 	{
1717 	  /* Load memory address into SCRATCH_REG.  */
1718 	  scratch_reg = force_mode (word_mode, scratch_reg);
1719 	  emit_move_insn (scratch_reg, XEXP (op1, 0));
1720 	  emit_insn (gen_rtx_SET (VOIDmode, operand0,
1721 				  replace_equiv_address (op1, scratch_reg)));
1722 	  return 1;
1723 	}
1724     }
1725   else if (scratch_reg
1726 	   && FP_REG_P (operand1)
1727 	   && (MEM_P (operand0)
1728 	       || (GET_CODE (operand0) == SUBREG
1729 		   && MEM_P (XEXP (operand0, 0)))))
1730     {
1731       rtx op0 = operand0;
1732 
1733       if (GET_CODE (op0) == SUBREG)
1734 	op0 = XEXP (op0, 0);
1735 
1736       if (reg_plus_base_memory_operand (op0, GET_MODE (op0)))
1737 	{
1738 	  if (!(TARGET_PA_20
1739 		&& !TARGET_ELF32
1740 		&& INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1741 	      && !INT_5_BITS (XEXP (XEXP (op0, 0), 1)))
1742 	    {
1743 	      /* SCRATCH_REG will hold an address and maybe the actual data.
1744 		 We want it in WORD_MODE regardless of what mode it was
1745 		 originally given to us.  */
1746 	      scratch_reg = force_mode (word_mode, scratch_reg);
1747 
1748 	      /* D might not fit in 14 bits either; for such cases load D
1749 		 into scratch reg.  */
1750 	      if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1751 		{
1752 		  emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1));
1753 		  emit_move_insn (scratch_reg,
1754 				  gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)),
1755 						  Pmode,
1756 						  XEXP (XEXP (op0, 0), 0),
1757 						  scratch_reg));
1758 		}
1759 	      else
1760 		emit_move_insn (scratch_reg, XEXP (op0, 0));
1761 	      emit_insn (gen_rtx_SET (VOIDmode,
1762 				      replace_equiv_address (op0, scratch_reg),
1763 				      operand1));
1764 	      return 1;
1765 	    }
1766 	}
1767       else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode))
1768 	       || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
1769 	       || IS_INDEX_ADDR_P (XEXP (op0, 0)))
1770 	{
1771 	  /* Load memory address into SCRATCH_REG.  */
1772 	  emit_move_insn (scratch_reg, XEXP (op0, 0));
1773 	  emit_insn (gen_rtx_SET (VOIDmode,
1774 				  replace_equiv_address (op0, scratch_reg),
1775 				  operand1));
1776 	  return 1;
1777 	}
1778     }
1779   /* Handle secondary reloads for loads of FP registers from constant
1780      expressions by forcing the constant into memory.  For the most part,
1781      this is only necessary for SImode and DImode.
1782 
1783      Use scratch_reg to hold the address of the memory location.  */
1784   else if (scratch_reg
1785 	   && CONSTANT_P (operand1)
1786 	   && FP_REG_P (operand0))
1787     {
1788       rtx const_mem, xoperands[2];
1789 
1790       if (operand1 == CONST0_RTX (mode))
1791 	{
1792 	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1793 	  return 1;
1794 	}
1795 
1796       /* SCRATCH_REG will hold an address and maybe the actual data.  We want
1797 	 it in WORD_MODE regardless of what mode it was originally given
1798 	 to us.  */
1799       scratch_reg = force_mode (word_mode, scratch_reg);
1800 
1801       /* Force the constant into memory and put the address of the
1802 	 memory location into scratch_reg.  */
1803       const_mem = force_const_mem (mode, operand1);
1804       xoperands[0] = scratch_reg;
1805       xoperands[1] = XEXP (const_mem, 0);
1806       pa_emit_move_sequence (xoperands, Pmode, 0);
1807 
1808       /* Now load the destination register.  */
1809       emit_insn (gen_rtx_SET (mode, operand0,
1810 			      replace_equiv_address (const_mem, scratch_reg)));
1811       return 1;
1812     }
1813   /* Handle secondary reloads for SAR.  These occur when trying to load
1814      the SAR from memory or a constant.  */
1815   else if (scratch_reg
1816 	   && GET_CODE (operand0) == REG
1817 	   && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1818 	   && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1819 	   && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
1820     {
1821       /* D might not fit in 14 bits either; for such cases load D into
1822 	 scratch reg.  */
1823       if (GET_CODE (operand1) == MEM
1824 	  && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
1825 	{
1826 	  /* We are reloading the address into the scratch register, so we
1827 	     want to make sure the scratch register is a full register.  */
1828 	  scratch_reg = force_mode (word_mode, scratch_reg);
1829 
1830 	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1831 	  emit_move_insn (scratch_reg,
1832 			  gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
1833 					  Pmode,
1834 					  XEXP (XEXP (operand1, 0), 0),
1835 					  scratch_reg));
1836 
1837 	  /* Now we are going to load the scratch register from memory,
1838 	     we want to load it in the same width as the original MEM,
1839 	     which must be the same as the width of the ultimate destination,
1840 	     OPERAND0.  */
1841 	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1842 
1843 	  emit_move_insn (scratch_reg,
1844 			  replace_equiv_address (operand1, scratch_reg));
1845 	}
1846       else
1847 	{
1848 	  /* We want to load the scratch register using the same mode as
1849 	     the ultimate destination.  */
1850 	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1851 
1852 	  emit_move_insn (scratch_reg, operand1);
1853 	}
1854 
1855       /* And emit the insn to set the ultimate destination.  We know that
1856 	 the scratch register has the same mode as the destination at this
1857 	 point.  */
1858       emit_move_insn (operand0, scratch_reg);
1859       return 1;
1860     }
1861 
1862   /* Handle the most common case: storing into a register.  */
1863   if (register_operand (operand0, mode))
1864     {
1865       /* Legitimize TLS symbol references.  This happens for references
1866 	 that aren't a legitimate constant.  */
1867       if (PA_SYMBOL_REF_TLS_P (operand1))
1868 	operand1 = legitimize_tls_address (operand1);
1869 
1870       if (register_operand (operand1, mode)
1871 	  || (GET_CODE (operand1) == CONST_INT
1872 	      && pa_cint_ok_for_move (INTVAL (operand1)))
1873 	  || (operand1 == CONST0_RTX (mode))
1874 	  || (GET_CODE (operand1) == HIGH
1875 	      && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1876 	  /* Only `general_operands' can come here, so MEM is ok.  */
1877 	  || GET_CODE (operand1) == MEM)
1878 	{
1879 	  /* Various sets are created during RTL generation which don't
1880 	     have the REG_POINTER flag correctly set.  After the CSE pass,
1881 	     instruction recognition can fail if we don't consistently
1882 	     set this flag when performing register copies.  This should
1883 	     also improve the opportunities for creating insns that use
1884 	     unscaled indexing.  */
1885 	  if (REG_P (operand0) && REG_P (operand1))
1886 	    {
1887 	      if (REG_POINTER (operand1)
1888 		  && !REG_POINTER (operand0)
1889 		  && !HARD_REGISTER_P (operand0))
1890 		copy_reg_pointer (operand0, operand1);
1891 	    }
1892 
1893 	  /* When MEMs are broken out, the REG_POINTER flag doesn't
1894 	     get set.  In some cases, we can set the REG_POINTER flag
1895 	     from the declaration for the MEM.  */
1896 	  if (REG_P (operand0)
1897 	      && GET_CODE (operand1) == MEM
1898 	      && !REG_POINTER (operand0))
1899 	    {
1900 	      tree decl = MEM_EXPR (operand1);
1901 
1902 	      /* Set the register pointer flag and register alignment
1903 		 if the declaration for this memory reference is a
1904 		 pointer type.  */
1905 	      if (decl)
1906 		{
1907 		  tree type;
1908 
1909 		  /* If this is a COMPONENT_REF, use the FIELD_DECL from
1910 		     tree operand 1.  */
1911 		  if (TREE_CODE (decl) == COMPONENT_REF)
1912 		    decl = TREE_OPERAND (decl, 1);
1913 
1914 		  type = TREE_TYPE (decl);
1915 		  type = strip_array_types (type);
1916 
1917 		  if (POINTER_TYPE_P (type))
1918 		    {
1919 		      int align;
1920 
1921 		      type = TREE_TYPE (type);
1922 		      /* Using TYPE_ALIGN_OK is rather conservative as
1923 			 only the Ada front end actually sets it.  */
1924 		      align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
1925 			       : BITS_PER_UNIT);
1926 		      mark_reg_pointer (operand0, align);
1927 		    }
1928 		}
1929 	    }
1930 
1931 	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1932 	  return 1;
1933 	}
1934     }
1935   else if (GET_CODE (operand0) == MEM)
1936     {
1937       if (mode == DFmode && operand1 == CONST0_RTX (mode)
1938 	  && !(reload_in_progress || reload_completed))
1939 	{
1940 	  rtx temp = gen_reg_rtx (DFmode);
1941 
1942 	  emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1943 	  emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
1944 	  return 1;
1945 	}
1946       if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1947 	{
1948 	  /* Run this case quickly.  */
1949 	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1950 	  return 1;
1951 	}
1952       if (! (reload_in_progress || reload_completed))
1953 	{
1954 	  operands[0] = validize_mem (operand0);
1955 	  operands[1] = operand1 = force_reg (mode, operand1);
1956 	}
1957     }
1958 
1959   /* Simplify the source if we need to.
1960      Note we do have to handle function labels here, even though we do
1961      not consider them legitimate constants.  Loop optimizations can
1962      call the emit_move_xxx with one as a source.  */
1963   if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1964       || (GET_CODE (operand1) == HIGH
1965 	  && symbolic_operand (XEXP (operand1, 0), mode))
1966       || function_label_operand (operand1, VOIDmode)
1967       || tls_referenced_p (operand1))
1968     {
1969       int ishighonly = 0;
1970 
1971       if (GET_CODE (operand1) == HIGH)
1972 	{
1973 	  ishighonly = 1;
1974 	  operand1 = XEXP (operand1, 0);
1975 	}
1976       if (symbolic_operand (operand1, mode))
1977 	{
1978 	  /* Argh.  The assembler and linker can't handle arithmetic
1979 	     involving plabels.
1980 
1981 	     So we force the plabel into memory, load operand0 from
1982 	     the memory location, then add in the constant part.  */
1983 	  if ((GET_CODE (operand1) == CONST
1984 	       && GET_CODE (XEXP (operand1, 0)) == PLUS
1985 	       && function_label_operand (XEXP (XEXP (operand1, 0), 0),
1986 					  VOIDmode))
1987 	      || function_label_operand (operand1, VOIDmode))
1988 	    {
1989 	      rtx temp, const_part;
1990 
1991 	      /* Figure out what (if any) scratch register to use.  */
1992 	      if (reload_in_progress || reload_completed)
1993 		{
1994 		  scratch_reg = scratch_reg ? scratch_reg : operand0;
1995 		  /* SCRATCH_REG will hold an address and maybe the actual
1996 		     data.  We want it in WORD_MODE regardless of what mode it
1997 		     was originally given to us.  */
1998 		  scratch_reg = force_mode (word_mode, scratch_reg);
1999 		}
2000 	      else if (flag_pic)
2001 		scratch_reg = gen_reg_rtx (Pmode);
2002 
2003 	      if (GET_CODE (operand1) == CONST)
2004 		{
2005 		  /* Save away the constant part of the expression.  */
2006 		  const_part = XEXP (XEXP (operand1, 0), 1);
2007 		  gcc_assert (GET_CODE (const_part) == CONST_INT);
2008 
2009 		  /* Force the function label into memory.  */
2010 		  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
2011 		}
2012 	      else
2013 		{
2014 		  /* No constant part.  */
2015 		  const_part = NULL_RTX;
2016 
2017 		  /* Force the function label into memory.  */
2018 		  temp = force_const_mem (mode, operand1);
2019 		}
2020 
2021 
2022 	      /* Get the address of the memory location.  PIC-ify it if
2023 		 necessary.  */
2024 	      temp = XEXP (temp, 0);
2025 	      if (flag_pic)
2026 		temp = legitimize_pic_address (temp, mode, scratch_reg);
2027 
2028 	      /* Put the address of the memory location into our destination
2029 		 register.  */
2030 	      operands[1] = temp;
2031 	      pa_emit_move_sequence (operands, mode, scratch_reg);
2032 
2033 	      /* Now load from the memory location into our destination
2034 		 register.  */
2035 	      operands[1] = gen_rtx_MEM (Pmode, operands[0]);
2036 	      pa_emit_move_sequence (operands, mode, scratch_reg);
2037 
2038 	      /* And add back in the constant part.  */
2039 	      if (const_part != NULL_RTX)
2040 		expand_inc (operand0, const_part);
2041 
2042 	      return 1;
2043 	    }
2044 
2045 	  if (flag_pic)
2046 	    {
2047 	      rtx_insn *insn;
2048 	      rtx temp;
2049 
2050 	      if (reload_in_progress || reload_completed)
2051 		{
2052 		  temp = scratch_reg ? scratch_reg : operand0;
2053 		  /* TEMP will hold an address and maybe the actual
2054 		     data.  We want it in WORD_MODE regardless of what mode it
2055 		     was originally given to us.  */
2056 		  temp = force_mode (word_mode, temp);
2057 		}
2058 	      else
2059 		temp = gen_reg_rtx (Pmode);
2060 
2061 	      /* Force (const (plus (symbol) (const_int))) to memory
2062 	         if the const_int will not fit in 14 bits.  Although
2063 		 this requires a relocation, the instruction sequence
2064 		 needed to load the value is shorter.  */
2065 	      if (GET_CODE (operand1) == CONST
2066 		       && GET_CODE (XEXP (operand1, 0)) == PLUS
2067 		       && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2068 		       && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
2069 		{
2070 		  rtx x, m = force_const_mem (mode, operand1);
2071 
2072 		  x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2073 		  x = replace_equiv_address (m, x);
2074 		  insn = emit_move_insn (operand0, x);
2075 		}
2076 	      else
2077 		{
2078 		  operands[1] = legitimize_pic_address (operand1, mode, temp);
2079 		  if (REG_P (operand0) && REG_P (operands[1]))
2080 		    copy_reg_pointer (operand0, operands[1]);
2081 		  insn = emit_move_insn (operand0, operands[1]);
2082 		}
2083 
2084 	      /* Put a REG_EQUAL note on this insn.  */
2085 	      set_unique_reg_note (insn, REG_EQUAL, operand1);
2086 	    }
2087 	  /* On the HPPA, references to data space are supposed to use dp,
2088 	     register 27, but showing it in the RTL inhibits various cse
2089 	     and loop optimizations.  */
2090 	  else
2091 	    {
2092 	      rtx temp, set;
2093 
2094 	      if (reload_in_progress || reload_completed)
2095 		{
2096 		  temp = scratch_reg ? scratch_reg : operand0;
2097 		  /* TEMP will hold an address and maybe the actual
2098 		     data.  We want it in WORD_MODE regardless of what mode it
2099 		     was originally given to us.  */
2100 		  temp = force_mode (word_mode, temp);
2101 		}
2102 	      else
2103 		temp = gen_reg_rtx (mode);
2104 
2105 	      /* Loading a SYMBOL_REF into a register makes that register
2106 		 safe to be used as the base in an indexed address.
2107 
2108 		 Don't mark hard registers though.  That loses.  */
2109 	      if (GET_CODE (operand0) == REG
2110 		  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2111 		mark_reg_pointer (operand0, BITS_PER_UNIT);
2112 	      if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2113 		mark_reg_pointer (temp, BITS_PER_UNIT);
2114 
2115 	      if (ishighonly)
2116 		set = gen_rtx_SET (mode, operand0, temp);
2117 	      else
2118 		set = gen_rtx_SET (VOIDmode,
2119 				   operand0,
2120 				   gen_rtx_LO_SUM (mode, temp, operand1));
2121 
2122 	      emit_insn (gen_rtx_SET (VOIDmode,
2123 				      temp,
2124 				      gen_rtx_HIGH (mode, operand1)));
2125 	      emit_insn (set);
2126 
2127 	    }
2128 	  return 1;
2129 	}
2130       else if (tls_referenced_p (operand1))
2131 	{
2132 	  rtx tmp = operand1;
2133 	  rtx addend = NULL;
2134 
2135 	  if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2136 	    {
2137 	      addend = XEXP (XEXP (tmp, 0), 1);
2138 	      tmp = XEXP (XEXP (tmp, 0), 0);
2139 	    }
2140 
2141 	  gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2142 	  tmp = legitimize_tls_address (tmp);
2143 	  if (addend)
2144 	    {
2145 	      tmp = gen_rtx_PLUS (mode, tmp, addend);
2146 	      tmp = force_operand (tmp, operands[0]);
2147 	    }
2148 	  operands[1] = tmp;
2149 	}
2150       else if (GET_CODE (operand1) != CONST_INT
2151 	       || !pa_cint_ok_for_move (INTVAL (operand1)))
2152 	{
2153 	  rtx temp;
2154 	  rtx_insn *insn;
2155 	  rtx op1 = operand1;
2156 	  HOST_WIDE_INT value = 0;
2157 	  HOST_WIDE_INT insv = 0;
2158 	  int insert = 0;
2159 
2160 	  if (GET_CODE (operand1) == CONST_INT)
2161 	    value = INTVAL (operand1);
2162 
2163 	  if (TARGET_64BIT
2164 	      && GET_CODE (operand1) == CONST_INT
2165 	      && HOST_BITS_PER_WIDE_INT > 32
2166 	      && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2167 	    {
2168 	      HOST_WIDE_INT nval;
2169 
2170 	      /* Extract the low order 32 bits of the value and sign extend.
2171 		 If the new value is the same as the original value, we can
2172 		 use the original value as-is.  If the new value is
2173 		 different, we use it and insert the most-significant 32-bits
2174 		 of the original value into the final result.  */
2175 	      nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2176 		      ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2177 	      if (value != nval)
2178 		{
2179 #if HOST_BITS_PER_WIDE_INT > 32
2180 		  insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2181 #endif
2182 		  insert = 1;
2183 		  value = nval;
2184 		  operand1 = GEN_INT (nval);
2185 		}
2186 	    }
2187 
2188 	  if (reload_in_progress || reload_completed)
2189 	    temp = scratch_reg ? scratch_reg : operand0;
2190 	  else
2191 	    temp = gen_reg_rtx (mode);
2192 
2193 	  /* We don't directly split DImode constants on 32-bit targets
2194 	     because PLUS uses an 11-bit immediate and the insn sequence
2195 	     generated is not as efficient as the one using HIGH/LO_SUM.  */
2196 	  if (GET_CODE (operand1) == CONST_INT
2197 	      && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2198 	      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2199 	      && !insert)
2200 	    {
2201 	      /* Directly break constant into high and low parts.  This
2202 		 provides better optimization opportunities because various
2203 		 passes recognize constants split with PLUS but not LO_SUM.
2204 		 We use a 14-bit signed low part except when the addition
2205 		 of 0x4000 to the high part might change the sign of the
2206 		 high part.  */
2207 	      HOST_WIDE_INT low = value & 0x3fff;
2208 	      HOST_WIDE_INT high = value & ~ 0x3fff;
2209 
2210 	      if (low >= 0x2000)
2211 		{
2212 		  if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2213 		    high += 0x2000;
2214 		  else
2215 		    high += 0x4000;
2216 		}
2217 
2218 	      low = value - high;
2219 
2220 	      emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
2221 	      operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2222 	    }
2223 	  else
2224 	    {
2225 	      emit_insn (gen_rtx_SET (VOIDmode, temp,
2226 				      gen_rtx_HIGH (mode, operand1)));
2227 	      operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2228 	    }
2229 
2230 	  insn = emit_move_insn (operands[0], operands[1]);
2231 
2232 	  /* Now insert the most significant 32 bits of the value
2233 	     into the register.  When we don't have a second register
2234 	     available, it could take up to nine instructions to load
2235 	     a 64-bit integer constant.  Prior to reload, we force
2236 	     constants that would take more than three instructions
2237 	     to load to the constant pool.  During and after reload,
2238 	     we have to handle all possible values.  */
2239 	  if (insert)
2240 	    {
2241 	      /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2242 		 register and the value to be inserted is outside the
2243 		 range that can be loaded with three depdi instructions.  */
2244 	      if (temp != operand0 && (insv >= 16384 || insv < -16384))
2245 		{
2246 		  operand1 = GEN_INT (insv);
2247 
2248 		  emit_insn (gen_rtx_SET (VOIDmode, temp,
2249 					  gen_rtx_HIGH (mode, operand1)));
2250 		  emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2251 		  if (mode == DImode)
2252 		    insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2253 						  const0_rtx, temp));
2254 		  else
2255 		    insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2256 						  const0_rtx, temp));
2257 		}
2258 	      else
2259 		{
2260 		  int len = 5, pos = 27;
2261 
2262 		  /* Insert the bits using the depdi instruction.  */
2263 		  while (pos >= 0)
2264 		    {
2265 		      HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2266 		      HOST_WIDE_INT sign = v5 < 0;
2267 
2268 		      /* Left extend the insertion.  */
2269 		      insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2270 		      while (pos > 0 && (insv & 1) == sign)
2271 			{
2272 			  insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2273 			  len += 1;
2274 			  pos -= 1;
2275 			}
2276 
2277 		      if (mode == DImode)
2278 			insn = emit_insn (gen_insvdi (operand0,
2279 						      GEN_INT (len),
2280 						      GEN_INT (pos),
2281 						      GEN_INT (v5)));
2282 		      else
2283 			insn = emit_insn (gen_insvsi (operand0,
2284 						      GEN_INT (len),
2285 						      GEN_INT (pos),
2286 						      GEN_INT (v5)));
2287 
2288 		      len = pos > 0 && pos < 5 ? pos : 5;
2289 		      pos -= len;
2290 		    }
2291 		}
2292 	    }
2293 
2294 	  set_unique_reg_note (insn, REG_EQUAL, op1);
2295 
2296 	  return 1;
2297 	}
2298     }
2299   /* Now have insn-emit do whatever it normally does.  */
2300   return 0;
2301 }
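
/* Illustrative sketch (editor's note): the movM expanders in pa.md
   call the routine above roughly as

     if (pa_emit_move_sequence (operands, SImode, 0))
       DONE;

   emitting the move themselves only when it returns 0.  The exact
   expander text lives in pa.md; this only shows the calling
   convention documented above.  */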
2302 
2303 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2304    it will need a link/runtime reloc).  */
2305 
2306 int
2307 pa_reloc_needed (tree exp)
2308 {
2309   int reloc = 0;
2310 
2311   switch (TREE_CODE (exp))
2312     {
2313     case ADDR_EXPR:
2314       return 1;
2315 
2316     case POINTER_PLUS_EXPR:
2317     case PLUS_EXPR:
2318     case MINUS_EXPR:
2319       reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2320       reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2321       break;
2322 
2323     CASE_CONVERT:
2324     case NON_LVALUE_EXPR:
2325       reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2326       break;
2327 
2328     case CONSTRUCTOR:
2329       {
2330 	tree value;
2331 	unsigned HOST_WIDE_INT ix;
2332 
2333 	FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2334 	  if (value)
2335 	    reloc |= pa_reloc_needed (value);
2336       }
2337       break;
2338 
2339     case ERROR_MARK:
2340       break;
2341 
2342     default:
2343       break;
2344     }
2345   return reloc;
2346 }
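
/* Example (editor's note): for the initializers

     static int x;
     static int *p = &x;   -- ADDR_EXPR, so pa_reloc_needed returns 1
     static int n = 42;    -- no address taken, returns 0

   only P's initializer needs a link/runtime reloc.  */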
2347 
2348 
2349 /* Return the best assembler insn template
2350    for moving operands[1] into operands[0] as a fullword.  */
2351 const char *
2352 pa_singlemove_string (rtx *operands)
2353 {
2354   HOST_WIDE_INT intval;
2355 
2356   if (GET_CODE (operands[0]) == MEM)
2357     return "stw %r1,%0";
2358   if (GET_CODE (operands[1]) == MEM)
2359     return "ldw %1,%0";
2360   if (GET_CODE (operands[1]) == CONST_DOUBLE)
2361     {
2362       long i;
2363       REAL_VALUE_TYPE d;
2364 
2365       gcc_assert (GET_MODE (operands[1]) == SFmode);
2366 
2367       /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2368 	 bit pattern.  */
2369       REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2370       REAL_VALUE_TO_TARGET_SINGLE (d, i);
2371 
2372       operands[1] = GEN_INT (i);
2373       /* Fall through to CONST_INT case.  */
2374     }
2375   if (GET_CODE (operands[1]) == CONST_INT)
2376     {
2377       intval = INTVAL (operands[1]);
2378 
2379       if (VAL_14_BITS_P (intval))
2380 	return "ldi %1,%0";
2381       else if ((intval & 0x7ff) == 0)
2382 	return "ldil L'%1,%0";
2383       else if (pa_zdepi_cint_p (intval))
2384 	return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2385       else
2386 	return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2387     }
2388   return "copy %1,%0";
2389 }
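
/* Worked examples (editor's note, derived from the cases above):

     (const_int 42)		-> "ldi %1,%0"	     (fits in 14 bits)
     (const_int 0x12345000)	-> "ldil L'%1,%0"    (low 11 bits zero)
     (const_int 0x12345678)	-> "ldil L'%1,%0\n\tldo R'%1(%0),%0"
     (reg)			-> "copy %1,%0"  */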
2390 
2391 
2392 /* Compute position (in OP[1]) and width (in OP[2])
2393    useful for copying IMM to a register using the zdepi
2394    instruction.  Store the immediate value to insert in OP[0].  */
2395 static void
2396 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2397 {
2398   int lsb, len;
2399 
2400   /* Find the least significant set bit in IMM.  */
2401   for (lsb = 0; lsb < 32; lsb++)
2402     {
2403       if ((imm & 1) != 0)
2404         break;
2405       imm >>= 1;
2406     }
2407 
2408   /* Choose variants based on *sign* of the 5-bit field.  */
2409   if ((imm & 0x10) == 0)
2410     len = (lsb <= 28) ? 4 : 32 - lsb;
2411   else
2412     {
2413       /* Find the width of the bitstring in IMM.  */
2414       for (len = 5; len < 32 - lsb; len++)
2415 	{
2416 	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2417 	    break;
2418 	}
2419 
2420       /* Sign extend IMM as a 5-bit value.  */
2421       imm = (imm & 0xf) - 0x10;
2422     }
2423 
2424   op[0] = imm;
2425   op[1] = 31 - lsb;
2426   op[2] = len;
2427 }
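
/* Worked example (editor's note): IMM = 0x7f0 has lsb = 4 and a
   contiguous bitstring of width 7, so the routine yields op[0] = -1,
   op[1] = 27 and op[2] = 7.  The resulting

     zdepi -1,27,7,%reg

   deposits seven sign-extended 1-bits with the rightmost bit of the
   field at PA bit 27 (bit 4 counting from the LSB), reconstructing
   0x7f0.  */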
2428 
2429 /* Compute position (in OP[1]) and width (in OP[2])
2430    useful for copying IMM to a register using the depdi,z
2431    instruction.  Store the immediate value to insert in OP[0].  */
2432 
2433 static void
2434 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2435 {
2436   int lsb, len, maxlen;
2437 
2438   maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2439 
2440   /* Find the least significant set bit in IMM.  */
2441   for (lsb = 0; lsb < maxlen; lsb++)
2442     {
2443       if ((imm & 1) != 0)
2444         break;
2445       imm >>= 1;
2446     }
2447 
2448   /* Choose variants based on *sign* of the 5-bit field.  */
2449   if ((imm & 0x10) == 0)
2450     len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2451   else
2452     {
2453       /* Find the width of the bitstring in IMM.  */
2454       for (len = 5; len < maxlen - lsb; len++)
2455 	{
2456 	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2457 	    break;
2458 	}
2459 
2460       /* Extend length if host is narrow and IMM is negative.  */
2461       if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2462 	len += 32;
2463 
2464       /* Sign extend IMM as a 5-bit value.  */
2465       imm = (imm & 0xf) - 0x10;
2466     }
2467 
2468   op[0] = imm;
2469   op[1] = 63 - lsb;
2470   op[2] = len;
2471 }
2472 
2473 /* Output assembler code to perform a doubleword move insn
2474    with operands OPERANDS.  */
2475 
2476 const char *
2477 pa_output_move_double (rtx *operands)
2478 {
2479   enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2480   rtx latehalf[2];
2481   rtx addreg0 = 0, addreg1 = 0;
2482   int highonly = 0;
2483 
2484   /* First classify both operands.  */
2485 
2486   if (REG_P (operands[0]))
2487     optype0 = REGOP;
2488   else if (offsettable_memref_p (operands[0]))
2489     optype0 = OFFSOP;
2490   else if (GET_CODE (operands[0]) == MEM)
2491     optype0 = MEMOP;
2492   else
2493     optype0 = RNDOP;
2494 
2495   if (REG_P (operands[1]))
2496     optype1 = REGOP;
2497   else if (CONSTANT_P (operands[1]))
2498     optype1 = CNSTOP;
2499   else if (offsettable_memref_p (operands[1]))
2500     optype1 = OFFSOP;
2501   else if (GET_CODE (operands[1]) == MEM)
2502     optype1 = MEMOP;
2503   else
2504     optype1 = RNDOP;
2505 
2506   /* Check for cases that the operand constraints are not
2507      supposed to allow.  */
2508   gcc_assert (optype0 == REGOP || optype1 == REGOP);
2509 
2510   /* Handle copies between general and floating registers.  */
2511 
2512   if (optype0 == REGOP && optype1 == REGOP
2513       && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2514     {
2515       if (FP_REG_P (operands[0]))
2516 	{
2517 	  output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2518 	  output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2519 	  return "{fldds|fldd} -16(%%sp),%0";
2520 	}
2521       else
2522 	{
2523 	  output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2524 	  output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2525 	  return "{ldws|ldw} -12(%%sp),%R0";
2526 	}
2527     }
2528 
2529   /* Handle auto-decrementing and auto-incrementing loads and stores
2530      specifically, since the structure of the function doesn't work
2531      for them without major modification.  Do it better when we teach
2532      this port about the general inc/dec addressing of the PA.
2533      (This was written by tege.  Chide him if it doesn't work.)  */
2534 
2535   if (optype0 == MEMOP)
2536     {
2537       /* We have to output the address syntax ourselves, since print_operand
2538 	 doesn't deal with the addresses we want to use.  Fix this later.  */
2539 
2540       rtx addr = XEXP (operands[0], 0);
2541       if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2542 	{
2543 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2544 
2545 	  operands[0] = XEXP (addr, 0);
2546 	  gcc_assert (GET_CODE (operands[1]) == REG
2547 		      && GET_CODE (operands[0]) == REG);
2548 
2549 	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2550 
2551 	  /* No overlap between high target register and address
2552 	     register.  (We do this in a non-obvious way to
2553 	     save a register file writeback)  */
2554 	  if (GET_CODE (addr) == POST_INC)
2555 	    return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2556 	  return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2557 	}
2558       else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2559 	{
2560 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2561 
2562 	  operands[0] = XEXP (addr, 0);
2563 	  gcc_assert (GET_CODE (operands[1]) == REG
2564 		      && GET_CODE (operands[0]) == REG);
2565 
2566 	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2567 	  /* No overlap between high target register and address
2568 	     register.  (We do this in a non-obvious way to save a
2569 	     register file writeback)  */
2570 	  if (GET_CODE (addr) == PRE_INC)
2571 	    return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2572 	  return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2573 	}
2574     }
2575   if (optype1 == MEMOP)
2576     {
2577       /* We have to output the address syntax ourselves, since print_operand
2578 	 doesn't deal with the addresses we want to use.  Fix this later.  */
2579 
2580       rtx addr = XEXP (operands[1], 0);
2581       if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2582 	{
2583 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2584 
2585 	  operands[1] = XEXP (addr, 0);
2586 	  gcc_assert (GET_CODE (operands[0]) == REG
2587 		      && GET_CODE (operands[1]) == REG);
2588 
2589 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2590 	    {
2591 	      /* No overlap between high target register and address
2592 		 register.  (We do this in a non-obvious way to
2593 		 save a register file writeback)  */
2594 	      if (GET_CODE (addr) == POST_INC)
2595 		return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2596 	      return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2597 	    }
2598 	  else
2599 	    {
2600 	      /* This is an undefined situation.  We should load into the
2601 		 address register *and* update that register.  Probably
2602 		 we don't need to handle this at all.  */
2603 	      if (GET_CODE (addr) == POST_INC)
2604 		return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2605 	      return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2606 	    }
2607 	}
2608       else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2609 	{
2610 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2611 
2612 	  operands[1] = XEXP (addr, 0);
2613 	  gcc_assert (GET_CODE (operands[0]) == REG
2614 		      && GET_CODE (operands[1]) == REG);
2615 
2616 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2617 	    {
2618 	      /* No overlap between high target register and address
2619 		 register.  (We do this in a non-obvious way to
2620 		 save a register file writeback)  */
2621 	      if (GET_CODE (addr) == PRE_INC)
2622 		return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2623 	      return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2624 	    }
2625 	  else
2626 	    {
2627 	      /* This is an undefined situation.  We should load into the
2628 		 address register *and* update that register.  Probably
2629 		 we don't need to handle this at all.  */
2630 	      if (GET_CODE (addr) == PRE_INC)
2631 		return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2632 	      return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2633 	    }
2634 	}
2635       else if (GET_CODE (addr) == PLUS
2636 	       && GET_CODE (XEXP (addr, 0)) == MULT)
2637 	{
2638 	  rtx xoperands[4];
2639 
2640 	  /* Load address into left half of destination register.  */
2641 	  xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2642 	  xoperands[1] = XEXP (addr, 1);
2643 	  xoperands[2] = XEXP (XEXP (addr, 0), 0);
2644 	  xoperands[3] = XEXP (XEXP (addr, 0), 1);
2645 	  output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2646 			   xoperands);
2647 	  return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2648 	}
2649       else if (GET_CODE (addr) == PLUS
2650 	       && REG_P (XEXP (addr, 0))
2651 	       && REG_P (XEXP (addr, 1)))
2652 	{
2653 	  rtx xoperands[3];
2654 
2655 	  /* Load address into left half of destination register.  */
2656 	  xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2657 	  xoperands[1] = XEXP (addr, 0);
2658 	  xoperands[2] = XEXP (addr, 1);
2659 	  output_asm_insn ("{addl|add,l} %1,%2,%0",
2660 			   xoperands);
2661 	  return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2662 	}
2663     }
2664 
2665   /* If an operand is an unoffsettable memory ref, find a register
2666      we can increment temporarily to make it refer to the second word.  */
2667 
2668   if (optype0 == MEMOP)
2669     addreg0 = find_addr_reg (XEXP (operands[0], 0));
2670 
2671   if (optype1 == MEMOP)
2672     addreg1 = find_addr_reg (XEXP (operands[1], 0));
2673 
2674   /* Ok, we can do one word at a time.
2675      Normally we do the low-numbered word first.
2676 
2677      In either case, set up in LATEHALF the operands to use
2678      for the high-numbered word and in some cases alter the
2679      operands in OPERANDS to be suitable for the low-numbered word.  */
2680 
2681   if (optype0 == REGOP)
2682     latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2683   else if (optype0 == OFFSOP)
2684     latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
2685   else
2686     latehalf[0] = operands[0];
2687 
2688   if (optype1 == REGOP)
2689     latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2690   else if (optype1 == OFFSOP)
2691     latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
2692   else if (optype1 == CNSTOP)
2693     {
2694       if (GET_CODE (operands[1]) == HIGH)
2695 	{
2696 	  operands[1] = XEXP (operands[1], 0);
2697 	  highonly = 1;
2698 	}
2699       split_double (operands[1], &operands[1], &latehalf[1]);
2700     }
2701   else
2702     latehalf[1] = operands[1];
2703 
2704   /* If the first move would clobber the source of the second one,
2705      do them in the other order.
2706 
2707      This can happen in two cases:
2708 
2709 	mem -> register where the first half of the destination register
2710  	is the same register used in the memory's address.  Reload
2711 	can create such insns.
2712 
2713 	mem in this case will be either register indirect or register
2714 	indirect plus a valid offset.
2715 
2716 	register -> register move where REGNO(dst) == REGNO(src) + 1.
2717 	Someone (Tim/Tege?) claimed this can happen for parameter loads.
2718 
2719      Handle mem -> register case first.  */
2720   if (optype0 == REGOP
2721       && (optype1 == MEMOP || optype1 == OFFSOP)
2722       && refers_to_regno_p (REGNO (operands[0]), operands[1]))
2723     {
2724       /* Do the late half first.  */
2725       if (addreg1)
2726 	output_asm_insn ("ldo 4(%0),%0", &addreg1);
2727       output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2728 
2729       /* Then clobber.  */
2730       if (addreg1)
2731 	output_asm_insn ("ldo -4(%0),%0", &addreg1);
2732       return pa_singlemove_string (operands);
2733     }
2734 
2735   /* Now handle register -> register case.  */
2736   if (optype0 == REGOP && optype1 == REGOP
2737       && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2738     {
2739       output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2740       return pa_singlemove_string (operands);
2741     }
2742 
2743   /* Normal case: do the two words, low-numbered first.  */
2744 
2745   output_asm_insn (pa_singlemove_string (operands), operands);
2746 
2747   /* Make any unoffsettable addresses point at high-numbered word.  */
2748   if (addreg0)
2749     output_asm_insn ("ldo 4(%0),%0", &addreg0);
2750   if (addreg1)
2751     output_asm_insn ("ldo 4(%0),%0", &addreg1);
2752 
2753   /* Do high-numbered word.  */
2754   if (highonly)
2755     output_asm_insn ("ldil L'%1,%0", latehalf);
2756   else
2757     output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2758 
2759   /* Undo the adds we just did.  */
2760   if (addreg0)
2761     output_asm_insn ("ldo -4(%0),%0", &addreg0);
2762   if (addreg1)
2763     output_asm_insn ("ldo -4(%0),%0", &addreg1);
2764 
2765   return "";
2766 }
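
/* Example (editor's note): for the overlapping register-to-register
   case REGNO(dst) == REGNO(src) + 1, say moving the DImode pair
   r4:r5 into r5:r6, the routine above emits the halves late-half
   first so the source is not clobbered:

     copy %r5,%r6
     copy %r4,%r5  */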
2767 
2768 const char *
2769 pa_output_fp_move_double (rtx *operands)
2770 {
2771   if (FP_REG_P (operands[0]))
2772     {
2773       if (FP_REG_P (operands[1])
2774 	  || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2775 	output_asm_insn ("fcpy,dbl %f1,%0", operands);
2776       else
2777 	output_asm_insn ("fldd%F1 %1,%0", operands);
2778     }
2779   else if (FP_REG_P (operands[1]))
2780     {
2781       output_asm_insn ("fstd%F0 %1,%0", operands);
2782     }
2783   else
2784     {
2785       rtx xoperands[2];
2786 
2787       gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2788 
2789       /* This is a pain.  You have to be prepared to deal with an
2790 	 arbitrary address here, including pre/post increment/decrement.
2791 
2792 	 So avoid this in the MD.  */
2793       gcc_assert (GET_CODE (operands[0]) == REG);
2794 
2795       xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2796       xoperands[0] = operands[0];
2797       output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2798     }
2799   return "";
2800 }
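
/* Note (editor's): zeroing a general-register pair here relies on the
   hardwired zero in %r0; for example, a DFmode destination in r4:r5
   produces

     copy %r0,%r4
     copy %r0,%r5  */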
2801 
2802 /* Return a REG that occurs in ADDR with coefficient 1.
2803    ADDR can be effectively incremented by incrementing REG.  */
2804 
2805 static rtx
2806 find_addr_reg (rtx addr)
2807 {
2808   while (GET_CODE (addr) == PLUS)
2809     {
2810       if (GET_CODE (XEXP (addr, 0)) == REG)
2811 	addr = XEXP (addr, 0);
2812       else if (GET_CODE (XEXP (addr, 1)) == REG)
2813 	addr = XEXP (addr, 1);
2814       else if (CONSTANT_P (XEXP (addr, 0)))
2815 	addr = XEXP (addr, 1);
2816       else if (CONSTANT_P (XEXP (addr, 1)))
2817 	addr = XEXP (addr, 0);
2818       else
2819 	gcc_unreachable ();
2820     }
2821   gcc_assert (GET_CODE (addr) == REG);
2822   return addr;
2823 }
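
/* Example (editor's note): for ADDR = (plus (plus (reg r3) (const_int 8))
   (const_int 4)) the loop above peels off the constants and returns
   r3, the register that can be temporarily incremented to reach the
   second word.  */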
2824 
2825 /* Emit code to perform a block move.
2826 
2827    OPERANDS[0] is the destination pointer as a REG, clobbered.
2828    OPERANDS[1] is the source pointer as a REG, clobbered.
2829    OPERANDS[2] is a register for temporary storage.
2830    OPERANDS[3] is a register for temporary storage.
2831    OPERANDS[4] is the size as a CONST_INT
2832    OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2833    OPERANDS[6] is another temporary register.  */
2834 
2835 const char *
2836 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2837 {
2838   HOST_WIDE_INT align = INTVAL (operands[5]);
2839   unsigned HOST_WIDE_INT n_bytes = INTVAL (operands[4]);
2840 
2841   /* We can't move more than a word at a time because the PA
2842      has no integer move insns longer than a word.  (Could use fp mem ops?)  */
2843   if (align > (TARGET_64BIT ? 8 : 4))
2844     align = (TARGET_64BIT ? 8 : 4);
2845 
2846   /* Note that we know each loop below will execute at least twice
2847      (else we would have open-coded the copy).  */
2848   switch (align)
2849     {
2850       case 8:
2851 	/* Pre-adjust the loop counter.  */
2852 	operands[4] = GEN_INT (n_bytes - 16);
2853 	output_asm_insn ("ldi %4,%2", operands);
2854 
2855 	/* Copying loop.  */
2856 	output_asm_insn ("ldd,ma 8(%1),%3", operands);
2857 	output_asm_insn ("ldd,ma 8(%1),%6", operands);
2858 	output_asm_insn ("std,ma %3,8(%0)", operands);
2859 	output_asm_insn ("addib,>= -16,%2,.-12", operands);
2860 	output_asm_insn ("std,ma %6,8(%0)", operands);
2861 
2862 	/* Handle the residual.  There could be up to 15 bytes of
2863 	   residual to copy!  */
2864 	if (n_bytes % 16 != 0)
2865 	  {
2866 	    operands[4] = GEN_INT (n_bytes % 8);
2867 	    if (n_bytes % 16 >= 8)
2868 	      output_asm_insn ("ldd,ma 8(%1),%3", operands);
2869 	    if (n_bytes % 8 != 0)
2870 	      output_asm_insn ("ldd 0(%1),%6", operands);
2871 	    if (n_bytes % 16 >= 8)
2872 	      output_asm_insn ("std,ma %3,8(%0)", operands);
2873 	    if (n_bytes % 8 != 0)
2874 	      output_asm_insn ("stdby,e %6,%4(%0)", operands);
2875 	  }
2876 	return "";
2877 
2878       case 4:
2879 	/* Pre-adjust the loop counter.  */
2880 	operands[4] = GEN_INT (n_bytes - 8);
2881 	output_asm_insn ("ldi %4,%2", operands);
2882 
2883 	/* Copying loop.  */
2884 	output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2885 	output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2886 	output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2887 	output_asm_insn ("addib,>= -8,%2,.-12", operands);
2888 	output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2889 
2890 	/* Handle the residual.  There could be up to 7 bytes of
2891 	   residual to copy!  */
2892 	if (n_bytes % 8 != 0)
2893 	  {
2894 	    operands[4] = GEN_INT (n_bytes % 4);
2895 	    if (n_bytes % 8 >= 4)
2896 	      output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2897 	    if (n_bytes % 4 != 0)
2898 	      output_asm_insn ("ldw 0(%1),%6", operands);
2899 	    if (n_bytes % 8 >= 4)
2900 	      output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2901 	    if (n_bytes % 4 != 0)
2902 	      output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2903 	  }
2904 	return "";
2905 
2906       case 2:
2907 	/* Pre-adjust the loop counter.  */
2908 	operands[4] = GEN_INT (n_bytes - 4);
2909 	output_asm_insn ("ldi %4,%2", operands);
2910 
2911 	/* Copying loop.  */
2912 	output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2913 	output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2914 	output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2915 	output_asm_insn ("addib,>= -4,%2,.-12", operands);
2916 	output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2917 
2918 	/* Handle the residual.  */
2919 	if (n_bytes % 4 != 0)
2920 	  {
2921 	    if (n_bytes % 4 >= 2)
2922 	      output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2923 	    if (n_bytes % 2 != 0)
2924 	      output_asm_insn ("ldb 0(%1),%6", operands);
2925 	    if (n_bytes % 4 >= 2)
2926 	      output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2927 	    if (n_bytes % 2 != 0)
2928 	      output_asm_insn ("stb %6,0(%0)", operands);
2929 	  }
2930 	return "";
2931 
2932       case 1:
2933 	/* Pre-adjust the loop counter.  */
2934 	operands[4] = GEN_INT (n_bytes - 2);
2935 	output_asm_insn ("ldi %4,%2", operands);
2936 
2937 	/* Copying loop.  */
2938 	output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2939 	output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2940 	output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2941 	output_asm_insn ("addib,>= -2,%2,.-12", operands);
2942 	output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2943 
2944 	/* Handle the residual.  */
2945 	if (n_bytes % 2 != 0)
2946 	  {
2947 	    output_asm_insn ("ldb 0(%1),%3", operands);
2948 	    output_asm_insn ("stb %3,0(%0)", operands);
2949 	  }
2950 	return "";
2951 
2952       default:
2953 	gcc_unreachable ();
2954     }
2955 }
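
/* Example (editor's note): a word-aligned copy of 11 bytes (align == 4)
   expands to the unrolled loop plus sub-word residual handling; with
   purely illustrative register numbers:

     ldi 3,%r24		; loop counter = 11 - 8
     ldws,ma 4(%r25),%r23
     ldws,ma 4(%r25),%r22
     stws,ma %r23,4(%r26)
     addib,>= -8,%r24,.-12
     stws,ma %r22,4(%r26)
     ldw 0(%r25),%r22	; 3 residual bytes
     stbys,e %r22,3(%r26)  */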
2956 
2957 /* Count the number of insns necessary to handle this block move.
2958 
2959    Basic structure is the same as pa_output_block_move, except that we
2960    count insns rather than emit them.  */
2961 
2962 static int
2963 compute_movmem_length (rtx_insn *insn)
2964 {
2965   rtx pat = PATTERN (insn);
2966   unsigned HOST_WIDE_INT align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2967   unsigned HOST_WIDE_INT n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2968   unsigned int n_insns = 0;
2969 
2970   /* We can't move more than a word at a time because the PA
2971      has no integer move insns longer than a word.  (Could use fp mem ops?)  */
2972   if (align > (TARGET_64BIT ? 8 : 4))
2973     align = (TARGET_64BIT ? 8 : 4);
2974 
2975   /* The basic copying loop.  */
2976   n_insns = 6;
2977 
2978   /* Residuals.  */
2979   if (n_bytes % (2 * align) != 0)
2980     {
2981       if ((n_bytes % (2 * align)) >= align)
2982 	n_insns += 2;
2983 
2984       if ((n_bytes % align) != 0)
2985 	n_insns += 2;
2986     }
2987 
2988   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
2989   return n_insns * 4;
2990 }
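
/* Worked example (editor's note): align == 4 and n_bytes == 11 gives
   the 6-insn loop, and 11 % 8 == 3 adds only the sub-word residual
   pair, so the length is (6 + 2) * 4 == 32 bytes; this matches the
   eight insns pa_output_block_move emits for the same case.  */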
2991 
2992 /* Emit code to perform a block clear.
2993 
2994    OPERANDS[0] is the destination pointer as a REG, clobbered.
2995    OPERANDS[1] is a register for temporary storage.
2996    OPERANDS[2] is the size as a CONST_INT
2997    OPERANDS[3] is the alignment safe to use, as a CONST_INT.  */
2998 
2999 const char *
3000 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3001 {
3002   HOST_WIDE_INT align = INTVAL (operands[3]);
3003   unsigned HOST_WIDE_INT n_bytes = INTVAL (operands[2]);
3004 
3005   /* We can't clear more than a word at a time because the PA
3006      has no integer move insns longer than a word.  */
3007   if (align > (TARGET_64BIT ? 8 : 4))
3008     align = (TARGET_64BIT ? 8 : 4);
3009 
3010   /* Note that we know each loop below will execute at least twice
3011      (else we would have open-coded the clear).  */
3012   switch (align)
3013     {
3014       case 8:
3015 	/* Pre-adjust the loop counter.  */
3016 	operands[2] = GEN_INT (n_bytes - 16);
3017 	output_asm_insn ("ldi %2,%1", operands);
3018 
3019 	/* Loop.  */
3020 	output_asm_insn ("std,ma %%r0,8(%0)", operands);
3021 	output_asm_insn ("addib,>= -16,%1,.-4", operands);
3022 	output_asm_insn ("std,ma %%r0,8(%0)", operands);
3023 
3024 	/* Handle the residual.  There could be up to 15 bytes of
3025 	   residual to clear!  */
3026 	if (n_bytes % 16 != 0)
3027 	  {
3028 	    operands[2] = GEN_INT (n_bytes % 8);
3029 	    if (n_bytes % 16 >= 8)
3030 	      output_asm_insn ("std,ma %%r0,8(%0)", operands);
3031 	    if (n_bytes % 8 != 0)
3032 	      output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
3033 	  }
3034 	return "";
3035 
3036       case 4:
3037 	/* Pre-adjust the loop counter.  */
3038 	operands[2] = GEN_INT (n_bytes - 8);
3039 	output_asm_insn ("ldi %2,%1", operands);
3040 
3041 	/* Loop.  */
3042 	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3043 	output_asm_insn ("addib,>= -8,%1,.-4", operands);
3044 	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3045 
3046 	/* Handle the residual.  There could be up to 7 bytes of
3047 	   residual to clear!  */
3048 	if (n_bytes % 8 != 0)
3049 	  {
3050 	    operands[2] = GEN_INT (n_bytes % 4);
3051 	    if (n_bytes % 8 >= 4)
3052 	      output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3053 	    if (n_bytes % 4 != 0)
3054 	      output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
3055 	  }
3056 	return "";
3057 
3058       case 2:
3059 	/* Pre-adjust the loop counter.  */
3060 	operands[2] = GEN_INT (n_bytes - 4);
3061 	output_asm_insn ("ldi %2,%1", operands);
3062 
3063 	/* Loop.  */
3064 	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3065 	output_asm_insn ("addib,>= -4,%1,.-4", operands);
3066 	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3067 
3068 	/* Handle the residual.  */
3069 	if (n_bytes % 4 != 0)
3070 	  {
3071 	    if (n_bytes % 4 >= 2)
3072 	      output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3073 	    if (n_bytes % 2 != 0)
3074 	      output_asm_insn ("stb %%r0,0(%0)", operands);
3075 	  }
3076 	return "";
3077 
3078       case 1:
3079 	/* Pre-adjust the loop counter.  */
3080 	operands[2] = GEN_INT (n_bytes - 2);
3081 	output_asm_insn ("ldi %2,%1", operands);
3082 
3083 	/* Loop.  */
3084 	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3085 	output_asm_insn ("addib,>= -2,%1,.-4", operands);
3086 	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3087 
3088 	/* Handle the residual.  */
3089 	if (n_bytes % 2 != 0)
3090 	  output_asm_insn ("stb %%r0,0(%0)", operands);
3091 
3092 	return "";
3093 
3094       default:
3095 	gcc_unreachable ();
3096     }
3097 }
3098 
3099 /* Count the number of insns necessary to handle this block clear.
3100 
3101    Basic structure is the same as pa_output_block_clear, except that we
3102    count insns rather than emit them.  */
3103 
3104 static int
3105 compute_clrmem_length (rtx_insn *insn)
3106 {
3107   rtx pat = PATTERN (insn);
3108   unsigned HOST_WIDE_INT align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3109   unsigned HOST_WIDE_INT n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3110   unsigned int n_insns = 0;
3111 
3112   /* We can't clear more than a word at a time because the PA
3113      has no integer move insns longer than a word.  */
3114   if (align > (TARGET_64BIT ? 8 : 4))
3115     align = (TARGET_64BIT ? 8 : 4);
3116 
3117   /* The basic loop.  */
3118   n_insns = 4;
3119 
3120   /* Residuals.  */
3121   if (n_bytes % (2 * align) != 0)
3122     {
3123       if ((n_bytes % (2 * align)) >= align)
3124 	n_insns++;
3125 
3126       if ((n_bytes % align) != 0)
3127 	n_insns++;
3128     }
3129 
3130   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
3131   return n_insns * 4;
3132 }
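
/* Worked example (editor's note): align == 4 and n_bytes == 11 gives
   the 4-insn loop, and 11 % 8 == 3 adds only the final sub-word
   store, so the length is (4 + 1) * 4 == 20 bytes.  */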
3133 
3134 
3135 const char *
3136 pa_output_and (rtx *operands)
3137 {
3138   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3139     {
3140       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3141       int ls0, ls1, ms0, p, len;
3142 
3143       for (ls0 = 0; ls0 < 32; ls0++)
3144 	if ((mask & (1 << ls0)) == 0)
3145 	  break;
3146 
3147       for (ls1 = ls0; ls1 < 32; ls1++)
3148 	if ((mask & (1 << ls1)) != 0)
3149 	  break;
3150 
3151       for (ms0 = ls1; ms0 < 32; ms0++)
3152 	if ((mask & (1 << ms0)) == 0)
3153 	  break;
3154 
3155       gcc_assert (ms0 == 32);
3156 
3157       if (ls1 == 32)
3158 	{
3159 	  len = ls0;
3160 
3161 	  gcc_assert (len);
3162 
3163 	  operands[2] = GEN_INT (len);
3164 	  return "{extru|extrw,u} %1,31,%2,%0";
3165 	}
3166       else
3167 	{
3168 	  /* We could use this `depi' for the case above as well, but `depi'
3169 	     requires one more register file access than an `extru'.  */
3170 
3171 	  p = 31 - ls0;
3172 	  len = ls1 - ls0;
3173 
3174 	  operands[2] = GEN_INT (p);
3175 	  operands[3] = GEN_INT (len);
3176 	  return "{depi|depwi} 0,%2,%3,%0";
3177 	}
3178     }
3179   else
3180     return "and %1,%2,%0";
3181 }
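
/* Worked examples (editor's note): a mask keeping only low bits
   becomes an extract, while a mask with a single hole of zeros
   becomes a deposit of zeros into the hole:

     mask 0x000000ff -> "{extru|extrw,u} %1,31,8,%0"
     mask 0xffff00ff -> "{depi|depwi} 0,23,8,%0"

   Any other nonzero constant mask trips the gcc_assert above; such
   operands are expected to be rejected by the insn's predicate.  */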
3182 
3183 /* Return a string to perform a bitwise-and of operands[1] with operands[2],
3184    storing the result in operands[0].  */
3185 const char *
3186 pa_output_64bit_and (rtx *operands)
3187 {
3188   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3189     {
3190       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3191       int ls0, ls1, ms0, p, len;
3192 
3193       for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3194 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3195 	  break;
3196 
3197       for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3198 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3199 	  break;
3200 
3201       for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3202 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3203 	  break;
3204 
3205       gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3206 
3207       if (ls1 == HOST_BITS_PER_WIDE_INT)
3208 	{
3209 	  len = ls0;
3210 
3211 	  gcc_assert (len);
3212 
3213 	  operands[2] = GEN_INT (len);
3214 	  return "extrd,u %1,63,%2,%0";
3215 	}
3216       else
3217 	{
3218 	  /* We could use this `depdi' for the case above as well, but `depdi'
3219 	     requires one more register file access than an `extrd,u'.  */
3220 
3221 	  p = 63 - ls0;
3222 	  len = ls1 - ls0;
3223 
3224 	  operands[2] = GEN_INT (p);
3225 	  operands[3] = GEN_INT (len);
3226 	  return "depdi 0,%2,%3,%0";
3227 	}
3228     }
3229   else
3230     return "and %1,%2,%0";
3231 }
3232 
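/* Return a string to perform a bitwise inclusive-or of operands[1]
   with operands[2], storing the result in operands[0].  */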
3233 const char *
3234 pa_output_ior (rtx *operands)
3235 {
3236   unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3237   int bs0, bs1, p, len;
3238 
3239   if (INTVAL (operands[2]) == 0)
3240     return "copy %1,%0";
3241 
3242   for (bs0 = 0; bs0 < 32; bs0++)
3243     if ((mask & (1 << bs0)) != 0)
3244       break;
3245 
3246   for (bs1 = bs0; bs1 < 32; bs1++)
3247     if ((mask & (1 << bs1)) == 0)
3248       break;
3249 
3250   gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3251 
3252   p = 31 - bs0;
3253   len = bs1 - bs0;
3254 
3255   operands[2] = GEN_INT (p);
3256   operands[3] = GEN_INT (len);
3257   return "{depi|depwi} -1,%2,%3,%0";
3258 }
3259 
3260 /* Return a string to perform a bitwise inclusive-or of operands[1] with
3261    operands[2], storing the result in operands[0].  */
3262 const char *
3263 pa_output_64bit_ior (rtx *operands)
3264 {
3265   unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3266   int bs0, bs1, p, len;
3267 
3268   if (INTVAL (operands[2]) == 0)
3269     return "copy %1,%0";
3270 
3271   for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3272     if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3273       break;
3274 
3275   for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3276     if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3277       break;
3278 
3279   gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3280 	      || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3281 
3282   p = 63 - bs0;
3283   len = bs1 - bs0;
3284 
3285   operands[2] = GEN_INT (p);
3286   operands[3] = GEN_INT (len);
3287   return "depdi -1,%2,%3,%0";
3288 }
3289 
3290 /* Target hook for assembling integer objects.  This code handles
3291    aligned SI and DI integers specially since function references
3292    must be preceded by P%.  */
3293 
3294 static bool
3295 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3296 {
3297   if (size == UNITS_PER_WORD
3298       && aligned_p
3299       && function_label_operand (x, VOIDmode))
3300     {
3301       fputs (size == 8 ? "\t.dword\t" : "\t.word\t", asm_out_file);
3302 
3303       /* We don't want an OPD when generating fast indirect calls.  */
3304       if (!TARGET_FAST_INDIRECT_CALLS)
3305 	fputs ("P%", asm_out_file);
3306 
3307       output_addr_const (asm_out_file, x);
3308       fputc ('\n', asm_out_file);
3309       return true;
3310     }
3311   return default_assemble_integer (x, size, aligned_p);
3312 }
3313 
3314 /* Output an ASCII string.  */
3315 void
3316 pa_output_ascii (FILE *file, const char *p, int size)
3317 {
3318   int i;
3319   int chars_output;
3320   unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */
3321 
3322   /* The HP assembler can only take strings of 256 characters at one
3323      time.  This is a limitation on input line length, *not* the
3324      length of the string.  Sigh.  Even worse, it seems that the
3325      restriction is in number of input characters (see \xnn &
3326      \whatever).  So we have to do this very carefully.  */
3327 
3328   fputs ("\t.STRING \"", file);
3329 
3330   chars_output = 0;
3331   for (i = 0; i < size; i += 4)
3332     {
3333       int co = 0;
3334       int io = 0;
3335       for (io = 0, co = 0; io < MIN (4, size - i); io++)
3336 	{
3337 	  register unsigned int c = (unsigned char) p[i + io];
3338 
3339 	  if (c == '\"' || c == '\\')
3340 	    partial_output[co++] = '\\';
3341 	  if (c >= ' ' && c < 0177)
3342 	    partial_output[co++] = c;
3343 	  else
3344 	    {
3345 	      unsigned int hexd;
3346 	      partial_output[co++] = '\\';
3347 	      partial_output[co++] = 'x';
3348 	      hexd = c / 16 + '0';
3349 	      if (hexd > '9')
3350 		hexd -= '9' - 'a' + 1;
3351 	      partial_output[co++] = hexd;
3352 	      hexd = c % 16 + '0';
3353 	      if (hexd > '9')
3354 		hexd -= '9' - 'a' + 1;
3355 	      partial_output[co++] = hexd;
3356 	    }
3357 	}
3358       if (chars_output + co > 243)
3359 	{
3360 	  fputs ("\"\n\t.STRING \"", file);
3361 	  chars_output = 0;
3362 	}
3363       fwrite (partial_output, 1, (size_t) co, file);
3364       chars_output += co;
3365       co = 0;
3366     }
3367   fputs ("\"\n", file);
3368 }
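/* For example (illustrative only): pa_output_ascii (file, "a\"b\n", 4)
   emits

	.STRING "a\"b\x0a"

   quoting the '"' and expressing the unprintable newline as a
   two-digit hex escape, while long strings are split across multiple
   .STRING directives to respect the assembler's input line limit.  */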
3369 
3370 /* Try to rewrite floating point comparisons & branches to avoid
3371    useless add,tr insns.
3372 
3373    CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3374    to see if FPCC is dead.  CHECK_NOTES is nonzero for the
3375    first attempt to remove useless add,tr insns.  It is zero
3376    for the second pass as reorg sometimes leaves bogus REG_DEAD
3377    notes lying around.
3378 
3379    When CHECK_NOTES is zero we can only eliminate add,tr insns
3380    when there's a 1:1 correspondence between fcmp and ftest/fbranch
3381    instructions.  */
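/* In outline (a sketch, not literal assembler output): a reversed FP
   branch is emitted as an ftest followed by an always-nullifying
   add,tr that skips over the branch, costing an extra instruction.
   Reversing both the fcmp condition (via
   reverse_condition_maybe_unordered, so unordered operands remain
   handled) and the arms of the branch turns it into the cheap forward
   form, and the add,tr disappears.  */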
3382 static void
3383 remove_useless_addtr_insns (int check_notes)
3384 {
3385   rtx_insn *insn;
3386   static int pass = 0;
3387 
3388   /* This is fairly cheap, so always run it when optimizing.  */
3389   if (optimize > 0)
3390     {
3391       int fcmp_count = 0;
3392       int fbranch_count = 0;
3393 
3394       /* Walk all the insns in this function looking for fcmp & fbranch
3395 	 instructions.  Keep track of how many of each we find.  */
3396       for (insn = get_insns (); insn; insn = next_insn (insn))
3397 	{
3398 	  rtx tmp;
3399 
3400 	  /* Ignore anything that isn't an INSN or a JUMP_INSN.  */
3401 	  if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
3402 	    continue;
3403 
3404 	  tmp = PATTERN (insn);
3405 
3406 	  /* It must be a set.  */
3407 	  if (GET_CODE (tmp) != SET)
3408 	    continue;
3409 
3410 	  /* If the destination is CCFP, then we've found an fcmp insn.  */
3411 	  tmp = SET_DEST (tmp);
3412 	  if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3413 	    {
3414 	      fcmp_count++;
3415 	      continue;
3416 	    }
3417 
3418 	  tmp = PATTERN (insn);
3419 	  /* If this is an fbranch instruction, bump the fbranch counter.  */
3420 	  if (GET_CODE (tmp) == SET
3421 	      && SET_DEST (tmp) == pc_rtx
3422 	      && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3423 	      && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3424 	      && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3425 	      && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3426 	    {
3427 	      fbranch_count++;
3428 	      continue;
3429 	    }
3430 	}
3431 
3432 
3433       /* Find all floating point compare + branch insns.  If possible,
3434 	 reverse the comparison & the branch to avoid add,tr insns.  */
3435       for (insn = get_insns (); insn; insn = next_insn (insn))
3436 	{
3437 	  rtx tmp;
3438 	  rtx_insn *next;
3439 
3440 	  /* Ignore anything that isn't an INSN.  */
3441 	  if (! NONJUMP_INSN_P (insn))
3442 	    continue;
3443 
3444 	  tmp = PATTERN (insn);
3445 
3446 	  /* It must be a set.  */
3447 	  if (GET_CODE (tmp) != SET)
3448 	    continue;
3449 
3450 	  /* The destination must be CCFP, which is register zero.  */
3451 	  tmp = SET_DEST (tmp);
3452 	  if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3453 	    continue;
3454 
3455 	  /* INSN should be a set of CCFP.
3456 
3457 	     See if the result of this insn is used in a reversed FP
3458 	     conditional branch.  If so, reverse our condition and
3459 	     the branch.  Doing so avoids useless add,tr insns.  */
3460 	  next = next_insn (insn);
3461 	  while (next)
3462 	    {
3463 	      /* Jumps, calls and labels stop our search.  */
3464 	      if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
3465 		break;
3466 
3467 	      /* As does another fcmp insn.  */
3468 	      if (NONJUMP_INSN_P (next)
3469 		  && GET_CODE (PATTERN (next)) == SET
3470 		  && GET_CODE (SET_DEST (PATTERN (next))) == REG
3471 		  && REGNO (SET_DEST (PATTERN (next))) == 0)
3472 		break;
3473 
3474 	      next = next_insn (next);
3475 	    }
3476 
3477 	  /* Is NEXT a branch?  */
3478 	  if (next && JUMP_P (next))
3479 	    {
3480 	      rtx pattern = PATTERN (next);
3481 
3482 	      /* If it is a reversed fp conditional branch (e.g. it uses
3483 		 add,tr) and CCFP dies, then reverse our conditional and the
3484 		 branch to avoid the add,tr.  */
3485 	      if (GET_CODE (pattern) == SET
3486 		  && SET_DEST (pattern) == pc_rtx
3487 		  && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3488 		  && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3489 		  && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3490 		  && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3491 		  && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3492 		  && (fcmp_count == fbranch_count
3493 		      || (check_notes
3494 			  && find_regno_note (next, REG_DEAD, 0))))
3495 		{
3496 		  /* Reverse the branch.  */
3497 		  tmp = XEXP (SET_SRC (pattern), 1);
3498 		  XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3499 		  XEXP (SET_SRC (pattern), 2) = tmp;
3500 		  INSN_CODE (next) = -1;
3501 
3502 		  /* Reverse our condition.  */
3503 		  tmp = PATTERN (insn);
3504 		  PUT_CODE (XEXP (tmp, 1),
3505 			    (reverse_condition_maybe_unordered
3506 			     (GET_CODE (XEXP (tmp, 1)))));
3507 		}
3508 	    }
3509 	}
3510     }
3511 
3512   pass = !pass;
3514 }
3515 
3516 /* You may have trouble believing this, but this is the 32 bit HP-PA
3517    stack layout.  Wow.
3518 
3519    Offset		Contents
3520 
3521    Variable arguments	(optional; any number may be allocated)
3522 
3523    SP-(4*(N+9))		arg word N
3524    	:		    :
3525       SP-56		arg word 5
3526       SP-52		arg word 4
3527 
3528    Fixed arguments	(must be allocated; may remain unused)
3529 
3530       SP-48		arg word 3
3531       SP-44		arg word 2
3532       SP-40		arg word 1
3533       SP-36		arg word 0
3534 
3535    Frame Marker
3536 
3537       SP-32		External Data Pointer (DP)
3538       SP-28		External sr4
3539       SP-24		External/stub RP (RP')
3540       SP-20		Current RP
3541       SP-16		Static Link
3542       SP-12		Clean up
3543       SP-8		Calling Stub RP (RP'')
3544       SP-4		Previous SP
3545 
3546    Top of Frame
3547 
3548       SP-0		Stack Pointer (points to next available address)
3549 
3550 */
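/* So, for example, arg word 6 (a variable argument) would live at
   SP-(4*(6+9)) = SP-60, one word beyond arg word 5 at SP-56.  */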
3551 
3552 /* This function saves registers as follows.  Registers marked with ' are
3553    this function's registers (as opposed to the previous function's).
3554    If a frame pointer isn't needed, r3 is saved as a general register;
3555    the space for the frame pointer is still allocated, though, to keep
3556    things simple.
3557 
3558 
3559    Top of Frame
3560 
3561        SP (FP')		Previous FP
3562        SP + 4		Alignment filler (sigh)
3563        SP + 8		Space for locals reserved here.
3564        .
3565        .
3566        .
3567        SP + n		All call saved registers used.
3568        .
3569        .
3570        .
3571        SP + o		All call saved fp registers used.
3572        .
3573        .
3574        .
3575        SP + p (SP')	points to next available address.
3576 
3577 */
3578 
3579 /* Global variables set by pa_expand_prologue().  */
3580 /* Size of frame.  Need to know this to emit return insns from
3581    leaf procedures.  */
3582 static HOST_WIDE_INT actual_fsize, local_fsize;
3583 static int save_fregs;
3584 
3585 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3586    Handle case where DISP > 8k by using the add_high_const patterns.
3587 
3588    Note that in the DISP > 8k case, we leave the high part of the address
3589    in %r1.  There is code in pa_expand_{prologue,epilogue} that knows this.  */
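/* For instance (an illustrative sketch): on a 32-bit target,
   store_reg (3, 16384, STACK_POINTER_REGNUM) cannot use a 14-bit
   displacement, so it emits the equivalent of

	addil L'16384,%r30
	stw %r3,R'16384(%r1)

   leaving the high part of the address in %r1 as noted above.  */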
3590 
3591 static void
3592 store_reg (int reg, HOST_WIDE_INT disp, int base)
3593 {
3594   rtx dest, src, basereg;
3595   rtx_insn *insn;
3596 
3597   src = gen_rtx_REG (word_mode, reg);
3598   basereg = gen_rtx_REG (Pmode, base);
3599   if (VAL_14_BITS_P (disp))
3600     {
3601       dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
3602       insn = emit_move_insn (dest, src);
3603     }
3604   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3605     {
3606       rtx delta = GEN_INT (disp);
3607       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3608 
3609       emit_move_insn (tmpreg, delta);
3610       insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3611       if (DO_FRAME_NOTES)
3612 	{
3613 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3614 			gen_rtx_SET (VOIDmode, tmpreg,
3615 				     gen_rtx_PLUS (Pmode, basereg, delta)));
3616 	  RTX_FRAME_RELATED_P (insn) = 1;
3617 	}
3618       dest = gen_rtx_MEM (word_mode, tmpreg);
3619       insn = emit_move_insn (dest, src);
3620     }
3621   else
3622     {
3623       rtx delta = GEN_INT (disp);
3624       rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3625       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3626 
3627       emit_move_insn (tmpreg, high);
3628       dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3629       insn = emit_move_insn (dest, src);
3630       if (DO_FRAME_NOTES)
3631 	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3632 		      gen_rtx_SET (VOIDmode,
3633 				   gen_rtx_MEM (word_mode,
3634 						gen_rtx_PLUS (word_mode,
3635 							      basereg,
3636 							      delta)),
3637 				   src));
3638     }
3639 
3640   if (DO_FRAME_NOTES)
3641     RTX_FRAME_RELATED_P (insn) = 1;
3642 }
3643 
3644 /* Emit RTL to store REG at the memory location specified by BASE and then
3645    add MOD to BASE.  MOD must be <= 8k.  */
3646 
3647 static void
3648 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3649 {
3650   rtx basereg, srcreg, delta;
3651   rtx_insn *insn;
3652 
3653   gcc_assert (VAL_14_BITS_P (mod));
3654 
3655   basereg = gen_rtx_REG (Pmode, base);
3656   srcreg = gen_rtx_REG (word_mode, reg);
3657   delta = GEN_INT (mod);
3658 
3659   insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3660   if (DO_FRAME_NOTES)
3661     {
3662       RTX_FRAME_RELATED_P (insn) = 1;
3663 
3664       /* RTX_FRAME_RELATED_P must be set on each frame related set
3665 	 in a parallel with more than one element.  */
3666       RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3667       RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3668     }
3669 }
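/* For example (a sketch): store_reg_modify (STACK_POINTER_REGNUM, 1, 64)
   emits the equivalent of "stwm %r1,64(%r30)" on a 32-bit target,
   storing %r1 at *%r30 and advancing %r30 by 64 in one instruction.  */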
3670 
3671 /* Emit RTL to set REG to the value specified by BASE+DISP.  Handle case
3672    where DISP > 8k by using the add_high_const patterns.  NOTE indicates
3673    whether to add a frame note or not.
3674 
3675    In the DISP > 8k case, we leave the high part of the address in %r1.
3676    There is code in pa_expand_{prologue,epilogue} that knows about this.  */
3677 
3678 static void
3679 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3680 {
3681   rtx_insn *insn;
3682 
3683   if (VAL_14_BITS_P (disp))
3684     {
3685       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3686 			     plus_constant (Pmode,
3687 					    gen_rtx_REG (Pmode, base), disp));
3688     }
3689   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3690     {
3691       rtx basereg = gen_rtx_REG (Pmode, base);
3692       rtx delta = GEN_INT (disp);
3693       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3694 
3695       emit_move_insn (tmpreg, delta);
3696       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3697 			     gen_rtx_PLUS (Pmode, tmpreg, basereg));
3698       if (DO_FRAME_NOTES)
3699 	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3700 		      gen_rtx_SET (VOIDmode, tmpreg,
3701 				   gen_rtx_PLUS (Pmode, basereg, delta)));
3702     }
3703   else
3704     {
3705       rtx basereg = gen_rtx_REG (Pmode, base);
3706       rtx delta = GEN_INT (disp);
3707       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3708 
3709       emit_move_insn (tmpreg,
3710 		      gen_rtx_PLUS (Pmode, basereg,
3711 				    gen_rtx_HIGH (Pmode, delta)));
3712       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3713 			     gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3714     }
3715 
3716   if (DO_FRAME_NOTES && note)
3717     RTX_FRAME_RELATED_P (insn) = 1;
3718 }
3719 
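/* Compute the size in bytes of the stack frame for the current
   function, given SIZE, the size of its locals.  If FREGS_LIVE is
   nonnull, set *FREGS_LIVE when any floating point register must be
   saved.

   A worked example (an illustrative sketch assuming the usual 32-bit
   parameters, a STARTING_FRAME_OFFSET of 8 and a 64-byte preferred
   stack boundary): a leaf function with 40 bytes of locals and no
   register saves needs 40 + 8 = 48 bytes, plus the 32-byte frame
   marker since it allocates stack, for 80 bytes, which the final
   rounding grows to a 128-byte frame.  */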
3720 HOST_WIDE_INT
3721 pa_compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3722 {
3723   int freg_saved = 0;
3724   int i, j;
3725 
3726   /* The code in pa_expand_prologue and pa_expand_epilogue must
3727      be consistent with the rounding and size calculation done here.
3728      Change them at the same time.  */
3729 
3730   /* We do our own stack alignment.  First, round the size of the
3731      stack locals up to a word boundary.  */
3732   size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3733 
3734   /* Space for previous frame pointer + filler.  If any frame is
3735      allocated, we need to add in the STARTING_FRAME_OFFSET.  We
3736      waste some space here for the sake of HP compatibility.  The
3737      first slot is only used when the frame pointer is needed.  */
3738   if (size || frame_pointer_needed)
3739     size += STARTING_FRAME_OFFSET;
3740 
3741   /* If the current function calls __builtin_eh_return, then we need
3742      to allocate stack space for registers that will hold data for
3743      the exception handler.  */
3744   if (DO_FRAME_NOTES && crtl->calls_eh_return)
3745     {
3746       unsigned int i;
3747 
3748       for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3749 	continue;
3750       size += i * UNITS_PER_WORD;
3751     }
3752 
3753   /* Account for space used by the callee general register saves.  */
3754   for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3755     if (df_regs_ever_live_p (i))
3756       size += UNITS_PER_WORD;
3757 
3758   /* Account for space used by the callee floating point register saves.  */
3759   for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3760     if (df_regs_ever_live_p (i)
3761 	|| (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3762       {
3763 	freg_saved = 1;
3764 
3765 	/* We always save both halves of the FP register, so always
3766 	   increment the frame size by 8 bytes.  */
3767 	size += 8;
3768       }
3769 
3770   /* If any of the floating registers are saved, account for the
3771      alignment needed for the floating point register save block.  */
3772   if (freg_saved)
3773     {
3774       size = (size + 7) & ~7;
3775       if (fregs_live)
3776 	*fregs_live = 1;
3777     }
3778 
3779   /* The various ABIs include space for the outgoing parameters in the
3780      size of the current function's stack frame.  We don't need to align
3781      for the outgoing arguments as their alignment is set by the final
3782      rounding for the frame as a whole.  */
3783   size += crtl->outgoing_args_size;
3784 
3785   /* Allocate space for the fixed frame marker.  This space must be
3786      allocated for any function that makes calls or allocates
3787      stack space.  */
3788   if (!crtl->is_leaf || size)
3789     size += TARGET_64BIT ? 48 : 32;
3790 
3791   /* Finally, round to the preferred stack boundary.  */
3792   return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3793 	  & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3794 }
3795 
3796 /* Generate the assembly code for function entry.  FILE is a stdio
3797    stream to output the code to.  SIZE is an int: how many units of
3798    temporary storage to allocate.
3799 
3800    Refer to the array `regs_ever_live' to determine which registers to
3801    save; `regs_ever_live[I]' is nonzero if register number I is ever
3802    used in the function.  This function is responsible for knowing
3803    which registers should not be saved even if used.  */
3804 
3805 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3806    of memory.  If any fpu reg is used in the function, we allocate
3807    such a block here, at the bottom of the frame, just in case it's needed.
3808 
3809    If this function is a leaf procedure, then we may choose not
3810    to do a "save" insn.  The decision about whether or not
3811    to do this is made in regclass.c.  */
3812 
3813 static void
3814 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3815 {
3816   /* The function's label and associated .PROC must never be
3817      separated and must be output *after* any profiling declarations
3818      to avoid changing spaces/subspaces within a procedure.  */
3819   ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3820   fputs ("\t.PROC\n", file);
3821 
3822   /* pa_expand_prologue does the dirty work now.  We just need
3823      to output the assembler directives which denote the start
3824      of a function.  */
3825   fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3826   if (crtl->is_leaf)
3827     fputs (",NO_CALLS", file);
3828   else
3829     fputs (",CALLS", file);
3830   if (rp_saved)
3831     fputs (",SAVE_RP", file);
3832 
3833   /* The SAVE_SP flag is used to indicate that register %r3 is stored
3834      at the beginning of the frame and that it is used as the frame
3835      pointer for the frame.  We do this because our current frame
3836      layout doesn't conform to that specified in the HP runtime
3837      documentation and we need a way to indicate to programs such as
3838      GDB where %r3 is saved.  The SAVE_SP flag was chosen because it
3839      isn't used by HP compilers but is supported by the assembler.
3840      However, SAVE_SP is supposed to indicate that the previous stack
3841      pointer has been saved in the frame marker.  */
3842   if (frame_pointer_needed)
3843     fputs (",SAVE_SP", file);
3844 
3845   /* Pass on information about the number of callee register saves
3846      performed in the prologue.
3847 
3848      The compiler is supposed to pass the highest register number
3849      saved, the assembler then has to adjust that number before
3850      entering it into the unwind descriptor (to account for any
3851      caller saved registers with lower register numbers than the
3852      first callee saved register).  */
3853   if (gr_saved)
3854     fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3855 
3856   if (fr_saved)
3857     fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3858 
3859   fputs ("\n\t.ENTRY\n", file);
3860 
3861   remove_useless_addtr_insns (0);
3862 }
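/* Putting the directives together, a typical non-leaf 32-bit function
   begins with output along these lines (an illustrative sketch; the
   FRAME and ENTRY_GR values depend on the frame size and callee
   saves):

	foo:
		.PROC
		.CALLINFO FRAME=128,CALLS,SAVE_RP,ENTRY_GR=3
		.ENTRY
*/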
3863 
3864 void
3865 pa_expand_prologue (void)
3866 {
3867   int merge_sp_adjust_with_store = 0;
3868   HOST_WIDE_INT size = get_frame_size ();
3869   HOST_WIDE_INT offset;
3870   int i;
3871   rtx tmpreg;
3872   rtx_insn *insn;
3873 
3874   gr_saved = 0;
3875   fr_saved = 0;
3876   save_fregs = 0;
3877 
3878   /* Compute the total size for the frame pointer, filler and locals,
3879      rounded to the next word boundary.  Similar code appears in
3880      pa_compute_frame_size and must be changed in tandem with this code.  */
3881   local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3882   if (local_fsize || frame_pointer_needed)
3883     local_fsize += STARTING_FRAME_OFFSET;
3884 
3885   actual_fsize = pa_compute_frame_size (size, &save_fregs);
3886   if (flag_stack_usage_info)
3887     current_function_static_stack_size = actual_fsize;
3888 
3889   /* Compute a few things we will use often.  */
3890   tmpreg = gen_rtx_REG (word_mode, 1);
3891 
3892   /* Save RP first.  The calling conventions manual states RP will
3893      always be stored into the caller's frame at sp - 20 or sp - 16
3894      depending on which ABI is in use.  */
3895   if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3896     {
3897       store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3898       rp_saved = true;
3899     }
3900   else
3901     rp_saved = false;
3902 
3903   /* Allocate the local frame and set up the frame pointer if needed.  */
3904   if (actual_fsize != 0)
3905     {
3906       if (frame_pointer_needed)
3907 	{
3908 	  /* Copy the old frame pointer temporarily into %r1.  Set up the
3909 	     new stack pointer, then store away the saved old frame pointer
3910 	     into the stack at sp and at the same time update the stack
3911 	     pointer by actual_fsize bytes.  There are two versions: the
3912 	     first handles small (<8k) frames, the second handles large
3913 	     (>=8k) frames.  */
3914 	  insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
3915 	  if (DO_FRAME_NOTES)
3916 	    RTX_FRAME_RELATED_P (insn) = 1;
3917 
3918 	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3919 	  if (DO_FRAME_NOTES)
3920 	    RTX_FRAME_RELATED_P (insn) = 1;
3921 
3922 	  if (VAL_14_BITS_P (actual_fsize))
3923 	    store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3924 	  else
3925 	    {
3926 	      /* It is incorrect to store the saved frame pointer at *sp,
3927 		 then increment sp (writes beyond the current stack boundary).
3928 
3929 		 So instead use stwm to store at *sp and post-increment the
3930 		 stack pointer as an atomic operation.  Then increment sp to
3931 		 finish allocating the new frame.  */
3932 	      HOST_WIDE_INT adjust1 = 8192 - 64;
3933 	      HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3934 
3935 	      store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3936 	      set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3937 			      adjust2, 1);
3938 	    }
3939 
3940 	  /* We set SAVE_SP in frames that need a frame pointer.  Thus,
3941 	     we need to store the previous stack pointer (frame pointer)
3942 	     into the frame marker on targets that use the HP unwind
3943 	     library.  This allows the HP unwind library to be used to
3944 	     unwind GCC frames.  However, we are not fully compatible
3945 	     with the HP library because our frame layout differs from
3946 	     that specified in the HP runtime specification.
3947 
3948 	     We don't want a frame note on this instruction as the frame
3949 	     marker moves during dynamic stack allocation.
3950 
3951 	     This instruction also serves as a blockage to prevent
3952 	     register spills from being scheduled before the stack
3953 	     pointer is raised.  This is necessary as we store
3954 	     registers using the frame pointer as a base register,
3955 	     and the frame pointer is set before sp is raised.  */
3956 	  if (TARGET_HPUX_UNWIND_LIBRARY)
3957 	    {
3958 	      rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3959 				       GEN_INT (TARGET_64BIT ? -8 : -4));
3960 
3961 	      emit_move_insn (gen_rtx_MEM (word_mode, addr),
3962 			      hard_frame_pointer_rtx);
3963 	    }
3964 	  else
3965 	    emit_insn (gen_blockage ());
3966 	}
3967       /* No frame pointer needed.  */
3968       else
3969 	{
3970 	  /* In some cases we can perform the first callee register save
3971 	     and allocate the stack frame at the same time.  If so, just
3972 	     make a note of it and defer allocating the frame until saving
3973 	     the callee registers.  */
3974 	  if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3975 	    merge_sp_adjust_with_store = 1;
3976 	  /* Cannot optimize.  Adjust the stack frame by actual_fsize
3977 	     bytes.  */
3978 	  else
3979 	    set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3980 			    actual_fsize, 1);
3981 	}
3982     }
3983 
3984   /* Normal register save.
3985 
3986      Do not save the frame pointer in the frame_pointer_needed case.  It
3987      was done earlier.  */
3988   if (frame_pointer_needed)
3989     {
3990       offset = local_fsize;
3991 
3992       /* Saving the EH return data registers in the frame is the simplest
3993 	 way to get the frame unwind information emitted.  We put them
3994 	 just before the general registers.  */
3995       if (DO_FRAME_NOTES && crtl->calls_eh_return)
3996 	{
3997 	  unsigned int i, regno;
3998 
3999 	  for (i = 0; ; ++i)
4000 	    {
4001 	      regno = EH_RETURN_DATA_REGNO (i);
4002 	      if (regno == INVALID_REGNUM)
4003 		break;
4004 
4005 	      store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4006 	      offset += UNITS_PER_WORD;
4007 	    }
4008 	}
4009 
4010       for (i = 18; i >= 4; i--)
4011 	if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4012 	  {
4013 	    store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4014 	    offset += UNITS_PER_WORD;
4015 	    gr_saved++;
4016 	  }
4017       /* Account for %r3 which is saved in a special place.  */
4018       gr_saved++;
4019     }
4020   /* No frame pointer needed.  */
4021   else
4022     {
4023       offset = local_fsize - actual_fsize;
4024 
4025       /* Saving the EH return data registers in the frame is the simplest
4026          way to get the frame unwind information emitted.  */
4027       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4028 	{
4029 	  unsigned int i, regno;
4030 
4031 	  for (i = 0; ; ++i)
4032 	    {
4033 	      regno = EH_RETURN_DATA_REGNO (i);
4034 	      if (regno == INVALID_REGNUM)
4035 		break;
4036 
4037 	      /* If merge_sp_adjust_with_store is nonzero, then we can
4038 		 optimize the first save.  */
4039 	      if (merge_sp_adjust_with_store)
4040 		{
4041 		  store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
4042 		  merge_sp_adjust_with_store = 0;
4043 		}
4044 	      else
4045 		store_reg (regno, offset, STACK_POINTER_REGNUM);
4046 	      offset += UNITS_PER_WORD;
4047 	    }
4048 	}
4049 
4050       for (i = 18; i >= 3; i--)
4051 	if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4052 	  {
4053 	    /* If merge_sp_adjust_with_store is nonzero, then we can
4054 	       optimize the first GR save.  */
4055 	    if (merge_sp_adjust_with_store)
4056 	      {
4057 		store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4058 		merge_sp_adjust_with_store = 0;
4059 	      }
4060 	    else
4061 	      store_reg (i, offset, STACK_POINTER_REGNUM);
4062 	    offset += UNITS_PER_WORD;
4063 	    gr_saved++;
4064 	  }
4065 
4066       /* If we wanted to merge the SP adjustment with a GR save, but we never
4067 	 did any GR saves, then just emit the adjustment here.  */
4068       if (merge_sp_adjust_with_store)
4069 	set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4070 			actual_fsize, 1);
4071     }
4072 
4073   /* The hppa calling conventions say that %r19, the pic offset
4074      register, is saved at sp - 32 (in this function's frame)
4075      when generating PIC code.  FIXME:  What is the correct thing
4076      to do for functions which make no calls and allocate no
4077      frame?  Do we need to allocate a frame, or can we just omit
4078      the save?   For now we'll just omit the save.
4079 
4080      We don't want a note on this insn as the frame marker can
4081      move if there is a dynamic stack allocation.  */
4082   if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
4083     {
4084       rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4085 
4086       emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4088     }
4089 
4090   /* Align the offset to a doubleword boundary.  */
4091   offset = (offset + 7) & ~7;
4092 
4093   /* Floating point register store.  */
4094   if (save_fregs)
4095     {
4096       rtx base;
4097 
4098       /* First get the frame or stack pointer to the start of the FP register
4099 	 save area.  */
4100       if (frame_pointer_needed)
4101 	{
4102 	  set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4103 	  base = hard_frame_pointer_rtx;
4104 	}
4105       else
4106 	{
4107 	  set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4108 	  base = stack_pointer_rtx;
4109 	}
4110 
4111       /* Now actually save the FP registers.  */
4112       for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4113 	{
4114 	  if (df_regs_ever_live_p (i)
4115 	      || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4116 	    {
4117 	      rtx addr, reg;
4118 	      rtx_insn *insn;
4119 	      addr = gen_rtx_MEM (DFmode,
4120 				  gen_rtx_POST_INC (word_mode, tmpreg));
4121 	      reg = gen_rtx_REG (DFmode, i);
4122 	      insn = emit_move_insn (addr, reg);
4123 	      if (DO_FRAME_NOTES)
4124 		{
4125 		  RTX_FRAME_RELATED_P (insn) = 1;
4126 		  if (TARGET_64BIT)
4127 		    {
4128 		      rtx mem = gen_rtx_MEM (DFmode,
4129 					     plus_constant (Pmode, base,
4130 							    offset));
4131 		      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4132 				    gen_rtx_SET (VOIDmode, mem, reg));
4133 		    }
4134 		  else
4135 		    {
4136 		      rtx meml = gen_rtx_MEM (SFmode,
4137 					      plus_constant (Pmode, base,
4138 							     offset));
4139 		      rtx memr = gen_rtx_MEM (SFmode,
4140 					      plus_constant (Pmode, base,
4141 							     offset + 4));
4142 		      rtx regl = gen_rtx_REG (SFmode, i);
4143 		      rtx regr = gen_rtx_REG (SFmode, i + 1);
4144 		      rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
4145 		      rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
4146 		      rtvec vec;
4147 
4148 		      RTX_FRAME_RELATED_P (setl) = 1;
4149 		      RTX_FRAME_RELATED_P (setr) = 1;
4150 		      vec = gen_rtvec (2, setl, setr);
4151 		      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4152 				    gen_rtx_SEQUENCE (VOIDmode, vec));
4153 		    }
4154 		}
4155 	      offset += GET_MODE_SIZE (DFmode);
4156 	      fr_saved++;
4157 	    }
4158 	}
4159     }
4160 }
4161 
4162 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4163    Handle case where DISP > 8k by using the add_high_const patterns.  */
4164 
4165 static void
4166 load_reg (int reg, HOST_WIDE_INT disp, int base)
4167 {
4168   rtx dest = gen_rtx_REG (word_mode, reg);
4169   rtx basereg = gen_rtx_REG (Pmode, base);
4170   rtx src;
4171 
4172   if (VAL_14_BITS_P (disp))
4173     src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
4174   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4175     {
4176       rtx delta = GEN_INT (disp);
4177       rtx tmpreg = gen_rtx_REG (Pmode, 1);
4178 
4179       emit_move_insn (tmpreg, delta);
4180       if (TARGET_DISABLE_INDEXING)
4181 	{
4182 	  emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4183 	  src = gen_rtx_MEM (word_mode, tmpreg);
4184 	}
4185       else
4186 	src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4187     }
4188   else
4189     {
4190       rtx delta = GEN_INT (disp);
4191       rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4192       rtx tmpreg = gen_rtx_REG (Pmode, 1);
4193 
4194       emit_move_insn (tmpreg, high);
4195       src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4196     }
4197 
4198   emit_move_insn (dest, src);
4199 }
4200 
4201 /* Update the total code bytes output to the text section.  */
4202 
4203 static void
4204 update_total_code_bytes (unsigned int nbytes)
4205 {
4206   if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4207       && !IN_NAMED_SECTION_P (cfun->decl))
4208     {
4209       unsigned int old_total = total_code_bytes;
4210 
4211       total_code_bytes += nbytes;
4212 
4213       /* Be prepared to handle overflows.  */
4214       if (old_total > total_code_bytes)
4215         total_code_bytes = UINT_MAX;
4216     }
4217 }
4218 
4219 /* This function generates the assembly code for function exit.
4220    Args are as for output_function_prologue ().
4221 
4222    The function epilogue should not depend on the current stack
4223    pointer!  It should use the frame pointer only.  This is mandatory
4224    because of alloca; we also take advantage of it to omit stack
4225    adjustments before returning.  */
4226 
4227 static void
4228 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4229 {
4230   rtx_insn *insn = get_last_insn ();
4231   bool extra_nop;
4232 
4233   /* pa_expand_epilogue does the dirty work now.  We just need
4234      to output the assembler directives which denote the end
4235      of a function.
4236 
4237      To make debuggers happy, emit a nop if the epilogue was completely
4238      eliminated due to a volatile call as the last insn in the
4239      current function.  That way the return address (in %r2) will
4240      always point to a valid instruction in the current function.  */
4241 
4242   /* Get the last real insn.  */
4243   if (NOTE_P (insn))
4244     insn = prev_real_insn (insn);
4245 
4246   /* If it is a sequence, then look inside.  */
4247   if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
4248     insn = as_a <rtx_sequence *> (PATTERN (insn))->insn (0);
4249 
4250   /* If insn is a CALL_INSN, then it must be a call to a volatile
4251      function (otherwise there would be epilogue insns).  */
4252   if (insn && CALL_P (insn))
4253     {
4254       fputs ("\tnop\n", file);
4255       extra_nop = true;
4256     }
4257   else
4258     extra_nop = false;
4259 
4260   fputs ("\t.EXIT\n\t.PROCEND\n", file);
4261 
4262   if (TARGET_SOM && TARGET_GAS)
4263     {
4264       /* We are done with this subspace except possibly for some additional
4265 	 debug information.  Forget that we are in this subspace to ensure
4266 	 that the next function is output in its own subspace.  */
4267       in_section = NULL;
4268       cfun->machine->in_nsubspa = 2;
4269     }
4270 
4271   /* Thunks do their own insn accounting.  */
4272   if (cfun->is_thunk)
4273     return;
4274 
4275   if (INSN_ADDRESSES_SET_P ())
4276     {
4277       last_address = extra_nop ? 4 : 0;
4278       insn = get_last_nonnote_insn ();
4279       if (insn)
4280 	{
4281 	  last_address += INSN_ADDRESSES (INSN_UID (insn));
4282 	  if (INSN_P (insn))
4283 	    last_address += insn_default_length (insn);
4284 	}
4285       last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4286 		      & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4287     }
4288   else
4289     last_address = UINT_MAX;
4290 
4291   /* Finally, update the total number of code bytes output so far.  */
4292   update_total_code_bytes (last_address);
4293 }
4294 
4295 void
4296 pa_expand_epilogue (void)
4297 {
4298   rtx tmpreg;
4299   HOST_WIDE_INT offset;
4300   HOST_WIDE_INT ret_off = 0;
4301   int i;
4302   int merge_sp_adjust_with_load = 0;
4303 
4304   /* We will use this often.  */
4305   tmpreg = gen_rtx_REG (word_mode, 1);
4306 
4307   /* Try to restore RP early to avoid load/use interlocks when
4308      RP gets used in the return (bv) instruction.  This appears to still
4309      be necessary even when we schedule the prologue and epilogue.  */
4310   if (rp_saved)
4311     {
4312       ret_off = TARGET_64BIT ? -16 : -20;
4313       if (frame_pointer_needed)
4314 	{
4315 	  load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4316 	  ret_off = 0;
4317 	}
4318       else
4319 	{
4320 	  /* No frame pointer, and stack is smaller than 8k.  */
4321 	  if (VAL_14_BITS_P (ret_off - actual_fsize))
4322 	    {
4323 	      load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4324 	      ret_off = 0;
4325 	    }
4326 	}
4327     }
4328 
4329   /* General register restores.  */
4330   if (frame_pointer_needed)
4331     {
4332       offset = local_fsize;
4333 
4334       /* If the current function calls __builtin_eh_return, then we need
4335          to restore the saved EH data registers.  */
4336       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4337 	{
4338 	  unsigned int i, regno;
4339 
4340 	  for (i = 0; ; ++i)
4341 	    {
4342 	      regno = EH_RETURN_DATA_REGNO (i);
4343 	      if (regno == INVALID_REGNUM)
4344 		break;
4345 
4346 	      load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4347 	      offset += UNITS_PER_WORD;
4348 	    }
4349 	}
4350 
4351       for (i = 18; i >= 4; i--)
4352 	if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4353 	  {
4354 	    load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4355 	    offset += UNITS_PER_WORD;
4356 	  }
4357     }
4358   else
4359     {
4360       offset = local_fsize - actual_fsize;
4361 
4362       /* If the current function calls __builtin_eh_return, then we need
4363          to restore the saved EH data registers.  */
4364       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4365 	{
4366 	  unsigned int i, regno;
4367 
4368 	  for (i = 0; ; ++i)
4369 	    {
4370 	      regno = EH_RETURN_DATA_REGNO (i);
4371 	      if (regno == INVALID_REGNUM)
4372 		break;
4373 
4374 	      /* Only for the first load.
4375 	         merge_sp_adjust_with_load holds the number of the register
4376 	         whose load will be merged with the sp adjustment.  */
4377 	      if (merge_sp_adjust_with_load == 0
4378 		  && local_fsize == 0
4379 		  && VAL_14_BITS_P (-actual_fsize))
4380 	        merge_sp_adjust_with_load = regno;
4381 	      else
4382 		load_reg (regno, offset, STACK_POINTER_REGNUM);
4383 	      offset += UNITS_PER_WORD;
4384 	    }
4385 	}
4386 
4387       for (i = 18; i >= 3; i--)
4388 	{
4389 	  if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4390 	    {
4391 	      /* Only for the first load.
4392 	         merge_sp_adjust_with_load holds the number of the register
4393 	         whose load will be merged with the sp adjustment.  */
4394 	      if (merge_sp_adjust_with_load == 0
4395 		  && local_fsize == 0
4396 		  && VAL_14_BITS_P (-actual_fsize))
4397 	        merge_sp_adjust_with_load = i;
4398 	      else
4399 		load_reg (i, offset, STACK_POINTER_REGNUM);
4400 	      offset += UNITS_PER_WORD;
4401 	    }
4402 	}
4403     }
4404 
4405   /* Align the offset to a doubleword boundary.  */
4406   offset = (offset + 7) & ~7;
4407 
4408   /* FP register restores.  */
4409   if (save_fregs)
4410     {
4411       /* Adjust the register to index off of.  */
4412       if (frame_pointer_needed)
4413 	set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4414       else
4415 	set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4416 
4417       /* Actually do the restores now.  */
4418       for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4419 	if (df_regs_ever_live_p (i)
4420 	    || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4421 	  {
4422 	    rtx src = gen_rtx_MEM (DFmode,
4423 				   gen_rtx_POST_INC (word_mode, tmpreg));
4424 	    rtx dest = gen_rtx_REG (DFmode, i);
4425 	    emit_move_insn (dest, src);
4426 	  }
4427     }
4428 
4429   /* Emit a blockage insn here to keep these insns from being moved to
4430      an earlier spot in the epilogue, or into the main instruction stream.
4431 
4432      This is necessary as we must not cut the stack back before all the
4433      restores are finished.  */
4434   emit_insn (gen_blockage ());
4435 
4436   /* Reset stack pointer (and possibly frame pointer).  The stack
4437      pointer is initially set to fp + 64 to avoid a race condition.  */
4438   if (frame_pointer_needed)
4439     {
4440       rtx delta = GEN_INT (-64);
4441 
4442       set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4443       emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4444 			       stack_pointer_rtx, delta));
4445     }
4446   /* If we were deferring a callee register restore, do it now.  */
4447   else if (merge_sp_adjust_with_load)
4448     {
4449       rtx delta = GEN_INT (-actual_fsize);
4450       rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4451 
4452       emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4453     }
4454   else if (actual_fsize != 0)
4455     set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4456 		    - actual_fsize, 0);
4457 
4458   /* If we haven't restored %r2 yet (no frame pointer, and a stack
4459      frame greater than 8k), do so now.  */
4460   if (ret_off != 0)
4461     load_reg (2, ret_off, STACK_POINTER_REGNUM);
4462 
4463   if (DO_FRAME_NOTES && crtl->calls_eh_return)
4464     {
4465       rtx sa = EH_RETURN_STACKADJ_RTX;
4466 
4467       emit_insn (gen_blockage ());
4468       emit_insn (TARGET_64BIT
4469 		 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4470 		 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4471     }
4472 }
4473 
4474 bool
4475 pa_can_use_return_insn (void)
4476 {
4477   if (!reload_completed)
4478     return false;
4479 
4480   if (frame_pointer_needed)
4481     return false;
4482 
4483   if (df_regs_ever_live_p (2))
4484     return false;
4485 
4486   if (crtl->profile)
4487     return false;
4488 
4489   return pa_compute_frame_size (get_frame_size (), 0) == 0;
4490 }
4491 
4492 rtx
4493 hppa_pic_save_rtx (void)
4494 {
4495   return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4496 }
4497 
4498 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4499 #define NO_DEFERRED_PROFILE_COUNTERS 0
4500 #endif
4501 
4502 
4503 /* Vector of funcdef numbers.  */
4504 static vec<int> funcdef_nos;
4505 
4506 /* Output deferred profile counters.  */
4507 static void
4508 output_deferred_profile_counters (void)
4509 {
4510   unsigned int i;
4511   int align, n;
4512 
4513   if (funcdef_nos.is_empty ())
4514     return;
4515 
4516   switch_to_section (data_section);
4517   align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4518   ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4519 
4520   for (i = 0; funcdef_nos.iterate (i, &n); i++)
4521     {
4522       targetm.asm_out.internal_label (asm_out_file, "LP", n);
4523       assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4524     }
4525 
4526   funcdef_nos.release ();
4527 }
4528 
4529 void
4530 hppa_profile_hook (int label_no)
4531 {
4532   /* We use SImode for the address of the function in both 32 and
4533      64-bit code to avoid having to provide DImode versions of the
4534      lcla2 and load_offset_label_address insn patterns.  */
4535   rtx reg = gen_reg_rtx (SImode);
4536   rtx_code_label *label_rtx = gen_label_rtx ();
4537   rtx begin_label_rtx;
4538   rtx_insn *call_insn;
4539   char begin_label_name[16];
4540 
4541   ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4542 			       label_no);
4543   begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4544 
4545   if (TARGET_64BIT)
4546     emit_move_insn (arg_pointer_rtx,
4547 		    gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4548 				  GEN_INT (64)));
4549 
4550   emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4551 
4552   /* The address of the function is loaded into %r25 with an instruction-
4553      relative sequence that avoids the use of relocations.  The sequence
4554      is split so that the load_offset_label_address instruction can
4555      occupy the delay slot of the call to _mcount.  */
4556   if (TARGET_PA_20)
4557     emit_insn (gen_lcla2 (reg, label_rtx));
4558   else
4559     emit_insn (gen_lcla1 (reg, label_rtx));
4560 
4561   emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4562 					    reg, begin_label_rtx, label_rtx));
4563 
4564 #if !NO_DEFERRED_PROFILE_COUNTERS
4565   {
4566     rtx count_label_rtx, addr, r24;
4567     char count_label_name[16];
4568 
4569     funcdef_nos.safe_push (label_no);
4570     ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4571     count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4572 
4573     addr = force_reg (Pmode, count_label_rtx);
4574     r24 = gen_rtx_REG (Pmode, 24);
4575     emit_move_insn (r24, addr);
4576 
4577     call_insn =
4578       emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4579 					     gen_rtx_SYMBOL_REF (Pmode,
4580 								 "_mcount")),
4581 				GEN_INT (TARGET_64BIT ? 24 : 12)));
4582 
4583     use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4584   }
4585 #else
4586 
4587   call_insn =
4588     emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4589 					   gen_rtx_SYMBOL_REF (Pmode,
4590 							       "_mcount")),
4591 			      GEN_INT (TARGET_64BIT ? 16 : 8)));
4592 
4593 #endif
4594 
4595   use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4596   use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4597 
4598   /* Indicate the _mcount call cannot throw, nor will it execute a
4599      non-local goto.  */
4600   make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4601 }
4602 
4603 /* Fetch the return address for the frame COUNT steps up from
4604    the current frame, after the prologue.  FRAMEADDR is the
4605    frame pointer of the COUNT frame.
4606 
4607    We want to ignore any export stub remnants here.  To handle this,
4608    we examine the code at the return address, and if it is an export
4609    stub, we return a memory rtx for the stub return address stored
4610    at frame-24.
4611 
4612    The value returned is used in two different ways:
4613 
4614 	1. To find a function's caller.
4615 
4616 	2. To change the return address for a function.
4617 
4618    This function handles most instances of case 1; however, it will
4619    fail if there are two levels of stubs to execute on the return
4620    path.  The only way I believe that can happen is if the return value
4621    needs a parameter relocation, which never happens for C code.
4622 
4623    This function handles most instances of case 2; however, it will
4624    fail if we did not originally have stub code on the return path
4625    but will need stub code on the new return path.  This can happen if
4626    the caller & callee are both in the main program, but the new
4627    return location is in a shared library.  */
4628 
4629 rtx
4630 pa_return_addr_rtx (int count, rtx frameaddr)
4631 {
4632   rtx label;
4633   rtx rp;
4634   rtx saved_rp;
4635   rtx ins;
4636 
4637   /* The instruction stream at the return address of a PA1.X export stub is:
4638 
4639 	0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
4640 	0x004010a1 | stub+12:  ldsid (sr0,rp),r1
4641 	0x00011820 | stub+16:  mtsp r1,sr0
4642 	0xe0400002 | stub+20:  be,n 0(sr0,rp)
4643 
4644      0xe0400002 must be specified as -532676606 so that it won't be
4645      rejected as an invalid immediate operand on 64-bit hosts.
4646 
4647      The instruction stream at the return address of a PA2.0 export stub is:
4648 
4649 	0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
4650 	0xe840d002 | stub+12:  bve,n (rp)
4651      Likewise, 0xe840d002 must be specified as -398405630.  */
4652 
4653   HOST_WIDE_INT insns[4];
4654   int i, len;
4655 
4656   if (count != 0)
4657     return NULL_RTX;
4658 
4659   rp = get_hard_reg_initial_val (Pmode, 2);
4660 
4661   if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4662     return rp;
4663 
4664   /* If there is no export stub then just use the value saved from
4665      the return pointer register.  */
4666 
4667   saved_rp = gen_reg_rtx (Pmode);
4668   emit_move_insn (saved_rp, rp);
4669 
4670   /* Get pointer to the instruction stream.  We have to mask out the
4671      privilege level from the two low order bits of the return address
4672      pointer here so that ins will point to the start of the first
4673      instruction that would have been executed if we returned.  */
4674   ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4675   label = gen_label_rtx ();
4676 
4677   if (TARGET_PA_20)
4678     {
4679       insns[0] = 0x4bc23fd1;
4680       insns[1] = -398405630;
4681       len = 2;
4682     }
4683   else
4684     {
4685       insns[0] = 0x4bc23fd1;
4686       insns[1] = 0x004010a1;
4687       insns[2] = 0x00011820;
4688       insns[3] = -532676606;
4689       len = 4;
4690     }
4691 
4692   /* Check the instruction stream at the normal return address for the
4693      export stub.  If it is an export stub, then our return address is
4694      really in -24[frameaddr].  */
4695 
4696   for (i = 0; i < len; i++)
4697     {
4698       rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
4699       rtx op1 = GEN_INT (insns[i]);
4700       emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4701     }
4702 
4703   /* Here we know that our return address points to an export
4704      stub.  We don't want to return the address of the export stub,
4705      but rather the return address of the export stub.  That return
4706      address is stored at -24[frameaddr].  */
4707 
4708   emit_move_insn (saved_rp,
4709 		  gen_rtx_MEM (Pmode,
4710 			       memory_address (Pmode,
4711 					       plus_constant (Pmode, frameaddr,
4712 							      -24))));
4713 
4714   emit_label (label);
4715 
4716   return saved_rp;
4717 }
4718 
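/* Emit the rtl for a conditional branch on a floating point
   comparison: first a SET of the CCFP register (hard register 0 in
   CCFPmode) to the comparison, then a jump that is taken when CCFP
   is nonzero.  operands[0] holds the comparison code, operands[1]
   and operands[2] the compared values, and operands[3] the branch
   label.  */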
4719 void
4720 pa_emit_bcond_fp (rtx operands[])
4721 {
4722   enum rtx_code code = GET_CODE (operands[0]);
4723   rtx operand0 = operands[1];
4724   rtx operand1 = operands[2];
4725   rtx label = operands[3];
4726 
4727   emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4728 		          gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4729 
4730   emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4731 			       gen_rtx_IF_THEN_ELSE (VOIDmode,
4732 						     gen_rtx_fmt_ee (NE,
4733 							      VOIDmode,
4734 							      gen_rtx_REG (CCFPmode, 0),
4735 							      const0_rtx),
4736 						     gen_rtx_LABEL_REF (VOIDmode, label),
4737 						     pc_rtx)));
4739 }
4740 
4741 /* Adjust the cost of a scheduling dependency.  Return the new cost of
4742    a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
4743 
4744 static int
4745 pa_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
4746 {
4747   enum attr_type attr_type;
4748 
4749   /* Don't adjust costs for a pa8000 chip, and don't adjust any true
4750      dependencies, as they are described with bypasses now.  */
4751   if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4752     return cost;
4753 
4754   if (! recog_memoized (insn))
4755     return 0;
4756 
4757   attr_type = get_attr_type (insn);
4758 
4759   switch (REG_NOTE_KIND (link))
4760     {
4761     case REG_DEP_ANTI:
4762       /* Anti dependency; DEP_INSN reads a register that INSN writes some
4763 	 cycles later.  */
4764 
4765       if (attr_type == TYPE_FPLOAD)
4766 	{
4767 	  rtx pat = PATTERN (insn);
4768 	  rtx dep_pat = PATTERN (dep_insn);
4769 	  if (GET_CODE (pat) == PARALLEL)
4770 	    {
4771 	      /* This happens for the fldXs,mb patterns.  */
4772 	      pat = XVECEXP (pat, 0, 0);
4773 	    }
4774 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4775 	    /* If this happens, we have to extend this to schedule
4776 	       optimally.  Return 0 for now.  */
4777 	    return 0;
4778 
4779 	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4780 	    {
4781 	      if (! recog_memoized (dep_insn))
4782 		return 0;
4783 	      switch (get_attr_type (dep_insn))
4784 		{
4785 		case TYPE_FPALU:
4786 		case TYPE_FPMULSGL:
4787 		case TYPE_FPMULDBL:
4788 		case TYPE_FPDIVSGL:
4789 		case TYPE_FPDIVDBL:
4790 		case TYPE_FPSQRTSGL:
4791 		case TYPE_FPSQRTDBL:
4792 		  /* An fpload can't be issued until one cycle before a
4793 		     preceding arithmetic operation has finished if
4794 		     the target of the fpload is any of the sources
4795 		     (or destination) of the arithmetic operation.  */
4796 		  return insn_default_latency (dep_insn) - 1;
4797 
4798 		default:
4799 		  return 0;
4800 		}
4801 	    }
4802 	}
4803       else if (attr_type == TYPE_FPALU)
4804 	{
4805 	  rtx pat = PATTERN (insn);
4806 	  rtx dep_pat = PATTERN (dep_insn);
4807 	  if (GET_CODE (pat) == PARALLEL)
4808 	    {
4809 	      /* This happens for the fldXs,mb patterns.  */
4810 	      pat = XVECEXP (pat, 0, 0);
4811 	    }
4812 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4813 	    /* If this happens, we have to extend this to schedule
4814 	       optimally.  Return 0 for now.  */
4815 	    return 0;
4816 
4817 	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4818 	    {
4819 	      if (! recog_memoized (dep_insn))
4820 		return 0;
4821 	      switch (get_attr_type (dep_insn))
4822 		{
4823 		case TYPE_FPDIVSGL:
4824 		case TYPE_FPDIVDBL:
4825 		case TYPE_FPSQRTSGL:
4826 		case TYPE_FPSQRTDBL:
4827 		  /* An ALU flop can't be issued until two cycles before a
4828 		     preceding divide or sqrt operation has finished if
4829 		     the target of the ALU flop is any of the sources
4830 		     (or destination) of the divide or sqrt operation.  */
4831 		  return insn_default_latency (dep_insn) - 2;
4832 
4833 		default:
4834 		  return 0;
4835 		}
4836 	    }
4837 	}
4838 
4839       /* For other anti dependencies, the cost is 0.  */
4840       return 0;
4841 
4842     case REG_DEP_OUTPUT:
4843       /* Output dependency; DEP_INSN writes a register that INSN writes some
4844 	 cycles later.  */
4845       if (attr_type == TYPE_FPLOAD)
4846 	{
4847 	  rtx pat = PATTERN (insn);
4848 	  rtx dep_pat = PATTERN (dep_insn);
4849 	  if (GET_CODE (pat) == PARALLEL)
4850 	    {
4851 	      /* This happens for the fldXs,mb patterns.  */
4852 	      pat = XVECEXP (pat, 0, 0);
4853 	    }
4854 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4855 	    /* If this happens, we have to extend this to schedule
4856 	       optimally.  Return 0 for now.  */
4857 	    return 0;
4858 
4859 	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4860 	    {
4861 	      if (! recog_memoized (dep_insn))
4862 		return 0;
4863 	      switch (get_attr_type (dep_insn))
4864 		{
4865 		case TYPE_FPALU:
4866 		case TYPE_FPMULSGL:
4867 		case TYPE_FPMULDBL:
4868 		case TYPE_FPDIVSGL:
4869 		case TYPE_FPDIVDBL:
4870 		case TYPE_FPSQRTSGL:
4871 		case TYPE_FPSQRTDBL:
4872 		  /* An fpload can't be issued until one cycle before a
4873 		     preceding arithmetic operation has finished if
4874 		     the target of the fpload is the destination of the
4875 		     arithmetic operation.
4876 
4877 		     Exception: for the PA7100LC, PA7200 and PA7300, the cost
4878 		     is 3 cycles, unless they bundle together.  We also
4879 		     pay the penalty if the second insn is an fpload.  */
4880 		  return insn_default_latency (dep_insn) - 1;
4881 
4882 		default:
4883 		  return 0;
4884 		}
4885 	    }
4886 	}
4887       else if (attr_type == TYPE_FPALU)
4888 	{
4889 	  rtx pat = PATTERN (insn);
4890 	  rtx dep_pat = PATTERN (dep_insn);
4891 	  if (GET_CODE (pat) == PARALLEL)
4892 	    {
4893 	      /* This happens for the fldXs,mb patterns.  */
4894 	      pat = XVECEXP (pat, 0, 0);
4895 	    }
4896 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4897 	    /* If this happens, we have to extend this to schedule
4898 	       optimally.  Return 0 for now.  */
4899 	    return 0;
4900 
4901 	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4902 	    {
4903 	      if (! recog_memoized (dep_insn))
4904 		return 0;
4905 	      switch (get_attr_type (dep_insn))
4906 		{
4907 		case TYPE_FPDIVSGL:
4908 		case TYPE_FPDIVDBL:
4909 		case TYPE_FPSQRTSGL:
4910 		case TYPE_FPSQRTDBL:
4911 		  /* An ALU flop can't be issued until two cycles before a
4912 		     preceding divide or sqrt operation has finished if
4913 		     the target of the ALU flop is also the target of
4914 		     the divide or sqrt operation.  */
4915 		  return insn_default_latency (dep_insn) - 2;
4916 
4917 		default:
4918 		  return 0;
4919 		}
4920 	    }
4921 	}
4922 
4923       /* For other output dependencies, the cost is 0.  */
4924       return 0;
4925 
4926     default:
4927       gcc_unreachable ();
4928     }
4929 }
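
/* A hedged worked example of the cost adjustments above (the latency
   values are assumed; only the "- 1" and "- 2" adjustments come from
   the code): if an fpload anti-depends on a preceding FPALU insn whose
   default latency is 3 cycles, the dependence cost becomes 3 - 1 = 2,
   so the fpload may issue one cycle before the FPALU retires.  An ALU
   flop output-dependent on a divide with a 12-cycle default latency
   similarly costs 12 - 2 = 10.  */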
4930 
4931 /* Adjust scheduling priorities.  We use this to try to keep addil
4932    and the next use of %r1 close together.  */
4933 static int
4934 pa_adjust_priority (rtx_insn *insn, int priority)
4935 {
4936   rtx set = single_set (insn);
4937   rtx src, dest;
4938   if (set)
4939     {
4940       src = SET_SRC (set);
4941       dest = SET_DEST (set);
4942       if (GET_CODE (src) == LO_SUM
4943 	  && symbolic_operand (XEXP (src, 1), VOIDmode)
4944 	  && ! read_only_operand (XEXP (src, 1), VOIDmode))
4945 	priority >>= 3;
4946 
4947       else if (GET_CODE (src) == MEM
4948 	       && GET_CODE (XEXP (src, 0)) == LO_SUM
4949 	       && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4950 	       && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4951 	priority >>= 1;
4952 
4953       else if (GET_CODE (dest) == MEM
4954 	       && GET_CODE (XEXP (dest, 0)) == LO_SUM
4955 	       && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4956 	       && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4957 	priority >>= 3;
4958     }
4959   return priority;
4960 }
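
/* Illustration (the starting priority is hypothetical; only the shift
   amounts come from the code above): an insn of priority 32 that sets
   a register from a LO_SUM of a writable symbol drops to 32 >> 3 = 4,
   a load through such a LO_SUM address drops to 32 >> 1 = 16, and a
   store through one drops to 32 >> 3 = 4.  The lowered priority defers
   the insn, keeping it near the addil that computes %r1.  */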
4961 
4962 /* The 700 can only issue a single insn at a time.
4963    The 7XXX processors can issue two insns at a time.
4964    The 8000 can issue 4 insns at a time.  */
4965 static int
4966 pa_issue_rate (void)
4967 {
4968   switch (pa_cpu)
4969     {
4970     case PROCESSOR_700:		return 1;
4971     case PROCESSOR_7100:	return 2;
4972     case PROCESSOR_7100LC:	return 2;
4973     case PROCESSOR_7200:	return 2;
4974     case PROCESSOR_7300:	return 2;
4975     case PROCESSOR_8000:	return 4;
4976 
4977     default:
4978       gcc_unreachable ();
4979     }
4980 }
4981 
4982 
4983 
4984 /* Return any length plus adjustment needed by INSN which already has
4985    its length computed as LENGTH.  Return LENGTH if no adjustment is
4986    necessary.
4987 
4988    Also compute the length of an inline block move here as it is too
4989    complicated to express as a length attribute in pa.md.  */
4990 int
4991 pa_adjust_insn_length (rtx_insn *insn, int length)
4992 {
4993   rtx pat = PATTERN (insn);
4994 
4995   /* If length is negative or undefined, provide initial length.  */
4996   if ((unsigned int) length >= INT_MAX)
4997     {
4998       if (GET_CODE (pat) == SEQUENCE)
4999 	insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
5000 
5001       switch (get_attr_type (insn))
5002 	{
5003 	case TYPE_MILLI:
5004 	  length = pa_attr_length_millicode_call (insn);
5005 	  break;
5006 	case TYPE_CALL:
5007 	  length = pa_attr_length_call (insn, 0);
5008 	  break;
5009 	case TYPE_SIBCALL:
5010 	  length = pa_attr_length_call (insn, 1);
5011 	  break;
5012 	case TYPE_DYNCALL:
5013 	  length = pa_attr_length_indirect_call (insn);
5014 	  break;
5015 	case TYPE_SH_FUNC_ADRS:
5016 	  length = pa_attr_length_millicode_call (insn) + 20;
5017 	  break;
5018 	default:
5019 	  gcc_unreachable ();
5020 	}
5021     }
5022 
5023   /* Block move pattern.  */
5024   if (NONJUMP_INSN_P (insn)
5025       && GET_CODE (pat) == PARALLEL
5026       && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5027       && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5028       && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
5029       && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
5030       && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
5031     length += compute_movmem_length (insn) - 4;
5032   /* Block clear pattern.  */
5033   else if (NONJUMP_INSN_P (insn)
5034 	   && GET_CODE (pat) == PARALLEL
5035 	   && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5036 	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5037 	   && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
5038 	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
5039     length += compute_clrmem_length (insn) - 4;
5040   /* Conditional branch with an unfilled delay slot.  */
5041   else if (JUMP_P (insn) && ! simplejump_p (insn))
5042     {
5043       /* Adjust a short backwards conditional with an unfilled delay slot.  */
5044       if (GET_CODE (pat) == SET
5045 	  && length == 4
5046 	  && JUMP_LABEL (insn) != NULL_RTX
5047 	  && ! forward_branch_p (insn))
5048 	length += 4;
5049       else if (GET_CODE (pat) == PARALLEL
5050 	       && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
5051 	       && length == 4)
5052 	length += 4;
5053       /* Adjust dbra insn with short backwards conditional branch with
5054 	 an unfilled delay slot -- only for the case where the counter
5055 	 is in a general register.  */
5056       else if (GET_CODE (pat) == PARALLEL
5057 	       && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5058 	       && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
5059  	       && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
5060 	       && length == 4
5061 	       && ! forward_branch_p (insn))
5062 	length += 4;
5063     }
5064   return length;
5065 }
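
/* A hedged example of the branch adjustment above: a short backwards
   conditional branch with an initial length of 4 and an unfilled delay
   slot returns 4 + 4 = 8 bytes, accounting for the nop that will fill
   the slot.  Likewise, a BLKmode block move's length becomes the
   pattern length plus compute_movmem_length (insn) - 4.  */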
5066 
5067 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook.  */
5068 
5069 static bool
5070 pa_print_operand_punct_valid_p (unsigned char code)
5071 {
5072   if (code == '@'
5073       || code == '#'
5074       || code == '*'
5075       || code == '^')
5076     return true;
5077 
5078   return false;
5079 }
5080 
5081 /* Print operand X (an rtx) in assembler syntax to file FILE.
5082    CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5083    For `%' followed by punctuation, CODE is the punctuation and X is null.  */
5084 
5085 void
5086 pa_print_operand (FILE *file, rtx x, int code)
5087 {
5088   switch (code)
5089     {
5090     case '#':
5091       /* Output a 'nop' if there's nothing for the delay slot.  */
5092       if (dbr_sequence_length () == 0)
5093 	fputs ("\n\tnop", file);
5094       return;
5095     case '*':
5096       /* Output a nullification completer if there's nothing for the
5097 	 delay slot or nullification is requested.  */
5098       if (dbr_sequence_length () == 0
5099 	  || (final_sequence
5100 	      && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5101         fputs (",n", file);
5102       return;
5103     case 'R':
5104       /* Print out the second register name of a register pair.
5105 	 I.e., R (6) => 7.  */
5106       fputs (reg_names[REGNO (x) + 1], file);
5107       return;
5108     case 'r':
5109       /* A register or zero.  */
5110       if (x == const0_rtx
5111 	  || (x == CONST0_RTX (DFmode))
5112 	  || (x == CONST0_RTX (SFmode)))
5113 	{
5114 	  fputs ("%r0", file);
5115 	  return;
5116 	}
5117       else
5118 	break;
5119     case 'f':
5120       /* A register or zero (floating point).  */
5121       if (x == const0_rtx
5122 	  || (x == CONST0_RTX (DFmode))
5123 	  || (x == CONST0_RTX (SFmode)))
5124 	{
5125 	  fputs ("%fr0", file);
5126 	  return;
5127 	}
5128       else
5129 	break;
5130     case 'A':
5131       {
5132 	rtx xoperands[2];
5133 
5134 	xoperands[0] = XEXP (XEXP (x, 0), 0);
5135 	xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5136 	pa_output_global_address (file, xoperands[1], 0);
5137         fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5138 	return;
5139       }
5140 
5141     case 'C':			/* Plain (C)ondition */
5142     case 'X':
5143       switch (GET_CODE (x))
5144 	{
5145 	case EQ:
5146 	  fputs ("=", file);  break;
5147 	case NE:
5148 	  fputs ("<>", file);  break;
5149 	case GT:
5150 	  fputs (">", file);  break;
5151 	case GE:
5152 	  fputs (">=", file);  break;
5153 	case GEU:
5154 	  fputs (">>=", file);  break;
5155 	case GTU:
5156 	  fputs (">>", file);  break;
5157 	case LT:
5158 	  fputs ("<", file);  break;
5159 	case LE:
5160 	  fputs ("<=", file);  break;
5161 	case LEU:
5162 	  fputs ("<<=", file);  break;
5163 	case LTU:
5164 	  fputs ("<<", file);  break;
5165 	default:
5166 	  gcc_unreachable ();
5167 	}
5168       return;
5169     case 'N':			/* Condition, (N)egated */
5170       switch (GET_CODE (x))
5171 	{
5172 	case EQ:
5173 	  fputs ("<>", file);  break;
5174 	case NE:
5175 	  fputs ("=", file);  break;
5176 	case GT:
5177 	  fputs ("<=", file);  break;
5178 	case GE:
5179 	  fputs ("<", file);  break;
5180 	case GEU:
5181 	  fputs ("<<", file);  break;
5182 	case GTU:
5183 	  fputs ("<<=", file);  break;
5184 	case LT:
5185 	  fputs (">=", file);  break;
5186 	case LE:
5187 	  fputs (">", file);  break;
5188 	case LEU:
5189 	  fputs (">>", file);  break;
5190 	case LTU:
5191 	  fputs (">>=", file);  break;
5192 	default:
5193 	  gcc_unreachable ();
5194 	}
5195       return;
5196     /* For floating point comparisons.  Note that the output
5197        predicates are the complement of the desired mode.  The
5198        conditions for GT, GE, LT, LE and LTGT cause an invalid
5199        operation exception if the result is unordered and this
5200        exception is enabled in the floating-point status register.  */
5201     case 'Y':
5202       switch (GET_CODE (x))
5203 	{
5204 	case EQ:
5205 	  fputs ("!=", file);  break;
5206 	case NE:
5207 	  fputs ("=", file);  break;
5208 	case GT:
5209 	  fputs ("!>", file);  break;
5210 	case GE:
5211 	  fputs ("!>=", file);  break;
5212 	case LT:
5213 	  fputs ("!<", file);  break;
5214 	case LE:
5215 	  fputs ("!<=", file);  break;
5216 	case LTGT:
5217 	  fputs ("!<>", file);  break;
5218 	case UNLE:
5219 	  fputs ("!?<=", file);  break;
5220 	case UNLT:
5221 	  fputs ("!?<", file);  break;
5222 	case UNGE:
5223 	  fputs ("!?>=", file);  break;
5224 	case UNGT:
5225 	  fputs ("!?>", file);  break;
5226 	case UNEQ:
5227 	  fputs ("!?=", file);  break;
5228 	case UNORDERED:
5229 	  fputs ("!?", file);  break;
5230 	case ORDERED:
5231 	  fputs ("?", file);  break;
5232 	default:
5233 	  gcc_unreachable ();
5234 	}
5235       return;
5236     case 'S':			/* Condition, operands are (S)wapped.  */
5237       switch (GET_CODE (x))
5238 	{
5239 	case EQ:
5240 	  fputs ("=", file);  break;
5241 	case NE:
5242 	  fputs ("<>", file);  break;
5243 	case GT:
5244 	  fputs ("<", file);  break;
5245 	case GE:
5246 	  fputs ("<=", file);  break;
5247 	case GEU:
5248 	  fputs ("<<=", file);  break;
5249 	case GTU:
5250 	  fputs ("<<", file);  break;
5251 	case LT:
5252 	  fputs (">", file);  break;
5253 	case LE:
5254 	  fputs (">=", file);  break;
5255 	case LEU:
5256 	  fputs (">>=", file);  break;
5257 	case LTU:
5258 	  fputs (">>", file);  break;
5259 	default:
5260 	  gcc_unreachable ();
5261 	}
5262       return;
5263     case 'B':			/* Condition, (B)oth swapped and negate.  */
5264       switch (GET_CODE (x))
5265 	{
5266 	case EQ:
5267 	  fputs ("<>", file);  break;
5268 	case NE:
5269 	  fputs ("=", file);  break;
5270 	case GT:
5271 	  fputs (">=", file);  break;
5272 	case GE:
5273 	  fputs (">", file);  break;
5274 	case GEU:
5275 	  fputs (">>", file);  break;
5276 	case GTU:
5277 	  fputs (">>=", file);  break;
5278 	case LT:
5279 	  fputs ("<=", file);  break;
5280 	case LE:
5281 	  fputs ("<", file);  break;
5282 	case LEU:
5283 	  fputs ("<<", file);  break;
5284 	case LTU:
5285 	  fputs ("<<=", file);  break;
5286 	default:
5287 	  gcc_unreachable ();
5288 	}
5289       return;
5290     case 'k':
5291       gcc_assert (GET_CODE (x) == CONST_INT);
5292       fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5293       return;
5294     case 'Q':
5295       gcc_assert (GET_CODE (x) == CONST_INT);
5296       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5297       return;
5298     case 'L':
5299       gcc_assert (GET_CODE (x) == CONST_INT);
5300       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5301       return;
5302     case 'O':
5303       gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5304       fprintf (file, "%d", exact_log2 (INTVAL (x)));
5305       return;
5306     case 'p':
5307       gcc_assert (GET_CODE (x) == CONST_INT);
5308       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5309       return;
5310     case 'P':
5311       gcc_assert (GET_CODE (x) == CONST_INT);
5312       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5313       return;
5314     case 'I':
5315       if (GET_CODE (x) == CONST_INT)
5316 	fputs ("i", file);
5317       return;
5318     case 'M':
5319     case 'F':
5320       switch (GET_CODE (XEXP (x, 0)))
5321 	{
5322 	case PRE_DEC:
5323 	case PRE_INC:
5324 	  if (ASSEMBLER_DIALECT == 0)
5325 	    fputs ("s,mb", file);
5326 	  else
5327 	    fputs (",mb", file);
5328 	  break;
5329 	case POST_DEC:
5330 	case POST_INC:
5331 	  if (ASSEMBLER_DIALECT == 0)
5332 	    fputs ("s,ma", file);
5333 	  else
5334 	    fputs (",ma", file);
5335 	  break;
5336 	case PLUS:
5337 	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5338 	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5339 	    {
5340 	      if (ASSEMBLER_DIALECT == 0)
5341 		fputs ("x", file);
5342 	    }
5343 	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5344 		   || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5345 	    {
5346 	      if (ASSEMBLER_DIALECT == 0)
5347 		fputs ("x,s", file);
5348 	      else
5349 		fputs (",s", file);
5350 	    }
5351 	  else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5352 	    fputs ("s", file);
5353 	  break;
5354 	default:
5355 	  if (code == 'F' && ASSEMBLER_DIALECT == 0)
5356 	    fputs ("s", file);
5357 	  break;
5358 	}
5359       return;
5360     case 'G':
5361       pa_output_global_address (file, x, 0);
5362       return;
5363     case 'H':
5364       pa_output_global_address (file, x, 1);
5365       return;
5366     case 0:			/* Don't do anything special */
5367       break;
5368     case 'Z':
5369       {
5370 	unsigned op[3];
5371 	compute_zdepwi_operands (INTVAL (x), op);
5372 	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5373 	return;
5374       }
5375     case 'z':
5376       {
5377 	unsigned op[3];
5378 	compute_zdepdi_operands (INTVAL (x), op);
5379 	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5380 	return;
5381       }
5382     case 'c':
5383       /* We can get here from a .vtable_inherit due to our
5384 	 CONSTANT_ADDRESS_P rejecting perfectly good constant
5385 	 addresses.  */
5386       break;
5387     default:
5388       gcc_unreachable ();
5389     }
5390   if (GET_CODE (x) == REG)
5391     {
5392       fputs (reg_names [REGNO (x)], file);
5393       if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5394 	{
5395 	  fputs ("R", file);
5396 	  return;
5397 	}
5398       if (FP_REG_P (x)
5399 	  && GET_MODE_SIZE (GET_MODE (x)) <= 4
5400 	  && (REGNO (x) & 1) == 0)
5401 	fputs ("L", file);
5402     }
5403   else if (GET_CODE (x) == MEM)
5404     {
5405       int size = GET_MODE_SIZE (GET_MODE (x));
5406       rtx base = NULL_RTX;
5407       switch (GET_CODE (XEXP (x, 0)))
5408 	{
5409 	case PRE_DEC:
5410 	case POST_DEC:
5411           base = XEXP (XEXP (x, 0), 0);
5412 	  fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5413 	  break;
5414 	case PRE_INC:
5415 	case POST_INC:
5416           base = XEXP (XEXP (x, 0), 0);
5417 	  fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5418 	  break;
5419 	case PLUS:
5420 	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5421 	    fprintf (file, "%s(%s)",
5422 		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5423 		     reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5424 	  else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5425 	    fprintf (file, "%s(%s)",
5426 		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5427 		     reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5428 	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5429 		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5430 	    {
5431 	      /* Because the REG_POINTER flag can get lost during reload,
5432 		 pa_legitimate_address_p canonicalizes the order of the
5433 		 index and base registers in the combined move patterns.  */
5434 	      rtx base = XEXP (XEXP (x, 0), 1);
5435 	      rtx index = XEXP (XEXP (x, 0), 0);
5436 
5437 	      fprintf (file, "%s(%s)",
5438 		       reg_names [REGNO (index)], reg_names [REGNO (base)]);
5439 	    }
5440 	  else
5441 	    output_address (XEXP (x, 0));
5442 	  break;
5443 	default:
5444 	  output_address (XEXP (x, 0));
5445 	  break;
5446 	}
5447     }
5448   else
5449     output_addr_const (file, x);
5450 }
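
/* Hedged worked examples for a few of the cases above (the operand
   values are invented):
     %N on (gt ...)       prints "<="  (the negated condition)
     %Q on (const_int 5)  prints 59    (64 - (5 & 63))
     %L on (const_int 5)  prints 27    (32 - (5 & 31))
     %P on (const_int 5)  prints 26    (31 - (5 & 31))
   Reading these as complements of shift/deposit counts is our
   interpretation, not stated by the code itself.  */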
5451 
5452 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF.  */
5453 
5454 void
5455 pa_output_global_address (FILE *file, rtx x, int round_constant)
5456 {
5457 
5458   /* We may be given (high (const (plus ...))).  Strip the HIGH.  */
5459   if (GET_CODE (x) == HIGH)
5460     x = XEXP (x, 0);
5461 
5462   if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5463     output_addr_const (file, x);
5464   else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5465     {
5466       output_addr_const (file, x);
5467       fputs ("-$global$", file);
5468     }
5469   else if (GET_CODE (x) == CONST)
5470     {
5471       const char *sep = "";
5472       int offset = 0;		/* assembler wants -$global$ at end */
5473       rtx base = NULL_RTX;
5474 
5475       switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5476 	{
5477 	case LABEL_REF:
5478 	case SYMBOL_REF:
5479 	  base = XEXP (XEXP (x, 0), 0);
5480 	  output_addr_const (file, base);
5481 	  break;
5482 	case CONST_INT:
5483 	  offset = INTVAL (XEXP (XEXP (x, 0), 0));
5484 	  break;
5485 	default:
5486 	  gcc_unreachable ();
5487 	}
5488 
5489       switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5490 	{
5491 	case LABEL_REF:
5492 	case SYMBOL_REF:
5493 	  base = XEXP (XEXP (x, 0), 1);
5494 	  output_addr_const (file, base);
5495 	  break;
5496 	case CONST_INT:
5497 	  offset = INTVAL (XEXP (XEXP (x, 0), 1));
5498 	  break;
5499 	default:
5500 	  gcc_unreachable ();
5501 	}
5502 
5503       /* How bogus.  The compiler is apparently responsible for
5504 	 rounding the constant if it uses an LR field selector.
5505 
5506 	 The linker and/or assembler seem a better place since
5507 	 they have to do this kind of thing already.
5508 
5509 	 If we fail to do this, HP's optimizing linker may eliminate
5510 	 an addil, but not update the ldw/stw/ldo instruction that
5511 	 uses the result of the addil.  */
5512       if (round_constant)
5513 	offset = ((offset + 0x1000) & ~0x1fff);
5514 
5515       switch (GET_CODE (XEXP (x, 0)))
5516 	{
5517 	case PLUS:
5518 	  if (offset < 0)
5519 	    {
5520 	      offset = -offset;
5521 	      sep = "-";
5522 	    }
5523 	  else
5524 	    sep = "+";
5525 	  break;
5526 
5527 	case MINUS:
5528 	  gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5529 	  sep = "-";
5530 	  break;
5531 
5532 	default:
5533 	  gcc_unreachable ();
5534 	}
5535 
5536       if (!read_only_operand (base, VOIDmode) && !flag_pic)
5537 	fputs ("-$global$", file);
5538       if (offset)
5539 	fprintf (file, "%s%d", sep, offset);
5540     }
5541   else
5542     output_addr_const (file, x);
5543 }
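
/* The LR-field rounding above, restated as a standalone computation.
   Illustrative only, hence not compiled; the function name is made up.  */
#if 0
static int
example_lr_round (int offset)
{
  /* E.g. 0x1234 rounds to 0x2000, and -0x800 rounds to 0.  */
  return (offset + 0x1000) & ~0x1fff;
}
#endif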
5544 
5545 /* Output boilerplate text to appear at the beginning of the file.
5546    There are several possible versions.  */
5547 #define aputs(x) fputs(x, asm_out_file)
5548 static inline void
5549 pa_file_start_level (void)
5550 {
5551   if (TARGET_64BIT)
5552     aputs ("\t.LEVEL 2.0w\n");
5553   else if (TARGET_PA_20)
5554     aputs ("\t.LEVEL 2.0\n");
5555   else if (TARGET_PA_11)
5556     aputs ("\t.LEVEL 1.1\n");
5557   else
5558     aputs ("\t.LEVEL 1.0\n");
5559 }
5560 
5561 static inline void
5562 pa_file_start_space (int sortspace)
5563 {
5564   aputs ("\t.SPACE $PRIVATE$");
5565   if (sortspace)
5566     aputs (",SORT=16");
5567   aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5568   if (flag_tm)
5569     aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5570   aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5571 	 "\n\t.SPACE $TEXT$");
5572   if (sortspace)
5573     aputs (",SORT=8");
5574   aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5575 	 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5576 }
5577 
5578 static inline void
5579 pa_file_start_file (int want_version)
5580 {
5581   if (write_symbols != NO_DEBUG)
5582     {
5583       output_file_directive (asm_out_file, main_input_filename);
5584       if (want_version)
5585 	aputs ("\t.version\t\"01.01\"\n");
5586     }
5587 }
5588 
5589 static inline void
5590 pa_file_start_mcount (const char *aswhat)
5591 {
5592   if (profile_flag)
5593     fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5594 }
5595 
5596 static void
5597 pa_elf_file_start (void)
5598 {
5599   pa_file_start_level ();
5600   pa_file_start_mcount ("ENTRY");
5601   pa_file_start_file (0);
5602 }
5603 
5604 static void
5605 pa_som_file_start (void)
5606 {
5607   pa_file_start_level ();
5608   pa_file_start_space (0);
5609   aputs ("\t.IMPORT $global$,DATA\n"
5610          "\t.IMPORT $$dyncall,MILLICODE\n");
5611   pa_file_start_mcount ("CODE");
5612   pa_file_start_file (0);
5613 }
5614 
5615 static void
5616 pa_linux_file_start (void)
5617 {
5618   pa_file_start_file (0);
5619   pa_file_start_level ();
5620   pa_file_start_mcount ("CODE");
5621 }
5622 
5623 static void
5624 pa_hpux64_gas_file_start (void)
5625 {
5626   pa_file_start_level ();
5627 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5628   if (profile_flag)
5629     ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5630 #endif
5631   pa_file_start_file (1);
5632 }
5633 
5634 static void
5635 pa_hpux64_hpas_file_start (void)
5636 {
5637   pa_file_start_level ();
5638   pa_file_start_space (1);
5639   pa_file_start_mcount ("CODE");
5640   pa_file_start_file (0);
5641 }
5642 #undef aputs
5643 
5644 /* Search the deferred plabel list for SYMBOL and return its internal
5645    label.  If an entry for SYMBOL is not found, a new entry is created.  */
5646 
5647 rtx
5648 pa_get_deferred_plabel (rtx symbol)
5649 {
5650   const char *fname = XSTR (symbol, 0);
5651   size_t i;
5652 
5653   /* See if we have already put this function on the list of deferred
5654      plabels.  This list is generally small, so a linear search is not
5655      too ugly.  If it proves too slow, replace it with something faster.  */
5656   for (i = 0; i < n_deferred_plabels; i++)
5657     if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5658       break;
5659 
5660   /* If the deferred plabel list is empty, or this entry was not found
5661      on the list, create a new entry on the list.  */
5662   if (deferred_plabels == NULL || i == n_deferred_plabels)
5663     {
5664       tree id;
5665 
5666       if (deferred_plabels == 0)
5667 	deferred_plabels = ggc_alloc<deferred_plabel> ();
5668       else
5669         deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5670                                           deferred_plabels,
5671                                           n_deferred_plabels + 1);
5672 
5673       i = n_deferred_plabels++;
5674       deferred_plabels[i].internal_label = gen_label_rtx ();
5675       deferred_plabels[i].symbol = symbol;
5676 
5677       /* Gross.  We have just implicitly taken the address of this
5678 	 function.  Mark it in the same manner as assemble_name.  */
5679       id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5680       if (id)
5681 	mark_referenced (id);
5682     }
5683 
5684   return deferred_plabels[i].internal_label;
5685 }
5686 
5687 static void
5688 output_deferred_plabels (void)
5689 {
5690   size_t i;
5691 
5692   /* If we have some deferred plabels, then we need to switch into the
5693      data or readonly data section, and align it to a 4 byte boundary
5694      before outputting the deferred plabels.  */
5695   if (n_deferred_plabels)
5696     {
5697       switch_to_section (flag_pic ? data_section : readonly_data_section);
5698       ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5699     }
5700 
5701   /* Now output the deferred plabels.  */
5702   for (i = 0; i < n_deferred_plabels; i++)
5703     {
5704       targetm.asm_out.internal_label (asm_out_file, "L",
5705 		 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5706       assemble_integer (deferred_plabels[i].symbol,
5707 			TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5708     }
5709 }
5710 
5711 /* Initialize optabs to point to emulation routines.  */
5712 
5713 static void
5714 pa_init_libfuncs (void)
5715 {
5716   if (HPUX_LONG_DOUBLE_LIBRARY)
5717     {
5718       set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5719       set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5720       set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5721       set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5722       set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5723       set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5724       set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5725       set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5726       set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5727 
5728       set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5729       set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5730       set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5731       set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5732       set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5733       set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5734       set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5735 
5736       set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5737       set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5738       set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5739       set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5740 
5741       set_conv_libfunc (sfix_optab, SImode, TFmode,
5742 			TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
5743 				     : "_U_Qfcnvfxt_quad_to_sgl");
5744       set_conv_libfunc (sfix_optab, DImode, TFmode,
5745 			"_U_Qfcnvfxt_quad_to_dbl");
5746       set_conv_libfunc (ufix_optab, SImode, TFmode,
5747 			"_U_Qfcnvfxt_quad_to_usgl");
5748       set_conv_libfunc (ufix_optab, DImode, TFmode,
5749 			"_U_Qfcnvfxt_quad_to_udbl");
5750 
5751       set_conv_libfunc (sfloat_optab, TFmode, SImode,
5752 			"_U_Qfcnvxf_sgl_to_quad");
5753       set_conv_libfunc (sfloat_optab, TFmode, DImode,
5754 			"_U_Qfcnvxf_dbl_to_quad");
5755       set_conv_libfunc (ufloat_optab, TFmode, SImode,
5756 			"_U_Qfcnvxf_usgl_to_quad");
5757       set_conv_libfunc (ufloat_optab, TFmode, DImode,
5758 			"_U_Qfcnvxf_udbl_to_quad");
5759     }
5760 
5761   if (TARGET_SYNC_LIBCALL)
5762     init_sync_libfuncs (8);
5763 }
5764 
5765 /* HP's millicode routines mean something special to the assembler.
5766    Keep track of which ones we have used.  */
5767 
5768 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5769 static void import_milli (enum millicodes);
5770 static char imported[(int) end1000];
5771 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5772 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5773 #define MILLI_START 10
5774 
5775 static void
5776 import_milli (enum millicodes code)
5777 {
5778   char str[sizeof (import_string)];
5779 
5780   if (!imported[(int) code])
5781     {
5782       imported[(int) code] = 1;
5783       strcpy (str, import_string);
5784       strncpy (str + MILLI_START, milli_names[(int) code], 4);
5785       output_asm_insn (str, 0);
5786     }
5787 }
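
/* Example (directive text derived from the template above): the first
   call to import_milli (mulI) overwrites the "...." placeholder at
   offset MILLI_START with "mulI" and emits

	.IMPORT $$mulI,MILLICODE

   Subsequent calls for the same code are no-ops because of the
   imported[] check.  */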
5788 
5789 /* The register constraints have put the operands and return value in
5790    the proper registers.  */
5791 
5792 const char *
5793 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
5794 {
5795   import_milli (mulI);
5796   return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5797 }
5798 
5799 /* Emit the rtl for doing a division by a constant.  */
5800 
5801 /* Do magic division millicodes exist for this value? */
5802 const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
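
/* Reading the table: the entries with value 1 are divisors 3, 5, 6, 7,
   9, 10, 12, 14 and 15, so e.g. $$divI_7 exists while $$divI_11 does
   not.  The power-of-two divisors (1, 2, 4, 8) are 0 here; presumably
   those divisions are handled by shifts before reaching this path.  */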
5803 
5804 /* We'll use an array to keep track of the magic millicodes and
5805    whether or not we've used them already. [n][0] is signed, [n][1] is
5806    unsigned.  */
5807 
5808 static int div_milli[16][2];
5809 
5810 int
5811 pa_emit_hpdiv_const (rtx *operands, int unsignedp)
5812 {
5813   if (GET_CODE (operands[2]) == CONST_INT
5814       && INTVAL (operands[2]) > 0
5815       && INTVAL (operands[2]) < 16
5816       && pa_magic_milli[INTVAL (operands[2])])
5817     {
5818       rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5819 
5820       emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5821       emit
5822 	(gen_rtx_PARALLEL
5823 	 (VOIDmode,
5824 	  gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5825 				     gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5826 						     SImode,
5827 						     gen_rtx_REG (SImode, 26),
5828 						     operands[2])),
5829 		     gen_rtx_CLOBBER (VOIDmode, operands[4]),
5830 		     gen_rtx_CLOBBER (VOIDmode, operands[3]),
5831 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5832 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5833 		     gen_rtx_CLOBBER (VOIDmode, ret))));
5834       emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5835       return 1;
5836     }
5837   return 0;
5838 }
5839 
5840 const char *
5841 pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
5842 {
5843   HOST_WIDE_INT divisor;
5844 
5845   /* If the divisor is a constant, try to use one of the special
5846      opcodes.  */
5847   if (GET_CODE (operands[0]) == CONST_INT)
5848     {
5849       static char buf[100];
5850       divisor = INTVAL (operands[0]);
5851       if (!div_milli[divisor][unsignedp])
5852 	{
5853 	  div_milli[divisor][unsignedp] = 1;
5854 	  if (unsignedp)
5855 	    output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5856 	  else
5857 	    output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5858 	}
5859       if (unsignedp)
5860 	{
5861 	  sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5862 		   INTVAL (operands[0]));
5863 	  return pa_output_millicode_call (insn,
5864 					   gen_rtx_SYMBOL_REF (SImode, buf));
5865 	}
5866       else
5867 	{
5868 	  sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5869 		   INTVAL (operands[0]));
5870 	  return pa_output_millicode_call (insn,
5871 					   gen_rtx_SYMBOL_REF (SImode, buf));
5872 	}
5873     }
5874   /* Divisor isn't a special constant.  */
5875   else
5876     {
5877       if (unsignedp)
5878 	{
5879 	  import_milli (divU);
5880 	  return pa_output_millicode_call (insn,
5881 					gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5882 	}
5883       else
5884 	{
5885 	  import_milli (divI);
5886 	  return pa_output_millicode_call (insn,
5887 					gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5888 	}
5889     }
5890 }
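
/* Hedged example: for a signed division by the constant 7, the code
   above emits ".IMPORT $$divI_7,MILLICODE" once (guarded by the
   div_milli array) and then generates a millicode call to $$divI_7;
   a non-constant signed divisor instead calls the generic $$divI.  */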
5891 
5892 /* Output a $$rem millicode to do mod.  */
5893 
5894 const char *
5895 pa_output_mod_insn (int unsignedp, rtx_insn *insn)
5896 {
5897   if (unsignedp)
5898     {
5899       import_milli (remU);
5900       return pa_output_millicode_call (insn,
5901 				       gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5902     }
5903   else
5904     {
5905       import_milli (remI);
5906       return pa_output_millicode_call (insn,
5907 				       gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5908     }
5909 }
5910 
5911 void
5912 pa_output_arg_descriptor (rtx_insn *call_insn)
5913 {
5914   const char *arg_regs[4];
5915   machine_mode arg_mode;
5916   rtx link;
5917   int i, output_flag = 0;
5918   int regno;
5919 
5920   /* We neither need nor want argument location descriptors for the
5921      64-bit runtime environment or the ELF32 environment.  */
5922   if (TARGET_64BIT || TARGET_ELF32)
5923     return;
5924 
5925   for (i = 0; i < 4; i++)
5926     arg_regs[i] = 0;
5927 
5928   /* Specify explicitly that no argument relocations should take place
5929      if using the portable runtime calling conventions.  */
5930   if (TARGET_PORTABLE_RUNTIME)
5931     {
5932       fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5933 	     asm_out_file);
5934       return;
5935     }
5936 
5937   gcc_assert (CALL_P (call_insn));
5938   for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5939        link; link = XEXP (link, 1))
5940     {
5941       rtx use = XEXP (link, 0);
5942 
5943       if (! (GET_CODE (use) == USE
5944 	     && GET_CODE (XEXP (use, 0)) == REG
5945 	     && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5946 	continue;
5947 
5948       arg_mode = GET_MODE (XEXP (use, 0));
5949       regno = REGNO (XEXP (use, 0));
5950       if (regno >= 23 && regno <= 26)
5951 	{
5952 	  arg_regs[26 - regno] = "GR";
5953 	  if (arg_mode == DImode)
5954 	    arg_regs[25 - regno] = "GR";
5955 	}
5956       else if (regno >= 32 && regno <= 39)
5957 	{
5958 	  if (arg_mode == SFmode)
5959 	    arg_regs[(regno - 32) / 2] = "FR";
5960 	  else
5961 	    {
5962 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5963 	      arg_regs[(regno - 34) / 2] = "FR";
5964 	      arg_regs[(regno - 34) / 2 + 1] = "FU";
5965 #else
5966 	      arg_regs[(regno - 34) / 2] = "FU";
5967 	      arg_regs[(regno - 34) / 2 + 1] = "FR";
5968 #endif
5969 	    }
5970 	}
5971     }
5972   fputs ("\t.CALL ", asm_out_file);
5973   for (i = 0; i < 4; i++)
5974     {
5975       if (arg_regs[i])
5976 	{
5977 	  if (output_flag++)
5978 	    fputc (',', asm_out_file);
5979 	  fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5980 	}
5981     }
5982   fputc ('\n', asm_out_file);
5983 }
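
/* Hedged example (register numbers follow the checks above, the call
   itself is invented): a call whose only argument uses %r26 produces

	.CALL ARGW0=GR

   while a single SFmode argument in floating register 36 maps to slot
   (36 - 32) / 2 = 2 and produces ".CALL ARGW2=FR".  */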
5984 
5985 /* Inform reload about cases where moving X with a mode MODE to or from
5986    a register in RCLASS requires an extra scratch or immediate register.
5987    Return the class needed for the immediate register.  */
5988 
5989 static reg_class_t
5990 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
5991 		     machine_mode mode, secondary_reload_info *sri)
5992 {
5993   int regno;
5994   enum reg_class rclass = (enum reg_class) rclass_i;
5995 
5996   /* Handle the easy stuff first.  */
5997   if (rclass == R1_REGS)
5998     return NO_REGS;
5999 
6000   if (REG_P (x))
6001     {
6002       regno = REGNO (x);
6003       if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
6004 	return NO_REGS;
6005     }
6006   else
6007     regno = -1;
6008 
6009   /* If we have something like (mem (mem (...)), we can safely assume the
6010      inner MEM will end up in a general register after reloading, so there's
6011      no need for a secondary reload.  */
6012   if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
6013     return NO_REGS;
6014 
6015   /* Trying to load a constant into a FP register during PIC code
6016      generation requires %r1 as a scratch register.  For float modes,
6017      the only legitimate constant is CONST0_RTX.  However, there are
6018      a few patterns that accept constant double operands.  */
6019   if (flag_pic
6020       && FP_REG_CLASS_P (rclass)
6021       && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
6022     {
6023       switch (mode)
6024 	{
6025 	case SImode:
6026 	  sri->icode = CODE_FOR_reload_insi_r1;
6027 	  break;
6028 
6029 	case DImode:
6030 	  sri->icode = CODE_FOR_reload_indi_r1;
6031 	  break;
6032 
6033 	case SFmode:
6034 	  sri->icode = CODE_FOR_reload_insf_r1;
6035 	  break;
6036 
6037 	case DFmode:
6038 	  sri->icode = CODE_FOR_reload_indf_r1;
6039 	  break;
6040 
6041 	default:
6042 	  gcc_unreachable ();
6043 	}
6044       return NO_REGS;
6045     }
6046 
6047   /* Secondary reloads of symbolic expressions require %r1 as a scratch
6048      register when we're generating PIC code or when the operand isn't
6049      readonly.  */
6050   if (pa_symbolic_expression_p (x))
6051     {
6052       if (GET_CODE (x) == HIGH)
6053 	x = XEXP (x, 0);
6054 
6055       if (flag_pic || !read_only_operand (x, VOIDmode))
6056 	{
6057 	  switch (mode)
6058 	    {
6059 	    case SImode:
6060 	      sri->icode = CODE_FOR_reload_insi_r1;
6061 	      break;
6062 
6063 	    case DImode:
6064 	      sri->icode = CODE_FOR_reload_indi_r1;
6065 	      break;
6066 
6067 	    default:
6068 	      gcc_unreachable ();
6069 	    }
6070 	  return NO_REGS;
6071 	}
6072     }
6073 
6074   /* Profiling showed the PA port spends about 1.3% of its compilation
6075      time in true_regnum from calls inside pa_secondary_reload_class.  */
6076   if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
6077     regno = true_regnum (x);
6078 
6079   /* Handle reloads for floating point loads and stores.  */
6080   if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
6081       && FP_REG_CLASS_P (rclass))
6082     {
6083       if (MEM_P (x))
6084 	{
6085 	  x = XEXP (x, 0);
6086 
6087 	  /* We don't need a secondary reload for indexed memory addresses.
6088 
6089 	     When INT14_OK_STRICT is true, it might appear that we could
6090 	     directly allow register indirect memory addresses.  However,
6091 	     this doesn't work because we don't support SUBREGs in
6092 	     floating-point register copies and reload doesn't tell us
6093 	     when it's going to use a SUBREG.  */
6094 	  if (IS_INDEX_ADDR_P (x))
6095 	    return NO_REGS;
6096 	}
6097 
6098       /* Request a secondary reload with a general scratch register
6099 	 for everything else.  ??? Could symbolic operands be handled
6100 	 directly when generating non-pic PA 2.0 code?  */
6101       sri->icode = (in_p
6102 		    ? direct_optab_handler (reload_in_optab, mode)
6103 		    : direct_optab_handler (reload_out_optab, mode));
6104       return NO_REGS;
6105     }
6106 
6107   /* A SAR<->FP register copy requires an intermediate general register
6108      and secondary memory.  We need a secondary reload with a general
6109      scratch register for spills.  */
6110   if (rclass == SHIFT_REGS)
6111     {
6112       /* Handle spill.  */
6113       if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6114 	{
6115 	  sri->icode = (in_p
6116 			? direct_optab_handler (reload_in_optab, mode)
6117 			: direct_optab_handler (reload_out_optab, mode));
6118 	  return NO_REGS;
6119 	}
6120 
6121       /* Handle FP copy.  */
6122       if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6123 	return GENERAL_REGS;
6124     }
6125 
6126   if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
6127       && REGNO_REG_CLASS (regno) == SHIFT_REGS
6128       && FP_REG_CLASS_P (rclass))
6129     return GENERAL_REGS;
6130 
6131   return NO_REGS;
6132 }
6133 
6134 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY.  The argument pointer
6135    is only marked as live on entry by df-scan when it is a fixed
6136    register.  It isn't a fixed register in the 64-bit runtime,
6137    so we need to mark it here.  */
6138 
6139 static void
6140 pa_extra_live_on_entry (bitmap regs)
6141 {
6142   if (TARGET_64BIT)
6143     bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6144 }
6145 
6146 /* Implement EH_RETURN_HANDLER_RTX.  The MEM needs to be volatile
6147    to prevent it from being deleted.  */
6148 
6149 rtx
6150 pa_eh_return_handler_rtx (void)
6151 {
6152   rtx tmp;
6153 
6154   tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6155 		      TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6156   tmp = gen_rtx_MEM (word_mode, tmp);
6157   tmp->volatil = 1;
6158   return tmp;
6159 }
6160 
6161 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6162    by invisible reference.  As a GCC extension, we also pass anything
6163    with a zero or variable size by reference.
6164 
6165    The 64-bit runtime does not describe passing any types by invisible
6166    reference.  The internals of GCC can't currently handle passing
6167    empty structures, and zero or variable length arrays when they are
6168    not passed entirely on the stack or by reference.  Thus, as a GCC
6169    extension, we pass these types by reference.  The HP compiler doesn't
6170    support these types, so hopefully there shouldn't be any compatibility
6171    issues.  This may have to be revisited when HP releases a C99 compiler
6172    or updates the ABI.  */
6173 
6174 static bool
6175 pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
6176 		      machine_mode mode, const_tree type,
6177 		      bool named ATTRIBUTE_UNUSED)
6178 {
6179   HOST_WIDE_INT size;
6180 
6181   if (type)
6182     size = int_size_in_bytes (type);
6183   else
6184     size = GET_MODE_SIZE (mode);
6185 
6186   if (TARGET_64BIT)
6187     return size <= 0;
6188   else
6189     return size <= 0 || size > 8;
6190 }
6191 
6192 enum direction
6193 pa_function_arg_padding (machine_mode mode, const_tree type)
6194 {
6195   if (mode == BLKmode
6196       || (TARGET_64BIT
6197 	  && type
6198 	  && (AGGREGATE_TYPE_P (type)
6199 	      || TREE_CODE (type) == COMPLEX_TYPE
6200 	      || TREE_CODE (type) == VECTOR_TYPE)))
6201     {
6202       /* Return none if justification is not required.  */
6203       if (type
6204 	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6205 	  && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6206 	return none;
6207 
6208       /* The directions set here are ignored when a BLKmode argument larger
6209 	 than a word is placed in a register.  Different code is used for
6210 	 the stack and registers.  This makes it difficult to have a
6211 	 consistent data representation for both the stack and registers.
6212 	 For both runtimes, the justification and padding for arguments on
6213 	 the stack and in registers should be identical.  */
6214       if (TARGET_64BIT)
6215 	/* The 64-bit runtime specifies left justification for aggregates.  */
6216         return upward;
6217       else
6218 	/* The 32-bit runtime architecture specifies right justification.
6219 	   When the argument is passed on the stack, the argument is padded
6220 	   with garbage on the left.  The HP compiler pads with zeros.  */
6221 	return downward;
6222     }
6223 
6224   if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6225     return downward;
6226   else
6227     return none;
6228 }
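
/* Hedged example: a 3-byte BLKmode aggregate in the 32-bit runtime has
   a 24-bit size that is not a multiple of PARM_BOUNDARY (one word), so
   it is padded downward (right justified); the same aggregate in the
   64-bit runtime is padded upward (left justified).  */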
6229 
6230 
6231 /* Do what is necessary for `va_start'.  We look at the current function
6232    to determine if stdargs or varargs is used and fill in an initial
6233    va_list.  A pointer to this constructor is returned.  */
6234 
6235 static rtx
6236 hppa_builtin_saveregs (void)
6237 {
6238   rtx offset, dest;
6239   tree fntype = TREE_TYPE (current_function_decl);
6240   int argadj = ((!stdarg_p (fntype))
6241 		? UNITS_PER_WORD : 0);
6242 
6243   if (argadj)
6244     offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
6245   else
6246     offset = crtl->args.arg_offset_rtx;
6247 
6248   if (TARGET_64BIT)
6249     {
6250       int i, off;
6251 
6252       /* Adjust for varargs/stdarg differences.  */
6253       if (argadj)
6254 	offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
6255       else
6256 	offset = crtl->args.arg_offset_rtx;
6257 
6258       /* We need to save %r26 .. %r19 inclusive starting at offset -64
6259 	 from the incoming arg pointer and growing to larger addresses.  */
6260       for (i = 26, off = -64; i >= 19; i--, off += 8)
6261 	emit_move_insn (gen_rtx_MEM (word_mode,
6262 				     plus_constant (Pmode,
6263 						    arg_pointer_rtx, off)),
6264 			gen_rtx_REG (word_mode, i));
6265 
6266       /* The incoming args pointer points just beyond the flushback area;
6267 	 normally this is not a serious concern.  However, when we are doing
6268 	 varargs/stdargs, we want to make the arg pointer point to the start
6269 	 of the incoming argument area.  */
6270       emit_move_insn (virtual_incoming_args_rtx,
6271 		      plus_constant (Pmode, arg_pointer_rtx, -64));
6272 
6273       /* Now return a pointer to the first anonymous argument.  */
6274       return copy_to_reg (expand_binop (Pmode, add_optab,
6275 					virtual_incoming_args_rtx,
6276 					offset, 0, 0, OPTAB_LIB_WIDEN));
6277     }
6278 
6279   /* Store general registers on the stack.  */
6280   dest = gen_rtx_MEM (BLKmode,
6281 		      plus_constant (Pmode, crtl->args.internal_arg_pointer,
6282 				     -16));
6283   set_mem_alias_set (dest, get_varargs_alias_set ());
6284   set_mem_align (dest, BITS_PER_WORD);
6285   move_block_from_reg (23, dest, 4);
6286 
6287   /* move_block_from_reg will emit code to store the argument registers
6288      individually as scalar stores.
6289 
6290      However, other insns may later load from the same addresses for
6291      a structure load (passing a struct to a varargs routine).
6292 
6293      The alias code assumes that such aliasing can never happen, so we
6294      have to keep memory referencing insns from moving up beyond the
6295      last argument register store.  So we emit a blockage insn here.  */
6296   emit_insn (gen_blockage ());
6297 
6298   return copy_to_reg (expand_binop (Pmode, add_optab,
6299 				    crtl->args.internal_arg_pointer,
6300 				    offset, 0, 0, OPTAB_LIB_WIDEN));
6301 }
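
/* Hedged illustration of the 64-bit register save loop above: %r26 is
   stored at AP-64, %r25 at AP-56, ..., %r19 at AP-8, where AP is the
   incoming arg pointer.  The arg pointer is then rewound by 64 bytes
   so that it points at the start of this flushback area.  */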
6302 
6303 static void
6304 hppa_va_start (tree valist, rtx nextarg)
6305 {
6306   nextarg = expand_builtin_saveregs ();
6307   std_expand_builtin_va_start (valist, nextarg);
6308 }
6309 
6310 static tree
6311 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6312 			   gimple_seq *post_p)
6313 {
6314   if (TARGET_64BIT)
6315     {
6316       /* Args grow upward.  We can use the generic routines.  */
6317       return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6318     }
6319   else /* !TARGET_64BIT */
6320     {
6321       tree ptr = build_pointer_type (type);
6322       tree valist_type;
6323       tree t, u;
6324       unsigned int size, ofs;
6325       bool indirect;
6326 
6327       indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
6328       if (indirect)
6329 	{
6330 	  type = ptr;
6331 	  ptr = build_pointer_type (type);
6332 	}
6333       size = int_size_in_bytes (type);
6334       valist_type = TREE_TYPE (valist);
6335 
6336       /* Args grow down.  Not handled by generic routines.  */
6337 
6338       u = fold_convert (sizetype, size_in_bytes (type));
6339       u = fold_build1 (NEGATE_EXPR, sizetype, u);
6340       t = fold_build_pointer_plus (valist, u);
6341 
6342       /* Align to 4 or 8 byte boundary depending on argument size.  */
6343 
6344       u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6345       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6346       t = fold_convert (valist_type, t);
6347 
6348       t = build2 (MODIFY_EXPR, valist_type, valist, t);
6349 
6350       ofs = (8 - size) % 4;
6351       if (ofs != 0)
6352 	t = fold_build_pointer_plus_hwi (t, ofs);
6353 
6354       t = fold_convert (ptr, t);
6355       t = build_va_arg_indirect_ref (t);
6356 
6357       if (indirect)
6358 	t = build_va_arg_indirect_ref (t);
6359 
6360       return t;
6361     }
6362 }
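
/* Hedged worked example for the 32-bit path above, using a 1-byte char
   argument and an assumed valist of 0x...10:
     t   = 0x...10 - 1   = 0x...0f   (args grow down)
     t  &= -4            = 0x...0c   (4-byte slot since size <= 4)
     ofs = (8 - 1) % 4   = 3
     t  += 3             = 0x...0f
   so the char is fetched from the last byte of its word slot, matching
   the right justification of small args on this big-endian target.  */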
6363 
6364 /* True if MODE is valid for the target.  By "valid", we mean able to
6365    be manipulated in non-trivial ways.  In particular, this means all
6366    the arithmetic is supported.
6367 
6368    Currently, TImode is not valid as the HP 64-bit runtime documentation
6369    doesn't document the alignment and calling conventions for this type.
6370    Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6371    2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE.  */
6372 
6373 static bool
6374 pa_scalar_mode_supported_p (machine_mode mode)
6375 {
6376   int precision = GET_MODE_PRECISION (mode);
6377 
6378   switch (GET_MODE_CLASS (mode))
6379     {
6380     case MODE_PARTIAL_INT:
6381     case MODE_INT:
6382       if (precision == CHAR_TYPE_SIZE)
6383 	return true;
6384       if (precision == SHORT_TYPE_SIZE)
6385 	return true;
6386       if (precision == INT_TYPE_SIZE)
6387 	return true;
6388       if (precision == LONG_TYPE_SIZE)
6389 	return true;
6390       if (precision == LONG_LONG_TYPE_SIZE)
6391 	return true;
6392       return false;
6393 
6394     case MODE_FLOAT:
6395       if (precision == FLOAT_TYPE_SIZE)
6396 	return true;
6397       if (precision == DOUBLE_TYPE_SIZE)
6398 	return true;
6399       if (precision == LONG_DOUBLE_TYPE_SIZE)
6400 	return true;
6401       return false;
6402 
6403     case MODE_DECIMAL_FLOAT:
6404       return false;
6405 
6406     default:
6407       gcc_unreachable ();
6408     }
6409 }
6410 
6411 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6412    it branches into the delay slot.  Otherwise, return FALSE.  */
6413 
6414 static bool
6415 branch_to_delay_slot_p (rtx_insn *insn)
6416 {
6417   rtx_insn *jump_insn;
6418 
6419   if (dbr_sequence_length ())
6420     return FALSE;
6421 
6422   jump_insn = next_active_insn (JUMP_LABEL (insn));
6423   while (insn)
6424     {
6425       insn = next_active_insn (insn);
6426       if (jump_insn == insn)
6427 	return TRUE;
6428 
6429       /* We can't rely on the length of asms.  So, we return FALSE when
6430 	 the branch is followed by an asm.  */
6431       if (!insn
6432 	  || GET_CODE (PATTERN (insn)) == ASM_INPUT
6433 	  || extract_asm_operands (PATTERN (insn)) != NULL_RTX
6434 	  || get_attr_length (insn) > 0)
6435 	break;
6436     }
6437 
6438   return FALSE;
6439 }
6440 
6441 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6442 
6443    This occurs when INSN has an unfilled delay slot and is followed
6444    by an asm.  Disaster can occur if the asm is empty and the jump
6445    branches into the delay slot.  So, we add a nop in the delay slot
6446    when this occurs.  */
6447 
6448 static bool
6449 branch_needs_nop_p (rtx_insn *insn)
6450 {
6451   rtx_insn *jump_insn;
6452 
6453   if (dbr_sequence_length ())
6454     return FALSE;
6455 
6456   jump_insn = next_active_insn (JUMP_LABEL (insn));
6457   while (insn)
6458     {
6459       insn = next_active_insn (insn);
6460       if (!insn || jump_insn == insn)
6461 	return TRUE;
6462 
6463       if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6464 	   || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6465 	  && get_attr_length (insn) > 0)
6466 	break;
6467     }
6468 
6469   return FALSE;
6470 }
6471 
6472 /* Return TRUE if INSN, a forward jump insn, can use nullification
6473    to skip the following instruction.  This avoids an extra cycle due
6474    to a mis-predicted branch when we fall through.  */
6475 
6476 static bool
6477 use_skip_p (rtx_insn *insn)
6478 {
6479   rtx_insn *jump_insn = next_active_insn (JUMP_LABEL (insn));
6480 
6481   while (insn)
6482     {
6483       insn = next_active_insn (insn);
6484 
6485       /* We can't rely on the length of asms, so we can't skip asms.  */
6486       if (!insn
6487 	  || GET_CODE (PATTERN (insn)) == ASM_INPUT
6488 	  || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6489 	break;
6490       if (get_attr_length (insn) == 4
6491 	  && jump_insn == next_active_insn (insn))
6492 	return TRUE;
6493       if (get_attr_length (insn) > 0)
6494 	break;
6495     }
6496 
6497   return FALSE;
6498 }
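
/* Hedged example of the optimization this predicate enables: a forward
   branch over a single 4-byte insn can be replaced by a nullifying
   compare-and-clear such as

	comclr,= %r2,%r1,%r0

   (registers invented) which conditionally annuls the next insn and
   avoids the extra mis-predicted-branch cycle when we fall through;
   see pa_output_cbranch below.  */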
6499 
6500 /* This routine handles all the normal conditional branch sequences we
6501    might need to generate.  It handles compare immediate vs compare
6502    register, nullification of delay slots, varying length branches,
6503    negated branches, and all combinations of the above.  It returns the
6504    output appropriate to emit the branch corresponding to all given
6505    parameters.  */
6506 
6507 const char *
6508 pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
6509 {
6510   static char buf[100];
6511   bool useskip;
6512   int nullify = INSN_ANNULLED_BRANCH_P (insn);
6513   int length = get_attr_length (insn);
6514   int xdelay;
6515 
6516   /* A conditional branch to the following instruction (e.g. the delay slot)
6517      is asking for a disaster.  This can happen when not optimizing and
6518      when jump optimization fails.
6519 
6520      While it is usually safe to emit nothing, this can fail if the
6521      preceding instruction is a nullified branch with an empty delay
6522      slot and the same branch target as this branch.  We could check
6523      for this but jump optimization should eliminate nop jumps.  It
6524      is always safe to emit a nop.  */
6525   if (branch_to_delay_slot_p (insn))
6526     return "nop";
6527 
6528   /* The doubleword form of the cmpib instruction doesn't have the LEU
6529      and GTU conditions while the cmpb instruction does.  Since we accept
6530      zero for cmpb, we must ensure that we use cmpb for the comparison.  */
6531   if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6532     operands[2] = gen_rtx_REG (DImode, 0);
6533   if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6534     operands[1] = gen_rtx_REG (DImode, 0);
6535 
6536   /* If this is a long branch with its delay slot unfilled, set `nullify'
6537      as it can nullify the delay slot and save a nop.  */
6538   if (length == 8 && dbr_sequence_length () == 0)
6539     nullify = 1;
6540 
6541   /* If this is a short forward conditional branch which did not get
6542      its delay slot filled, the delay slot can still be nullified.  */
6543   if (! nullify && length == 4 && dbr_sequence_length () == 0)
6544     nullify = forward_branch_p (insn);
6545 
6546   /* A forward branch over a single nullified insn can be done with a
6547      comclr instruction.  This avoids a single cycle penalty due to
6548      a mis-predicted branch if we fall through (branch not taken).  */
6549   useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6550 
6551   switch (length)
6552     {
6553       /* All short conditional branches except backwards with an unfilled
6554 	 delay slot.  */
6555       case 4:
6556 	if (useskip)
6557 	  strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6558 	else
6559 	  strcpy (buf, "{com%I2b,|cmp%I2b,}");
6560 	if (GET_MODE (operands[1]) == DImode)
6561 	  strcat (buf, "*");
6562 	if (negated)
6563 	  strcat (buf, "%B3");
6564 	else
6565 	  strcat (buf, "%S3");
6566 	if (useskip)
6567 	  strcat (buf, " %2,%r1,%%r0");
6568 	else if (nullify)
6569 	  {
6570 	    if (branch_needs_nop_p (insn))
6571 	      strcat (buf, ",n %2,%r1,%0%#");
6572 	    else
6573 	      strcat (buf, ",n %2,%r1,%0");
6574 	  }
6575 	else
6576 	  strcat (buf, " %2,%r1,%0");
6577 	break;
6578 
6579      /* All long conditionals.  Note a short backward branch with an
6580 	unfilled delay slot is treated just like a long backward branch
6581 	with an unfilled delay slot.  */
6582       case 8:
6583 	/* Handle weird backwards branch with a filled delay slot
6584 	   which is nullified.  */
6585 	if (dbr_sequence_length () != 0
6586 	    && ! forward_branch_p (insn)
6587 	    && nullify)
6588 	  {
6589 	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
6590 	    if (GET_MODE (operands[1]) == DImode)
6591 	      strcat (buf, "*");
6592 	    if (negated)
6593 	      strcat (buf, "%S3");
6594 	    else
6595 	      strcat (buf, "%B3");
6596 	    strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6597 	  }
6598 	/* Handle short backwards branch with an unfilled delay slot.
6599 	   Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6600 	   taken and untaken branches.  */
6601 	else if (dbr_sequence_length () == 0
6602 		 && ! forward_branch_p (insn)
6603 		 && INSN_ADDRESSES_SET_P ()
6604 		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6605 				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6606 	  {
6607 	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
6608 	    if (GET_MODE (operands[1]) == DImode)
6609 	      strcat (buf, "*");
6610 	    if (negated)
6611 	      strcat (buf, "%B3 %2,%r1,%0%#");
6612 	    else
6613 	      strcat (buf, "%S3 %2,%r1,%0%#");
6614 	  }
6615 	else
6616 	  {
6617 	    strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6618 	    if (GET_MODE (operands[1]) == DImode)
6619 	      strcat (buf, "*");
6620 	    if (negated)
6621 	      strcat (buf, "%S3");
6622 	    else
6623 	      strcat (buf, "%B3");
6624 	    if (nullify)
6625 	      strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6626 	    else
6627 	      strcat (buf, " %2,%r1,%%r0\n\tb %0");
6628 	  }
6629 	break;
6630 
6631       default:
6632 	/* The reversed conditional branch must branch over one additional
6633 	   instruction if the delay slot is filled and needs to be extracted
6634 	   by pa_output_lbranch.  If the delay slot is empty or this is a
6635 	   nullified forward branch, the instruction after the reversed
6636 	   conditional branch must be nullified.  */
6637 	if (dbr_sequence_length () == 0
6638 	    || (nullify && forward_branch_p (insn)))
6639 	  {
6640 	    nullify = 1;
6641 	    xdelay = 0;
6642 	    operands[4] = GEN_INT (length);
6643 	  }
6644 	else
6645 	  {
6646 	    xdelay = 1;
6647 	    operands[4] = GEN_INT (length + 4);
6648 	  }
6649 
6650 	/* Create a reversed conditional branch which branches around
6651 	   the following insns.  */
6652 	if (GET_MODE (operands[1]) != DImode)
6653 	  {
6654 	    if (nullify)
6655 	      {
6656 		if (negated)
6657 		  strcpy (buf,
6658 		    "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6659 		else
6660 		  strcpy (buf,
6661 		    "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6662 	      }
6663 	    else
6664 	      {
6665 		if (negated)
6666 		  strcpy (buf,
6667 		    "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6668 		else
6669 		  strcpy (buf,
6670 		    "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6671 	      }
6672 	  }
6673 	else
6674 	  {
6675 	    if (nullify)
6676 	      {
6677 		if (negated)
6678 		  strcpy (buf,
6679 		    "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6680 		else
6681 		  strcpy (buf,
6682 		    "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6683 	      }
6684 	    else
6685 	      {
6686 		if (negated)
6687 		  strcpy (buf,
6688 		    "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6689 		else
6690 		  strcpy (buf,
6691 		    "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6692 	      }
6693 	  }
6694 
6695 	output_asm_insn (buf, operands);
6696 	return pa_output_lbranch (operands[0], insn, xdelay);
6697     }
6698   return buf;
6699 }
6700 
6701 /* This routine handles output of long unconditional branches that
6702    exceed the maximum range of a simple branch instruction.  Since
6703    we don't have a register available for the branch, we save register
6704    %r1 in the frame marker, load the branch destination DEST into %r1,
6705    execute the branch, and restore %r1 in the delay slot of the branch.
6706 
6707    Since long branches may have an insn in the delay slot and the
6708    delay slot is used to restore %r1, we in general need to extract
6709    this insn and execute it before the branch.  However, to facilitate
6710    use of this function by conditional branches, we also provide an
6711    option to not extract the delay insn so that it will be emitted
6712    after the long branch.  So, if there is an insn in the delay slot,
6713    it is extracted if XDELAY is nonzero.
6714 
6715    The lengths of the various long-branch sequences are 20, 16 and 24
6716    bytes for the portable runtime, non-PIC and PIC cases, respectively.  */
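/* For example, with a hypothetical destination "target", the 16-byte
   non-PIC 32-bit sequence sketched from the code below (using the
   return pointer slot, as is done when there is no frame) is:

	stw %r1,-20(%r30)	; save %r1 in the frame marker
	ldil L'target,%r1
	be R'target(%sr4,%r1)	; branch via %r1
	ldw -20(%r30),%r1	; delay slot: restore %r1  */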
6717 
6718 const char *
6719 pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
6720 {
6721   rtx xoperands[2];
6722 
6723   xoperands[0] = dest;
6724 
6725   /* First, free up the delay slot.  */
6726   if (xdelay && dbr_sequence_length () != 0)
6727     {
6728       /* We can't handle a jump in the delay slot.  */
6729       gcc_assert (! JUMP_P (NEXT_INSN (insn)));
6730 
6731       final_scan_insn (NEXT_INSN (insn), asm_out_file,
6732 		       optimize, 0, NULL);
6733 
6734       /* Now delete the delay insn.  */
6735       SET_INSN_DELETED (NEXT_INSN (insn));
6736     }
6737 
6738   /* Output an insn to save %r1.  The runtime documentation doesn't
6739      specify whether the "Clean Up" slot in the caller's frame can
6740      be clobbered by the callee.  It isn't copied by HP's builtin
6741      alloca, so this suggests that it can be clobbered if necessary.
6742      The "Static Link" location is copied by HP builtin alloca, so
6743      we avoid using it.  Using the cleanup slot might be a problem
6744      if we have to interoperate with languages that pass cleanup
6745      information.  However, it should be possible to handle these
6746      situations with GCC's asm feature.
6747 
6748      The "Current RP" slot is reserved for the called procedure, so
6749      we try to use it when we don't have a frame of our own.  It's
6750      rather unlikely that we won't have a frame when we need to emit
6751      a very long branch.
6752 
6753      Really the way to go long term is a register scavenger; go to
6754      the target of the jump and find a register which we can use
6755      as a scratch to hold the value in %r1.  Then, we wouldn't have
6756      to free up the delay slot or clobber a slot that may be needed
6757      for other purposes.  */
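  /* Concretely, the stores below use the return pointer slot (-16 for
     the 64-bit target, -20 for the 32-bit target) when there is no
     frame and %r2 is not live, and otherwise the slot at -40 (64-bit)
     or the "Clean Up" slot at -12 (32-bit).  */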
6758   if (TARGET_64BIT)
6759     {
6760       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6761 	/* Use the return pointer slot in the frame marker.  */
6762 	output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6763       else
6764 	/* Use the slot at -40 in the frame marker since HP builtin
6765 	   alloca doesn't copy it.  */
6766 	output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6767     }
6768   else
6769     {
6770       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6771 	/* Use the return pointer slot in the frame marker.  */
6772 	output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6773       else
6774 	/* Use the "Clean Up" slot in the frame marker.  In GCC,
6775 	   the only other use of this location is for copying a
6776 	   floating point double argument from a floating-point
6777 	   register to two general registers.  The copy is done
6778 	   as an "atomic" operation when outputting a call, so it
6779 	   won't interfere with our using the location here.  */
6780 	output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6781     }
6782 
6783   if (TARGET_PORTABLE_RUNTIME)
6784     {
6785       output_asm_insn ("ldil L'%0,%%r1", xoperands);
6786       output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6787       output_asm_insn ("bv %%r0(%%r1)", xoperands);
6788     }
6789   else if (flag_pic)
6790     {
6791       output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6792       if (TARGET_SOM || !TARGET_GAS)
6793 	{
6794 	  xoperands[1] = gen_label_rtx ();
6795 	  output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6796 	  targetm.asm_out.internal_label (asm_out_file, "L",
6797 					  CODE_LABEL_NUMBER (xoperands[1]));
6798 	  output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6799 	}
6800       else
6801 	{
6802 	  output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6803 	  output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6804 	}
6805       output_asm_insn ("bv %%r0(%%r1)", xoperands);
6806     }
6807   else
6808     /* Now output a very long branch to the original target.  */
6809     output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6810 
6811   /* Now restore the value of %r1 in the delay slot.  */
6812   if (TARGET_64BIT)
6813     {
6814       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6815 	return "ldd -16(%%r30),%%r1";
6816       else
6817 	return "ldd -40(%%r30),%%r1";
6818     }
6819   else
6820     {
6821       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6822 	return "ldw -20(%%r30),%%r1";
6823       else
6824 	return "ldw -12(%%r30),%%r1";
6825     }
6826 }
6827 
6828 /* This routine handles all the branch-on-bit conditional branch sequences we
6829    might need to generate.  It handles nullification of delay slots,
6830    varying length branches, negated branches and all combinations of the
6831    above.  It returns the appropriate output template to emit the branch.  */
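/* As a minimal illustration with hypothetical operands, the short
   non-nullified form built below comes out as "bb,< %r26,5,target",
   branching when the selected bit of %r26 is 1 (PA numbers bits from
   the most significant end); "bb,>=" branches when the bit is 0.  */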
6832 
6833 const char *
6834 pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
6835 {
6836   static char buf[100];
6837   bool useskip;
6838   int nullify = INSN_ANNULLED_BRANCH_P (insn);
6839   int length = get_attr_length (insn);
6840   int xdelay;
6841 
6842   /* A conditional branch to the following instruction (i.e. the delay slot) is
6843      asking for a disaster.  I do not think this can happen as this pattern
6844      is only used when optimizing; jump optimization should eliminate the
6845      jump.  But be prepared just in case.  */
6846 
6847   if (branch_to_delay_slot_p (insn))
6848     return "nop";
6849 
6850   /* If this is a long branch with its delay slot unfilled, set `nullify'
6851      as it can nullify the delay slot and save a nop.  */
6852   if (length == 8 && dbr_sequence_length () == 0)
6853     nullify = 1;
6854 
6855   /* If this is a short forward conditional branch which did not get
6856      its delay slot filled, the delay slot can still be nullified.  */
6857   if (! nullify && length == 4 && dbr_sequence_length () == 0)
6858     nullify = forward_branch_p (insn);
6859 
6860   /* A forward branch over a single nullified insn can be done with an
6861      extrs instruction.  This avoids a single cycle penalty due to
6862      a mispredicted branch if we fall through (branch not taken).  */
6863   useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6864 
6865   switch (length)
6866     {
6867 
6868       /* All short conditional branches except backwards with an unfilled
6869 	 delay slot.  */
6870       case 4:
6871 	if (useskip)
6872 	  strcpy (buf, "{extrs,|extrw,s,}");
6873 	else
6874 	  strcpy (buf, "bb,");
6875 	if (useskip && GET_MODE (operands[0]) == DImode)
6876 	  strcpy (buf, "extrd,s,*");
6877 	else if (GET_MODE (operands[0]) == DImode)
6878 	  strcpy (buf, "bb,*");
6879 	if ((which == 0 && negated)
6880 	     || (which == 1 && ! negated))
6881 	  strcat (buf, ">=");
6882 	else
6883 	  strcat (buf, "<");
6884 	if (useskip)
6885 	  strcat (buf, " %0,%1,1,%%r0");
6886 	else if (nullify && negated)
6887 	  {
6888 	    if (branch_needs_nop_p (insn))
6889 	      strcat (buf, ",n %0,%1,%3%#");
6890 	    else
6891 	      strcat (buf, ",n %0,%1,%3");
6892 	  }
6893 	else if (nullify && ! negated)
6894 	  {
6895 	    if (branch_needs_nop_p (insn))
6896 	      strcat (buf, ",n %0,%1,%2%#");
6897 	    else
6898 	      strcat (buf, ",n %0,%1,%2");
6899 	  }
6900 	else if (! nullify && negated)
6901 	  strcat (buf, " %0,%1,%3");
6902 	else if (! nullify && ! negated)
6903 	  strcat (buf, " %0,%1,%2");
6904 	break;
6905 
6906      /* All long conditionals.  Note a short backward branch with an
6907 	unfilled delay slot is treated just like a long backward branch
6908 	with an unfilled delay slot.  */
6909       case 8:
6910 	/* Handle weird backwards branch with a filled delay slot
6911 	   which is nullified.  */
6912 	if (dbr_sequence_length () != 0
6913 	    && ! forward_branch_p (insn)
6914 	    && nullify)
6915 	  {
6916 	    strcpy (buf, "bb,");
6917 	    if (GET_MODE (operands[0]) == DImode)
6918 	      strcat (buf, "*");
6919 	    if ((which == 0 && negated)
6920 		|| (which == 1 && ! negated))
6921 	      strcat (buf, "<");
6922 	    else
6923 	      strcat (buf, ">=");
6924 	    if (negated)
6925 	      strcat (buf, ",n %0,%1,.+12\n\tb %3");
6926 	    else
6927 	      strcat (buf, ",n %0,%1,.+12\n\tb %2");
6928 	  }
6929 	/* Handle short backwards branch with an unfilled delay slot.
6930 	   Using a bb;nop rather than extrs;bl saves 1 cycle for both
6931 	   taken and untaken branches.  */
6932 	else if (dbr_sequence_length () == 0
6933 		 && ! forward_branch_p (insn)
6934 		 && INSN_ADDRESSES_SET_P ()
6935 		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6936 				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6937 	  {
6938 	    strcpy (buf, "bb,");
6939 	    if (GET_MODE (operands[0]) == DImode)
6940 	      strcat (buf, "*");
6941 	    if ((which == 0 && negated)
6942 		|| (which == 1 && ! negated))
6943 	      strcat (buf, ">=");
6944 	    else
6945 	      strcat (buf, "<");
6946 	    if (negated)
6947 	      strcat (buf, " %0,%1,%3%#");
6948 	    else
6949 	      strcat (buf, " %0,%1,%2%#");
6950 	  }
6951 	else
6952 	  {
6953 	    if (GET_MODE (operands[0]) == DImode)
6954 	      strcpy (buf, "extrd,s,*");
6955 	    else
6956 	      strcpy (buf, "{extrs,|extrw,s,}");
6957 	    if ((which == 0 && negated)
6958 		|| (which == 1 && ! negated))
6959 	      strcat (buf, "<");
6960 	    else
6961 	      strcat (buf, ">=");
6962 	    if (nullify && negated)
6963 	      strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6964 	    else if (nullify && ! negated)
6965 	      strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6966 	    else if (negated)
6967 	      strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6968 	    else
6969 	      strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6970 	  }
6971 	break;
6972 
6973       default:
6974 	/* The reversed conditional branch must branch over one additional
6975 	   instruction if the delay slot is filled and needs to be extracted
6976 	   by pa_output_lbranch.  If the delay slot is empty or this is a
6977 	   nullified forward branch, the instruction after the reversed
6978 	   conditional branch must be nullified.  */
6979 	if (dbr_sequence_length () == 0
6980 	    || (nullify && forward_branch_p (insn)))
6981 	  {
6982 	    nullify = 1;
6983 	    xdelay = 0;
6984 	    operands[4] = GEN_INT (length);
6985 	  }
6986 	else
6987 	  {
6988 	    xdelay = 1;
6989 	    operands[4] = GEN_INT (length + 4);
6990 	  }
6991 
6992 	if (GET_MODE (operands[0]) == DImode)
6993 	  strcpy (buf, "bb,*");
6994 	else
6995 	  strcpy (buf, "bb,");
6996 	if ((which == 0 && negated)
6997 	    || (which == 1 && !negated))
6998 	  strcat (buf, "<");
6999 	else
7000 	  strcat (buf, ">=");
7001 	if (nullify)
7002 	  strcat (buf, ",n %0,%1,.+%4");
7003 	else
7004 	  strcat (buf, " %0,%1,.+%4");
7005 	output_asm_insn (buf, operands);
7006 	return pa_output_lbranch (negated ? operands[3] : operands[2],
7007 				  insn, xdelay);
7008     }
7009   return buf;
7010 }
7011 
7012 /* This routine handles all the branch-on-variable-bit conditional branch
7013    sequences we might need to generate.  It handles nullification of delay
7014    slots, varying length branches, negated branches and all combinations
7015    of the above.  It returns the appropriate output template to emit the
7016    branch.  */
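/* For illustration (operands hypothetical): the PA 1.x form built
   below is "bvb,< %r26,target", which tests the bit of %r26 selected
   by the shift-amount register; the PA 2.0 spelling of the same test
   is "bb,< %r26,%sar,target".  */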
7017 
7018 const char *
7019 pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
7020 	       int which)
7021 {
7022   static char buf[100];
7023   bool useskip;
7024   int nullify = INSN_ANNULLED_BRANCH_P (insn);
7025   int length = get_attr_length (insn);
7026   int xdelay;
7027 
7028   /* A conditional branch to the following instruction (i.e. the delay slot) is
7029      asking for a disaster.  I do not think this can happen as this pattern
7030      is only used when optimizing; jump optimization should eliminate the
7031      jump.  But be prepared just in case.  */
7032 
7033   if (branch_to_delay_slot_p (insn))
7034     return "nop";
7035 
7036   /* If this is a long branch with its delay slot unfilled, set `nullify'
7037      as it can nullify the delay slot and save a nop.  */
7038   if (length == 8 && dbr_sequence_length () == 0)
7039     nullify = 1;
7040 
7041   /* If this is a short forward conditional branch which did not get
7042      its delay slot filled, the delay slot can still be nullified.  */
7043   if (! nullify && length == 4 && dbr_sequence_length () == 0)
7044     nullify = forward_branch_p (insn);
7045 
7046   /* A forward branch over a single nullified insn can be done with an
7047      extrs instruction.  This avoids a single cycle penalty due to
7048      a mispredicted branch if we fall through (branch not taken).  */
7049   useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7050 
7051   switch (length)
7052     {
7053 
7054       /* All short conditional branches except backwards with an unfilled
7055 	 delay slot.  */
7056       case 4:
7057 	if (useskip)
7058 	  strcpy (buf, "{vextrs,|extrw,s,}");
7059 	else
7060 	  strcpy (buf, "{bvb,|bb,}");
7061 	if (useskip && GET_MODE (operands[0]) == DImode)
7062 	  strcpy (buf, "extrd,s,*");
7063 	else if (GET_MODE (operands[0]) == DImode)
7064 	  strcpy (buf, "bb,*");
7065 	if ((which == 0 && negated)
7066 	     || (which == 1 && ! negated))
7067 	  strcat (buf, ">=");
7068 	else
7069 	  strcat (buf, "<");
7070 	if (useskip)
7071 	  strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
7072 	else if (nullify && negated)
7073 	  {
7074 	    if (branch_needs_nop_p (insn))
7075 	      strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7076 	    else
7077 	      strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
7078 	  }
7079 	else if (nullify && ! negated)
7080 	  {
7081 	    if (branch_needs_nop_p (insn))
7082 	      strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7083 	    else
7084 	      strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
7085 	  }
7086 	else if (! nullify && negated)
7087 	  strcat (buf, "{ %0,%3| %0,%%sar,%3}");
7088 	else if (! nullify && ! negated)
7089 	  strcat (buf, "{ %0,%2| %0,%%sar,%2}");
7090 	break;
7091 
7092      /* All long conditionals.  Note a short backward branch with an
7093 	unfilled delay slot is treated just like a long backward branch
7094 	with an unfilled delay slot.  */
7095       case 8:
7096 	/* Handle weird backwards branch with a filled delay slot
7097 	   which is nullified.  */
7098 	if (dbr_sequence_length () != 0
7099 	    && ! forward_branch_p (insn)
7100 	    && nullify)
7101 	  {
7102 	    strcpy (buf, "{bvb,|bb,}");
7103 	    if (GET_MODE (operands[0]) == DImode)
7104 	      strcat (buf, "*");
7105 	    if ((which == 0 && negated)
7106 		|| (which == 1 && ! negated))
7107 	      strcat (buf, "<");
7108 	    else
7109 	      strcat (buf, ">=");
7110 	    if (negated)
7111 	      strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7112 	    else
7113 	      strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7114 	  }
7115 	/* Handle short backwards branch with an unfilled delay slot.
7116 	   Using a bb;nop rather than extrs;bl saves 1 cycle for both
7117 	   taken and untaken branches.  */
7118 	else if (dbr_sequence_length () == 0
7119 		 && ! forward_branch_p (insn)
7120 		 && INSN_ADDRESSES_SET_P ()
7121 		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7122 				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7123 	  {
7124 	    strcpy (buf, "{bvb,|bb,}");
7125 	    if (GET_MODE (operands[0]) == DImode)
7126 	      strcat (buf, "*");
7127 	    if ((which == 0 && negated)
7128 		|| (which == 1 && ! negated))
7129 	      strcat (buf, ">=");
7130 	    else
7131 	      strcat (buf, "<");
7132 	    if (negated)
7133 	      strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
7134 	    else
7135 	      strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
7136 	  }
7137 	else
7138 	  {
7139 	    strcpy (buf, "{vextrs,|extrw,s,}");
7140 	    if (GET_MODE (operands[0]) == DImode)
7141 	      strcpy (buf, "extrd,s,*");
7142 	    if ((which == 0 && negated)
7143 		|| (which == 1 && ! negated))
7144 	      strcat (buf, "<");
7145 	    else
7146 	      strcat (buf, ">=");
7147 	    if (nullify && negated)
7148 	      strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7149 	    else if (nullify && ! negated)
7150 	      strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7151 	    else if (negated)
7152 	      strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7153 	    else
7154 	      strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7155 	  }
7156 	break;
7157 
7158       default:
7159 	/* The reversed conditional branch must branch over one additional
7160 	   instruction if the delay slot is filled and needs to be extracted
7161 	   by pa_output_lbranch.  If the delay slot is empty or this is a
7162 	   nullified forward branch, the instruction after the reversed
7163 	   conditional branch must be nullified.  */
7164 	if (dbr_sequence_length () == 0
7165 	    || (nullify && forward_branch_p (insn)))
7166 	  {
7167 	    nullify = 1;
7168 	    xdelay = 0;
7169 	    operands[4] = GEN_INT (length);
7170 	  }
7171 	else
7172 	  {
7173 	    xdelay = 1;
7174 	    operands[4] = GEN_INT (length + 4);
7175 	  }
7176 
7177 	if (GET_MODE (operands[0]) == DImode)
7178 	  strcpy (buf, "bb,*");
7179 	else
7180 	  strcpy (buf, "{bvb,|bb,}");
7181 	if ((which == 0 && negated)
7182 	    || (which == 1 && !negated))
7183 	  strcat (buf, "<");
7184 	else
7185 	  strcat (buf, ">=");
7186 	if (nullify)
7187 	  strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
7188 	else
7189 	  strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
7190 	output_asm_insn (buf, operands);
7191 	return pa_output_lbranch (negated ? operands[3] : operands[2],
7192 				  insn, xdelay);
7193     }
7194   return buf;
7195 }
7196 
7197 /* Return the output template for emitting a dbra type insn.
7198 
7199    Note it may perform some output operations on its own before
7200    returning the final output string.  */
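/* For example (operands hypothetical), the short form returned below
   can come out as "addib,<> -1,%r3,loop": add -1 to %r3 and branch to
   loop while the result is nonzero, the usual counted-loop ending.  */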
7201 const char *
7202 pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
7203 {
7204   int length = get_attr_length (insn);
7205 
7206   /* A conditional branch to the following instruction (i.e. the delay slot) is
7207      asking for a disaster.  Be prepared!  */
7208 
7209   if (branch_to_delay_slot_p (insn))
7210     {
7211       if (which_alternative == 0)
7212 	return "ldo %1(%0),%0";
7213       else if (which_alternative == 1)
7214 	{
7215 	  output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7216 	  output_asm_insn ("ldw -16(%%r30),%4", operands);
7217 	  output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7218 	  return "{fldws|fldw} -16(%%r30),%0";
7219 	}
7220       else
7221 	{
7222 	  output_asm_insn ("ldw %0,%4", operands);
7223 	  return "ldo %1(%4),%4\n\tstw %4,%0";
7224 	}
7225     }
7226 
7227   if (which_alternative == 0)
7228     {
7229       int nullify = INSN_ANNULLED_BRANCH_P (insn);
7230       int xdelay;
7231 
7232       /* If this is a long branch with its delay slot unfilled, set `nullify'
7233 	 as it can nullify the delay slot and save a nop.  */
7234       if (length == 8 && dbr_sequence_length () == 0)
7235 	nullify = 1;
7236 
7237       /* If this is a short forward conditional branch which did not get
7238 	 its delay slot filled, the delay slot can still be nullified.  */
7239       if (! nullify && length == 4 && dbr_sequence_length () == 0)
7240 	nullify = forward_branch_p (insn);
7241 
7242       switch (length)
7243 	{
7244 	case 4:
7245 	  if (nullify)
7246 	    {
7247 	      if (branch_needs_nop_p (insn))
7248 		return "addib,%C2,n %1,%0,%3%#";
7249 	      else
7250 		return "addib,%C2,n %1,%0,%3";
7251 	    }
7252 	  else
7253 	    return "addib,%C2 %1,%0,%3";
7254 
7255 	case 8:
7256 	  /* Handle weird backwards branch with a filled delay slot
7257 	     which is nullified.  */
7258 	  if (dbr_sequence_length () != 0
7259 	      && ! forward_branch_p (insn)
7260 	      && nullify)
7261 	    return "addib,%N2,n %1,%0,.+12\n\tb %3";
7262 	  /* Handle short backwards branch with an unfilled delay slot.
7263 	     Using an addb;nop rather than addi;bl saves 1 cycle for both
7264 	     taken and untaken branches.  */
7265 	  else if (dbr_sequence_length () == 0
7266 		   && ! forward_branch_p (insn)
7267 		   && INSN_ADDRESSES_SET_P ()
7268 		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7269 				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7270 	      return "addib,%C2 %1,%0,%3%#";
7271 
7272 	  /* Handle normal cases.  */
7273 	  if (nullify)
7274 	    return "addi,%N2 %1,%0,%0\n\tb,n %3";
7275 	  else
7276 	    return "addi,%N2 %1,%0,%0\n\tb %3";
7277 
7278 	default:
7279 	  /* The reversed conditional branch must branch over one additional
7280 	     instruction if the delay slot is filled and needs to be extracted
7281 	     by pa_output_lbranch.  If the delay slot is empty or this is a
7282 	     nullified forward branch, the instruction after the reversed
7283 	     conditional branch must be nullified.  */
7284 	  if (dbr_sequence_length () == 0
7285 	      || (nullify && forward_branch_p (insn)))
7286 	    {
7287 	      nullify = 1;
7288 	      xdelay = 0;
7289 	      operands[4] = GEN_INT (length);
7290 	    }
7291 	  else
7292 	    {
7293 	      xdelay = 1;
7294 	      operands[4] = GEN_INT (length + 4);
7295 	    }
7296 
7297 	  if (nullify)
7298 	    output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7299 	  else
7300 	    output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7301 
7302 	  return pa_output_lbranch (operands[3], insn, xdelay);
7303 	}
7304 
7305     }
7306   /* Deal with gross reload from FP register case.  */
7307   else if (which_alternative == 1)
7308     {
7309       /* Move loop counter from FP register to MEM then into a GR,
7310 	 increment the GR, store the GR into MEM, and finally reload
7311 	 the FP register from MEM from within the branch's delay slot.  */
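      /* With hypothetical operands (%0 = %fr4, %4 = %r28, %1 = -1,
	 %3 = loop), the length == 24 case below expands to:

		fstw %fr4,-16(%r30)
		ldw -16(%r30),%r28
		ldo -1(%r28),%r28
		stw %r28,-16(%r30)
		cmpb,<cond> %r0,%r28,loop
		fldw -16(%r30),%fr4	; delay slot  */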
7312       output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7313 		       operands);
7314       output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7315       if (length == 24)
7316 	return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7317       else if (length == 28)
7318 	return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7319       else
7320 	{
7321 	  operands[5] = GEN_INT (length - 16);
7322 	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7323 	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7324 	  return pa_output_lbranch (operands[3], insn, 0);
7325 	}
7326     }
7327   /* Deal with gross reload from memory case.  */
7328   else
7329     {
7330       /* Reload loop counter from memory, the store back to memory
7331 	 happens in the branch's delay slot.  */
7332       output_asm_insn ("ldw %0,%4", operands);
7333       if (length == 12)
7334 	return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7335       else if (length == 16)
7336 	return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7337       else
7338 	{
7339 	  operands[5] = GEN_INT (length - 4);
7340 	  output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7341 	  return pa_output_lbranch (operands[3], insn, 0);
7342 	}
7343     }
7344 }
7345 
7346 /* Return the output template for emitting a movb type insn.
7347 
7348    Note it may perform some output operations on its own before
7349    returning the final output string.  */
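/* For example (operands hypothetical), the short form returned below
   can come out as "movb,= %r26,%r28,target": copy %r26 into %r28 and
   branch to target when the value moved is zero.  */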
7350 const char *
7351 pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
7352 	     int reverse_comparison)
7353 {
7354   int length = get_attr_length (insn);
7355 
7356   /* A conditional branch to the following instruction (i.e. the delay slot) is
7357      asking for a disaster.  Be prepared!  */
7358 
7359   if (branch_to_delay_slot_p (insn))
7360     {
7361       if (which_alternative == 0)
7362 	return "copy %1,%0";
7363       else if (which_alternative == 1)
7364 	{
7365 	  output_asm_insn ("stw %1,-16(%%r30)", operands);
7366 	  return "{fldws|fldw} -16(%%r30),%0";
7367 	}
7368       else if (which_alternative == 2)
7369 	return "stw %1,%0";
7370       else
7371 	return "mtsar %r1";
7372     }
7373 
7374   /* Support the second variant.  */
7375   if (reverse_comparison)
7376     PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7377 
7378   if (which_alternative == 0)
7379     {
7380       int nullify = INSN_ANNULLED_BRANCH_P (insn);
7381       int xdelay;
7382 
7383       /* If this is a long branch with its delay slot unfilled, set `nullify'
7384 	 as it can nullify the delay slot and save a nop.  */
7385       if (length == 8 && dbr_sequence_length () == 0)
7386 	nullify = 1;
7387 
7388       /* If this is a short forward conditional branch which did not get
7389 	 its delay slot filled, the delay slot can still be nullified.  */
7390       if (! nullify && length == 4 && dbr_sequence_length () == 0)
7391 	nullify = forward_branch_p (insn);
7392 
7393       switch (length)
7394 	{
7395 	case 4:
7396 	  if (nullify)
7397 	    {
7398 	      if (branch_needs_nop_p (insn))
7399 		return "movb,%C2,n %1,%0,%3%#";
7400 	      else
7401 		return "movb,%C2,n %1,%0,%3";
7402 	    }
7403 	  else
7404 	    return "movb,%C2 %1,%0,%3";
7405 
7406 	case 8:
7407 	  /* Handle weird backwards branch with a filled delay slot
7408 	     which is nullified.  */
7409 	  if (dbr_sequence_length () != 0
7410 	      && ! forward_branch_p (insn)
7411 	      && nullify)
7412 	    return "movb,%N2,n %1,%0,.+12\n\tb %3";
7413 
7414 	  /* Handle short backwards branch with an unfilled delay slot.
7415 	     Using a movb;nop rather than or;bl saves 1 cycle for both
7416 	     taken and untaken branches.  */
7417 	  else if (dbr_sequence_length () == 0
7418 		   && ! forward_branch_p (insn)
7419 		   && INSN_ADDRESSES_SET_P ()
7420 		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7421 				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7422 	    return "movb,%C2 %1,%0,%3%#";
7423 	  /* Handle normal cases.  */
7424 	  if (nullify)
7425 	    return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7426 	  else
7427 	    return "or,%N2 %1,%%r0,%0\n\tb %3";
7428 
7429 	default:
7430 	  /* The reversed conditional branch must branch over one additional
7431 	     instruction if the delay slot is filled and needs to be extracted
7432 	     by pa_output_lbranch.  If the delay slot is empty or this is a
7433 	     nullified forward branch, the instruction after the reversed
7434 	     conditional branch must be nullified.  */
7435 	  if (dbr_sequence_length () == 0
7436 	      || (nullify && forward_branch_p (insn)))
7437 	    {
7438 	      nullify = 1;
7439 	      xdelay = 0;
7440 	      operands[4] = GEN_INT (length);
7441 	    }
7442 	  else
7443 	    {
7444 	      xdelay = 1;
7445 	      operands[4] = GEN_INT (length + 4);
7446 	    }
7447 
7448 	  if (nullify)
7449 	    output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7450 	  else
7451 	    output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7452 
7453 	  return pa_output_lbranch (operands[3], insn, xdelay);
7454 	}
7455     }
7456   /* Deal with gross reload for FP destination register case.  */
7457   else if (which_alternative == 1)
7458     {
7459       /* Move source register to MEM, perform the branch test, then
7460 	 finally load the FP register from MEM from within the branch's
7461 	 delay slot.  */
7462       output_asm_insn ("stw %1,-16(%%r30)", operands);
7463       if (length == 12)
7464 	return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7465       else if (length == 16)
7466 	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7467       else
7468 	{
7469 	  operands[4] = GEN_INT (length - 4);
7470 	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7471 	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7472 	  return pa_output_lbranch (operands[3], insn, 0);
7473 	}
7474     }
7475   /* Deal with gross reload from memory case.  */
7476   else if (which_alternative == 2)
7477     {
7478       /* Reload loop counter from memory, the store back to memory
7479 	 happens in the branch's delay slot.  */
7480       if (length == 8)
7481 	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7482       else if (length == 12)
7483 	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7484       else
7485 	{
7486 	  operands[4] = GEN_INT (length);
7487 	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7488 			   operands);
7489 	  return pa_output_lbranch (operands[3], insn, 0);
7490 	}
7491     }
7492   /* Handle SAR as a destination.  */
7493   else
7494     {
7495       if (length == 8)
7496 	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7497       else if (length == 12)
7498 	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7499       else
7500 	{
7501 	  operands[4] = GEN_INT (length);
7502 	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7503 			   operands);
7504 	  return pa_output_lbranch (operands[3], insn, 0);
7505 	}
7506     }
7507 }
7508 
7509 /* Copy any FP arguments in INSN into integer registers.  */
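/* A sketch of the SFmode case handled below, assuming the argument is
   in %fr4 (hard register 32) and therefore maps to %r26 (GAS spelling;
   the SOM assembler uses fstws):

	fstw %fr4,-16(%sr0,%r30)
	ldw -16(%sr0,%r30),%r26

   A DFmode argument takes one store and two word loads instead.  */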
7510 static void
7511 copy_fp_args (rtx_insn *insn)
7512 {
7513   rtx link;
7514   rtx xoperands[2];
7515 
7516   for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7517     {
7518       int arg_mode, regno;
7519       rtx use = XEXP (link, 0);
7520 
7521       if (! (GET_CODE (use) == USE
7522 	  && GET_CODE (XEXP (use, 0)) == REG
7523 	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7524 	continue;
7525 
7526       arg_mode = GET_MODE (XEXP (use, 0));
7527       regno = REGNO (XEXP (use, 0));
7528 
7529       /* Is it a floating point register?  */
7530       if (regno >= 32 && regno <= 39)
7531 	{
7532 	  /* Copy the FP register into an integer register via memory.  */
7533 	  if (arg_mode == SFmode)
7534 	    {
7535 	      xoperands[0] = XEXP (use, 0);
7536 	      xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7537 	      output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7538 	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7539 	    }
7540 	  else
7541 	    {
7542 	      xoperands[0] = XEXP (use, 0);
7543 	      xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7544 	      output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7545 	      output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7546 	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7547 	    }
7548 	}
7549     }
7550 }
7551 
7552 /* Compute length of the FP argument copy sequence for INSN.  */
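/* Matching copy_fp_args above, each SFmode argument accounts for two
   insns (8 bytes) and each DFmode argument for three (12 bytes), so a
   call with one argument of each kind would get length 20.  */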
7553 static int
7554 length_fp_args (rtx_insn *insn)
7555 {
7556   int length = 0;
7557   rtx link;
7558 
7559   for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7560     {
7561       int arg_mode, regno;
7562       rtx use = XEXP (link, 0);
7563 
7564       if (! (GET_CODE (use) == USE
7565 	  && GET_CODE (XEXP (use, 0)) == REG
7566 	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7567 	continue;
7568 
7569       arg_mode = GET_MODE (XEXP (use, 0));
7570       regno = REGNO (XEXP (use, 0));
7571 
7572       /* Is it a floating point register?  */
7573       if (regno >= 32 && regno <= 39)
7574 	{
7575 	  if (arg_mode == SFmode)
7576 	    length += 8;
7577 	  else
7578 	    length += 12;
7579 	}
7580     }
7581 
7582   return length;
7583 }
7584 
7585 /* Return the attribute length for the millicode call instruction INSN.
7586    The length must match the code generated by pa_output_millicode_call.
7587    We include the delay slot in the returned length as it is better to
7588    overestimate the length than to underestimate it.  */
7589 
7590 int
7591 pa_attr_length_millicode_call (rtx_insn *insn)
7592 {
7593   unsigned long distance = -1;
7594   unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7595 
7596   if (INSN_ADDRESSES_SET_P ())
7597     {
7598       distance = (total + insn_current_reference_address (insn));
7599       if (distance < total)
7600 	distance = -1;
7601     }
7602 
7603   if (TARGET_64BIT)
7604     {
7605       if (!TARGET_LONG_CALLS && distance < 7600000)
7606 	return 8;
7607 
7608       return 20;
7609     }
7610   else if (TARGET_PORTABLE_RUNTIME)
7611     return 24;
7612   else
7613     {
7614       if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
7615 	return 8;
7616 
7617       if (!flag_pic)
7618 	return 12;
7619 
7620       return 24;
7621     }
7622 }
7623 
7624 /* INSN is a function call.
7625 
7626    CALL_DEST is the routine we are calling.  */
7627 
7628 const char *
7629 pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
7630 {
7631   int attr_length = get_attr_length (insn);
7632   int seq_length = dbr_sequence_length ();
7633   rtx xoperands[3];
7634 
7635   xoperands[0] = call_dest;
7636   xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7637 
7638   /* Handle the common case where we are sure that the branch will
7639      reach the beginning of the $CODE$ subspace.  The within-reach
7640      form of the $$sh_func_adrs call has a length of 28.  Because it
7641      has an attribute type of sh_func_adrs, it never has a nonzero
7642      sequence length (i.e., the delay slot is never filled).  */
7643   if (!TARGET_LONG_CALLS
7644       && (attr_length == 8
7645 	  || (attr_length == 28
7646 	      && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
7647     {
7648       output_asm_insn ("{bl|b,l} %0,%2", xoperands);
7649     }
7650   else
7651     {
7652       if (TARGET_64BIT)
7653 	{
7654 	  /* It might seem that one insn could be saved by accessing
7655 	     the millicode function using the linkage table.  However,
7656 	     this doesn't work in shared libraries and other dynamically
7657 	     loaded objects.  Using a pc-relative sequence also avoids
7658 	     problems related to the implicit use of the gp register.  */
7659 	  output_asm_insn ("b,l .+8,%%r1", xoperands);
7660 
7661 	  if (TARGET_GAS)
7662 	    {
7663 	      output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7664 	      output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7665 	    }
7666 	  else
7667 	    {
7668 	      xoperands[1] = gen_label_rtx ();
7669 	      output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7670 	      targetm.asm_out.internal_label (asm_out_file, "L",
7671 					 CODE_LABEL_NUMBER (xoperands[1]));
7672 	      output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7673 	    }
7674 
7675 	  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7676 	}
7677       else if (TARGET_PORTABLE_RUNTIME)
7678 	{
7679 	  /* Pure portable runtime doesn't allow be/ble; we also don't
7680 	     have PIC support in the assembler/linker, so this sequence
7681 	     is needed.  */
7682 
7683 	  /* Get the address of our target into %r1.  */
7684 	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
7685 	  output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7686 
7687 	  /* Get our return address into %r31.  */
7688 	  output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7689 	  output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7690 
7691 	  /* Jump to our target address in %r1.  */
7692 	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
7693 	}
7694       else if (!flag_pic)
7695 	{
7696 	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
7697 	  if (TARGET_PA_20)
7698 	    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7699 	  else
7700 	    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7701 	}
7702       else
7703 	{
7704 	  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7705 	  output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7706 
7707 	  if (TARGET_SOM || !TARGET_GAS)
7708 	    {
7709 	      /* The HP assembler can generate relocations for the
7710 		 difference of two symbols.  GAS can do this for a
7711 		 millicode symbol but not an arbitrary external
7712 		 symbol when generating SOM output.  */
7713 	      xoperands[1] = gen_label_rtx ();
7714 	      targetm.asm_out.internal_label (asm_out_file, "L",
7715 					 CODE_LABEL_NUMBER (xoperands[1]));
7716 	      output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7717 	      output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7718 	    }
7719 	  else
7720 	    {
7721 	      output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7722 	      output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7723 			       xoperands);
7724 	    }
7725 
7726 	  /* Jump to our target address in %r1.  */
7727 	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
7728 	}
7729     }
7730 
7731   if (seq_length == 0)
7732     output_asm_insn ("nop", xoperands);
7733 
7734   return "";
7735 }
7736 
7737 /* Return the attribute length of the call instruction INSN.  The SIBCALL
7738    flag indicates whether INSN is a regular call or a sibling call.  The
7739    length returned must be longer than the code actually generated by
7740    pa_output_call.  Since branch shortening is done before delay branch
7741    sequencing, there is no way to determine whether or not the delay
7742    slot will be filled during branch shortening.  Even when the delay
7743    slot is filled, we may have to add a nop if the delay slot contains
7744    a branch that can't reach its target.  Thus, we always have to include
7745    the delay slot in the length estimate.  This used to be done in
7746    pa_adjust_insn_length but we do it here now as some sequences always
7747    fill the delay slot and we can save four bytes in the estimate for
7748    these sequences.  */
7749 
7750 int
7751 pa_attr_length_call (rtx_insn *insn, int sibcall)
7752 {
7753   int local_call;
7754   rtx call, call_dest;
7755   tree call_decl;
7756   int length = 0;
7757   rtx pat = PATTERN (insn);
7758   unsigned long distance = -1;
7759 
7760   gcc_assert (CALL_P (insn));
7761 
7762   if (INSN_ADDRESSES_SET_P ())
7763     {
7764       unsigned long total;
7765 
7766       total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7767       distance = (total + insn_current_reference_address (insn));
7768       if (distance < total)
7769 	distance = -1;
7770     }
7771 
7772   gcc_assert (GET_CODE (pat) == PARALLEL);
7773 
7774   /* Get the call rtx.  */
7775   call = XVECEXP (pat, 0, 0);
7776   if (GET_CODE (call) == SET)
7777     call = SET_SRC (call);
7778 
7779   gcc_assert (GET_CODE (call) == CALL);
7780 
7781   /* Determine if this is a local call.  */
7782   call_dest = XEXP (XEXP (call, 0), 0);
7783   call_decl = SYMBOL_REF_DECL (call_dest);
7784   local_call = call_decl && targetm.binds_local_p (call_decl);
7785 
7786   /* pc-relative branch.  */
7787   if (!TARGET_LONG_CALLS
7788       && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7789 	  || distance < MAX_PCREL17F_OFFSET))
7790     length += 8;
7791 
7792   /* 64-bit plabel sequence.  */
7793   else if (TARGET_64BIT && !local_call)
7794     length += sibcall ? 28 : 24;
7795 
7796   /* non-pic long absolute branch sequence.  */
7797   else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7798     length += 12;
7799 
7800   /* long pc-relative branch sequence.  */
7801   else if (TARGET_LONG_PIC_SDIFF_CALL
7802 	   || (TARGET_GAS && !TARGET_SOM
7803 	       && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7804     {
7805       length += 20;
7806 
7807       if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7808 	length += 8;
7809     }
7810 
7811   /* 32-bit plabel sequence.  */
7812   else
7813     {
7814       length += 32;
7815 
7816       if (TARGET_SOM)
7817 	length += length_fp_args (insn);
7818 
7819       if (flag_pic)
7820 	length += 4;
7821 
7822       if (!TARGET_PA_20)
7823 	{
7824 	  if (!sibcall)
7825 	    length += 8;
7826 
7827 	  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7828 	    length += 8;
7829 	}
7830     }
7831 
7832   return length;
7833 }
7834 
7835 /* INSN is a function call.
7836 
7837    CALL_DEST is the routine we are calling.  */
7838 
7839 const char *
7840 pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
7841 {
7842   int seq_length = dbr_sequence_length ();
7843   tree call_decl = SYMBOL_REF_DECL (call_dest);
7844   int local_call = call_decl && targetm.binds_local_p (call_decl);
7845   rtx xoperands[2];
7846 
7847   xoperands[0] = call_dest;
7848 
7849   /* Handle the common case where we're sure that the branch will reach
7850      the beginning of the "$CODE$" subspace.  This is the beginning of
7851      the current function if we are in a named section.  */
7852   if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
7853     {
7854       xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7855       output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7856     }
7857   else
7858     {
7859       if (TARGET_64BIT && !local_call)
7860 	{
7861 	  /* ??? As far as I can tell, the HP linker doesn't support the
7862 	     long pc-relative sequence described in the 64-bit runtime
7863 	     architecture.  So, we use a slightly longer indirect call.  */
7864 	  xoperands[0] = pa_get_deferred_plabel (call_dest);
7865 	  xoperands[1] = gen_label_rtx ();
7866 
7867 	  /* If this isn't a sibcall, we put the load of %r27 into the
7868 	     delay slot.  We can't do this in a sibcall as we don't
7869 	     have a second call-clobbered scratch register available.
7870 	     We don't need to do anything when generating fast indirect
7871 	     calls.  */
7872 	  if (seq_length != 0 && !sibcall)
7873 	    {
7874 	      final_scan_insn (NEXT_INSN (insn), asm_out_file,
7875 			       optimize, 0, NULL);
7876 
7877 	      /* Now delete the delay insn.  */
7878 	      SET_INSN_DELETED (NEXT_INSN (insn));
7879 	      seq_length = 0;
7880 	    }
7881 
7882 	  output_asm_insn ("addil LT'%0,%%r27", xoperands);
7883 	  output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7884 	  output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7885 
7886 	  if (sibcall)
7887 	    {
7888 	      output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7889 	      output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7890 	      output_asm_insn ("bve (%%r1)", xoperands);
7891 	    }
7892 	  else
7893 	    {
7894 	      output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7895 	      output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7896 	      output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7897 	      seq_length = 1;
7898 	    }
7899 	}
7900       else
7901 	{
7902 	  int indirect_call = 0;
7903 
7904 	  /* Emit a long call.  There are several different sequences
7905 	     of increasing length and complexity.  In most cases,
7906              they don't allow an instruction in the delay slot.  */
7907 	  if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7908 	      && !TARGET_LONG_PIC_SDIFF_CALL
7909 	      && !(TARGET_GAS && !TARGET_SOM
7910 		   && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7911 	      && !TARGET_64BIT)
7912 	    indirect_call = 1;
7913 
7914 	  if (seq_length != 0
7915 	      && !sibcall
7916 	      && (!TARGET_PA_20
7917 		  || indirect_call
7918 		  || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
7919 	    {
7920 	      /* A non-jump insn in the delay slot.  By definition we can
7921 		 emit this insn before the call (and in fact before argument
7922 		 relocation).  */
7923 	      final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7924 			       NULL);
7925 
7926 	      /* Now delete the delay insn.  */
7927 	      SET_INSN_DELETED (NEXT_INSN (insn));
7928 	      seq_length = 0;
7929 	    }
7930 
7931 	  if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7932 	    {
7933 	      /* This is the best sequence for making long calls in
7934 		 non-pic code.  Unfortunately, GNU ld doesn't provide
7935 		 the stub needed for external calls, and GAS's support
7936 		 for this with the SOM linker is buggy.  It is safe
7937 		 to use this for local calls.  */
7938 	      output_asm_insn ("ldil L'%0,%%r1", xoperands);
7939 	      if (sibcall)
7940 		output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7941 	      else
7942 		{
7943 		  if (TARGET_PA_20)
7944 		    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7945 				     xoperands);
7946 		  else
7947 		    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7948 
7949 		  output_asm_insn ("copy %%r31,%%r2", xoperands);
7950 		  seq_length = 1;
7951 		}
7952 	    }
7953 	  else
7954 	    {
7955 	      if (TARGET_LONG_PIC_SDIFF_CALL)
7956 		{
7957 		  /* The HP assembler and linker can handle relocations
7958 		     for the difference of two symbols.  The HP assembler
7959 		     recognizes the sequence as a pc-relative call and
7960 		     the linker provides stubs when needed.  */
7961 		  xoperands[1] = gen_label_rtx ();
7962 		  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7963 		  output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7964 		  targetm.asm_out.internal_label (asm_out_file, "L",
7965 					     CODE_LABEL_NUMBER (xoperands[1]));
7966 		  output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7967 		}
7968 	      else if (TARGET_GAS && !TARGET_SOM
7969 		       && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7970 		{
7971 		  /*  GAS currently can't generate the relocations that
7972 		      are needed for the SOM linker under HP-UX using this
7973 		      sequence.  The GNU linker doesn't generate the stubs
7974 		      that are needed for external calls on TARGET_ELF32
7975 		      with this sequence.  For now, we have to use a
7976 		      longer plabel sequence when using GAS.  */
7977 		  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7978 		  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7979 				   xoperands);
7980 		  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7981 				   xoperands);
7982 		}
7983 	      else
7984 		{
7985 		  /* Emit a long plabel-based call sequence.  This is
7986 		     essentially an inline implementation of $$dyncall.
7987 		     We don't actually try to call $$dyncall as this is
7988 		     as difficult as calling the function itself.  */
7989 		  xoperands[0] = pa_get_deferred_plabel (call_dest);
7990 		  xoperands[1] = gen_label_rtx ();
7991 
7992 		  /* Since the call is indirect, FP arguments in registers
7993 		     need to be copied to the general registers.  Then, the
7994 		     argument relocation stub will copy them back.  */
7995 		  if (TARGET_SOM)
7996 		    copy_fp_args (insn);
7997 
7998 		  if (flag_pic)
7999 		    {
8000 		      output_asm_insn ("addil LT'%0,%%r19", xoperands);
8001 		      output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
8002 		      output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
8003 		    }
8004 		  else
8005 		    {
8006 		      output_asm_insn ("addil LR'%0-$global$,%%r27",
8007 				       xoperands);
8008 		      output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
8009 				       xoperands);
8010 		    }
8011 
8012 		  output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
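		  /* %r1 now holds the plabel address.  As in $$dyncall,
		     the bb insn below tests the plabel bit: if it is
		     clear, %r1 is already the function address and the
		     three unpacking insns are skipped; otherwise the low
		     two bits are cleared and the new GP (%r19) and the
		     entry point are loaded from the plabel.  */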
8013 		  output_asm_insn ("depi 0,31,2,%%r1", xoperands);
8014 		  output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
8015 		  output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
8016 
8017 		  if (!sibcall && !TARGET_PA_20)
8018 		    {
8019 		      output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
8020 		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8021 			output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
8022 		      else
8023 			output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
8024 		    }
8025 		}
8026 
8027 	      if (TARGET_PA_20)
8028 		{
8029 		  if (sibcall)
8030 		    output_asm_insn ("bve (%%r1)", xoperands);
8031 		  else
8032 		    {
8033 		      if (indirect_call)
8034 			{
8035 			  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8036 			  output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
8037 			  seq_length = 1;
8038 			}
8039 		      else
8040 			output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8041 		    }
8042 		}
8043 	      else
8044 		{
8045 		  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8046 		    output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8047 				     xoperands);
8048 
8049 		  if (sibcall)
8050 		    {
8051 		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8052 			output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
8053 		      else
8054 			output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
8055 		    }
8056 		  else
8057 		    {
8058 		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8059 			output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
8060 		      else
8061 			output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
8062 
8063 		      if (indirect_call)
8064 			output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
8065 		      else
8066 			output_asm_insn ("copy %%r31,%%r2", xoperands);
8067 		      seq_length = 1;
8068 		    }
8069 		}
8070 	    }
8071 	}
8072     }
8073 
8074   if (seq_length == 0)
8075     output_asm_insn ("nop", xoperands);
8076 
8077   return "";
8078 }
8079 
8080 /* Return the attribute length of the indirect call instruction INSN.
8081    The length must match the code generated by pa_output_indirect_call.
8082    The returned length includes the delay slot.  Currently, the delay
8083    slot of an indirect call sequence is not exposed and it is used by
8084    the sequence itself.  */
8085 
8086 int
8087 pa_attr_length_indirect_call (rtx_insn *insn)
8088 {
8089   unsigned long distance = -1;
8090   unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8091 
8092   if (INSN_ADDRESSES_SET_P ())
8093     {
8094       distance = (total + insn_current_reference_address (insn));
8095       if (distance < total)
8096 	distance = -1;
8097     }
8098 
8099   if (TARGET_64BIT)
8100     return 12;
8101 
8102   if (TARGET_FAST_INDIRECT_CALLS
8103       || (!TARGET_LONG_CALLS
8104 	  && !TARGET_PORTABLE_RUNTIME
8105 	  && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8106 	      || distance < MAX_PCREL17F_OFFSET)))
8107     return 8;
8108 
8109   if (flag_pic)
8110     return 20;
8111 
8112   if (TARGET_PORTABLE_RUNTIME)
8113     return 16;
8114 
8115   /* Out of reach, can use ble.  */
8116   return 12;
8117 }
8118 
8119 const char *
8120 pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
8121 {
8122   rtx xoperands[1];
8123 
8124   if (TARGET_64BIT)
8125     {
8126       xoperands[0] = call_dest;
8127       output_asm_insn ("ldd 16(%0),%%r2", xoperands);
8128       output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
8129       return "";
8130     }
8131 
8132   /* First the special case for kernels, level 0 systems, etc.  */
8133   if (TARGET_FAST_INDIRECT_CALLS)
8134     return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8135 
8136   /* Now the normal case -- we can reach $$dyncall directly or
8137      we're sure that we can get there via a long-branch stub.
8138 
8139      No need to check target flags as the length uniquely identifies
8140      the remaining cases.  */
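  /* The length-to-sequence mapping used below: 8 bytes selects the
     direct call, 12 the non-PIC ldil/ble form, 16 the portable
     runtime form, and anything longer the PIC sequence at the end.  */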
8141   if (pa_attr_length_indirect_call (insn) == 8)
8142     {
8143       /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8144 	 $$dyncall.  Since BLE uses %r31 as the link register, the 22-bit
8145 	 variant of the B,L instruction can't be used on the SOM target.  */
8146       if (TARGET_PA_20 && !TARGET_SOM)
8147 	return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
8148       else
8149 	return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8150     }
8151 
8152   /* Long millicode call, but we are not generating PIC or portable runtime
8153      code.  */
8154   if (pa_attr_length_indirect_call (insn) == 12)
8155     return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8156 
8157   /* Long millicode call for portable runtime.  */
8158   if (pa_attr_length_indirect_call (insn) == 16)
8159     return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8160 
8161   /* We need a long PIC call to $$dyncall.  */
8162   xoperands[0] = NULL_RTX;
8163   output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
8164   if (TARGET_SOM || !TARGET_GAS)
8165     {
8166       xoperands[0] = gen_label_rtx ();
8167       output_asm_insn ("addil L'$$dyncall-%0,%%r2", xoperands);
8168       targetm.asm_out.internal_label (asm_out_file, "L",
8169 				      CODE_LABEL_NUMBER (xoperands[0]));
8170       output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
8171     }
8172   else
8173     {
8174       output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r2", xoperands);
8175       output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
8176 		       xoperands);
8177     }
8178   output_asm_insn ("bv %%r0(%%r1)", xoperands);
8179   output_asm_insn ("ldo 12(%%r2),%%r2", xoperands);
8180   return "";
8181 }
8182 
8183 /* In HPUX 8.0's shared library scheme, special relocations are needed
8184    for function labels if they might be passed to a function
8185    in a shared library (because shared libraries don't live in code
8186    space), and special magic is needed to construct their address.  */
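/* For example, a function symbol named "foo" is rewritten as "@foo";
   pa_strip_name_encoding below removes the prefix again.  */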
8187 
8188 void
8189 pa_encode_label (rtx sym)
8190 {
8191   const char *str = XSTR (sym, 0);
8192   int len = strlen (str) + 1;
8193   char *newstr, *p;
8194 
8195   p = newstr = XALLOCAVEC (char, len + 1);
8196   *p++ = '@';
8197   strcpy (p, str);
8198 
8199   XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8200 }
8201 
8202 static void
8203 pa_encode_section_info (tree decl, rtx rtl, int first)
8204 {
8205   int old_referenced = 0;
8206 
8207   if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8208     old_referenced
8209       = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8210 
8211   default_encode_section_info (decl, rtl, first);
8212 
8213   if (first && TEXT_SPACE_P (decl))
8214     {
8215       SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8216       if (TREE_CODE (decl) == FUNCTION_DECL)
8217 	pa_encode_label (XEXP (rtl, 0));
8218     }
8219   else if (old_referenced)
8220     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8221 }
8222 
8223 /* This is sort of inverse to pa_encode_section_info.  */
8224 
8225 static const char *
8226 pa_strip_name_encoding (const char *str)
8227 {
8228   str += (*str == '@');
8229   str += (*str == '*');
8230   return str;
8231 }
8232 
8233 /* Returns 1 if OP is a function label involved in a simple addition
8234    with a constant.  Used to keep certain patterns from matching
8235    during instruction combination.  */
8236 int
8237 pa_is_function_label_plus_const (rtx op)
8238 {
8239   /* Strip off any CONST.  */
8240   if (GET_CODE (op) == CONST)
8241     op = XEXP (op, 0);
8242 
8243   return (GET_CODE (op) == PLUS
8244 	  && function_label_operand (XEXP (op, 0), VOIDmode)
8245 	  && GET_CODE (XEXP (op, 1)) == CONST_INT);
8246 }
8247 
8248 /* Output assembly code for a thunk to FUNCTION.  */
8249 
8250 static void
8251 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8252 			HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8253 			tree function)
8254 {
8255   static unsigned int current_thunk_number;
8256   int val_14 = VAL_14_BITS_P (delta);
8257   unsigned int old_last_address = last_address, nbytes = 0;
8258   char label[16];
8259   rtx xoperands[4];
8260 
8261   xoperands[0] = XEXP (DECL_RTL (function), 0);
8262   xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8263   xoperands[2] = GEN_INT (delta);
8264 
8265   final_start_function (emit_barrier (), file, 1);
8266 
8267   /* Output the thunk.  We know that the function is in the same
8268      translation unit (i.e., the same space) as the thunk, and that
8269      thunks are output after their method.  Thus, we don't need an
8270      external branch to reach the function.  With SOM and GAS,
8271      functions and thunks are effectively in different sections.
8272 	 Thus, we can always use an IA-relative branch and the linker
8273      will add a long branch stub if necessary.
8274 
8275      However, we have to be careful when generating PIC code on the
8276      SOM port to ensure that the sequence does not transfer to an
8277      import stub for the target function as this could clobber the
8278      return value saved at SP-24.  This would also apply to the
8279      32-bit linux port if the multi-space model is implemented.  */
8280   if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8281        && !(flag_pic && TREE_PUBLIC (function))
8282        && (TARGET_GAS || last_address < 262132))
8283       || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8284 	  && ((targetm_common.have_named_sections
8285 	       && DECL_SECTION_NAME (thunk_fndecl) != NULL
8286 	       /* The GNU 64-bit linker has rather poor stub management.
8287 		  So, we use a long branch from thunks that aren't in
8288 		  the same section as the target function.  */
8289 	       && ((!TARGET_64BIT
8290 		    && (DECL_SECTION_NAME (thunk_fndecl)
8291 			!= DECL_SECTION_NAME (function)))
8292 		   || ((DECL_SECTION_NAME (thunk_fndecl)
8293 			== DECL_SECTION_NAME (function))
8294 		       && last_address < 262132)))
8295 	      /* In this case, we need to be able to reach the start of
8296 		 the stub table even though the function is likely closer
8297 		 and can be jumped to directly.  */
8298 	      || (targetm_common.have_named_sections
8299 		  && DECL_SECTION_NAME (thunk_fndecl) == NULL
8300 		  && DECL_SECTION_NAME (function) == NULL
8301 		  && total_code_bytes < MAX_PCREL17F_OFFSET)
8302 	      /* Likewise.  */
8303 	      || (!targetm_common.have_named_sections
8304 		  && total_code_bytes < MAX_PCREL17F_OFFSET))))
8305     {
8306       if (!val_14)
8307 	output_asm_insn ("addil L'%2,%%r26", xoperands);
8308 
8309       output_asm_insn ("b %0", xoperands);
8310 
8311       if (val_14)
8312 	{
8313 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8314 	  nbytes += 8;
8315 	}
8316       else
8317 	{
8318 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8319 	  nbytes += 12;
8320 	}
8321     }
8322   else if (TARGET_64BIT)
8323     {
8324       /* We only have one call-clobbered scratch register, so we can't
8325          make use of the delay slot if delta doesn't fit in 14 bits.  */
8326       if (!val_14)
8327 	{
8328 	  output_asm_insn ("addil L'%2,%%r26", xoperands);
8329 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8330 	}
8331 
8332       output_asm_insn ("b,l .+8,%%r1", xoperands);
8333 
8334       if (TARGET_GAS)
8335 	{
8336 	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8337 	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
8338 	}
8339       else
8340 	{
8341 	  xoperands[3] = GEN_INT (val_14 ? 8 : 16);
8342 	  output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
8343 	}
8344 
8345       if (val_14)
8346 	{
8347 	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
8348 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8349 	  nbytes += 20;
8350 	}
8351       else
8352 	{
8353 	  output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8354 	  nbytes += 24;
8355 	}
8356     }
8357   else if (TARGET_PORTABLE_RUNTIME)
8358     {
8359       output_asm_insn ("ldil L'%0,%%r1", xoperands);
8360       output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8361 
8362       if (!val_14)
8363 	output_asm_insn ("addil L'%2,%%r26", xoperands);
8364 
8365       output_asm_insn ("bv %%r0(%%r22)", xoperands);
8366 
8367       if (val_14)
8368 	{
8369 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8370 	  nbytes += 16;
8371 	}
8372       else
8373 	{
8374 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8375 	  nbytes += 20;
8376 	}
8377     }
8378   else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8379     {
8380       /* The function is accessible from outside this module.  The only
8381 	 way to avoid an import stub between the thunk and function is to
8382 	 call the function directly with an indirect sequence similar to
8383 	 that used by $$dyncall.  This is possible because $$dyncall acts
8384 	 as the import stub in an indirect call.  */
8385       ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8386       xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8387       output_asm_insn ("addil LT'%3,%%r19", xoperands);
8388       output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8389       output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8390       output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8391       output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8392       output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8393       output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8394 
8395       if (!val_14)
8396 	{
8397 	  output_asm_insn ("addil L'%2,%%r26", xoperands);
8398 	  nbytes += 4;
8399 	}
8400 
8401       if (TARGET_PA_20)
8402 	{
8403 	  output_asm_insn ("bve (%%r22)", xoperands);
8404 	  nbytes += 36;
8405 	}
8406       else if (TARGET_NO_SPACE_REGS)
8407 	{
8408 	  output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8409 	  nbytes += 36;
8410 	}
8411       else
8412 	{
8413 	  output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8414 	  output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8415 	  output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8416 	  nbytes += 44;
8417 	}
8418 
8419       if (val_14)
8420 	output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8421       else
8422 	output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8423     }
8424   else if (flag_pic)
8425     {
8426       output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8427 
8428       if (TARGET_SOM || !TARGET_GAS)
8429 	{
8430 	  output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
8431 	  output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
8432 	}
8433       else
8434 	{
8435 	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8436 	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
8437 	}
8438 
8439       if (!val_14)
8440 	output_asm_insn ("addil L'%2,%%r26", xoperands);
8441 
8442       output_asm_insn ("bv %%r0(%%r22)", xoperands);
8443 
8444       if (val_14)
8445 	{
8446 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8447 	  nbytes += 20;
8448 	}
8449       else
8450 	{
8451 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8452 	  nbytes += 24;
8453 	}
8454     }
8455   else
8456     {
8457       if (!val_14)
8458 	output_asm_insn ("addil L'%2,%%r26", xoperands);
8459 
8460       output_asm_insn ("ldil L'%0,%%r22", xoperands);
8461       output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8462 
8463       if (val_14)
8464 	{
8465 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8466 	  nbytes += 12;
8467 	}
8468       else
8469 	{
8470 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8471 	  nbytes += 16;
8472 	}
8473     }
8474 
8475   final_end_function ();
8476 
8477   if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8478     {
8479       switch_to_section (data_section);
8480       output_asm_insn (".align 4", xoperands);
8481       ASM_OUTPUT_LABEL (file, label);
8482       output_asm_insn (".word P'%0", xoperands);
8483     }
8484 
8485   current_thunk_number++;
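  /* Round the thunk size up to the function alignment so the running
     last_address estimate stays conservative.  */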
8486   nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8487 	    & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8488   last_address += nbytes;
8489   if (old_last_address > last_address)
8490     last_address = UINT_MAX;
8491   update_total_code_bytes (nbytes);
8492 }
8493 
8494 /* Only direct calls to static functions are allowed to be sibling (tail)
8495    call optimized.
8496 
8497    This restriction is necessary because some linker generated stubs will
8498    store return pointers into rp' in some cases which might clobber a
8499    store the return pointer into rp' in some cases, which might clobber a
8500 
8501    In a sibcall the current function and the target function share stack
8502    space.  Thus if the path to the current function and the path to the
8503    target function save a value in rp', they save the value into the
8504    same stack slot, which has undesirable consequences.
8505 
8506    Because of the deferred binding nature of shared libraries any function
8507    with external scope could be in a different load module and thus require
8508    rp' to be saved when calling that function.  So sibcall optimizations
8509    can only be safe for static functions.
8510 
8511    Note that GCC never needs return value relocations, so we don't have to
8512    worry about static calls with return value relocations (which require
8513    saving rp').
8514 
8515    It is safe to perform a sibcall optimization when the target function
8516    will never return.  */
8517 static bool
8518 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8519 {
8520   if (TARGET_PORTABLE_RUNTIME)
8521     return false;
8522 
8523   /* Sibcalls are not ok because the arg pointer register is not a fixed
8524      register.  This prevents the sibcall optimization from occurring.  In
8525      addition, there are problems with stub placement using GNU ld.  This
8526      is because a normal sibcall branch uses a 17-bit relocation while
8527      a regular call branch uses a 22-bit relocation.  As a result, more
8528      care needs to be taken in the placement of long-branch stubs.  */
8529   if (TARGET_64BIT)
8530     return false;
8531 
8532   /* Sibcalls are only ok within a translation unit.  */
8533   return (decl && !TREE_PUBLIC (decl));
8534 }
8535 
8536 /* ??? Addition is not commutative on the PA due to the weird implicit
8537    space register selection rules for memory addresses.  Therefore, we
8538    don't consider a + b == b + a, as this might be inside a MEM.  */
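/* For example (illustrative), in (mem:SI (plus:SI (reg:SI %r26)
   (reg:SI %r25))) the space register for the access is implicitly
   selected from the base operand, so swapping the operands of the
   PLUS could change which space the memory access uses.  */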
8539 static bool
8540 pa_commutative_p (const_rtx x, int outer_code)
8541 {
8542   return (COMMUTATIVE_P (x)
8543 	  && (TARGET_NO_SPACE_REGS
8544 	      || (outer_code != UNKNOWN && outer_code != MEM)
8545 	      || GET_CODE (x) != PLUS));
8546 }
8547 
8548 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8549    use in fmpyadd instructions.  */
8550 int
8551 pa_fmpyaddoperands (rtx *operands)
8552 {
8553   machine_mode mode = GET_MODE (operands[0]);
8554 
8555   /* Must be a floating point mode.  */
8556   if (mode != SFmode && mode != DFmode)
8557     return 0;
8558 
8559   /* All modes must be the same.  */
8560   if (! (mode == GET_MODE (operands[1])
8561 	 && mode == GET_MODE (operands[2])
8562 	 && mode == GET_MODE (operands[3])
8563 	 && mode == GET_MODE (operands[4])
8564 	 && mode == GET_MODE (operands[5])))
8565     return 0;
8566 
8567   /* All operands must be registers.  */
8568   if (! (GET_CODE (operands[1]) == REG
8569 	 && GET_CODE (operands[2]) == REG
8570 	 && GET_CODE (operands[3]) == REG
8571 	 && GET_CODE (operands[4]) == REG
8572 	 && GET_CODE (operands[5]) == REG))
8573     return 0;
8574 
8575   /* Only 2 real operands to the addition.  One of the input operands must
8576      be the same as the output operand.  */
8577   if (! rtx_equal_p (operands[3], operands[4])
8578       && ! rtx_equal_p (operands[3], operands[5]))
8579     return 0;
8580 
8581   /* Inout operand of add cannot conflict with any operands from multiply.  */
8582   if (rtx_equal_p (operands[3], operands[0])
8583      || rtx_equal_p (operands[3], operands[1])
8584      || rtx_equal_p (operands[3], operands[2]))
8585     return 0;
8586 
8587   /* The multiply cannot feed into the addition operands.  */
8588   if (rtx_equal_p (operands[4], operands[0])
8589       || rtx_equal_p (operands[5], operands[0]))
8590     return 0;
8591 
8592   /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
8593   if (mode == SFmode
8594       && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8595 	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8596 	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8597 	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8598 	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8599 	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8600     return 0;
8601 
8602   /* Passed.  Operands are suitable for fmpyadd.  */
8603   return 1;
8604 }
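/* An illustrative example of the checks above (operand roles only; see
   the fmpyadd patterns in pa.md for the exact assembly template):

	fmpy	%fr4,%fr5 -> %fr6	(operands 1, 2 -> 0)
	fadd	%fr7,%fr8 -> %fr7	(operands 4, 5 -> 3)

   qualifies because the add reuses its destination (operands[3] ==
   operands[4]) and neither operation reads the other's result.  */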
8605 
8606 #if !defined(USE_COLLECT2)
8607 static void
8608 pa_asm_out_constructor (rtx symbol, int priority)
8609 {
8610   if (!function_label_operand (symbol, VOIDmode))
8611     pa_encode_label (symbol);
8612 
8613 #ifdef CTORS_SECTION_ASM_OP
8614   default_ctor_section_asm_out_constructor (symbol, priority);
8615 #else
8616 # ifdef TARGET_ASM_NAMED_SECTION
8617   default_named_section_asm_out_constructor (symbol, priority);
8618 # else
8619   default_stabs_asm_out_constructor (symbol, priority);
8620 # endif
8621 #endif
8622 }
8623 
8624 static void
8625 pa_asm_out_destructor (rtx symbol, int priority)
8626 {
8627   if (!function_label_operand (symbol, VOIDmode))
8628     pa_encode_label (symbol);
8629 
8630 #ifdef DTORS_SECTION_ASM_OP
8631   default_dtor_section_asm_out_destructor (symbol, priority);
8632 #else
8633 # ifdef TARGET_ASM_NAMED_SECTION
8634   default_named_section_asm_out_destructor (symbol, priority);
8635 # else
8636   default_stabs_asm_out_destructor (symbol, priority);
8637 # endif
8638 #endif
8639 }
8640 #endif
8641 
8642 /* This function places uninitialized global data in the bss section.
8643    The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8644    function on the SOM port to prevent uninitialized global data from
8645    being placed in the data section.  */
8646 
8647 void
8648 pa_asm_output_aligned_bss (FILE *stream,
8649 			   const char *name,
8650 			   unsigned HOST_WIDE_INT size,
8651 			   unsigned int align)
8652 {
8653   switch_to_section (bss_section);
8654   fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8655 
8656 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8657   ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8658 #endif
8659 
8660 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8661   ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8662 #endif
8663 
8664   fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8665   ASM_OUTPUT_LABEL (stream, name);
8666   fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED "\n", size);
8667 }
8668 
8669 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8670    that doesn't allow the alignment of global common storage to be directly
8671    specified.  The SOM linker aligns common storage based on the rounded
8672    value of the NUM_BYTES parameter in the .comm directive.  It's not
8673    possible to use the .align directive as it doesn't affect the alignment
8674    of the label associated with a .comm directive.  */
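/* For example (illustrative), a 4-byte common object that requires
   16-byte alignment is emitted as

	foo	.comm 16

   since NUM_BYTES below is MAX (size, align / BITS_PER_UNIT) and the
   SOM linker derives the alignment from the rounded size.  */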
8675 
8676 void
8677 pa_asm_output_aligned_common (FILE *stream,
8678 			      const char *name,
8679 			      unsigned HOST_WIDE_INT size,
8680 			      unsigned int align)
8681 {
8682   unsigned int max_common_align;
8683 
8684   max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8685   if (align > max_common_align)
8686     {
8687       warning (0, "alignment (%u) for %s exceeds maximum alignment "
8688 	       "for global common data.  Using %u",
8689 	       align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8690       align = max_common_align;
8691     }
8692 
8693   switch_to_section (bss_section);
8694 
8695   assemble_name (stream, name);
8696   fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED "\n",
8697            MAX (size, align / BITS_PER_UNIT));
8698 }
8699 
8700 /* We can't use .comm for local common storage as the SOM linker effectively
8701    treats the symbol as universal and uses the same storage for local symbols
8702    with the same name in different object files.  The .block directive
8703    reserves an uninitialized block of storage.  However, it's not common
8704    storage.  Fortunately, GCC never requests common storage with the same
8705    name in any given translation unit.  */
8706 
8707 void
8708 pa_asm_output_aligned_local (FILE *stream,
8709 			     const char *name,
8710 			     unsigned HOST_WIDE_INT size,
8711 			     unsigned int align)
8712 {
8713   switch_to_section (bss_section);
8714   fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8715 
8716 #ifdef LOCAL_ASM_OP
8717   fprintf (stream, "%s", LOCAL_ASM_OP);
8718   assemble_name (stream, name);
8719   fprintf (stream, "\n");
8720 #endif
8721 
8722   ASM_OUTPUT_LABEL (stream, name);
8723   fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED "\n", size);
8724 }
8725 
8726 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8727    use in fmpysub instructions.  */
8728 int
8729 pa_fmpysuboperands (rtx *operands)
8730 {
8731   machine_mode mode = GET_MODE (operands[0]);
8732 
8733   /* Must be a floating point mode.  */
8734   if (mode != SFmode && mode != DFmode)
8735     return 0;
8736 
8737   /* All modes must be the same.  */
8738   if (! (mode == GET_MODE (operands[1])
8739 	 && mode == GET_MODE (operands[2])
8740 	 && mode == GET_MODE (operands[3])
8741 	 && mode == GET_MODE (operands[4])
8742 	 && mode == GET_MODE (operands[5])))
8743     return 0;
8744 
8745   /* All operands must be registers.  */
8746   if (! (GET_CODE (operands[1]) == REG
8747 	 && GET_CODE (operands[2]) == REG
8748 	 && GET_CODE (operands[3]) == REG
8749 	 && GET_CODE (operands[4]) == REG
8750 	 && GET_CODE (operands[5]) == REG))
8751     return 0;
8752 
8753   /* Only 2 real operands to the subtraction.  Subtraction is not a commutative
8754      operation, so operands[4] must be the same as operands[3].  */
8755   if (! rtx_equal_p (operands[3], operands[4]))
8756     return 0;
8757 
8758   /* The multiply cannot feed into the subtraction.  */
8759   if (rtx_equal_p (operands[5], operands[0]))
8760     return 0;
8761 
8762   /* Inout operand of sub cannot conflict with any operands from multiply.  */
8763   if (rtx_equal_p (operands[3], operands[0])
8764      || rtx_equal_p (operands[3], operands[1])
8765      || rtx_equal_p (operands[3], operands[2]))
8766     return 0;
8767 
8768   /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
8769   if (mode == SFmode
8770       && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8771 	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8772 	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8773 	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8774 	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8775 	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8776     return 0;
8777 
8778   /* Passed.  Operands are suitable for fmpysub.  */
8779   return 1;
8780 }
8781 
8782 /* Return 1 if the given constant is 2, 4, or 8.  These are the valid
8783    constants for shadd instructions.  */
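/* For example (illustrative; register names are placeholders), an
   address computation such as x + 4*i can use

	sh2add %r_i,%r_x,%r_dest

   because the shadd constant 4 corresponds to a left shift of 2.  */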
8784 int
8785 pa_shadd_constant_p (int val)
8786 {
8787   if (val == 2 || val == 4 || val == 8)
8788     return 1;
8789   else
8790     return 0;
8791 }
8792 
8793 /* Return TRUE if INSN branches forward.  */
8794 
8795 static bool
8796 forward_branch_p (rtx_insn *insn)
8797 {
8798   rtx lab = JUMP_LABEL (insn);
8799 
8800   /* The INSN must have a jump label.  */
8801   gcc_assert (lab != NULL_RTX);
8802 
8803   if (INSN_ADDRESSES_SET_P ())
8804     return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
8805 
8806   while (insn)
8807     {
8808       if (insn == lab)
8809 	return true;
8810       else
8811 	insn = NEXT_INSN (insn);
8812     }
8813 
8814   return false;
8815 }
8816 
8817 /* Output an unconditional move and branch insn.  */
8818 
8819 const char *
8820 pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
8821 {
8822   int length = get_attr_length (insn);
8823 
8824   /* These are the cases in which we win.  */
8825   if (length == 4)
8826     return "mov%I1b,tr %1,%0,%2";
8827 
8828   /* None of the following cases win, but they don't lose either.  */
8829   if (length == 8)
8830     {
8831       if (dbr_sequence_length () == 0)
8832 	{
8833 	  /* Nothing in the delay slot, fake it by putting the combined
8834 	     insn (the copy or add) in the delay slot of a bl.  */
8835 	  if (GET_CODE (operands[1]) == CONST_INT)
8836 	    return "b %2\n\tldi %1,%0";
8837 	  else
8838 	    return "b %2\n\tcopy %1,%0";
8839 	}
8840       else
8841 	{
8842 	  /* Something in the delay slot, but we've got a long branch.  */
8843 	  if (GET_CODE (operands[1]) == CONST_INT)
8844 	    return "ldi %1,%0\n\tb %2";
8845 	  else
8846 	    return "copy %1,%0\n\tb %2";
8847 	}
8848     }
8849 
8850   if (GET_CODE (operands[1]) == CONST_INT)
8851     output_asm_insn ("ldi %1,%0", operands);
8852   else
8853     output_asm_insn ("copy %1,%0", operands);
8854   return pa_output_lbranch (operands[2], insn, 1);
8855 }
8856 
8857 /* Output an unconditional add and branch insn.  */
8858 
8859 const char *
8860 pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
8861 {
8862   int length = get_attr_length (insn);
8863 
8864   /* To make life easy we want operand0 to be the shared input/output
8865      operand and operand1 to be the readonly operand.  */
8866   if (operands[0] == operands[1])
8867     operands[1] = operands[2];
8868 
8869   /* These are the cases in which we win.  */
8870   if (length == 4)
8871     return "add%I1b,tr %1,%0,%3";
8872 
8873   /* None of the following cases win, but they don't lose either.  */
8874   if (length == 8)
8875     {
8876       if (dbr_sequence_length () == 0)
8877 	/* Nothing in the delay slot, fake it by putting the combined
8878 	   insn (the copy or add) in the delay slot of a bl.  */
8879 	return "b %3\n\tadd%I1 %1,%0,%0";
8880       else
8881 	/* Something in the delay slot, but we've got a long branch.  */
8882 	return "add%I1 %1,%0,%0\n\tb %3";
8883     }
8884 
8885   output_asm_insn ("add%I1 %1,%0,%0", operands);
8886   return pa_output_lbranch (operands[3], insn, 1);
8887 }
8888 
8889 /* We use this hook to perform a PA specific optimization which is difficult
8890    to do in earlier passes.  */
8891 
8892 static void
8893 pa_reorg (void)
8894 {
8895   remove_useless_addtr_insns (1);
8896 
8897   if (pa_cpu < PROCESSOR_8000)
8898     pa_combine_instructions ();
8899 }
8900 
8901 /* The PA has a number of odd instructions which can perform multiple
8902    tasks at once.  On first generation PA machines (PA1.0 and PA1.1)
8903    it may be profitable to combine two instructions into one instruction
8904    with two outputs.  It's not profitable on PA2.0 machines because the
8905    two outputs would take two slots in the reorder buffers.
8906 
8907    This routine finds instructions which can be combined and combines
8908    them.  We only support some of the potential combinations, and we
8909    only try common ways to find suitable instructions.
8910 
8911       * addb can add two registers or a register and a small integer
8912       and jump to a nearby (+-8k) location.  Normally the jump to the
8913       nearby location is conditional on the result of the add, but by
8914       using the "true" condition we can make the jump unconditional.
8915       Thus addb can perform two independent operations in one insn.
8916 
8917       * movb is similar to addb in that it can perform a reg->reg
8918       or small immediate->reg copy and jump to a nearby (+-8k) location.
8919 
8920       * fmpyadd and fmpysub can perform a FP multiply and either an
8921       FP add or FP sub if the operands of the multiply and add/sub are
8922       independent (there are other minor restrictions).  Note both
8923       the fmpy and fadd/fsub can in theory move to better spots according
8924       to data dependencies, but for now we require the fmpy stay at a
8925       fixed location.
8926 
8927       * Many of the memory operations can perform pre & post updates
8928       of index registers.  GCC's pre/post increment/decrement addressing
8929       is far too simple to take advantage of all the possibilities.  This
8930       pass may not be suitable since those insns may not be independent.
8931 
8932       * comclr can compare two ints or an int and a register, nullify
8933       the following instruction and zero some other register.  This
8934       is more difficult to use as it's harder to find an insn which
8935       will generate a comclr than finding something like an unconditional
8936       branch.  (conditional moves & long branches create comclr insns).
8937 
8938       * Most arithmetic operations can conditionally skip the next
8939       instruction.  They can be viewed as "perform this operation
8940       and conditionally jump to this nearby location" (where nearby
8941       is an insn away).  These are difficult to use due to the
8942       branch length restrictions.  */
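/* For example (illustrative; register names and label are
   placeholders), the insn pair

	copy %r4,%r5
	b L$0017

   can be replaced by the single insn

	movb,tr %r4,%r5,L$0017

   where the always-true "tr" condition makes the branch unconditional.  */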
8943 
8944 static void
8945 pa_combine_instructions (void)
8946 {
8947   rtx_insn *anchor;
8948 
8949   /* This can get expensive since the basic algorithm is on the
8950      order of O(n^2) (or worse).  Only do it for -O2 or higher
8951      levels of optimization.  */
8952   if (optimize < 2)
8953     return;
8954 
8955   /* Walk down the list of insns looking for "anchor" insns which
8956      may be combined with "floating" insns.  As the name implies,
8957      "anchor" instructions don't move, while "floating" insns may
8958      move around.  */
8959   rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
8960   rtx_insn *new_rtx = make_insn_raw (par);
8961 
8962   for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
8963     {
8964       enum attr_pa_combine_type anchor_attr;
8965       enum attr_pa_combine_type floater_attr;
8966 
8967       /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
8968 	 Also ignore any special USE insns.  */
8969       if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
8970 	  || GET_CODE (PATTERN (anchor)) == USE
8971 	  || GET_CODE (PATTERN (anchor)) == CLOBBER)
8972 	continue;
8973 
8974       anchor_attr = get_attr_pa_combine_type (anchor);
8975       /* See if anchor is an insn suitable for combination.  */
8976       if (anchor_attr == PA_COMBINE_TYPE_FMPY
8977 	  || anchor_attr == PA_COMBINE_TYPE_FADDSUB
8978 	  || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
8979 	      && ! forward_branch_p (anchor)))
8980 	{
8981 	  rtx_insn *floater;
8982 
8983 	  for (floater = PREV_INSN (anchor);
8984 	       floater;
8985 	       floater = PREV_INSN (floater))
8986 	    {
8987 	      if (NOTE_P (floater)
8988 		  || (NONJUMP_INSN_P (floater)
8989 		      && (GET_CODE (PATTERN (floater)) == USE
8990 			  || GET_CODE (PATTERN (floater)) == CLOBBER)))
8991 		continue;
8992 
8993 	      /* Anything except a regular INSN will stop our search.  */
8994 	      if (! NONJUMP_INSN_P (floater))
8995 		{
8996 		  floater = NULL;
8997 		  break;
8998 		}
8999 
9000 	      /* See if FLOATER is suitable for combination with the
9001 		 anchor.  */
9002 	      floater_attr = get_attr_pa_combine_type (floater);
9003 	      if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9004 		   && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9005 		  || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9006 		      && floater_attr == PA_COMBINE_TYPE_FMPY))
9007 		{
9008 		  /* If ANCHOR and FLOATER can be combined, then we're
9009 		     done with this pass.  */
9010 		  if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9011 					SET_DEST (PATTERN (floater)),
9012 					XEXP (SET_SRC (PATTERN (floater)), 0),
9013 					XEXP (SET_SRC (PATTERN (floater)), 1)))
9014 		    break;
9015 		}
9016 
9017 	      else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9018 		       && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9019 		{
9020 		  if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9021 		    {
9022 		      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9023 					    SET_DEST (PATTERN (floater)),
9024 					XEXP (SET_SRC (PATTERN (floater)), 0),
9025 					XEXP (SET_SRC (PATTERN (floater)), 1)))
9026 			break;
9027 		    }
9028 		  else
9029 		    {
9030 		      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9031 					    SET_DEST (PATTERN (floater)),
9032 					    SET_SRC (PATTERN (floater)),
9033 					    SET_SRC (PATTERN (floater))))
9034 			break;
9035 		    }
9036 		}
9037 	    }
9038 
9039 	  /* If we didn't find anything on the backwards scan, try forwards.  */
9040 	  if (!floater
9041 	      && (anchor_attr == PA_COMBINE_TYPE_FMPY
9042 		  || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9043 	    {
9044 	      for (floater = anchor; floater; floater = NEXT_INSN (floater))
9045 		{
9046 		  if (NOTE_P (floater)
9047 		      || (NONJUMP_INSN_P (floater)
9048 			  && (GET_CODE (PATTERN (floater)) == USE
9049 			      || GET_CODE (PATTERN (floater)) == CLOBBER)))
9050 
9051 		    continue;
9052 
9053 		  /* Anything except a regular INSN will stop our search.  */
9054 		  if (! NONJUMP_INSN_P (floater))
9055 		    {
9056 		      floater = NULL;
9057 		      break;
9058 		    }
9059 
9060 		  /* See if FLOATER is suitable for combination with the
9061 		     anchor.  */
9062 		  floater_attr = get_attr_pa_combine_type (floater);
9063 		  if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9064 		       && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9065 		      || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9066 			  && floater_attr == PA_COMBINE_TYPE_FMPY))
9067 		    {
9068 		      /* If ANCHOR and FLOATER can be combined, then we're
9069 			 done with this pass.  */
9070 		      if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9071 					    SET_DEST (PATTERN (floater)),
9072 					    XEXP (SET_SRC (PATTERN (floater)),
9073 						  0),
9074 					    XEXP (SET_SRC (PATTERN (floater)),
9075 						  1)))
9076 			break;
9077 		    }
9078 		}
9079 	    }
9080 
9081 	  /* FLOATER will be nonzero if we found a suitable floating
9082 	     insn for combination with ANCHOR.  */
9083 	  if (floater
9084 	      && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9085 		  || anchor_attr == PA_COMBINE_TYPE_FMPY))
9086 	    {
9087 	      /* Emit the new instruction and delete the old anchor.  */
9088 	      emit_insn_before (gen_rtx_PARALLEL
9089 				(VOIDmode,
9090 				 gen_rtvec (2, PATTERN (anchor),
9091 					    PATTERN (floater))),
9092 				anchor);
9093 
9094 	      SET_INSN_DELETED (anchor);
9095 
9096 	      /* Emit a special USE insn for FLOATER, then delete
9097 		 the floating insn.  */
9098 	      emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9099 	      delete_insn (floater);
9100 
9101 	      continue;
9102 	    }
9103 	  else if (floater
9104 		   && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9105 	    {
9106 	      rtx temp;
9107 	      /* Emit the new_jump instruction and delete the old anchor.  */
9108 	      temp
9109 		= emit_jump_insn_before (gen_rtx_PARALLEL
9110 					 (VOIDmode,
9111 					  gen_rtvec (2, PATTERN (anchor),
9112 						     PATTERN (floater))),
9113 					 anchor);
9114 
9115 	      JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9116 	      SET_INSN_DELETED (anchor);
9117 
9118 	      /* Emit a special USE insn for FLOATER, then delete
9119 		 the floating insn.  */
9120 	      emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9121 	      delete_insn (floater);
9122 	      continue;
9123 	    }
9124 	}
9125     }
9126 }
9127 
9128 static int
9129 pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
9130 		  int reversed, rtx dest,
9131 		  rtx src1, rtx src2)
9132 {
9133   int insn_code_number;
9134   rtx_insn *start, *end;
9135 
9136   /* Create a PARALLEL with the patterns of ANCHOR and
9137      FLOATER, try to recognize it, then test constraints
9138      for the resulting pattern.
9139 
9140      If the pattern doesn't match or the constraints
9141      aren't met keep searching for a suitable floater
9142      insn.  */
9143   XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9144   XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9145   INSN_CODE (new_rtx) = -1;
9146   insn_code_number = recog_memoized (new_rtx);
9147   basic_block bb = BLOCK_FOR_INSN (anchor);
9148   if (insn_code_number < 0
9149       || (extract_insn (new_rtx),
9150 	  !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
9151     return 0;
9152 
9153   if (reversed)
9154     {
9155       start = anchor;
9156       end = floater;
9157     }
9158   else
9159     {
9160       start = floater;
9161       end = anchor;
9162     }
9163 
9164   /* There are up to three operands to consider: one
9165      output and two inputs.
9166 
9167      The output must not be used between FLOATER & ANCHOR
9168      exclusive.  The inputs must not be set between
9169      FLOATER and ANCHOR exclusive.  */
9170 
9171   if (reg_used_between_p (dest, start, end))
9172     return 0;
9173 
9174   if (reg_set_between_p (src1, start, end))
9175     return 0;
9176 
9177   if (reg_set_between_p (src2, start, end))
9178     return 0;
9179 
9180   /* If we get here, then everything is good.  */
9181   return 1;
9182 }
9183 
9184 /* Return nonzero if references for INSN are delayed.
9185 
9186    Millicode insns are actually function calls with some special
9187    constraints on arguments and register usage.
9188 
9189    Millicode calls always expect their arguments in the integer argument
9190    registers, and always return their result in %r29 (ret1).  They
9191    are expected to clobber their arguments, %r1, %r29, and the return
9192    pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9193 
9194    This function tells reorg that the references to arguments and
9195    millicode calls do not appear to happen until after the millicode call.
9196    This allows reorg to put insns which set the argument registers into the
9197    delay slot of the millicode call -- thus they act more like traditional
9198    CALL_INSNs.
9199 
9200    Note we cannot consider side effects of the insn to be delayed because
9201    the branch and link insn will clobber the return pointer.  If we happened
9202    to use the return pointer in the delay slot of the call, then we lose.
9203 
9204    get_attr_type will try to recognize the given insn, so make sure to
9205    filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9206    in particular.  */
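/* For example (illustrative), reorg can transform

	ldi 7,%r26
	bl $$mulI,%r31

   into

	bl $$mulI,%r31
	ldi 7,%r26

   moving the argument setup into the delay slot, because this function
   reports that the millicode call's references to its argument
   registers are delayed.  */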
9207 int
9208 pa_insn_refs_are_delayed (rtx_insn *insn)
9209 {
9210   return ((NONJUMP_INSN_P (insn)
9211 	   && GET_CODE (PATTERN (insn)) != SEQUENCE
9212 	   && GET_CODE (PATTERN (insn)) != USE
9213 	   && GET_CODE (PATTERN (insn)) != CLOBBER
9214 	   && get_attr_type (insn) == TYPE_MILLI));
9215 }
9216 
9217 /* Promote the return value, but not the arguments.  */
9218 
9219 static machine_mode
9220 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9221                           machine_mode mode,
9222                           int *punsignedp ATTRIBUTE_UNUSED,
9223                           const_tree fntype ATTRIBUTE_UNUSED,
9224                           int for_return)
9225 {
9226   if (for_return == 0)
9227     return mode;
9228   return promote_mode (type, mode, punsignedp);
9229 }
9230 
9231 /* On the HP-PA the value is found in register(s) 28(-29), unless
9232    the mode is SF or DF. Then the value is returned in fr4 (32).
9233    the mode is SF or DF, in which case the value is returned in fr4 (32).
9234    This must perform the same promotions as PROMOTE_MODE, else promoting
9235    return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9236 
9237    Small structures must be returned in a PARALLEL on PA64 in order
9238    to match the HP Compiler ABI.  */
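/* For example (illustrative), a 6-byte struct returned on the 32-bit
   port falls through to the valsize > UNITS_PER_WORD case below and
   comes back as a PARALLEL wrapping (reg:DI 28), right justified.  */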
9239 
9240 static rtx
9241 pa_function_value (const_tree valtype,
9242                    const_tree func ATTRIBUTE_UNUSED,
9243                    bool outgoing ATTRIBUTE_UNUSED)
9244 {
9245   machine_mode valmode;
9246 
9247   if (AGGREGATE_TYPE_P (valtype)
9248       || TREE_CODE (valtype) == COMPLEX_TYPE
9249       || TREE_CODE (valtype) == VECTOR_TYPE)
9250     {
9251       HOST_WIDE_INT valsize = int_size_in_bytes (valtype);
9252 
9253       /* Handle aggregates that fit exactly in a word or double word.  */
9254       if ((valsize & (UNITS_PER_WORD - 1)) == 0)
9255 	return gen_rtx_REG (TYPE_MODE (valtype), 28);
9256 
9257       if (TARGET_64BIT)
9258 	{
9259           /* Aggregates with a size less than or equal to 128 bits are
9260 	     returned in GR 28(-29).  They are left justified.  The pad
9261 	     bits are undefined.  Larger aggregates are returned in
9262 	     memory.  */
9263 	  rtx loc[2];
9264 	  int i, offset = 0;
9265 	  int ub = valsize <= UNITS_PER_WORD ? 1 : 2;
9266 
9267 	  for (i = 0; i < ub; i++)
9268 	    {
9269 	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9270 					  gen_rtx_REG (DImode, 28 + i),
9271 					  GEN_INT (offset));
9272 	      offset += 8;
9273 	    }
9274 
9275 	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9276 	}
9277       else if (valsize > UNITS_PER_WORD)
9278 	{
9279 	  /* Aggregates 5 to 8 bytes in size are returned in general
9280 	     registers r28-r29 in the same manner as other non
9281 	     floating-point objects.  The data is right-justified and
9282 	     zero-extended to 64 bits.  This is opposite to the normal
9283 	     justification used on big endian targets and requires
9284 	     special treatment.  */
9285 	  rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9286 				       gen_rtx_REG (DImode, 28), const0_rtx);
9287 	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9288 	}
9289     }
9290 
9291   if ((INTEGRAL_TYPE_P (valtype)
9292        && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9293       || POINTER_TYPE_P (valtype))
9294     valmode = word_mode;
9295   else
9296     valmode = TYPE_MODE (valtype);
9297 
9298   if (TREE_CODE (valtype) == REAL_TYPE
9299       && !AGGREGATE_TYPE_P (valtype)
9300       && TYPE_MODE (valtype) != TFmode
9301       && !TARGET_SOFT_FLOAT)
9302     return gen_rtx_REG (valmode, 32);
9303 
9304   return gen_rtx_REG (valmode, 28);
9305 }
9306 
9307 /* Implement the TARGET_LIBCALL_VALUE hook.  */
9308 
9309 static rtx
9310 pa_libcall_value (machine_mode mode,
9311 		  const_rtx fun ATTRIBUTE_UNUSED)
9312 {
9313   if (! TARGET_SOFT_FLOAT
9314       && (mode == SFmode || mode == DFmode))
9315     return gen_rtx_REG (mode, 32);
9316   else
9317     return gen_rtx_REG (mode, 28);
9318 }
9319 
9320 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook.  */
9321 
9322 static bool
9323 pa_function_value_regno_p (const unsigned int regno)
9324 {
9325   if (regno == 28
9326       || (! TARGET_SOFT_FLOAT && regno == 32))
9327     return true;
9328 
9329   return false;
9330 }
9331 
9332 /* Update the data in CUM to advance over an argument
9333    of mode MODE and data type TYPE.
9334    (TYPE is null for libcalls where that information may not be available.)  */
9335 
9336 static void
9337 pa_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
9338 			 const_tree type, bool named ATTRIBUTE_UNUSED)
9339 {
9340   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9341   int arg_size = FUNCTION_ARG_SIZE (mode, type);
9342 
9343   cum->nargs_prototype--;
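  /* Multi-word arguments start on an even word boundary, so count an
     extra pad word when such an argument begins at an odd offset
     (libcalls, which have no type information, are not padded).  */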
9344   cum->words += (arg_size
9345 		 + ((cum->words & 01)
9346 		    && type != NULL_TREE
9347 		    && arg_size > 1));
9348 }
9349 
9350 /* Return the location of a parameter that is passed in a register or NULL
9351    if the parameter has any component that is passed in memory.
9352 
9353    This is new code and will be pushed into the net sources after
9354    further testing.
9355 
9356    ??? We might want to restructure this so that it looks more like other
9357    ports.  */
9358 static rtx
9359 pa_function_arg (cumulative_args_t cum_v, machine_mode mode,
9360 		 const_tree type, bool named ATTRIBUTE_UNUSED)
9361 {
9362   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9363   int max_arg_words = (TARGET_64BIT ? 8 : 4);
9364   int alignment = 0;
9365   int arg_size;
9366   int fpr_reg_base;
9367   int gpr_reg_base;
9368   rtx retval;
9369 
9370   if (mode == VOIDmode)
9371     return NULL_RTX;
9372 
9373   arg_size = FUNCTION_ARG_SIZE (mode, type);
9374 
9375   /* If this arg would be passed partially or totally on the stack, then
9376      this routine should return zero.  pa_arg_partial_bytes will
9377      handle arguments which are split between regs and stack slots if
9378      the ABI mandates split arguments.  */
9379   if (!TARGET_64BIT)
9380     {
9381       /* The 32-bit ABI does not split arguments.  */
9382       if (cum->words + arg_size > max_arg_words)
9383 	return NULL_RTX;
9384     }
9385   else
9386     {
9387       if (arg_size > 1)
9388 	alignment = cum->words & 1;
9389       if (cum->words + alignment >= max_arg_words)
9390 	return NULL_RTX;
9391     }
9392 
9393   /* The 32bit ABIs and the 64bit ABIs are rather different,
9394      particularly in their handling of FP registers.  We might
9395      be able to cleverly share code between them, but I'm not
9396      going to bother in the hope that splitting them up results
9397      in code that is more easily understood.  */
9398 
9399   if (TARGET_64BIT)
9400     {
9401       /* Advance the base registers to their current locations.
9402 
9403          Remember, gprs grow towards smaller register numbers while
9404 	 fprs grow to higher register numbers.  Also remember that
9405 	 although FP regs are 32-bit addressable, we pretend that
9406 	 the registers are 64-bits wide.  */
9407       gpr_reg_base = 26 - cum->words;
9408       fpr_reg_base = 32 + cum->words;
9409 
9410       /* Arguments wider than one word and small aggregates need special
9411 	 treatment.  */
9412       if (arg_size > 1
9413 	  || mode == BLKmode
9414 	  || (type && (AGGREGATE_TYPE_P (type)
9415 		       || TREE_CODE (type) == COMPLEX_TYPE
9416 		       || TREE_CODE (type) == VECTOR_TYPE)))
9417 	{
9418 	  /* Double-extended precision (80-bit), quad-precision (128-bit)
9419 	     and aggregates including complex numbers are aligned on
9420 	     128-bit boundaries.  The first eight 64-bit argument slots
9421 	     are associated one-to-one, with general registers r26
9422 	     through r19, and also with floating-point registers fr4
9423 	     through fr11.  Arguments larger than one word are always
9424 	     passed in general registers.
9425 
9426 	     Using a PARALLEL with a word mode register results in left
9427 	     justified data on a big-endian target.  */
9428 
9429 	  rtx loc[8];
9430 	  int i, offset = 0, ub = arg_size;
9431 
9432 	  /* Align the base register.  */
9433 	  gpr_reg_base -= alignment;
9434 
9435 	  ub = MIN (ub, max_arg_words - cum->words - alignment);
9436 	  for (i = 0; i < ub; i++)
9437 	    {
9438 	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9439 					  gen_rtx_REG (DImode, gpr_reg_base),
9440 					  GEN_INT (offset));
9441 	      gpr_reg_base -= 1;
9442 	      offset += 8;
9443 	    }
9444 
9445 	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9446 	}
9447      }
9448   else
9449     {
9450       /* If the argument is larger than a word, then we know precisely
9451 	 which registers we must use.  */
9452       if (arg_size > 1)
9453 	{
9454 	  if (cum->words)
9455 	    {
9456 	      gpr_reg_base = 23;
9457 	      fpr_reg_base = 38;
9458 	    }
9459 	  else
9460 	    {
9461 	      gpr_reg_base = 25;
9462 	      fpr_reg_base = 34;
9463 	    }
9464 
9465 	  /* Structures 5 to 8 bytes in size are passed in the general
9466 	     registers in the same manner as other non floating-point
9467 	     objects.  The data is right-justified and zero-extended
9468 	     to 64 bits.  This is opposite to the normal justification
9469 	     used on big endian targets and requires special treatment.
9470 	     We now define BLOCK_REG_PADDING to pad these objects.
9471 	     Aggregates, complex and vector types are passed in the same
9472 	     manner as structures.  */
9473 	  if (mode == BLKmode
9474 	      || (type && (AGGREGATE_TYPE_P (type)
9475 			   || TREE_CODE (type) == COMPLEX_TYPE
9476 			   || TREE_CODE (type) == VECTOR_TYPE)))
9477 	    {
9478 	      rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9479 					   gen_rtx_REG (DImode, gpr_reg_base),
9480 					   const0_rtx);
9481 	      return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9482 	    }
9483 	}
9484       else
9485         {
9486 	   /* We have a single word (32 bits).  A simple computation
9487 	      will get us the register #s we need.  */
9488 	   gpr_reg_base = 26 - cum->words;
9489 	   fpr_reg_base = 32 + 2 * cum->words;
9490 	}
9491     }
9492 
9493   /* Determine if the argument needs to be passed in both general and
9494      floating point registers.  */
9495   if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9496        /* If we are doing soft-float with portable runtime, then there
9497 	  is no need to worry about FP regs.  */
9498        && !TARGET_SOFT_FLOAT
9499        /* The parameter must be some kind of scalar float, else we just
9500 	  pass it in integer registers.  */
9501        && GET_MODE_CLASS (mode) == MODE_FLOAT
9502        /* The target function must not have a prototype.  */
9503        && cum->nargs_prototype <= 0
9504        /* libcalls do not need to pass items in both FP and general
9505 	  registers.  */
9506        && type != NULL_TREE
9507        /* All this hair applies to "outgoing" args only.  This includes
9508 	  sibcall arguments setup with FUNCTION_INCOMING_ARG.  */
9509        && !cum->incoming)
9510       /* Also pass outgoing floating arguments in both registers in indirect
9511 	 calls with the 32-bit ABI and the HP assembler since there is no
9512 	  way to specify argument locations in static functions.  */
9513       || (!TARGET_64BIT
9514 	  && !TARGET_GAS
9515 	  && !cum->incoming
9516 	  && cum->indirect
9517 	  && GET_MODE_CLASS (mode) == MODE_FLOAT))
9518     {
9519       retval
9520 	= gen_rtx_PARALLEL
9521 	    (mode,
9522 	     gen_rtvec (2,
9523 			gen_rtx_EXPR_LIST (VOIDmode,
9524 					   gen_rtx_REG (mode, fpr_reg_base),
9525 					   const0_rtx),
9526 			gen_rtx_EXPR_LIST (VOIDmode,
9527 					   gen_rtx_REG (mode, gpr_reg_base),
9528 					   const0_rtx)));
9529     }
9530   else
9531     {
9532       /* See if we should pass this parameter in a general register.  */
9533       if (TARGET_SOFT_FLOAT
9534 	  /* Indirect calls in the normal 32bit ABI require all arguments
9535 	     to be passed in general registers.  */
9536 	  || (!TARGET_PORTABLE_RUNTIME
9537 	      && !TARGET_64BIT
9538 	      && !TARGET_ELF32
9539 	      && cum->indirect)
9540 	  /* If the parameter is not a scalar floating-point parameter,
9541 	     then it belongs in GPRs.  */
9542 	  || GET_MODE_CLASS (mode) != MODE_FLOAT
9543 	  /* Structure with single SFmode field belongs in GPR.  */
9544 	  || (type && AGGREGATE_TYPE_P (type)))
9545 	retval = gen_rtx_REG (mode, gpr_reg_base);
9546       else
9547 	retval = gen_rtx_REG (mode, fpr_reg_base);
9548     }
9549   return retval;
9550 }
9551 
9552 /* Arguments larger than one word are double word aligned.  */
9553 
9554 static unsigned int
9555 pa_function_arg_boundary (machine_mode mode, const_tree type)
9556 {
9557   bool singleword = (type
9558 		     ? (integer_zerop (TYPE_SIZE (type))
9559 			|| !TREE_CONSTANT (TYPE_SIZE (type))
9560 			|| int_size_in_bytes (type) <= UNITS_PER_WORD)
9561 		     : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9562 
9563   return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
9564 }
9565 
9566 /* If this arg would be passed totally in registers or totally on the stack,
9567    then this routine should return zero.  */
9568 
9569 static int
9570 pa_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
9571 		      tree type, bool named ATTRIBUTE_UNUSED)
9572 {
9573   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9574   unsigned int max_arg_words = 8;
9575   unsigned int offset = 0;
9576 
9577   if (!TARGET_64BIT)
9578     return 0;
9579 
9580   if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9581     offset = 1;
9582 
9583   if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9584     /* Arg fits fully into registers.  */
9585     return 0;
9586   else if (cum->words + offset >= max_arg_words)
9587     /* Arg fully on the stack.  */
9588     return 0;
9589   else
9590     /* Arg is split.  */
9591     return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
9592 }
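/* A worked example of the split case above (illustrative): on the
   64-bit port, a 4-word argument starting at word 6 has offset 0 and
   2 words left in registers, so 2 * UNITS_PER_WORD = 16 partial bytes
   are passed in registers and the remaining 2 words go on the stack.  */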
9593 
9594 
9595 /* A get_unnamed_section callback for switching to the text section.
9596 
9597    This function is only used with SOM.  Because we don't support
9598    named subspaces, we can only create a new subspace or switch back
9599    to the default text subspace.  */
9600 
9601 static void
9602 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9603 {
9604   gcc_assert (TARGET_SOM);
9605   if (TARGET_GAS)
9606     {
9607       if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9608 	{
9609 	  /* We only want to emit a .nsubspa directive once at the
9610 	     start of the function.  */
9611 	  cfun->machine->in_nsubspa = 1;
9612 
9613 	  /* Create a new subspace for the text.  This provides
9614 	     better stub placement and one-only functions.  */
9615 	  if (cfun->decl
9616 	      && DECL_ONE_ONLY (cfun->decl)
9617 	      && !DECL_WEAK (cfun->decl))
9618 	    {
9619 	      output_section_asm_op ("\t.SPACE $TEXT$\n"
9620 				     "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9621 				     "ACCESS=44,SORT=24,COMDAT");
9622 	      return;
9623 	    }
9624 	}
9625       else
9626 	{
9627 	  /* There isn't a current function or the body of the current
9628 	     function has been completed.  So, we are changing to the
9629 	     text section to output debugging information.  Thus, we
9630 	     need to forget that we are in the text section so that
9631 	     varasm.c will call us when text_section is selected again.  */
9632 	  gcc_assert (!cfun || !cfun->machine
9633 		      || cfun->machine->in_nsubspa == 2);
9634 	  in_section = NULL;
9635 	}
9636       output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9637       return;
9638     }
9639   output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9640 }
9641 
9642 /* A get_unnamed_section callback for switching to comdat data
9643    sections.  This function is only used with SOM.  */
9644 
9645 static void
9646 som_output_comdat_data_section_asm_op (const void *data)
9647 {
9648   in_section = NULL;
9649   output_section_asm_op (data);
9650 }
9651 
9652 /* Implement TARGET_ASM_INITIALIZE_SECTIONS  */
9653 
9654 static void
9655 pa_som_asm_init_sections (void)
9656 {
9657   text_section
9658     = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9659 
9660   /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9661      is not being generated.  */
9662   som_readonly_data_section
9663     = get_unnamed_section (0, output_section_asm_op,
9664 			   "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9665 
9666   /* When secondary definitions are not supported, SOM makes readonly
9667      data one-only by creating a new $LIT$ subspace in $TEXT$ with
9668      the comdat flag.  */
9669   som_one_only_readonly_data_section
9670     = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
9671 			   "\t.SPACE $TEXT$\n"
9672 			   "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9673 			   "ACCESS=0x2c,SORT=16,COMDAT");
9674 
9675 
9676   /* When secondary definitions are not supported, SOM makes data one-only
9677      by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag.  */
9678   som_one_only_data_section
9679     = get_unnamed_section (SECTION_WRITE,
9680 			   som_output_comdat_data_section_asm_op,
9681 			   "\t.SPACE $PRIVATE$\n"
9682 			   "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9683 			   "ACCESS=31,SORT=24,COMDAT");
9684 
9685   if (flag_tm)
9686     som_tm_clone_table_section
9687       = get_unnamed_section (0, output_section_asm_op,
9688 			     "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
9689 
9690   /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9691      which reference data within the $TEXT$ space (for example constant
9692      strings in the $LIT$ subspace).
9693 
9694      The assemblers (GAS and HP as) both have problems with handling
9695      the difference of two symbols which is the other correct way to
9696      reference constant data during PIC code generation.
9697 
9698      So, there's no way to reference constant data which is in the
9699      $TEXT$ space during PIC generation.  Instead place all constant
9700      data into the $PRIVATE$ subspace (this reduces sharing, but it
9701      works correctly).  */
9702   readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9703 
9704   /* We must not have a reference to an external symbol defined in a
9705      shared library in a readonly section, else the SOM linker will
9706      complain.
9707 
9708      So, we force exception information into the data section.  */
9709   exception_section = data_section;
9710 }
9711 
9712 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION.  */
9713 
9714 static section *
9715 pa_som_tm_clone_table_section (void)
9716 {
9717   return som_tm_clone_table_section;
9718 }
9719 
9720 /* On hpux10, the linker will give an error if we have a reference
9721    in the read-only data section to a symbol defined in a shared
9722    library.  Therefore, expressions that might require a reloc can
9723    not be placed in the read-only data section.  */
9724 
9725 static section *
9726 pa_select_section (tree exp, int reloc,
9727 		   unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9728 {
9729   if (TREE_CODE (exp) == VAR_DECL
9730       && TREE_READONLY (exp)
9731       && !TREE_THIS_VOLATILE (exp)
9732       && DECL_INITIAL (exp)
9733       && (DECL_INITIAL (exp) == error_mark_node
9734           || TREE_CONSTANT (DECL_INITIAL (exp)))
9735       && !reloc)
9736     {
9737       if (TARGET_SOM
9738 	  && DECL_ONE_ONLY (exp)
9739 	  && !DECL_WEAK (exp))
9740 	return som_one_only_readonly_data_section;
9741       else
9742 	return readonly_data_section;
9743     }
9744   else if (CONSTANT_CLASS_P (exp) && !reloc)
9745     return readonly_data_section;
9746   else if (TARGET_SOM
9747 	   && TREE_CODE (exp) == VAR_DECL
9748 	   && DECL_ONE_ONLY (exp)
9749 	   && !DECL_WEAK (exp))
9750     return som_one_only_data_section;
9751   else
9752     return data_section;
9753 }
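
/* A hedged illustration of the selection logic above; the declarations
   are invented for this sketch.  RELOC is nonzero when the initializer
   may require a runtime relocation.  */
#if 0
const int primes[4] = { 2, 3, 5, 7 };	/* readonly_data_section */
int counter;				/* data_section (writable) */
extern char c;
const char *cp = &c;			/* data_section (needs reloc) */
#endif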
9754 
9755 /* Implement pa_reloc_rw_mask.  */
9756 
9757 static int
9758 pa_reloc_rw_mask (void)
9759 {
9760   /* We force (const (plus (symbol) (const_int))) to memory when the
9761      const_int doesn't fit in a 14-bit integer.  The SOM linker can't
9762      handle this construct in read-only memory and we want to avoid
9763      this for ELF.  So, we always force an RTX needing relocation to
9764      the data section.  */
9765   return 3;
9766 }
9767 
9768 static void
9769 pa_globalize_label (FILE *stream, const char *name)
9770 {
9771   /* We only handle DATA objects here; functions are globalized in
9772      ASM_DECLARE_FUNCTION_NAME.  */
9773   if (! FUNCTION_NAME_P (name))
9774   {
9775     fputs ("\t.EXPORT ", stream);
9776     assemble_name (stream, name);
9777     fputs (",DATA\n", stream);
9778   }
9779 }
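
/* As a sketch, globalizing a data symbol named bss_var with the hook
   above emits

	.EXPORT bss_var,DATA

   while function symbols are exported by ASM_DECLARE_FUNCTION_NAME
   instead.  */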
9780 
9781 /* Worker function for TARGET_STRUCT_VALUE_RTX.  */
9782 
9783 static rtx
9784 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9785 		     int incoming ATTRIBUTE_UNUSED)
9786 {
9787   return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9788 }
9789 
9790 /* Worker function for TARGET_RETURN_IN_MEMORY.  */
9791 
9792 bool
9793 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9794 {
9795   /* SOM ABI says that objects larger than 64 bits are returned in memory.
9796      PA64 ABI says that objects larger than 128 bits are returned in memory.
9797      Note, int_size_in_bytes can return -1 if the size of the object is
9798      variable or larger than the maximum value that can be expressed as
9799      a HOST_WIDE_INT.  It can also return zero for an empty type.  The
9800      simplest way to handle variable and empty types is to pass them in
9801      memory.  This avoids problems in defining the boundaries of argument
9802      slots, allocating registers, etc.  */
9803   return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9804 	  || int_size_in_bytes (type) <= 0);
9805 }
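
/* A hedged illustration of the size thresholds above (struct names
   invented):

     struct s8  { int a, b; };	   8 bytes: registers on both targets
     struct s12 { int a, b, c; };  12 bytes: memory on SOM, registers
				   on the 64-bit target
     struct s20 { int a[5]; };	   20 bytes: memory on both targets  */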
9806 
9807 /* Structure to hold declaration and name of external symbols that are
9808    emitted by GCC.  We generate a vector of these symbols and output them
9809    at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9810    This avoids putting out names that are never really used.  */
9811 
9812 typedef struct GTY(()) extern_symbol
9813 {
9814   tree decl;
9815   const char *name;
9816 } extern_symbol;
9817 
9818 /* Define gc'd vector type for extern_symbol.  */
9819 
9820 /* Vector of extern_symbol pointers.  */
9821 static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
9822 
9823 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9824 /* Mark DECL (name NAME) as an external reference (assembler output
9825    file FILE).  This saves the names to output at the end of the file
9826    if actually referenced.  */
9827 
9828 void
9829 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9830 {
9831   gcc_assert (file == asm_out_file);
9832   extern_symbol p = {decl, name};
9833   vec_safe_push (extern_symbols, p);
9834 }
9835 
9836 /* Output text required at the end of an assembler file.
9837    This includes deferred plabels and .import directives for
9838    all external symbols that were actually referenced.  */
9839 
9840 static void
9841 pa_hpux_file_end (void)
9842 {
9843   unsigned int i;
9844   extern_symbol *p;
9845 
9846   if (!NO_DEFERRED_PROFILE_COUNTERS)
9847     output_deferred_profile_counters ();
9848 
9849   output_deferred_plabels ();
9850 
9851   for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
9852     {
9853       tree decl = p->decl;
9854 
9855       if (!TREE_ASM_WRITTEN (decl)
9856 	  && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9857 	ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9858     }
9859 
9860   vec_free (extern_symbols);
9861 }
9862 #endif
9863 
9864 /* Return true if a change from mode FROM to mode TO for a register
9865    in register class RCLASS is invalid.  */
9866 
9867 bool
9868 pa_cannot_change_mode_class (machine_mode from, machine_mode to,
9869 			     enum reg_class rclass)
9870 {
9871   if (from == to)
9872     return false;
9873 
9874   /* Reject changes to/from complex and vector modes.  */
9875   if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
9876       || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
9877     return true;
9878 
9879   if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
9880     return false;
9881 
9882   /* There is no way to load QImode or HImode values directly from
9883      memory.  SImode loads to the FP registers are not zero extended.
9884      On the 64-bit target, this conflicts with the definition of
9885      LOAD_EXTEND_OP.  Thus, we can't allow changing between modes
9886      with different sizes in the floating-point registers.  */
9887   if (MAYBE_FP_REG_CLASS_P (rclass))
9888     return true;
9889 
9890   /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
9891      in specific sets of registers.  Thus, we cannot allow changing
9892      to a larger mode when it's larger than a word.  */
9893   if (GET_MODE_SIZE (to) > UNITS_PER_WORD
9894       && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
9895     return true;
9896 
9897   return false;
9898 }
9899 
9900 /* Returns TRUE if it is a good idea to tie two pseudo registers
9901    when one has mode MODE1 and one has mode MODE2.
9902    If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
9903    for any hard reg, then this must be FALSE for correct output.
9904 
9905    We should return FALSE for QImode and HImode because these modes
9906    are not ok in the floating-point registers.  However, this prevents
9907    tying these modes to SImode and DImode in the general registers.
9908    So, this isn't a good idea.  We rely on HARD_REGNO_MODE_OK and
9909    CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
9910    in the floating-point registers.  */
9911 
9912 bool
9913 pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
9914 {
9915   /* Don't tie modes in different classes.  */
9916   if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
9917     return false;
9918 
9919   return true;
9920 }
9921 
9922 
9923 /* Length in units of the trampoline instruction code.  */
9924 
9925 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
9926 
9927 
9928 /* Output assembler code for a block containing the constant parts
9929    of a trampoline, leaving space for the variable parts.
9930 
9931    The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
9932    and then branches to the specified routine.
9933 
9934    This code template is copied from the text segment to a stack
9935    location, patched by pa_trampoline_init to contain valid values,
9936    and then entered as a subroutine.
9937 
9938    It is best to keep this as small as possible to avoid having to
9939    flush multiple lines in the cache.  */
9940 
9941 static void
9942 pa_asm_trampoline_template (FILE *f)
9943 {
9944   if (!TARGET_64BIT)
9945     {
9946       fputs ("\tldw	36(%r22),%r21\n", f);
9947       fputs ("\tbb,>=,n	%r21,30,.+16\n", f);
9948       if (ASSEMBLER_DIALECT == 0)
9949 	fputs ("\tdepi	0,31,2,%r21\n", f);
9950       else
9951 	fputs ("\tdepwi	0,31,2,%r21\n", f);
9952       fputs ("\tldw	4(%r21),%r19\n", f);
9953       fputs ("\tldw	0(%r21),%r21\n", f);
9954       if (TARGET_PA_20)
9955 	{
9956 	  fputs ("\tbve	(%r21)\n", f);
9957 	  fputs ("\tldw	40(%r22),%r29\n", f);
9958 	  fputs ("\t.word	0\n", f);
9959 	  fputs ("\t.word	0\n", f);
9960 	}
9961       else
9962 	{
9963 	  fputs ("\tldsid	(%r21),%r1\n", f);
9964 	  fputs ("\tmtsp	%r1,%sr0\n", f);
9965 	  fputs ("\tbe	0(%sr0,%r21)\n", f);
9966 	  fputs ("\tldw	40(%r22),%r29\n", f);
9967 	}
9968       fputs ("\t.word	0\n", f);
9969       fputs ("\t.word	0\n", f);
9970       fputs ("\t.word	0\n", f);
9971       fputs ("\t.word	0\n", f);
9972     }
9973   else
9974     {
9975       fputs ("\t.dword 0\n", f);
9976       fputs ("\t.dword 0\n", f);
9977       fputs ("\t.dword 0\n", f);
9978       fputs ("\t.dword 0\n", f);
9979       fputs ("\tmfia	%r31\n", f);
9980       fputs ("\tldd	24(%r31),%r1\n", f);
9981       fputs ("\tldd	24(%r1),%r27\n", f);
9982       fputs ("\tldd	16(%r1),%r1\n", f);
9983       fputs ("\tbve	(%r1)\n", f);
9984       fputs ("\tldd	32(%r31),%r31\n", f);
9985       fputs ("\t.dword 0  ; fptr\n", f);
9986       fputs ("\t.dword 0  ; static link\n", f);
9987     }
9988 }
9989 
9990 /* Emit RTL insns to initialize the variable parts of a trampoline.
9991    FNADDR is an RTX for the address of the function's pure code.
9992    CXT is an RTX for the static chain value for the function.
9993 
9994    Move the function address to the trampoline template at offset 36.
9995    Move the static chain value to the trampoline template at offset 40.
9996    Move the trampoline address to the trampoline template at offset 44.
9997    Move r19 to the trampoline template at offset 48.  The latter two
9998    words create a plabel for the indirect call to the trampoline.
9999 
10000    A similar sequence is used for the 64-bit port but the plabel is
10001    at the beginning of the trampoline.
10002 
10003    Finally, the cache entries for the trampoline code are flushed.
10004    This is necessary to ensure that the trampoline instruction sequence
10005    is written to memory prior to any attempts at prefetching the code
10006    sequence.  */
10007 
10008 static void
10009 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10010 {
10011   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10012   rtx start_addr = gen_reg_rtx (Pmode);
10013   rtx end_addr = gen_reg_rtx (Pmode);
10014   rtx line_length = gen_reg_rtx (Pmode);
10015   rtx r_tramp, tmp;
10016 
10017   emit_block_move (m_tramp, assemble_trampoline_template (),
10018 		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10019   r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10020 
10021   if (!TARGET_64BIT)
10022     {
10023       tmp = adjust_address (m_tramp, Pmode, 36);
10024       emit_move_insn (tmp, fnaddr);
10025       tmp = adjust_address (m_tramp, Pmode, 40);
10026       emit_move_insn (tmp, chain_value);
10027 
10028       /* Create a fat pointer for the trampoline.  */
10029       tmp = adjust_address (m_tramp, Pmode, 44);
10030       emit_move_insn (tmp, r_tramp);
10031       tmp = adjust_address (m_tramp, Pmode, 48);
10032       emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10033 
10034       /* fdc and fic only use registers for the address to flush;
10035 	 they do not accept integer displacements.  We align the
10036 	 start and end addresses to the beginning of their respective
10037 	 cache lines to minimize the number of lines flushed.  */
10038       emit_insn (gen_andsi3 (start_addr, r_tramp,
10039 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10040       tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
10041 					     TRAMPOLINE_CODE_SIZE-1));
10042       emit_insn (gen_andsi3 (end_addr, tmp,
10043 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10044       emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10045       emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10046       emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10047 				    gen_reg_rtx (Pmode),
10048 				    gen_reg_rtx (Pmode)));
10049     }
10050   else
10051     {
10052       tmp = adjust_address (m_tramp, Pmode, 56);
10053       emit_move_insn (tmp, fnaddr);
10054       tmp = adjust_address (m_tramp, Pmode, 64);
10055       emit_move_insn (tmp, chain_value);
10056 
10057       /* Create a fat pointer for the trampoline.  */
10058       tmp = adjust_address (m_tramp, Pmode, 16);
10059       emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
10060 							    r_tramp, 32)));
10061       tmp = adjust_address (m_tramp, Pmode, 24);
10062       emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10063 
10064       /* fdc and fic only use registers for the address to flush;
10065 	 they do not accept integer displacements.  We align the
10066 	 start and end addresses to the beginning of their respective
10067 	 cache lines to minimize the number of lines flushed.  */
10068       tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
10069       emit_insn (gen_anddi3 (start_addr, tmp,
10070 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10071       tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
10072 					     TRAMPOLINE_CODE_SIZE - 1));
10073       emit_insn (gen_anddi3 (end_addr, tmp,
10074 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10075       emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10076       emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10077       emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10078 				    gen_reg_rtx (Pmode),
10079 				    gen_reg_rtx (Pmode)));
10080     }
10081 
10082 #ifdef HAVE_ENABLE_EXECUTE_STACK
10083   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10084 		     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
10085 #endif
10086 }
10087 
10088 /* Perform any machine-specific adjustment in the address of the trampoline.
10089    ADDR contains the address that was passed to pa_trampoline_init.
10090    Adjust the trampoline address to point to the plabel at offset 44.
   The address is biased by 2 (hence the constant 46 below) to mark it
   as a pointer to a plabel for $$dyncall.  */
10091 
10092 static rtx
10093 pa_trampoline_adjust_address (rtx addr)
10094 {
10095   if (!TARGET_64BIT)
10096     addr = memory_address (Pmode, plus_constant (Pmode, addr, 46));
10097   return addr;
10098 }
10099 
10100 static rtx
10101 pa_delegitimize_address (rtx orig_x)
10102 {
10103   rtx x = delegitimize_mem_from_attrs (orig_x);
10104 
10105   if (GET_CODE (x) == LO_SUM
10106       && GET_CODE (XEXP (x, 1)) == UNSPEC
10107       && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10108     return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10109   return x;
10110 }
10111 
10112 static rtx
10113 pa_internal_arg_pointer (void)
10114 {
10115   /* The argument pointer and the hard frame pointer are the same in
10116      the 32-bit runtime, so we don't need a copy.  */
10117   if (TARGET_64BIT)
10118     return copy_to_reg (virtual_incoming_args_rtx);
10119   else
10120     return virtual_incoming_args_rtx;
10121 }
10122 
10123 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10124    Frame pointer elimination is automatically handled.  */
10125 
10126 static bool
10127 pa_can_eliminate (const int from, const int to)
10128 {
10129   /* The argument pointer cannot be eliminated in the 64-bit runtime.  */
10130   if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10131     return false;
10132 
10133   return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10134           ? ! frame_pointer_needed
10135           : true);
10136 }
10137 
10138 /* Define the offset between two registers, FROM to be eliminated and its
10139    replacement TO, at the start of a routine.  */
10140 HOST_WIDE_INT
10141 pa_initial_elimination_offset (int from, int to)
10142 {
10143   HOST_WIDE_INT offset;
10144 
10145   if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10146       && to == STACK_POINTER_REGNUM)
10147     offset = -pa_compute_frame_size (get_frame_size (), 0);
10148   else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10149     offset = 0;
10150   else
10151     gcc_unreachable ();
10152 
10153   return offset;
10154 }
10155 
10156 static void
10157 pa_conditional_register_usage (void)
10158 {
10159   int i;
10160 
10161   if (!TARGET_64BIT && !TARGET_PA_11)
10162     {
10163       for (i = 56; i <= FP_REG_LAST; i++)
10164 	fixed_regs[i] = call_used_regs[i] = 1;
10165       for (i = 33; i < 56; i += 2)
10166 	fixed_regs[i] = call_used_regs[i] = 1;
10167     }
10168   if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
10169     {
10170       for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10171 	fixed_regs[i] = call_used_regs[i] = 1;
10172     }
10173   if (flag_pic)
10174     fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10175 }
10176 
10177 /* Target hook for c_mode_for_suffix.  */
10178 
10179 static machine_mode
10180 pa_c_mode_for_suffix (char suffix)
10181 {
10182   if (HPUX_LONG_DOUBLE_LIBRARY)
10183     {
10184       if (suffix == 'q')
10185 	return TFmode;
10186     }
10187 
10188   return VOIDmode;
10189 }
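
/* With HPUX_LONG_DOUBLE_LIBRARY, this hook gives a constant written
   with the 'q' suffix TFmode, e.g. (illustrative):

     long double x = 1.5q;  */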
10190 
10191 /* Target hook for function_section.  */
10192 
10193 static section *
10194 pa_function_section (tree decl, enum node_frequency freq,
10195 		     bool startup, bool exit)
10196 {
10197   /* Put functions in text section if target doesn't have named sections.  */
10198   if (!targetm_common.have_named_sections)
10199     return text_section;
10200 
10201   /* Force nested functions into the same section as the containing
10202      function.  */
10203   if (decl
10204       && DECL_SECTION_NAME (decl) == NULL
10205       && DECL_CONTEXT (decl) != NULL_TREE
10206       && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10207       && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
10208     return function_section (DECL_CONTEXT (decl));
10209 
10210   /* Otherwise, use the default function section.  */
10211   return default_function_section (decl, freq, startup, exit);
10212 }
10213 
10214 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10215 
10216    In 64-bit mode, we reject CONST_DOUBLES.  Prior to reload, we also
10217    reject CONST_INTS that need more than three instructions to load.  This
10218    limit is somewhat arbitrary.  It takes three instructions to load a
10219    CONST_INT from memory but two are memory accesses.  It may be better
10220    to increase the allowed range for CONST_INTS.  We may also be able
10221    to handle CONST_DOUBLES.  */
10222 
10223 static bool
10224 pa_legitimate_constant_p (machine_mode mode, rtx x)
10225 {
10226   if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
10227     return false;
10228 
10229   if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
10230     return false;
10231 
10232   /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
10233      legitimate constants.  The other variants can't be handled by
10234      the move patterns after reload starts.  */
10235   if (tls_referenced_p (x))
10236     return false;
10237 
10238   if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
10239     return false;
10240 
10241   if (TARGET_64BIT
10242       && HOST_BITS_PER_WIDE_INT > 32
10243       && GET_CODE (x) == CONST_INT
10244       && !reload_in_progress
10245       && !reload_completed
10246       && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
10247       && !pa_cint_ok_for_move (INTVAL (x)))
10248     return false;
10249 
10250   if (function_label_operand (x, mode))
10251     return false;
10252 
10253   return true;
10254 }
10255 
10256 /* Implement TARGET_SECTION_TYPE_FLAGS.  */
10257 
10258 static unsigned int
10259 pa_section_type_flags (tree decl, const char *name, int reloc)
10260 {
10261   unsigned int flags;
10262 
10263   flags = default_section_type_flags (decl, name, reloc);
10264 
10265   /* Function labels are placed in the constant pool.  This can
10266      cause a section conflict if decls are put in ".data.rel.ro"
10267      or ".data.rel.ro.local" using the __attribute__ construct.  */
10268   if (strcmp (name, ".data.rel.ro") == 0
10269       || strcmp (name, ".data.rel.ro.local") == 0)
10270     flags |= SECTION_WRITE | SECTION_RELRO;
10271 
10272   return flags;
10273 }
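
/* For example (hypothetical declaration), a decl placed in
   ".data.rel.ro" with the attribute below would otherwise provoke a
   section type conflict, since function labels live in the constant
   pool on this target.  */
#if 0
extern void f (void);
void (*const fp) (void) __attribute__ ((section (".data.rel.ro"))) = f;
#endif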
10274 
10275 /* pa_legitimate_address_p recognizes an RTL expression that is a
10276    valid memory address for an instruction.  The MODE argument is the
10277    machine mode for the MEM expression that wants to use this address.
10278 
10279    On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10280    REG+REG, and REG+(REG*SCALE).  The indexed address forms are only
10281    available with floating point loads and stores, and integer loads.
10282    We get better code by allowing indexed addresses in the initial
10283    RTL generation.
10284 
10285    The acceptance of indexed addresses as legitimate implies that we
10286    must provide patterns for doing indexed integer stores, or the move
10287    expanders must force the address of an indexed store to a register.
10288    We have adopted the latter approach.
10289 
10290    Another function of pa_legitimate_address_p is to ensure that
10291    the base register is a valid pointer for indexed instructions.
10292    On targets that have non-equivalent space registers, we have to
10293    know at the time of assembler output which register in a REG+REG
10294    pair is the base register.  The REG_POINTER flag is sometimes lost
10295    in reload and the following passes, so it can't be relied on during
10296    code generation.  Thus, we either have to canonicalize the order
10297    of the registers in REG+REG indexed addresses, or treat REG+REG
10298    addresses separately and provide patterns for both permutations.
10299 
10300    The latter approach requires several hundred additional lines of
10301    code in pa.md.  The downside to canonicalizing is that a PLUS
10302    in the wrong order can't combine to form a scaled indexed
10303    memory operand.  As we won't need to canonicalize the operands if
10304    the REG_POINTER lossage can be fixed, it seems better to canonicalize.
10305 
10306    We initially break out scaled indexed addresses in canonical order
10307    in pa_emit_move_sequence.  LEGITIMIZE_ADDRESS also canonicalizes
10308    scaled indexed addresses during RTL generation.  However, fold_rtx
10309    has its own opinion on how the operands of a PLUS should be ordered.
10310    If one of the operands is equivalent to a constant, it will make
10311    that operand the second operand.  As the base register is likely to
10312    be equivalent to a SYMBOL_REF, we have made it the second operand.
10313 
10314    pa_legitimate_address_p accepts REG+REG as legitimate when the
10315    operands are in the order INDEX+BASE on targets with non-equivalent
10316    space registers, and in any order on targets with equivalent space
10317    registers.  It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10318 
10319    We treat a SYMBOL_REF as legitimate if it is part of the current
10320    function's constant-pool, because such addresses can actually be
10321    output as REG+SMALLINT.  */
10322 
10323 static bool
10324 pa_legitimate_address_p (machine_mode mode, rtx x, bool strict)
10325 {
10326   if ((REG_P (x)
10327        && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
10328 		  : REG_OK_FOR_BASE_P (x)))
10329       || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
10330 	   || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
10331 	  && REG_P (XEXP (x, 0))
10332 	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10333 		     : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
10334     return true;
10335 
10336   if (GET_CODE (x) == PLUS)
10337     {
10338       rtx base, index;
10339 
10340       /* For REG+REG, the base register should be in XEXP (x, 1),
10341 	 so check it first.  */
10342       if (REG_P (XEXP (x, 1))
10343 	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
10344 		     : REG_OK_FOR_BASE_P (XEXP (x, 1))))
10345 	base = XEXP (x, 1), index = XEXP (x, 0);
10346       else if (REG_P (XEXP (x, 0))
10347 	       && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10348 			  : REG_OK_FOR_BASE_P (XEXP (x, 0))))
10349 	base = XEXP (x, 0), index = XEXP (x, 1);
10350       else
10351 	return false;
10352 
10353       if (GET_CODE (index) == CONST_INT)
10354 	{
10355 	  if (INT_5_BITS (index))
10356 	    return true;
10357 
10358 	  /* When INT14_OK_STRICT is false, a secondary reload is needed
10359 	     to adjust the displacement of SImode and DImode floating point
10360 	     instructions but this may fail when the register also needs
10361 	     reloading.  So, we return false when STRICT is true.  We
10362 	     also reject long displacements for float mode addresses since
10363 	     the majority of accesses will use floating point instructions
10364 	     that don't support 14-bit offsets.  */
10365 	  if (!INT14_OK_STRICT
10366 	      && (strict || !(reload_in_progress || reload_completed))
10367 	      && mode != QImode
10368 	      && mode != HImode)
10369 	    return false;
10370 
10371 	  return base14_operand (index, mode);
10372 	}
10373 
10374       if (!TARGET_DISABLE_INDEXING
10375 	  /* Only accept the "canonical" INDEX+BASE operand order
10376 	     on targets with non-equivalent space registers.  */
10377 	  && (TARGET_NO_SPACE_REGS
10378 	      ? REG_P (index)
10379 	      : (base == XEXP (x, 1) && REG_P (index)
10380 		 && (reload_completed
10381 		     || (reload_in_progress && HARD_REGISTER_P (base))
10382 		     || REG_POINTER (base))
10383 		 && (reload_completed
10384 		     || (reload_in_progress && HARD_REGISTER_P (index))
10385 		     || !REG_POINTER (index))))
10386 	  && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
10387 	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
10388 		     : REG_OK_FOR_INDEX_P (index))
10389 	  && borx_reg_operand (base, Pmode)
10390 	  && borx_reg_operand (index, Pmode))
10391 	return true;
10392 
10393       if (!TARGET_DISABLE_INDEXING
10394 	  && GET_CODE (index) == MULT
10395 	  && MODE_OK_FOR_SCALED_INDEXING_P (mode)
10396 	  && REG_P (XEXP (index, 0))
10397 	  && GET_MODE (XEXP (index, 0)) == Pmode
10398 	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
10399 		     : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
10400 	  && GET_CODE (XEXP (index, 1)) == CONST_INT
10401 	  && INTVAL (XEXP (index, 1))
10402 	     == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
10403 	  && borx_reg_operand (base, Pmode))
10404 	return true;
10405 
10406       return false;
10407     }
10408 
10409   if (GET_CODE (x) == LO_SUM)
10410     {
10411       rtx y = XEXP (x, 0);
10412 
10413       if (GET_CODE (y) == SUBREG)
10414 	y = SUBREG_REG (y);
10415 
10416       if (REG_P (y)
10417 	  && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
10418 		     : REG_OK_FOR_BASE_P (y)))
10419 	{
10420 	  /* Needed for -fPIC */
10421 	  if (mode == Pmode
10422 	      && GET_CODE (XEXP (x, 1)) == UNSPEC)
10423 	    return true;
10424 
10425 	  if (!INT14_OK_STRICT
10426 	      && (strict || !(reload_in_progress || reload_completed))
10427 	      && mode != QImode
10428 	      && mode != HImode)
10429 	    return false;
10430 
10431 	  if (CONSTANT_P (XEXP (x, 1)))
10432 	    return true;
10433 	}
10434       return false;
10435     }
10436 
10437   if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
10438     return true;
10439 
10440   return false;
10441 }
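
/* A few hedged examples of SImode addresses the predicate above
   accepts (register numbers arbitrary):

     (reg %r26)				  plain base register
     (plus (reg %r26) (const_int 40))	  REG+SMALLINT
     (plus (reg %r25) (reg %r26))	  unscaled index, INDEX+BASE
     (plus (mult (reg %r25)
		 (const_int 4))
	   (reg %r26))			  scaled index, scale == mode size

   The indexed forms additionally require indexing to be enabled and,
   on targets with non-equivalent space registers, the base register
   to be identifiable as a pointer.  */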
10442 
10443 /* Look for machine dependent ways to make the invalid address AD a
10444    valid address.
10445 
10446    For the PA, transform:
10447 
10448         memory(X + <large int>)
10449 
10450    into:
10451 
10452         if (<large int> & mask) >= 16
10453           Y = (<large int> & ~mask) + mask + 1  Round up.
10454         else
10455           Y = (<large int> & ~mask)             Round down.
10456         Z = X + Y
10457         memory (Z + (<large int> - Y));
10458 
10459    This makes reload inheritance and reload_cse work better since Z
10460    can be reused.
10461 
10462    There may be more opportunities to improve code with this hook.  */
10463 
10464 rtx
10465 pa_legitimize_reload_address (rtx ad, machine_mode mode,
10466 			      int opnum, int type,
10467 			      int ind_levels ATTRIBUTE_UNUSED)
10468 {
10469   long offset, newoffset, mask;
10470   rtx new_rtx, temp = NULL_RTX;
10471 
10472   mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
10473 	  && !INT14_OK_STRICT ? 0x1f : 0x3fff);
10474 
10475   if (optimize && GET_CODE (ad) == PLUS)
10476     temp = simplify_binary_operation (PLUS, Pmode,
10477 				      XEXP (ad, 0), XEXP (ad, 1));
10478 
10479   new_rtx = temp ? temp : ad;
10480 
10481   if (optimize
10482       && GET_CODE (new_rtx) == PLUS
10483       && GET_CODE (XEXP (new_rtx, 0)) == REG
10484       && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
10485     {
10486       offset = INTVAL (XEXP ((new_rtx), 1));
10487 
10488       /* Choose rounding direction.  Round up if we are >= halfway.  */
10489       if ((offset & mask) >= ((mask + 1) / 2))
10490 	newoffset = (offset & ~mask) + mask + 1;
10491       else
10492 	newoffset = offset & ~mask;
10493 
10494       /* Ensure that long displacements are aligned.  */
10495       if (mask == 0x3fff
10496 	  && (GET_MODE_CLASS (mode) == MODE_FLOAT
10497 	      || (TARGET_64BIT && (mode) == DImode)))
10498 	newoffset &= ~(GET_MODE_SIZE (mode) - 1);
10499 
10500       if (newoffset != 0 && VAL_14_BITS_P (newoffset))
10501 	{
10502 	  temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
10503 			       GEN_INT (newoffset));
10504 	  ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
10505 	  push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
10506 		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10507 		       opnum, (enum reload_type) type);
10508 	  return ad;
10509 	}
10510     }
10511 
10512   return NULL_RTX;
10513 }
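
/* Worked example (values invented): for an SFmode access when
   INT14_OK_STRICT is false, MASK is 0x1f.  Given

	memory (%r3 + 100)

   offset & mask = 4, below the halfway point of 16, so we round down
   and newoffset = 96.  The address is rewritten as

	memory ((%r3 + 96) + 4)

   where %r3 + 96 is pushed as a reload that can be inherited, and the
   residual displacement 4 fits the 5-bit field of the floating-point
   load and store instructions.  */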
10514 
10515 /* Output address vector.  */
10516 
10517 void
10518 pa_output_addr_vec (rtx lab, rtx body)
10519 {
10520   int idx, vlen = XVECLEN (body, 0);
10521 
10522   targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10523   if (TARGET_GAS)
10524     fputs ("\t.begin_brtab\n", asm_out_file);
10525   for (idx = 0; idx < vlen; idx++)
10526     {
10527       ASM_OUTPUT_ADDR_VEC_ELT
10528 	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10529     }
10530   if (TARGET_GAS)
10531     fputs ("\t.end_brtab\n", asm_out_file);
10532 }
10533 
10534 /* Output address difference vector.  */
10535 
10536 void
10537 pa_output_addr_diff_vec (rtx lab, rtx body)
10538 {
10539   rtx base = XEXP (XEXP (body, 0), 0);
10540   int idx, vlen = XVECLEN (body, 1);
10541 
10542   targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10543   if (TARGET_GAS)
10544     fputs ("\t.begin_brtab\n", asm_out_file);
10545   for (idx = 0; idx < vlen; idx++)
10546     {
10547       ASM_OUTPUT_ADDR_DIFF_ELT
10548 	(asm_out_file,
10549 	 body,
10550 	 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10551 	 CODE_LABEL_NUMBER (base));
10552     }
10553   if (TARGET_GAS)
10554     fputs ("\t.end_brtab\n", asm_out_file);
10555 }
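
/* Roughly, a three-entry jump table emitted by the routines above
   looks like this (label numbers invented):

	L$0012:
		.begin_brtab
		.word L$0013
		.word L$0014
		.word L$0015
		.end_brtab

   with the .begin_brtab/.end_brtab markers emitted only for GAS.  */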
10556 
10557 /* This is a helper function for the other atomic operations.  This function
10558    emits a loop containing SEQ that iterates until a compare-and-swap
10559    operation at the end succeeds.  MEM is the memory to be modified.  SEQ is
10560    a set of instructions that takes a value from OLD_REG as an input and
10561    produces a value in NEW_REG as an output.  Before SEQ, OLD_REG will be
10562    set to the current contents of MEM.  After SEQ, a compare-and-swap will
10563    attempt to update MEM with NEW_REG.  The function returns true when the
10564    loop was generated successfully.  */
10565 
10566 static bool
10567 pa_expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
10568 {
10569   machine_mode mode = GET_MODE (mem);
10570   rtx_code_label *label;
10571   rtx cmp_reg, success, oldval;
10572 
10573   /* The loop we want to generate looks like
10574 
10575         cmp_reg = mem;
10576       label:
10577         old_reg = cmp_reg;
10578         seq;
10579         (success, cmp_reg) = compare-and-swap(mem, old_reg, new_reg)
10580         if (!success)
10581           goto label;
10582 
10583      Note that we only do the plain load from memory once.  Subsequent
10584      iterations use the value loaded by the compare-and-swap pattern.  */
10585 
10586   label = gen_label_rtx ();
10587   cmp_reg = gen_reg_rtx (mode);
10588 
10589   emit_move_insn (cmp_reg, mem);
10590   emit_label (label);
10591   emit_move_insn (old_reg, cmp_reg);
10592   if (seq)
10593     emit_insn (seq);
10594 
10595   success = NULL_RTX;
10596   oldval = cmp_reg;
10597   if (!expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg,
10598                                        new_reg, false, MEMMODEL_SYNC_SEQ_CST,
10599                                        MEMMODEL_RELAXED))
10600     return false;
10601 
10602   if (oldval != cmp_reg)
10603     emit_move_insn (cmp_reg, oldval);
10604 
10605   /* Mark this jump predicted not taken.  */
10606   emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx,
10607                            GET_MODE (success), 1, label, 0);
10608   return true;
10609 }
10610 
10611 /* This function tries to implement an atomic exchange operation using a
10612    compare_and_swap loop.  VAL is written to *MEM.  The previous contents of
10613    *MEM are returned, using TARGET if possible.  No memory model is required
10614    since a compare_and_swap loop is seq-cst.  */
10615 
10616 rtx
10617 pa_maybe_emit_compare_and_swap_exchange_loop (rtx target, rtx mem, rtx val)
10618 {
10619   machine_mode mode = GET_MODE (mem);
10620 
10621   if (can_compare_and_swap_p (mode, true))
10622     {
10623       if (!target || !register_operand (target, mode))
10624         target = gen_reg_rtx (mode);
10625       if (pa_expand_compare_and_swap_loop (mem, target, val, NULL_RTX))
10626         return target;
10627     }
10628 
10629   return NULL_RTX;
10630 }
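
/* Behaviorally, the exchange loop built above corresponds to this C
   sketch (illustrative only; the real sequence is emitted as RTL):

     old = *mem;
     do
       target = old;
     while (!compare_and_swap (mem, &old, val));
     return target;

   where compare_and_swap stores the current contents of *MEM into OLD
   on failure, so only the first iteration performs a plain load.  */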
10631 
10632 #include "gt-pa.h"
10633