/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992-2013 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "tree.h"
#include "output.h"
#include "dbxout.h"
#include "except.h"
#include "expr.h"
#include "optabs.h"
#include "reload.h"
#include "function.h"
#include "diagnostic-core.h"
#include "ggc.h"
#include "recog.h"
#include "predict.h"
#include "tm_p.h"
#include "target.h"
#include "common/common-target.h"
#include "target-def.h"
#include "langhooks.h"
#include "df.h"
#include "opts.h"

/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
pa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
{
  enum machine_mode store_mode;
  enum machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
	  && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}
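
/* Editorial annotation (not from the original source): the bypass is
   only valid when the producer and the store move values of the same
   width.  For example, an SFmode result feeding an SFmode FP store can
   use the bypass, while an SFmode result feeding a DFmode store fails
   the GET_MODE_SIZE comparison above.  */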

#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif

static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (enum machine_mode mode, reg_class_t,
				    reg_class_t);
static int hppa_address_cost (rtx, enum machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, int, int, int, int *, bool);
static inline rtx force_mode (enum machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
static bool forward_branch_p (rtx);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx);
static int compute_clrmem_length (rtx);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (enum machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx, rtx, rtx, int);
static int pa_adjust_priority (rtx, int);
static int pa_issue_rate (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, enum machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (enum machine_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, enum machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
static void pa_hpux_file_end (void);
#endif
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t, enum machine_mode,
				  const_tree, bool);
static int pa_arg_partial_bytes (cumulative_args_t, enum machine_mode,
				 tree, bool);
static void pa_function_arg_advance (cumulative_args_t, enum machine_mode,
				     const_tree, bool);
static rtx pa_function_arg (cumulative_args_t, enum machine_mode,
			    const_tree, bool);
static unsigned int pa_function_arg_boundary (enum machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
					enum machine_mode,
					secondary_reload_info *);
static void pa_extra_live_on_entry (bitmap);
static enum machine_mode pa_promote_function_mode (const_tree,
						   enum machine_mode, int *,
						   const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static enum machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (enum machine_mode, rtx);
static bool pa_legitimate_constant_p (enum machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);
static bool pa_legitimate_address_p (enum machine_mode, rtx, bool);

/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;

/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#ifdef ASM_OUTPUT_EXTERNAL_REAL
#define TARGET_ASM_FILE_END pa_hpux_file_end
#else
#define TARGET_ASM_FILE_END output_deferred_plabels
#endif

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p

struct gcc_target targetm = TARGET_INITIALIZER;

/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of -mfixed-range must have form REG1-REG2");
	  return;
	}
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}
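
/* Editorial annotation (illustrative usage, not from the original
   source): a command line such as

     -mfixed-range=fr12-fr15,fr19-fr22

   marks each listed register as fixed and call-used.  If the given
   ranges end up covering every register from FP_REG_FIRST to
   FP_REG_LAST, the loop above also sets MASK_DISABLE_FPREGS.  */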

/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
	switch (opt->opt_index)
	  {
	  case OPT_mfixed_range_:
	    fix_range (opt->arg);
	    break;

	  default:
	    gcc_unreachable ();
	  }
      }

  /* Unconditional branches in the delay slot are not compatible with dwarf2
     call frame information.  There is no benefit in using this optimization
     on PA8000 and later processors.  */
  if (pa_cpu >= PROCESSOR_8000
      || (targetm_common.except_unwind_info (&global_options) == UI_DWARF2
	  && flag_exceptions)
      || flag_unwind_tables)
    target_flags &= ~MASK_JUMP_IN_DELAY;

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

#ifdef AUTO_INC_DEC
  /* FIXME: Disable auto increment and decrement processing until reload
     is completed.  See PR middle-end 56791.  */
  flag_auto_inc_dec = reload_completed;
#endif

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64-bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "-freorder-blocks-and-partition does not work "
              "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}

enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];

static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
		      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
						 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
				   PA_BUILTIN_FABSQ, BUILT_IN_MD,
				   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
				   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
				   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
				   PA_BUILTIN_INFQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
				   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}
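
/* Editorial annotation (illustrative, not from the original source):
   with HPUX_LONG_DOUBLE_LIBRARY, the registrations above let user code
   such as

     __float128 x = __builtin_fabsq (y);

   resolve to the library routine _U_Qfabs, since __float128 is
   registered as a synonym for the 128-bit long double type on this
   target.  */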

static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		   enum machine_mode mode ATTRIBUTE_UNUSED,
		   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
	enum machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);

	tmp = validize_mem (force_const_mem (target_mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (target_mode);

	emit_move_insn (target, tmp);
	return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}

/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_alloc_cleared_machine_function ();
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
pa_symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return symbolic_operand (x, VOIDmode);
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
pa_cint_ok_for_move (HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
	  || pa_ldil_cint_p (ival)
	  || pa_zdepi_cint_p (ival));
}
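
/* Editorial annotation (worked examples, not from the original source):
   5 fits in 14 bits, so ldo can load it; 0x12345800 has its low 11 bits
   clear and no bits above bit 30, so ldil can load it; 0x3f8 is a
   contiguous run of ones, so zdepi can generate it.  A value such as
   0x12345 passes none of the three tests and needs more than one
   instruction.  */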

/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
pa_ldil_cint_p (HOST_WIDE_INT ival)
{
  HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);

  return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
}
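
/* Editorial annotation (worked example, not from the original source):
   the mask above keeps bits 0-10 and bits 31 and up.  For
   IVAL = 0x12345800 every kept bit is zero, so ldil works; for
   IVAL = 0x12345678 the low bits 0x678 survive the mask and the test
   fails.  */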

/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
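
/* Editorial annotation (worked example, not from the original source):
   for x = 0x3f8, lsb_mask = 0x8 and t = ((0x3f8 >> 4) + 0x8) & ~0x7
   = 0x40, a power of two, so zdepi applies.  For x = 0x101, t = 0x11,
   which is not a power of two: the set bits span more than a
   sign-extended 5-bit immediate can cover.  */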

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit patterns like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
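
/* Editorial annotation (worked example, not from the original source,
   treating values as sign-extended 32-bit constants): mask = 0xfffff00f
   inverts to 0xff0, a single contiguous run; adding its lowest set bit
   gives 0x1000, a power of two, so the mask is accepted.  For
   mask = 0xfffff005 the inverse 0xffa is not contiguous and the test
   fails.  */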

/* True iff depi can be used to compute (reg | MASK).  */
int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
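
/* Editorial annotation (worked example, not from the original source):
   mask = 0x7f0 is one contiguous run of ones, so adding its lowest set
   bit yields the power of two 0x800 and depi can deposit it;
   mask = 0x505 yields 0x506, which is not a power of two, so it is
   rejected.  */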

/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

static rtx
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx insn;

      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 multiple word sizes.

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (VOIDmode, reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
	 and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
	{
	  /* Extract LABEL_REF.  */
	  if (GET_CODE (orig) == CONST)
	    orig = XEXP (XEXP (orig, 0), 0);
	  /* Extract CODE_LABEL.  */
	  orig = XEXP (orig, 0);
	  add_reg_note (insn, REG_LABEL_OPERAND, orig);
	  /* Make sure we have a label and not a note.  */
	  if (LABEL_P (orig))
	    LABEL_NUSES (orig)++;
	}
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx insn, tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
	 result.  This allows the sequence to be deleted when the final
	 result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
		 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
	{
	  /* Force function label into memory in word mode.  */
	  orig = XEXP (force_const_mem (word_mode, orig), 0);
	  /* Load plabel address from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	  emit_move_insn (reg, pic_ref);
	  /* Now load address of function descriptor.  */
	  pic_ref = gen_rtx_MEM (Pmode, reg);
	}
      else
	{
	  /* Load symbol reference from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	}

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
	{
	  if (INT_14_BITS (orig))
	    return plus_constant (Pmode, base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
			   LCT_CONST, Pmode, 1, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, insn, tmp, t1, t2, tp;

  /* Currently, we can't handle anything but a SYMBOL_REF.  */
  if (GET_CODE (addr) != SYMBOL_REF)
    return addr;

  switch (SYMBOL_REF_TLS_MODEL (addr))
    {
      case TLS_MODEL_GLOBAL_DYNAMIC:
	tmp = gen_reg_rtx (Pmode);
	if (flag_pic)
	  emit_insn (gen_tgd_load_pic (tmp, addr));
	else
	  emit_insn (gen_tgd_load (tmp, addr));
	ret = hppa_tls_call (tmp);
	break;

      case TLS_MODEL_LOCAL_DYNAMIC:
	ret = gen_reg_rtx (Pmode);
	tmp = gen_reg_rtx (Pmode);
	start_sequence ();
	if (flag_pic)
	  emit_insn (gen_tld_load_pic (tmp, addr));
	else
	  emit_insn (gen_tld_load (tmp, addr));
	t1 = hppa_tls_call (tmp);
	insn = get_insns ();
	end_sequence ();
	t2 = gen_reg_rtx (Pmode);
	emit_libcall_block (insn, t2, t1,
			    gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					    UNSPEC_TLSLDBASE));
	emit_insn (gen_tld_offset_load (ret, addr, t2));
	break;

      case TLS_MODEL_INITIAL_EXEC:
	tp = gen_reg_rtx (Pmode);
	tmp = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	emit_insn (gen_tp_load (tp));
	if (flag_pic)
	  emit_insn (gen_tie_load_pic (tmp, addr));
	else
	  emit_insn (gen_tie_load (tmp, addr));
	emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
	break;

      case TLS_MODEL_LOCAL_EXEC:
	tp = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	emit_insn (gen_tp_load (tp));
	emit_insn (gen_tle_load (ret, addr, tp));
	break;

      default:
	gcc_unreachable ();
    }

  return ret;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= 16
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)


   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
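
/* Editorial annotation (worked example, not from the original source):
   for a MODE_INT reference to memory (X + 0x12345), the mask is 0x3fff
   and 0x12345 & 0x3fff = 0x2345 is at least halfway to the next
   boundary, so Y rounds up to 0x14000.  Z = X + 0x14000 is computed
   once, and the access becomes memory (Z + (0x12345 - 0x14000)); the
   residual displacement -0x1cbb fits in 14 bits.  */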

rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 enum machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (pa_tls_referenced_p (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      HOST_WIDE_INT newoffset;
      HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      && !INT14_OK_STRICT ? 0x1f : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF.)  Combine cannot
	 handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && pa_shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
      && (OBJECT_P (XEXP (x, 1))
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx_PLUS (Pmode,
					     gen_rtx_MULT (Pmode,
							   reg2,
							   GEN_INT (val)),
					     reg1));
    }

  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
      && pa_shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
      && (mode == SFmode || mode == DFmode))
    {
      /* First, try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_MULT (Pmode,
					    XEXP (XEXP (XEXP (x, 0), 0), 0),
					    XEXP (XEXP (XEXP (x, 0), 0), 1)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  idx = XEXP (x, 0);
	}

      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  HOST_WIDE_INT val = INTVAL (XEXP (idx, 1));

	  val /= INTVAL (XEXP (XEXP (idx, 0), 1));
	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_MULT (Pmode, reg1,
						  XEXP (XEXP (idx, 0), 1)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  HOST_WIDE_INT val = INTVAL (XEXP (XEXP (idx, 0), 1));
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode, gen_rtx_PLUS (Pmode,
						 gen_rtx_MULT (Pmode,
							       reg2,
							       GEN_INT (val)),
						 reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_MULT (Pmode, reg1,
						    XEXP (XEXP (idx, 0), 1)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     Where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is big but can be divided evenly by shadd_const,
	     it can instead be scaled down and added to (reg).  This allows
	     more scaled indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && GET_CODE (XEXP (x, 0)) == MULT
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095
	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	      && pa_shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
		reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return force_reg (Pmode,
				gen_rtx_PLUS (Pmode,
					      gen_rtx_MULT (Pmode,
							    reg2,
							    GEN_INT (val)),
					      reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && GET_CODE (XEXP (x, 0)) == MULT
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
		   && pa_shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_MULT (Pmode, regx2,
						       XEXP (XEXP (x, 0), 1)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}

/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
			 reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
	   || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}

/* For the HPPA, REG, REG+CONST and LO_SUM addresses are the cheapest
   (cost 1), a HIGH costs 2, and all other addresses, including
   symbolic and PIC references, are the most expensive (cost 4).

   It is no coincidence that this has the same structure
   as pa_legitimate_address_p.  */

static int
hppa_address_cost (rtx X, enum machine_mode mode ATTRIBUTE_UNUSED,
		   addr_space_t as ATTRIBUTE_UNUSED,
		   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
		int *total, bool speed ATTRIBUTE_UNUSED)
{
  int factor;

  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
	*total = 0;
      else if (INT_14_BITS (x))
	*total = 1;
      else
	*total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
	  && outer_code != SET)
	*total = 0;
      else
	*total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (3);
	  return true;
	}

      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (GET_MODE (x)) / 4;
      if (factor == 0)
	factor = 1;

      if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
	*total = factor * factor * COSTS_N_INSNS (8);
      else
	*total = factor * factor * COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (14);
	  return true;
	}
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (GET_MODE (x)) / 4;
      if (factor == 0)
	factor = 1;

      *total = factor * factor * COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (3);
	  return true;
	}

      /* A size N times larger than UNITS_PER_WORD needs N times as
	 many insns, taking N times as long.  */
      factor = GET_MODE_SIZE (GET_MODE (x)) / UNITS_PER_WORD;
      if (factor == 0)
	factor = 1;
      *total = factor * COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}
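
/* Editorial annotation (illustrative reading, not from the original
   source): a DImode multiply on a 32-bit target has factor 2, so it is
   costed at 2 * 2 * COSTS_N_INSNS (8) when PA 1.1 FP hardware can be
   used, and 2 * 2 * COSTS_N_INSNS (20) otherwise.  */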

/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (enum machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}

/* Return 1 if *X is a thread-local symbol.  */

static int
pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  return PA_SYMBOL_REF_TLS_P (*x);
}

/* Return 1 if X contains a thread-local symbol.  */

bool
pa_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
pa_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return pa_tls_referenced_p (x);
}
1572 
1573 /* Emit insns to move operands[1] into operands[0].
1574 
1575    Return 1 if we have written out everything that needs to be done to
1576    do the move.  Otherwise, return 0 and the caller will emit the move
1577    normally.
1578 
1579    Note SCRATCH_REG may not be in the proper mode depending on how it
1580    will be used.  This routine is responsible for creating a new copy
1581    of SCRATCH_REG in the proper mode.  */
1582 
1583 int
1584 pa_emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
1585 {
1586   register rtx operand0 = operands[0];
1587   register rtx operand1 = operands[1];
1588   register rtx tem;
1589 
1590   /* We can only handle indexed addresses in the destination operand
1591      of floating point stores.  Thus, we need to break out indexed
1592      addresses from the destination operand.  */
1593   if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1594     {
1595       gcc_assert (can_create_pseudo_p ());
1596 
1597       tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1598       operand0 = replace_equiv_address (operand0, tem);
1599     }
1600 
1601   /* On targets with non-equivalent space registers, break out unscaled
1602      indexed addresses from the source operand before the final CSE.
1603      We have to do this because the REG_POINTER flag is not correctly
1604      carried through various optimization passes and CSE may substitute
1605      a pseudo without the pointer set for one with the pointer set.  As
1606      a result, we loose various opportunities to create insns with
1607      unscaled indexed addresses.  */
1608   if (!TARGET_NO_SPACE_REGS
1609       && !cse_not_expected
1610       && GET_CODE (operand1) == MEM
1611       && GET_CODE (XEXP (operand1, 0)) == PLUS
1612       && REG_P (XEXP (XEXP (operand1, 0), 0))
1613       && REG_P (XEXP (XEXP (operand1, 0), 1)))
1614     operand1
1615       = replace_equiv_address (operand1,
1616 			       copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1617 
1618   if (scratch_reg
1619       && reload_in_progress && GET_CODE (operand0) == REG
1620       && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1621     operand0 = reg_equiv_mem (REGNO (operand0));
1622   else if (scratch_reg
1623 	   && reload_in_progress && GET_CODE (operand0) == SUBREG
1624 	   && GET_CODE (SUBREG_REG (operand0)) == REG
1625 	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1626     {
1627      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1628 	the code which tracks sets/uses for delete_output_reload.  */
1629       rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1630 				 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
1631 				 SUBREG_BYTE (operand0));
1632       operand0 = alter_subreg (&temp, true);
1633     }
1634 
1635   if (scratch_reg
1636       && reload_in_progress && GET_CODE (operand1) == REG
1637       && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1638     operand1 = reg_equiv_mem (REGNO (operand1));
1639   else if (scratch_reg
1640 	   && reload_in_progress && GET_CODE (operand1) == SUBREG
1641 	   && GET_CODE (SUBREG_REG (operand1)) == REG
1642 	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1643     {
1644      /* We must not alter SUBREG_BYTE (operand1) since that would confuse
1645 	the code which tracks sets/uses for delete_output_reload.  */
1646       rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1647 				 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
1648 				 SUBREG_BYTE (operand1));
1649       operand1 = alter_subreg (&temp, true);
1650     }
1651 
1652   if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1653       && ((tem = find_replacement (&XEXP (operand0, 0)))
1654 	  != XEXP (operand0, 0)))
1655     operand0 = replace_equiv_address (operand0, tem);
1656 
1657   if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1658       && ((tem = find_replacement (&XEXP (operand1, 0)))
1659 	  != XEXP (operand1, 0)))
1660     operand1 = replace_equiv_address (operand1, tem);
1661 
1662   /* Handle secondary reloads for loads/stores of FP registers from
1663      REG+D addresses where D does not fit in 5 or 14 bits, including
1664      (subreg (mem (addr))) cases.  */
1665   if (scratch_reg
1666       && fp_reg_operand (operand0, mode)
1667       && (MEM_P (operand1)
1668 	  || (GET_CODE (operand1) == SUBREG
1669 	      && MEM_P (XEXP (operand1, 0))))
1670       && !floating_point_store_memory_operand (operand1, mode))
1671     {
1672       if (GET_CODE (operand1) == SUBREG)
1673 	operand1 = XEXP (operand1, 0);
1674 
1675       /* SCRATCH_REG will hold an address and maybe the actual data.  We want
1676 	 it in WORD_MODE regardless of what mode it was originally given
1677 	 to us.  */
1678       scratch_reg = force_mode (word_mode, scratch_reg);
1679 
1680       /* D might not fit in 14 bits either; for such cases load D into
1681 	 scratch reg.  */
1682       if (reg_plus_base_memory_operand (operand1, mode)
1683 	  && !(TARGET_PA_20
1684 	       && !TARGET_ELF32
1685 	       && INT_14_BITS (XEXP (XEXP (operand1, 0), 1))))
1686 	{
1687 	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1688 	  emit_move_insn (scratch_reg,
1689 			  gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
1690 					  Pmode,
1691 					  XEXP (XEXP (operand1, 0), 0),
1692 					  scratch_reg));
1693 	}
1694       else
1695 	emit_move_insn (scratch_reg, XEXP (operand1, 0));
1696       emit_insn (gen_rtx_SET (VOIDmode, operand0,
1697 			      replace_equiv_address (operand1, scratch_reg)));
1698       return 1;
1699     }
1700   else if (scratch_reg
1701 	   && fp_reg_operand (operand1, mode)
1702 	   && (MEM_P (operand0)
1703 	       || (GET_CODE (operand0) == SUBREG
1704 		   && MEM_P (XEXP (operand0, 0))))
1705 	   && !floating_point_store_memory_operand (operand0, mode))
1706     {
1707       if (GET_CODE (operand0) == SUBREG)
1708 	operand0 = XEXP (operand0, 0);
1709 
1710       /* SCRATCH_REG will hold an address and maybe the actual data.  We want
1711 	 it in WORD_MODE regardless of what mode it was originally given
1712 	 to us.  */
1713       scratch_reg = force_mode (word_mode, scratch_reg);
1714 
1715       /* D might not fit in 14 bits either; for such cases load D into
1716 	 scratch reg.  */
1717       if (reg_plus_base_memory_operand (operand0, mode)
1718 	  && !(TARGET_PA_20
1719 	       && !TARGET_ELF32
1720 	       && INT_14_BITS (XEXP (XEXP (operand0, 0), 1))))
1721 	{
1722 	  emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1723 	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
1724 								        0)),
1725 						       Pmode,
1726 						       XEXP (XEXP (operand0, 0),
1727 								   0),
1728 						       scratch_reg));
1729 	}
1730       else
1731 	emit_move_insn (scratch_reg, XEXP (operand0, 0));
1732       emit_insn (gen_rtx_SET (VOIDmode,
1733 			      replace_equiv_address (operand0, scratch_reg),
1734 			      operand1));
1735       return 1;
1736     }
1737   /* Handle secondary reloads for loads of FP registers from constant
1738      expressions by forcing the constant into memory.  For the most part,
1739      this is only necessary for SImode and DImode.
1740 
1741      Use scratch_reg to hold the address of the memory location.  */
1742   else if (scratch_reg
1743 	   && CONSTANT_P (operand1)
1744 	   && fp_reg_operand (operand0, mode))
1745     {
1746       rtx const_mem, xoperands[2];
1747 
1748       if (operand1 == CONST0_RTX (mode))
1749 	{
1750 	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1751 	  return 1;
1752 	}
1753 
1754       /* SCRATCH_REG will hold an address and maybe the actual data.  We want
1755 	 it in WORD_MODE regardless of what mode it was originally given
1756 	 to us.  */
1757       scratch_reg = force_mode (word_mode, scratch_reg);
1758 
1759       /* Force the constant into memory and put the address of the
1760 	 memory location into scratch_reg.  */
1761       const_mem = force_const_mem (mode, operand1);
1762       xoperands[0] = scratch_reg;
1763       xoperands[1] = XEXP (const_mem, 0);
1764       pa_emit_move_sequence (xoperands, Pmode, 0);
1765 
1766       /* Now load the destination register.  */
1767       emit_insn (gen_rtx_SET (mode, operand0,
1768 			      replace_equiv_address (const_mem, scratch_reg)));
1769       return 1;
1770     }
1771   /* Handle secondary reloads for SAR.  These occur when trying to load
1772      the SAR from memory or a constant.  */
1773   else if (scratch_reg
1774 	   && GET_CODE (operand0) == REG
1775 	   && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1776 	   && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1777 	   && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
1778     {
1779       /* D might not fit in 14 bits either; for such cases load D into
1780 	 scratch reg.  */
1781       if (GET_CODE (operand1) == MEM
1782 	  && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
1783 	{
1784 	  /* We are reloading the address into the scratch register, so we
1785 	     want to make sure the scratch register is a full register.  */
1786 	  scratch_reg = force_mode (word_mode, scratch_reg);
1787 
1788 	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1789 	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1790 								        0)),
1791 						       Pmode,
1792 						       XEXP (XEXP (operand1, 0),
1793 						       0),
1794 						       scratch_reg));
1795 
1796 	  /* Now we are going to load the scratch register from memory,
1797 	     we want to load it in the same width as the original MEM,
1798 	     which must be the same as the width of the ultimate destination,
1799 	     OPERAND0.  */
1800 	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1801 
1802 	  emit_move_insn (scratch_reg,
1803 			  replace_equiv_address (operand1, scratch_reg));
1804 	}
1805       else
1806 	{
1807 	  /* We want to load the scratch register using the same mode as
1808 	     the ultimate destination.  */
1809 	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1810 
1811 	  emit_move_insn (scratch_reg, operand1);
1812 	}
1813 
1814       /* And emit the insn to set the ultimate destination.  We know that
1815 	 the scratch register has the same mode as the destination at this
1816 	 point.  */
1817       emit_move_insn (operand0, scratch_reg);
1818       return 1;
1819     }
1820   /* Handle the most common case: storing into a register.  */
1821   else if (register_operand (operand0, mode))
1822     {
1823       /* Legitimize TLS symbol references.  This happens for references
1824 	 that aren't a legitimate constant.  */
1825       if (PA_SYMBOL_REF_TLS_P (operand1))
1826 	operand1 = legitimize_tls_address (operand1);
1827 
1828       if (register_operand (operand1, mode)
1829 	  || (GET_CODE (operand1) == CONST_INT
1830 	      && pa_cint_ok_for_move (INTVAL (operand1)))
1831 	  || (operand1 == CONST0_RTX (mode))
1832 	  || (GET_CODE (operand1) == HIGH
1833 	      && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1834 	  /* Only `general_operands' can come here, so MEM is ok.  */
1835 	  || GET_CODE (operand1) == MEM)
1836 	{
1837 	  /* Various sets are created during RTL generation which don't
1838 	     have the REG_POINTER flag correctly set.  After the CSE pass,
1839 	     instruction recognition can fail if we don't consistently
1840 	     set this flag when performing register copies.  This should
1841 	     also improve the opportunities for creating insns that use
1842 	     unscaled indexing.  */
1843 	  if (REG_P (operand0) && REG_P (operand1))
1844 	    {
1845 	      if (REG_POINTER (operand1)
1846 		  && !REG_POINTER (operand0)
1847 		  && !HARD_REGISTER_P (operand0))
1848 		copy_reg_pointer (operand0, operand1);
1849 	    }
1850 
1851 	  /* When MEMs are broken out, the REG_POINTER flag doesn't
1852 	     get set.  In some cases, we can set the REG_POINTER flag
1853 	     from the declaration for the MEM.  */
1854 	  if (REG_P (operand0)
1855 	      && GET_CODE (operand1) == MEM
1856 	      && !REG_POINTER (operand0))
1857 	    {
1858 	      tree decl = MEM_EXPR (operand1);
1859 
1860 	      /* Set the register pointer flag and register alignment
1861 		 if the declaration for this memory reference is a
1862 		 pointer type.  */
1863 	      if (decl)
1864 		{
1865 		  tree type;
1866 
1867 		  /* If this is a COMPONENT_REF, use the FIELD_DECL from
1868 		     tree operand 1.  */
1869 		  if (TREE_CODE (decl) == COMPONENT_REF)
1870 		    decl = TREE_OPERAND (decl, 1);
1871 
1872 		  type = TREE_TYPE (decl);
1873 		  type = strip_array_types (type);
1874 
1875 		  if (POINTER_TYPE_P (type))
1876 		    {
1877 		      int align;
1878 
1879 		      type = TREE_TYPE (type);
1880 		      /* Using TYPE_ALIGN_OK is rather conservative as
1881 			 only the ada frontend actually sets it.  */
1882 			 only the Ada front end actually sets it.  */
1883 			       : BITS_PER_UNIT);
1884 		      mark_reg_pointer (operand0, align);
1885 		    }
1886 		}
1887 	    }
1888 
1889 	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1890 	  return 1;
1891 	}
1892     }
1893   else if (GET_CODE (operand0) == MEM)
1894     {
1895       if (mode == DFmode && operand1 == CONST0_RTX (mode)
1896 	  && !(reload_in_progress || reload_completed))
1897 	{
1898 	  rtx temp = gen_reg_rtx (DFmode);
1899 
1900 	  emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1901 	  emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
1902 	  return 1;
1903 	}
1904       if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1905 	{
1906 	  /* Run this case quickly.  */
1907 	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1908 	  return 1;
1909 	}
1910       if (! (reload_in_progress || reload_completed))
1911 	{
1912 	  operands[0] = validize_mem (operand0);
1913 	  operands[1] = operand1 = force_reg (mode, operand1);
1914 	}
1915     }
1916 
1917   /* Simplify the source if we need to.
1918      Note we do have to handle function labels here, even though we do
1919      not consider them legitimate constants.  Loop optimizations can
1920      call emit_move_xxx with one as a source.  */
1921   if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1922       || (GET_CODE (operand1) == HIGH
1923 	  && symbolic_operand (XEXP (operand1, 0), mode))
1924       || function_label_operand (operand1, VOIDmode)
1925       || pa_tls_referenced_p (operand1))
1926     {
1927       int ishighonly = 0;
1928 
1929       if (GET_CODE (operand1) == HIGH)
1930 	{
1931 	  ishighonly = 1;
1932 	  operand1 = XEXP (operand1, 0);
1933 	}
1934       if (symbolic_operand (operand1, mode))
1935 	{
1936 	  /* Argh.  The assembler and linker can't handle arithmetic
1937 	     involving plabels.
1938 
1939 	     So we force the plabel into memory, load operand0 from
1940 	     the memory location, then add in the constant part.  */
1941 	  if ((GET_CODE (operand1) == CONST
1942 	       && GET_CODE (XEXP (operand1, 0)) == PLUS
1943 	       && function_label_operand (XEXP (XEXP (operand1, 0), 0),
1944 					  VOIDmode))
1945 	      || function_label_operand (operand1, VOIDmode))
1946 	    {
1947 	      rtx temp, const_part;
1948 
1949 	      /* Figure out what (if any) scratch register to use.  */
1950 	      if (reload_in_progress || reload_completed)
1951 		{
1952 		  scratch_reg = scratch_reg ? scratch_reg : operand0;
1953 		  /* SCRATCH_REG will hold an address and maybe the actual
1954 		     data.  We want it in WORD_MODE regardless of what mode it
1955 		     was originally given to us.  */
1956 		  scratch_reg = force_mode (word_mode, scratch_reg);
1957 		}
1958 	      else if (flag_pic)
1959 		scratch_reg = gen_reg_rtx (Pmode);
1960 
1961 	      if (GET_CODE (operand1) == CONST)
1962 		{
1963 		  /* Save away the constant part of the expression.  */
1964 		  const_part = XEXP (XEXP (operand1, 0), 1);
1965 		  gcc_assert (GET_CODE (const_part) == CONST_INT);
1966 
1967 		  /* Force the function label into memory.  */
1968 		  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1969 		}
1970 	      else
1971 		{
1972 		  /* No constant part.  */
1973 		  const_part = NULL_RTX;
1974 
1975 		  /* Force the function label into memory.  */
1976 		  temp = force_const_mem (mode, operand1);
1977 		}
1978 
1979 
1980 	      /* Get the address of the memory location.  PIC-ify it if
1981 		 necessary.  */
1982 	      temp = XEXP (temp, 0);
1983 	      if (flag_pic)
1984 		temp = legitimize_pic_address (temp, mode, scratch_reg);
1985 
1986 	      /* Put the address of the memory location into our destination
1987 		 register.  */
1988 	      operands[1] = temp;
1989 	      pa_emit_move_sequence (operands, mode, scratch_reg);
1990 
1991 	      /* Now load from the memory location into our destination
1992 		 register.  */
1993 	      operands[1] = gen_rtx_MEM (Pmode, operands[0]);
1994 	      pa_emit_move_sequence (operands, mode, scratch_reg);
1995 
1996 	      /* And add back in the constant part.  */
1997 	      if (const_part != NULL_RTX)
1998 		expand_inc (operand0, const_part);
1999 
2000 	      return 1;
2001 	    }
2002 
2003 	  if (flag_pic)
2004 	    {
2005 	      rtx temp;
2006 
2007 	      if (reload_in_progress || reload_completed)
2008 		{
2009 		  temp = scratch_reg ? scratch_reg : operand0;
2010 		  /* TEMP will hold an address and maybe the actual
2011 		     data.  We want it in WORD_MODE regardless of what mode it
2012 		     was originally given to us.  */
2013 		  temp = force_mode (word_mode, temp);
2014 		}
2015 	      else
2016 		temp = gen_reg_rtx (Pmode);
2017 
2018 	      /* (const (plus (symbol) (const_int))) must be forced to
2019 		 memory during/after reload if the const_int will not fit
2020 		 in 14 bits.  */
2021 	      if (GET_CODE (operand1) == CONST
2022 		       && GET_CODE (XEXP (operand1, 0)) == PLUS
2023 		       && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2024 		       && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
2025 		       && (reload_completed || reload_in_progress)
2026 		       && flag_pic)
2027 		{
2028 		  rtx const_mem = force_const_mem (mode, operand1);
2029 		  operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
2030 							mode, temp);
2031 		  operands[1] = replace_equiv_address (const_mem, operands[1]);
2032 		  pa_emit_move_sequence (operands, mode, temp);
2033 		}
2034 	      else
2035 		{
2036 		  operands[1] = legitimize_pic_address (operand1, mode, temp);
2037 		  if (REG_P (operand0) && REG_P (operands[1]))
2038 		    copy_reg_pointer (operand0, operands[1]);
2039 		  emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
2040 		}
2041 	    }
2042 	  /* On the HPPA, references to data space are supposed to use dp,
2043 	     register 27, but showing it in the RTL inhibits various cse
2044 	     and loop optimizations.  */
2045 	  else
2046 	    {
2047 	      rtx temp, set;
2048 
2049 	      if (reload_in_progress || reload_completed)
2050 		{
2051 		  temp = scratch_reg ? scratch_reg : operand0;
2052 		  /* TEMP will hold an address and maybe the actual
2053 		     data.  We want it in WORD_MODE regardless of what mode it
2054 		     was originally given to us.  */
2055 		  temp = force_mode (word_mode, temp);
2056 		}
2057 	      else
2058 		temp = gen_reg_rtx (mode);
2059 
2060 	      /* Loading a SYMBOL_REF into a register makes that register
2061 		 safe to be used as the base in an indexed address.
2062 
2063 		 Don't mark hard registers though.  That loses.  */
2064 	      if (GET_CODE (operand0) == REG
2065 		  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2066 		mark_reg_pointer (operand0, BITS_PER_UNIT);
2067 	      if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2068 		mark_reg_pointer (temp, BITS_PER_UNIT);
2069 
2070 	      if (ishighonly)
2071 		set = gen_rtx_SET (mode, operand0, temp);
2072 	      else
2073 		set = gen_rtx_SET (VOIDmode,
2074 				   operand0,
2075 				   gen_rtx_LO_SUM (mode, temp, operand1));
2076 
2077 	      emit_insn (gen_rtx_SET (VOIDmode,
2078 				      temp,
2079 				      gen_rtx_HIGH (mode, operand1)));
2080 	      emit_insn (set);
2081 
2082 	    }
2083 	  return 1;
2084 	}
2085       else if (pa_tls_referenced_p (operand1))
2086 	{
2087 	  rtx tmp = operand1;
2088 	  rtx addend = NULL;
2089 
2090 	  if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2091 	    {
2092 	      addend = XEXP (XEXP (tmp, 0), 1);
2093 	      tmp = XEXP (XEXP (tmp, 0), 0);
2094 	    }
2095 
2096 	  gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2097 	  tmp = legitimize_tls_address (tmp);
2098 	  if (addend)
2099 	    {
2100 	      tmp = gen_rtx_PLUS (mode, tmp, addend);
2101 	      tmp = force_operand (tmp, operands[0]);
2102 	    }
2103 	  operands[1] = tmp;
2104 	}
2105       else if (GET_CODE (operand1) != CONST_INT
2106 	       || !pa_cint_ok_for_move (INTVAL (operand1)))
2107 	{
2108 	  rtx insn, temp;
2109 	  rtx op1 = operand1;
2110 	  HOST_WIDE_INT value = 0;
2111 	  HOST_WIDE_INT insv = 0;
2112 	  int insert = 0;
2113 
2114 	  if (GET_CODE (operand1) == CONST_INT)
2115 	    value = INTVAL (operand1);
2116 
2117 	  if (TARGET_64BIT
2118 	      && GET_CODE (operand1) == CONST_INT
2119 	      && HOST_BITS_PER_WIDE_INT > 32
2120 	      && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2121 	    {
2122 	      HOST_WIDE_INT nval;
2123 
2124 	      /* Extract the low order 32 bits of the value and sign extend.
2125 		 If the new value is the same as the original value, we can
2126 		 use the original value as-is.  If the new value is
2127 		 different, we use it and insert the most-significant 32-bits
2128 		 of the original value into the final result.  */
2129 	      nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2130 		      ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2131 	      if (value != nval)
2132 		{
2133 #if HOST_BITS_PER_WIDE_INT > 32
2134 		  insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2135 #endif
2136 		  insert = 1;
2137 		  value = nval;
2138 		  operand1 = GEN_INT (nval);
2139 		}
2140 	    }
2141 
2142 	  if (reload_in_progress || reload_completed)
2143 	    temp = scratch_reg ? scratch_reg : operand0;
2144 	  else
2145 	    temp = gen_reg_rtx (mode);
2146 
2147 	  /* We don't directly split DImode constants on 32-bit targets
2148 	     because PLUS uses an 11-bit immediate and the insn sequence
2149 	     generated is not as efficient as the one using HIGH/LO_SUM.  */
2150 	  if (GET_CODE (operand1) == CONST_INT
2151 	      && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2152 	      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2153 	      && !insert)
2154 	    {
2155 	      /* Directly break constant into high and low parts.  This
2156 		 provides better optimization opportunities because various
2157 		 passes recognize constants split with PLUS but not LO_SUM.
2158 		 We use a 14-bit signed low part except when the addition
2159 		 of 0x4000 to the high part might change the sign of the
2160 		 high part.  */
2161 	      HOST_WIDE_INT low = value & 0x3fff;
2162 	      HOST_WIDE_INT high = value & ~ 0x3fff;
2163 
2164 	      if (low >= 0x2000)
2165 		{
2166 		  if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2167 		    high += 0x2000;
2168 		  else
2169 		    high += 0x4000;
2170 		}
2171 
2172 	      low = value - high;
2173 
2174 	      emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
2175 	      operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2176 	    }
2177 	  else
2178 	    {
2179 	      emit_insn (gen_rtx_SET (VOIDmode, temp,
2180 				      gen_rtx_HIGH (mode, operand1)));
2181 	      operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2182 	    }
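	  /* Worked example (illustrative, not from the original source):
	     for value == 0x12346789 the 14-bit low field is 0x2789, which
	     is >= 0x2000, so high becomes 0x12344000 + 0x4000 == 0x12348000
	     and low becomes 0x12346789 - 0x12348000 == -6263, which fits
	     in a signed 14-bit field.  The split above then corresponds
	     roughly to "ldil L'0x12348000,%reg" followed by
	     "ldo -6263(%reg),%reg".  */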
2183 
2184 	  insn = emit_move_insn (operands[0], operands[1]);
2185 
2186 	  /* Now insert the most significant 32 bits of the value
2187 	     into the register.  When we don't have a second register
2188 	     available, it could take up to nine instructions to load
2189 	     a 64-bit integer constant.  Prior to reload, we force
2190 	     constants that would take more than three instructions
2191 	     to load to the constant pool.  During and after reload,
2192 	     we have to handle all possible values.  */
2193 	  if (insert)
2194 	    {
2195 	      /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2196 		 register and the value to be inserted is outside the
2197 		 range that can be loaded with three depdi instructions.  */
2198 	      if (temp != operand0 && (insv >= 16384 || insv < -16384))
2199 		{
2200 		  operand1 = GEN_INT (insv);
2201 
2202 		  emit_insn (gen_rtx_SET (VOIDmode, temp,
2203 					  gen_rtx_HIGH (mode, operand1)));
2204 		  emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2205 		  if (mode == DImode)
2206 		    insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2207 						  const0_rtx, temp));
2208 		  else
2209 		    insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2210 						  const0_rtx, temp));
2211 		}
2212 	      else
2213 		{
2214 		  int len = 5, pos = 27;
2215 
2216 		  /* Insert the bits using the depdi instruction.  */
2217 		  while (pos >= 0)
2218 		    {
2219 		      HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2220 		      HOST_WIDE_INT sign = v5 < 0;
2221 
2222 		      /* Left extend the insertion.  */
2223 		      insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2224 		      while (pos > 0 && (insv & 1) == sign)
2225 			{
2226 			  insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2227 			  len += 1;
2228 			  pos -= 1;
2229 			}
2230 
2231 		      if (mode == DImode)
2232 			insn = emit_insn (gen_insvdi (operand0,
2233 						      GEN_INT (len),
2234 						      GEN_INT (pos),
2235 						      GEN_INT (v5)));
2236 		      else
2237 			insn = emit_insn (gen_insvsi (operand0,
2238 						      GEN_INT (len),
2239 						      GEN_INT (pos),
2240 						      GEN_INT (v5)));
2241 
2242 		      len = pos > 0 && pos < 5 ? pos : 5;
2243 		      pos -= len;
2244 		    }
2245 		}
2246 	    }
2247 
2248 	  set_unique_reg_note (insn, REG_EQUAL, op1);
2249 
2250 	  return 1;
2251 	}
2252     }
2253   /* Now have insn-emit do whatever it normally does.  */
2254   return 0;
2255 }
2256 
2257 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2258    it will need a link/runtime reloc).  */
2259 
2260 int
2261 pa_reloc_needed (tree exp)
2262 {
2263   int reloc = 0;
2264 
2265   switch (TREE_CODE (exp))
2266     {
2267     case ADDR_EXPR:
2268       return 1;
2269 
2270     case POINTER_PLUS_EXPR:
2271     case PLUS_EXPR:
2272     case MINUS_EXPR:
2273       reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2274       reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2275       break;
2276 
2277     CASE_CONVERT:
2278     case NON_LVALUE_EXPR:
2279       reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2280       break;
2281 
2282     case CONSTRUCTOR:
2283       {
2284 	tree value;
2285 	unsigned HOST_WIDE_INT ix;
2286 
2287 	FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2288 	  if (value)
2289 	    reloc |= pa_reloc_needed (value);
2290       }
2291       break;
2292 
2293     case ERROR_MARK:
2294       break;
2295 
2296     default:
2297       break;
2298     }
2299   return reloc;
2300 }
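/* Illustrative example (not from the original source; the symbol name is
   hypothetical): for a static initializer such as "&foo + 4", EXP is a
   POINTER_PLUS_EXPR whose first operand is an ADDR_EXPR, so the recursion
   returns nonzero and the object needs a run-time relocation; a plain
   integer constant yields zero.  */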
2301 
2302 
2303 /* Return the best assembler insn template
2304    for moving operands[1] into operands[0] as a fullword.  */
2305 const char *
2306 pa_singlemove_string (rtx *operands)
2307 {
2308   HOST_WIDE_INT intval;
2309 
2310   if (GET_CODE (operands[0]) == MEM)
2311     return "stw %r1,%0";
2312   if (GET_CODE (operands[1]) == MEM)
2313     return "ldw %1,%0";
2314   if (GET_CODE (operands[1]) == CONST_DOUBLE)
2315     {
2316       long i;
2317       REAL_VALUE_TYPE d;
2318 
2319       gcc_assert (GET_MODE (operands[1]) == SFmode);
2320 
2321       /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2322 	 bit pattern.  */
2323       REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2324       REAL_VALUE_TO_TARGET_SINGLE (d, i);
2325 
2326       operands[1] = GEN_INT (i);
2327       /* Fall through to CONST_INT case.  */
2328     }
2329   if (GET_CODE (operands[1]) == CONST_INT)
2330     {
2331       intval = INTVAL (operands[1]);
2332 
2333       if (VAL_14_BITS_P (intval))
2334 	return "ldi %1,%0";
2335       else if ((intval & 0x7ff) == 0)
2336 	return "ldil L'%1,%0";
2337       else if (pa_zdepi_cint_p (intval))
2338 	return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2339       else
2340 	return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2341     }
2342   return "copy %1,%0";
2343 }
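/* Illustrative examples (not from the original source) of the template
   selection above: 0x1234 fits in 14 bits and selects "ldi"; 0x12345800
   has its low 11 bits clear and selects "ldil"; 0x7f00 is a contiguous
   bit string acceptable to zdepi and selects the deposit form; a value
   such as 0x12345678 falls through to the two-insn "ldil"/"ldo"
   sequence.  */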
2344 
2345 
2346 /* Compute position (in OP[1]) and width (in OP[2])
2347    useful for copying IMM to a register using the zdepi
2348    instructions.  Store the immediate value to insert in OP[0].  */
2349 static void
2350 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2351 {
2352   int lsb, len;
2353 
2354   /* Find the least significant set bit in IMM.  */
2355   for (lsb = 0; lsb < 32; lsb++)
2356     {
2357       if ((imm & 1) != 0)
2358         break;
2359       imm >>= 1;
2360     }
2361 
2362   /* Choose variants based on *sign* of the 5-bit field.  */
2363   if ((imm & 0x10) == 0)
2364     len = (lsb <= 28) ? 4 : 32 - lsb;
2365   else
2366     {
2367       /* Find the width of the bitstring in IMM.  */
2368       for (len = 5; len < 32 - lsb; len++)
2369 	{
2370 	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2371 	    break;
2372 	}
2373 
2374       /* Sign extend IMM as a 5-bit value.  */
2375       imm = (imm & 0xf) - 0x10;
2376     }
2377 
2378   op[0] = imm;
2379   op[1] = 31 - lsb;
2380   op[2] = len;
2381 }
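/* Worked example (illustrative, not from the original source): for
   IMM == 0x7f00, the least significant set bit is lsb == 8 and the
   shifted field is 0x7f.  Bit 4 of the field is set, so the bitstring
   width is found to be len == 7 and the field sign-extends to -1 as a
   5-bit value.  The result op[] == { -1, 23, 7 } corresponds to
   "zdepi -1,23,7", which deposits seven one bits ending at PA bit
   position 23 (bit 8 from the LSB), reconstructing 0x7f00.  */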
2382 
2383 /* Compute position (in OP[1]) and width (in OP[2])
2384    useful for copying IMM to a register using the depdi,z
2385    instructions.  Store the immediate value to insert in OP[0].  */
2386 
2387 static void
2388 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2389 {
2390   int lsb, len, maxlen;
2391 
2392   maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2393 
2394   /* Find the least significant set bit in IMM.  */
2395   for (lsb = 0; lsb < maxlen; lsb++)
2396     {
2397       if ((imm & 1) != 0)
2398         break;
2399       imm >>= 1;
2400     }
2401 
2402   /* Choose variants based on *sign* of the 5-bit field.  */
2403   if ((imm & 0x10) == 0)
2404     len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2405   else
2406     {
2407       /* Find the width of the bitstring in IMM.  */
2408       for (len = 5; len < maxlen - lsb; len++)
2409 	{
2410 	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2411 	    break;
2412 	}
2413 
2414       /* Extend length if host is narrow and IMM is negative.  */
2415       if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2416 	len += 32;
2417 
2418       /* Sign extend IMM as a 5-bit value.  */
2419       imm = (imm & 0xf) - 0x10;
2420     }
2421 
2422   op[0] = imm;
2423   op[1] = 63 - lsb;
2424   op[2] = len;
2425 }
2426 
2427 /* Output assembler code to perform a doubleword move insn
2428    with operands OPERANDS.  */
2429 
2430 const char *
2431 pa_output_move_double (rtx *operands)
2432 {
2433   enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2434   rtx latehalf[2];
2435   rtx addreg0 = 0, addreg1 = 0;
2436 
2437   /* First classify both operands.  */
2438 
2439   if (REG_P (operands[0]))
2440     optype0 = REGOP;
2441   else if (offsettable_memref_p (operands[0]))
2442     optype0 = OFFSOP;
2443   else if (GET_CODE (operands[0]) == MEM)
2444     optype0 = MEMOP;
2445   else
2446     optype0 = RNDOP;
2447 
2448   if (REG_P (operands[1]))
2449     optype1 = REGOP;
2450   else if (CONSTANT_P (operands[1]))
2451     optype1 = CNSTOP;
2452   else if (offsettable_memref_p (operands[1]))
2453     optype1 = OFFSOP;
2454   else if (GET_CODE (operands[1]) == MEM)
2455     optype1 = MEMOP;
2456   else
2457     optype1 = RNDOP;
2458 
2459   /* Check for cases that the operand constraints are not
2460      supposed to allow.  */
2461   gcc_assert (optype0 == REGOP || optype1 == REGOP);
2462 
2463   /* Handle copies between general and floating registers.  */
2464 
2465   if (optype0 == REGOP && optype1 == REGOP
2466       && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2467     {
2468       if (FP_REG_P (operands[0]))
2469 	{
2470 	  output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2471 	  output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2472 	  return "{fldds|fldd} -16(%%sp),%0";
2473 	}
2474       else
2475 	{
2476 	  output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2477 	  output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2478 	  return "{ldws|ldw} -12(%%sp),%R0";
2479 	}
2480     }
2481 
2482   /* Handle auto-decrementing and auto-incrementing loads and stores
2483      specifically, since the structure of the function doesn't work
2484      for them without major modification.  Do this better when we teach
2485      this port about the general inc/dec addressing of the PA.
2486      (This was written by tege.  Chide him if it doesn't work.)  */
2487 
2488   if (optype0 == MEMOP)
2489     {
2490       /* We have to output the address syntax ourselves, since print_operand
2491 	 doesn't deal with the addresses we want to use.  Fix this later.  */
2492 
2493       rtx addr = XEXP (operands[0], 0);
2494       if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2495 	{
2496 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2497 
2498 	  operands[0] = XEXP (addr, 0);
2499 	  gcc_assert (GET_CODE (operands[1]) == REG
2500 		      && GET_CODE (operands[0]) == REG);
2501 
2502 	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2503 
2504 	  /* No overlap between high target register and address
2505 	     register.  (We do this in a non-obvious way to
2506 	     save a register file writeback)  */
2507 	  if (GET_CODE (addr) == POST_INC)
2508 	    return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2509 	  return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2510 	}
2511       else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2512 	{
2513 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2514 
2515 	  operands[0] = XEXP (addr, 0);
2516 	  gcc_assert (GET_CODE (operands[1]) == REG
2517 		      && GET_CODE (operands[0]) == REG);
2518 
2519 	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2520 	  /* No overlap between high target register and address
2521 	     register.  (We do this in a non-obvious way to save a
2522 	     register file writeback)  */
2523 	  if (GET_CODE (addr) == PRE_INC)
2524 	    return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2525 	  return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2526 	}
2527     }
2528   if (optype1 == MEMOP)
2529     {
2530       /* We have to output the address syntax ourselves, since print_operand
2531 	 doesn't deal with the addresses we want to use.  Fix this later.  */
2532 
2533       rtx addr = XEXP (operands[1], 0);
2534       if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2535 	{
2536 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2537 
2538 	  operands[1] = XEXP (addr, 0);
2539 	  gcc_assert (GET_CODE (operands[0]) == REG
2540 		      && GET_CODE (operands[1]) == REG);
2541 
2542 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2543 	    {
2544 	      /* No overlap between high target register and address
2545 		 register.  (We do this in a non-obvious way to
2546 		 save a register file writeback)  */
2547 	      if (GET_CODE (addr) == POST_INC)
2548 		return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2549 	      return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2550 	    }
2551 	  else
2552 	    {
2553 	      /* This is an undefined situation.  We should load into the
2554 		 address register *and* update that register.  Probably
2555 		 we don't need to handle this at all.  */
2556 	      if (GET_CODE (addr) == POST_INC)
2557 		return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2558 	      return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2559 	    }
2560 	}
2561       else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2562 	{
2563 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2564 
2565 	  operands[1] = XEXP (addr, 0);
2566 	  gcc_assert (GET_CODE (operands[0]) == REG
2567 		      && GET_CODE (operands[1]) == REG);
2568 
2569 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2570 	    {
2571 	      /* No overlap between high target register and address
2572 		 register.  (We do this in a non-obvious way to
2573 		 save a register file writeback)  */
2574 	      if (GET_CODE (addr) == PRE_INC)
2575 		return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2576 	      return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2577 	    }
2578 	  else
2579 	    {
2580 	      /* This is an undefined situation.  We should load into the
2581 		 address register *and* update that register.  Probably
2582 		 we don't need to handle this at all.  */
2583 	      if (GET_CODE (addr) == PRE_INC)
2584 		return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2585 	      return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2586 	    }
2587 	}
2588       else if (GET_CODE (addr) == PLUS
2589 	       && GET_CODE (XEXP (addr, 0)) == MULT)
2590 	{
2591 	  rtx xoperands[4];
2592 
2593 	  /* Load address into left half of destination register.  */
2594 	  xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2595 	  xoperands[1] = XEXP (addr, 1);
2596 	  xoperands[2] = XEXP (XEXP (addr, 0), 0);
2597 	  xoperands[3] = XEXP (XEXP (addr, 0), 1);
2598 	  output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2599 			   xoperands);
2600 	  return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2601 	}
2602       else if (GET_CODE (addr) == PLUS
2603 	       && REG_P (XEXP (addr, 0))
2604 	       && REG_P (XEXP (addr, 1)))
2605 	{
2606 	  rtx xoperands[3];
2607 
2608 	  /* Load address into left half of destination register.  */
2609 	  xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2610 	  xoperands[1] = XEXP (addr, 0);
2611 	  xoperands[2] = XEXP (addr, 1);
2612 	  output_asm_insn ("{addl|add,l} %1,%2,%0",
2613 			   xoperands);
2614 	  return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2615 	}
2616     }
2617 
2618   /* If an operand is an unoffsettable memory ref, find a register
2619      we can increment temporarily to make it refer to the second word.  */
2620 
2621   if (optype0 == MEMOP)
2622     addreg0 = find_addr_reg (XEXP (operands[0], 0));
2623 
2624   if (optype1 == MEMOP)
2625     addreg1 = find_addr_reg (XEXP (operands[1], 0));
2626 
2627   /* Ok, we can do one word at a time.
2628      Normally we do the low-numbered word first.
2629 
2630      Set up in LATEHALF the operands to use
2631      for the high-numbered word and in some cases alter the
2632      operands in OPERANDS to be suitable for the low-numbered word.  */
2633 
2634   if (optype0 == REGOP)
2635     latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2636   else if (optype0 == OFFSOP)
2637     latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
2638   else
2639     latehalf[0] = operands[0];
2640 
2641   if (optype1 == REGOP)
2642     latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2643   else if (optype1 == OFFSOP)
2644     latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
2645   else if (optype1 == CNSTOP)
2646     split_double (operands[1], &operands[1], &latehalf[1]);
2647   else
2648     latehalf[1] = operands[1];
2649 
2650   /* If the first move would clobber the source of the second one,
2651      do them in the other order.
2652 
2653      This can happen in two cases:
2654 
2655 	mem -> register where the first half of the destination register
2656  	is the same register used in the memory's address.  Reload
2657 	can create such insns.
2658 
2659 	mem in this case will be either register indirect or register
2660 	indirect plus a valid offset.
2661 
2662 	register -> register move where REGNO(dst) == REGNO(src + 1)
2663 	someone (Tim/Tege?) claimed this can happen for parameter loads.
2664 
2665      Handle mem -> register case first.  */
2666   if (optype0 == REGOP
2667       && (optype1 == MEMOP || optype1 == OFFSOP)
2668       && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2669 			    operands[1], 0))
2670     {
2671       /* Do the late half first.  */
2672       if (addreg1)
2673 	output_asm_insn ("ldo 4(%0),%0", &addreg1);
2674       output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2675 
2676       /* Then clobber.  */
2677       if (addreg1)
2678 	output_asm_insn ("ldo -4(%0),%0", &addreg1);
2679       return pa_singlemove_string (operands);
2680     }
2681 
2682   /* Now handle register -> register case.  */
2683   if (optype0 == REGOP && optype1 == REGOP
2684       && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2685     {
2686       output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2687       return pa_singlemove_string (operands);
2688     }
2689 
2690   /* Normal case: do the two words, low-numbered first.  */
2691 
2692   output_asm_insn (pa_singlemove_string (operands), operands);
2693 
2694   /* Make any unoffsettable addresses point at high-numbered word.  */
2695   if (addreg0)
2696     output_asm_insn ("ldo 4(%0),%0", &addreg0);
2697   if (addreg1)
2698     output_asm_insn ("ldo 4(%0),%0", &addreg1);
2699 
2700   /* Do that word.  */
2701   output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2702 
2703   /* Undo the adds we just did.  */
2704   if (addreg0)
2705     output_asm_insn ("ldo -4(%0),%0", &addreg0);
2706   if (addreg1)
2707     output_asm_insn ("ldo -4(%0),%0", &addreg1);
2708 
2709   return "";
2710 }
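/* Illustrative example (not from the original source; register numbers
   are hypothetical): moving (reg:DI 4) into (reg:DI 5) on a 32-bit
   target hits the REGNO (operands[0]) == REGNO (operands[1]) + 1 case,
   so the late halves are moved first ("copy %r5,%r6" before
   "copy %r4,%r5"); doing the low word first would clobber %r5, the
   second word of the source.  */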
2711 
2712 const char *
2713 pa_output_fp_move_double (rtx *operands)
2714 {
2715   if (FP_REG_P (operands[0]))
2716     {
2717       if (FP_REG_P (operands[1])
2718 	  || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2719 	output_asm_insn ("fcpy,dbl %f1,%0", operands);
2720       else
2721 	output_asm_insn ("fldd%F1 %1,%0", operands);
2722     }
2723   else if (FP_REG_P (operands[1]))
2724     {
2725       output_asm_insn ("fstd%F0 %1,%0", operands);
2726     }
2727   else
2728     {
2729       rtx xoperands[2];
2730 
2731       gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2732 
2733       /* This is a pain.  You have to be prepared to deal with an
2734 	 arbitrary address here including pre/post increment/decrement.
2735 
2736 	 So avoid this in the MD.  */
2737       gcc_assert (GET_CODE (operands[0]) == REG);
2738 
2739       xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2740       xoperands[0] = operands[0];
2741       output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2742     }
2743   return "";
2744 }
2745 
2746 /* Return a REG that occurs in ADDR with coefficient 1.
2747    ADDR can be effectively incremented by incrementing REG.  */
2748 
2749 static rtx
2750 find_addr_reg (rtx addr)
2751 {
2752   while (GET_CODE (addr) == PLUS)
2753     {
2754       if (GET_CODE (XEXP (addr, 0)) == REG)
2755 	addr = XEXP (addr, 0);
2756       else if (GET_CODE (XEXP (addr, 1)) == REG)
2757 	addr = XEXP (addr, 1);
2758       else if (CONSTANT_P (XEXP (addr, 0)))
2759 	addr = XEXP (addr, 1);
2760       else if (CONSTANT_P (XEXP (addr, 1)))
2761 	addr = XEXP (addr, 0);
2762       else
2763 	gcc_unreachable ();
2764     }
2765   gcc_assert (GET_CODE (addr) == REG);
2766   return addr;
2767 }
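/* Illustrative example (not from the original source): for ADDR ==
   (plus (plus (reg 3) (const_int 4)) (const_int 8)), the loop discards
   the constant term at each PLUS level and returns (reg 3), the register
   that can be temporarily incremented to reach the second word.  */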
2768 
2769 /* Emit code to perform a block move.
2770 
2771    OPERANDS[0] is the destination pointer as a REG, clobbered.
2772    OPERANDS[1] is the source pointer as a REG, clobbered.
2773    OPERANDS[2] is a register for temporary storage.
2774    OPERANDS[3] is a register for temporary storage.
2775    OPERANDS[4] is the size as a CONST_INT
2776    OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2777    OPERANDS[6] is another temporary register.  */
2778 
2779 const char *
2780 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2781 {
2782   HOST_WIDE_INT align = INTVAL (operands[5]);
2783   unsigned HOST_WIDE_INT n_bytes = INTVAL (operands[4]);
2784 
2785   /* We can't move more than a word at a time because the PA
2786      has no integer move insns longer than a word.  (Could use fp mem ops?)  */
2787   if (align > (TARGET_64BIT ? 8 : 4))
2788     align = (TARGET_64BIT ? 8 : 4);
2789 
2790   /* Note that we know each loop below will execute at least twice
2791      (else we would have open-coded the copy).  */
2792   switch (align)
2793     {
2794       case 8:
2795 	/* Pre-adjust the loop counter.  */
2796 	operands[4] = GEN_INT (n_bytes - 16);
2797 	output_asm_insn ("ldi %4,%2", operands);
2798 
2799 	/* Copying loop.  */
2800 	output_asm_insn ("ldd,ma 8(%1),%3", operands);
2801 	output_asm_insn ("ldd,ma 8(%1),%6", operands);
2802 	output_asm_insn ("std,ma %3,8(%0)", operands);
2803 	output_asm_insn ("addib,>= -16,%2,.-12", operands);
2804 	output_asm_insn ("std,ma %6,8(%0)", operands);
2805 
2806 	/* Handle the residual.  There could be up to 15 bytes of
2807 	   residual to copy!  */
2808 	if (n_bytes % 16 != 0)
2809 	  {
2810 	    operands[4] = GEN_INT (n_bytes % 8);
2811 	    if (n_bytes % 16 >= 8)
2812 	      output_asm_insn ("ldd,ma 8(%1),%3", operands);
2813 	    if (n_bytes % 8 != 0)
2814 	      output_asm_insn ("ldd 0(%1),%6", operands);
2815 	    if (n_bytes % 16 >= 8)
2816 	      output_asm_insn ("std,ma %3,8(%0)", operands);
2817 	    if (n_bytes % 8 != 0)
2818 	      output_asm_insn ("stdby,e %6,%4(%0)", operands);
2819 	  }
2820 	return "";
2821 
2822       case 4:
2823 	/* Pre-adjust the loop counter.  */
2824 	operands[4] = GEN_INT (n_bytes - 8);
2825 	output_asm_insn ("ldi %4,%2", operands);
2826 
2827 	/* Copying loop.  */
2828 	output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2829 	output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2830 	output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2831 	output_asm_insn ("addib,>= -8,%2,.-12", operands);
2832 	output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2833 
2834 	/* Handle the residual.  There could be up to 7 bytes of
2835 	   residual to copy!  */
2836 	if (n_bytes % 8 != 0)
2837 	  {
2838 	    operands[4] = GEN_INT (n_bytes % 4);
2839 	    if (n_bytes % 8 >= 4)
2840 	      output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2841 	    if (n_bytes % 4 != 0)
2842 	      output_asm_insn ("ldw 0(%1),%6", operands);
2843 	    if (n_bytes % 8 >= 4)
2844 	      output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2845 	    if (n_bytes % 4 != 0)
2846 	      output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2847 	  }
2848 	return "";
2849 
2850       case 2:
2851 	/* Pre-adjust the loop counter.  */
2852 	operands[4] = GEN_INT (n_bytes - 4);
2853 	output_asm_insn ("ldi %4,%2", operands);
2854 
2855 	/* Copying loop.  */
2856 	output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2857 	output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2858 	output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2859 	output_asm_insn ("addib,>= -4,%2,.-12", operands);
2860 	output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2861 
2862 	/* Handle the residual.  */
2863 	if (n_bytes % 4 != 0)
2864 	  {
2865 	    if (n_bytes % 4 >= 2)
2866 	      output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2867 	    if (n_bytes % 2 != 0)
2868 	      output_asm_insn ("ldb 0(%1),%6", operands);
2869 	    if (n_bytes % 4 >= 2)
2870 	      output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2871 	    if (n_bytes % 2 != 0)
2872 	      output_asm_insn ("stb %6,0(%0)", operands);
2873 	  }
2874 	return "";
2875 
2876       case 1:
2877 	/* Pre-adjust the loop counter.  */
2878 	operands[4] = GEN_INT (n_bytes - 2);
2879 	output_asm_insn ("ldi %4,%2", operands);
2880 
2881 	/* Copying loop.  */
2882 	output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2883 	output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2884 	output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2885 	output_asm_insn ("addib,>= -2,%2,.-12", operands);
2886 	output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2887 
2888 	/* Handle the residual.  */
2889 	if (n_bytes % 2 != 0)
2890 	  {
2891 	    output_asm_insn ("ldb 0(%1),%3", operands);
2892 	    output_asm_insn ("stb %3,0(%0)", operands);
2893 	  }
2894 	return "";
2895 
2896       default:
2897 	gcc_unreachable ();
2898     }
2899 }
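/* Worked example (illustrative, not from the original source): with
   align == 4 and n_bytes == 23, the counter is preloaded with 15 and the
   loop copies 8 bytes per iteration, 16 bytes in all.  The residual is
   23 % 8 == 7, so one more word is copied (7 >= 4) and the final
   "stby,e" stores the remaining 23 % 4 == 3 bytes.  */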
2900 
2901 /* Count the number of insns necessary to handle this block move.
2902 
2903    Basic structure is the same as pa_output_block_move, except that we
2904    count insns rather than emit them.  */
2905 
2906 static int
2907 compute_movmem_length (rtx insn)
2908 {
2909   rtx pat = PATTERN (insn);
2910   unsigned HOST_WIDE_INT align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2911   unsigned HOST_WIDE_INT n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2912   unsigned int n_insns = 0;
2913 
2914   /* We can't move more than a word at a time because the PA
2915      has no integer move insns longer than a word.  (Could use fp mem ops?)  */
2916   if (align > (TARGET_64BIT ? 8 : 4))
2917     align = (TARGET_64BIT ? 8 : 4);
2918 
2919   /* The basic copying loop.  */
2920   n_insns = 6;
2921 
2922   /* Residuals.  */
2923   if (n_bytes % (2 * align) != 0)
2924     {
2925       if ((n_bytes % (2 * align)) >= align)
2926 	n_insns += 2;
2927 
2928       if ((n_bytes % align) != 0)
2929 	n_insns += 2;
2930     }
2931 
2932   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
2933   return n_insns * 4;
2934 }
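/* Illustrative check (not from the original source): for align == 4 and
   n_bytes == 23, n_bytes % 8 == 7, so both residual tests fire and the
   count is 6 + 2 + 2 == 10 insns, i.e. a length of 40 bytes.  */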
2935 
2936 /* Emit code to perform a block clear.
2937 
2938    OPERANDS[0] is the destination pointer as a REG, clobbered.
2939    OPERANDS[1] is a register for temporary storage.
2940    OPERANDS[2] is the size as a CONST_INT
2941    OPERANDS[3] is the alignment safe to use, as a CONST_INT.  */
2942 
2943 const char *
2944 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2945 {
2946   HOST_WIDE_INT align = INTVAL (operands[3]);
2947   unsigned HOST_WIDE_INT n_bytes = INTVAL (operands[2]);
2948 
2949   /* We can't clear more than a word at a time because the PA
2950      has no integer move insns longer than a word.  */
2951   if (align > (TARGET_64BIT ? 8 : 4))
2952     align = (TARGET_64BIT ? 8 : 4);
2953 
2954   /* Note that we know each loop below will execute at least twice
2955      (else we would have open-coded the copy).  */
2956   switch (align)
2957     {
2958       case 8:
2959 	/* Pre-adjust the loop counter.  */
2960 	operands[2] = GEN_INT (n_bytes - 16);
2961 	output_asm_insn ("ldi %2,%1", operands);
2962 
2963 	/* Loop.  */
2964 	output_asm_insn ("std,ma %%r0,8(%0)", operands);
2965 	output_asm_insn ("addib,>= -16,%1,.-4", operands);
2966 	output_asm_insn ("std,ma %%r0,8(%0)", operands);
2967 
2968 	/* Handle the residual.  There could be up to 15 bytes of
2969 	   residual to copy!  */
2970 	if (n_bytes % 16 != 0)
2971 	  {
2972 	    operands[2] = GEN_INT (n_bytes % 8);
2973 	    if (n_bytes % 16 >= 8)
2974 	      output_asm_insn ("std,ma %%r0,8(%0)", operands);
2975 	    if (n_bytes % 8 != 0)
2976 	      output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
2977 	  }
2978 	return "";
2979 
2980       case 4:
2981 	/* Pre-adjust the loop counter.  */
2982 	operands[2] = GEN_INT (n_bytes - 8);
2983 	output_asm_insn ("ldi %2,%1", operands);
2984 
2985 	/* Loop.  */
2986 	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2987 	output_asm_insn ("addib,>= -8,%1,.-4", operands);
2988 	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2989 
2990 	/* Handle the residual.  There could be up to 7 bytes of
2991 	   residual to copy!  */
2992 	if (n_bytes % 8 != 0)
2993 	  {
2994 	    operands[2] = GEN_INT (n_bytes % 4);
2995 	    if (n_bytes % 8 >= 4)
2996 	      output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2997 	    if (n_bytes % 4 != 0)
2998 	      output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
2999 	  }
3000 	return "";
3001 
3002       case 2:
3003 	/* Pre-adjust the loop counter.  */
3004 	operands[2] = GEN_INT (n_bytes - 4);
3005 	output_asm_insn ("ldi %2,%1", operands);
3006 
3007 	/* Loop.  */
3008 	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3009 	output_asm_insn ("addib,>= -4,%1,.-4", operands);
3010 	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3011 
3012 	/* Handle the residual.  */
3013 	if (n_bytes % 4 != 0)
3014 	  {
3015 	    if (n_bytes % 4 >= 2)
3016 	      output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3017 	    if (n_bytes % 2 != 0)
3018 	      output_asm_insn ("stb %%r0,0(%0)", operands);
3019 	  }
3020 	return "";
3021 
3022       case 1:
3023 	/* Pre-adjust the loop counter.  */
3024 	operands[2] = GEN_INT (n_bytes - 2);
3025 	output_asm_insn ("ldi %2,%1", operands);
3026 
3027 	/* Loop.  */
3028 	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3029 	output_asm_insn ("addib,>= -2,%1,.-4", operands);
3030 	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3031 
3032 	/* Handle the residual.  */
3033 	if (n_bytes % 2 != 0)
3034 	  output_asm_insn ("stb %%r0,0(%0)", operands);
3035 
3036 	return "";
3037 
3038       default:
3039 	gcc_unreachable ();
3040     }
3041 }
3042 
3043 /* Count the number of insns necessary to handle this block clear.
3044 
3045    Basic structure is the same as pa_output_block_clear, except that we
3046    count insns rather than emit them.  */
3047 
3048 static int
3049 compute_clrmem_length (rtx insn)
3050 {
3051   rtx pat = PATTERN (insn);
3052   unsigned HOST_WIDE_INT align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3053   unsigned HOST_WIDE_INT n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3054   unsigned int n_insns = 0;
3055 
3056   /* We can't clear more than a word at a time because the PA
3057      has no integer move insns longer than a word.  */
3058   if (align > (TARGET_64BIT ? 8 : 4))
3059     align = (TARGET_64BIT ? 8 : 4);
3060 
3061   /* The basic loop.  */
3062   n_insns = 4;
3063 
3064   /* Residuals.  */
3065   if (n_bytes % (2 * align) != 0)
3066     {
3067       if ((n_bytes % (2 * align)) >= align)
3068 	n_insns++;
3069 
3070       if ((n_bytes % align) != 0)
3071 	n_insns++;
3072     }
3073 
3074   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
3075   return n_insns * 4;
3076 }
3077 
3078 
3079 const char *
3080 pa_output_and (rtx *operands)
3081 {
3082   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3083     {
3084       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3085       int ls0, ls1, ms0, p, len;
3086 
3087       for (ls0 = 0; ls0 < 32; ls0++)
3088 	if ((mask & (1 << ls0)) == 0)
3089 	  break;
3090 
3091       for (ls1 = ls0; ls1 < 32; ls1++)
3092 	if ((mask & (1 << ls1)) != 0)
3093 	  break;
3094 
3095       for (ms0 = ls1; ms0 < 32; ms0++)
3096 	if ((mask & (1 << ms0)) == 0)
3097 	  break;
3098 
3099       gcc_assert (ms0 == 32);
3100 
3101       if (ls1 == 32)
3102 	{
3103 	  len = ls0;
3104 
3105 	  gcc_assert (len);
3106 
3107 	  operands[2] = GEN_INT (len);
3108 	  return "{extru|extrw,u} %1,31,%2,%0";
3109 	}
3110       else
3111 	{
3112 	  /* We could use this `depi' for the case above as well, but `depi'
3113 	     requires one more register file access than an `extru'.  */
3114 
3115 	  p = 31 - ls0;
3116 	  len = ls1 - ls0;
3117 
3118 	  operands[2] = GEN_INT (p);
3119 	  operands[3] = GEN_INT (len);
3120 	  return "{depi|depwi} 0,%2,%3,%0";
3121 	}
3122     }
3123   else
3124     return "and %1,%2,%0";
3125 }
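/* Worked example (illustrative, not from the original source): for
   operands[2] == 0xffff00ff the zero run spans bits 8..15 (ls0 == 8,
   ls1 == 16, ms0 == 32), so p == 23 and len == 8 and the mask is applied
   with "depi 0,23,8,%0".  For a low-order mask such as 0x7fffffff,
   ls1 == 32 and "extru %1,31,31,%0" is used instead.  */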
3126 
3127 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3128    storing the result in operands[0].  */
3129 const char *
3130 pa_output_64bit_and (rtx *operands)
3131 {
3132   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3133     {
3134       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3135       int ls0, ls1, ms0, p, len;
3136 
3137       for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3138 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3139 	  break;
3140 
3141       for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3142 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3143 	  break;
3144 
3145       for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3146 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3147 	  break;
3148 
3149       gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3150 
3151       if (ls1 == HOST_BITS_PER_WIDE_INT)
3152 	{
3153 	  len = ls0;
3154 
3155 	  gcc_assert (len);
3156 
3157 	  operands[2] = GEN_INT (len);
3158 	  return "extrd,u %1,63,%2,%0";
3159 	}
3160       else
3161 	{
3162 	  /* We could use this `depdi' for the case above as well, but `depdi'
3163 	     requires one more register file access than an `extrd'.  */
3164 
3165 	  p = 63 - ls0;
3166 	  len = ls1 - ls0;
3167 
3168 	  operands[2] = GEN_INT (p);
3169 	  operands[3] = GEN_INT (len);
3170 	  return "depdi 0,%2,%3,%0";
3171 	}
3172     }
3173   else
3174     return "and %1,%2,%0";
3175 }
3176 
3177 const char *
3178 pa_output_ior (rtx *operands)
3179 {
3180   unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3181   int bs0, bs1, p, len;
3182 
3183   if (INTVAL (operands[2]) == 0)
3184     return "copy %1,%0";
3185 
3186   for (bs0 = 0; bs0 < 32; bs0++)
3187     if ((mask & (1 << bs0)) != 0)
3188       break;
3189 
3190   for (bs1 = bs0; bs1 < 32; bs1++)
3191     if ((mask & (1 << bs1)) == 0)
3192       break;
3193 
3194   gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3195 
3196   p = 31 - bs0;
3197   len = bs1 - bs0;
3198 
3199   operands[2] = GEN_INT (p);
3200   operands[3] = GEN_INT (len);
3201   return "{depi|depwi} -1,%2,%3,%0";
3202 }
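/* Worked example (illustrative, not from the original source): for
   operands[2] == 0x00000f00 the set bits form a single run at bits 8..11
   (bs0 == 8, bs1 == 12), so p == 23 and len == 4 and the result is
   "depi -1,23,4,%0", which forces those four bits to one.  */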
3203 
3204 /* Return a string to perform a bitwise-or of operands[1] with operands[2]
3205    storing the result in operands[0].  */
3206 const char *
3207 pa_output_64bit_ior (rtx *operands)
3208 {
3209   unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3210   int bs0, bs1, p, len;
3211 
3212   if (INTVAL (operands[2]) == 0)
3213     return "copy %1,%0";
3214 
3215   for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3216     if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3217       break;
3218 
3219   for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3220     if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3221       break;
3222 
3223   gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3224 	      || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3225 
3226   p = 63 - bs0;
3227   len = bs1 - bs0;
3228 
3229   operands[2] = GEN_INT (p);
3230   operands[3] = GEN_INT (len);
3231   return "depdi -1,%2,%3,%0";
3232 }
3233 
3234 /* Target hook for assembling integer objects.  This code handles
3235    aligned SI and DI integers specially since function references
3236    must be preceded by P%.  */
3237 
3238 static bool
3239 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3240 {
3241   if (size == UNITS_PER_WORD
3242       && aligned_p
3243       && function_label_operand (x, VOIDmode))
3244     {
3245       fputs (size == 8? "\t.dword\t" : "\t.word\t", asm_out_file);
3246 
3247       /* We don't want an OPD when generating fast indirect calls.  */
3248       if (!TARGET_FAST_INDIRECT_CALLS)
3249 	fputs ("P%", asm_out_file);
3250 
3251       output_addr_const (asm_out_file, x);
3252       fputc ('\n', asm_out_file);
3253       return true;
3254     }
3255   return default_assemble_integer (x, size, aligned_p);
3256 }
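
/* For illustration, an aligned word-sized reference to a (hypothetical)
   function foo assembles as "\t.word\tP%foo" in 32-bit mode and
   "\t.dword\tP%foo" in 64-bit mode; with -mfast-indirect-calls the P%
   plabel prefix is omitted.  */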
3257 
3258 /* Output an ascii string.  */
3259 void
3260 pa_output_ascii (FILE *file, const char *p, int size)
3261 {
3262   int i;
3263   int chars_output;
3264   unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */
3265 
3266   /* The HP assembler can only take strings of 256 characters at one
3267      time.  This is a limitation on input line length, *not* the
3268      length of the string.  Sigh.  Even worse, it seems that the
3269      restriction is in number of input characters (see \xnn &
3270      \whatever).  So we have to do this very carefully.  */
3271 
3272   fputs ("\t.STRING \"", file);
3273 
3274   chars_output = 0;
3275   for (i = 0; i < size; i += 4)
3276     {
3277       int co = 0;
3278       int io = 0;
3279       for (io = 0, co = 0; io < MIN (4, size - i); io++)
3280 	{
3281 	  register unsigned int c = (unsigned char) p[i + io];
3282 
3283 	  if (c == '\"' || c == '\\')
3284 	    partial_output[co++] = '\\';
3285 	  if (c >= ' ' && c < 0177)
3286 	    partial_output[co++] = c;
3287 	  else
3288 	    {
3289 	      unsigned int hexd;
3290 	      partial_output[co++] = '\\';
3291 	      partial_output[co++] = 'x';
3292 	      hexd = c / 16 + '0';
3293 	      if (hexd > '9')
3294 		hexd -= '9' - 'a' + 1;
3295 	      partial_output[co++] = hexd;
3296 	      hexd = c % 16 + '0';
3297 	      if (hexd > '9')
3298 		hexd -= '9' - 'a' + 1;
3299 	      partial_output[co++] = hexd;
3300 	    }
3301 	}
3302       if (chars_output + co > 243)
3303 	{
3304 	  fputs ("\"\n\t.STRING \"", file);
3305 	  chars_output = 0;
3306 	}
3307       fwrite (partial_output, 1, (size_t) co, file);
3308       chars_output += co;
3309       co = 0;
3310     }
3311   fputs ("\"\n", file);
3312 }
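
/* For example, the three bytes "hi\n" come out as

	.STRING "hi\x0a"

   Printable characters pass through (with " and \ escaped), everything
   else becomes a two-digit lowercase \xnn escape, and a fresh .STRING
   directive is started whenever an output line would exceed the
   243-character budget checked above.  */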
3313 
3314 /* Try to rewrite floating point comparisons & branches to avoid
3315    useless add,tr insns.
3316 
3317    CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3318    to see if FPCC is dead.  CHECK_NOTES is nonzero for the
3319    first attempt to remove useless add,tr insns.  It is zero
3320    for the second pass as reorg sometimes leaves bogus REG_DEAD
3321    notes lying around.
3322 
3323    When CHECK_NOTES is zero we can only eliminate add,tr insns
3324    when there's a 1:1 correspondence between fcmp and ftest/fbranch
3325    instructions.  */
3326 static void
3327 remove_useless_addtr_insns (int check_notes)
3328 {
3329   rtx insn;
3330   static int pass = 0;
3331 
3332   /* This is fairly cheap, so always run it when optimizing.  */
3333   if (optimize > 0)
3334     {
3335       int fcmp_count = 0;
3336       int fbranch_count = 0;
3337 
3338       /* Walk all the insns in this function looking for fcmp & fbranch
3339 	 instructions.  Keep track of how many of each we find.  */
3340       for (insn = get_insns (); insn; insn = next_insn (insn))
3341 	{
3342 	  rtx tmp;
3343 
3344 	  /* Ignore anything that isn't an INSN or a JUMP_INSN.  */
3345 	  if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
3346 	    continue;
3347 
3348 	  tmp = PATTERN (insn);
3349 
3350 	  /* It must be a set.  */
3351 	  if (GET_CODE (tmp) != SET)
3352 	    continue;
3353 
3354 	  /* If the destination is CCFP, then we've found an fcmp insn.  */
3355 	  tmp = SET_DEST (tmp);
3356 	  if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3357 	    {
3358 	      fcmp_count++;
3359 	      continue;
3360 	    }
3361 
3362 	  tmp = PATTERN (insn);
3363 	  /* If this is an fbranch instruction, bump the fbranch counter.  */
3364 	  if (GET_CODE (tmp) == SET
3365 	      && SET_DEST (tmp) == pc_rtx
3366 	      && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3367 	      && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3368 	      && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3369 	      && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3370 	    {
3371 	      fbranch_count++;
3372 	      continue;
3373 	    }
3374 	}
3375 
3376 
3377       /* Find all floating point compare + branch insns.  If possible,
3378 	 reverse the comparison & the branch to avoid add,tr insns.  */
3379       for (insn = get_insns (); insn; insn = next_insn (insn))
3380 	{
3381 	  rtx tmp, next;
3382 
3383 	  /* Ignore anything that isn't an INSN.  */
3384 	  if (GET_CODE (insn) != INSN)
3385 	    continue;
3386 
3387 	  tmp = PATTERN (insn);
3388 
3389 	  /* It must be a set.  */
3390 	  if (GET_CODE (tmp) != SET)
3391 	    continue;
3392 
3393 	  /* The destination must be CCFP, which is register zero.  */
3394 	  tmp = SET_DEST (tmp);
3395 	  if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3396 	    continue;
3397 
3398 	  /* INSN should be a set of CCFP.
3399 
3400 	     See if the result of this insn is used in a reversed FP
3401 	     conditional branch.  If so, reverse our condition and
3402 	     the branch.  Doing so avoids useless add,tr insns.  */
3403 	  next = next_insn (insn);
3404 	  while (next)
3405 	    {
3406 	      /* Jumps, calls and labels stop our search.  */
3407 	      if (GET_CODE (next) == JUMP_INSN
3408 		  || GET_CODE (next) == CALL_INSN
3409 		  || GET_CODE (next) == CODE_LABEL)
3410 		break;
3411 
3412 	      /* As does another fcmp insn.  */
3413 	      if (GET_CODE (next) == INSN
3414 		  && GET_CODE (PATTERN (next)) == SET
3415 		  && GET_CODE (SET_DEST (PATTERN (next))) == REG
3416 		  && REGNO (SET_DEST (PATTERN (next))) == 0)
3417 		break;
3418 
3419 	      next = next_insn (next);
3420 	    }
3421 
3422 	  /* Is NEXT_INSN a branch?  */
3423 	  if (next
3424 	      && GET_CODE (next) == JUMP_INSN)
3425 	    {
3426 	      rtx pattern = PATTERN (next);
3427 
3428 	      /* If it is a reversed fp conditional branch (e.g. uses add,tr)
3429 		 and CCFP dies, then reverse our conditional and the branch
3430 		 to avoid the add,tr.  */
3431 	      if (GET_CODE (pattern) == SET
3432 		  && SET_DEST (pattern) == pc_rtx
3433 		  && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3434 		  && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3435 		  && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3436 		  && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3437 		  && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3438 		  && (fcmp_count == fbranch_count
3439 		      || (check_notes
3440 			  && find_regno_note (next, REG_DEAD, 0))))
3441 		{
3442 		  /* Reverse the branch.  */
3443 		  tmp = XEXP (SET_SRC (pattern), 1);
3444 		  XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3445 		  XEXP (SET_SRC (pattern), 2) = tmp;
3446 		  INSN_CODE (next) = -1;
3447 
3448 		  /* Reverse our condition.  */
3449 		  tmp = PATTERN (insn);
3450 		  PUT_CODE (XEXP (tmp, 1),
3451 			    (reverse_condition_maybe_unordered
3452 			     (GET_CODE (XEXP (tmp, 1)))));
3453 		}
3454 	    }
3455 	}
3456     }
3457 
3458   pass = !pass;
3460 }
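
/* Schematically (not literal insn text), a branch on the false
   condition of an fcmp would otherwise be emitted as

	fcmp,dbl,<  %fr4,%fr5
	ftest
	add,tr %r0,%r0,%r0	; unconditionally skip the branch
	b,n L$0

   Reversing the fcmp condition (via reverse_condition_maybe_unordered,
   so NaN behavior is preserved) and swapping the branch arms lets the
   same test be emitted as a plain ftest/branch pair, dropping the
   add,tr.  */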
3461 
3462 /* You may have trouble believing this, but this is the 32-bit HP-PA
3463    stack layout.  Wow.
3464 
3465    Offset		Contents
3466 
3467    Variable arguments	(optional; any number may be allocated)
3468 
3469    SP-(4*(N+9))		arg word N
3470    	:		    :
3471       SP-56		arg word 5
3472       SP-52		arg word 4
3473 
3474    Fixed arguments	(must be allocated; may remain unused)
3475 
3476       SP-48		arg word 3
3477       SP-44		arg word 2
3478       SP-40		arg word 1
3479       SP-36		arg word 0
3480 
3481    Frame Marker
3482 
3483       SP-32		External Data Pointer (DP)
3484       SP-28		External sr4
3485       SP-24		External/stub RP (RP')
3486       SP-20		Current RP
3487       SP-16		Static Link
3488       SP-12		Clean up
3489       SP-8		Calling Stub RP (RP'')
3490       SP-4		Previous SP
3491 
3492    Top of Frame
3493 
3494       SP-0		Stack Pointer (points to next available address)
3495 
3496 */
3497 
3498 /* This function saves registers as follows.  Registers marked with ' are
3499    this function's registers (as opposed to the previous function's).
3500    If a frame_pointer isn't needed, r4 is saved as a general register;
3501    the space for the frame pointer is still allocated, though, to keep
3502    things simple.
3503 
3504 
3505    Top of Frame
3506 
3507        SP (FP')		Previous FP
3508        SP + 4		Alignment filler (sigh)
3509        SP + 8		Space for locals reserved here.
3510        .
3511        .
3512        .
3513        SP + n		All call-saved registers used.
3514        .
3515        .
3516        .
3517        SP + o		All call saved fp registers used.
3518        .
3519        .
3520        .
3521        SP + p (SP')	points to next available address.
3522 
3523 */
3524 
3525 /* Global variables set by output_function_prologue().  */
3526 /* Size of frame.  Need to know this to emit return insns from
3527    leaf procedures.  */
3528 static HOST_WIDE_INT actual_fsize, local_fsize;
3529 static int save_fregs;
3530 
3531 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3532    Handle case where DISP > 8k by using the add_high_const patterns.
3533 
3534    Note that in the DISP > 8k case, we will leave the high part of the address
3535    in %r1.  There is code in pa_expand_{prologue,epilogue} that knows this.  */
3536 
3537 static void
3538 store_reg (int reg, HOST_WIDE_INT disp, int base)
3539 {
3540   rtx insn, dest, src, basereg;
3541 
3542   src = gen_rtx_REG (word_mode, reg);
3543   basereg = gen_rtx_REG (Pmode, base);
3544   if (VAL_14_BITS_P (disp))
3545     {
3546       dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
3547       insn = emit_move_insn (dest, src);
3548     }
3549   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3550     {
3551       rtx delta = GEN_INT (disp);
3552       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3553 
3554       emit_move_insn (tmpreg, delta);
3555       insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3556       if (DO_FRAME_NOTES)
3557 	{
3558 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3559 			gen_rtx_SET (VOIDmode, tmpreg,
3560 				     gen_rtx_PLUS (Pmode, basereg, delta)));
3561 	  RTX_FRAME_RELATED_P (insn) = 1;
3562 	}
3563       dest = gen_rtx_MEM (word_mode, tmpreg);
3564       insn = emit_move_insn (dest, src);
3565     }
3566   else
3567     {
3568       rtx delta = GEN_INT (disp);
3569       rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3570       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3571 
3572       emit_move_insn (tmpreg, high);
3573       dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3574       insn = emit_move_insn (dest, src);
3575       if (DO_FRAME_NOTES)
3576 	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3577 		      gen_rtx_SET (VOIDmode,
3578 				   gen_rtx_MEM (word_mode,
3579 						gen_rtx_PLUS (word_mode,
3580 							      basereg,
3581 							      delta)),
3582 				   src));
3583     }
3584 
3585   if (DO_FRAME_NOTES)
3586     RTX_FRAME_RELATED_P (insn) = 1;
3587 }
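
/* For illustration, store_reg (2, -20, STACK_POINTER_REGNUM) becomes a
   single "stw %r2,-20(%r30)", while a 32-bit displacement outside the
   signed 14-bit range, say store_reg (4, 40000, 30), goes through %r1,
   roughly "addil L'40000,%r30" followed by "stw %r4,R'40000(%r1)".  */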
3588 
3589 /* Emit RTL to store REG at the memory location specified by BASE and then
3590    add MOD to BASE.  MOD must be <= 8k.  */
3591 
3592 static void
3593 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3594 {
3595   rtx insn, basereg, srcreg, delta;
3596 
3597   gcc_assert (VAL_14_BITS_P (mod));
3598 
3599   basereg = gen_rtx_REG (Pmode, base);
3600   srcreg = gen_rtx_REG (word_mode, reg);
3601   delta = GEN_INT (mod);
3602 
3603   insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3604   if (DO_FRAME_NOTES)
3605     {
3606       RTX_FRAME_RELATED_P (insn) = 1;
3607 
3608       /* RTX_FRAME_RELATED_P must be set on each frame related set
3609 	 in a parallel with more than one element.  */
3610       RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3611       RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3612     }
3613 }
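
/* For example, store_reg_modify (STACK_POINTER_REGNUM, 1, 64) stores
   %r1 at *sp and raises sp by 64 in a single instruction, typically
   "stwm %r1,64(%r30)" ("std,ma" on 64-bit targets).  */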
3614 
3615 /* Emit RTL to set REG to the value specified by BASE+DISP.  Handle case
3616    where DISP > 8k by using the add_high_const patterns.  NOTE indicates
3617    whether to add a frame note or not.
3618 
3619    In the DISP > 8k case, we leave the high part of the address in %r1.
3620    There is code in pa_expand_{prologue,epilogue} that knows about this.  */
3621 
3622 static void
3623 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3624 {
3625   rtx insn;
3626 
3627   if (VAL_14_BITS_P (disp))
3628     {
3629       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3630 			     plus_constant (Pmode,
3631 					    gen_rtx_REG (Pmode, base), disp));
3632     }
3633   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3634     {
3635       rtx basereg = gen_rtx_REG (Pmode, base);
3636       rtx delta = GEN_INT (disp);
3637       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3638 
3639       emit_move_insn (tmpreg, delta);
3640       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3641 			     gen_rtx_PLUS (Pmode, tmpreg, basereg));
3642       if (DO_FRAME_NOTES)
3643 	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3644 		      gen_rtx_SET (VOIDmode, tmpreg,
3645 				   gen_rtx_PLUS (Pmode, basereg, delta)));
3646     }
3647   else
3648     {
3649       rtx basereg = gen_rtx_REG (Pmode, base);
3650       rtx delta = GEN_INT (disp);
3651       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3652 
3653       emit_move_insn (tmpreg,
3654 		      gen_rtx_PLUS (Pmode, basereg,
3655 				    gen_rtx_HIGH (Pmode, delta)));
3656       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3657 			     gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3658     }
3659 
3660   if (DO_FRAME_NOTES && note)
3661     RTX_FRAME_RELATED_P (insn) = 1;
3662 }
3663 
3664 HOST_WIDE_INT
3665 pa_compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3666 {
3667   int freg_saved = 0;
3668   int i, j;
3669 
3670   /* The code in pa_expand_prologue and pa_expand_epilogue must
3671      be consistent with the rounding and size calculation done here.
3672      Change them at the same time.  */
3673 
3674   /* We do our own stack alignment.  First, round the size of the
3675      stack locals up to a word boundary.  */
3676   size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3677 
3678   /* Space for previous frame pointer + filler.  If any frame is
3679      allocated, we need to add in the STARTING_FRAME_OFFSET.  We
3680      waste some space here for the sake of HP compatibility.  The
3681      first slot is only used when the frame pointer is needed.  */
3682   if (size || frame_pointer_needed)
3683     size += STARTING_FRAME_OFFSET;
3684 
3685   /* If the current function calls __builtin_eh_return, then we need
3686      to allocate stack space for registers that will hold data for
3687      the exception handler.  */
3688   if (DO_FRAME_NOTES && crtl->calls_eh_return)
3689     {
3690       unsigned int i;
3691 
3692       for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3693 	continue;
3694       size += i * UNITS_PER_WORD;
3695     }
3696 
3697   /* Account for space used by the callee general register saves.  */
3698   for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3699     if (df_regs_ever_live_p (i))
3700       size += UNITS_PER_WORD;
3701 
3702   /* Account for space used by the callee floating point register saves.  */
3703   for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3704     if (df_regs_ever_live_p (i)
3705 	|| (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3706       {
3707 	freg_saved = 1;
3708 
3709 	/* We always save both halves of the FP register, so always
3710 	   increment the frame size by 8 bytes.  */
3711 	size += 8;
3712       }
3713 
3714   /* If any of the floating registers are saved, account for the
3715      alignment needed for the floating point register save block.  */
3716   if (freg_saved)
3717     {
3718       size = (size + 7) & ~7;
3719       if (fregs_live)
3720 	*fregs_live = 1;
3721     }
3722 
3723   /* The various ABIs include space for the outgoing parameters in the
3724      size of the current function's stack frame.  We don't need to align
3725      for the outgoing arguments as their alignment is set by the final
3726      rounding for the frame as a whole.  */
3727   size += crtl->outgoing_args_size;
3728 
3729   /* Allocate space for the fixed frame marker.  This space must be
3730      allocated for any function that makes calls or allocates
3731      stack space.  */
3732   if (!crtl->is_leaf || size)
3733     size += TARGET_64BIT ? 48 : 32;
3734 
3735   /* Finally, round to the preferred stack boundary.  */
3736   return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3737 	  & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3738 }
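
/* A worked example, assuming the 32-bit values STARTING_FRAME_OFFSET
   == 8 and PREFERRED_STACK_BOUNDARY == 512 bits (64 bytes): a non-leaf
   function with 101 bytes of locals, no callee saves and no outgoing
   stack arguments rounds the locals to 104, adds 8 for the frame
   pointer slot and filler (112), adds the 32-byte frame marker (144),
   and rounds up to a final frame of 192 bytes.  */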
3739 
3740 /* Generate the assembly code for function entry.  FILE is a stdio
3741    stream to output the code to.  SIZE is an int: how many units of
3742    temporary storage to allocate.
3743 
3744    Refer to the array `regs_ever_live' to determine which registers to
3745    save; `regs_ever_live[I]' is nonzero if register number I is ever
3746    used in the function.  This function is responsible for knowing
3747    which registers should not be saved even if used.  */
3748 
3749 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3750    of memory.  If any fpu reg is used in the function, we allocate
3751    such a block here, at the bottom of the frame, just in case it's needed.
3752 
3753    If this function is a leaf procedure, then we may choose not
3754    to do a "save" insn.  The decision about whether or not
3755    to do this is made in regclass.c.  */
3756 
3757 static void
3758 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3759 {
3760   /* The function's label and associated .PROC must never be
3761      separated and must be output *after* any profiling declarations
3762      to avoid changing spaces/subspaces within a procedure.  */
3763   ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3764   fputs ("\t.PROC\n", file);
3765 
3766   /* pa_expand_prologue does the dirty work now.  We just need
3767      to output the assembler directives which denote the start
3768      of a function.  */
3769   fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3770   if (crtl->is_leaf)
3771     fputs (",NO_CALLS", file);
3772   else
3773     fputs (",CALLS", file);
3774   if (rp_saved)
3775     fputs (",SAVE_RP", file);
3776 
3777   /* The SAVE_SP flag is used to indicate that register %r3 is stored
3778      at the beginning of the frame and that it is used as the frame
3779      pointer for the frame.  We do this because our current frame
3780      layout doesn't conform to that specified in the HP runtime
3781      documentation and we need a way to indicate to programs such as
3782      GDB where %r3 is saved.  The SAVE_SP flag was chosen because it
3783      isn't used by HP compilers but is supported by the assembler.
3784      However, SAVE_SP is supposed to indicate that the previous stack
3785      pointer has been saved in the frame marker.  */
3786   if (frame_pointer_needed)
3787     fputs (",SAVE_SP", file);
3788 
3789   /* Pass on information about the number of callee register saves
3790      performed in the prologue.
3791 
3792      The compiler is supposed to pass the highest register number
3793      saved, the assembler then has to adjust that number before
3794      entering it into the unwind descriptor (to account for any
3795      caller saved registers with lower register numbers than the
3796      first callee saved register).  */
3797   if (gr_saved)
3798     fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3799 
3800   if (fr_saved)
3801     fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3802 
3803   fputs ("\n\t.ENTRY\n", file);
3804 
3805   remove_useless_addtr_insns (0);
3806 }
3807 
3808 void
3809 pa_expand_prologue (void)
3810 {
3811   int merge_sp_adjust_with_store = 0;
3812   HOST_WIDE_INT size = get_frame_size ();
3813   HOST_WIDE_INT offset;
3814   int i;
3815   rtx insn, tmpreg;
3816 
3817   gr_saved = 0;
3818   fr_saved = 0;
3819   save_fregs = 0;
3820 
3821   /* Compute total size for frame pointer, filler, locals and rounding to
3822      the next word boundary.  Similar code appears in pa_compute_frame_size
3823      and must be changed in tandem with this code.  */
3824   local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3825   if (local_fsize || frame_pointer_needed)
3826     local_fsize += STARTING_FRAME_OFFSET;
3827 
3828   actual_fsize = pa_compute_frame_size (size, &save_fregs);
3829   if (flag_stack_usage_info)
3830     current_function_static_stack_size = actual_fsize;
3831 
3832   /* Compute a few things we will use often.  */
3833   tmpreg = gen_rtx_REG (word_mode, 1);
3834 
3835   /* Save RP first.  The calling conventions manual states RP will
3836      always be stored into the caller's frame at sp - 20 or sp - 16
3837      depending on which ABI is in use.  */
3838   if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3839     {
3840       store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3841       rp_saved = true;
3842     }
3843   else
3844     rp_saved = false;
3845 
3846   /* Allocate the local frame and set up the frame pointer if needed.  */
3847   if (actual_fsize != 0)
3848     {
3849       if (frame_pointer_needed)
3850 	{
3851 	  /* Copy the old frame pointer temporarily into %r1.  Set up the
3852 	     new stack pointer, then store away the saved old frame pointer
3853 	     into the stack at sp and at the same time update the stack
3854 	     pointer by actual_fsize bytes.  There are two versions: the
3855 	     first handles small (< 8k) frames and the second handles
3856 	     large (>= 8k) frames.  */
3857 	  insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
3858 	  if (DO_FRAME_NOTES)
3859 	    RTX_FRAME_RELATED_P (insn) = 1;
3860 
3861 	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3862 	  if (DO_FRAME_NOTES)
3863 	    RTX_FRAME_RELATED_P (insn) = 1;
3864 
3865 	  if (VAL_14_BITS_P (actual_fsize))
3866 	    store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3867 	  else
3868 	    {
3869 	      /* It is incorrect to store the saved frame pointer at *sp,
3870 		 then increment sp (writes beyond the current stack boundary).
3871 
3872 		 So instead use stwm to store at *sp and post-increment the
3873 		 stack pointer as an atomic operation.  Then increment sp to
3874 		 finish allocating the new frame.  */
3875 	      HOST_WIDE_INT adjust1 = 8192 - 64;
3876 	      HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3877 
3878 	      store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3879 	      set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3880 			      adjust2, 1);
3881 	    }
3882 
3883 	  /* We set SAVE_SP in frames that need a frame pointer.  Thus,
3884 	     we need to store the previous stack pointer (frame pointer)
3885 	     into the frame marker on targets that use the HP unwind
3886 	     library.  This allows the HP unwind library to be used to
3887 	     unwind GCC frames.  However, we are not fully compatible
3888 	     with the HP library because our frame layout differs from
3889 	     that specified in the HP runtime specification.
3890 
3891 	     We don't want a frame note on this instruction as the frame
3892 	     marker moves during dynamic stack allocation.
3893 
3894 	     This instruction also serves as a blockage to prevent
3895 	     register spills from being scheduled before the stack
3896 	     pointer is raised.  This is necessary as we store
3897 	     registers using the frame pointer as a base register,
3898 	     and the frame pointer is set before sp is raised.  */
3899 	  if (TARGET_HPUX_UNWIND_LIBRARY)
3900 	    {
3901 	      rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3902 				       GEN_INT (TARGET_64BIT ? -8 : -4));
3903 
3904 	      emit_move_insn (gen_rtx_MEM (word_mode, addr),
3905 			      hard_frame_pointer_rtx);
3906 	    }
3907 	  else
3908 	    emit_insn (gen_blockage ());
3909 	}
3910       /* No frame pointer needed.  */
3911       else
3912 	{
3913 	  /* In some cases we can perform the first callee register save
3914 	     and allocate the stack frame at the same time.  If so, just
3915 	     make a note of it and defer allocating the frame until saving
3916 	     the callee registers.  */
3917 	  if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3918 	    merge_sp_adjust_with_store = 1;
3919 	  /* Cannot optimize.  Adjust the stack frame by actual_fsize
3920 	     bytes.  */
3921 	  else
3922 	    set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3923 			    actual_fsize, 1);
3924 	}
3925     }
3926 
3927   /* Normal register save.
3928 
3929      Do not save the frame pointer in the frame_pointer_needed case.  It
3930      was done earlier.  */
3931   if (frame_pointer_needed)
3932     {
3933       offset = local_fsize;
3934 
3935       /* Saving the EH return data registers in the frame is the simplest
3936 	 way to get the frame unwind information emitted.  We put them
3937 	 just before the general registers.  */
3938       if (DO_FRAME_NOTES && crtl->calls_eh_return)
3939 	{
3940 	  unsigned int i, regno;
3941 
3942 	  for (i = 0; ; ++i)
3943 	    {
3944 	      regno = EH_RETURN_DATA_REGNO (i);
3945 	      if (regno == INVALID_REGNUM)
3946 		break;
3947 
3948 	      store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
3949 	      offset += UNITS_PER_WORD;
3950 	    }
3951 	}
3952 
3953       for (i = 18; i >= 4; i--)
3954 	if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3955 	  {
3956 	    store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
3957 	    offset += UNITS_PER_WORD;
3958 	    gr_saved++;
3959 	  }
3960       /* Account for %r3 which is saved in a special place.  */
3961       gr_saved++;
3962     }
3963   /* No frame pointer needed.  */
3964   else
3965     {
3966       offset = local_fsize - actual_fsize;
3967 
3968       /* Saving the EH return data registers in the frame is the simplest
3969          way to get the frame unwind information emitted.  */
3970       if (DO_FRAME_NOTES && crtl->calls_eh_return)
3971 	{
3972 	  unsigned int i, regno;
3973 
3974 	  for (i = 0; ; ++i)
3975 	    {
3976 	      regno = EH_RETURN_DATA_REGNO (i);
3977 	      if (regno == INVALID_REGNUM)
3978 		break;
3979 
3980 	      /* If merge_sp_adjust_with_store is nonzero, then we can
3981 		 optimize the first save.  */
3982 	      if (merge_sp_adjust_with_store)
3983 		{
3984 		  store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3985 		  merge_sp_adjust_with_store = 0;
3986 		}
3987 	      else
3988 		store_reg (regno, offset, STACK_POINTER_REGNUM);
3989 	      offset += UNITS_PER_WORD;
3990 	    }
3991 	}
3992 
3993       for (i = 18; i >= 3; i--)
3994       	if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3995 	  {
3996 	    /* If merge_sp_adjust_with_store is nonzero, then we can
3997 	       optimize the first GR save.  */
3998 	    if (merge_sp_adjust_with_store)
3999 	      {
4000 		store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4001 		merge_sp_adjust_with_store = 0;
4002 	      }
4003 	    else
4004 	      store_reg (i, offset, STACK_POINTER_REGNUM);
4005 	    offset += UNITS_PER_WORD;
4006 	    gr_saved++;
4007 	  }
4008 
4009       /* If we wanted to merge the SP adjustment with a GR save, but we never
4010 	 did any GR saves, then just emit the adjustment here.  */
4011       if (merge_sp_adjust_with_store)
4012 	set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4013 			actual_fsize, 1);
4014     }
4015 
4016   /* The hppa calling conventions say that %r19, the pic offset
4017      register, is saved at sp - 32 (in this function's frame)
4018      when generating PIC code.  FIXME:  What is the correct thing
4019      to do for functions which make no calls and allocate no
4020      frame?  Do we need to allocate a frame, or can we just omit
4021      the save?   For now we'll just omit the save.
4022 
4023      We don't want a note on this insn as the frame marker can
4024      move if there is a dynamic stack allocation.  */
4025   if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
4026     {
4027       rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4028 
4029       emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4030 
4031     }
4032 
4033   /* Align pointer properly (doubleword boundary).  */
4034   offset = (offset + 7) & ~7;
4035 
4036   /* Floating point register store.  */
4037   if (save_fregs)
4038     {
4039       rtx base;
4040 
4041       /* First get the frame or stack pointer to the start of the FP register
4042 	 save area.  */
4043       if (frame_pointer_needed)
4044 	{
4045 	  set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4046 	  base = hard_frame_pointer_rtx;
4047 	}
4048       else
4049 	{
4050 	  set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4051 	  base = stack_pointer_rtx;
4052 	}
4053 
4054       /* Now actually save the FP registers.  */
4055       for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4056 	{
4057 	  if (df_regs_ever_live_p (i)
4058 	      || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4059 	    {
4060 	      rtx addr, insn, reg;
4061 	      addr = gen_rtx_MEM (DFmode,
4062 				  gen_rtx_POST_INC (word_mode, tmpreg));
4063 	      reg = gen_rtx_REG (DFmode, i);
4064 	      insn = emit_move_insn (addr, reg);
4065 	      if (DO_FRAME_NOTES)
4066 		{
4067 		  RTX_FRAME_RELATED_P (insn) = 1;
4068 		  if (TARGET_64BIT)
4069 		    {
4070 		      rtx mem = gen_rtx_MEM (DFmode,
4071 					     plus_constant (Pmode, base,
4072 							    offset));
4073 		      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4074 				    gen_rtx_SET (VOIDmode, mem, reg));
4075 		    }
4076 		  else
4077 		    {
4078 		      rtx meml = gen_rtx_MEM (SFmode,
4079 					      plus_constant (Pmode, base,
4080 							     offset));
4081 		      rtx memr = gen_rtx_MEM (SFmode,
4082 					      plus_constant (Pmode, base,
4083 							     offset + 4));
4084 		      rtx regl = gen_rtx_REG (SFmode, i);
4085 		      rtx regr = gen_rtx_REG (SFmode, i + 1);
4086 		      rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
4087 		      rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
4088 		      rtvec vec;
4089 
4090 		      RTX_FRAME_RELATED_P (setl) = 1;
4091 		      RTX_FRAME_RELATED_P (setr) = 1;
4092 		      vec = gen_rtvec (2, setl, setr);
4093 		      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4094 				    gen_rtx_SEQUENCE (VOIDmode, vec));
4095 		    }
4096 		}
4097 	      offset += GET_MODE_SIZE (DFmode);
4098 	      fr_saved++;
4099 	    }
4100 	}
4101     }
4102 }
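
/* A schematic of the common small-frame, frame-pointer case produced
   above (32-bit, frame size FS < 8k; the exact insns vary with frame
   size and target flags):

	stw %r2,-20(%r30)	; save the return pointer
	copy %r3,%r1		; old frame pointer to temp
	copy %r30,%r3		; new frame pointer = old sp
	stwm %r1,FS(%r30)	; store old fp, raise sp by FS
	stw %rN,off(%r3)	; callee-saved GR stores follow
   */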
4103 
4104 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4105    Handle case where DISP > 8k by using the add_high_const patterns.  */
4106 
4107 static void
4108 load_reg (int reg, HOST_WIDE_INT disp, int base)
4109 {
4110   rtx dest = gen_rtx_REG (word_mode, reg);
4111   rtx basereg = gen_rtx_REG (Pmode, base);
4112   rtx src;
4113 
4114   if (VAL_14_BITS_P (disp))
4115     src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
4116   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4117     {
4118       rtx delta = GEN_INT (disp);
4119       rtx tmpreg = gen_rtx_REG (Pmode, 1);
4120 
4121       emit_move_insn (tmpreg, delta);
4122       if (TARGET_DISABLE_INDEXING)
4123 	{
4124 	  emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4125 	  src = gen_rtx_MEM (word_mode, tmpreg);
4126 	}
4127       else
4128 	src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4129     }
4130   else
4131     {
4132       rtx delta = GEN_INT (disp);
4133       rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4134       rtx tmpreg = gen_rtx_REG (Pmode, 1);
4135 
4136       emit_move_insn (tmpreg, high);
4137       src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4138     }
4139 
4140   emit_move_insn (dest, src);
4141 }
4142 
4143 /* Update the total code bytes output to the text section.  */
4144 
4145 static void
4146 update_total_code_bytes (unsigned int nbytes)
4147 {
4148   if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4149       && !IN_NAMED_SECTION_P (cfun->decl))
4150     {
4151       unsigned int old_total = total_code_bytes;
4152 
4153       total_code_bytes += nbytes;
4154 
4155       /* Be prepared to handle overflows.  */
4156       if (old_total > total_code_bytes)
4157         total_code_bytes = UINT_MAX;
4158     }
4159 }
4160 
4161 /* This function generates the assembly code for function exit.
4162    Args are as for output_function_prologue ().
4163 
4164    The function epilogue should not depend on the current stack
4165    pointer!  It should use the frame pointer only.  This is mandatory
4166    because of alloca; we also take advantage of it to omit stack
4167    adjustments before returning.  */
4168 
4169 static void
4170 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4171 {
4172   rtx insn = get_last_insn ();
4173   bool extra_nop;
4174 
4175   /* pa_expand_epilogue does the dirty work now.  We just need
4176      to output the assembler directives which denote the end
4177      of a function.
4178 
4179      To make debuggers happy, emit a nop if the epilogue was completely
4180      eliminated due to a volatile call as the last insn in the
4181      current function.  That way the return address (in %r2) will
4182      always point to a valid instruction in the current function.  */
4183 
4184   /* Get the last real insn.  */
4185   if (GET_CODE (insn) == NOTE)
4186     insn = prev_real_insn (insn);
4187 
4188   /* If it is a sequence, then look inside.  */
4189   if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
4190     insn = XVECEXP (PATTERN (insn), 0, 0);
4191 
4192   /* If insn is a CALL_INSN, then it must be a call to a volatile
4193      function (otherwise there would be epilogue insns).  */
4194   if (insn && GET_CODE (insn) == CALL_INSN)
4195     {
4196       fputs ("\tnop\n", file);
4197       extra_nop = true;
4198     }
4199   else
4200     extra_nop = false;
4201 
4202   fputs ("\t.EXIT\n\t.PROCEND\n", file);
4203 
4204   if (TARGET_SOM && TARGET_GAS)
4205     {
4206       /* We are done with this subspace except possibly for some additional
4207 	 debug information.  Forget that we are in this subspace to ensure
4208 	 that the next function is output in its own subspace.  */
4209       in_section = NULL;
4210       cfun->machine->in_nsubspa = 2;
4211     }
4212 
4213   /* Thunks do their own insn accounting.  */
4214   if (cfun->is_thunk)
4215     return;
4216 
4217   if (INSN_ADDRESSES_SET_P ())
4218     {
4219       last_address = extra_nop ? 4 : 0;
4220       insn = get_last_nonnote_insn ();
4221       if (insn)
4222 	{
4223 	  last_address += INSN_ADDRESSES (INSN_UID (insn));
4224 	  if (INSN_P (insn))
4225 	    last_address += insn_default_length (insn);
4226 	}
4227       last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4228 		      & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4229     }
4230   else
4231     last_address = UINT_MAX;
4232 
4233   /* Finally, update the total number of code bytes output so far.  */
4234   update_total_code_bytes (last_address);
4235 }
4236 
4237 void
4238 pa_expand_epilogue (void)
4239 {
4240   rtx tmpreg;
4241   HOST_WIDE_INT offset;
4242   HOST_WIDE_INT ret_off = 0;
4243   int i;
4244   int merge_sp_adjust_with_load = 0;
4245 
4246   /* We will use this often.  */
4247   tmpreg = gen_rtx_REG (word_mode, 1);
4248 
4249   /* Try to restore RP early to avoid load/use interlocks when
4250      RP gets used in the return (bv) instruction.  This appears to still
4251      be necessary even when we schedule the prologue and epilogue.  */
4252   if (rp_saved)
4253     {
4254       ret_off = TARGET_64BIT ? -16 : -20;
4255       if (frame_pointer_needed)
4256 	{
4257 	  load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4258 	  ret_off = 0;
4259 	}
4260       else
4261 	{
4262 	  /* No frame pointer, and stack is smaller than 8k.  */
4263 	  if (VAL_14_BITS_P (ret_off - actual_fsize))
4264 	    {
4265 	      load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4266 	      ret_off = 0;
4267 	    }
4268 	}
4269     }
4270 
4271   /* General register restores.  */
4272   if (frame_pointer_needed)
4273     {
4274       offset = local_fsize;
4275 
4276       /* If the current function calls __builtin_eh_return, then we need
4277          to restore the saved EH data registers.  */
4278       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4279 	{
4280 	  unsigned int i, regno;
4281 
4282 	  for (i = 0; ; ++i)
4283 	    {
4284 	      regno = EH_RETURN_DATA_REGNO (i);
4285 	      if (regno == INVALID_REGNUM)
4286 		break;
4287 
4288 	      load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4289 	      offset += UNITS_PER_WORD;
4290 	    }
4291 	}
4292 
4293       for (i = 18; i >= 4; i--)
4294 	if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4295 	  {
4296 	    load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4297 	    offset += UNITS_PER_WORD;
4298 	  }
4299     }
4300   else
4301     {
4302       offset = local_fsize - actual_fsize;
4303 
4304       /* If the current function calls __builtin_eh_return, then we need
4305          to restore the saved EH data registers.  */
4306       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4307 	{
4308 	  unsigned int i, regno;
4309 
4310 	  for (i = 0; ; ++i)
4311 	    {
4312 	      regno = EH_RETURN_DATA_REGNO (i);
4313 	      if (regno == INVALID_REGNUM)
4314 		break;
4315 
4316 	      /* Only for the first load.
4317 	         merge_sp_adjust_with_load holds the register load
4318 	         with which we will merge the sp adjustment.  */
4319 	      if (merge_sp_adjust_with_load == 0
4320 		  && local_fsize == 0
4321 		  && VAL_14_BITS_P (-actual_fsize))
4322 	        merge_sp_adjust_with_load = regno;
4323 	      else
4324 		load_reg (regno, offset, STACK_POINTER_REGNUM);
4325 	      offset += UNITS_PER_WORD;
4326 	    }
4327 	}
4328 
4329       for (i = 18; i >= 3; i--)
4330 	{
4331 	  if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4332 	    {
4333 	      /* Only for the first load.
4334 	         merge_sp_adjust_with_load holds the register load
4335 	         with which we will merge the sp adjustment.  */
4336 	      if (merge_sp_adjust_with_load == 0
4337 		  && local_fsize == 0
4338 		  && VAL_14_BITS_P (-actual_fsize))
4339 	        merge_sp_adjust_with_load = i;
4340 	      else
4341 		load_reg (i, offset, STACK_POINTER_REGNUM);
4342 	      offset += UNITS_PER_WORD;
4343 	    }
4344 	}
4345     }
4346 
4347   /* Align pointer properly (doubleword boundary).  */
4348   offset = (offset + 7) & ~7;
4349 
4350   /* FP register restores.  */
4351   if (save_fregs)
4352     {
4353       /* Adjust the register to index off of.  */
4354       if (frame_pointer_needed)
4355 	set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4356       else
4357 	set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4358 
4359       /* Actually do the restores now.  */
4360       for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4361 	if (df_regs_ever_live_p (i)
4362 	    || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4363 	  {
4364 	    rtx src = gen_rtx_MEM (DFmode,
4365 				   gen_rtx_POST_INC (word_mode, tmpreg));
4366 	    rtx dest = gen_rtx_REG (DFmode, i);
4367 	    emit_move_insn (dest, src);
4368 	  }
4369     }
4370 
4371   /* Emit a blockage insn here to keep these insns from being moved to
4372      an earlier spot in the epilogue, or into the main instruction stream.
4373 
4374      This is necessary as we must not cut the stack back before all the
4375      restores are finished.  */
4376   emit_insn (gen_blockage ());
4377 
4378   /* Reset stack pointer (and possibly frame pointer).  The stack
4379      pointer is initially set to fp + 64 to avoid a race condition.  */
4380   if (frame_pointer_needed)
4381     {
4382       rtx delta = GEN_INT (-64);
4383 
4384       set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4385       emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4386 			       stack_pointer_rtx, delta));
4387     }
4388   /* If we were deferring a callee register restore, do it now.  */
4389   else if (merge_sp_adjust_with_load)
4390     {
4391       rtx delta = GEN_INT (-actual_fsize);
4392       rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4393 
4394       emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4395     }
4396   else if (actual_fsize != 0)
4397     set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4398 		    - actual_fsize, 0);
4399 
4400   /* If we haven't restored %r2 yet (no frame pointer, and a stack
4401      frame greater than 8k), do so now.  */
4402   if (ret_off != 0)
4403     load_reg (2, ret_off, STACK_POINTER_REGNUM);
4404 
4405   if (DO_FRAME_NOTES && crtl->calls_eh_return)
4406     {
4407       rtx sa = EH_RETURN_STACKADJ_RTX;
4408 
4409       emit_insn (gen_blockage ());
4410       emit_insn (TARGET_64BIT
4411 		 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4412 		 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4413     }
4414 }
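
/* The matching frame-pointer epilogue reloads the return pointer early
   ("ldw -20(%r3),%r2"), restores the saved GRs and FRs, and then cuts
   the stack back, roughly "ldo 64(%r3),%r30" followed by
   "ldwm -64(%r30),%r3".  */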
4415 
4416 bool
4417 pa_can_use_return_insn (void)
4418 {
4419   if (!reload_completed)
4420     return false;
4421 
4422   if (frame_pointer_needed)
4423     return false;
4424 
4425   if (df_regs_ever_live_p (2))
4426     return false;
4427 
4428   if (crtl->profile)
4429     return false;
4430 
4431   return pa_compute_frame_size (get_frame_size (), 0) == 0;
4432 }
4433 
4434 rtx
4435 hppa_pic_save_rtx (void)
4436 {
4437   return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4438 }
4439 
4440 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4441 #define NO_DEFERRED_PROFILE_COUNTERS 0
4442 #endif
4443 
4444 
4445 /* Vector of funcdef numbers.  */
4446 static vec<int> funcdef_nos;
4447 
4448 /* Output deferred profile counters.  */
4449 static void
4450 output_deferred_profile_counters (void)
4451 {
4452   unsigned int i;
4453   int align, n;
4454 
4455   if (funcdef_nos.is_empty ())
4456    return;
4457 
4458   switch_to_section (data_section);
4459   align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4460   ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4461 
4462   for (i = 0; funcdef_nos.iterate (i, &n); i++)
4463     {
4464       targetm.asm_out.internal_label (asm_out_file, "LP", n);
4465       assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4466     }
4467 
4468   funcdef_nos.release ();
4469 }
4470 
4471 void
4472 hppa_profile_hook (int label_no)
4473 {
4474   /* We use SImode for the address of the function in both 32 and
4475      64-bit code to avoid having to provide DImode versions of the
4476      lcla2 and load_offset_label_address insn patterns.  */
4477   rtx reg = gen_reg_rtx (SImode);
4478   rtx label_rtx = gen_label_rtx ();
4479   rtx begin_label_rtx, call_insn;
4480   char begin_label_name[16];
4481 
4482   ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4483 			       label_no);
4484   begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4485 
4486   if (TARGET_64BIT)
4487     emit_move_insn (arg_pointer_rtx,
4488 		    gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4489 				  GEN_INT (64)));
4490 
4491   emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4492 
4493   /* The address of the function is loaded into %r25 with an instruction-
4494      relative sequence that avoids the use of relocations.  The sequence
4495      is split so that the load_offset_label_address instruction can
4496      occupy the delay slot of the call to _mcount.  */
4497   if (TARGET_PA_20)
4498     emit_insn (gen_lcla2 (reg, label_rtx));
4499   else
4500     emit_insn (gen_lcla1 (reg, label_rtx));
4501 
4502   emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4503 					    reg, begin_label_rtx, label_rtx));
4504 
4505 #if !NO_DEFERRED_PROFILE_COUNTERS
4506   {
4507     rtx count_label_rtx, addr, r24;
4508     char count_label_name[16];
4509 
4510     funcdef_nos.safe_push (label_no);
4511     ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4512     count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4513 
4514     addr = force_reg (Pmode, count_label_rtx);
4515     r24 = gen_rtx_REG (Pmode, 24);
4516     emit_move_insn (r24, addr);
4517 
4518     call_insn =
4519       emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4520 					     gen_rtx_SYMBOL_REF (Pmode,
4521 								 "_mcount")),
4522 				GEN_INT (TARGET_64BIT ? 24 : 12)));
4523 
4524     use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4525   }
4526 #else
4527 
4528   call_insn =
4529     emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4530 					   gen_rtx_SYMBOL_REF (Pmode,
4531 							       "_mcount")),
4532 			      GEN_INT (TARGET_64BIT ? 16 : 8)));
4533 
4534 #endif
4535 
4536   use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4537   use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4538 
4539   /* Indicate the _mcount call cannot throw, nor will it execute a
4540      non-local goto.  */
4541   make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4542 }
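
/* To summarize the _mcount argument setup above: %r26 holds the
   caller's return pointer (copied from %r2), %r25 the address of the
   current function (computed pc-relatively via the lcla patterns),
   and, when deferred counters are in use, %r24 the address of this
   function's LP counter word.  */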
4543 
4544 /* Fetch the return address for the frame COUNT steps up from
4545    the current frame, after the prologue.  FRAMEADDR is the
4546    frame pointer of the COUNT frame.
4547 
4548    We want to ignore any export stub remnants here.  To handle this,
4549    we examine the code at the return address, and if it is an export
4550    stub, we return a memory rtx for the stub return address stored
4551    at frame-24.
4552 
4553    The value returned is used in two different ways:
4554 
4555 	1. To find a function's caller.
4556 
4557 	2. To change the return address for a function.
4558 
4559    This function handles most instances of case 1; however, it will
4560    fail if there are two levels of stubs to execute on the return
4561    path.  The only way I believe that can happen is if the return value
4562    needs a parameter relocation, which never happens for C code.
4563 
4564    This function handles most instances of case 2; however, it will
4565    fail if we did not originally have stub code on the return path
4566    but will need stub code on the new return path.  This can happen if
4567    the caller & callee are both in the main program, but the new
4568    return location is in a shared library.  */
4569 
4570 rtx
4571 pa_return_addr_rtx (int count, rtx frameaddr)
4572 {
4573   rtx label;
4574   rtx rp;
4575   rtx saved_rp;
4576   rtx ins;
4577 
4578   /* The instruction stream at the return address of a PA1.X export stub is:
4579 
4580 	0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
4581 	0x004010a1 | stub+12:  ldsid (sr0,rp),r1
4582 	0x00011820 | stub+16:  mtsp r1,sr0
4583 	0xe0400002 | stub+20:  be,n 0(sr0,rp)
4584 
4585      0xe0400002 must be specified as -532676606 so that it won't be
4586      rejected as an invalid immediate operand on 64-bit hosts.
4587 
4588      The instruction stream at the return address of a PA2.0 export stub is:
4589 
4590 	0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
4591 	0xe840d002 | stub+12:  bve,n (rp)
4592   */
4593 
4594   HOST_WIDE_INT insns[4];
4595   int i, len;
4596 
4597   if (count != 0)
4598     return NULL_RTX;
4599 
4600   rp = get_hard_reg_initial_val (Pmode, 2);
4601 
4602   if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4603     return rp;
4604 
4605   /* If there is no export stub then just use the value saved from
4606      the return pointer register.  */
4607 
4608   saved_rp = gen_reg_rtx (Pmode);
4609   emit_move_insn (saved_rp, rp);
4610 
4611   /* Get pointer to the instruction stream.  We have to mask out the
4612      privilege level from the two low order bits of the return address
4613      pointer here so that ins will point to the start of the first
4614      instruction that would have been executed if we returned.  */
4615   ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4616   label = gen_label_rtx ();
4617 
4618   if (TARGET_PA_20)
4619     {
4620       insns[0] = 0x4bc23fd1;
4621       insns[1] = -398405630;	/* 0xe840d002 */
4622       len = 2;
4623     }
4624   else
4625     {
4626       insns[0] = 0x4bc23fd1;
4627       insns[1] = 0x004010a1;
4628       insns[2] = 0x00011820;
4629       insns[3] = -532676606;	/* 0xe0400002 */
4630       len = 4;
4631     }
4632 
4633   /* Check the instruction stream at the normal return address for the
4634      export stub.  If it is an export stub, then our return address is
4635      really in -24[frameaddr].  */
4636 
4637   for (i = 0; i < len; i++)
4638     {
4639       rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
4640       rtx op1 = GEN_INT (insns[i]);
4641       emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4642     }
4643 
4644   /* Here we know that our return address points to an export
4645      stub.  We don't want to return the address of the export stub,
4646      but rather the return address of the export stub.  That return
4647      address is stored at -24[frameaddr].  */
4648 
4649   emit_move_insn (saved_rp,
4650 		  gen_rtx_MEM (Pmode,
4651 			       memory_address (Pmode,
4652 					       plus_constant (Pmode, frameaddr,
4653 							      -24))));
4654 
4655   emit_label (label);
4656 
4657   return saved_rp;
4658 }
4659 
4660 void
4661 pa_emit_bcond_fp (rtx operands[])
4662 {
4663   enum rtx_code code = GET_CODE (operands[0]);
4664   rtx operand0 = operands[1];
4665   rtx operand1 = operands[2];
4666   rtx label = operands[3];
4667 
4668   emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4669 		          gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4670 
4671   emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4672 			       gen_rtx_IF_THEN_ELSE (VOIDmode,
4673 						     gen_rtx_fmt_ee (NE,
4674 							      VOIDmode,
4675 							      gen_rtx_REG (CCFPmode, 0),
4676 							      const0_rtx),
4677 						     gen_rtx_LABEL_REF (VOIDmode, label),
4678 						     pc_rtx)));
4679 
4680 }
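
/* For example, given (lt a b) and target LABEL, the two insns above
   set the virtual CCFP register (hard register 0 in CCFPmode) from the
   comparison and then jump on (ne CCFP 0); the fcmp and fbranch
   patterns in pa.md match these.  */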
4681 
4682 /* Adjust the cost of a scheduling dependency.  Return the new cost of
4683    a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
4684 
4685 static int
4686 pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4687 {
4688   enum attr_type attr_type;
4689 
4690   /* Don't adjust costs for a pa8000 chip, also do not adjust any
4691      true dependencies as they are described with bypasses now.  */
4692   if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4693     return cost;
4694 
4695   if (! recog_memoized (insn))
4696     return 0;
4697 
4698   attr_type = get_attr_type (insn);
4699 
4700   switch (REG_NOTE_KIND (link))
4701     {
4702     case REG_DEP_ANTI:
4703       /* Anti dependency; DEP_INSN reads a register that INSN writes some
4704 	 cycles later.  */
4705 
4706       if (attr_type == TYPE_FPLOAD)
4707 	{
4708 	  rtx pat = PATTERN (insn);
4709 	  rtx dep_pat = PATTERN (dep_insn);
4710 	  if (GET_CODE (pat) == PARALLEL)
4711 	    {
4712 	      /* This happens for the fldXs,mb patterns.  */
4713 	      pat = XVECEXP (pat, 0, 0);
4714 	    }
4715 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4716 	    /* If this happens, we have to extend this to schedule
4717 	       optimally.  Return 0 for now.  */
4718 	  return 0;
4719 
4720 	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4721 	    {
4722 	      if (! recog_memoized (dep_insn))
4723 		return 0;
4724 	      switch (get_attr_type (dep_insn))
4725 		{
4726 		case TYPE_FPALU:
4727 		case TYPE_FPMULSGL:
4728 		case TYPE_FPMULDBL:
4729 		case TYPE_FPDIVSGL:
4730 		case TYPE_FPDIVDBL:
4731 		case TYPE_FPSQRTSGL:
4732 		case TYPE_FPSQRTDBL:
4733 		  /* A fpload can't be issued until one cycle before a
4734 		     preceding arithmetic operation has finished if
4735 		     the target of the fpload is any of the sources
4736 		     (or destination) of the arithmetic operation.  */
4737 		  return insn_default_latency (dep_insn) - 1;
4738 
4739 		default:
4740 		  return 0;
4741 		}
4742 	    }
4743 	}
4744       else if (attr_type == TYPE_FPALU)
4745 	{
4746 	  rtx pat = PATTERN (insn);
4747 	  rtx dep_pat = PATTERN (dep_insn);
4748 	  if (GET_CODE (pat) == PARALLEL)
4749 	    {
4750 	      /* This happens for the fldXs,mb patterns.  */
4751 	      pat = XVECEXP (pat, 0, 0);
4752 	    }
4753 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4754 	    /* If this happens, we have to extend this to schedule
4755 	       optimally.  Return 0 for now.  */
4756 	  return 0;
4757 
4758 	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4759 	    {
4760 	      if (! recog_memoized (dep_insn))
4761 		return 0;
4762 	      switch (get_attr_type (dep_insn))
4763 		{
4764 		case TYPE_FPDIVSGL:
4765 		case TYPE_FPDIVDBL:
4766 		case TYPE_FPSQRTSGL:
4767 		case TYPE_FPSQRTDBL:
4768 		  /* An ALU flop can't be issued until two cycles before a
4769 		     preceding divide or sqrt operation has finished if
4770 		     the target of the ALU flop is any of the sources
4771 		     (or destination) of the divide or sqrt operation.  */
4772 		  return insn_default_latency (dep_insn) - 2;
4773 
4774 		default:
4775 		  return 0;
4776 		}
4777 	    }
4778 	}
4779 
4780       /* For other anti dependencies, the cost is 0.  */
4781       return 0;
4782 
4783     case REG_DEP_OUTPUT:
4784       /* Output dependency; DEP_INSN writes a register that INSN writes some
4785 	 cycles later.  */
4786       if (attr_type == TYPE_FPLOAD)
4787 	{
4788 	  rtx pat = PATTERN (insn);
4789 	  rtx dep_pat = PATTERN (dep_insn);
4790 	  if (GET_CODE (pat) == PARALLEL)
4791 	    {
4792 	      /* This happens for the fldXs,mb patterns.  */
4793 	      pat = XVECEXP (pat, 0, 0);
4794 	    }
4795 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4796 	    /* If this happens, we have to extend this to schedule
4797 	       optimally.  Return 0 for now.  */
4798 	  return 0;
4799 
4800 	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4801 	    {
4802 	      if (! recog_memoized (dep_insn))
4803 		return 0;
4804 	      switch (get_attr_type (dep_insn))
4805 		{
4806 		case TYPE_FPALU:
4807 		case TYPE_FPMULSGL:
4808 		case TYPE_FPMULDBL:
4809 		case TYPE_FPDIVSGL:
4810 		case TYPE_FPDIVDBL:
4811 		case TYPE_FPSQRTSGL:
4812 		case TYPE_FPSQRTDBL:
4813 		  /* A fpload can't be issued until one cycle before a
4814 		     preceding arithmetic operation has finished if
4815 		     the target of the fpload is the destination of the
4816 		     arithmetic operation.
4817 
4818 		     Exception: For PA7100LC, PA7200 and PA7300, the cost
4819 		     is 3 cycles, unless they bundle together.   We also
4820 		     pay the penalty if the second insn is a fpload.  */
4821 		  return insn_default_latency (dep_insn) - 1;
4822 
4823 		default:
4824 		  return 0;
4825 		}
4826 	    }
4827 	}
4828       else if (attr_type == TYPE_FPALU)
4829 	{
4830 	  rtx pat = PATTERN (insn);
4831 	  rtx dep_pat = PATTERN (dep_insn);
4832 	  if (GET_CODE (pat) == PARALLEL)
4833 	    {
4834 	      /* This happens for the fldXs,mb patterns.  */
4835 	      pat = XVECEXP (pat, 0, 0);
4836 	    }
4837 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4838 	    /* If this happens, we have to extend this to schedule
4839 	       optimally.  Return 0 for now.  */
4840 	  return 0;
4841 
4842 	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4843 	    {
4844 	      if (! recog_memoized (dep_insn))
4845 		return 0;
4846 	      switch (get_attr_type (dep_insn))
4847 		{
4848 		case TYPE_FPDIVSGL:
4849 		case TYPE_FPDIVDBL:
4850 		case TYPE_FPSQRTSGL:
4851 		case TYPE_FPSQRTDBL:
4852 		  /* An ALU flop can't be issued until two cycles before a
4853 		     preceding divide or sqrt operation has finished if
4854 		     the target of the ALU flop is also the target of
4855 		     the divide or sqrt operation.  */
4856 		  return insn_default_latency (dep_insn) - 2;
4857 
4858 		default:
4859 		  return 0;
4860 		}
4861 	    }
4862 	}
4863 
4864       /* For other output dependencies, the cost is 0.  */
4865       return 0;
4866 
4867     default:
4868       gcc_unreachable ();
4869     }
4870 }
4871 
4872 /* Adjust scheduling priorities.  We use this to try to keep addil
4873    and the next use of %r1 close together.  */
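/* E.g., dividing the priority of an insn that stores through a LO_SUM
   symbolic address by 8 (priority >>= 3) makes the scheduler less eager
   to move it away from the addil that computes %r1.  This is an
   illustrative reading of the shifts below, not a documented rule.  */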
4874 static int
4875 pa_adjust_priority (rtx insn, int priority)
4876 {
4877   rtx set = single_set (insn);
4878   rtx src, dest;
4879   if (set)
4880     {
4881       src = SET_SRC (set);
4882       dest = SET_DEST (set);
4883       if (GET_CODE (src) == LO_SUM
4884 	  && symbolic_operand (XEXP (src, 1), VOIDmode)
4885 	  && ! read_only_operand (XEXP (src, 1), VOIDmode))
4886 	priority >>= 3;
4887 
4888       else if (GET_CODE (src) == MEM
4889 	       && GET_CODE (XEXP (src, 0)) == LO_SUM
4890 	       && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4891 	       && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4892 	priority >>= 1;
4893 
4894       else if (GET_CODE (dest) == MEM
4895 	       && GET_CODE (XEXP (dest, 0)) == LO_SUM
4896 	       && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4897 	       && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4898 	priority >>= 3;
4899     }
4900   return priority;
4901 }
4902 
4903 /* The 700 can only issue a single insn at a time.
4904    The 7XXX processors can issue two insns at a time.
4905    The 8000 can issue four insns at a time.  */
4906 static int
4907 pa_issue_rate (void)
4908 {
4909   switch (pa_cpu)
4910     {
4911     case PROCESSOR_700:		return 1;
4912     case PROCESSOR_7100:	return 2;
4913     case PROCESSOR_7100LC:	return 2;
4914     case PROCESSOR_7200:	return 2;
4915     case PROCESSOR_7300:	return 2;
4916     case PROCESSOR_8000:	return 4;
4917 
4918     default:
4919       gcc_unreachable ();
4920     }
4921 }
4922 
4923 
4924 
4925 /* Return the length of INSN, including any adjustment needed beyond
4926    its already-computed length LENGTH.  Return LENGTH if no adjustment
4927    is necessary.
4928 
4929    Also compute the length of an inline block move here as it is too
4930    complicated to express as a length attribute in pa.md.  */
4931 int
4932 pa_adjust_insn_length (rtx insn, int length)
4933 {
4934   rtx pat = PATTERN (insn);
4935 
4936   /* If length is negative or undefined, provide initial length.  */
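  /* The unsigned cast folds both tests into one comparison: a negative
     LENGTH wraps to a huge unsigned value (e.g., -1 becomes 0xffffffff
     when int is 32 bits), so it compares >= INT_MAX just as an
     "undefined" length does.  */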
4937   if ((unsigned int) length >= INT_MAX)
4938     {
4939       if (GET_CODE (pat) == SEQUENCE)
4940 	insn = XVECEXP (pat, 0, 0);
4941 
4942       switch (get_attr_type (insn))
4943 	{
4944 	case TYPE_MILLI:
4945 	  length = pa_attr_length_millicode_call (insn);
4946 	  break;
4947 	case TYPE_CALL:
4948 	  length = pa_attr_length_call (insn, 0);
4949 	  break;
4950 	case TYPE_SIBCALL:
4951 	  length = pa_attr_length_call (insn, 1);
4952 	  break;
4953 	case TYPE_DYNCALL:
4954 	  length = pa_attr_length_indirect_call (insn);
4955 	  break;
4956 	case TYPE_SH_FUNC_ADRS:
4957 	  length = pa_attr_length_millicode_call (insn) + 20;
4958 	  break;
4959 	default:
4960 	  gcc_unreachable ();
4961 	}
4962     }
4963 
4964   /* Jumps inside switch tables which have unfilled delay slots need
4965      adjustment.  */
4966   if (GET_CODE (insn) == JUMP_INSN
4967       && GET_CODE (pat) == PARALLEL
4968       && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
4969     length += 4;
4970   /* Block move pattern.  */
4971   else if (GET_CODE (insn) == INSN
4972 	   && GET_CODE (pat) == PARALLEL
4973 	   && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4974 	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4975 	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4976 	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4977 	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4978     length += compute_movmem_length (insn) - 4;
4979   /* Block clear pattern.  */
4980   else if (GET_CODE (insn) == INSN
4981 	   && GET_CODE (pat) == PARALLEL
4982 	   && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4983 	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4984 	   && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4985 	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
4986     length += compute_clrmem_length (insn) - 4;
4987   /* Conditional branch with an unfilled delay slot.  */
4988   else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4989     {
4990       /* Adjust a short backwards conditional with an unfilled delay slot.  */
4991       if (GET_CODE (pat) == SET
4992 	  && length == 4
4993 	  && JUMP_LABEL (insn) != NULL_RTX
4994 	  && ! forward_branch_p (insn))
4995 	length += 4;
4996       else if (GET_CODE (pat) == PARALLEL
4997 	       && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4998 	       && length == 4)
4999 	length += 4;
5000       /* Adjust dbra insn with short backwards conditional branch with
5001 	 unfilled delay slot -- only for case where counter is in a
5002 	 general register.  */
5003       else if (GET_CODE (pat) == PARALLEL
5004 	       && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5005 	       && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
5006 	       && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
5007 	       && length == 4
5008 	       && ! forward_branch_p (insn))
5009 	length += 4;
5010     }
5011   return length;
5012 }
5013 
5014 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook.  */
5015 
5016 static bool
5017 pa_print_operand_punct_valid_p (unsigned char code)
5018 {
5019   if (code == '@'
5020       || code == '#'
5021       || code == '*'
5022       || code == '^')
5023     return true;
5024 
5025   return false;
5026 }
5027 
5028 /* Print operand X (an rtx) in assembler syntax to file FILE.
5029    CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5030    For `%' followed by punctuation, CODE is the punctuation and X is null.  */
5031 
5032 void
5033 pa_print_operand (FILE *file, rtx x, int code)
5034 {
5035   switch (code)
5036     {
5037     case '#':
5038       /* Output a 'nop' if there's nothing for the delay slot.  */
5039       if (dbr_sequence_length () == 0)
5040 	fputs ("\n\tnop", file);
5041       return;
5042     case '*':
5043       /* Output a nullification completer if there's nothing for the
5044 	 delay slot or nullification is requested.  */
5045       if (dbr_sequence_length () == 0
5046 	  || (final_sequence
5047 	      && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5048         fputs (",n", file);
5049       return;
5050     case 'R':
5051       /* Print out the second register name of a register pair.
5052 	 I.e., R (6) => 7.  */
5053       fputs (reg_names[REGNO (x) + 1], file);
5054       return;
5055     case 'r':
5056       /* A register or zero.  */
5057       if (x == const0_rtx
5058 	  || (x == CONST0_RTX (DFmode))
5059 	  || (x == CONST0_RTX (SFmode)))
5060 	{
5061 	  fputs ("%r0", file);
5062 	  return;
5063 	}
5064       else
5065 	break;
5066     case 'f':
5067       /* A register or zero (floating point).  */
5068       if (x == const0_rtx
5069 	  || (x == CONST0_RTX (DFmode))
5070 	  || (x == CONST0_RTX (SFmode)))
5071 	{
5072 	  fputs ("%fr0", file);
5073 	  return;
5074 	}
5075       else
5076 	break;
5077     case 'A':
5078       {
5079 	rtx xoperands[2];
5080 
5081 	xoperands[0] = XEXP (XEXP (x, 0), 0);
5082 	xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5083 	pa_output_global_address (file, xoperands[1], 0);
5084         fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5085 	return;
5086       }
5087 
5088     case 'C':			/* Plain (C)ondition */
5089     case 'X':
5090       switch (GET_CODE (x))
5091 	{
5092 	case EQ:
5093 	  fputs ("=", file);  break;
5094 	case NE:
5095 	  fputs ("<>", file);  break;
5096 	case GT:
5097 	  fputs (">", file);  break;
5098 	case GE:
5099 	  fputs (">=", file);  break;
5100 	case GEU:
5101 	  fputs (">>=", file);  break;
5102 	case GTU:
5103 	  fputs (">>", file);  break;
5104 	case LT:
5105 	  fputs ("<", file);  break;
5106 	case LE:
5107 	  fputs ("<=", file);  break;
5108 	case LEU:
5109 	  fputs ("<<=", file);  break;
5110 	case LTU:
5111 	  fputs ("<<", file);  break;
5112 	default:
5113 	  gcc_unreachable ();
5114 	}
5115       return;
5116     case 'N':			/* Condition, (N)egated */
5117       switch (GET_CODE (x))
5118 	{
5119 	case EQ:
5120 	  fputs ("<>", file);  break;
5121 	case NE:
5122 	  fputs ("=", file);  break;
5123 	case GT:
5124 	  fputs ("<=", file);  break;
5125 	case GE:
5126 	  fputs ("<", file);  break;
5127 	case GEU:
5128 	  fputs ("<<", file);  break;
5129 	case GTU:
5130 	  fputs ("<<=", file);  break;
5131 	case LT:
5132 	  fputs (">=", file);  break;
5133 	case LE:
5134 	  fputs (">", file);  break;
5135 	case LEU:
5136 	  fputs (">>", file);  break;
5137 	case LTU:
5138 	  fputs (">>=", file);  break;
5139 	default:
5140 	  gcc_unreachable ();
5141 	}
5142       return;
5143     /* For floating point comparisons.  Note that the output
5144        predicates are the complement of the desired mode.  The
5145        conditions for GT, GE, LT, LE and LTGT cause an invalid
5146        operation exception if the result is unordered and this
5147        exception is enabled in the floating-point status register.  */
5148     case 'Y':
5149       switch (GET_CODE (x))
5150 	{
5151 	case EQ:
5152 	  fputs ("!=", file);  break;
5153 	case NE:
5154 	  fputs ("=", file);  break;
5155 	case GT:
5156 	  fputs ("!>", file);  break;
5157 	case GE:
5158 	  fputs ("!>=", file);  break;
5159 	case LT:
5160 	  fputs ("!<", file);  break;
5161 	case LE:
5162 	  fputs ("!<=", file);  break;
5163 	case LTGT:
5164 	  fputs ("!<>", file);  break;
5165 	case UNLE:
5166 	  fputs ("!?<=", file);  break;
5167 	case UNLT:
5168 	  fputs ("!?<", file);  break;
5169 	case UNGE:
5170 	  fputs ("!?>=", file);  break;
5171 	case UNGT:
5172 	  fputs ("!?>", file);  break;
5173 	case UNEQ:
5174 	  fputs ("!?=", file);  break;
5175 	case UNORDERED:
5176 	  fputs ("!?", file);  break;
5177 	case ORDERED:
5178 	  fputs ("?", file);  break;
5179 	default:
5180 	  gcc_unreachable ();
5181 	}
5182       return;
5183     case 'S':			/* Condition, operands are (S)wapped.  */
5184       switch (GET_CODE (x))
5185 	{
5186 	case EQ:
5187 	  fputs ("=", file);  break;
5188 	case NE:
5189 	  fputs ("<>", file);  break;
5190 	case GT:
5191 	  fputs ("<", file);  break;
5192 	case GE:
5193 	  fputs ("<=", file);  break;
5194 	case GEU:
5195 	  fputs ("<<=", file);  break;
5196 	case GTU:
5197 	  fputs ("<<", file);  break;
5198 	case LT:
5199 	  fputs (">", file);  break;
5200 	case LE:
5201 	  fputs (">=", file);  break;
5202 	case LEU:
5203 	  fputs (">>=", file);  break;
5204 	case LTU:
5205 	  fputs (">>", file);  break;
5206 	default:
5207 	  gcc_unreachable ();
5208 	}
5209       return;
5210     case 'B':			/* Condition, (B)oth swapped and negate.  */
5211       switch (GET_CODE (x))
5212 	{
5213 	case EQ:
5214 	  fputs ("<>", file);  break;
5215 	case NE:
5216 	  fputs ("=", file);  break;
5217 	case GT:
5218 	  fputs (">=", file);  break;
5219 	case GE:
5220 	  fputs (">", file);  break;
5221 	case GEU:
5222 	  fputs (">>", file);  break;
5223 	case GTU:
5224 	  fputs (">>=", file);  break;
5225 	case LT:
5226 	  fputs ("<=", file);  break;
5227 	case LE:
5228 	  fputs ("<", file);  break;
5229 	case LEU:
5230 	  fputs ("<<", file);  break;
5231 	case LTU:
5232 	  fputs ("<<=", file);  break;
5233 	default:
5234 	  gcc_unreachable ();
5235 	}
5236       return;
5237     case 'k':
5238       gcc_assert (GET_CODE (x) == CONST_INT);
5239       fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5240       return;
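    /* The next several codes print arithmetic transforms of a CONST_INT
       shift or deposit operand.  E.g., with INTVAL (x) == 5, %Q prints
       59 (64 - 5), %L prints 27 (32 - 5), %p prints 58 (63 - 5), and
       %P prints 26 (31 - 5).  */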
5241     case 'Q':
5242       gcc_assert (GET_CODE (x) == CONST_INT);
5243       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5244       return;
5245     case 'L':
5246       gcc_assert (GET_CODE (x) == CONST_INT);
5247       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5248       return;
5249     case 'O':
5250       gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5251       fprintf (file, "%d", exact_log2 (INTVAL (x)));
5252       return;
5253     case 'p':
5254       gcc_assert (GET_CODE (x) == CONST_INT);
5255       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5256       return;
5257     case 'P':
5258       gcc_assert (GET_CODE (x) == CONST_INT);
5259       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5260       return;
5261     case 'I':
5262       if (GET_CODE (x) == CONST_INT)
5263 	fputs ("i", file);
5264       return;
5265     case 'M':
5266     case 'F':
5267       switch (GET_CODE (XEXP (x, 0)))
5268 	{
5269 	case PRE_DEC:
5270 	case PRE_INC:
5271 	  if (ASSEMBLER_DIALECT == 0)
5272 	    fputs ("s,mb", file);
5273 	  else
5274 	    fputs (",mb", file);
5275 	  break;
5276 	case POST_DEC:
5277 	case POST_INC:
5278 	  if (ASSEMBLER_DIALECT == 0)
5279 	    fputs ("s,ma", file);
5280 	  else
5281 	    fputs (",ma", file);
5282 	  break;
5283 	case PLUS:
5284 	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5285 	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5286 	    {
5287 	      if (ASSEMBLER_DIALECT == 0)
5288 		fputs ("x", file);
5289 	    }
5290 	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5291 		   || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5292 	    {
5293 	      if (ASSEMBLER_DIALECT == 0)
5294 		fputs ("x,s", file);
5295 	      else
5296 		fputs (",s", file);
5297 	    }
5298 	  else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5299 	    fputs ("s", file);
5300 	  break;
5301 	default:
5302 	  if (code == 'F' && ASSEMBLER_DIALECT == 0)
5303 	    fputs ("s", file);
5304 	  break;
5305 	}
5306       return;
5307     case 'G':
5308       pa_output_global_address (file, x, 0);
5309       return;
5310     case 'H':
5311       pa_output_global_address (file, x, 1);
5312       return;
5313     case 0:			/* Don't do anything special */
5314       break;
5315     case 'Z':
5316       {
5317 	unsigned op[3];
5318 	compute_zdepwi_operands (INTVAL (x), op);
5319 	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5320 	return;
5321       }
5322     case 'z':
5323       {
5324 	unsigned op[3];
5325 	compute_zdepdi_operands (INTVAL (x), op);
5326 	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5327 	return;
5328       }
5329     case 'c':
5330       /* We can get here from a .vtable_inherit due to our
5331 	 CONSTANT_ADDRESS_P rejecting perfectly good constant
5332 	 addresses.  */
5333       break;
5334     default:
5335       gcc_unreachable ();
5336     }
5337   if (GET_CODE (x) == REG)
5338     {
5339       fputs (reg_names [REGNO (x)], file);
5340       if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5341 	{
5342 	  fputs ("R", file);
5343 	  return;
5344 	}
5345       if (FP_REG_P (x)
5346 	  && GET_MODE_SIZE (GET_MODE (x)) <= 4
5347 	  && (REGNO (x) & 1) == 0)
5348 	fputs ("L", file);
5349     }
5350   else if (GET_CODE (x) == MEM)
5351     {
5352       int size = GET_MODE_SIZE (GET_MODE (x));
5353       rtx base = NULL_RTX;
5354       switch (GET_CODE (XEXP (x, 0)))
5355 	{
5356 	case PRE_DEC:
5357 	case POST_DEC:
5358           base = XEXP (XEXP (x, 0), 0);
5359 	  fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5360 	  break;
5361 	case PRE_INC:
5362 	case POST_INC:
5363           base = XEXP (XEXP (x, 0), 0);
5364 	  fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5365 	  break;
5366 	case PLUS:
5367 	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5368 	    fprintf (file, "%s(%s)",
5369 		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5370 		     reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5371 	  else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5372 	    fprintf (file, "%s(%s)",
5373 		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5374 		     reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5375 	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5376 		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5377 	    {
5378 	      /* Because the REG_POINTER flag can get lost during reload,
5379 		 pa_legitimate_address_p canonicalizes the order of the
5380 		 index and base registers in the combined move patterns.  */
5381 	      rtx base = XEXP (XEXP (x, 0), 1);
5382 	      rtx index = XEXP (XEXP (x, 0), 0);
5383 
5384 	      fprintf (file, "%s(%s)",
5385 		       reg_names [REGNO (index)], reg_names [REGNO (base)]);
5386 	    }
5387 	  else
5388 	    output_address (XEXP (x, 0));
5389 	  break;
5390 	default:
5391 	  output_address (XEXP (x, 0));
5392 	  break;
5393 	}
5394     }
5395   else
5396     output_addr_const (file, x);
5397 }
5398 
5399 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF.  */
5400 
5401 void
5402 pa_output_global_address (FILE *file, rtx x, int round_constant)
5403 {
5404 
5405   /* Imagine  (high (const (plus ...))).  */
5406   if (GET_CODE (x) == HIGH)
5407     x = XEXP (x, 0);
5408 
5409   if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5410     output_addr_const (file, x);
5411   else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5412     {
5413       output_addr_const (file, x);
5414       fputs ("-$global$", file);
5415     }
5416   else if (GET_CODE (x) == CONST)
5417     {
5418       const char *sep = "";
5419       int offset = 0;		/* assembler wants -$global$ at end */
5420       rtx base = NULL_RTX;
5421 
5422       switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5423 	{
5424 	case LABEL_REF:
5425 	case SYMBOL_REF:
5426 	  base = XEXP (XEXP (x, 0), 0);
5427 	  output_addr_const (file, base);
5428 	  break;
5429 	case CONST_INT:
5430 	  offset = INTVAL (XEXP (XEXP (x, 0), 0));
5431 	  break;
5432 	default:
5433 	  gcc_unreachable ();
5434 	}
5435 
5436       switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5437 	{
5438 	case LABEL_REF:
5439 	case SYMBOL_REF:
5440 	  base = XEXP (XEXP (x, 0), 1);
5441 	  output_addr_const (file, base);
5442 	  break;
5443 	case CONST_INT:
5444 	  offset = INTVAL (XEXP (XEXP (x, 0), 1));
5445 	  break;
5446 	default:
5447 	  gcc_unreachable ();
5448 	}
5449 
5450       /* How bogus.  The compiler is apparently responsible for
5451 	 rounding the constant if it uses an LR field selector.
5452 
5453 	 The linker and/or assembler seem a better place since
5454 	 they have to do this kind of thing already.
5455 
5456 	 If we fail to do this, HP's optimizing linker may eliminate
5457 	 an addil, but not update the ldw/stw/ldo instruction that
5458 	 uses the result of the addil.  */
5459       if (round_constant)
5460 	offset = ((offset + 0x1000) & ~0x1fff);
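      /* I.e., the offset is rounded to the nearest multiple of 0x2000
	 (8k): an offset of 0x1234 becomes 0x2000, while 0xfff becomes
	 0.  */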
5461 
5462       switch (GET_CODE (XEXP (x, 0)))
5463 	{
5464 	case PLUS:
5465 	  if (offset < 0)
5466 	    {
5467 	      offset = -offset;
5468 	      sep = "-";
5469 	    }
5470 	  else
5471 	    sep = "+";
5472 	  break;
5473 
5474 	case MINUS:
5475 	  gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5476 	  sep = "-";
5477 	  break;
5478 
5479 	default:
5480 	  gcc_unreachable ();
5481 	}
5482 
5483       if (!read_only_operand (base, VOIDmode) && !flag_pic)
5484 	fputs ("-$global$", file);
5485       if (offset)
5486 	fprintf (file, "%s%d", sep, offset);
5487     }
5488   else
5489     output_addr_const (file, x);
5490 }
5491 
5492 /* Output boilerplate text to appear at the beginning of the file.
5493    There are several possible versions.  */
5494 #define aputs(x) fputs(x, asm_out_file)
5495 static inline void
5496 pa_file_start_level (void)
5497 {
5498   if (TARGET_64BIT)
5499     aputs ("\t.LEVEL 2.0w\n");
5500   else if (TARGET_PA_20)
5501     aputs ("\t.LEVEL 2.0\n");
5502   else if (TARGET_PA_11)
5503     aputs ("\t.LEVEL 1.1\n");
5504   else
5505     aputs ("\t.LEVEL 1.0\n");
5506 }
5507 
5508 static inline void
5509 pa_file_start_space (int sortspace)
5510 {
5511   aputs ("\t.SPACE $PRIVATE$");
5512   if (sortspace)
5513     aputs (",SORT=16");
5514   aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5515   if (flag_tm)
5516     aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5517   aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5518 	 "\n\t.SPACE $TEXT$");
5519   if (sortspace)
5520     aputs (",SORT=8");
5521   aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5522 	 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5523 }
5524 
5525 static inline void
5526 pa_file_start_file (int want_version)
5527 {
5528   if (write_symbols != NO_DEBUG)
5529     {
5530       output_file_directive (asm_out_file, main_input_filename);
5531       if (want_version)
5532 	aputs ("\t.version\t\"01.01\"\n");
5533     }
5534 }
5535 
5536 static inline void
5537 pa_file_start_mcount (const char *aswhat)
5538 {
5539   if (profile_flag)
5540     fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5541 }
5542 
5543 static void
5544 pa_elf_file_start (void)
5545 {
5546   pa_file_start_level ();
5547   pa_file_start_mcount ("ENTRY");
5548   pa_file_start_file (0);
5549 }
5550 
5551 static void
5552 pa_som_file_start (void)
5553 {
5554   pa_file_start_level ();
5555   pa_file_start_space (0);
5556   aputs ("\t.IMPORT $global$,DATA\n"
5557          "\t.IMPORT $$dyncall,MILLICODE\n");
5558   pa_file_start_mcount ("CODE");
5559   pa_file_start_file (0);
5560 }
5561 
5562 static void
5563 pa_linux_file_start (void)
5564 {
5565   pa_file_start_file (0);
5566   pa_file_start_level ();
5567   pa_file_start_mcount ("CODE");
5568 }
5569 
5570 static void
5571 pa_hpux64_gas_file_start (void)
5572 {
5573   pa_file_start_level ();
5574 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5575   if (profile_flag)
5576     ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5577 #endif
5578   pa_file_start_file (1);
5579 }
5580 
5581 static void
5582 pa_hpux64_hpas_file_start (void)
5583 {
5584   pa_file_start_level ();
5585   pa_file_start_space (1);
5586   pa_file_start_mcount ("CODE");
5587   pa_file_start_file (0);
5588 }
5589 #undef aputs
5590 
5591 /* Search the deferred plabel list for SYMBOL and return its internal
5592    label.  If an entry for SYMBOL is not found, a new entry is created.  */
5593 
5594 rtx
5595 pa_get_deferred_plabel (rtx symbol)
5596 {
5597   const char *fname = XSTR (symbol, 0);
5598   size_t i;
5599 
5600   /* See if we have already put this function on the list of deferred
5601      plabels.  This list is generally small, so a linear search is not
5602      too ugly.  If it proves too slow, replace it with something faster.  */
5603   for (i = 0; i < n_deferred_plabels; i++)
5604     if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5605       break;
5606 
5607   /* If the deferred plabel list is empty, or this entry was not found
5608      on the list, create a new entry on the list.  */
5609   if (deferred_plabels == NULL || i == n_deferred_plabels)
5610     {
5611       tree id;
5612 
5613       if (deferred_plabels == 0)
5614 	deferred_plabels = ggc_alloc_deferred_plabel ();
5615       else
5616         deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5617                                           deferred_plabels,
5618                                           n_deferred_plabels + 1);
5619 
5620       i = n_deferred_plabels++;
5621       deferred_plabels[i].internal_label = gen_label_rtx ();
5622       deferred_plabels[i].symbol = symbol;
5623 
5624       /* Gross.  We have just implicitly taken the address of this
5625 	 function.  Mark it in the same manner as assemble_name.  */
5626       id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5627       if (id)
5628 	mark_referenced (id);
5629     }
5630 
5631   return deferred_plabels[i].internal_label;
5632 }
5633 
5634 static void
5635 output_deferred_plabels (void)
5636 {
5637   size_t i;
5638 
5639   /* If we have some deferred plabels, then we need to switch into the
5640      data or readonly data section, and align it to a 4 byte boundary
5641      before outputting the deferred plabels.  */
5642   if (n_deferred_plabels)
5643     {
5644       switch_to_section (flag_pic ? data_section : readonly_data_section);
5645       ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5646     }
5647 
5648   /* Now output the deferred plabels.  */
5649   for (i = 0; i < n_deferred_plabels; i++)
5650     {
5651       targetm.asm_out.internal_label (asm_out_file, "L",
5652 		 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5653       assemble_integer (deferred_plabels[i].symbol,
5654 			TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5655     }
5656 }
5657 
5658 /* Initialize optabs to point to emulation routines.  */
5659 
5660 static void
5661 pa_init_libfuncs (void)
5662 {
5663   if (HPUX_LONG_DOUBLE_LIBRARY)
5664     {
5665       set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5666       set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5667       set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5668       set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5669       set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5670       set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5671       set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5672       set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5673       set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5674 
5675       set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5676       set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5677       set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5678       set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5679       set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5680       set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5681       set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5682 
5683       set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5684       set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5685       set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5686       set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5687 
5688       set_conv_libfunc (sfix_optab, SImode, TFmode,
5689 			TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
5690 				     : "_U_Qfcnvfxt_quad_to_sgl");
5691       set_conv_libfunc (sfix_optab, DImode, TFmode,
5692 			"_U_Qfcnvfxt_quad_to_dbl");
5693       set_conv_libfunc (ufix_optab, SImode, TFmode,
5694 			"_U_Qfcnvfxt_quad_to_usgl");
5695       set_conv_libfunc (ufix_optab, DImode, TFmode,
5696 			"_U_Qfcnvfxt_quad_to_udbl");
5697 
5698       set_conv_libfunc (sfloat_optab, TFmode, SImode,
5699 			"_U_Qfcnvxf_sgl_to_quad");
5700       set_conv_libfunc (sfloat_optab, TFmode, DImode,
5701 			"_U_Qfcnvxf_dbl_to_quad");
5702       set_conv_libfunc (ufloat_optab, TFmode, SImode,
5703 			"_U_Qfcnvxf_usgl_to_quad");
5704       set_conv_libfunc (ufloat_optab, TFmode, DImode,
5705 			"_U_Qfcnvxf_udbl_to_quad");
5706     }
5707 
5708   if (TARGET_SYNC_LIBCALL)
5709     init_sync_libfuncs (UNITS_PER_WORD);
5710 }
5711 
5712 /* HP's millicode routines mean something special to the assembler.
5713    Keep track of which ones we have used.  */
5714 
5715 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5716 static void import_milli (enum millicodes);
5717 static char imported[(int) end1000];
5718 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5719 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5720 #define MILLI_START 10
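/* MILLI_START is the offset of the "...." placeholder in import_string;
   import_milli overwrites those four characters in place, so importing
   mulI, for example, emits ".IMPORT $$mulI,MILLICODE".  */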
5721 
5722 static void
5723 import_milli (enum millicodes code)
5724 {
5725   char str[sizeof (import_string)];
5726 
5727   if (!imported[(int) code])
5728     {
5729       imported[(int) code] = 1;
5730       strcpy (str, import_string);
5731       strncpy (str + MILLI_START, milli_names[(int) code], 4);
5732       output_asm_insn (str, 0);
5733     }
5734 }
5735 
5736 /* The register constraints have put the operands and return value in
5737    the proper registers.  */
5738 
5739 const char *
5740 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
5741 {
5742   import_milli (mulI);
5743   return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5744 }
5745 
5746 /* Emit the rtl for doing a division by a constant.  */
5747 
5748 /* Do magic division millicodes exist for this value? */
5749 const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
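/* I.e., $$divI_n and $$divU_n millicode routines exist for
   n = 3, 5, 6, 7, 9, 10, 12, 14 and 15.  */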
5750 
5751 /* We'll use an array to keep track of the magic millicodes and
5752    whether or not we've used them already. [n][0] is signed, [n][1] is
5753    unsigned.  */
5754 
5755 static int div_milli[16][2];
5756 
5757 int
5758 pa_emit_hpdiv_const (rtx *operands, int unsignedp)
5759 {
5760   if (GET_CODE (operands[2]) == CONST_INT
5761       && INTVAL (operands[2]) > 0
5762       && INTVAL (operands[2]) < 16
5763       && pa_magic_milli[INTVAL (operands[2])])
5764     {
5765       rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
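      /* The millicode return pointer: %r2 in the 64-bit runtime and %r31
	 otherwise.  The call clobbers it, hence the CLOBBER in the
	 PARALLEL below.  */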
5766 
5767       emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5768       emit
5769 	(gen_rtx_PARALLEL
5770 	 (VOIDmode,
5771 	  gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5772 				     gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5773 						     SImode,
5774 						     gen_rtx_REG (SImode, 26),
5775 						     operands[2])),
5776 		     gen_rtx_CLOBBER (VOIDmode, operands[4]),
5777 		     gen_rtx_CLOBBER (VOIDmode, operands[3]),
5778 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5779 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5780 		     gen_rtx_CLOBBER (VOIDmode, ret))));
5781       emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5782       return 1;
5783     }
5784   return 0;
5785 }
5786 
5787 const char *
5788 pa_output_div_insn (rtx *operands, int unsignedp, rtx insn)
5789 {
5790   HOST_WIDE_INT divisor;
5791 
5792   /* If the divisor is a constant, try to use one of the special
5793      opcodes.  */
5794   if (GET_CODE (operands[0]) == CONST_INT)
5795     {
5796       static char buf[100];
5797       divisor = INTVAL (operands[0]);
5798       if (!div_milli[divisor][unsignedp])
5799 	{
5800 	  div_milli[divisor][unsignedp] = 1;
5801 	  if (unsignedp)
5802 	    output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5803 	  else
5804 	    output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5805 	}
5806       if (unsignedp)
5807 	{
5808 	  sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5809 		   INTVAL (operands[0]));
5810 	  return pa_output_millicode_call (insn,
5811 					   gen_rtx_SYMBOL_REF (SImode, buf));
5812 	}
5813       else
5814 	{
5815 	  sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5816 		   INTVAL (operands[0]));
5817 	  return pa_output_millicode_call (insn,
5818 					   gen_rtx_SYMBOL_REF (SImode, buf));
5819 	}
5820     }
5821   /* Divisor isn't a special constant.  */
5822   else
5823     {
5824       if (unsignedp)
5825 	{
5826 	  import_milli (divU);
5827 	  return pa_output_millicode_call (insn,
5828 					gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5829 	}
5830       else
5831 	{
5832 	  import_milli (divI);
5833 	  return pa_output_millicode_call (insn,
5834 					gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5835 	}
5836     }
5837 }
5838 
5839 /* Output a $$rem millicode to do mod.  */
5840 
5841 const char *
5842 pa_output_mod_insn (int unsignedp, rtx insn)
5843 {
5844   if (unsignedp)
5845     {
5846       import_milli (remU);
5847       return pa_output_millicode_call (insn,
5848 				       gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5849     }
5850   else
5851     {
5852       import_milli (remI);
5853       return pa_output_millicode_call (insn,
5854 				       gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5855     }
5856 }
5857 
5858 void
5859 pa_output_arg_descriptor (rtx call_insn)
5860 {
5861   const char *arg_regs[4];
5862   enum machine_mode arg_mode;
5863   rtx link;
5864   int i, output_flag = 0;
5865   int regno;
5866 
5867   /* We neither need nor want argument location descriptors for the
5868      64-bit runtime environment or the ELF32 environment.  */
5869   if (TARGET_64BIT || TARGET_ELF32)
5870     return;
5871 
5872   for (i = 0; i < 4; i++)
5873     arg_regs[i] = 0;
5874 
5875   /* Specify explicitly that no argument relocations should take place
5876      if using the portable runtime calling conventions.  */
5877   if (TARGET_PORTABLE_RUNTIME)
5878     {
5879       fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5880 	     asm_out_file);
5881       return;
5882     }
5883 
5884   gcc_assert (GET_CODE (call_insn) == CALL_INSN);
5885   for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5886        link; link = XEXP (link, 1))
5887     {
5888       rtx use = XEXP (link, 0);
5889 
5890       if (! (GET_CODE (use) == USE
5891 	     && GET_CODE (XEXP (use, 0)) == REG
5892 	     && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5893 	continue;
5894 
5895       arg_mode = GET_MODE (XEXP (use, 0));
5896       regno = REGNO (XEXP (use, 0));
5897       if (regno >= 23 && regno <= 26)
5898 	{
5899 	  arg_regs[26 - regno] = "GR";
5900 	  if (arg_mode == DImode)
5901 	    arg_regs[25 - regno] = "GR";
5902 	}
5903       else if (regno >= 32 && regno <= 39)
5904 	{
5905 	  if (arg_mode == SFmode)
5906 	    arg_regs[(regno - 32) / 2] = "FR";
5907 	  else
5908 	    {
5909 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5910 	      arg_regs[(regno - 34) / 2] = "FR";
5911 	      arg_regs[(regno - 34) / 2 + 1] = "FU";
5912 #else
5913 	      arg_regs[(regno - 34) / 2] = "FU";
5914 	      arg_regs[(regno - 34) / 2 + 1] = "FR";
5915 #endif
5916 	    }
5917 	}
5918     }
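  /* arg_regs[] now describes ARGW0 .. ARGW3.  E.g., a call passing two
     ints in %r26 and %r25 produces "\t.CALL ARGW0=GR,ARGW1=GR".  */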
5919   fputs ("\t.CALL ", asm_out_file);
5920   for (i = 0; i < 4; i++)
5921     {
5922       if (arg_regs[i])
5923 	{
5924 	  if (output_flag++)
5925 	    fputc (',', asm_out_file);
5926 	  fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5927 	}
5928     }
5929   fputc ('\n', asm_out_file);
5930 }
5931 
5932 /* Inform reload about cases where moving X with a mode MODE to or from
5933    a register in RCLASS requires an extra scratch or immediate register.
5934    Return the class needed for the immediate register.  */
5935 
5936 static reg_class_t
5937 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
5938 		     enum machine_mode mode, secondary_reload_info *sri)
5939 {
5940   int regno;
5941   enum reg_class rclass = (enum reg_class) rclass_i;
5942 
5943   /* Handle the easy stuff first.  */
5944   if (rclass == R1_REGS)
5945     return NO_REGS;
5946 
5947   if (REG_P (x))
5948     {
5949       regno = REGNO (x);
5950       if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
5951 	return NO_REGS;
5952     }
5953   else
5954     regno = -1;
5955 
5956   /* If we have something like (mem (mem (...)), we can safely assume the
5957      inner MEM will end up in a general register after reloading, so there's
5958      no need for a secondary reload.  */
5959   if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
5960     return NO_REGS;
5961 
5962   /* Trying to load a constant into a FP register during PIC code
5963      generation requires %r1 as a scratch register.  For float modes,
5964      the only legitimate constant is CONST0_RTX.  However, there are
5965      a few patterns that accept constant double operands.  */
5966   if (flag_pic
5967       && FP_REG_CLASS_P (rclass)
5968       && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
5969     {
5970       switch (mode)
5971 	{
5972 	case SImode:
5973 	  sri->icode = CODE_FOR_reload_insi_r1;
5974 	  break;
5975 
5976 	case DImode:
5977 	  sri->icode = CODE_FOR_reload_indi_r1;
5978 	  break;
5979 
5980 	case SFmode:
5981 	  sri->icode = CODE_FOR_reload_insf_r1;
5982 	  break;
5983 
5984 	case DFmode:
5985 	  sri->icode = CODE_FOR_reload_indf_r1;
5986 	  break;
5987 
5988 	default:
5989 	  gcc_unreachable ();
5990 	}
5991       return NO_REGS;
5992     }
5993 
5994   /* Secondary reloads of symbolic expressions require %r1 as a scratch
5995      register when we're generating PIC code or when the operand isn't
5996      readonly.  */
5997   if (pa_symbolic_expression_p (x))
5998     {
5999       if (GET_CODE (x) == HIGH)
6000 	x = XEXP (x, 0);
6001 
6002       if (flag_pic || !read_only_operand (x, VOIDmode))
6003 	{
6004 	  switch (mode)
6005 	    {
6006 	    case SImode:
6007 	      sri->icode = CODE_FOR_reload_insi_r1;
6008 	      break;
6009 
6010 	    case DImode:
6011 	      sri->icode = CODE_FOR_reload_indi_r1;
6012 	      break;
6013 
6014 	    default:
6015 	      gcc_unreachable ();
6016 	    }
6017 	  return NO_REGS;
6018 	}
6019     }
6020 
6021   /* Profiling showed the PA port spends about 1.3% of its compilation
6022      time in true_regnum from calls inside pa_secondary_reload_class.  */
6023   if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
6024     regno = true_regnum (x);
6025 
6026   /* Handle reloads for floating point loads and stores.  */
6027   if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
6028       && FP_REG_CLASS_P (rclass))
6029     {
6030       if (MEM_P (x))
6031 	{
6032 	  x = XEXP (x, 0);
6033 
6034 	  /* We don't need an intermediate for indexed and LO_SUM DLT
6035 	     memory addresses.  When INT14_OK_STRICT is true, it might
6036 	     appear that we could directly allow register indirect
6037 	     memory addresses.  However, this doesn't work because we
6038 	     don't support SUBREGs in floating-point register copies
6039 	     and reload doesn't tell us when it's going to use a SUBREG.  */
6040 	  if (IS_INDEX_ADDR_P (x)
6041 	      || IS_LO_SUM_DLT_ADDR_P (x))
6042 	    return NO_REGS;
6043 
6044 	  /* Request intermediate general register.  */
6045 	  return GENERAL_REGS;
6046 	}
6047 
6048       /* Request a secondary reload with a general scratch register
6049 	 for everything else.  ??? Could symbolic operands be handled
6050 	 directly when generating non-pic PA 2.0 code?  */
6051       sri->icode = (in_p
6052 		    ? direct_optab_handler (reload_in_optab, mode)
6053 		    : direct_optab_handler (reload_out_optab, mode));
6054       return NO_REGS;
6055     }
6056 
6057   /* A SAR<->FP register copy requires an intermediate general register
6058      and secondary memory.  We need a secondary reload with a general
6059      scratch register for spills.  */
6060   if (rclass == SHIFT_REGS)
6061     {
6062       /* Handle spill.  */
6063       if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6064 	{
6065 	  sri->icode = (in_p
6066 			? direct_optab_handler (reload_in_optab, mode)
6067 			: direct_optab_handler (reload_out_optab, mode));
6068 	  return NO_REGS;
6069 	}
6070 
6071       /* Handle FP copy.  */
6072       if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6073 	return GENERAL_REGS;
6074     }
6075 
6076   if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
6077       && REGNO_REG_CLASS (regno) == SHIFT_REGS
6078       && FP_REG_CLASS_P (rclass))
6079     return GENERAL_REGS;
6080 
6081   return NO_REGS;
6082 }
6083 
6084 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY.  The argument pointer
6085    is only marked as live on entry by df-scan when it is a fixed
6086    register.  It isn't a fixed register in the 64-bit runtime,
6087    so we need to mark it here.  */
6088 
6089 static void
6090 pa_extra_live_on_entry (bitmap regs)
6091 {
6092   if (TARGET_64BIT)
6093     bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6094 }
6095 
6096 /* Implement EH_RETURN_HANDLER_RTX.  The MEM needs to be volatile
6097    to prevent it from being deleted.  */
6098 
6099 rtx
6100 pa_eh_return_handler_rtx (void)
6101 {
6102   rtx tmp;
6103 
6104   tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6105 		      TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6106   tmp = gen_rtx_MEM (word_mode, tmp);
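  /* Setting the flag directly is equivalent to MEM_VOLATILE_P (tmp) = 1
     and keeps the MEM from being deleted as dead.  */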
6107   tmp->volatil = 1;
6108   return tmp;
6109 }
6110 
6111 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6112    by invisible reference.  As a GCC extension, we also pass anything
6113    with a zero or variable size by reference.
6114 
6115    The 64-bit runtime does not describe passing any types by invisible
6116    reference.  The internals of GCC can't currently handle passing
6117    empty structures, and zero or variable length arrays when they are
6118    not passed entirely on the stack or by reference.  Thus, as a GCC
6119    extension, we pass these types by reference.  The HP compiler doesn't
6120    support these types, so hopefully there shouldn't be any compatibility
6121    issues.  This may have to be revisited when HP releases a C99 compiler
6122    or updates the ABI.  */
6123 
6124 static bool
6125 pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
6126 		      enum machine_mode mode, const_tree type,
6127 		      bool named ATTRIBUTE_UNUSED)
6128 {
6129   HOST_WIDE_INT size;
6130 
6131   if (type)
6132     size = int_size_in_bytes (type);
6133   else
6134     size = GET_MODE_SIZE (mode);
6135 
6136   if (TARGET_64BIT)
6137     return size <= 0;
6138   else
6139     return size <= 0 || size > 8;
6140 }
6141 
6142 enum direction
6143 pa_function_arg_padding (enum machine_mode mode, const_tree type)
6144 {
6145   if (mode == BLKmode
6146       || (TARGET_64BIT
6147 	  && type
6148 	  && (AGGREGATE_TYPE_P (type)
6149 	      || TREE_CODE (type) == COMPLEX_TYPE
6150 	      || TREE_CODE (type) == VECTOR_TYPE)))
6151     {
6152       /* Return none if justification is not required.  */
6153       if (type
6154 	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6155 	  && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6156 	return none;
6157 
6158       /* The directions set here are ignored when a BLKmode argument larger
6159 	 than a word is placed in a register.  Different code is used for
6160 	 the stack and registers.  This makes it difficult to have a
6161 	 consistent data representation for both the stack and registers.
6162 	 For both runtimes, the justification and padding for arguments on
6163 	 the stack and in registers should be identical.  */
6164       if (TARGET_64BIT)
6165 	/* The 64-bit runtime specifies left justification for aggregates.  */
6166         return upward;
6167       else
6168 	/* The 32-bit runtime architecture specifies right justification.
6169 	   When the argument is passed on the stack, the argument is padded
6170 	   with garbage on the left.  The HP compiler pads with zeros.  */
6171 	return downward;
6172     }
6173 
6174   if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6175     return downward;
6176   else
6177     return none;
6178 }
6179 
6180 
6181 /* Do what is necessary for `va_start'.  We look at the current function
6182    to determine if stdargs or varargs is used and fill in an initial
6183    va_list.  A pointer to this constructor is returned.  */
6184 
6185 static rtx
6186 hppa_builtin_saveregs (void)
6187 {
6188   rtx offset, dest;
6189   tree fntype = TREE_TYPE (current_function_decl);
6190   int argadj = ((!stdarg_p (fntype))
6191 		? UNITS_PER_WORD : 0);
6192 
6193   if (argadj)
6194     offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
6195   else
6196     offset = crtl->args.arg_offset_rtx;
6197 
6198   if (TARGET_64BIT)
6199     {
6200       int i, off;
6201 
6202       /* Adjust for varargs/stdarg differences.  */
6203       if (argadj)
6204 	offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
6205       else
6206 	offset = crtl->args.arg_offset_rtx;
6207 
6208       /* We need to save %r26 .. %r19 inclusive starting at offset -64
6209 	 from the incoming arg pointer and growing to larger addresses.  */
6210       for (i = 26, off = -64; i >= 19; i--, off += 8)
6211 	emit_move_insn (gen_rtx_MEM (word_mode,
6212 				     plus_constant (Pmode,
6213 						    arg_pointer_rtx, off)),
6214 			gen_rtx_REG (word_mode, i));
6215 
6216       /* The incoming args pointer points just beyond the flushback area;
6217 	 normally this is not a serious concern.  However, when we are doing
6218 	 varargs/stdargs we want to make the arg pointer point to the start
6219 	 of the incoming argument area.  */
6220       emit_move_insn (virtual_incoming_args_rtx,
6221 		      plus_constant (Pmode, arg_pointer_rtx, -64));
6222 
6223       /* Now return a pointer to the first anonymous argument.  */
6224       return copy_to_reg (expand_binop (Pmode, add_optab,
6225 					virtual_incoming_args_rtx,
6226 					offset, 0, 0, OPTAB_LIB_WIDEN));
6227     }
6228 
6229   /* Store general registers on the stack.  */
6230   dest = gen_rtx_MEM (BLKmode,
6231 		      plus_constant (Pmode, crtl->args.internal_arg_pointer,
6232 				     -16));
6233   set_mem_alias_set (dest, get_varargs_alias_set ());
6234   set_mem_align (dest, BITS_PER_WORD);
6235   move_block_from_reg (23, dest, 4);
6236 
6237   /* move_block_from_reg will emit code to store the argument registers
6238      individually as scalar stores.
6239 
6240      However, other insns may later load from the same addresses for
6241      a structure load (passing a struct to a varargs routine).
6242 
6243      The alias code assumes that such aliasing can never happen, so we
6244      have to keep memory referencing insns from moving up beyond the
6245      last argument register store.  So we emit a blockage insn here.  */
6246   emit_insn (gen_blockage ());
6247 
6248   return copy_to_reg (expand_binop (Pmode, add_optab,
6249 				    crtl->args.internal_arg_pointer,
6250 				    offset, 0, 0, OPTAB_LIB_WIDEN));
6251 }
6252 
6253 static void
6254 hppa_va_start (tree valist, rtx nextarg)
6255 {
6256   nextarg = expand_builtin_saveregs ();
6257   std_expand_builtin_va_start (valist, nextarg);
6258 }
6259 
6260 static tree
6261 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6262 			   gimple_seq *post_p)
6263 {
6264   if (TARGET_64BIT)
6265     {
6266       /* Args grow upward.  We can use the generic routines.  */
6267       return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6268     }
6269   else /* !TARGET_64BIT */
6270     {
6271       tree ptr = build_pointer_type (type);
6272       tree valist_type;
6273       tree t, u;
6274       unsigned int size, ofs;
6275       bool indirect;
6276 
6277       indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
6278       if (indirect)
6279 	{
6280 	  type = ptr;
6281 	  ptr = build_pointer_type (type);
6282 	}
6283       size = int_size_in_bytes (type);
6284       valist_type = TREE_TYPE (valist);
6285 
6286       /* Args grow down.  Not handled by generic routines.  */
6287 
6288       u = fold_convert (sizetype, size_in_bytes (type));
6289       u = fold_build1 (NEGATE_EXPR, sizetype, u);
6290       t = fold_build_pointer_plus (valist, u);
6291 
6292       /* Align to 4 or 8 byte boundary depending on argument size.  */
6293 
6294       u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6295       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6296       t = fold_convert (valist_type, t);
6297 
6298       t = build2 (MODIFY_EXPR, valist_type, valist, t);
6299 
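      /* Small arguments are right-justified in their 4-byte slot in the
	 32-bit runtime, so step forward to the byte actually holding the
	 value; e.g., a 1-byte argument gets ofs = 3.  */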
6300       ofs = (8 - size) % 4;
6301       if (ofs != 0)
6302 	t = fold_build_pointer_plus_hwi (t, ofs);
6303 
6304       t = fold_convert (ptr, t);
6305       t = build_va_arg_indirect_ref (t);
6306 
6307       if (indirect)
6308 	t = build_va_arg_indirect_ref (t);
6309 
6310       return t;
6311     }
6312 }
6313 
6314 /* True if MODE is valid for the target.  By "valid", we mean able to
6315    be manipulated in non-trivial ways.  In particular, this means all
6316    the arithmetic is supported.
6317 
6318    Currently, TImode is not valid as the HP 64-bit runtime documentation
6319    doesn't document the alignment and calling conventions for this type.
6320    Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6321    2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE.  */
6322 
6323 static bool
6324 pa_scalar_mode_supported_p (enum machine_mode mode)
6325 {
6326   int precision = GET_MODE_PRECISION (mode);
6327 
6328   switch (GET_MODE_CLASS (mode))
6329     {
6330     case MODE_PARTIAL_INT:
6331     case MODE_INT:
6332       if (precision == CHAR_TYPE_SIZE)
6333 	return true;
6334       if (precision == SHORT_TYPE_SIZE)
6335 	return true;
6336       if (precision == INT_TYPE_SIZE)
6337 	return true;
6338       if (precision == LONG_TYPE_SIZE)
6339 	return true;
6340       if (precision == LONG_LONG_TYPE_SIZE)
6341 	return true;
6342       return false;
6343 
6344     case MODE_FLOAT:
6345       if (precision == FLOAT_TYPE_SIZE)
6346 	return true;
6347       if (precision == DOUBLE_TYPE_SIZE)
6348 	return true;
6349       if (precision == LONG_DOUBLE_TYPE_SIZE)
6350 	return true;
6351       return false;
6352 
6353     case MODE_DECIMAL_FLOAT:
6354       return false;
6355 
6356     default:
6357       gcc_unreachable ();
6358     }
6359 }
6360 
6361 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6362    it branches into the delay slot.  Otherwise, return FALSE.  */
6363 
6364 static bool
6365 branch_to_delay_slot_p (rtx insn)
6366 {
6367   rtx jump_insn;
6368 
6369   if (dbr_sequence_length ())
6370     return FALSE;
6371 
6372   jump_insn = next_active_insn (JUMP_LABEL (insn));
6373   while (insn)
6374     {
6375       insn = next_active_insn (insn);
6376       if (jump_insn == insn)
6377 	return TRUE;
6378 
6379       /* We can't rely on the length of asms.  So, we return FALSE when
6380 	 the branch is followed by an asm.  */
6381       if (!insn
6382 	  || GET_CODE (PATTERN (insn)) == ASM_INPUT
6383 	  || extract_asm_operands (PATTERN (insn)) != NULL_RTX
6384 	  || get_attr_length (insn) > 0)
6385 	break;
6386     }
6387 
6388   return FALSE;
6389 }
6390 
6391 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6392 
6393    This occurs when INSN has an unfilled delay slot and is followed
6394    by an asm.  Disaster can occur if the asm is empty and the jump
6395    branches into the delay slot.  So, we add a nop in the delay slot
6396    when this occurs.  */
6397 
6398 static bool
6399 branch_needs_nop_p (rtx insn)
6400 {
6401   rtx jump_insn;
6402 
6403   if (dbr_sequence_length ())
6404     return FALSE;
6405 
6406   jump_insn = next_active_insn (JUMP_LABEL (insn));
6407   while (insn)
6408     {
6409       insn = next_active_insn (insn);
6410       if (!insn || jump_insn == insn)
6411 	return TRUE;
6412 
6413       if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6414 	   || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6415 	  && get_attr_length (insn) > 0)
6416 	break;
6417     }
6418 
6419   return FALSE;
6420 }
6421 
6422 /* Return TRUE if INSN, a forward jump insn, can use nullification
6423    to skip the following instruction.  This avoids an extra cycle due
6424    to a mis-predicted branch when we fall through.  */
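/* E.g., a nullifying "comclr,cond" both performs the comparison and
   conditionally skips the following insn, replacing a branch over one
   instruction (see the useskip handling in pa_output_cbranch below).  */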
6425 
6426 static bool
6427 use_skip_p (rtx insn)
6428 {
6429   rtx jump_insn = next_active_insn (JUMP_LABEL (insn));
6430 
6431   while (insn)
6432     {
6433       insn = next_active_insn (insn);
6434 
6435       /* We can't rely on the length of asms, so we can't skip asms.  */
6436       if (!insn
6437 	  || GET_CODE (PATTERN (insn)) == ASM_INPUT
6438 	  || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6439 	break;
6440       if (get_attr_length (insn) == 4
6441 	  && jump_insn == next_active_insn (insn))
6442 	return TRUE;
6443       if (get_attr_length (insn) > 0)
6444 	break;
6445     }
6446 
6447   return FALSE;
6448 }
6449 
6450 /* This routine handles all the normal conditional branch sequences we
6451    might need to generate.  It handles compare immediate vs compare
6452    register, nullification of delay slots, varying length branches,
6453    negated branches, and all combinations of the above.  It returns the
6454    output appropriate to emit the branch corresponding to all given
6455    parameters.  */
6456 
6457 const char *
6458 pa_output_cbranch (rtx *operands, int negated, rtx insn)
6459 {
6460   static char buf[100];
6461   bool useskip;
6462   int nullify = INSN_ANNULLED_BRANCH_P (insn);
6463   int length = get_attr_length (insn);
6464   int xdelay;
6465 
6466   /* A conditional branch to the following instruction (e.g. the delay slot)
6467      is asking for a disaster.  This can happen when not optimizing and
6468      when jump optimization fails.
6469 
6470      While it is usually safe to emit nothing, this can fail if the
6471      preceding instruction is a nullified branch with an empty delay
6472      slot and the same branch target as this branch.  We could check
6473      for this but jump optimization should eliminate nop jumps.  It
6474      is always safe to emit a nop.  */
6475   if (branch_to_delay_slot_p (insn))
6476     return "nop";
6477 
6478   /* The doubleword form of the cmpib instruction doesn't have the LEU
6479      and GTU conditions while the cmpb instruction does.  Since we accept
6480      zero for cmpb, we must ensure that we use cmpb for the comparison.  */
6481   if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6482     operands[2] = gen_rtx_REG (DImode, 0);
6483   if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6484     operands[1] = gen_rtx_REG (DImode, 0);
6485 
6486   /* If this is a long branch with its delay slot unfilled, set `nullify'
6487      as it can nullify the delay slot and save a nop.  */
6488   if (length == 8 && dbr_sequence_length () == 0)
6489     nullify = 1;
6490 
6491   /* If this is a short forward conditional branch which did not get
6492      its delay slot filled, the delay slot can still be nullified.  */
6493   if (! nullify && length == 4 && dbr_sequence_length () == 0)
6494     nullify = forward_branch_p (insn);
6495 
6496   /* A forward branch over a single nullified insn can be done with a
6497      comclr instruction.  This avoids a single cycle penalty due to
6498      mis-predicted branch if we fall through (branch not taken).  */
6499   useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6500 
6501   switch (length)
6502     {
6503       /* All short conditional branches except backwards with an unfilled
6504 	 delay slot.  */
6505       case 4:
6506 	if (useskip)
6507 	  strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6508 	else
6509 	  strcpy (buf, "{com%I2b,|cmp%I2b,}");
6510 	if (GET_MODE (operands[1]) == DImode)
6511 	  strcat (buf, "*");
6512 	if (negated)
6513 	  strcat (buf, "%B3");
6514 	else
6515 	  strcat (buf, "%S3");
6516 	if (useskip)
6517 	  strcat (buf, " %2,%r1,%%r0");
6518 	else if (nullify)
6519 	  {
6520 	    if (branch_needs_nop_p (insn))
6521 	      strcat (buf, ",n %2,%r1,%0%#");
6522 	    else
6523 	      strcat (buf, ",n %2,%r1,%0");
6524 	  }
6525 	else
6526 	  strcat (buf, " %2,%r1,%0");
6527 	break;
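	/* E.g., a plain short branch with a filled delay slot comes out
	   as "{com%I2b,|cmp%I2b,}%S3 %2,%r1,%0".  */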
6528 
6529      /* All long conditionals.  Note a short backward branch with an
6530 	unfilled delay slot is treated just like a long backward branch
6531 	with an unfilled delay slot.  */
6532       case 8:
6533 	/* Handle weird backwards branch with a filled delay slot
6534 	   which is nullified.  */
6535 	if (dbr_sequence_length () != 0
6536 	    && ! forward_branch_p (insn)
6537 	    && nullify)
6538 	  {
6539 	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
6540 	    if (GET_MODE (operands[1]) == DImode)
6541 	      strcat (buf, "*");
6542 	    if (negated)
6543 	      strcat (buf, "%S3");
6544 	    else
6545 	      strcat (buf, "%B3");
6546 	    strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6547 	  }
6548 	/* Handle short backwards branch with an unfilled delay slot.
6549 	   Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6550 	   taken and untaken branches.  */
6551 	else if (dbr_sequence_length () == 0
6552 		 && ! forward_branch_p (insn)
6553 		 && INSN_ADDRESSES_SET_P ()
6554 		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6555 				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6556 	  {
6557 	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
6558 	    if (GET_MODE (operands[1]) == DImode)
6559 	      strcat (buf, "*");
6560 	    if (negated)
6561 	      strcat (buf, "%B3 %2,%r1,%0%#");
6562 	    else
6563 	      strcat (buf, "%S3 %2,%r1,%0%#");
6564 	  }
6565 	else
6566 	  {
6567 	    strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6568 	    if (GET_MODE (operands[1]) == DImode)
6569 	      strcat (buf, "*");
6570 	    if (negated)
6571 	      strcat (buf, "%S3");
6572 	    else
6573 	      strcat (buf, "%B3");
6574 	    if (nullify)
6575 	      strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6576 	    else
6577 	      strcat (buf, " %2,%r1,%%r0\n\tb %0");
6578 	  }
6579 	break;
6580 
6581       default:
6582 	/* The reversed conditional branch must branch over one additional
6583 	   instruction if the delay slot is filled and needs to be extracted
6584 	   by pa_output_lbranch.  If the delay slot is empty or this is a
6585 	   nullified forward branch, the instruction after the reversed
6586 	   condition branch must be nullified.  */
6587 	if (dbr_sequence_length () == 0
6588 	    || (nullify && forward_branch_p (insn)))
6589 	  {
6590 	    nullify = 1;
6591 	    xdelay = 0;
6592 	    operands[4] = GEN_INT (length);
6593 	  }
6594 	else
6595 	  {
6596 	    xdelay = 1;
6597 	    operands[4] = GEN_INT (length + 4);
6598 	  }
6599 
6600 	/* Create a reversed conditional branch which branches around
6601 	   the following insns.  */
6602 	if (GET_MODE (operands[1]) != DImode)
6603 	  {
6604 	    if (nullify)
6605 	      {
6606 		if (negated)
6607 		  strcpy (buf,
6608 		    "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6609 		else
6610 		  strcpy (buf,
6611 		    "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6612 	      }
6613 	    else
6614 	      {
6615 		if (negated)
6616 		  strcpy (buf,
6617 		    "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6618 		else
6619 		  strcpy (buf,
6620 		    "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6621 	      }
6622 	  }
6623 	else
6624 	  {
6625 	    if (nullify)
6626 	      {
6627 		if (negated)
6628 		  strcpy (buf,
6629 		    "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6630 		else
6631 		  strcpy (buf,
6632 		    "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6633 	      }
6634 	    else
6635 	      {
6636 		if (negated)
6637 		  strcpy (buf,
6638 		    "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6639 		else
6640 		  strcpy (buf,
6641 		    "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6642 	      }
6643 	  }
6644 
6645 	output_asm_insn (buf, operands);
6646 	return pa_output_lbranch (operands[0], insn, xdelay);
6647     }
6648   return buf;
6649 }
6650 
6651 /* This routine handles output of long unconditional branches that
6652    exceed the maximum range of a simple branch instruction.  Since
6653    we don't have a register available for the branch, we save register
6654    %r1 in the frame marker, load the branch destination DEST into %r1,
6655    execute the branch, and restore %r1 in the delay slot of the branch.
6656 
6657    Since long branches may have an insn in the delay slot and the
6658    delay slot is used to restore %r1, we in general need to extract
6659    this insn and execute it before the branch.  However, to facilitate
6660    use of this function by conditional branches, we also provide an
6661    option to not extract the delay insn so that it will be emitted
6662    after the long branch.  So, if there is an insn in the delay slot,
6663    it is extracted if XDELAY is nonzero.
6664 
6665    The lengths of the various long-branch sequences are 20, 16 and 24
6666    bytes for the portable runtime, non-PIC and PIC cases, respectively.  */
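   For illustration only (registers fixed by the code below, label
   hypothetical): with no frame of our own and %r2 dead, the non-PIC
   case emits

	stw %r1,-20(%r30)	; save %r1 in the frame marker
	ldil L'target,%r1	; left half of the destination address
	be R'target(%sr4,%r1)	; inter-space branch through %r1
	ldw -20(%r30),%r1	; restore %r1 in the delay slot

   which accounts for the 16-byte non-PIC length quoted above.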
6667 
6668 const char *
6669 pa_output_lbranch (rtx dest, rtx insn, int xdelay)
6670 {
6671   rtx xoperands[2];
6672 
6673   xoperands[0] = dest;
6674 
6675   /* First, free up the delay slot.  */
6676   if (xdelay && dbr_sequence_length () != 0)
6677     {
6678       /* We can't handle a jump in the delay slot.  */
6679       gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN);
6680 
6681       final_scan_insn (NEXT_INSN (insn), asm_out_file,
6682 		       optimize, 0, NULL);
6683 
6684       /* Now delete the delay insn.  */
6685       SET_INSN_DELETED (NEXT_INSN (insn));
6686     }
6687 
6688   /* Output an insn to save %r1.  The runtime documentation doesn't
6689      specify whether the "Clean Up" slot in the caller's frame can
6690      be clobbered by the callee.  It isn't copied by HP's builtin
6691      alloca, so this suggests that it can be clobbered if necessary.
6692      The "Static Link" location is copied by HP's builtin alloca, so
6693      we avoid using it.  Using the cleanup slot might be a problem
6694      if we have to interoperate with languages that pass cleanup
6695      information.  However, it should be possible to handle these
6696      situations with GCC's asm feature.
6697 
6698      The "Current RP" slot is reserved for the called procedure, so
6699      we try to use it when we don't have a frame of our own.  It's
6700      rather unlikely that we won't have a frame when we need to emit
6701      a very long branch.
6702 
6703      Really the way to go long term is a register scavenger; go to
6704      the target of the jump and find a register which we can use
6705      as a scratch to hold the value in %r1.  Then, we wouldn't have
6706      to free up the delay slot or clobber a slot that may be needed
6707      for other purposes.  */
6708   if (TARGET_64BIT)
6709     {
6710       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6711 	/* Use the return pointer slot in the frame marker.  */
6712 	output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6713       else
6714 	/* Use the slot at -40 in the frame marker since HP builtin
6715 	   alloca doesn't copy it.  */
6716 	output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6717     }
6718   else
6719     {
6720       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6721 	/* Use the return pointer slot in the frame marker.  */
6722 	output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6723       else
6724 	/* Use the "Clean Up" slot in the frame marker.  In GCC,
6725 	   the only other use of this location is for copying a
6726 	   floating point double argument from a floating-point
6727 	   register to two general registers.  The copy is done
6728 	   as an "atomic" operation when outputting a call, so it
6729 	   won't interfere with our using the location here.  */
6730 	output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6731     }
6732 
6733   if (TARGET_PORTABLE_RUNTIME)
6734     {
6735       output_asm_insn ("ldil L'%0,%%r1", xoperands);
6736       output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6737       output_asm_insn ("bv %%r0(%%r1)", xoperands);
6738     }
6739   else if (flag_pic)
6740     {
6741       output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6742       if (TARGET_SOM || !TARGET_GAS)
6743 	{
6744 	  xoperands[1] = gen_label_rtx ();
6745 	  output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6746 	  targetm.asm_out.internal_label (asm_out_file, "L",
6747 					  CODE_LABEL_NUMBER (xoperands[1]));
6748 	  output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6749 	}
6750       else
6751 	{
6752 	  output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6753 	  output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6754 	}
6755       output_asm_insn ("bv %%r0(%%r1)", xoperands);
6756     }
6757   else
6758     /* Now output a very long branch to the original target.  */
6759     output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6760 
6761   /* Now restore the value of %r1 in the delay slot.  */
6762   if (TARGET_64BIT)
6763     {
6764       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6765 	return "ldd -16(%%r30),%%r1";
6766       else
6767 	return "ldd -40(%%r30),%%r1";
6768     }
6769   else
6770     {
6771       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6772 	return "ldw -20(%%r30),%%r1";
6773       else
6774 	return "ldw -12(%%r30),%%r1";
6775     }
6776 }
6777 
6778 /* This routine handles all the branch-on-bit conditional branch sequences we
6779    might need to generate.  It handles nullification of delay slots,
6780    varying length branches, negated branches and all combinations of the
6781    above.  It returns the appropriate output template to emit the branch.  */
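/* For example (operands hypothetical), the short nullified form below
   assembles to something like "bb,>=,n %r4,5,L$0012": branch to
   L$0012 if bit 5 of %r4 is clear, nullifying the delay slot.  DImode
   tests carry a "*" completer, as in "bb,*<".  */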
6782 
6783 const char *
6784 pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6785 {
6786   static char buf[100];
6787   bool useskip;
6788   int nullify = INSN_ANNULLED_BRANCH_P (insn);
6789   int length = get_attr_length (insn);
6790   int xdelay;
6791 
6792   /* A conditional branch to the following instruction (i.e. the delay slot) is
6793      asking for a disaster.  I do not think this can happen as this pattern
6794      is only used when optimizing; jump optimization should eliminate the
6795      jump.  But be prepared just in case.  */
6796 
6797   if (branch_to_delay_slot_p (insn))
6798     return "nop";
6799 
6800   /* If this is a long branch with its delay slot unfilled, set `nullify'
6801      as it can nullify the delay slot and save a nop.  */
6802   if (length == 8 && dbr_sequence_length () == 0)
6803     nullify = 1;
6804 
6805   /* If this is a short forward conditional branch which did not get
6806      its delay slot filled, the delay slot can still be nullified.  */
6807   if (! nullify && length == 4 && dbr_sequence_length () == 0)
6808     nullify = forward_branch_p (insn);
6809 
6810   /* A forward branch over a single nullified insn can be done with an
6811      extrs instruction.  This avoids a single-cycle penalty due to a
6812      mis-predicted branch if we fall through (branch not taken).  */
6813   useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6814 
6815   switch (length)
6816     {
6817 
6818       /* All short conditional branches except backwards with an unfilled
6819 	 delay slot.  */
6820       case 4:
6821 	if (useskip)
6822 	  strcpy (buf, "{extrs,|extrw,s,}");
6823 	else
6824 	  strcpy (buf, "bb,");
6825 	if (useskip && GET_MODE (operands[0]) == DImode)
6826 	  strcpy (buf, "extrd,s,*");
6827 	else if (GET_MODE (operands[0]) == DImode)
6828 	  strcpy (buf, "bb,*");
6829 	if ((which == 0 && negated)
6830 	     || (which == 1 && ! negated))
6831 	  strcat (buf, ">=");
6832 	else
6833 	  strcat (buf, "<");
6834 	if (useskip)
6835 	  strcat (buf, " %0,%1,1,%%r0");
6836 	else if (nullify && negated)
6837 	  {
6838 	    if (branch_needs_nop_p (insn))
6839 	      strcat (buf, ",n %0,%1,%3%#");
6840 	    else
6841 	      strcat (buf, ",n %0,%1,%3");
6842 	  }
6843 	else if (nullify && ! negated)
6844 	  {
6845 	    if (branch_needs_nop_p (insn))
6846 	      strcat (buf, ",n %0,%1,%2%#");
6847 	    else
6848 	      strcat (buf, ",n %0,%1,%2");
6849 	  }
6850 	else if (! nullify && negated)
6851 	  strcat (buf, " %0,%1,%3");
6852 	else if (! nullify && ! negated)
6853 	  strcat (buf, " %0,%1,%2");
6854 	break;
6855 
6856      /* All long conditionals.  Note a short backward branch with an
6857 	unfilled delay slot is treated just like a long backward branch
6858 	with an unfilled delay slot.  */
6859       case 8:
6860 	/* Handle weird backwards branch with a filled delay slot
6861 	   which is nullified.  */
6862 	if (dbr_sequence_length () != 0
6863 	    && ! forward_branch_p (insn)
6864 	    && nullify)
6865 	  {
6866 	    strcpy (buf, "bb,");
6867 	    if (GET_MODE (operands[0]) == DImode)
6868 	      strcat (buf, "*");
6869 	    if ((which == 0 && negated)
6870 		|| (which == 1 && ! negated))
6871 	      strcat (buf, "<");
6872 	    else
6873 	      strcat (buf, ">=");
6874 	    if (negated)
6875 	      strcat (buf, ",n %0,%1,.+12\n\tb %3");
6876 	    else
6877 	      strcat (buf, ",n %0,%1,.+12\n\tb %2");
6878 	  }
6879 	/* Handle short backwards branch with an unfilled delay slot.
6880 	   Using a bb;nop rather than extrs;bl saves 1 cycle for both
6881 	   taken and untaken branches.  */
6882 	else if (dbr_sequence_length () == 0
6883 		 && ! forward_branch_p (insn)
6884 		 && INSN_ADDRESSES_SET_P ()
6885 		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6886 				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6887 	  {
6888 	    strcpy (buf, "bb,");
6889 	    if (GET_MODE (operands[0]) == DImode)
6890 	      strcat (buf, "*");
6891 	    if ((which == 0 && negated)
6892 		|| (which == 1 && ! negated))
6893 	      strcat (buf, ">=");
6894 	    else
6895 	      strcat (buf, "<");
6896 	    if (negated)
6897 	      strcat (buf, " %0,%1,%3%#");
6898 	    else
6899 	      strcat (buf, " %0,%1,%2%#");
6900 	  }
6901 	else
6902 	  {
6903 	    if (GET_MODE (operands[0]) == DImode)
6904 	      strcpy (buf, "extrd,s,*");
6905 	    else
6906 	      strcpy (buf, "{extrs,|extrw,s,}");
6907 	    if ((which == 0 && negated)
6908 		|| (which == 1 && ! negated))
6909 	      strcat (buf, "<");
6910 	    else
6911 	      strcat (buf, ">=");
6912 	    if (nullify && negated)
6913 	      strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6914 	    else if (nullify && ! negated)
6915 	      strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6916 	    else if (negated)
6917 	      strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6918 	    else
6919 	      strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6920 	  }
6921 	break;
6922 
6923       default:
6924 	/* The reversed conditional branch must branch over one additional
6925 	   instruction if the delay slot is filled and needs to be extracted
6926 	   by pa_output_lbranch.  If the delay slot is empty or this is a
6927 	   nullified forward branch, the instruction after the reversed
6928 	   conditional branch must be nullified.  */
6929 	if (dbr_sequence_length () == 0
6930 	    || (nullify && forward_branch_p (insn)))
6931 	  {
6932 	    nullify = 1;
6933 	    xdelay = 0;
6934 	    operands[4] = GEN_INT (length);
6935 	  }
6936 	else
6937 	  {
6938 	    xdelay = 1;
6939 	    operands[4] = GEN_INT (length + 4);
6940 	  }
6941 
6942 	if (GET_MODE (operands[0]) == DImode)
6943 	  strcpy (buf, "bb,*");
6944 	else
6945 	  strcpy (buf, "bb,");
6946 	if ((which == 0 && negated)
6947 	    || (which == 1 && !negated))
6948 	  strcat (buf, "<");
6949 	else
6950 	  strcat (buf, ">=");
6951 	if (nullify)
6952 	  strcat (buf, ",n %0,%1,.+%4");
6953 	else
6954 	  strcat (buf, " %0,%1,.+%4");
6955 	output_asm_insn (buf, operands);
6956 	return pa_output_lbranch (negated ? operands[3] : operands[2],
6957 				  insn, xdelay);
6958     }
6959   return buf;
6960 }
6961 
6962 /* This routine handles all the branch-on-variable-bit conditional branch
6963    sequences we might need to generate.  It handles nullification of delay
6964    slots, varying length branches, negated branches and all combinations
6965    of the above.  It returns the appropriate output template to emit the
6966    branch.  */
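/* The variable-bit forms test the bit selected by %sar; e.g. (operands
   hypothetical) PA 1.x syntax "bvb,< %r4,L$0012" corresponds to
   "bb,< %r4,%sar,L$0012" in PA 2.0 syntax, matching the {bvb,|bb,}
   templates below.  */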
6967 
6968 const char *
6969 pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn,
6970 	       int which)
6971 {
6972   static char buf[100];
6973   bool useskip;
6974   int nullify = INSN_ANNULLED_BRANCH_P (insn);
6975   int length = get_attr_length (insn);
6976   int xdelay;
6977 
6978   /* A conditional branch to the following instruction (i.e. the delay slot) is
6979      asking for a disaster.  I do not think this can happen as this pattern
6980      is only used when optimizing; jump optimization should eliminate the
6981      jump.  But be prepared just in case.  */
6982 
6983   if (branch_to_delay_slot_p (insn))
6984     return "nop";
6985 
6986   /* If this is a long branch with its delay slot unfilled, set `nullify'
6987      as it can nullify the delay slot and save a nop.  */
6988   if (length == 8 && dbr_sequence_length () == 0)
6989     nullify = 1;
6990 
6991   /* If this is a short forward conditional branch which did not get
6992      its delay slot filled, the delay slot can still be nullified.  */
6993   if (! nullify && length == 4 && dbr_sequence_length () == 0)
6994     nullify = forward_branch_p (insn);
6995 
6996   /* A forward branch over a single nullified insn can be done with an
6997      extrs instruction.  This avoids a single-cycle penalty due to a
6998      mis-predicted branch if we fall through (branch not taken).  */
6999   useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7000 
7001   switch (length)
7002     {
7003 
7004       /* All short conditional branches except backwards with an unfilled
7005 	 delay slot.  */
7006       case 4:
7007 	if (useskip)
7008 	  strcpy (buf, "{vextrs,|extrw,s,}");
7009 	else
7010 	  strcpy (buf, "{bvb,|bb,}");
7011 	if (useskip && GET_MODE (operands[0]) == DImode)
7012 	  strcpy (buf, "extrd,s,*");
7013 	else if (GET_MODE (operands[0]) == DImode)
7014 	  strcpy (buf, "bb,*");
7015 	if ((which == 0 && negated)
7016 	     || (which == 1 && ! negated))
7017 	  strcat (buf, ">=");
7018 	else
7019 	  strcat (buf, "<");
7020 	if (useskip)
7021 	  strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
7022 	else if (nullify && negated)
7023 	  {
7024 	    if (branch_needs_nop_p (insn))
7025 	      strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7026 	    else
7027 	      strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
7028 	  }
7029 	else if (nullify && ! negated)
7030 	  {
7031 	    if (branch_needs_nop_p (insn))
7032 	      strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7033 	    else
7034 	      strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
7035 	  }
7036 	else if (! nullify && negated)
7037 	  strcat (buf, "{ %0,%3| %0,%%sar,%3}");
7038 	else if (! nullify && ! negated)
7039 	  strcat (buf, "{ %0,%2| %0,%%sar,%2}");
7040 	break;
7041 
7042      /* All long conditionals.  Note a short backward branch with an
7043 	unfilled delay slot is treated just like a long backward branch
7044 	with an unfilled delay slot.  */
7045       case 8:
7046 	/* Handle weird backwards branch with a filled delay slot
7047 	   which is nullified.  */
7048 	if (dbr_sequence_length () != 0
7049 	    && ! forward_branch_p (insn)
7050 	    && nullify)
7051 	  {
7052 	    strcpy (buf, "{bvb,|bb,}");
7053 	    if (GET_MODE (operands[0]) == DImode)
7054 	      strcat (buf, "*");
7055 	    if ((which == 0 && negated)
7056 		|| (which == 1 && ! negated))
7057 	      strcat (buf, "<");
7058 	    else
7059 	      strcat (buf, ">=");
7060 	    if (negated)
7061 	      strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7062 	    else
7063 	      strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7064 	  }
7065 	/* Handle short backwards branch with an unfilled delay slot.
7066 	   Using a bb;nop rather than extrs;bl saves 1 cycle for both
7067 	   taken and untaken branches.  */
7068 	else if (dbr_sequence_length () == 0
7069 		 && ! forward_branch_p (insn)
7070 		 && INSN_ADDRESSES_SET_P ()
7071 		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7072 				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7073 	  {
7074 	    strcpy (buf, "{bvb,|bb,}");
7075 	    if (GET_MODE (operands[0]) == DImode)
7076 	      strcat (buf, "*");
7077 	    if ((which == 0 && negated)
7078 		|| (which == 1 && ! negated))
7079 	      strcat (buf, ">=");
7080 	    else
7081 	      strcat (buf, "<");
7082 	    if (negated)
7083 	      strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
7084 	    else
7085 	      strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
7086 	  }
7087 	else
7088 	  {
7089 	    strcpy (buf, "{vextrs,|extrw,s,}");
7090 	    if (GET_MODE (operands[0]) == DImode)
7091 	      strcpy (buf, "extrd,s,*");
7092 	    if ((which == 0 && negated)
7093 		|| (which == 1 && ! negated))
7094 	      strcat (buf, "<");
7095 	    else
7096 	      strcat (buf, ">=");
7097 	    if (nullify && negated)
7098 	      strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7099 	    else if (nullify && ! negated)
7100 	      strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7101 	    else if (negated)
7102 	      strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7103 	    else
7104 	      strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7105 	  }
7106 	break;
7107 
7108       default:
7109 	/* The reversed conditional branch must branch over one additional
7110 	   instruction if the delay slot is filled and needs to be extracted
7111 	   by pa_output_lbranch.  If the delay slot is empty or this is a
7112 	   nullified forward branch, the instruction after the reversed
7113 	   conditional branch must be nullified.  */
7114 	if (dbr_sequence_length () == 0
7115 	    || (nullify && forward_branch_p (insn)))
7116 	  {
7117 	    nullify = 1;
7118 	    xdelay = 0;
7119 	    operands[4] = GEN_INT (length);
7120 	  }
7121 	else
7122 	  {
7123 	    xdelay = 1;
7124 	    operands[4] = GEN_INT (length + 4);
7125 	  }
7126 
7127 	if (GET_MODE (operands[0]) == DImode)
7128 	  strcpy (buf, "bb,*");
7129 	else
7130 	  strcpy (buf, "{bvb,|bb,}");
7131 	if ((which == 0 && negated)
7132 	    || (which == 1 && !negated))
7133 	  strcat (buf, "<");
7134 	else
7135 	  strcat (buf, ">=");
7136 	if (nullify)
7137 	  strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
7138 	else
7139 	  strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
7140 	output_asm_insn (buf, operands);
7141 	return pa_output_lbranch (negated ? operands[3] : operands[2],
7142 				  insn, xdelay);
7143     }
7144   return buf;
7145 }
7146 
7147 /* Return the output template for emitting a dbra type insn.
7148 
7149    Note it may perform some output operations on its own before
7150    returning the final output string.  */
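/* In the common register alternative this reduces to a single
   add-and-branch, e.g. "addib,> -1,%r3,L$loop" (operands hypothetical):
   add -1 to %r3 and branch to L$loop while the result is still
   positive.  */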
7151 const char *
7152 pa_output_dbra (rtx *operands, rtx insn, int which_alternative)
7153 {
7154   int length = get_attr_length (insn);
7155 
7156   /* A conditional branch to the following instruction (i.e. the delay slot) is
7157      asking for a disaster.  Be prepared!  */
7158 
7159   if (branch_to_delay_slot_p (insn))
7160     {
7161       if (which_alternative == 0)
7162 	return "ldo %1(%0),%0";
7163       else if (which_alternative == 1)
7164 	{
7165 	  output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7166 	  output_asm_insn ("ldw -16(%%r30),%4", operands);
7167 	  output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7168 	  return "{fldws|fldw} -16(%%r30),%0";
7169 	}
7170       else
7171 	{
7172 	  output_asm_insn ("ldw %0,%4", operands);
7173 	  return "ldo %1(%4),%4\n\tstw %4,%0";
7174 	}
7175     }
7176 
7177   if (which_alternative == 0)
7178     {
7179       int nullify = INSN_ANNULLED_BRANCH_P (insn);
7180       int xdelay;
7181 
7182       /* If this is a long branch with its delay slot unfilled, set `nullify'
7183 	 as it can nullify the delay slot and save a nop.  */
7184       if (length == 8 && dbr_sequence_length () == 0)
7185 	nullify = 1;
7186 
7187       /* If this is a short forward conditional branch which did not get
7188 	 its delay slot filled, the delay slot can still be nullified.  */
7189       if (! nullify && length == 4 && dbr_sequence_length () == 0)
7190 	nullify = forward_branch_p (insn);
7191 
7192       switch (length)
7193 	{
7194 	case 4:
7195 	  if (nullify)
7196 	    {
7197 	      if (branch_needs_nop_p (insn))
7198 		return "addib,%C2,n %1,%0,%3%#";
7199 	      else
7200 		return "addib,%C2,n %1,%0,%3";
7201 	    }
7202 	  else
7203 	    return "addib,%C2 %1,%0,%3";
7204 
7205 	case 8:
7206 	  /* Handle weird backwards branch with a filled delay slot
7207 	     which is nullified.  */
7208 	  if (dbr_sequence_length () != 0
7209 	      && ! forward_branch_p (insn)
7210 	      && nullify)
7211 	    return "addib,%N2,n %1,%0,.+12\n\tb %3";
7212 	  /* Handle short backwards branch with an unfilled delay slot.
7213 	     Using an addb;nop rather than addi;bl saves 1 cycle for both
7214 	     taken and untaken branches.  */
7215 	  else if (dbr_sequence_length () == 0
7216 		   && ! forward_branch_p (insn)
7217 		   && INSN_ADDRESSES_SET_P ()
7218 		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7219 				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7220 	      return "addib,%C2 %1,%0,%3%#";
7221 
7222 	  /* Handle normal cases.  */
7223 	  if (nullify)
7224 	    return "addi,%N2 %1,%0,%0\n\tb,n %3";
7225 	  else
7226 	    return "addi,%N2 %1,%0,%0\n\tb %3";
7227 
7228 	default:
7229 	  /* The reversed conditional branch must branch over one additional
7230 	     instruction if the delay slot is filled and needs to be extracted
7231 	     by pa_output_lbranch.  If the delay slot is empty or this is a
7232 	     nullified forward branch, the instruction after the reversed
7233 	     conditional branch must be nullified.  */
7234 	  if (dbr_sequence_length () == 0
7235 	      || (nullify && forward_branch_p (insn)))
7236 	    {
7237 	      nullify = 1;
7238 	      xdelay = 0;
7239 	      operands[4] = GEN_INT (length);
7240 	    }
7241 	  else
7242 	    {
7243 	      xdelay = 1;
7244 	      operands[4] = GEN_INT (length + 4);
7245 	    }
7246 
7247 	  if (nullify)
7248 	    output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7249 	  else
7250 	    output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7251 
7252 	  return pa_output_lbranch (operands[3], insn, xdelay);
7253 	}
7254 
7255     }
7256   /* Deal with gross reload from FP register case.  */
7257   else if (which_alternative == 1)
7258     {
7259       /* Move loop counter from FP register to MEM then into a GR,
7260 	 increment the GR, store the GR into MEM, and finally reload
7261 	 the FP register from MEM from within the branch's delay slot.  */
7262       output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7263 		       operands);
7264       output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7265       if (length == 24)
7266 	return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7267       else if (length == 28)
7268 	return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7269       else
7270 	{
7271 	  operands[5] = GEN_INT (length - 16);
7272 	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7273 	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7274 	  return pa_output_lbranch (operands[3], insn, 0);
7275 	}
7276     }
7277   /* Deal with gross reload from memory case.  */
7278   else
7279     {
7280       /* Reload loop counter from memory; the store back to memory
7281 	 happens in the branch's delay slot.  */
7282       output_asm_insn ("ldw %0,%4", operands);
7283       if (length == 12)
7284 	return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7285       else if (length == 16)
7286 	return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7287       else
7288 	{
7289 	  operands[5] = GEN_INT (length - 4);
7290 	  output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7291 	  return pa_output_lbranch (operands[3], insn, 0);
7292 	}
7293     }
7294 }
7295 
7296 /* Return the output template for emitting a movb type insn.
7297 
7298    Note it may perform some output operations on its own before
7299    returning the final output string.  */
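/* The common form is a conditional move-and-branch, e.g. (operands
   hypothetical) "movb,= %r5,%r3,L$0023": copy %r5 into %r3 and branch
   to L$0023 if the copied value is zero.  */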
7300 const char *
7301 pa_output_movb (rtx *operands, rtx insn, int which_alternative,
7302 	     int reverse_comparison)
7303 {
7304   int length = get_attr_length (insn);
7305 
7306   /* A conditional branch to the following instruction (i.e. the delay slot) is
7307      asking for a disaster.  Be prepared!  */
7308 
7309   if (branch_to_delay_slot_p (insn))
7310     {
7311       if (which_alternative == 0)
7312 	return "copy %1,%0";
7313       else if (which_alternative == 1)
7314 	{
7315 	  output_asm_insn ("stw %1,-16(%%r30)", operands);
7316 	  return "{fldws|fldw} -16(%%r30),%0";
7317 	}
7318       else if (which_alternative == 2)
7319 	return "stw %1,%0";
7320       else
7321 	return "mtsar %r1";
7322     }
7323 
7324   /* Support the second variant.  */
7325   if (reverse_comparison)
7326     PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7327 
7328   if (which_alternative == 0)
7329     {
7330       int nullify = INSN_ANNULLED_BRANCH_P (insn);
7331       int xdelay;
7332 
7333       /* If this is a long branch with its delay slot unfilled, set `nullify'
7334 	 as it can nullify the delay slot and save a nop.  */
7335       if (length == 8 && dbr_sequence_length () == 0)
7336 	nullify = 1;
7337 
7338       /* If this is a short forward conditional branch which did not get
7339 	 its delay slot filled, the delay slot can still be nullified.  */
7340       if (! nullify && length == 4 && dbr_sequence_length () == 0)
7341 	nullify = forward_branch_p (insn);
7342 
7343       switch (length)
7344 	{
7345 	case 4:
7346 	  if (nullify)
7347 	    {
7348 	      if (branch_needs_nop_p (insn))
7349 		return "movb,%C2,n %1,%0,%3%#";
7350 	      else
7351 		return "movb,%C2,n %1,%0,%3";
7352 	    }
7353 	  else
7354 	    return "movb,%C2 %1,%0,%3";
7355 
7356 	case 8:
7357 	  /* Handle weird backwards branch with a filled delay slot
7358 	     which is nullified.  */
7359 	  if (dbr_sequence_length () != 0
7360 	      && ! forward_branch_p (insn)
7361 	      && nullify)
7362 	    return "movb,%N2,n %1,%0,.+12\n\tb %3";
7363 
7364 	  /* Handle short backwards branch with an unfilled delay slot.
7365 	     Using a movb;nop rather than or;bl saves 1 cycle for both
7366 	     taken and untaken branches.  */
7367 	  else if (dbr_sequence_length () == 0
7368 		   && ! forward_branch_p (insn)
7369 		   && INSN_ADDRESSES_SET_P ()
7370 		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7371 				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7372 	    return "movb,%C2 %1,%0,%3%#";
7373 	  /* Handle normal cases.  */
7374 	  if (nullify)
7375 	    return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7376 	  else
7377 	    return "or,%N2 %1,%%r0,%0\n\tb %3";
7378 
7379 	default:
7380 	  /* The reversed conditional branch must branch over one additional
7381 	     instruction if the delay slot is filled and needs to be extracted
7382 	     by pa_output_lbranch.  If the delay slot is empty or this is a
7383 	     nullified forward branch, the instruction after the reversed
7384 	     conditional branch must be nullified.  */
7385 	  if (dbr_sequence_length () == 0
7386 	      || (nullify && forward_branch_p (insn)))
7387 	    {
7388 	      nullify = 1;
7389 	      xdelay = 0;
7390 	      operands[4] = GEN_INT (length);
7391 	    }
7392 	  else
7393 	    {
7394 	      xdelay = 1;
7395 	      operands[4] = GEN_INT (length + 4);
7396 	    }
7397 
7398 	  if (nullify)
7399 	    output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7400 	  else
7401 	    output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7402 
7403 	  return pa_output_lbranch (operands[3], insn, xdelay);
7404 	}
7405     }
7406   /* Deal with gross reload for FP destination register case.  */
7407   else if (which_alternative == 1)
7408     {
7409       /* Move source register to MEM, perform the branch test, then
7410 	 finally load the FP register from MEM from within the branch's
7411 	 delay slot.  */
7412       output_asm_insn ("stw %1,-16(%%r30)", operands);
7413       if (length == 12)
7414 	return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7415       else if (length == 16)
7416 	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7417       else
7418 	{
7419 	  operands[4] = GEN_INT (length - 4);
7420 	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7421 	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7422 	  return pa_output_lbranch (operands[3], insn, 0);
7423 	}
7424     }
7425   /* Deal with gross reload from memory case.  */
7426   else if (which_alternative == 2)
7427     {
7428       /* Reload loop counter from memory; the store back to memory
7429 	 happens in the branch's delay slot.  */
7430       if (length == 8)
7431 	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7432       else if (length == 12)
7433 	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7434       else
7435 	{
7436 	  operands[4] = GEN_INT (length);
7437 	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7438 			   operands);
7439 	  return pa_output_lbranch (operands[3], insn, 0);
7440 	}
7441     }
7442   /* Handle SAR as a destination.  */
7443   else
7444     {
7445       if (length == 8)
7446 	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7447       else if (length == 12)
7448 	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7449       else
7450 	{
7451 	  operands[4] = GEN_INT (length);
7452 	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7453 			   operands);
7454 	  return pa_output_lbranch (operands[3], insn, 0);
7455 	}
7456     }
7457 }
7458 
7459 /* Copy any FP arguments in INSN into integer registers.  */
7460 static void
7461 copy_fp_args (rtx insn)
7462 {
7463   rtx link;
7464   rtx xoperands[2];
7465 
7466   for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7467     {
7468       int arg_mode, regno;
7469       rtx use = XEXP (link, 0);
7470 
7471       if (! (GET_CODE (use) == USE
7472 	  && GET_CODE (XEXP (use, 0)) == REG
7473 	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7474 	continue;
7475 
7476       arg_mode = GET_MODE (XEXP (use, 0));
7477       regno = REGNO (XEXP (use, 0));
7478 
7479       /* Is it a floating point register?  */
7480       if (regno >= 32 && regno <= 39)
7481 	{
7482 	  /* Copy the FP register into an integer register via memory.  */
7483 	  if (arg_mode == SFmode)
7484 	    {
7485 	      xoperands[0] = XEXP (use, 0);
7486 	      xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7487 	      output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7488 	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7489 	    }
7490 	  else
7491 	    {
7492 	      xoperands[0] = XEXP (use, 0);
7493 	      xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7494 	      output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7495 	      output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7496 	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7497 	    }
7498 	}
7499     }
7500 }
7501 
7502 /* Compute length of the FP argument copy sequence for INSN.  */
7503 static int
7504 length_fp_args (rtx insn)
7505 {
7506   int length = 0;
7507   rtx link;
7508 
7509   for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7510     {
7511       int arg_mode, regno;
7512       rtx use = XEXP (link, 0);
7513 
7514       if (! (GET_CODE (use) == USE
7515 	  && GET_CODE (XEXP (use, 0)) == REG
7516 	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7517 	continue;
7518 
7519       arg_mode = GET_MODE (XEXP (use, 0));
7520       regno = REGNO (XEXP (use, 0));
7521 
7522       /* Is it a floating point register?  */
7523       if (regno >= 32 && regno <= 39)
7524 	{
7525 	  if (arg_mode == SFmode)
7526 	    length += 8;
7527 	  else
7528 	    length += 12;
7529 	}
7530     }
7531 
7532   return length;
7533 }
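
/* The 8- and 12-byte counts above match the two- and three-insn copy
   sequences copy_fp_args emits for SFmode and DFmode arguments,
   respectively.  */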
7534 
7535 /* Return the attribute length for the millicode call instruction INSN.
7536    The length must match the code generated by pa_output_millicode_call.
7537    We include the delay slot in the returned length as it is better to
7538    overestimate the length than to underestimate it.  */
7539 
7540 int
7541 pa_attr_length_millicode_call (rtx insn)
7542 {
7543   unsigned long distance = -1;
7544   unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7545 
7546   if (INSN_ADDRESSES_SET_P ())
7547     {
7548       distance = (total + insn_current_reference_address (insn));
7549       if (distance < total)
7550 	distance = -1;
7551     }
7552 
7553   if (TARGET_64BIT)
7554     {
7555       if (!TARGET_LONG_CALLS && distance < 7600000)
7556 	return 8;
7557 
7558       return 20;
7559     }
7560   else if (TARGET_PORTABLE_RUNTIME)
7561     return 24;
7562   else
7563     {
7564       if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
7565 	return 8;
7566 
7567       if (!flag_pic)
7568 	return 12;
7569 
7570       return 24;
7571     }
7572 }
7573 
7574 /* INSN is a function call.  It may have an unconditional jump
7575    in its delay slot.
7576 
7577    CALL_DEST is the routine we are calling.  */
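/* In the short case this is a single "{bl|b,l}" to the millicode
   routine, e.g. "bl $$mulI,%r31" (millicode name illustrative), with
   the delay slot available for filling.  */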
7578 
7579 const char *
7580 pa_output_millicode_call (rtx insn, rtx call_dest)
7581 {
7582   int attr_length = get_attr_length (insn);
7583   int seq_length = dbr_sequence_length ();
7584   int distance;
7585   rtx seq_insn;
7586   rtx xoperands[3];
7587 
7588   xoperands[0] = call_dest;
7589   xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7590 
7591   /* Handle the common case where we are sure that the branch will
7592      reach the beginning of the $CODE$ subspace.  The within-reach
7593      form of the $$sh_func_adrs call has a length of 28.  Because it
7594      has an attribute type of sh_func_adrs, it never has a nonzero
7595      sequence length (i.e., the delay slot is never filled).  */
7596   if (!TARGET_LONG_CALLS
7597       && (attr_length == 8
7598 	  || (attr_length == 28
7599 	      && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
7600     {
7601       output_asm_insn ("{bl|b,l} %0,%2", xoperands);
7602     }
7603   else
7604     {
7605       if (TARGET_64BIT)
7606 	{
7607 	  /* It might seem that one insn could be saved by accessing
7608 	     the millicode function using the linkage table.  However,
7609 	     this doesn't work in shared libraries and other dynamically
7610 	     loaded objects.  Using a pc-relative sequence also avoids
7611 	     problems related to the implicit use of the gp register.  */
7612 	  output_asm_insn ("b,l .+8,%%r1", xoperands);
7613 
7614 	  if (TARGET_GAS)
7615 	    {
7616 	      output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7617 	      output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7618 	    }
7619 	  else
7620 	    {
7621 	      xoperands[1] = gen_label_rtx ();
7622 	      output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7623 	      targetm.asm_out.internal_label (asm_out_file, "L",
7624 					 CODE_LABEL_NUMBER (xoperands[1]));
7625 	      output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7626 	    }
7627 
7628 	  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7629 	}
7630       else if (TARGET_PORTABLE_RUNTIME)
7631 	{
7632 	  /* Pure portable runtime doesn't allow be/ble; we also don't
7633 	     have PIC support in the assembler/linker, so this sequence
7634 	     is needed.  */
7635 
7636 	  /* Get the address of our target into %r1.  */
7637 	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
7638 	  output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7639 
7640 	  /* Get our return address into %r31.  */
7641 	  output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7642 	  output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7643 
7644 	  /* Jump to our target address in %r1.  */
7645 	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
7646 	}
7647       else if (!flag_pic)
7648 	{
7649 	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
7650 	  if (TARGET_PA_20)
7651 	    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7652 	  else
7653 	    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7654 	}
7655       else
7656 	{
7657 	  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7658 	  output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7659 
7660 	  if (TARGET_SOM || !TARGET_GAS)
7661 	    {
7662 	      /* The HP assembler can generate relocations for the
7663 		 difference of two symbols.  GAS can do this for a
7664 		 millicode symbol but not an arbitrary external
7665 		 symbol when generating SOM output.  */
7666 	      xoperands[1] = gen_label_rtx ();
7667 	      targetm.asm_out.internal_label (asm_out_file, "L",
7668 					 CODE_LABEL_NUMBER (xoperands[1]));
7669 	      output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7670 	      output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7671 	    }
7672 	  else
7673 	    {
7674 	      output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7675 	      output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7676 			       xoperands);
7677 	    }
7678 
7679 	  /* Jump to our target address in %r1.  */
7680 	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
7681 	}
7682     }
7683 
7684   if (seq_length == 0)
7685     output_asm_insn ("nop", xoperands);
7686 
7687   /* We are done if there isn't a jump in the delay slot.  */
7688   if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7689     return "";
7690 
7691   /* This call has an unconditional jump in its delay slot.  */
7692   xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7693 
7694   /* See if the return address can be adjusted.  Use the containing
7695      sequence insn's address.  */
7696   if (INSN_ADDRESSES_SET_P ())
7697     {
7698       seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7699       distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7700 		  - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7701 
7702       if (VAL_14_BITS_P (distance))
7703 	{
7704 	  xoperands[1] = gen_label_rtx ();
7705 	  output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
7706 	  targetm.asm_out.internal_label (asm_out_file, "L",
7707 					  CODE_LABEL_NUMBER (xoperands[1]));
7708 	}
7709       else
7710 	/* ??? This branch may not reach its target.  */
7711 	output_asm_insn ("nop\n\tb,n %0", xoperands);
7712     }
7713   else
7714     /* ??? This branch may not reach its target.  */
7715     output_asm_insn ("nop\n\tb,n %0", xoperands);
7716 
7717   /* Delete the jump.  */
7718   SET_INSN_DELETED (NEXT_INSN (insn));
7719 
7720   return "";
7721 }
7722 
7723 /* Return the attribute length of the call instruction INSN.  The SIBCALL
7724    flag indicates whether INSN is a regular call or a sibling call.  The
7725    length returned must be longer than the code actually generated by
7726    pa_output_call.  Since branch shortening is done before delay branch
7727    sequencing, there is no way to determine whether or not the delay
7728    slot will be filled during branch shortening.  Even when the delay
7729    slot is filled, we may have to add a nop if the delay slot contains
7730    a branch that can't reach its target.  Thus, we always have to include
7731    the delay slot in the length estimate.  This used to be done in
7732    pa_adjust_insn_length but we do it here now as some sequences always
7733    fill the delay slot and we can save four bytes in the estimate for
7734    these sequences.  */
7735 
7736 int
7737 pa_attr_length_call (rtx insn, int sibcall)
7738 {
7739   int local_call;
7740   rtx call, call_dest;
7741   tree call_decl;
7742   int length = 0;
7743   rtx pat = PATTERN (insn);
7744   unsigned long distance = -1;
7745 
7746   gcc_assert (GET_CODE (insn) == CALL_INSN);
7747 
7748   if (INSN_ADDRESSES_SET_P ())
7749     {
7750       unsigned long total;
7751 
7752       total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7753       distance = (total + insn_current_reference_address (insn));
7754       if (distance < total)
7755 	distance = -1;
7756     }
7757 
7758   gcc_assert (GET_CODE (pat) == PARALLEL);
7759 
7760   /* Get the call rtx.  */
7761   call = XVECEXP (pat, 0, 0);
7762   if (GET_CODE (call) == SET)
7763     call = SET_SRC (call);
7764 
7765   gcc_assert (GET_CODE (call) == CALL);
7766 
7767   /* Determine if this is a local call.  */
7768   call_dest = XEXP (XEXP (call, 0), 0);
7769   call_decl = SYMBOL_REF_DECL (call_dest);
7770   local_call = call_decl && targetm.binds_local_p (call_decl);
7771 
7772   /* pc-relative branch.  */
7773   if (!TARGET_LONG_CALLS
7774       && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7775 	  || distance < MAX_PCREL17F_OFFSET))
7776     length += 8;
7777 
7778   /* 64-bit plabel sequence.  */
7779   else if (TARGET_64BIT && !local_call)
7780     length += sibcall ? 28 : 24;
7781 
7782   /* non-pic long absolute branch sequence.  */
7783   else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7784     length += 12;
7785 
7786   /* long pc-relative branch sequence.  */
7787   else if (TARGET_LONG_PIC_SDIFF_CALL
7788 	   || (TARGET_GAS && !TARGET_SOM
7789 	       && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7790     {
7791       length += 20;
7792 
7793       if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7794 	length += 8;
7795     }
7796 
7797   /* 32-bit plabel sequence.  */
7798   else
7799     {
7800       length += 32;
7801 
7802       if (TARGET_SOM)
7803 	length += length_fp_args (insn);
7804 
7805       if (flag_pic)
7806 	length += 4;
7807 
7808       if (!TARGET_PA_20)
7809 	{
7810 	  if (!sibcall)
7811 	    length += 8;
7812 
7813 	  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7814 	    length += 8;
7815 	}
7816     }
7817 
7818   return length;
7819 }
7820 
7821 /* INSN is a function call.  It may have an unconditional jump
7822    in its delay slot.
7823 
7824    CALL_DEST is the routine we are calling.  */
7825 
7826 const char *
7827 pa_output_call (rtx insn, rtx call_dest, int sibcall)
7828 {
7829   int delay_insn_deleted = 0;
7830   int delay_slot_filled = 0;
7831   int seq_length = dbr_sequence_length ();
7832   tree call_decl = SYMBOL_REF_DECL (call_dest);
7833   int local_call = call_decl && targetm.binds_local_p (call_decl);
7834   rtx xoperands[2];
7835 
7836   xoperands[0] = call_dest;
7837 
7838   /* Handle the common case where we're sure that the branch will reach
7839      the beginning of the "$CODE$" subspace.  This is the beginning of
7840      the current function if we are in a named section.  */
7841   if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
7842     {
7843       xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7844       output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7845     }
7846   else
7847     {
7848       if (TARGET_64BIT && !local_call)
7849 	{
7850 	  /* ??? As far as I can tell, the HP linker doesn't support the
7851 	     long pc-relative sequence described in the 64-bit runtime
7852 	     architecture.  So, we use a slightly longer indirect call.  */
7853 	  xoperands[0] = pa_get_deferred_plabel (call_dest);
7854 	  xoperands[1] = gen_label_rtx ();
7855 
7856 	  /* If this isn't a sibcall, we put the load of %r27 into the
7857 	     delay slot.  We can't do this in a sibcall as we don't
7858 	     have a second call-clobbered scratch register available.  */
7859 	  if (seq_length != 0
7860 	      && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7861 	      && !sibcall)
7862 	    {
7863 	      final_scan_insn (NEXT_INSN (insn), asm_out_file,
7864 			       optimize, 0, NULL);
7865 
7866 	      /* Now delete the delay insn.  */
7867 	      SET_INSN_DELETED (NEXT_INSN (insn));
7868 	      delay_insn_deleted = 1;
7869 	    }
7870 
7871 	  output_asm_insn ("addil LT'%0,%%r27", xoperands);
7872 	  output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7873 	  output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7874 
7875 	  if (sibcall)
7876 	    {
7877 	      output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7878 	      output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7879 	      output_asm_insn ("bve (%%r1)", xoperands);
7880 	    }
7881 	  else
7882 	    {
7883 	      output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7884 	      output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7885 	      output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7886 	      delay_slot_filled = 1;
7887 	    }
7888 	}
7889       else
7890 	{
7891 	  int indirect_call = 0;
7892 
7893 	  /* Emit a long call.  There are several different sequences
7894 	     of increasing length and complexity.  In most cases,
7895              they don't allow an instruction in the delay slot.  */
7896 	  if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7897 	      && !TARGET_LONG_PIC_SDIFF_CALL
7898 	      && !(TARGET_GAS && !TARGET_SOM
7899 		   && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7900 	      && !TARGET_64BIT)
7901 	    indirect_call = 1;
7902 
7903 	  if (seq_length != 0
7904 	      && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7905 	      && !sibcall
7906 	      && (!TARGET_PA_20
7907 		  || indirect_call
7908 		  || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
7909 	    {
7910 	      /* A non-jump insn in the delay slot.  By definition we can
7911 		 emit this insn before the call (and in fact before argument
7912 		 relocation).  */
7913 	      final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7914 			       NULL);
7915 
7916 	      /* Now delete the delay insn.  */
7917 	      SET_INSN_DELETED (NEXT_INSN (insn));
7918 	      delay_insn_deleted = 1;
7919 	    }
7920 
7921 	  if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7922 	    {
7923 	      /* This is the best sequence for making long calls in
7924 		 non-pic code.  Unfortunately, GNU ld doesn't provide
7925 		 the stub needed for external calls, and GAS's support
7926 		 for this with the SOM linker is buggy.  It is safe
7927 		 to use this for local calls.  */
7928 	      output_asm_insn ("ldil L'%0,%%r1", xoperands);
7929 	      if (sibcall)
7930 		output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7931 	      else
7932 		{
7933 		  if (TARGET_PA_20)
7934 		    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7935 				     xoperands);
7936 		  else
7937 		    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7938 
7939 		  output_asm_insn ("copy %%r31,%%r2", xoperands);
7940 		  delay_slot_filled = 1;
7941 		}
7942 	    }
7943 	  else
7944 	    {
7945 	      if (TARGET_LONG_PIC_SDIFF_CALL)
7946 		{
7947 		  /* The HP assembler and linker can handle relocations
7948 		     for the difference of two symbols.  The HP assembler
7949 		     recognizes the sequence as a pc-relative call and
7950 		     the linker provides stubs when needed.  */
7951 		  xoperands[1] = gen_label_rtx ();
7952 		  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7953 		  output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7954 		  targetm.asm_out.internal_label (asm_out_file, "L",
7955 					     CODE_LABEL_NUMBER (xoperands[1]));
7956 		  output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7957 		}
7958 	      else if (TARGET_GAS && !TARGET_SOM
7959 		       && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7960 		{
7961 		  /*  GAS currently can't generate the relocations that
7962 		      are needed for the SOM linker under HP-UX using this
7963 		      sequence.  The GNU linker doesn't generate the stubs
7964 		      that are needed for external calls on TARGET_ELF32
7965 		      with this sequence.  For now, we have to use a
7966 		      longer plabel sequence when using GAS.  */
7967 		  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7968 		  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7969 				   xoperands);
7970 		  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7971 				   xoperands);
7972 		}
7973 	      else
7974 		{
7975 		  /* Emit a long plabel-based call sequence.  This is
7976 		     essentially an inline implementation of $$dyncall.
7977 		     We don't actually try to call $$dyncall as this is
7978 		     as difficult as calling the function itself.  */
7979 		  xoperands[0] = pa_get_deferred_plabel (call_dest);
7980 		  xoperands[1] = gen_label_rtx ();
7981 
7982 		  /* Since the call is indirect, FP arguments in registers
7983 		     need to be copied to the general registers.  Then, the
7984 		     argument relocation stub will copy them back.  */
7985 		  if (TARGET_SOM)
7986 		    copy_fp_args (insn);
7987 
7988 		  if (flag_pic)
7989 		    {
7990 		      output_asm_insn ("addil LT'%0,%%r19", xoperands);
7991 		      output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7992 		      output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7993 		    }
7994 		  else
7995 		    {
7996 		      output_asm_insn ("addil LR'%0-$global$,%%r27",
7997 				       xoperands);
7998 		      output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7999 				       xoperands);
8000 		    }
8001 
8002 		  output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
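		  /* A sketch of the plabel test (mirroring $$dyncall):
		     if bit 30 of %r1 is set, %r1 addresses a function
		     descriptor, so clear the two low-order bits, then
		     load the new GP into %r19 and the real entry point
		     into %r1.  */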
8003 		  output_asm_insn ("depi 0,31,2,%%r1", xoperands);
8004 		  output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
8005 		  output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
8006 
8007 		  if (!sibcall && !TARGET_PA_20)
8008 		    {
8009 		      output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
8010 		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8011 			output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
8012 		      else
8013 			output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
8014 		    }
8015 		}
8016 
8017 	      if (TARGET_PA_20)
8018 		{
8019 		  if (sibcall)
8020 		    output_asm_insn ("bve (%%r1)", xoperands);
8021 		  else
8022 		    {
8023 		      if (indirect_call)
8024 			{
8025 			  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8026 			  output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
8027 			  delay_slot_filled = 1;
8028 			}
8029 		      else
8030 			output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8031 		    }
8032 		}
8033 	      else
8034 		{
8035 		  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8036 		    output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8037 				     xoperands);
8038 
8039 		  if (sibcall)
8040 		    {
8041 		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8042 			output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
8043 		      else
8044 			output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
8045 		    }
8046 		  else
8047 		    {
8048 		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8049 			output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
8050 		      else
8051 			output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
8052 
8053 		      if (indirect_call)
8054 			output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
8055 		      else
8056 			output_asm_insn ("copy %%r31,%%r2", xoperands);
8057 		      delay_slot_filled = 1;
8058 		    }
8059 		}
8060 	    }
8061 	}
8062     }
8063 
8064   if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
8065     output_asm_insn ("nop", xoperands);
8066 
8067   /* We are done if there isn't a jump in the delay slot.  */
8068   if (seq_length == 0
8069       || delay_insn_deleted
8070       || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
8071     return "";
8072 
8073   /* A sibcall should never have a branch in the delay slot.  */
8074   gcc_assert (!sibcall);
8075 
8076   /* This call has an unconditional jump in its delay slot.  */
8077   xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
8078 
8079   if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
8080     {
8081       /* See if the return address can be adjusted.  Use the containing
8082          sequence insn's address.  This would break the regular call/return@
8083          sequence insn's address.  This would break the regular call/return
8084          relationship assumed by the table-based eh unwinder, so only do that
8085       rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
8086       int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
8087 		      - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
8088 
8089       if (VAL_14_BITS_P (distance)
8090 	  && !(can_throw_internal (insn) || can_throw_external (insn)))
8091 	{
8092 	  xoperands[1] = gen_label_rtx ();
8093 	  output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
8094 	  targetm.asm_out.internal_label (asm_out_file, "L",
8095 					  CODE_LABEL_NUMBER (xoperands[1]));
8096 	}
8097       else
8098 	output_asm_insn ("nop\n\tb,n %0", xoperands);
8099     }
8100   else
8101     output_asm_insn ("b,n %0", xoperands);
8102 
8103   /* Delete the jump.  */
8104   SET_INSN_DELETED (NEXT_INSN (insn));
8105 
8106   return "";
8107 }
8108 
8109 /* Return the attribute length of the indirect call instruction INSN.
8110    The length must match the code generated by output_indirect call.
8111    The length must match the code generated by pa_output_indirect_call.
8112    slot of an indirect call sequence is not exposed and it is used by
8113    the sequence itself.  */
8114 
8115 int
8116 pa_attr_length_indirect_call (rtx insn)
8117 {
8118   unsigned long distance = -1;
8119   unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8120 
8121   if (INSN_ADDRESSES_SET_P ())
8122     {
8123       distance = (total + insn_current_reference_address (insn));
8124       if (distance < total)
8125 	distance = -1;
8126     }
8127 
8128   if (TARGET_64BIT)
8129     return 12;
8130 
8131   if (TARGET_FAST_INDIRECT_CALLS
8132       || (!TARGET_LONG_CALLS
8133 	  && !TARGET_PORTABLE_RUNTIME
8134 	  && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8135 	      || distance < MAX_PCREL17F_OFFSET)))
8136     return 8;
8137 
8138   if (flag_pic)
8139     return 24;
8140 
8141   if (TARGET_PORTABLE_RUNTIME)
8142     return 20;
8143 
8144   /* Out of reach, can use ble.  */
8145   return 12;
8146 }
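
/* These lengths correspond to the sequences in pa_output_indirect_call:
   8 bytes for the two-insn $$dyncall branch, 12 for the three-insn
   64-bit and non-PIC ldil/ble forms, 20 for the five-insn portable
   runtime form, and 24 for the six-insn PIC form.  */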
8147 
8148 const char *
8149 pa_output_indirect_call (rtx insn, rtx call_dest)
8150 {
8151   rtx xoperands[1];
8152 
8153   if (TARGET_64BIT)
8154     {
8155       xoperands[0] = call_dest;
8156       output_asm_insn ("ldd 16(%0),%%r2", xoperands);
8157       output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
8158       return "";
8159     }
8160 
8161   /* First the special case for kernels, level 0 systems, etc.  */
8162   if (TARGET_FAST_INDIRECT_CALLS)
8163     return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8164 
8165   /* Now the normal case -- we can reach $$dyncall directly or
8166      we're sure that we can get there via a long-branch stub.
8167 
8168      No need to check target flags as the length uniquely identifies
8169      the remaining cases.  */
8170   if (pa_attr_length_indirect_call (insn) == 8)
8171     {
8172       /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8173 	 $$dyncall.  Since BLE uses %r31 as the link register, the 22-bit
8174 	 variant of the B,L instruction can't be used on the SOM target.  */
8175       if (TARGET_PA_20 && !TARGET_SOM)
8176 	return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
8177       else
8178 	return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8179     }
8180 
8181   /* Long millicode call, but we are not generating PIC or portable runtime
8182      code.  */
8183   if (pa_attr_length_indirect_call (insn) == 12)
8184     return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8185 
8186   /* Long millicode call for portable runtime.  */
8187   if (pa_attr_length_indirect_call (insn) == 20)
8188     return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
8189 
8190   /* We need a long PIC call to $$dyncall.  */
8191   xoperands[0] = NULL_RTX;
8192   output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8193   if (TARGET_SOM || !TARGET_GAS)
8194     {
8195       xoperands[0] = gen_label_rtx ();
8196       output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
8197       targetm.asm_out.internal_label (asm_out_file, "L",
8198 				      CODE_LABEL_NUMBER (xoperands[0]));
8199       output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
8200     }
8201   else
8202     {
8203       output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
8204       output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
8205 		       xoperands);
8206     }
8207   output_asm_insn ("blr %%r0,%%r2", xoperands);
8208   output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
8209   return "";
8210 }
8211 
8212 /* In HPUX 8.0's shared library scheme, special relocations are needed
8213    for function labels if they might be passed to a function
8214    in a shared library (because shared libraries don't live in code
8215    space), and special magic is needed to construct their address.  */
8216 
8217 void
8218 pa_encode_label (rtx sym)
8219 {
8220   const char *str = XSTR (sym, 0);
8221   int len = strlen (str) + 1;
8222   char *newstr, *p;
8223 
8224   p = newstr = XALLOCAVEC (char, len + 1);
8225   *p++ = '@';
8226   strcpy (p, str);
8227 
8228   XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8229 }
8230 
8231 static void
8232 pa_encode_section_info (tree decl, rtx rtl, int first)
8233 {
8234   int old_referenced = 0;
8235 
8236   if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8237     old_referenced
8238       = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8239 
8240   default_encode_section_info (decl, rtl, first);
8241 
8242   if (first && TEXT_SPACE_P (decl))
8243     {
8244       SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8245       if (TREE_CODE (decl) == FUNCTION_DECL)
8246 	pa_encode_label (XEXP (rtl, 0));
8247     }
8248   else if (old_referenced)
8249     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8250 }
8251 
8252 /* This is sort of inverse to pa_encode_section_info.  */
8253 
8254 static const char *
8255 pa_strip_name_encoding (const char *str)
8256 {
8257   str += (*str == '@');
8258   str += (*str == '*');
8259   return str;
8260 }
8261 
8262 /* Returns 1 if OP is a function label involved in a simple addition
8263    with a constant.  Used to keep certain patterns from matching
8264    during instruction combination.  */
8265 int
8266 pa_is_function_label_plus_const (rtx op)
8267 {
8268   /* Strip off any CONST.  */
8269   if (GET_CODE (op) == CONST)
8270     op = XEXP (op, 0);
8271 
8272   return (GET_CODE (op) == PLUS
8273 	  && function_label_operand (XEXP (op, 0), VOIDmode)
8274 	  && GET_CODE (XEXP (op, 1)) == CONST_INT);
8275 }
8276 
8277 /* Output assembly code for a thunk to FUNCTION.  */
8278 
8279 static void
8280 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8281 			HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8282 			tree function)
8283 {
8284   static unsigned int current_thunk_number;
8285   int val_14 = VAL_14_BITS_P (delta);
8286   unsigned int old_last_address = last_address, nbytes = 0;
8287   char label[16];
8288   rtx xoperands[4];
8289 
8290   xoperands[0] = XEXP (DECL_RTL (function), 0);
8291   xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8292   xoperands[2] = GEN_INT (delta);
8293 
8294   final_start_function (emit_barrier (), file, 1);
8295 
8296   /* Output the thunk.  We know that the function is in the same
8297      translation unit (i.e., the same space) as the thunk, and that
8298      thunks are output after their method.  Thus, we don't need an
8299      external branch to reach the function.  With SOM and GAS,
8300      functions and thunks are effectively in different sections.
8301      Thus, we can always use an IA-relative branch and the linker
8302      will add a long branch stub if necessary.
8303 
8304      However, we have to be careful when generating PIC code on the
8305      SOM port to ensure that the sequence does not transfer to an
8306      import stub for the target function as this could clobber the
8307      return value saved at SP-24.  This would also apply to the
8308      32-bit linux port if the multi-space model is implemented.  */
8309   if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8310        && !(flag_pic && TREE_PUBLIC (function))
8311        && (TARGET_GAS || last_address < 262132))
8312       || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8313 	  && ((targetm_common.have_named_sections
8314 	       && DECL_SECTION_NAME (thunk_fndecl) != NULL
8315 	       /* The GNU 64-bit linker has rather poor stub management.
8316 		  So, we use a long branch from thunks that aren't in
8317 		  the same section as the target function.  */
8318 	       && ((!TARGET_64BIT
8319 		    && (DECL_SECTION_NAME (thunk_fndecl)
8320 			!= DECL_SECTION_NAME (function)))
8321 		   || ((DECL_SECTION_NAME (thunk_fndecl)
8322 			== DECL_SECTION_NAME (function))
8323 		       && last_address < 262132)))
8324 	      /* In this case, we need to be able to reach the start of
8325 		 the stub table even though the function is likely closer
8326 		 and can be jumped to directly.  */
8327 	      || (targetm_common.have_named_sections
8328 		  && DECL_SECTION_NAME (thunk_fndecl) == NULL
8329 		  && DECL_SECTION_NAME (function) == NULL
8330 		  && total_code_bytes < MAX_PCREL17F_OFFSET)
8331 	      /* Likewise.  */
8332 	      || (!targetm_common.have_named_sections
8333 		  && total_code_bytes < MAX_PCREL17F_OFFSET))))
8334     {
8335       if (!val_14)
8336 	output_asm_insn ("addil L'%2,%%r26", xoperands);
8337 
8338       output_asm_insn ("b %0", xoperands);
8339 
8340       if (val_14)
8341 	{
8342 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8343 	  nbytes += 8;
8344 	}
8345       else
8346 	{
8347 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8348 	  nbytes += 12;
8349 	}
8350     }
8351   else if (TARGET_64BIT)
8352     {
8353       /* We only have one call-clobbered scratch register, so we can't
8354          make use of the delay slot if delta doesn't fit in 14 bits.  */
8355       if (!val_14)
8356 	{
8357 	  output_asm_insn ("addil L'%2,%%r26", xoperands);
8358 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8359 	}
8360 
8361       output_asm_insn ("b,l .+8,%%r1", xoperands);
8362 
8363       if (TARGET_GAS)
8364 	{
8365 	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8366 	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
8367 	}
8368       else
8369 	{
8370 	  xoperands[3] = GEN_INT (val_14 ? 8 : 16);
8371 	  output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
8372 	}
8373 
8374       if (val_14)
8375 	{
8376 	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
8377 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8378 	  nbytes += 20;
8379 	}
8380       else
8381 	{
8382 	  output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8383 	  nbytes += 24;
8384 	}
8385     }
8386   else if (TARGET_PORTABLE_RUNTIME)
8387     {
8388       output_asm_insn ("ldil L'%0,%%r1", xoperands);
8389       output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8390 
8391       if (!val_14)
8392 	output_asm_insn ("addil L'%2,%%r26", xoperands);
8393 
8394       output_asm_insn ("bv %%r0(%%r22)", xoperands);
8395 
8396       if (val_14)
8397 	{
8398 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8399 	  nbytes += 16;
8400 	}
8401       else
8402 	{
8403 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8404 	  nbytes += 20;
8405 	}
8406     }
8407   else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8408     {
8409       /* The function is accessible from outside this module.  The only
8410 	 way to avoid an import stub between the thunk and function is to
8411 	 call the function directly with an indirect sequence similar to
8412 	 that used by $$dyncall.  This is possible because $$dyncall acts
8413 	 as the import stub in an indirect call.  */
8414       ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8415       xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8416       output_asm_insn ("addil LT'%3,%%r19", xoperands);
8417       output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8418       output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8419       output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8420       output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8421       output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8422       output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8423 
8424       if (!val_14)
8425 	{
8426 	  output_asm_insn ("addil L'%2,%%r26", xoperands);
8427 	  nbytes += 4;
8428 	}
8429 
8430       if (TARGET_PA_20)
8431 	{
8432 	  output_asm_insn ("bve (%%r22)", xoperands);
8433 	  nbytes += 36;
8434 	}
8435       else if (TARGET_NO_SPACE_REGS)
8436 	{
8437 	  output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8438 	  nbytes += 36;
8439 	}
8440       else
8441 	{
8442 	  output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8443 	  output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8444 	  output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8445 	  nbytes += 44;
8446 	}
8447 
8448       if (val_14)
8449 	output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8450       else
8451 	output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8452     }
8453   else if (flag_pic)
8454     {
8455       output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8456 
8457       if (TARGET_SOM || !TARGET_GAS)
8458 	{
8459 	  output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
8460 	  output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
8461 	}
8462       else
8463 	{
8464 	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8465 	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
8466 	}
8467 
8468       if (!val_14)
8469 	output_asm_insn ("addil L'%2,%%r26", xoperands);
8470 
8471       output_asm_insn ("bv %%r0(%%r22)", xoperands);
8472 
8473       if (val_14)
8474 	{
8475 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8476 	  nbytes += 20;
8477 	}
8478       else
8479 	{
8480 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8481 	  nbytes += 24;
8482 	}
8483     }
8484   else
8485     {
8486       if (!val_14)
8487 	output_asm_insn ("addil L'%2,%%r26", xoperands);
8488 
8489       output_asm_insn ("ldil L'%0,%%r22", xoperands);
8490       output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8491 
8492       if (val_14)
8493 	{
8494 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8495 	  nbytes += 12;
8496 	}
8497       else
8498 	{
8499 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8500 	  nbytes += 16;
8501 	}
8502     }
8503 
8504   final_end_function ();
8505 
8506   if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8507     {
8508       switch_to_section (data_section);
8509       output_asm_insn (".align 4", xoperands);
8510       ASM_OUTPUT_LABEL (file, label);
8511       output_asm_insn (".word P'%0", xoperands);
8512     }
8513 
8514   current_thunk_number++;
8515   nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8516 	    & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8517   last_address += nbytes;
8518   if (old_last_address > last_address)
8519     last_address = UINT_MAX;
8520   update_total_code_bytes (nbytes);
8521 }
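
/* Worked example for the short-reach case above: a thunk whose delta
   of 16 fits in 14 bits and whose target is directly reachable emits
   just

       b foo
       ldo 16(%r26),%r26

   adjusting the this pointer in %r26 from the delay slot of the
   branch -- 8 bytes, matching the nbytes accounting (the symbol name
   is illustrative).  */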
8522 
8523 /* Only direct calls to static functions are allowed to be sibling (tail)
8524    call optimized.
8525 
8526    This restriction is necessary because some linker generated stubs will
8527    store return pointers into rp' in some cases which might clobber a
8528    live value already in rp'.
8529 
8530    In a sibcall the current function and the target function share stack
8531    space.  Thus if the path to the current function and the path to the
8532    target function save a value in rp', they save the value into the
8533    same stack slot, which has undesirable consequences.
8534 
8535    Because of the deferred binding nature of shared libraries any function
8536    with external scope could be in a different load module and thus require
8537    rp' to be saved when calling that function.  So sibcall optimizations
8538    can only be safe for static functions.
8539 
8540    Note that GCC never needs return value relocations, so we don't have to
8541    worry about static calls with return value relocations (which require
8542    saving rp').
8543 
8544    It is safe to perform a sibcall optimization when the target function
8545    will never return.  */
8546 static bool
8547 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8548 {
8549   if (TARGET_PORTABLE_RUNTIME)
8550     return false;
8551 
8552   /* Sibcalls are ok for TARGET_ELF32 as long as the linker is used in
8553      single subspace mode and the call is not indirect.  As far as I know,
8554      there is no operating system support for the multiple subspace mode.
8555      It might be possible to support indirect calls if we didn't use
8556      $$dyncall (see the indirect sequence generated in pa_output_call).  */
8557   if (TARGET_ELF32)
8558     return (decl != NULL_TREE);
8559 
8560   /* Sibcalls are not ok because the arg pointer register is not a fixed
8561      register.  This prevents the sibcall optimization from occurring.  In
8562      addition, there are problems with stub placement using GNU ld.  This
8563      is because a normal sibcall branch uses a 17-bit relocation while
8564      a regular call branch uses a 22-bit relocation.  As a result, more
8565      care needs to be taken in the placement of long-branch stubs.  */
8566   if (TARGET_64BIT)
8567     return false;
8568 
8569   /* Sibcalls are only ok within a translation unit.  */
8570   return (decl && !TREE_PUBLIC (decl));
8571 }
8572 
8573 /* ??? Addition is not commutative on the PA due to the weird implicit
8574    space register selection rules for memory addresses.  Therefore, we
8575    don't consider a + b == b + a, as this might be inside a MEM.  */
8576 static bool
8577 pa_commutative_p (const_rtx x, int outer_code)
8578 {
8579   return (COMMUTATIVE_P (x)
8580 	  && (TARGET_NO_SPACE_REGS
8581 	      || (outer_code != UNKNOWN && outer_code != MEM)
8582 	      || GET_CODE (x) != PLUS));
8583 }
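
/* As a sketch of why: for (plus (reg A) (reg B)) inside a MEM, the
   implicit space register is selected from the base operand, so
   swapping base and index could change which space the access hits.
   Outside a MEM, or when space registers are disabled, PLUS is
   treated as commutative in the usual way.  */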
8584 
8585 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8586    use in fmpyadd instructions.  */
8587 int
8588 pa_fmpyaddoperands (rtx *operands)
8589 {
8590   enum machine_mode mode = GET_MODE (operands[0]);
8591 
8592   /* Must be a floating point mode.  */
8593   if (mode != SFmode && mode != DFmode)
8594     return 0;
8595 
8596   /* All modes must be the same.  */
8597   if (! (mode == GET_MODE (operands[1])
8598 	 && mode == GET_MODE (operands[2])
8599 	 && mode == GET_MODE (operands[3])
8600 	 && mode == GET_MODE (operands[4])
8601 	 && mode == GET_MODE (operands[5])))
8602     return 0;
8603 
8604   /* All operands must be registers.  */
8605   if (! (GET_CODE (operands[1]) == REG
8606 	 && GET_CODE (operands[2]) == REG
8607 	 && GET_CODE (operands[3]) == REG
8608 	 && GET_CODE (operands[4]) == REG
8609 	 && GET_CODE (operands[5]) == REG))
8610     return 0;
8611 
8612   /* Only 2 real operands to the addition.  One of the input operands must
8613      be the same as the output operand.  */
8614   if (! rtx_equal_p (operands[3], operands[4])
8615       && ! rtx_equal_p (operands[3], operands[5]))
8616     return 0;
8617 
8618   /* Inout operand of add cannot conflict with any operands from multiply.  */
8619   if (rtx_equal_p (operands[3], operands[0])
8620      || rtx_equal_p (operands[3], operands[1])
8621      || rtx_equal_p (operands[3], operands[2]))
8622     return 0;
8623 
8624   /* The multiply result cannot feed into the addition operands.  */
8625   if (rtx_equal_p (operands[4], operands[0])
8626       || rtx_equal_p (operands[5], operands[0]))
8627     return 0;
8628 
8629   /* SFmode limits the registers to the upper 32 of the 32-bit FP regs.  */
8630   if (mode == SFmode
8631       && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8632 	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8633 	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8634 	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8635 	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8636 	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8637     return 0;
8638 
8639   /* Passed.  Operands are suitable for fmpyadd.  */
8640   return 1;
8641 }
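
/* Operand layout assumed by the checks above (inferred from them):
   operands[0] is the fmpy destination with sources operands[1] and
   operands[2]; operands[3] is the add destination, which must repeat
   as one of the add sources operands[4] and operands[5].  */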
8642 
8643 #if !defined(USE_COLLECT2)
8644 static void
8645 pa_asm_out_constructor (rtx symbol, int priority)
8646 {
8647   if (!function_label_operand (symbol, VOIDmode))
8648     pa_encode_label (symbol);
8649 
8650 #ifdef CTORS_SECTION_ASM_OP
8651   default_ctor_section_asm_out_constructor (symbol, priority);
8652 #else
8653 # ifdef TARGET_ASM_NAMED_SECTION
8654   default_named_section_asm_out_constructor (symbol, priority);
8655 # else
8656   default_stabs_asm_out_constructor (symbol, priority);
8657 # endif
8658 #endif
8659 }
8660 
8661 static void
8662 pa_asm_out_destructor (rtx symbol, int priority)
8663 {
8664   if (!function_label_operand (symbol, VOIDmode))
8665     pa_encode_label (symbol);
8666 
8667 #ifdef DTORS_SECTION_ASM_OP
8668   default_dtor_section_asm_out_destructor (symbol, priority);
8669 #else
8670 # ifdef TARGET_ASM_NAMED_SECTION
8671   default_named_section_asm_out_destructor (symbol, priority);
8672 # else
8673   default_stabs_asm_out_destructor (symbol, priority);
8674 # endif
8675 #endif
8676 }
8677 #endif
8678 
8679 /* This function places uninitialized global data in the bss section.
8680    The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8681    function on the SOM port to prevent uninitialized global data from
8682    being placed in the data section.  */
8683 
8684 void
8685 pa_asm_output_aligned_bss (FILE *stream,
8686 			   const char *name,
8687 			   unsigned HOST_WIDE_INT size,
8688 			   unsigned int align)
8689 {
8690   switch_to_section (bss_section);
8691   fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8692 
8693 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8694   ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8695 #endif
8696 
8697 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8698   ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8699 #endif
8700 
8701   fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8702   ASM_OUTPUT_LABEL (stream, name);
8703   fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8704 }
8705 
8706 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8707    that doesn't allow the alignment of global common storage to be directly
8708    specified.  The SOM linker aligns common storage based on the rounded
8709    value of the NUM_BYTES parameter in the .comm directive.  It's not
8710    possible to use the .align directive as it doesn't affect the alignment
8711    of the label associated with a .comm directive.  */
8712 
8713 void
8714 pa_asm_output_aligned_common (FILE *stream,
8715 			      const char *name,
8716 			      unsigned HOST_WIDE_INT size,
8717 			      unsigned int align)
8718 {
8719   unsigned int max_common_align;
8720 
8721   max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8722   if (align > max_common_align)
8723     {
8724       warning (0, "alignment (%u) for %s exceeds maximum alignment "
8725 	       "for global common data.  Using %u",
8726 	       align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8727       align = max_common_align;
8728     }
8729 
8730   switch_to_section (bss_section);
8731 
8732   assemble_name (stream, name);
8733   fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8734            MAX (size, align / BITS_PER_UNIT));
8735 }
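
/* Worked example (a sketch; the name is shown schematically): on the
   32-bit port, a 16-byte common object with a requested alignment of
   256 bits exceeds max_common_align (64, as the size is below 4096),
   so the alignment is clamped with a warning and something like

       sym	.comm 16

   is emitted, leaving the SOM linker to derive the alignment from the
   rounded NUM_BYTES value.  */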
8736 
8737 /* We can't use .comm for local common storage as the SOM linker effectively
8738    treats the symbol as universal and uses the same storage for local symbols
8739    with the same name in different object files.  The .block directive
8740    reserves an uninitialized block of storage.  However, it's not common
8741    storage.  Fortunately, GCC never requests common storage with the same
8742    name in any given translation unit.  */
8743 
8744 void
8745 pa_asm_output_aligned_local (FILE *stream,
8746 			     const char *name,
8747 			     unsigned HOST_WIDE_INT size,
8748 			     unsigned int align)
8749 {
8750   switch_to_section (bss_section);
8751   fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8752 
8753 #ifdef LOCAL_ASM_OP
8754   fprintf (stream, "%s", LOCAL_ASM_OP);
8755   assemble_name (stream, name);
8756   fprintf (stream, "\n");
8757 #endif
8758 
8759   ASM_OUTPUT_LABEL (stream, name);
8760   fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8761 }
8762 
8763 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8764    use in fmpysub instructions.  */
8765 int
8766 pa_fmpysuboperands (rtx *operands)
8767 {
8768   enum machine_mode mode = GET_MODE (operands[0]);
8769 
8770   /* Must be a floating point mode.  */
8771   if (mode != SFmode && mode != DFmode)
8772     return 0;
8773 
8774   /* All modes must be the same.  */
8775   if (! (mode == GET_MODE (operands[1])
8776 	 && mode == GET_MODE (operands[2])
8777 	 && mode == GET_MODE (operands[3])
8778 	 && mode == GET_MODE (operands[4])
8779 	 && mode == GET_MODE (operands[5])))
8780     return 0;
8781 
8782   /* All operands must be registers.  */
8783   if (! (GET_CODE (operands[1]) == REG
8784 	 && GET_CODE (operands[2]) == REG
8785 	 && GET_CODE (operands[3]) == REG
8786 	 && GET_CODE (operands[4]) == REG
8787 	 && GET_CODE (operands[5]) == REG))
8788     return 0;
8789 
8790   /* Only 2 real operands to the subtraction.  Subtraction is not a commutative
8791      operation, so operands[4] must be the same as operands[3].  */
8792   if (! rtx_equal_p (operands[3], operands[4]))
8793     return 0;
8794 
8795   /* The multiply result cannot feed into the subtraction.  */
8796   if (rtx_equal_p (operands[5], operands[0]))
8797     return 0;
8798 
8799   /* Inout operand of sub cannot conflict with any operands from multiply.  */
8800   if (rtx_equal_p (operands[3], operands[0])
8801      || rtx_equal_p (operands[3], operands[1])
8802      || rtx_equal_p (operands[3], operands[2]))
8803     return 0;
8804 
8805   /* SFmode limits the registers to the upper 32 of the 32-bit FP regs.  */
8806   if (mode == SFmode
8807       && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8808 	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8809 	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8810 	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8811 	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8812 	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8813     return 0;
8814 
8815   /* Passed.  Operands are suitable for fmpysub.  */
8816   return 1;
8817 }
8818 
8819 /* Return 1 if the given constant is 2, 4, or 8.  These are the valid
8820    constants for shadd instructions.  */
8821 int
8822 pa_shadd_constant_p (int val)
8823 {
8824   if (val == 2 || val == 4 || val == 8)
8825     return 1;
8826   else
8827     return 0;
8828 }
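
/* The values 2, 4 and 8 correspond to the sh1add, sh2add and sh3add
   instructions, which add a second operand to (x << 1), (x << 2) or
   (x << 3) in a single insn -- hence no other scale factors are
   usable.  */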
8829 
8830 /* Return TRUE if INSN branches forward.  */
8831 
8832 static bool
8833 forward_branch_p (rtx insn)
8834 {
8835   rtx lab = JUMP_LABEL (insn);
8836 
8837   /* The INSN must have a jump label.  */
8838   gcc_assert (lab != NULL_RTX);
8839 
8840   if (INSN_ADDRESSES_SET_P ())
8841     return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
8842 
8843   while (insn)
8844     {
8845       if (insn == lab)
8846 	return true;
8847       else
8848 	insn = NEXT_INSN (insn);
8849     }
8850 
8851   return false;
8852 }
8853 
8854 /* Return 1 if INSN is in the delay slot of a call instruction.  */
8855 int
8856 pa_jump_in_call_delay (rtx insn)
8857 {
8859   if (GET_CODE (insn) != JUMP_INSN)
8860     return 0;
8861 
8862   if (PREV_INSN (insn)
8863       && PREV_INSN (PREV_INSN (insn))
8864       && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
8865     {
8866       rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
8867 
8868       return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8869 	      && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
8871     }
8872   else
8873     return 0;
8874 }
8875 
8876 /* Output an unconditional move and branch insn.  */
8877 
8878 const char *
8879 pa_output_parallel_movb (rtx *operands, rtx insn)
8880 {
8881   int length = get_attr_length (insn);
8882 
8883   /* These are the cases in which we win.  */
8884   if (length == 4)
8885     return "mov%I1b,tr %1,%0,%2";
8886 
8887   /* None of the following cases win, but they don't lose either.  */
8888   if (length == 8)
8889     {
8890       if (dbr_sequence_length () == 0)
8891 	{
8892 	  /* Nothing in the delay slot, fake it by putting the combined
8893 	     insn (the copy or add) in the delay slot of a bl.  */
8894 	  if (GET_CODE (operands[1]) == CONST_INT)
8895 	    return "b %2\n\tldi %1,%0";
8896 	  else
8897 	    return "b %2\n\tcopy %1,%0";
8898 	}
8899       else
8900 	{
8901 	  /* Something in the delay slot, but we've got a long branch.  */
8902 	  if (GET_CODE (operands[1]) == CONST_INT)
8903 	    return "ldi %1,%0\n\tb %2";
8904 	  else
8905 	    return "copy %1,%0\n\tb %2";
8906 	}
8907     }
8908 
8909   if (GET_CODE (operands[1]) == CONST_INT)
8910     output_asm_insn ("ldi %1,%0", operands);
8911   else
8912     output_asm_insn ("copy %1,%0", operands);
8913   return pa_output_lbranch (operands[2], insn, 1);
8914 }
8915 
8916 /* Output an unconditional add and branch insn.  */
8917 
8918 const char *
8919 pa_output_parallel_addb (rtx *operands, rtx insn)
8920 {
8921   int length = get_attr_length (insn);
8922 
8923   /* To make life easy we want operand0 to be the shared input/output
8924      operand and operand1 to be the readonly operand.  */
8925   if (operands[0] == operands[1])
8926     operands[1] = operands[2];
8927 
8928   /* These are the cases in which we win.  */
8929   if (length == 4)
8930     return "add%I1b,tr %1,%0,%3";
8931 
8932   /* None of the following cases win, but they don't lose either.  */
8933   if (length == 8)
8934     {
8935       if (dbr_sequence_length () == 0)
8936 	/* Nothing in the delay slot, fake it by putting the combined
8937 	   insn (the copy or add) in the delay slot of a bl.  */
8938 	return "b %3\n\tadd%I1 %1,%0,%0";
8939       else
8940 	/* Something in the delay slot, but we've got a long branch.  */
8941 	return "add%I1 %1,%0,%0\n\tb %3";
8942     }
8943 
8944   output_asm_insn ("add%I1 %1,%0,%0", operands);
8945   return pa_output_lbranch (operands[3], insn, 1);
8946 }
8947 
8948 /* Return nonzero if INSN (a jump insn) immediately follows a call
8949    to a named function.  This is used to avoid filling the delay slot
8950    of the jump since it can usually be eliminated by modifying RP in
8951    the delay slot of the call.  */
8952 
8953 int
8954 pa_following_call (rtx insn)
8955 {
8956   if (! TARGET_JUMP_IN_DELAY)
8957     return 0;
8958 
8959   /* Find the previous real insn, skipping NOTEs.  */
8960   insn = PREV_INSN (insn);
8961   while (insn && GET_CODE (insn) == NOTE)
8962     insn = PREV_INSN (insn);
8963 
8964   /* Check for CALL_INSNs and millicode calls.  */
8965   if (insn
8966       && ((GET_CODE (insn) == CALL_INSN
8967 	   && get_attr_type (insn) != TYPE_DYNCALL)
8968 	  || (GET_CODE (insn) == INSN
8969 	      && GET_CODE (PATTERN (insn)) != SEQUENCE
8970 	      && GET_CODE (PATTERN (insn)) != USE
8971 	      && GET_CODE (PATTERN (insn)) != CLOBBER
8972 	      && get_attr_type (insn) == TYPE_MILLI)))
8973     return 1;
8974 
8975   return 0;
8976 }
8977 
8978 /* We use this hook to perform a PA specific optimization which is difficult
8979    to do in earlier passes.
8980 
8981    We want the delay slots of branches within jump tables to be filled.
8982    None of the compiler passes at the moment even has the notion that a
8983    PA jump table doesn't contain addresses, but instead contains actual
8984    instructions!
8985 
8986    Because we actually jump into the table, the addresses of each entry
8987    must stay constant in relation to the beginning of the table (which
8988    itself must stay constant relative to the instruction to jump into
8989    it).  I don't believe we can guarantee earlier passes of the compiler
8990    will adhere to those rules.
8991 
8992    So, late in the compilation process we find all the jump tables, and
8993    expand them into real code -- e.g. each entry in the jump table vector
8994    will get an appropriate label followed by a jump to the final target.
8995 
8996    Reorg and the final jump pass can then optimize these branches and
8997    fill their delay slots.  We end up with smaller, more efficient code.
8998 
8999    The jump instructions within the table are special; we must be able
9000    to identify them during assembly output (if the jumps don't get filled
9001    we need to emit a nop rather than nullifying the delay slot).  We
9002    identify jumps in switch tables by using insns with the attribute
9003    type TYPE_BTABLE_BRANCH.
9004 
9005    We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
9006    insns.  This serves two purposes, first it prevents jump.c from
9007    noticing that the last N entries in the table jump to the instruction
9008    immediately after the table and deleting the jumps.  Second, those
9009    insns mark where we should emit .begin_brtab and .end_brtab directives
9010    when using GAS (allows for better link time optimizations).  */
9011 
9012 static void
9013 pa_reorg (void)
9014 {
9015   rtx insn;
9016 
9017   remove_useless_addtr_insns (1);
9018 
9019   if (pa_cpu < PROCESSOR_8000)
9020     pa_combine_instructions ();
9021 
9023   /* This is fairly cheap, so always run it if optimizing.  */
9024   if (optimize > 0 && !TARGET_BIG_SWITCH)
9025     {
9026       /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns.  */
9027       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9028 	{
9029 	  rtx pattern, tmp, location, label;
9030 	  unsigned int length, i;
9031 
9032 	  /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode.  */
9033 	  if (GET_CODE (insn) != JUMP_INSN
9034 	      || (GET_CODE (PATTERN (insn)) != ADDR_VEC
9035 		  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
9036 	    continue;
9037 
9038 	  /* Emit marker for the beginning of the branch table.  */
9039 	  emit_insn_before (gen_begin_brtab (), insn);
9040 
9041 	  pattern = PATTERN (insn);
9042 	  location = PREV_INSN (insn);
9043           length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
9044 
9045 	  for (i = 0; i < length; i++)
9046 	    {
9047 	      /* Emit a label before each jump to keep jump.c from
9048 		 removing this code.  */
9049 	      tmp = gen_label_rtx ();
9050 	      LABEL_NUSES (tmp) = 1;
9051 	      emit_label_after (tmp, location);
9052 	      location = NEXT_INSN (location);
9053 
9054 	      if (GET_CODE (pattern) == ADDR_VEC)
9055 		label = XEXP (XVECEXP (pattern, 0, i), 0);
9056 	      else
9057 		label = XEXP (XVECEXP (pattern, 1, i), 0);
9058 
9059 	      tmp = gen_short_jump (label);
9060 
9061 	      /* Emit the jump itself.  */
9062 	      tmp = emit_jump_insn_after (tmp, location);
9063 	      JUMP_LABEL (tmp) = label;
9064 	      LABEL_NUSES (label)++;
9065 	      location = NEXT_INSN (location);
9066 
9067 	      /* Emit a BARRIER after the jump.  */
9068 	      emit_barrier_after (location);
9069 	      location = NEXT_INSN (location);
9070 	    }
9071 
9072 	  /* Emit marker for the end of the branch table.  */
9073 	  emit_insn_before (gen_end_brtab (), location);
9074 	  location = NEXT_INSN (location);
9075 	  emit_barrier_after (location);
9076 
9077 	  /* Delete the ADDR_VEC or ADDR_DIFF_VEC.  */
9078 	  delete_insn (insn);
9079 	}
9080     }
9081   else
9082     {
9083       /* Still need brtab marker insns.  FIXME: the presence of these
9084 	 markers disables output of the branch table to readonly memory,
9085 	 and any alignment directives that might be needed.  Possibly,
9086 	 the begin_brtab insn should be output before the label for the
9087 	 table.  This doesn't matter at the moment since the tables are
9088 	 always output in the text section.  */
9089       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9090 	{
9091 	  /* Find an ADDR_VEC insn.  */
9092 	  if (GET_CODE (insn) != JUMP_INSN
9093 	      || (GET_CODE (PATTERN (insn)) != ADDR_VEC
9094 		  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
9095 	    continue;
9096 
9097 	  /* Now generate markers for the beginning and end of the
9098 	     branch table.  */
9099 	  emit_insn_before (gen_begin_brtab (), insn);
9100 	  emit_insn_after (gen_end_brtab (), insn);
9101 	}
9102     }
9103 }
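
/* Sketch of the expansion above: a branch table that was a vector of
   code label addresses,

       .word L$0021
       .word L$0022

   becomes one labelled short jump per entry,

       L$0100:	b L$0021
       L$0101:	b L$0022

   each with a delay slot that reorg may fill (or a nop at assembly
   time), bracketed by the begin/end brtab markers.  */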
9104 
9105 /* The PA has a number of odd instructions which can perform multiple
9106    tasks at once.  On first generation PA machines (PA1.0 and PA1.1)
9107    it may be profitable to combine two instructions into one instruction
9108    with two outputs.  It's not profitable on PA2.0 machines because the
9109    two outputs would take two slots in the reorder buffers.
9110 
9111    This routine finds instructions which can be combined and combines
9112    them.  We only support some of the potential combinations, and we
9113    only try common ways to find suitable instructions.
9114 
9115       * addb can add two registers or a register and a small integer
9116       and jump to a nearby (+-8k) location.  Normally the jump to the
9117       nearby location is conditional on the result of the add, but by
9118       using the "true" condition we can make the jump unconditional.
9119       Thus addb can perform two independent operations in one insn.
9120 
9121       * movb is similar to addb in that it can perform a reg->reg
9122       or small immediate->reg copy and jump to a nearby (+-8k) location.
9123 
9124       * fmpyadd and fmpysub can perform a FP multiply and either an
9125       FP add or FP sub if the operands of the multiply and add/sub are
9126       independent (there are other minor restrictions).  Note both
9127       the fmpy and fadd/fsub can in theory move to better spots according
9128       to data dependencies, but for now we require the fmpy stay at a
9129       fixed location.
9130 
9131       * Many of the memory operations can perform pre & post updates
9132       of index registers.  GCC's pre/post increment/decrement addressing
9133       is far too simple to take advantage of all the possibilities.  This
9134       pass may not be suitable since those insns may not be independent.
9135 
9136       * comclr can compare two ints or an int and a register, nullify
9137       the following instruction and zero some other register.  This
9138       is more difficult to use as it's harder to find an insn which
9139       will generate a comclr than finding something like an unconditional
9140       branch.  (conditional moves & long branches create comclr insns).
9141 
9142       * Most arithmetic operations can conditionally skip the next
9143       instruction.  They can be viewed as "perform this operation
9144       and conditionally jump to this nearby location" (where nearby
9145       is an insn away).  These are difficult to use due to the
9146       branch length restrictions.  */
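
/* For example (a sketch built from the output templates in
   pa_output_parallel_addb above): an independent add-immediate and a
   backward unconditional branch,

       addi 1,%r3,%r3
       b L$0005

   can be rewritten as the single combined instruction

       addib,tr 1,%r3,L$0005

   where the always-true completer makes the branch unconditional.  */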
9147 
9148 static void
9149 pa_combine_instructions (void)
9150 {
9151   rtx anchor, new_rtx;
9152 
9153   /* This can get expensive since the basic algorithm is on the
9154      order of O(n^2) (or worse).  Only do it for -O2 or higher
9155      levels of optimization.  */
9156   if (optimize < 2)
9157     return;
9158 
9159   /* Walk down the list of insns looking for "anchor" insns which
9160      may be combined with "floating" insns.  As the name implies,
9161      "anchor" instructions don't move, while "floating" insns may
9162      move around.  */
9163   new_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9164   new_rtx = make_insn_raw (new_rtx);
9165 
9166   for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9167     {
9168       enum attr_pa_combine_type anchor_attr;
9169       enum attr_pa_combine_type floater_attr;
9170 
9171       /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9172 	 Also ignore any special USE insns.  */
9173       if ((GET_CODE (anchor) != INSN
9174 	  && GET_CODE (anchor) != JUMP_INSN
9175 	  && GET_CODE (anchor) != CALL_INSN)
9176 	  || GET_CODE (PATTERN (anchor)) == USE
9177 	  || GET_CODE (PATTERN (anchor)) == CLOBBER
9178 	  || GET_CODE (PATTERN (anchor)) == ADDR_VEC
9179 	  || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
9180 	continue;
9181 
9182       anchor_attr = get_attr_pa_combine_type (anchor);
9183       /* See if anchor is an insn suitable for combination.  */
9184       if (anchor_attr == PA_COMBINE_TYPE_FMPY
9185 	  || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9186 	  || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9187 	      && ! forward_branch_p (anchor)))
9188 	{
9189 	  rtx floater;
9190 
9191 	  for (floater = PREV_INSN (anchor);
9192 	       floater;
9193 	       floater = PREV_INSN (floater))
9194 	    {
9195 	      if (GET_CODE (floater) == NOTE
9196 		  || (GET_CODE (floater) == INSN
9197 		      && (GET_CODE (PATTERN (floater)) == USE
9198 			  || GET_CODE (PATTERN (floater)) == CLOBBER)))
9199 		continue;
9200 
9201 	      /* Anything except a regular INSN will stop our search.  */
9202 	      if (GET_CODE (floater) != INSN
9203 		  || GET_CODE (PATTERN (floater)) == ADDR_VEC
9204 		  || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
9205 		{
9206 		  floater = NULL_RTX;
9207 		  break;
9208 		}
9209 
9210 	      /* See if FLOATER is suitable for combination with the
9211 		 anchor.  */
9212 	      floater_attr = get_attr_pa_combine_type (floater);
9213 	      if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9214 		   && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9215 		  || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9216 		      && floater_attr == PA_COMBINE_TYPE_FMPY))
9217 		{
9218 		  /* If ANCHOR and FLOATER can be combined, then we're
9219 		     done with this pass.  */
9220 		  if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9221 					SET_DEST (PATTERN (floater)),
9222 					XEXP (SET_SRC (PATTERN (floater)), 0),
9223 					XEXP (SET_SRC (PATTERN (floater)), 1)))
9224 		    break;
9225 		}
9226 
9227 	      else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9228 		       && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9229 		{
9230 		  if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9231 		    {
9232 		      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9233 					    SET_DEST (PATTERN (floater)),
9234 					XEXP (SET_SRC (PATTERN (floater)), 0),
9235 					XEXP (SET_SRC (PATTERN (floater)), 1)))
9236 			break;
9237 		    }
9238 		  else
9239 		    {
9240 		      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9241 					    SET_DEST (PATTERN (floater)),
9242 					    SET_SRC (PATTERN (floater)),
9243 					    SET_SRC (PATTERN (floater))))
9244 			break;
9245 		    }
9246 		}
9247 	    }
9248 
9249 	  /* If we didn't find anything on the backwards scan try forwards.  */
9250 	  if (!floater
9251 	      && (anchor_attr == PA_COMBINE_TYPE_FMPY
9252 		  || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9253 	    {
9254 	      for (floater = anchor; floater; floater = NEXT_INSN (floater))
9255 		{
9256 		  if (GET_CODE (floater) == NOTE
9257 		      || (GET_CODE (floater) == INSN
9258 			  && (GET_CODE (PATTERN (floater)) == USE
9259 			      || GET_CODE (PATTERN (floater)) == CLOBBER)))
9261 		    continue;
9262 
9263 		  /* Anything except a regular INSN will stop our search.  */
9264 		  if (GET_CODE (floater) != INSN
9265 		      || GET_CODE (PATTERN (floater)) == ADDR_VEC
9266 		      || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
9267 		    {
9268 		      floater = NULL_RTX;
9269 		      break;
9270 		    }
9271 
9272 		  /* See if FLOATER is suitable for combination with the
9273 		     anchor.  */
9274 		  floater_attr = get_attr_pa_combine_type (floater);
9275 		  if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9276 		       && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9277 		      || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9278 			  && floater_attr == PA_COMBINE_TYPE_FMPY))
9279 		    {
9280 		      /* If ANCHOR and FLOATER can be combined, then we're
9281 			 done with this pass.  */
9282 		      if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9283 					    SET_DEST (PATTERN (floater)),
9284 					    XEXP (SET_SRC (PATTERN (floater)),
9285 						  0),
9286 					    XEXP (SET_SRC (PATTERN (floater)),
9287 						  1)))
9288 			break;
9289 		    }
9290 		}
9291 	    }
9292 
9293 	  /* FLOATER will be nonzero if we found a suitable floating
9294 	     insn for combination with ANCHOR.  */
9295 	  if (floater
9296 	      && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9297 		  || anchor_attr == PA_COMBINE_TYPE_FMPY))
9298 	    {
9299 	      /* Emit the new instruction and delete the old anchor.  */
9300 	      emit_insn_before (gen_rtx_PARALLEL
9301 				(VOIDmode,
9302 				 gen_rtvec (2, PATTERN (anchor),
9303 					    PATTERN (floater))),
9304 				anchor);
9305 
9306 	      SET_INSN_DELETED (anchor);
9307 
9308 	      /* Emit a special USE insn for FLOATER, then delete
9309 		 the floating insn.  */
9310 	      emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9311 	      delete_insn (floater);
9312 
9313 	      continue;
9314 	    }
9315 	  else if (floater
9316 		   && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9317 	    {
9318 	      rtx temp;
9319 	      /* Emit the new_jump instruction and delete the old anchor.  */
9320 	      temp
9321 		= emit_jump_insn_before (gen_rtx_PARALLEL
9322 					 (VOIDmode,
9323 					  gen_rtvec (2, PATTERN (anchor),
9324 						     PATTERN (floater))),
9325 					 anchor);
9326 
9327 	      JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9328 	      SET_INSN_DELETED (anchor);
9329 
9330 	      /* Emit a special USE insn for FLOATER, then delete
9331 		 the floating insn.  */
9332 	      emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9333 	      delete_insn (floater);
9334 	      continue;
9335 	    }
9336 	}
9337     }
9338 }
9339 
9340 static int
9341 pa_can_combine_p (rtx new_rtx, rtx anchor, rtx floater, int reversed, rtx dest,
9342 		  rtx src1, rtx src2)
9343 {
9344   int insn_code_number;
9345   rtx start, end;
9346 
9347   /* Create a PARALLEL with the patterns of ANCHOR and
9348      FLOATER, try to recognize it, then test constraints
9349      for the resulting pattern.
9350 
9351      If the pattern doesn't match or the constraints
9352      aren't met keep searching for a suitable floater
9353      insn.  */
9354   XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9355   XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9356   INSN_CODE (new_rtx) = -1;
9357   insn_code_number = recog_memoized (new_rtx);
9358   if (insn_code_number < 0
9359       || (extract_insn (new_rtx), ! constrain_operands (1)))
9360     return 0;
9361 
9362   if (reversed)
9363     {
9364       start = anchor;
9365       end = floater;
9366     }
9367   else
9368     {
9369       start = floater;
9370       end = anchor;
9371     }
9372 
9373   /* There are up to three operands to consider: one
9374      output and two inputs.
9375 
9376      The output must not be used between FLOATER & ANCHOR
9377      exclusive.  The inputs must not be set between
9378      FLOATER and ANCHOR exclusive.  */
9379 
9380   if (reg_used_between_p (dest, start, end))
9381     return 0;
9382 
9383   if (reg_set_between_p (src1, start, end))
9384     return 0;
9385 
9386   if (reg_set_between_p (src2, start, end))
9387     return 0;
9388 
9389   /* If we get here, then everything is good.  */
9390   return 1;
9391 }
9392 
9393 /* Return nonzero if references for INSN are delayed.
9394 
9395    Millicode insns are actually function calls with some special
9396    constraints on arguments and register usage.
9397 
9398    Millicode calls always expect their arguments in the integer argument
9399    registers, and always return their result in %r29 (ret1).  They
9400    are expected to clobber their arguments, %r1, %r29, and the return
9401    pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9402 
9403    This function tells reorg that the references to arguments and
9404    millicode calls do not appear to happen until after the millicode call.
9405    This allows reorg to put insns which set the argument registers into the
9406    delay slot of the millicode call -- thus they act more like traditional
9407    CALL_INSNs.
9408 
9409    Note we cannot consider side effects of the insn to be delayed because
9410    the branch and link insn will clobber the return pointer.  If we happened
9411    to use the return pointer in the delay slot of the call, then we lose.
9412 
9413    get_attr_type will try to recognize the given insn, so make sure to
9414    filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9415    in particular.  */
9416 int
9417 pa_insn_refs_are_delayed (rtx insn)
9418 {
9419   return ((GET_CODE (insn) == INSN
9420 	   && GET_CODE (PATTERN (insn)) != SEQUENCE
9421 	   && GET_CODE (PATTERN (insn)) != USE
9422 	   && GET_CODE (PATTERN (insn)) != CLOBBER
9423 	   && get_attr_type (insn) == TYPE_MILLI));
9424 }
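
/* For example, an insn that loads the first millicode argument
   register %r26 can be moved into the delay slot of a branch to a
   routine such as $$mulI or $$divI, since the argument is not read
   until the millicode itself executes.  */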
9425 
9426 /* Promote the return value, but not the arguments.  */
9427 
9428 static enum machine_mode
9429 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9430                           enum machine_mode mode,
9431                           int *punsignedp ATTRIBUTE_UNUSED,
9432                           const_tree fntype ATTRIBUTE_UNUSED,
9433                           int for_return)
9434 {
9435   if (for_return == 0)
9436     return mode;
9437   return promote_mode (type, mode, punsignedp);
9438 }
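
/* E.g., a function returning "short" has its result promoted to
   word_mode (SImode on the 32-bit port), while a "short" argument is
   left in HImode because for_return is 0 for arguments.  */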
9439 
9440 /* On the HP-PA the value is found in register(s) 28(-29), unless
9441    the mode is SF or DF. Then the value is returned in fr4 (32).
9442 
9443    This must perform the same promotions as PROMOTE_MODE, else promoting
9444    return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9445 
9446    Small structures must be returned in a PARALLEL on PA64 in order
9447    to match the HP Compiler ABI.  */
9448 
9449 static rtx
9450 pa_function_value (const_tree valtype,
9451                    const_tree func ATTRIBUTE_UNUSED,
9452                    bool outgoing ATTRIBUTE_UNUSED)
9453 {
9454   enum machine_mode valmode;
9455 
9456   if (AGGREGATE_TYPE_P (valtype)
9457       || TREE_CODE (valtype) == COMPLEX_TYPE
9458       || TREE_CODE (valtype) == VECTOR_TYPE)
9459     {
9460       if (TARGET_64BIT)
9461 	{
9462           /* Aggregates with a size less than or equal to 128 bits are
9463 	     returned in GR 28(-29).  They are left justified.  The pad
9464 	     bits are undefined.  Larger aggregates are returned in
9465 	     memory.  */
9466 	  rtx loc[2];
9467 	  int i, offset = 0;
9468 	  int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
9469 
9470 	  for (i = 0; i < ub; i++)
9471 	    {
9472 	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9473 					  gen_rtx_REG (DImode, 28 + i),
9474 					  GEN_INT (offset));
9475 	      offset += 8;
9476 	    }
9477 
9478 	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9479 	}
9480       else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
9481 	{
9482 	  /* Aggregates 5 to 8 bytes in size are returned in general
9483 	     registers r28-r29 in the same manner as other non
9484 	     floating-point objects.  The data is right-justified and
9485 	     zero-extended to 64 bits.  This is opposite to the normal
9486 	     justification used on big endian targets and requires
9487 	     special treatment.  */
9488 	  rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9489 				       gen_rtx_REG (DImode, 28), const0_rtx);
9490 	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9491 	}
9492     }
9493 
9494   if ((INTEGRAL_TYPE_P (valtype)
9495        && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9496       || POINTER_TYPE_P (valtype))
9497     valmode = word_mode;
9498   else
9499     valmode = TYPE_MODE (valtype);
9500 
9501   if (TREE_CODE (valtype) == REAL_TYPE
9502       && !AGGREGATE_TYPE_P (valtype)
9503       && TYPE_MODE (valtype) != TFmode
9504       && !TARGET_SOFT_FLOAT)
9505     return gen_rtx_REG (valmode, 32);
9506 
9507   return gen_rtx_REG (valmode, 28);
9508 }
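
/* Worked example: an 8-byte struct comes back left justified in GR28
   on PA64 (a one-element DImode PARALLEL at offset 0), whereas on the
   32-bit port it is right-justified and zero-extended across the
   GR28/GR29 pair, again expressed as a single DImode PARALLEL.  */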
9509 
9510 /* Implement the TARGET_LIBCALL_VALUE hook.  */
9511 
9512 static rtx
9513 pa_libcall_value (enum machine_mode mode,
9514 		  const_rtx fun ATTRIBUTE_UNUSED)
9515 {
9516   if (! TARGET_SOFT_FLOAT
9517       && (mode == SFmode || mode == DFmode))
9518     return  gen_rtx_REG (mode, 32);
9519   else
9520     return  gen_rtx_REG (mode, 28);
9521 }
9522 
9523 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook.  */
9524 
9525 static bool
9526 pa_function_value_regno_p (const unsigned int regno)
9527 {
9528   if (regno == 28
9529       || (! TARGET_SOFT_FLOAT &&  regno == 32))
9530     return true;
9531 
9532   return false;
9533 }
9534 
9535 /* Update the data in CUM to advance over an argument
9536    of mode MODE and data type TYPE.
9537    (TYPE is null for libcalls where that information may not be available.)  */
9538 
9539 static void
9540 pa_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
9541 			 const_tree type, bool named ATTRIBUTE_UNUSED)
9542 {
9543   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9544   int arg_size = FUNCTION_ARG_SIZE (mode, type);
9545 
9546   cum->nargs_prototype--;
9547   cum->words += (arg_size
9548 		 + ((cum->words & 01)
9549 		    && type != NULL_TREE
9550 		    && arg_size > 1));
9551 }
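
/* Worked example: with one word already used (cum->words == 1), a
   DFmode argument (arg_size == 2) advances cum->words by 3 -- one
   padding word to restore the double word alignment plus the two
   words of the argument itself.  */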
9552 
9553 /* Return the location of a parameter that is passed in a register or NULL
9554    if the parameter has any component that is passed in memory.
9555 
9556    This is new code and will be pushed into the net sources after
9557    further testing.
9558 
9559    ??? We might want to restructure this so that it looks more like other
9560    ports.  */
9561 static rtx
9562 pa_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
9563 		 const_tree type, bool named ATTRIBUTE_UNUSED)
9564 {
9565   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9566   int max_arg_words = (TARGET_64BIT ? 8 : 4);
9567   int alignment = 0;
9568   int arg_size;
9569   int fpr_reg_base;
9570   int gpr_reg_base;
9571   rtx retval;
9572 
9573   if (mode == VOIDmode)
9574     return NULL_RTX;
9575 
9576   arg_size = FUNCTION_ARG_SIZE (mode, type);
9577 
9578   /* If this arg would be passed partially or totally on the stack, then
9579      this routine should return zero.  pa_arg_partial_bytes will
9580      handle arguments which are split between regs and stack slots if
9581      the ABI mandates split arguments.  */
9582   if (!TARGET_64BIT)
9583     {
9584       /* The 32-bit ABI does not split arguments.  */
9585       if (cum->words + arg_size > max_arg_words)
9586 	return NULL_RTX;
9587     }
9588   else
9589     {
9590       if (arg_size > 1)
9591 	alignment = cum->words & 1;
9592       if (cum->words + alignment >= max_arg_words)
9593 	return NULL_RTX;
9594     }
9595 
9596   /* The 32-bit ABIs and the 64-bit ABIs are rather different,
9597      particularly in their handling of FP registers.  We might
9598      be able to cleverly share code between them, but I'm not
9599      going to bother in the hope that splitting them up results
9600      in code that is more easily understood.  */
9601 
9602   if (TARGET_64BIT)
9603     {
9604       /* Advance the base registers to their current locations.
9605 
9606          Remember, gprs grow towards smaller register numbers while
9607 	 fprs grow to higher register numbers.  Also remember that
9608 	 although FP regs are 32-bit addressable, we pretend that
9609 	 the registers are 64-bits wide.  */
9610       gpr_reg_base = 26 - cum->words;
9611       fpr_reg_base = 32 + cum->words;
9612 
9613       /* Arguments wider than one word and small aggregates need special
9614 	 treatment.  */
9615       if (arg_size > 1
9616 	  || mode == BLKmode
9617 	  || (type && (AGGREGATE_TYPE_P (type)
9618 		       || TREE_CODE (type) == COMPLEX_TYPE
9619 		       || TREE_CODE (type) == VECTOR_TYPE)))
9620 	{
9621 	  /* Double-extended precision (80-bit), quad-precision (128-bit)
9622 	     and aggregates including complex numbers are aligned on
9623 	     128-bit boundaries.  The first eight 64-bit argument slots
9624 	     are associated one-to-one, with general registers r26
9625 	     through r19, and also with floating-point registers fr4
9626 	     through fr11.  Arguments larger than one word are always
9627 	     passed in general registers.
9628 
9629 	     Using a PARALLEL with a word mode register results in left
9630 	     justified data on a big-endian target.  */
9631 
9632 	  rtx loc[8];
9633 	  int i, offset = 0, ub = arg_size;
9634 
9635 	  /* Align the base register.  */
9636 	  gpr_reg_base -= alignment;
9637 
9638 	  ub = MIN (ub, max_arg_words - cum->words - alignment);
9639 	  for (i = 0; i < ub; i++)
9640 	    {
9641 	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9642 					  gen_rtx_REG (DImode, gpr_reg_base),
9643 					  GEN_INT (offset));
9644 	      gpr_reg_base -= 1;
9645 	      offset += 8;
9646 	    }
9647 
9648 	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9649 	}
9650      }
9651   else
9652     {
9653       /* If the argument is larger than a word, then we know precisely
9654 	 which registers we must use.  */
9655       if (arg_size > 1)
9656 	{
9657 	  if (cum->words)
9658 	    {
9659 	      gpr_reg_base = 23;
9660 	      fpr_reg_base = 38;
9661 	    }
9662 	  else
9663 	    {
9664 	      gpr_reg_base = 25;
9665 	      fpr_reg_base = 34;
9666 	    }
9667 
9668 	  /* Structures 5 to 8 bytes in size are passed in the general
9669 	     registers in the same manner as other non floating-point
9670 	     objects.  The data is right-justified and zero-extended
9671 	     to 64 bits.  This is opposite to the normal justification
9672 	     used on big endian targets and requires special treatment.
9673 	     We now define BLOCK_REG_PADDING to pad these objects.
9674 	     Aggregates, complex and vector types are passed in the same
9675 	     manner as structures.  */
9676 	  if (mode == BLKmode
9677 	      || (type && (AGGREGATE_TYPE_P (type)
9678 			   || TREE_CODE (type) == COMPLEX_TYPE
9679 			   || TREE_CODE (type) == VECTOR_TYPE)))
9680 	    {
9681 	      rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9682 					   gen_rtx_REG (DImode, gpr_reg_base),
9683 					   const0_rtx);
9684 	      return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9685 	    }
9686 	}
9687       else
9688         {
9689 	   /* We have a single word (32 bits).  A simple computation
9690 	      will get us the register #s we need.  */
9691 	   gpr_reg_base = 26 - cum->words;
9692 	   fpr_reg_base = 32 + 2 * cum->words;
9693 	}
9694     }
9695 
9696   /* Determine if the argument needs to be passed in both general and
9697      floating point registers.  */
9698   if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9699        /* If we are doing soft-float with portable runtime, then there
9700 	  is no need to worry about FP regs.  */
9701        && !TARGET_SOFT_FLOAT
9702        /* The parameter must be some kind of scalar float, else we just
9703 	  pass it in integer registers.  */
9704        && GET_MODE_CLASS (mode) == MODE_FLOAT
9705        /* The target function must not have a prototype.  */
9706        && cum->nargs_prototype <= 0
9707        /* libcalls do not need to pass items in both FP and general
9708 	  registers.  */
9709        && type != NULL_TREE
9710        /* All this hair applies to "outgoing" args only.  This includes
9711 	  sibcall arguments set up with FUNCTION_INCOMING_ARG.  */
9712        && !cum->incoming)
9713       /* Also pass outgoing floating arguments in both registers in indirect
9714 	 calls with the 32-bit ABI and the HP assembler, since there is no
9715 	 way to specify the argument locations in static functions.  */
9716       || (!TARGET_64BIT
9717 	  && !TARGET_GAS
9718 	  && !cum->incoming
9719 	  && cum->indirect
9720 	  && GET_MODE_CLASS (mode) == MODE_FLOAT))
9721     {
9722       retval
9723 	= gen_rtx_PARALLEL
9724 	    (mode,
9725 	     gen_rtvec (2,
9726 			gen_rtx_EXPR_LIST (VOIDmode,
9727 					   gen_rtx_REG (mode, fpr_reg_base),
9728 					   const0_rtx),
9729 			gen_rtx_EXPR_LIST (VOIDmode,
9730 					   gen_rtx_REG (mode, gpr_reg_base),
9731 					   const0_rtx)));
9732     }
9733   else
9734     {
9735       /* See if we should pass this parameter in a general register.  */
9736       if (TARGET_SOFT_FLOAT
9737 	  /* Indirect calls in the normal 32-bit ABI require all arguments
9738 	     to be passed in general registers.  */
9739 	  || (!TARGET_PORTABLE_RUNTIME
9740 	      && !TARGET_64BIT
9741 	      && !TARGET_ELF32
9742 	      && cum->indirect)
9743 	  /* If the parameter is not a scalar floating-point parameter,
9744 	     then it belongs in GPRs.  */
9745 	  || GET_MODE_CLASS (mode) != MODE_FLOAT
9746 	  /* Structure with single SFmode field belongs in GPR.  */
9747 	  || (type && AGGREGATE_TYPE_P (type)))
9748 	retval = gen_rtx_REG (mode, gpr_reg_base);
9749       else
9750 	retval = gen_rtx_REG (mode, fpr_reg_base);
9751     }
9752   return retval;
9753 }
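
/* Illustration only (kept out of the build): the shape of the
   both-register value constructed above.  The register numbers 34 and
   25 are what fpr_reg_base and gpr_reg_base work out to for a
   first-slot DFmode argument in the 32-bit ABI; treat them as an
   assumption of this sketch rather than as ABI documentation.  */
#if 0
static rtx
example_both_register_home (void)
{
  /* (parallel:DF [(expr_list (reg:DF 34) (const_int 0))
		   (expr_list (reg:DF 25) (const_int 0))])  */
  return gen_rtx_PARALLEL
    (DFmode,
     gen_rtvec (2,
		gen_rtx_EXPR_LIST (VOIDmode,
				   gen_rtx_REG (DFmode, 34), const0_rtx),
		gen_rtx_EXPR_LIST (VOIDmode,
				   gen_rtx_REG (DFmode, 25), const0_rtx)));
}
#endif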
9754 
9755 /* Arguments larger than one word are double word aligned.  */
9756 
9757 static unsigned int
9758 pa_function_arg_boundary (enum machine_mode mode, const_tree type)
9759 {
9760   bool singleword = (type
9761 		     ? (integer_zerop (TYPE_SIZE (type))
9762 			|| !TREE_CONSTANT (TYPE_SIZE (type))
9763 			|| int_size_in_bytes (type) <= UNITS_PER_WORD)
9764 		     : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9765 
9766   return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
9767 }
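
/* For example (a sketch of the rule above): in the 32-bit runtime,
   where a word is four bytes, an int argument aligns to PARM_BOUNDARY
   while a double or a 12-byte structure starts on a MAX_PARM_BOUNDARY
   (doubleword) slot.  Zero-sized and variable-sized types deliberately
   fall back to PARM_BOUNDARY.  */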
9768 
9769 /* If this arg would be passed totally in registers or totally on the stack,
9770    then this routine should return zero.  */
9771 
9772 static int
9773 pa_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
9774 		      tree type, bool named ATTRIBUTE_UNUSED)
9775 {
9776   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9777   unsigned int max_arg_words = 8;
9778   unsigned int offset = 0;
9779 
9780   if (!TARGET_64BIT)
9781     return 0;
9782 
9783   if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9784     offset = 1;
9785 
9786   if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9787     /* Arg fits fully into registers.  */
9788     return 0;
9789   else if (cum->words + offset >= max_arg_words)
9790     /* Arg fully on the stack.  */
9791     return 0;
9792   else
9793     /* Arg is split.  */
9794     return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
9795 }
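
/* A worked example of the computation above, kept out of the build.
   The helper restates the logic in plain C and returns words rather
   than bytes; partial_words is a name local to this sketch.  */
#if 0
#include <assert.h>

static unsigned int
partial_words (unsigned int words_used, unsigned int arg_words)
{
  const unsigned int max_arg_words = 8;
  unsigned int offset = 0;

  /* Multi-word arguments start on an even slot boundary.  */
  if (arg_words > 1 && (words_used & 1))
    offset = 1;

  if (words_used + offset + arg_words <= max_arg_words)
    return 0;					/* Fully in registers.  */
  if (words_used + offset >= max_arg_words)
    return 0;					/* Fully on the stack.  */
  return max_arg_words - words_used - offset;	/* Split.  */
}

static void
partial_words_examples (void)
{
  assert (partial_words (0, 2) == 0);	/* All in registers.  */
  assert (partial_words (8, 1) == 0);	/* All on the stack.  */
  assert (partial_words (6, 3) == 2);	/* Two words in registers.  */
  assert (partial_words (7, 2) == 0);	/* Pad slot pushes it to stack.  */
}
#endif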
9796 
9797 
9798 /* A get_unnamed_section callback for switching to the text section.
9799 
9800    This function is only used with SOM.  Because we don't support
9801    named subspaces, we can only create a new subspace or switch back
9802    to the default text subspace.  */
9803 
9804 static void
9805 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9806 {
9807   gcc_assert (TARGET_SOM);
9808   if (TARGET_GAS)
9809     {
9810       if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9811 	{
9812 	  /* We only want to emit a .nsubspa directive once at the
9813 	     start of the function.  */
9814 	  cfun->machine->in_nsubspa = 1;
9815 
9816 	  /* Create a new subspace for the text.  This provides
9817 	     better stub placement and one-only functions.  */
9818 	  if (cfun->decl
9819 	      && DECL_ONE_ONLY (cfun->decl)
9820 	      && !DECL_WEAK (cfun->decl))
9821 	    {
9822 	      output_section_asm_op ("\t.SPACE $TEXT$\n"
9823 				     "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9824 				     "ACCESS=44,SORT=24,COMDAT");
9825 	      return;
9826 	    }
9827 	}
9828       else
9829 	{
9830 	  /* Either there is no current function, or the body of the
9831 	     current function has been completed.  So, we are changing to the
9832 	     text section to output debugging information.  Thus, we
9833 	     need to forget that we are in the text section so that
9834 	     varasm.c will call us when text_section is selected again.  */
9835 	  gcc_assert (!cfun || !cfun->machine
9836 		      || cfun->machine->in_nsubspa == 2);
9837 	  in_section = NULL;
9838 	}
9839       output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9840       return;
9841     }
9842   output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9843 }
9844 
9845 /* A get_unnamed_section callback for switching to comdat data
9846    sections.  This function is only used with SOM.  */
9847 
9848 static void
9849 som_output_comdat_data_section_asm_op (const void *data)
9850 {
9851   in_section = NULL;
9852   output_section_asm_op (data);
9853 }
9854 
9855 /* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */
9856 
9857 static void
9858 pa_som_asm_init_sections (void)
9859 {
9860   text_section
9861     = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9862 
9863   /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9864      is not being generated.  */
9865   som_readonly_data_section
9866     = get_unnamed_section (0, output_section_asm_op,
9867 			   "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9868 
9869   /* When secondary definitions are not supported, SOM makes readonly
9870      data one-only by creating a new $LIT$ subspace in $TEXT$ with
9871      the comdat flag.  */
9872   som_one_only_readonly_data_section
9873     = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
9874 			   "\t.SPACE $TEXT$\n"
9875 			   "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9876 			   "ACCESS=0x2c,SORT=16,COMDAT");
9877 
9878 
9879   /* When secondary definitions are not supported, SOM makes data one-only
9880      by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag.  */
9881   som_one_only_data_section
9882     = get_unnamed_section (SECTION_WRITE,
9883 			   som_output_comdat_data_section_asm_op,
9884 			   "\t.SPACE $PRIVATE$\n"
9885 			   "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9886 			   "ACCESS=31,SORT=24,COMDAT");
9887 
9888   if (flag_tm)
9889     som_tm_clone_table_section
9890       = get_unnamed_section (0, output_section_asm_op,
9891 			     "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
9892 
9893   /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9894      which reference data within the $TEXT$ space (for example constant
9895      strings in the $LIT$ subspace).
9896 
9897      The assemblers (GAS and HP as) both have problems with handling
9898      the difference of two symbols, which is the other correct way to
9899      reference constant data during PIC code generation.
9900 
9901      So, there's no way to reference constant data which is in the
9902      $TEXT$ space during PIC generation.  Instead place all constant
9903      data into the $PRIVATE$ subspace (this reduces sharing, but it
9904      works correctly).  */
9905   readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9906 
9907   /* We must not have a reference to an external symbol defined in a
9908      shared library in a readonly section, else the SOM linker will
9909      complain.
9910 
9911      So, we force exception information into the data section.  */
9912   exception_section = data_section;
9913 }
9914 
9915 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION.  */
9916 
9917 static section *
9918 pa_som_tm_clone_table_section (void)
9919 {
9920   return som_tm_clone_table_section;
9921 }
9922 
9923 /* On hpux10, the linker will give an error if we have a reference
9924    in the read-only data section to a symbol defined in a shared
9925    library.  Therefore, expressions that might require a reloc can
9926    not be placed in the read-only data section.  */
9927 
9928 static section *
9929 pa_select_section (tree exp, int reloc,
9930 		   unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9931 {
9932   if (TREE_CODE (exp) == VAR_DECL
9933       && TREE_READONLY (exp)
9934       && !TREE_THIS_VOLATILE (exp)
9935       && DECL_INITIAL (exp)
9936       && (DECL_INITIAL (exp) == error_mark_node
9937           || TREE_CONSTANT (DECL_INITIAL (exp)))
9938       && !reloc)
9939     {
9940       if (TARGET_SOM
9941 	  && DECL_ONE_ONLY (exp)
9942 	  && !DECL_WEAK (exp))
9943 	return som_one_only_readonly_data_section;
9944       else
9945 	return readonly_data_section;
9946     }
9947   else if (CONSTANT_CLASS_P (exp) && !reloc)
9948     return readonly_data_section;
9949   else if (TARGET_SOM
9950 	   && TREE_CODE (exp) == VAR_DECL
9951 	   && DECL_ONE_ONLY (exp)
9952 	   && !DECL_WEAK (exp))
9953     return som_one_only_data_section;
9954   else
9955     return data_section;
9956 }
9957 
9958 static void
9959 pa_globalize_label (FILE *stream, const char *name)
9960 {
9961   /* We only handle DATA objects here; functions are globalized in
9962      ASM_DECLARE_FUNCTION_NAME.  */
9963   if (! FUNCTION_NAME_P (name))
9964     {
9965       fputs ("\t.EXPORT ", stream);
9966       assemble_name (stream, name);
9967       fputs (",DATA\n", stream);
9968     }
9969 }
9970 
9971 /* Worker function for TARGET_STRUCT_VALUE_RTX.  */
9972 
9973 static rtx
9974 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9975 		     int incoming ATTRIBUTE_UNUSED)
9976 {
9977   return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9978 }
9979 
9980 /* Worker function for TARGET_RETURN_IN_MEMORY.  */
9981 
9982 bool
9983 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9984 {
9985   /* SOM ABI says that objects larger than 64 bits are returned in memory.
9986      PA64 ABI says that objects larger than 128 bits are returned in memory.
9987      Note that int_size_in_bytes can return -1 if the size of the object
9988      is variable or larger than the maximum value that can be expressed
9989      as a HOST_WIDE_INT.  It can also return zero for an empty type.  The
9990      simplest way to handle variable and empty types is to pass them in
9991      memory.  This avoids problems in defining the boundaries of argument
9992      slots, allocating registers, etc.  */
9993   return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9994 	  || int_size_in_bytes (type) <= 0);
9995 }
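
/* Sketch (kept out of the build): the size thresholds above in plain
   C.  Sizes are in bytes, as returned by int_size_in_bytes; a 12-byte
   structure is returned in memory under the 32-bit SOM ABI but in
   registers under the PA64 ABI.  */
#if 0
#include <stdbool.h>

static bool
returns_in_memory (long size_in_bytes, bool target_64bit)
{
  /* Variable-sized (-1) and empty (0) types also go in memory.  */
  return size_in_bytes > (target_64bit ? 16 : 8) || size_in_bytes <= 0;
}

/* returns_in_memory (12, false) == true
   returns_in_memory (12, true) == false
   returns_in_memory (-1, true) == true  */
#endif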
9996 
9997 /* Structure to hold declaration and name of external symbols that are
9998    emitted by GCC.  We generate a vector of these symbols and output them
9999    at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
10000    This avoids putting out names that are never really used.  */
10001 
10002 typedef struct GTY(()) extern_symbol
10003 {
10004   tree decl;
10005   const char *name;
10006 } extern_symbol;
10007 
10008 /* Define gc'd vector type for extern_symbol.  */
10009 
10010 /* Vector of extern_symbol structures.  */
10011 static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
10012 
10013 #ifdef ASM_OUTPUT_EXTERNAL_REAL
10014 /* Mark DECL (name NAME) as an external reference (assembler output
10015    file FILE).  This saves the names to output at the end of the file
10016    if actually referenced.  */
10017 
10018 void
10019 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
10020 {
10021   gcc_assert (file == asm_out_file);
10022   extern_symbol p = {decl, name};
10023   vec_safe_push (extern_symbols, p);
10024 }
10025 
10026 /* Output text required at the end of an assembler file.
10027    This includes deferred plabels and .import directives for
10028    all external symbols that were actually referenced.  */
10029 
10030 static void
10031 pa_hpux_file_end (void)
10032 {
10033   unsigned int i;
10034   extern_symbol *p;
10035 
10036   if (!NO_DEFERRED_PROFILE_COUNTERS)
10037     output_deferred_profile_counters ();
10038 
10039   output_deferred_plabels ();
10040 
10041   for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
10042     {
10043       tree decl = p->decl;
10044 
10045       if (!TREE_ASM_WRITTEN (decl)
10046 	  && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
10047 	ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
10048     }
10049 
10050   vec_free (extern_symbols);
10051 }
10052 #endif
10053 
10054 /* Return true if a change from mode FROM to mode TO for a register
10055    in register class RCLASS is invalid.  */
10056 
10057 bool
10058 pa_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
10059 			     enum reg_class rclass)
10060 {
10061   if (from == to)
10062     return false;
10063 
10064   /* Reject changes to/from complex and vector modes.  */
10065   if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
10066       || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
10067     return true;
10068 
10069   if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
10070     return false;
10071 
10072   /* There is no way to load QImode or HImode values directly from
10073      memory.  SImode loads to the FP registers are not zero extended.
10074      On the 64-bit target, this conflicts with the definition of
10075      LOAD_EXTEND_OP.  Thus, we can't allow changing between modes
10076      with different sizes in the floating-point registers.  */
10077   if (MAYBE_FP_REG_CLASS_P (rclass))
10078     return true;
10079 
10080   /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
10081      in specific sets of registers.  Thus, we cannot allow changing
10082      to a larger mode when it's larger than a word.  */
10083   if (GET_MODE_SIZE (to) > UNITS_PER_WORD
10084       && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
10085     return true;
10086 
10087   return false;
10088 }
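
/* Sketch (kept out of the build) of the rules above on concrete modes,
   using this file's own predicate.  The last case is target-dependent:
   growing past a word is rejected when UNITS_PER_WORD is 4 but allowed
   when it is 8.  */
#if 0
static void
mode_change_examples (void)
{
  /* Same size, no complex or vector mode: allowed.  */
  gcc_assert (!pa_cannot_change_mode_class (SImode, SFmode, GENERAL_REGS));

  /* Different sizes in a class that may contain FP registers: rejected.  */
  gcc_assert (pa_cannot_change_mode_class (SImode, DImode, FP_REGS));

  /* SImode to DImode in the general registers.  */
  if (!TARGET_64BIT)
    gcc_assert (pa_cannot_change_mode_class (SImode, DImode, GENERAL_REGS));
}
#endif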
10089 
10090 /* Returns TRUE if it is a good idea to tie two pseudo registers
10091    when one has mode MODE1 and one has mode MODE2.
10092    If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
10093    for any hard reg, then this must be FALSE for correct output.
10094 
10095    We should return FALSE for QImode and HImode because these modes
10096    are not ok in the floating-point registers.  However, this prevents
10097    tying these modes to SImode and DImode in the general registers.
10098    So, this isn't a good idea.  We rely on HARD_REGNO_MODE_OK and
10099    CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
10100    in the floating-point registers.  */
10101 
10102 bool
10103 pa_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
10104 {
10105   /* Don't tie modes in different classes.  */
10106   if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
10107     return false;
10108 
10109   return true;
10110 }
10111 
10112 
10113 /* Length in units of the trampoline instruction code.  */
10114 
10115 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
10116 
10117 
10118 /* Output assembler code for a block containing the constant parts
10119    of a trampoline, leaving space for the variable parts.
10120 
10121    The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
10122    and then branches to the specified routine.
10123 
10124    This code template is copied from the text segment to a stack
10125    location, patched by pa_trampoline_init to contain valid values,
10126    and then entered as a subroutine.
10127 
10128    It is best to keep this as small as possible to avoid having to
10129    flush multiple lines in the cache.  */
10130 
10131 static void
10132 pa_asm_trampoline_template (FILE *f)
10133 {
10134   if (!TARGET_64BIT)
10135     {
10136       fputs ("\tldw	36(%r22),%r21\n", f);
10137       fputs ("\tbb,>=,n	%r21,30,.+16\n", f);
10138       if (ASSEMBLER_DIALECT == 0)
10139 	fputs ("\tdepi	0,31,2,%r21\n", f);
10140       else
10141 	fputs ("\tdepwi	0,31,2,%r21\n", f);
10142       fputs ("\tldw	4(%r21),%r19\n", f);
10143       fputs ("\tldw	0(%r21),%r21\n", f);
10144       if (TARGET_PA_20)
10145 	{
10146 	  fputs ("\tbve	(%r21)\n", f);
10147 	  fputs ("\tldw	40(%r22),%r29\n", f);
10148 	  fputs ("\t.word	0\n", f);
10149 	  fputs ("\t.word	0\n", f);
10150 	}
10151       else
10152 	{
10153 	  fputs ("\tldsid	(%r21),%r1\n", f);
10154 	  fputs ("\tmtsp	%r1,%sr0\n", f);
10155 	  fputs ("\tbe	0(%sr0,%r21)\n", f);
10156 	  fputs ("\tldw	40(%r22),%r29\n", f);
10157 	}
10158       fputs ("\t.word	0\n", f);
10159       fputs ("\t.word	0\n", f);
10160       fputs ("\t.word	0\n", f);
10161       fputs ("\t.word	0\n", f);
10162     }
10163   else
10164     {
10165       fputs ("\t.dword 0\n", f);
10166       fputs ("\t.dword 0\n", f);
10167       fputs ("\t.dword 0\n", f);
10168       fputs ("\t.dword 0\n", f);
10169       fputs ("\tmfia	%r31\n", f);
10170       fputs ("\tldd	24(%r31),%r1\n", f);
10171       fputs ("\tldd	24(%r1),%r27\n", f);
10172       fputs ("\tldd	16(%r1),%r1\n", f);
10173       fputs ("\tbve	(%r1)\n", f);
10174       fputs ("\tldd	32(%r31),%r31\n", f);
10175       fputs ("\t.dword 0  ; fptr\n", f);
10176       fputs ("\t.dword 0  ; static link\n", f);
10177     }
10178 }
10179 
10180 /* Emit RTL insns to initialize the variable parts of a trampoline.
10181    FNADDR is an RTX for the address of the function's pure code.
10182    CXT is an RTX for the static chain value for the function.
10183 
10184    Move the function address to the trampoline template at offset 36.
10185    Move the static chain value to the trampoline template at offset 40.
10186    Move the trampoline address to the trampoline template at offset 44.
10187    Move r19 to the trampoline template at offset 48.  The latter two
10188    words create a plabel for the indirect call to the trampoline.
10189 
10190    A similar sequence is used for the 64-bit port but the plabel is
10191    at the beginning of the trampoline.
10192 
10193    Finally, the cache entries for the trampoline code are flushed.
10194    This is necessary to ensure that the trampoline instruction sequence
10195    is written to memory prior to any attempts at prefetching the code
10196    sequence.  */
10197 
10198 static void
10199 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10200 {
10201   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10202   rtx start_addr = gen_reg_rtx (Pmode);
10203   rtx end_addr = gen_reg_rtx (Pmode);
10204   rtx line_length = gen_reg_rtx (Pmode);
10205   rtx r_tramp, tmp;
10206 
10207   emit_block_move (m_tramp, assemble_trampoline_template (),
10208 		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10209   r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10210 
10211   if (!TARGET_64BIT)
10212     {
10213       tmp = adjust_address (m_tramp, Pmode, 36);
10214       emit_move_insn (tmp, fnaddr);
10215       tmp = adjust_address (m_tramp, Pmode, 40);
10216       emit_move_insn (tmp, chain_value);
10217 
10218       /* Create a fat pointer for the trampoline.  */
10219       tmp = adjust_address (m_tramp, Pmode, 44);
10220       emit_move_insn (tmp, r_tramp);
10221       tmp = adjust_address (m_tramp, Pmode, 48);
10222       emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10223 
10224       /* fdc and fic only use registers for the address to flush,
10225 	 they do not accept integer displacements.  We align the
10226 	 start and end addresses to the beginning of their respective
10227 	 cache lines to minimize the number of lines flushed.  */
10228       emit_insn (gen_andsi3 (start_addr, r_tramp,
10229 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10230       tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
10231 					     TRAMPOLINE_CODE_SIZE-1));
10232       emit_insn (gen_andsi3 (end_addr, tmp,
10233 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10234       emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10235       emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10236       emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10237 				    gen_reg_rtx (Pmode),
10238 				    gen_reg_rtx (Pmode)));
10239     }
10240   else
10241     {
10242       tmp = adjust_address (m_tramp, Pmode, 56);
10243       emit_move_insn (tmp, fnaddr);
10244       tmp = adjust_address (m_tramp, Pmode, 64);
10245       emit_move_insn (tmp, chain_value);
10246 
10247       /* Create a fat pointer for the trampoline.  */
10248       tmp = adjust_address (m_tramp, Pmode, 16);
10249       emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
10250 							    r_tramp, 32)));
10251       tmp = adjust_address (m_tramp, Pmode, 24);
10252       emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10253 
10254       /* fdc and fic only use registers for the address to flush,
10255 	 they do not accept integer displacements.  We align the
10256 	 start and end addresses to the beginning of their respective
10257 	 cache lines to minimize the number of lines flushed.  */
10258       tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
10259       emit_insn (gen_anddi3 (start_addr, tmp,
10260 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10261       tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
10262 					     TRAMPOLINE_CODE_SIZE - 1));
10263       emit_insn (gen_anddi3 (end_addr, tmp,
10264 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10265       emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10266       emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10267       emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10268 				    gen_reg_rtx (Pmode),
10269 				    gen_reg_rtx (Pmode)));
10270     }
10271 }
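
/* Sketch (kept out of the build): the finished 32-bit trampoline image
   written above, laid out as a C struct.  The field names are
   illustrative only; the offsets are those used by pa_trampoline_init,
   assuming 32-bit pointers.  */
#if 0
struct pa32_trampoline_image
{
  unsigned int insns[9];	/* Bytes 0-35: code template copied from
				   pa_asm_trampoline_template.  */
  void *fnaddr;			/* Byte 36: function address, fetched by
				   "ldw 36(%r22),%r21".  */
  void *static_chain;		/* Byte 40: static chain value, fetched
				   by "ldw 40(%r22),%r29".  */
  void *tramp_addr;		/* Byte 44: trampoline address; together
				   with ...  */
  void *gp;			/* ... byte 48 (%r19) it forms the plabel
				   used for indirect calls.  */
};
#endif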
10272 
10273 /* Perform any machine-specific adjustment in the address of the trampoline.
10274    ADDR contains the address that was passed to pa_trampoline_init.
10275    Adjust the trampoline address to point to the plabel at offset 44;
   the extra 2 added below marks the address as a plabel.  */
10276 
10277 static rtx
10278 pa_trampoline_adjust_address (rtx addr)
10279 {
10280   if (!TARGET_64BIT)
10281     addr = memory_address (Pmode, plus_constant (Pmode, addr, 46));
10282   return addr;
10283 }
10284 
10285 static rtx
10286 pa_delegitimize_address (rtx orig_x)
10287 {
10288   rtx x = delegitimize_mem_from_attrs (orig_x);
10289 
10290   if (GET_CODE (x) == LO_SUM
10291       && GET_CODE (XEXP (x, 1)) == UNSPEC
10292       && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10293     return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10294   return x;
10295 }
10296 
10297 static rtx
10298 pa_internal_arg_pointer (void)
10299 {
10300   /* The argument pointer and the hard frame pointer are the same in
10301      the 32-bit runtime, so we don't need a copy.  */
10302   if (TARGET_64BIT)
10303     return copy_to_reg (virtual_incoming_args_rtx);
10304   else
10305     return virtual_incoming_args_rtx;
10306 }
10307 
10308 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10309    Frame pointer elimination is automatically handled.  */
10310 
10311 static bool
10312 pa_can_eliminate (const int from, const int to)
10313 {
10314   /* The argument pointer cannot be eliminated in the 64-bit runtime.  */
10315   if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10316     return false;
10317 
10318   return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10319           ? ! frame_pointer_needed
10320           : true);
10321 }
10322 
10323 /* Define the offset between two registers, FROM to be eliminated and its
10324    replacement TO, at the start of a routine.  */
10325 HOST_WIDE_INT
10326 pa_initial_elimination_offset (int from, int to)
10327 {
10328   HOST_WIDE_INT offset;
10329 
10330   if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10331       && to == STACK_POINTER_REGNUM)
10332     offset = -pa_compute_frame_size (get_frame_size (), 0);
10333   else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10334     offset = 0;
10335   else
10336     gcc_unreachable ();
10337 
10338   return offset;
10339 }
10340 
10341 static void
10342 pa_conditional_register_usage (void)
10343 {
10344   int i;
10345 
10346   if (!TARGET_64BIT && !TARGET_PA_11)
10347     {
10348       for (i = 56; i <= FP_REG_LAST; i++)
10349 	fixed_regs[i] = call_used_regs[i] = 1;
10350       for (i = 33; i < 56; i += 2)
10351 	fixed_regs[i] = call_used_regs[i] = 1;
10352     }
10353   if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
10354     {
10355       for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10356 	fixed_regs[i] = call_used_regs[i] = 1;
10357     }
10358   if (flag_pic)
10359     fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10360 }
10361 
10362 /* Target hook for c_mode_for_suffix.  */
10363 
10364 static enum machine_mode
10365 pa_c_mode_for_suffix (char suffix)
10366 {
10367   if (HPUX_LONG_DOUBLE_LIBRARY)
10368     {
10369       if (suffix == 'q')
10370 	return TFmode;
10371     }
10372 
10373   return VOIDmode;
10374 }
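
/* For example (a sketch): with the HP-UX long-double library, a
   literal such as 1.5q is given TFmode, i.e. the 128-bit quad
   precision format; without this hook the 'q' suffix would be
   rejected.  */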
10375 
10376 /* Target hook for function_section.  */
10377 
10378 static section *
10379 pa_function_section (tree decl, enum node_frequency freq,
10380 		     bool startup, bool exit)
10381 {
10382   /* Put functions in text section if target doesn't have named sections.  */
10383   if (!targetm_common.have_named_sections)
10384     return text_section;
10385 
10386   /* Force nested functions into the same section as the containing
10387      function.  */
10388   if (decl
10389       && DECL_SECTION_NAME (decl) == NULL_TREE
10390       && DECL_CONTEXT (decl) != NULL_TREE
10391       && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10392       && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL_TREE)
10393     return function_section (DECL_CONTEXT (decl));
10394 
10395   /* Otherwise, use the default function section.  */
10396   return default_function_section (decl, freq, startup, exit);
10397 }
10398 
10399 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10400 
10401    In 64-bit mode, we reject CONST_DOUBLES.  We also reject CONST_INTS
10402    that need more than three instructions to load prior to reload.  This
10403    limit is somewhat arbitrary.  It takes three instructions to load a
10404    CONST_INT from memory but two are memory accesses.  It may be better
10405    to increase the allowed range for CONST_INTS.  We may also be able
10406    to handle CONST_DOUBLES.  */
10407 
10408 static bool
10409 pa_legitimate_constant_p (enum machine_mode mode, rtx x)
10410 {
10411   if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
10412     return false;
10413 
10414   if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
10415     return false;
10416 
10417   /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
10418      legitimate constants.  The other variants can't be handled by
10419      the move patterns after reload starts.  */
10420   if (pa_tls_referenced_p (x))
10421     return false;
10422 
10423   if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
10424     return false;
10425 
10426   if (TARGET_64BIT
10427       && HOST_BITS_PER_WIDE_INT > 32
10428       && GET_CODE (x) == CONST_INT
10429       && !reload_in_progress
10430       && !reload_completed
10431       && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
10432       && !pa_cint_ok_for_move (INTVAL (x)))
10433     return false;
10434 
10435   if (function_label_operand (x, mode))
10436     return false;
10437 
10438   return true;
10439 }
10440 
10441 /* Implement TARGET_SECTION_TYPE_FLAGS.  */
10442 
10443 static unsigned int
10444 pa_section_type_flags (tree decl, const char *name, int reloc)
10445 {
10446   unsigned int flags;
10447 
10448   flags = default_section_type_flags (decl, name, reloc);
10449 
10450   /* Function labels are placed in the constant pool.  This can
10451      cause a section conflict if decls are put in ".data.rel.ro"
10452      or ".data.rel.ro.local" using the __attribute__ construct.  */
10453   if (strcmp (name, ".data.rel.ro") == 0
10454       || strcmp (name, ".data.rel.ro.local") == 0)
10455     flags |= SECTION_WRITE | SECTION_RELRO;
10456 
10457   return flags;
10458 }
10459 
10460 /* pa_legitimate_address_p recognizes an RTL expression that is a
10461    valid memory address for an instruction.  The MODE argument is the
10462    machine mode for the MEM expression that wants to use this address.
10463 
10464    On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10465    REG+REG, and REG+(REG*SCALE).  The indexed address forms are only
10466    available with floating point loads and stores, and integer loads.
10467    We get better code by allowing indexed addresses in the initial
10468    RTL generation.
10469 
10470    The acceptance of indexed addresses as legitimate implies that we
10471    must provide patterns for doing indexed integer stores, or the move
10472    expanders must force the address of an indexed store to a register.
10473    We have adopted the latter approach.
10474 
10475    Another function of pa_legitimate_address_p is to ensure that
10476    the base register is a valid pointer for indexed instructions.
10477    On targets that have non-equivalent space registers, we have to
10478    know at the time of assembler output which register in a REG+REG
10479    pair is the base register.  The REG_POINTER flag is sometimes lost
10480    in reload and the following passes, so it can't be relied on during
10481    code generation.  Thus, we either have to canonicalize the order
10482    of the registers in REG+REG indexed addresses, or treat REG+REG
10483    addresses separately and provide patterns for both permutations.
10484 
10485    The latter approach requires several hundred additional lines of
10486    code in pa.md.  The downside to canonicalizing is that a PLUS
10487    in the wrong order can't combine to form a scaled indexed
10488    memory operand.  As we won't need to canonicalize the operands if
10489    the REG_POINTER lossage can be fixed, it seems better to canonicalize.
10490 
10491    We initially break out scaled indexed addresses in canonical order
10492    in pa_emit_move_sequence.  LEGITIMIZE_ADDRESS also canonicalizes
10493    scaled indexed addresses during RTL generation.  However, fold_rtx
10494    has its own opinion on how the operands of a PLUS should be ordered.
10495    If one of the operands is equivalent to a constant, it will make
10496    that operand the second operand.  As the base register is likely to
10497    be equivalent to a SYMBOL_REF, we have made it the second operand.
10498 
10499    pa_legitimate_address_p accepts REG+REG as legitimate when the
10500    operands are in the order INDEX+BASE on targets with non-equivalent
10501    space registers, and in any order on targets with equivalent space
10502    registers.  It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10503 
10504    We treat a SYMBOL_REF as legitimate if it is part of the current
10505    function's constant-pool, because such addresses can actually be
10506    output as REG+SMALLINT.  */
10507 
10508 static bool
10509 pa_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
10510 {
10511   if ((REG_P (x)
10512        && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
10513 		  : REG_OK_FOR_BASE_P (x)))
10514       || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
10515 	   || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
10516 	  && REG_P (XEXP (x, 0))
10517 	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10518 		     : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
10519     return true;
10520 
10521   if (GET_CODE (x) == PLUS)
10522     {
10523       rtx base, index;
10524 
10525       /* For REG+REG, the base register should be in XEXP (x, 1),
10526 	 so check it first.  */
10527       if (REG_P (XEXP (x, 1))
10528 	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
10529 		     : REG_OK_FOR_BASE_P (XEXP (x, 1))))
10530 	base = XEXP (x, 1), index = XEXP (x, 0);
10531       else if (REG_P (XEXP (x, 0))
10532 	       && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10533 			  : REG_OK_FOR_BASE_P (XEXP (x, 0))))
10534 	base = XEXP (x, 0), index = XEXP (x, 1);
10535       else
10536 	return false;
10537 
10538       if (GET_CODE (index) == CONST_INT)
10539 	{
10540 	  if (INT_5_BITS (index))
10541 	    return true;
10542 
10543 	  /* When INT14_OK_STRICT is false, a secondary reload is needed
10544 	     to adjust the displacement of SImode and DImode floating point
10545 	     instructions but this may fail when the register also needs
10546 	     reloading.  So, we return false when STRICT is true.  We
10547 	     also reject long displacements for float mode addresses since
10548 	     the majority of accesses will use floating point instructions
10549 	     that don't support 14-bit offsets.  */
10550 	  if (!INT14_OK_STRICT
10551 	      && (strict || !(reload_in_progress || reload_completed))
10552 	      && mode != QImode
10553 	      && mode != HImode)
10554 	    return false;
10555 
10556 	  return base14_operand (index, mode);
10557 	}
10558 
10559       if (!TARGET_DISABLE_INDEXING
10560 	  /* Only accept the "canonical" INDEX+BASE operand order
10561 	     on targets with non-equivalent space registers.  */
10562 	  && (TARGET_NO_SPACE_REGS
10563 	      ? REG_P (index)
10564 	      : (base == XEXP (x, 1) && REG_P (index)
10565 		 && (reload_completed
10566 		     || (reload_in_progress && HARD_REGISTER_P (base))
10567 		     || REG_POINTER (base))
10568 		 && (reload_completed
10569 		     || (reload_in_progress && HARD_REGISTER_P (index))
10570 		     || !REG_POINTER (index))))
10571 	  && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
10572 	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
10573 		     : REG_OK_FOR_INDEX_P (index))
10574 	  && borx_reg_operand (base, Pmode)
10575 	  && borx_reg_operand (index, Pmode))
10576 	return true;
10577 
10578       if (!TARGET_DISABLE_INDEXING
10579 	  && GET_CODE (index) == MULT
10580 	  && MODE_OK_FOR_SCALED_INDEXING_P (mode)
10581 	  && REG_P (XEXP (index, 0))
10582 	  && GET_MODE (XEXP (index, 0)) == Pmode
10583 	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
10584 		     : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
10585 	  && GET_CODE (XEXP (index, 1)) == CONST_INT
10586 	  && INTVAL (XEXP (index, 1))
10587 	     == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
10588 	  && borx_reg_operand (base, Pmode))
10589 	return true;
10590 
10591       return false;
10592     }
10593 
10594   if (GET_CODE (x) == LO_SUM)
10595     {
10596       rtx y = XEXP (x, 0);
10597 
10598       if (GET_CODE (y) == SUBREG)
10599 	y = SUBREG_REG (y);
10600 
10601       if (REG_P (y)
10602 	  && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
10603 		     : REG_OK_FOR_BASE_P (y)))
10604 	{
10605 	  /* Needed for -fPIC */
10606 	  if (mode == Pmode
10607 	      && GET_CODE (XEXP (x, 1)) == UNSPEC)
10608 	    return true;
10609 
10610 	  if (!INT14_OK_STRICT
10611 	      && (strict || !(reload_in_progress || reload_completed))
10612 	      && mode != QImode
10613 	      && mode != HImode)
10614 	    return false;
10615 
10616 	  if (CONSTANT_P (XEXP (x, 1)))
10617 	    return true;
10618 	}
10619       return false;
10620     }
10621 
10622   if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
10623     return true;
10624 
10625   return false;
10626 }
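
/* Sketch of representative address RTL and how the predicate above
   classifies it (illustrative only; b is a valid base register and r
   a valid index register):

     (reg b)					register direct
     (plus (reg b) (const_int 12))		REG+SMALLINT; 5-bit always,
						14-bit only for modes that
						pass the checks above
     (plus (reg r) (reg b))			REG+REG, INDEX+BASE order
						on targets with
						non-equivalent space regs
     (plus (mult (reg r) (const_int 8))
	   (reg b))				REG+(REG*SCALE), the scale
						matching the mode size
     (lo_sum (reg b) (symbol_ref "x"))		LO_SUM of a base and a
						constant  */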
10627 
10628 /* Look for machine dependent ways to make the invalid address AD a
10629    valid address.
10630 
10631    For the PA, transform:
10632 
10633         memory(X + <large int>)
10634 
10635    into:
10636 
10637         if (<large int> & mask) >= 16
10638           Y = (<large int> & ~mask) + mask + 1  Round up.
10639         else
10640           Y = (<large int> & ~mask)             Round down.
10641         Z = X + Y
10642         memory (Z + (<large int> - Y));
10643 
10644    This makes reload inheritance and reload_cse work better since Z
10645    can be reused.
10646 
10647    There may be more opportunities to improve code with this hook.  */
10648 
10649 rtx
10650 pa_legitimize_reload_address (rtx ad, enum machine_mode mode,
10651 			      int opnum, int type,
10652 			      int ind_levels ATTRIBUTE_UNUSED)
10653 {
10654   long offset, newoffset, mask;
10655   rtx new_rtx, temp = NULL_RTX;
10656 
10657   mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
10658 	  && !INT14_OK_STRICT ? 0x1f : 0x3fff);
10659 
10660   if (optimize && GET_CODE (ad) == PLUS)
10661     temp = simplify_binary_operation (PLUS, Pmode,
10662 				      XEXP (ad, 0), XEXP (ad, 1));
10663 
10664   new_rtx = temp ? temp : ad;
10665 
10666   if (optimize
10667       && GET_CODE (new_rtx) == PLUS
10668       && GET_CODE (XEXP (new_rtx, 0)) == REG
10669       && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
10670     {
10671       offset = INTVAL (XEXP (new_rtx, 1));
10672 
10673       /* Choose rounding direction.  Round up if we are >= halfway.  */
10674       if ((offset & mask) >= ((mask + 1) / 2))
10675 	newoffset = (offset & ~mask) + mask + 1;
10676       else
10677 	newoffset = offset & ~mask;
10678 
10679       /* Ensure that long displacements are aligned.  */
10680       if (mask == 0x3fff
10681 	  && (GET_MODE_CLASS (mode) == MODE_FLOAT
10682 	      || (TARGET_64BIT && (mode) == DImode)))
10683 	newoffset &= ~(GET_MODE_SIZE (mode) - 1);
10684 
10685       if (newoffset != 0 && VAL_14_BITS_P (newoffset))
10686 	{
10687 	  temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
10688 			       GEN_INT (newoffset));
10689 	  ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
10690 	  push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
10691 		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10692 		       opnum, (enum reload_type) type);
10693 	  return ad;
10694 	}
10695     }
10696 
10697   return NULL_RTX;
10698 }
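
/* A worked example of the rounding above, kept out of the build.  The
   mask 0x1f is the one selected for floating-point modes when long
   displacements are unavailable; the residual -0xb then fits the 5-bit
   displacement of the FP loads and stores.  */
#if 0
#include <assert.h>

static void
reload_round_example (void)
{
  long offset = 0x3f5, mask = 0x1f, newoffset;

  /* Round to the nearest multiple of (mask + 1).  */
  if ((offset & mask) >= (mask + 1) / 2)	/* 0x15 >= 0x10: up.  */
    newoffset = (offset & ~mask) + mask + 1;
  else
    newoffset = offset & ~mask;

  assert (newoffset == 0x400);
  assert (offset - newoffset == -0xb);
}
#endif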
10699 
10700 #include "gt-pa.h"
10701