/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "tree.h"
#include "output.h"
#include "except.h"
#include "expr.h"
#include "optabs.h"
#include "reload.h"
#include "integrate.h"
#include "function.h"
#include "toplev.h"
#include "ggc.h"
#include "recog.h"
#include "predict.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "df.h"

/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
{
  enum machine_mode store_mode;
  enum machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
	  && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}


#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif

static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static bool pa_handle_option (size_t, const char *, int);
static int hppa_address_cost (rtx, bool);
static bool hppa_rtx_costs (rtx, int, int, int *, bool);
static inline rtx force_mode (enum machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
static bool forward_branch_p (rtx);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx);
static int compute_clrmem_length (rtx);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx, rtx, rtx, int);
static int pa_adjust_priority (rtx, int);
static int pa_issue_rate (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (enum machine_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, enum machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
static void pa_hpux_file_end (void);
#endif
#ifdef HPUX_LONG_DOUBLE_LIBRARY
static void pa_hpux_init_libfuncs (void);
#endif
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				  const_tree, bool);
static int pa_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				 tree, bool);
static struct machine_function * pa_init_machine_status (void);
static enum reg_class pa_secondary_reload (bool, rtx, enum reg_class,
					   enum machine_mode,
					   secondary_reload_info *);
static void pa_extra_live_on_entry (bitmap);
static enum machine_mode pa_promote_function_mode (const_tree,
						   enum machine_mode, int *,
						   const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);

/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;

/* Which cpu we are scheduling for.  */
enum processor_type pa_cpu = TARGET_SCHED_DEFAULT;

/* The UNIX standard to use for predefines and linking.  */
int flag_pa_unix = TARGET_HPUX_11_11 ? 1998 : TARGET_HPUX_10_10 ? 1995 : 1993;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;


/* Initialize the GCC target structure.  */

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#ifdef ASM_OUTPUT_EXTERNAL_REAL
#define TARGET_ASM_FILE_END pa_hpux_file_end
#else
#define TARGET_ASM_FILE_END output_deferred_plabels
#endif

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION pa_handle_option

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#ifdef HPUX_LONG_DOUBLE_LIBRARY
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
#endif

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_tls_referenced_p

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address

struct gcc_target targetm = TARGET_INITIALIZER;

/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */
361 
362   i = strlen (const_str);
363   str = (char *) alloca (i + 1);
364   memcpy (str, const_str, i + 1);
365 
366   while (1)
367     {
368       dash = strchr (str, '-');
369       if (!dash)
370 	{
371 	  warning (0, "value of -mfixed-range must have form REG1-REG2");
372 	  return;
373 	}
374       *dash = '\0';
375 
376       comma = strchr (dash + 1, ',');
377       if (comma)
378 	*comma = '\0';
379 
380       first = decode_reg_name (str);
381       if (first < 0)
382 	{
383 	  warning (0, "unknown register name: %s", str);
384 	  return;
385 	}
386 
387       last = decode_reg_name (dash + 1);
388       if (last < 0)
389 	{
390 	  warning (0, "unknown register name: %s", dash + 1);
391 	  return;
392 	}
393 
394       *dash = '-';
395 
396       if (first > last)
397 	{
398 	  warning (0, "%s-%s is an empty range", str, dash + 1);
399 	  return;
400 	}
401 
402       for (i = first; i <= last; ++i)
403 	fixed_regs[i] = call_used_regs[i] = 1;
404 
405       if (!comma)
406 	break;
407 
408       *comma = ',';
409       str = comma + 1;
410     }
411 
412   /* Check if all floating point registers have been fixed.  */
413   for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
414     if (!fixed_regs[i])
415       break;
416 
417   if (i > FP_REG_LAST)
418     target_flags |= MASK_DISABLE_FPREGS;
419 }
420 
421 /* Implement TARGET_HANDLE_OPTION.  */
422 
423 static bool
424 pa_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
425 {
426   switch (code)
427     {
428     case OPT_mnosnake:
429     case OPT_mpa_risc_1_0:
430     case OPT_march_1_0:
431       target_flags &= ~(MASK_PA_11 | MASK_PA_20);
432       return true;
433 
434     case OPT_msnake:
435     case OPT_mpa_risc_1_1:
436     case OPT_march_1_1:
437       target_flags &= ~MASK_PA_20;
438       target_flags |= MASK_PA_11;
439       return true;
440 
441     case OPT_mpa_risc_2_0:
442     case OPT_march_2_0:
443       target_flags |= MASK_PA_11 | MASK_PA_20;
444       return true;
445 
446     case OPT_mschedule_:
447       if (strcmp (arg, "8000") == 0)
448 	pa_cpu = PROCESSOR_8000;
449       else if (strcmp (arg, "7100") == 0)
450 	pa_cpu = PROCESSOR_7100;
451       else if (strcmp (arg, "700") == 0)
452 	pa_cpu = PROCESSOR_700;
453       else if (strcmp (arg, "7100LC") == 0)
454 	pa_cpu = PROCESSOR_7100LC;
455       else if (strcmp (arg, "7200") == 0)
456 	pa_cpu = PROCESSOR_7200;
457       else if (strcmp (arg, "7300") == 0)
458 	pa_cpu = PROCESSOR_7300;
459       else
460 	return false;
461       return true;
462 
463     case OPT_mfixed_range_:
464       fix_range (arg);
465       return true;
466 
467 #if TARGET_HPUX
468     case OPT_munix_93:
469       flag_pa_unix = 1993;
470       return true;
471 #endif
472 
473 #if TARGET_HPUX_10_10
474     case OPT_munix_95:
475       flag_pa_unix = 1995;
476       return true;
477 #endif
478 
479 #if TARGET_HPUX_11_11
480     case OPT_munix_98:
481       flag_pa_unix = 1998;
482       return true;
483 #endif
484 
485     default:
486       return true;
487     }
488 }
489 
490 void
491 override_options (void)
492 {
493   /* Unconditional branches in the delay slot are not compatible with dwarf2
494      call frame information.  There is no benefit in using this optimization
495      on PA8000 and later processors.  */
496   if (pa_cpu >= PROCESSOR_8000
497       || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
498       || flag_unwind_tables)
499     target_flags &= ~MASK_JUMP_IN_DELAY;
500 
501   if (flag_pic && TARGET_PORTABLE_RUNTIME)
502     {
503       warning (0, "PIC code generation is not supported in the portable runtime model");
504     }
505 
506   if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
507    {
508       warning (0, "PIC code generation is not compatible with fast indirect calls");
509    }
510 
511   if (! TARGET_GAS && write_symbols != NO_DEBUG)
512     {
513       warning (0, "-g is only supported when using GAS on this processor,");
514       warning (0, "-g option disabled");
515       write_symbols = NO_DEBUG;
516     }
517 
518   /* We only support the "big PIC" model now.  And we always generate PIC
519      code when in 64bit mode.  */
520   if (flag_pic == 1 || TARGET_64BIT)
521     flag_pic = 2;
522 
523   /* Disable -freorder-blocks-and-partition as we don't support hot and
524      cold partitioning.  */
525   if (flag_reorder_blocks_and_partition)
526     {
527       inform (input_location,
528               "-freorder-blocks-and-partition does not work "
529               "on this architecture");
530       flag_reorder_blocks_and_partition = 0;
531       flag_reorder_blocks = 1;
532     }
533 
534   /* We can't guarantee that .dword is available for 32-bit targets.  */
535   if (UNITS_PER_WORD == 4)
536     targetm.asm_out.aligned_op.di = NULL;
537 
538   /* The unaligned ops are only available when using GAS.  */
539   if (!TARGET_GAS)
540     {
541       targetm.asm_out.unaligned_op.hi = NULL;
542       targetm.asm_out.unaligned_op.si = NULL;
543       targetm.asm_out.unaligned_op.di = NULL;
544     }
545 
546   init_machine_status = pa_init_machine_status;
547 }
548 
549 static void
550 pa_init_builtins (void)
551 {
552 #ifdef DONT_HAVE_FPUTC_UNLOCKED
553   built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] =
554     built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
555   implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED]
556     = implicit_built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
557 #endif
558 #if TARGET_HPUX_11
559   if (built_in_decls [BUILT_IN_FINITE])
560     set_user_assembler_name (built_in_decls [BUILT_IN_FINITE], "_Isfinite");
561   if (built_in_decls [BUILT_IN_FINITEF])
562     set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF], "_Isfinitef");
563 #endif
564 }
565 
566 /* Function to init struct machine_function.
567    This will be called, via a pointer variable,
568    from push_function_context.  */
569 
570 static struct machine_function *
571 pa_init_machine_status (void)
572 {
573   return GGC_CNEW (machine_function);
574 }
575 
576 /* If FROM is a probable pointer register, mark TO as a probable
577    pointer register with the same pointer alignment as FROM.  */
578 
579 static void
580 copy_reg_pointer (rtx to, rtx from)
581 {
582   if (REG_POINTER (from))
583     mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
584 }
585 
586 /* Return 1 if X contains a symbolic expression.  We know these
587    expressions will have one of a few well defined forms, so
588    we need only check those forms.  */
589 int
590 symbolic_expression_p (rtx x)
591 {
592 
593   /* Strip off any HIGH.  */
594   if (GET_CODE (x) == HIGH)
595     x = XEXP (x, 0);
596 
597   return (symbolic_operand (x, VOIDmode));
598 }
599 
600 /* Accept any constant that can be moved in one instruction into a
601    general register.  */
602 int
603 cint_ok_for_move (HOST_WIDE_INT ival)
604 {
  /* OK if ldo, ldil, or zdepi can be used.  */
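  /* Illustrative values (not from the original source): 0x1fff fits in
     14 bits (ldo); 0x12345800 has its low 11 bits clear (ldil); 0x01f0
     is a short run of contiguous bits (zdepi).  */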
  return (VAL_14_BITS_P (ival)
	  || ldil_cint_p (ival)
	  || zdepi_cint_p (ival));
}

/* Return truth value of whether OP can be used as an operand in an
   adddi3 insn.  */
int
adddi3_operand (rtx op, enum machine_mode mode)
{
  return (register_operand (op, mode)
	  || (GET_CODE (op) == CONST_INT
	      && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
}

/* True iff the operand OP can be used as the destination operand of
   an integer store.  This also implies the operand could be used as
   the source operand of an integer load.  Symbolic, lo_sum and indexed
   memory operands are not allowed.  We accept reloading pseudos and
   other memory operands.  */
int
integer_store_memory_operand (rtx op, enum machine_mode mode)
{
  return ((reload_in_progress
	   && REG_P (op)
	   && REGNO (op) >= FIRST_PSEUDO_REGISTER
	   && reg_renumber [REGNO (op)] < 0)
	  || (GET_CODE (op) == MEM
	      && (reload_in_progress || memory_address_p (mode, XEXP (op, 0)))
	      && !symbolic_memory_operand (op, VOIDmode)
	      && !IS_LO_SUM_DLT_ADDR_P (XEXP (op, 0))
	      && !IS_INDEX_ADDR_P (XEXP (op, 0))));
}

/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
ldil_cint_p (HOST_WIDE_INT ival)
{
  HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);
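  /* Illustrative (not from the original source): for ival = 0x12345800,
     the low 11 bits and bit 31 are all zero, so x == 0 and ldil can
     load the value.  */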

  return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
}

/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
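  /* Illustrative trace (not from the original source): for x = 0x01f0,
     lsb_mask = 0x10, (x >> 4) + lsb_mask = 0x2f, and masking with
     ~(lsb_mask - 1) leaves t = 0x20, a power of two, so this run of
     five contiguous bits is accepted.  */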
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit patterns like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
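  /* Adding the least significant set bit propagates a carry across a
     contiguous run of ones, so the power-of-two test below succeeds
     exactly when the complemented mask was a single (possibly empty)
     run of ones.  Illustrative: 0111000 + 0001000 = 1000000.  */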
  return (mask & (mask - 1)) == 0;
}

/* True iff depi can be used to compute (reg | MASK).  */
int
ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

rtx
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx insn;

      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 multiple word sizes.

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (VOIDmode, reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
	 and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
	{
	  /* Extract LABEL_REF.  */
	  if (GET_CODE (orig) == CONST)
	    orig = XEXP (XEXP (orig, 0), 0);
	  /* Extract CODE_LABEL.  */
	  orig = XEXP (orig, 0);
	  add_reg_note (insn, REG_LABEL_OPERAND, orig);
	  LABEL_NUSES (orig)++;
	}
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx insn, tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
	 result.  This allows the sequence to be deleted when the final
	 result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
		 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, mode))
	{
	  /* Force function label into memory in word mode.  */
	  orig = XEXP (force_const_mem (word_mode, orig), 0);
	  /* Load plabel address from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
						         gen_rtvec (1, orig),
						         UNSPEC_DLTIND14R)));
	  emit_move_insn (reg, pic_ref);
	  /* Now load address of function descriptor.  */
	  pic_ref = gen_rtx_MEM (Pmode, reg);
	}
      else
	{
	  /* Load symbol reference from DLT.  */
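	  /* On 32-bit SOM targets this typically assembles to a
	     sequence like "addil LT'sym,%r19" followed by
	     "ldw RT'sym(%r1),reg" (illustrative; the exact syntax
	     depends on the assembler and target).  */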
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
						         gen_rtvec (1, orig),
						         UNSPEC_DLTIND14R)));
	}

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
	{
	  if (INT_14_BITS (orig))
	    return plus_constant (base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
		  	   LCT_CONST, Pmode, 1, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, insn, tmp, t1, t2, tp;
  enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);

  switch (model)
    {
      case TLS_MODEL_GLOBAL_DYNAMIC:
	tmp = gen_reg_rtx (Pmode);
	if (flag_pic)
	  emit_insn (gen_tgd_load_pic (tmp, addr));
	else
	  emit_insn (gen_tgd_load (tmp, addr));
	ret = hppa_tls_call (tmp);
	break;

      case TLS_MODEL_LOCAL_DYNAMIC:
	ret = gen_reg_rtx (Pmode);
	tmp = gen_reg_rtx (Pmode);
	start_sequence ();
	if (flag_pic)
	  emit_insn (gen_tld_load_pic (tmp, addr));
	else
	  emit_insn (gen_tld_load (tmp, addr));
	t1 = hppa_tls_call (tmp);
	insn = get_insns ();
	end_sequence ();
	t2 = gen_reg_rtx (Pmode);
	emit_libcall_block (insn, t2, t1,
			    gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				            UNSPEC_TLSLDBASE));
	emit_insn (gen_tld_offset_load (ret, addr, t2));
	break;

      case TLS_MODEL_INITIAL_EXEC:
	tp = gen_reg_rtx (Pmode);
	tmp = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	emit_insn (gen_tp_load (tp));
	if (flag_pic)
	  emit_insn (gen_tie_load_pic (tmp, addr));
	else
	  emit_insn (gen_tie_load (tmp, addr));
	emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
	break;

      case TLS_MODEL_LOCAL_EXEC:
	tp = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	emit_insn (gen_tp_load (tp));
	emit_insn (gen_tle_load (ret, addr, tp));
	break;

      default:
	gcc_unreachable ();
    }

  return ret;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= 16
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)


   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
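
/* Worked example (illustrative, not from the original source): with
   MODE_INT and mask 0x3fff, memory (X + 0x4321) rounds down because
   (0x4321 & 0x3fff) = 0x0321 < 0x2000, giving Y = 0x4000; the result
   is Z = X + 0x4000 and memory (Z + 0x321), whose displacement fits
   in 14 bits.  */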

rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 enum machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (PA_SYMBOL_REF_TLS_P (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      HOST_WIDE_INT newoffset;
      HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      ? (INT14_OK_STRICT ? 0x3fff : 0x1f) : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF).  Combine cannot
	 handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
      && (OBJECT_P (XEXP (x, 1))
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx_PLUS (Pmode,
					     gen_rtx_MULT (Pmode,
							   reg2,
							   GEN_INT (val)),
					     reg1));
    }

  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
      && (mode == SFmode || mode == DFmode))
    {

      /* First, try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx, orig_base;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  orig_base = XEXP (XEXP (x, 0), 1);
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_MULT (Pmode,
					    XEXP (XEXP (XEXP (x, 0), 0), 0),
					    XEXP (XEXP (XEXP (x, 0), 0), 1)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  orig_base = XEXP (x, 1);
	  idx = XEXP (x, 0);
	}

      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  HOST_WIDE_INT val = INTVAL (XEXP (idx, 1));

	  val /= INTVAL (XEXP (XEXP (idx, 0), 1));
	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_MULT (Pmode, reg1,
						  XEXP (XEXP (idx, 0), 1)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  HOST_WIDE_INT val = INTVAL (XEXP (XEXP (idx, 0), 1));
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode, gen_rtx_PLUS (Pmode,
						 gen_rtx_MULT (Pmode,
							       reg2,
							       GEN_INT (val)),
						 reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_MULT (Pmode, reg1,
						    XEXP (XEXP (idx, 0), 1)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));

    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     Where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is big, but can be divided evenly by shadd_const
	     and added to (reg).  This allows more scaled indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && GET_CODE (XEXP (x, 0)) == MULT
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095
	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
	        reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return force_reg (Pmode,
				gen_rtx_PLUS (Pmode,
					      gen_rtx_MULT (Pmode,
							    reg2,
							    GEN_INT (val)),
					      reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && GET_CODE (XEXP (x, 0)) == MULT
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
		   && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
	    {
	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_MULT (Pmode, regx2,
						       XEXP (XEXP (x, 0), 1)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}

/* For the HPPA, REG, REG+CONST and LO_SUM addresses are cheapest
   (cost 1), addresses involving HIGH cost 2, and all others cost 4.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as GO_IF_LEGITIMATE_ADDRESS.  */

static int
hppa_address_cost (rtx X,
		   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, int code, int outer_code, int *total,
		bool speed ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
	*total = 0;
      else if (INT_14_BITS (x))
	*total = 1;
      else
	*total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
	  && outer_code != SET)
	*total = 0;
      else
        *total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        *total = COSTS_N_INSNS (3);
      else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
	*total = COSTS_N_INSNS (8);
      else
	*total = COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (14);
	  return true;
	}
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      *total = COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	*total = COSTS_N_INSNS (3);
      else
        *total = COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}

/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (enum machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}

/* Return 1 if *X is a thread-local symbol.  */

static int
pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  return PA_SYMBOL_REF_TLS_P (*x);
}

/* Return 1 if X contains a thread-local symbol.  */

bool
pa_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
}

/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */

int
emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];
  register rtx tem;

  /* We can only handle indexed addresses in the destination operand
     of floating point stores.  Thus, we need to break out indexed
     addresses from the destination operand.  */
  if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
    {
      gcc_assert (can_create_pseudo_p ());

      tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
      operand0 = replace_equiv_address (operand0, tem);
    }

  /* On targets with non-equivalent space registers, break out unscaled
     indexed addresses from the source operand before the final CSE.
     We have to do this because the REG_POINTER flag is not correctly
     carried through various optimization passes and CSE may substitute
     a pseudo without the pointer set for one with the pointer set.  As
     a result, we lose various opportunities to create insns with
     unscaled indexed addresses.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (operand1) == MEM
      && GET_CODE (XEXP (operand1, 0)) == PLUS
      && REG_P (XEXP (XEXP (operand1, 0), 0))
      && REG_P (XEXP (XEXP (operand1, 0), 1)))
    operand1
      = replace_equiv_address (operand1,
			       copy_to_mode_reg (Pmode, XEXP (operand1, 0)));

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem[REGNO (operand0)];
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand0) == SUBREG
	   && GET_CODE (SUBREG_REG (operand0)) == REG
	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
     /* We must not alter SUBREG_BYTE (operand0) since that would confuse
	the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
				 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
				 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp);
    }

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem[REGNO (operand1)];
  else if (scratch_reg
	   && reload_in_progress && GET_CODE (operand1) == SUBREG
	   && GET_CODE (SUBREG_REG (operand1)) == REG
	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
     /* We must not alter SUBREG_BYTE (operand1) since that would confuse
	the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
				 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
				 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (&temp);
    }

  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
      && ((tem = find_replacement (&XEXP (operand0, 0)))
	  != XEXP (operand0, 0)))
    operand0 = replace_equiv_address (operand0, tem);

  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
      && ((tem = find_replacement (&XEXP (operand1, 0)))
	  != XEXP (operand1, 0)))
    operand1 = replace_equiv_address (operand1, tem);

  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 or 14 bits, including
     (subreg (mem (addr))) cases.  */
  if (scratch_reg
      && fp_reg_operand (operand0, mode)
      && ((GET_CODE (operand1) == MEM
	   && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode),
				 XEXP (operand1, 0)))
	  || ((GET_CODE (operand1) == SUBREG
	       && GET_CODE (XEXP (operand1, 0)) == MEM
	       && !memory_address_p ((GET_MODE_SIZE (mode) == 4
				      ? SFmode : DFmode),
				     XEXP (XEXP (operand1, 0), 0))))))
    {
      if (GET_CODE (operand1) == SUBREG)
	operand1 = XEXP (operand1, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand1, 0)))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg,
			  gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
					  Pmode,
					  XEXP (XEXP (operand1, 0), 0),
					  scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand1, 0));
      emit_insn (gen_rtx_SET (VOIDmode, operand0,
			      replace_equiv_address (operand1, scratch_reg)));
      return 1;
    }
  else if (scratch_reg
	   && fp_reg_operand (operand1, mode)
	   && ((GET_CODE (operand0) == MEM
		&& !memory_address_p ((GET_MODE_SIZE (mode) == 4
					? SFmode : DFmode),
				       XEXP (operand0, 0)))
	       || ((GET_CODE (operand0) == SUBREG)
		   && GET_CODE (XEXP (operand0, 0)) == MEM
		   && !memory_address_p ((GET_MODE_SIZE (mode) == 4
					  ? SFmode : DFmode),
			   		 XEXP (XEXP (operand0, 0), 0)))))
    {
      if (GET_CODE (operand0) == SUBREG)
	operand0 = XEXP (operand0, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand0, 0)))
	{
	  emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
								        0)),
						       Pmode,
						       XEXP (XEXP (operand0, 0),
								   0),
						       scratch_reg));
	}
      else
	emit_move_insn (scratch_reg, XEXP (operand0, 0));
      emit_insn (gen_rtx_SET (VOIDmode,
			      replace_equiv_address (operand0, scratch_reg),
			      operand1));
      return 1;
    }
  /* Handle secondary reloads for loads of FP registers from constant
     expressions by forcing the constant into memory.

     Use scratch_reg to hold the address of the memory location.

     The proper fix is to change PREFERRED_RELOAD_CLASS to return
     NO_REGS when presented with a const_int and a register class
     containing only FP registers.  Doing so unfortunately creates
     more problems than it solves.   Fix this for 2.5.  */
  else if (scratch_reg
	   && CONSTANT_P (operand1)
	   && fp_reg_operand (operand0, mode))
    {
      rtx const_mem, xoperands[2];

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
	 it in WORD_MODE regardless of what mode it was originally given
	 to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* Force the constant into memory and put the address of the
	 memory location into scratch_reg.  */
      const_mem = force_const_mem (mode, operand1);
      xoperands[0] = scratch_reg;
      xoperands[1] = XEXP (const_mem, 0);
      emit_move_sequence (xoperands, Pmode, 0);

      /* Now load the destination register.  */
      emit_insn (gen_rtx_SET (mode, operand0,
			      replace_equiv_address (const_mem, scratch_reg)));
      return 1;
    }
  /* Handle secondary reloads for SAR.  These occur when trying to load
     the SAR from memory or a constant.  */
  else if (scratch_reg
	   && GET_CODE (operand0) == REG
	   && REGNO (operand0) < FIRST_PSEUDO_REGISTER
	   && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
	   && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
    {
      /* D might not fit in 14 bits either; for such cases load D into
	 scratch reg.  */
      if (GET_CODE (operand1) == MEM
	  && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
	{
	  /* We are reloading the address into the scratch register, so we
	     want to make sure the scratch register is a full register.  */
	  scratch_reg = force_mode (word_mode, scratch_reg);

	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
								        0)),
						       Pmode,
						       XEXP (XEXP (operand1, 0),
						       0),
						       scratch_reg));

	  /* Now we are going to load the scratch register from memory,
	     we want to load it in the same width as the original MEM,
	     which must be the same as the width of the ultimate destination,
	     OPERAND0.  */
	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

	  emit_move_insn (scratch_reg,
			  replace_equiv_address (operand1, scratch_reg));
	}
      else
	{
	  /* We want to load the scratch register using the same mode as
	     the ultimate destination.  */
	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

	  emit_move_insn (scratch_reg, operand1);
	}

      /* And emit the insn to set the ultimate destination.  We know that
	 the scratch register has the same mode as the destination at this
	 point.  */
      emit_move_insn (operand0, scratch_reg);
      return 1;
    }
  /* Handle the most common case: storing into a register.  */
  else if (register_operand (operand0, mode))
    {
      /* Legitimize TLS symbol references.  This happens for references
	 that aren't a legitimate constant.  */
      if (PA_SYMBOL_REF_TLS_P (operand1))
	operand1 = legitimize_tls_address (operand1);

      if (register_operand (operand1, mode)
	  || (GET_CODE (operand1) == CONST_INT
	      && cint_ok_for_move (INTVAL (operand1)))
	  || (operand1 == CONST0_RTX (mode))
	  || (GET_CODE (operand1) == HIGH
1699 	      && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1700 	  /* Only `general_operands' can come here, so MEM is ok.  */
1701 	  || GET_CODE (operand1) == MEM)
1702 	{
1703 	  /* Various sets are created during RTL generation which don't
1704 	     have the REG_POINTER flag correctly set.  After the CSE pass,
1705 	     instruction recognition can fail if we don't consistently
1706 	     set this flag when performing register copies.  This should
1707 	     also improve the opportunities for creating insns that use
1708 	     unscaled indexing.  */
1709 	  if (REG_P (operand0) && REG_P (operand1))
1710 	    {
1711 	      if (REG_POINTER (operand1)
1712 		  && !REG_POINTER (operand0)
1713 		  && !HARD_REGISTER_P (operand0))
1714 		copy_reg_pointer (operand0, operand1);
1715 	    }
1716 
1717 	  /* When MEMs are broken out, the REG_POINTER flag doesn't
1718 	     get set.  In some cases, we can set the REG_POINTER flag
1719 	     from the declaration for the MEM.  */
1720 	  if (REG_P (operand0)
1721 	      && GET_CODE (operand1) == MEM
1722 	      && !REG_POINTER (operand0))
1723 	    {
1724 	      tree decl = MEM_EXPR (operand1);
1725 
1726 	      /* Set the register pointer flag and register alignment
1727 		 if the declaration for this memory reference is a
1728 		 pointer type.  Fortran indirect argument references
1729 		 are ignored.  */
1730 	      if (decl
1731 		  && !(flag_argument_noalias > 1
1732 		       && TREE_CODE (decl) == INDIRECT_REF
1733 		       && TREE_CODE (TREE_OPERAND (decl, 0)) == PARM_DECL))
1734 		{
1735 		  tree type;
1736 
1737 		  /* If this is a COMPONENT_REF, use the FIELD_DECL from
1738 		     tree operand 1.  */
1739 		  if (TREE_CODE (decl) == COMPONENT_REF)
1740 		    decl = TREE_OPERAND (decl, 1);
1741 
1742 		  type = TREE_TYPE (decl);
1743 		  type = strip_array_types (type);
1744 
1745 		  if (POINTER_TYPE_P (type))
1746 		    {
1747 		      int align;
1748 
1749 		      type = TREE_TYPE (type);
1750 		      /* Using TYPE_ALIGN_OK is rather conservative as
1751 			 only the Ada front end actually sets it.  */
1752 		      align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
1753 			       : BITS_PER_UNIT);
1754 		      mark_reg_pointer (operand0, align);
1755 		    }
1756 		}
1757 	    }
1758 
1759 	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1760 	  return 1;
1761 	}
1762     }
1763   else if (GET_CODE (operand0) == MEM)
1764     {
1765       if (mode == DFmode && operand1 == CONST0_RTX (mode)
1766 	  && !(reload_in_progress || reload_completed))
1767 	{
1768 	  rtx temp = gen_reg_rtx (DFmode);
1769 
1770 	  emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1771 	  emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
1772 	  return 1;
1773 	}
1774       if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1775 	{
1776 	  /* Run this case quickly.  */
1777 	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1778 	  return 1;
1779 	}
1780       if (! (reload_in_progress || reload_completed))
1781 	{
1782 	  operands[0] = validize_mem (operand0);
1783 	  operands[1] = operand1 = force_reg (mode, operand1);
1784 	}
1785     }
1786 
1787   /* Simplify the source if we need to.
1788      Note we do have to handle function labels here, even though we do
1789      not consider them legitimate constants.  Loop optimizations can
1790      call the emit_move_xxx routines with one as a source.  */
1791   if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1792       || function_label_operand (operand1, mode)
1793       || (GET_CODE (operand1) == HIGH
1794 	  && symbolic_operand (XEXP (operand1, 0), mode)))
1795     {
1796       int ishighonly = 0;
1797 
1798       if (GET_CODE (operand1) == HIGH)
1799 	{
1800 	  ishighonly = 1;
1801 	  operand1 = XEXP (operand1, 0);
1802 	}
1803       if (symbolic_operand (operand1, mode))
1804 	{
1805 	  /* Argh.  The assembler and linker can't handle arithmetic
1806 	     involving plabels.
1807 
1808 	     So we force the plabel into memory, load operand0 from
1809 	     the memory location, then add in the constant part.  */
1810 	  if ((GET_CODE (operand1) == CONST
1811 	       && GET_CODE (XEXP (operand1, 0)) == PLUS
1812 	       && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
1813 	      || function_label_operand (operand1, mode))
1814 	    {
1815 	      rtx temp, const_part;
1816 
1817 	      /* Figure out what (if any) scratch register to use.  */
1818 	      if (reload_in_progress || reload_completed)
1819 		{
1820 		  scratch_reg = scratch_reg ? scratch_reg : operand0;
1821 		  /* SCRATCH_REG will hold an address and maybe the actual
1822 		     data.  We want it in WORD_MODE regardless of what mode it
1823 		     was originally given to us.  */
1824 		  scratch_reg = force_mode (word_mode, scratch_reg);
1825 		}
1826 	      else if (flag_pic)
1827 		scratch_reg = gen_reg_rtx (Pmode);
1828 
1829 	      if (GET_CODE (operand1) == CONST)
1830 		{
1831 		  /* Save away the constant part of the expression.  */
1832 		  const_part = XEXP (XEXP (operand1, 0), 1);
1833 		  gcc_assert (GET_CODE (const_part) == CONST_INT);
1834 
1835 		  /* Force the function label into memory.  */
1836 		  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1837 		}
1838 	      else
1839 		{
1840 		  /* No constant part.  */
1841 		  const_part = NULL_RTX;
1842 
1843 		  /* Force the function label into memory.  */
1844 		  temp = force_const_mem (mode, operand1);
1845 		}
1846 
1847 
1848 	      /* Get the address of the memory location.  PIC-ify it if
1849 		 necessary.  */
1850 	      temp = XEXP (temp, 0);
1851 	      if (flag_pic)
1852 		temp = legitimize_pic_address (temp, mode, scratch_reg);
1853 
1854 	      /* Put the address of the memory location into our destination
1855 		 register.  */
1856 	      operands[1] = temp;
1857 	      emit_move_sequence (operands, mode, scratch_reg);
1858 
1859 	      /* Now load from the memory location into our destination
1860 		 register.  */
1861 	      operands[1] = gen_rtx_MEM (Pmode, operands[0]);
1862 	      emit_move_sequence (operands, mode, scratch_reg);
1863 
1864 	      /* And add back in the constant part.  */
1865 	      if (const_part != NULL_RTX)
1866 		expand_inc (operand0, const_part);
1867 
1868 	      return 1;
1869 	    }
1870 
1871 	  if (flag_pic)
1872 	    {
1873 	      rtx temp;
1874 
1875 	      if (reload_in_progress || reload_completed)
1876 		{
1877 		  temp = scratch_reg ? scratch_reg : operand0;
1878 		  /* TEMP will hold an address and maybe the actual
1879 		     data.  We want it in WORD_MODE regardless of what mode it
1880 		     was originally given to us.  */
1881 		  temp = force_mode (word_mode, temp);
1882 		}
1883 	      else
1884 		temp = gen_reg_rtx (Pmode);
1885 
1886 	      /* (const (plus (symbol) (const_int))) must be forced to
1887 		 memory during/after reload if the const_int will not fit
1888 		 in 14 bits.  */
1889 	      if (GET_CODE (operand1) == CONST
1890 		       && GET_CODE (XEXP (operand1, 0)) == PLUS
1891 		       && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1892 		       && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1893 		       && (reload_completed || reload_in_progress)
1894 		       && flag_pic)
1895 		{
1896 		  rtx const_mem = force_const_mem (mode, operand1);
1897 		  operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
1898 							mode, temp);
1899 		  operands[1] = replace_equiv_address (const_mem, operands[1]);
1900 		  emit_move_sequence (operands, mode, temp);
1901 		}
1902 	      else
1903 		{
1904 		  operands[1] = legitimize_pic_address (operand1, mode, temp);
1905 		  if (REG_P (operand0) && REG_P (operands[1]))
1906 		    copy_reg_pointer (operand0, operands[1]);
1907 		  emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
1908 		}
1909 	    }
1910 	  /* On the HPPA, references to data space are supposed to use dp,
1911 	     register 27, but showing it in the RTL inhibits various cse
1912 	     and loop optimizations.  */
1913 	  else
1914 	    {
1915 	      rtx temp, set;
1916 
1917 	      if (reload_in_progress || reload_completed)
1918 		{
1919 		  temp = scratch_reg ? scratch_reg : operand0;
1920 		  /* TEMP will hold an address and maybe the actual
1921 		     data.  We want it in WORD_MODE regardless of what mode it
1922 		     was originally given to us.  */
1923 		  temp = force_mode (word_mode, temp);
1924 		}
1925 	      else
1926 		temp = gen_reg_rtx (mode);
1927 
1928 	      /* Loading a SYMBOL_REF into a register makes that register
1929 		 safe to be used as the base in an indexed address.
1930 
1931 		 Don't mark hard registers though.  That loses.  */
1932 	      if (GET_CODE (operand0) == REG
1933 		  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1934 		mark_reg_pointer (operand0, BITS_PER_UNIT);
1935 	      if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
1936 		mark_reg_pointer (temp, BITS_PER_UNIT);
1937 
1938 	      if (ishighonly)
1939 		set = gen_rtx_SET (mode, operand0, temp);
1940 	      else
1941 		set = gen_rtx_SET (VOIDmode,
1942 				   operand0,
1943 				   gen_rtx_LO_SUM (mode, temp, operand1));
1944 
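	      /* The pair emitted below is the usual HIGH/LO_SUM
		 expansion, i.e. roughly "ldil L'sym,%tmp" followed by
		 "ldo R'sym(%tmp),%dst" (an illustrative sketch; the
		 exact insns come from the move patterns).  */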
1945 	      emit_insn (gen_rtx_SET (VOIDmode,
1946 				      temp,
1947 				      gen_rtx_HIGH (mode, operand1)));
1948 	      emit_insn (set);
1949 
1950 	    }
1951 	  return 1;
1952 	}
1953       else if (pa_tls_referenced_p (operand1))
1954 	{
1955 	  rtx tmp = operand1;
1956 	  rtx addend = NULL;
1957 
1958 	  if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
1959 	    {
1960 	      addend = XEXP (XEXP (tmp, 0), 1);
1961 	      tmp = XEXP (XEXP (tmp, 0), 0);
1962 	    }
1963 
1964 	  gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
1965 	  tmp = legitimize_tls_address (tmp);
1966 	  if (addend)
1967 	    {
1968 	      tmp = gen_rtx_PLUS (mode, tmp, addend);
1969 	      tmp = force_operand (tmp, operands[0]);
1970 	    }
1971 	  operands[1] = tmp;
1972 	}
1973       else if (GET_CODE (operand1) != CONST_INT
1974 	       || !cint_ok_for_move (INTVAL (operand1)))
1975 	{
1976 	  rtx insn, temp;
1977 	  rtx op1 = operand1;
1978 	  HOST_WIDE_INT value = 0;
1979 	  HOST_WIDE_INT insv = 0;
1980 	  int insert = 0;
1981 
1982 	  if (GET_CODE (operand1) == CONST_INT)
1983 	    value = INTVAL (operand1);
1984 
1985 	  if (TARGET_64BIT
1986 	      && GET_CODE (operand1) == CONST_INT
1987 	      && HOST_BITS_PER_WIDE_INT > 32
1988 	      && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
1989 	    {
1990 	      HOST_WIDE_INT nval;
1991 
1992 	      /* Extract the low order 32 bits of the value and sign extend.
1993 		 If the new value is the same as the original value, we can
1994 		 can use the original value as-is.  If the new value is
1995 		 use the original value as-is.  If the new value is
1996 		 of the original value into the final result.  */
1997 	      nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
1998 		      ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
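	      /* The mask/XOR/subtract sequence above sign-extends the
		 low-order 32 bits of VALUE; e.g. (a worked sketch)
		 value = 0x123456789 gives nval = 0x23456789, so the
		 upper 32 bits (insv = 1) must be inserted separately.  */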
1999 	      if (value != nval)
2000 		{
2001 #if HOST_BITS_PER_WIDE_INT > 32
2002 		  insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2003 #endif
2004 		  insert = 1;
2005 		  value = nval;
2006 		  operand1 = GEN_INT (nval);
2007 		}
2008 	    }
2009 
2010 	  if (reload_in_progress || reload_completed)
2011 	    temp = scratch_reg ? scratch_reg : operand0;
2012 	  else
2013 	    temp = gen_reg_rtx (mode);
2014 
2015 	  /* We don't directly split DImode constants on 32-bit targets
2016 	     because PLUS uses an 11-bit immediate and the insn sequence
2017 	     generated is not as efficient as the one using HIGH/LO_SUM.  */
2018 	  if (GET_CODE (operand1) == CONST_INT
2019 	      && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2020 	      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2021 	      && !insert)
2022 	    {
2023 	      /* Directly break constant into high and low parts.  This
2024 		 provides better optimization opportunities because various
2025 		 passes recognize constants split with PLUS but not LO_SUM.
2026 		 We use a 14-bit signed low part except when the addition
2027 		 of 0x4000 to the high part might change the sign of the
2028 		 high part.  */
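	      /* For instance (an illustrative sketch), value = 0x3000
		 gives low = 0x3000 >= 0x2000, so high becomes 0x4000
		 and the final low part is 0x3000 - 0x4000 = -0x1000,
		 which still fits in the signed 14-bit field.  */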
2029 	      HOST_WIDE_INT low = value & 0x3fff;
2030 	      HOST_WIDE_INT high = value & ~ 0x3fff;
2031 
2032 	      if (low >= 0x2000)
2033 		{
2034 		  if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2035 		    high += 0x2000;
2036 		  else
2037 		    high += 0x4000;
2038 		}
2039 
2040 	      low = value - high;
2041 
2042 	      emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
2043 	      operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2044 	    }
2045 	  else
2046 	    {
2047 	      emit_insn (gen_rtx_SET (VOIDmode, temp,
2048 				      gen_rtx_HIGH (mode, operand1)));
2049 	      operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2050 	    }
2051 
2052 	  insn = emit_move_insn (operands[0], operands[1]);
2053 
2054 	  /* Now insert the most significant 32 bits of the value
2055 	     into the register.  When we don't have a second register
2056 	     available, it could take up to nine instructions to load
2057 	     a 64-bit integer constant.  Prior to reload, we force
2058 	     constants that would take more than three instructions
2059 	     to load to the constant pool.  During and after reload,
2060 	     we have to handle all possible values.  */
2061 	  if (insert)
2062 	    {
2063 	      /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2064 		 register and the value to be inserted is outside the
2065 		 range that can be loaded with three depdi instructions.  */
2066 	      if (temp != operand0 && (insv >= 16384 || insv < -16384))
2067 		{
2068 		  operand1 = GEN_INT (insv);
2069 
2070 		  emit_insn (gen_rtx_SET (VOIDmode, temp,
2071 					  gen_rtx_HIGH (mode, operand1)));
2072 		  emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2073 		  emit_insn (gen_insv (operand0, GEN_INT (32),
2074 				       const0_rtx, temp));
2075 		}
2076 	      else
2077 		{
2078 		  int len = 5, pos = 27;
2079 
2080 		  /* Insert the bits using the depdi instruction.  */
2081 		  while (pos >= 0)
2082 		    {
2083 		      HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2084 		      HOST_WIDE_INT sign = v5 < 0;
2085 
2086 		      /* Left extend the insertion.  */
2087 		      insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2088 		      while (pos > 0 && (insv & 1) == sign)
2089 			{
2090 			  insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2091 			  len += 1;
2092 			  pos -= 1;
2093 			}
2094 
2095 		      emit_insn (gen_insv (operand0, GEN_INT (len),
2096 					   GEN_INT (pos), GEN_INT (v5)));
2097 
2098 		      len = pos > 0 && pos < 5 ? pos : 5;
2099 		      pos -= len;
2100 		    }
2101 		}
2102 	    }
2103 
2104 	  set_unique_reg_note (insn, REG_EQUAL, op1);
2105 
2106 	  return 1;
2107 	}
2108     }
2109   /* Now have insn-emit do whatever it normally does.  */
2110   return 0;
2111 }
2112 
2113 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2114    it will need a link/runtime reloc).  */
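/* For example, a static initializer such as "&x + 4" is a PLUS_EXPR
   over an ADDR_EXPR and therefore needs a reloc, while a plain
   integer constant does not.  */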
2115 
2116 int
2117 reloc_needed (tree exp)
2118 {
2119   int reloc = 0;
2120 
2121   switch (TREE_CODE (exp))
2122     {
2123     case ADDR_EXPR:
2124       return 1;
2125 
2126     case POINTER_PLUS_EXPR:
2127     case PLUS_EXPR:
2128     case MINUS_EXPR:
2129       reloc = reloc_needed (TREE_OPERAND (exp, 0));
2130       reloc |= reloc_needed (TREE_OPERAND (exp, 1));
2131       break;
2132 
2133     CASE_CONVERT:
2134     case NON_LVALUE_EXPR:
2135       reloc = reloc_needed (TREE_OPERAND (exp, 0));
2136       break;
2137 
2138     case CONSTRUCTOR:
2139       {
2140 	tree value;
2141 	unsigned HOST_WIDE_INT ix;
2142 
2143 	FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2144 	  if (value)
2145 	    reloc |= reloc_needed (value);
2146       }
2147       break;
2148 
2149     case ERROR_MARK:
2150       break;
2151 
2152     default:
2153       break;
2154     }
2155   return reloc;
2156 }
2157 
2158 /* Does operand (which is a symbolic_operand) live in text space?
2159    If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
2160    will be true.  */
2161 
2162 int
2163 read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
2164 {
2165   if (GET_CODE (operand) == CONST)
2166     operand = XEXP (XEXP (operand, 0), 0);
2167   if (flag_pic)
2168     {
2169       if (GET_CODE (operand) == SYMBOL_REF)
2170 	return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
2171     }
2172   else
2173     {
2174       if (GET_CODE (operand) == SYMBOL_REF)
2175 	return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
2176     }
2177   return 1;
2178 }
2179 
2180 
2181 /* Return the best assembler insn template
2182    for moving operands[1] into operands[0] as a fullword.  */
2183 const char *
2184 singlemove_string (rtx *operands)
2185 {
2186   HOST_WIDE_INT intval;
2187 
2188   if (GET_CODE (operands[0]) == MEM)
2189     return "stw %r1,%0";
2190   if (GET_CODE (operands[1]) == MEM)
2191     return "ldw %1,%0";
2192   if (GET_CODE (operands[1]) == CONST_DOUBLE)
2193     {
2194       long i;
2195       REAL_VALUE_TYPE d;
2196 
2197       gcc_assert (GET_MODE (operands[1]) == SFmode);
2198 
2199       /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2200 	 bit pattern.  */
2201       REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2202       REAL_VALUE_TO_TARGET_SINGLE (d, i);
2203 
2204       operands[1] = GEN_INT (i);
2205       /* Fall through to CONST_INT case.  */
2206     }
2207   if (GET_CODE (operands[1]) == CONST_INT)
2208     {
2209       intval = INTVAL (operands[1]);
2210 
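      /* Illustrative examples: 0x1234 fits in 14 bits and becomes a
	 single "ldi"; 0x12345800 has its low 11 bits clear and becomes
	 a single "ldil"; a value like 0x3c00 can be synthesized with
	 one "zdepi"; anything else needs the "ldil"/"ldo" pair.  */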
2211       if (VAL_14_BITS_P (intval))
2212 	return "ldi %1,%0";
2213       else if ((intval & 0x7ff) == 0)
2214 	return "ldil L'%1,%0";
2215       else if (zdepi_cint_p (intval))
2216 	return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2217       else
2218 	return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2219     }
2220   return "copy %1,%0";
2221 }
2222 
2223 
2224 /* Compute position (in OP[1]) and width (in OP[2])
2225    useful for copying IMM to a register using the zdepi
2226    instructions.  Store the immediate value to insert in OP[0].  */
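/* E.g. (a worked sketch) IMM = 0x3c00 has lsb = 10 and a four-bit
   field of ones, giving OP[0] = 0xf, OP[1] = 21 and OP[2] = 4; the
   resulting zdepi deposit of 15 at position 21, length 4,
   reconstructs 0x3c00.  */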
2227 static void
2228 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2229 {
2230   int lsb, len;
2231 
2232   /* Find the least significant set bit in IMM.  */
2233   for (lsb = 0; lsb < 32; lsb++)
2234     {
2235       if ((imm & 1) != 0)
2236         break;
2237       imm >>= 1;
2238     }
2239 
2240   /* Choose variants based on *sign* of the 5-bit field.  */
2241   if ((imm & 0x10) == 0)
2242     len = (lsb <= 28) ? 4 : 32 - lsb;
2243   else
2244     {
2245       /* Find the width of the bitstring in IMM.  */
2246       for (len = 5; len < 32 - lsb; len++)
2247 	{
2248 	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2249 	    break;
2250 	}
2251 
2252       /* Sign extend IMM as a 5-bit value.  */
2253       imm = (imm & 0xf) - 0x10;
2254     }
2255 
2256   op[0] = imm;
2257   op[1] = 31 - lsb;
2258   op[2] = len;
2259 }
2260 
2261 /* Compute position (in OP[1]) and width (in OP[2])
2262    useful for copying IMM to a register using the depdi,z
2263    instructions.  Store the immediate value to insert in OP[0].  */
2264 void
2265 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2266 {
2267   int lsb, len, maxlen;
2268 
2269   maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2270 
2271   /* Find the least significant set bit in IMM.  */
2272   for (lsb = 0; lsb < maxlen; lsb++)
2273     {
2274       if ((imm & 1) != 0)
2275         break;
2276       imm >>= 1;
2277     }
2278 
2279   /* Choose variants based on *sign* of the 5-bit field.  */
2280   if ((imm & 0x10) == 0)
2281     len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2282   else
2283     {
2284       /* Find the width of the bitstring in IMM.  */
2285       for (len = 5; len < maxlen - lsb; len++)
2286 	{
2287 	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2288 	    break;
2289 	}
2290 
2291       /* Extend length if host is narrow and IMM is negative.  */
2292       if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2293 	len += 32;
2294 
2295       /* Sign extend IMM as a 5-bit value.  */
2296       imm = (imm & 0xf) - 0x10;
2297     }
2298 
2299   op[0] = imm;
2300   op[1] = 63 - lsb;
2301   op[2] = len;
2302 }
2303 
2304 /* Output assembler code to perform a doubleword move insn
2305    with operands OPERANDS.  */
2306 
2307 const char *
2308 output_move_double (rtx *operands)
2309 {
2310   enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2311   rtx latehalf[2];
2312   rtx addreg0 = 0, addreg1 = 0;
2313 
2314   /* First classify both operands.  */
2315 
2316   if (REG_P (operands[0]))
2317     optype0 = REGOP;
2318   else if (offsettable_memref_p (operands[0]))
2319     optype0 = OFFSOP;
2320   else if (GET_CODE (operands[0]) == MEM)
2321     optype0 = MEMOP;
2322   else
2323     optype0 = RNDOP;
2324 
2325   if (REG_P (operands[1]))
2326     optype1 = REGOP;
2327   else if (CONSTANT_P (operands[1]))
2328     optype1 = CNSTOP;
2329   else if (offsettable_memref_p (operands[1]))
2330     optype1 = OFFSOP;
2331   else if (GET_CODE (operands[1]) == MEM)
2332     optype1 = MEMOP;
2333   else
2334     optype1 = RNDOP;
2335 
2336   /* Check for the cases that the operand constraints are not
2337      supposed to allow.  */
2338   gcc_assert (optype0 == REGOP || optype1 == REGOP);
2339 
2340   /* Handle copies between general and floating registers.  */
2341 
2342   if (optype0 == REGOP && optype1 == REGOP
2343       && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2344     {
2345       if (FP_REG_P (operands[0]))
2346 	{
2347 	  output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2348 	  output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2349 	  return "{fldds|fldd} -16(%%sp),%0";
2350 	}
2351       else
2352 	{
2353 	  output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2354 	  output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2355 	  return "{ldws|ldw} -12(%%sp),%R0";
2356 	}
2357     }
2358 
2359   /* Handle auto decrementing and incrementing loads and stores
2360      specifically, since the structure of the function doesn't work
2361      for them without major modification.  Do it better when we teach
2362      this port about the general inc/dec addressing of PA.
2363      (This was written by tege.  Chide him if it doesn't work.)  */
2364 
2365   if (optype0 == MEMOP)
2366     {
2367       /* We have to output the address syntax ourselves, since print_operand
2368 	 doesn't deal with the addresses we want to use.  Fix this later.  */
2369 
2370       rtx addr = XEXP (operands[0], 0);
2371       if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2372 	{
2373 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2374 
2375 	  operands[0] = XEXP (addr, 0);
2376 	  gcc_assert (GET_CODE (operands[1]) == REG
2377 		      && GET_CODE (operands[0]) == REG);
2378 
2379 	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2380 
2381 	  /* No overlap between high target register and address
2382 	     register.  (We do this in a non-obvious way to
2383 	     save a register file writeback)  */
2384 	  if (GET_CODE (addr) == POST_INC)
2385 	    return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2386 	  return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2387 	}
2388       else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2389 	{
2390 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2391 
2392 	  operands[0] = XEXP (addr, 0);
2393 	  gcc_assert (GET_CODE (operands[1]) == REG
2394 		      && GET_CODE (operands[0]) == REG);
2395 
2396 	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2397 	  /* No overlap between high target register and address
2398 	     register.  (We do this in a non-obvious way to save a
2399 	     register file writeback)  */
2400 	  if (GET_CODE (addr) == PRE_INC)
2401 	    return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2402 	  return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2403 	}
2404     }
2405   if (optype1 == MEMOP)
2406     {
2407       /* We have to output the address syntax ourselves, since print_operand
2408 	 doesn't deal with the addresses we want to use.  Fix this later.  */
2409 
2410       rtx addr = XEXP (operands[1], 0);
2411       if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2412 	{
2413 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2414 
2415 	  operands[1] = XEXP (addr, 0);
2416 	  gcc_assert (GET_CODE (operands[0]) == REG
2417 		      && GET_CODE (operands[1]) == REG);
2418 
2419 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2420 	    {
2421 	      /* No overlap between high target register and address
2422 		 register.  (We do this in a non-obvious way to
2423 		 save a register file writeback)  */
2424 	      if (GET_CODE (addr) == POST_INC)
2425 		return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2426 	      return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2427 	    }
2428 	  else
2429 	    {
2430 	      /* This is an undefined situation.  We should load into the
2431 		 address register *and* update that register.  Probably
2432 		 we don't need to handle this at all.  */
2433 	      if (GET_CODE (addr) == POST_INC)
2434 		return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2435 	      return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2436 	    }
2437 	}
2438       else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2439 	{
2440 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2441 
2442 	  operands[1] = XEXP (addr, 0);
2443 	  gcc_assert (GET_CODE (operands[0]) == REG
2444 		      && GET_CODE (operands[1]) == REG);
2445 
2446 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2447 	    {
2448 	      /* No overlap between high target register and address
2449 		 register.  (We do this in a non-obvious way to
2450 		 save a register file writeback)  */
2451 	      if (GET_CODE (addr) == PRE_INC)
2452 		return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2453 	      return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2454 	    }
2455 	  else
2456 	    {
2457 	      /* This is an undefined situation.  We should load into the
2458 		 address register *and* update that register.  Probably
2459 		 we don't need to handle this at all.  */
2460 	      if (GET_CODE (addr) == PRE_INC)
2461 		return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2462 	      return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2463 	    }
2464 	}
2465       else if (GET_CODE (addr) == PLUS
2466 	       && GET_CODE (XEXP (addr, 0)) == MULT)
2467 	{
2468 	  rtx xoperands[4];
2469 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2470 
2471 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2472 	    {
2473 	      xoperands[0] = high_reg;
2474 	      xoperands[1] = XEXP (addr, 1);
2475 	      xoperands[2] = XEXP (XEXP (addr, 0), 0);
2476 	      xoperands[3] = XEXP (XEXP (addr, 0), 1);
2477 	      output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2478 			       xoperands);
2479 	      return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2480 	    }
2481 	  else
2482 	    {
2483 	      xoperands[0] = high_reg;
2484 	      xoperands[1] = XEXP (addr, 1);
2485 	      xoperands[2] = XEXP (XEXP (addr, 0), 0);
2486 	      xoperands[3] = XEXP (XEXP (addr, 0), 1);
2487 	      output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2488 			       xoperands);
2489 	      return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2490 	    }
2491 	}
2492     }
2493 
2494   /* If an operand is an unoffsettable memory ref, find a register
2495      we can increment temporarily to make it refer to the second word.  */
2496 
2497   if (optype0 == MEMOP)
2498     addreg0 = find_addr_reg (XEXP (operands[0], 0));
2499 
2500   if (optype1 == MEMOP)
2501     addreg1 = find_addr_reg (XEXP (operands[1], 0));
2502 
2503   /* Ok, we can do one word at a time.
2504      Normally we do the low-numbered word first; overlap may force the reverse (see below).
2505 
2506      In either case, set up in LATEHALF the operands to use
2507      for the high-numbered word and in some cases alter the
2508      operands in OPERANDS to be suitable for the low-numbered word.  */
2509 
2510   if (optype0 == REGOP)
2511     latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2512   else if (optype0 == OFFSOP)
2513     latehalf[0] = adjust_address (operands[0], SImode, 4);
2514   else
2515     latehalf[0] = operands[0];
2516 
2517   if (optype1 == REGOP)
2518     latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2519   else if (optype1 == OFFSOP)
2520     latehalf[1] = adjust_address (operands[1], SImode, 4);
2521   else if (optype1 == CNSTOP)
2522     split_double (operands[1], &operands[1], &latehalf[1]);
2523   else
2524     latehalf[1] = operands[1];
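  /* E.g. (an illustrative sketch) a DImode register copy from r5:r6
     to r3:r4 gives latehalf[0] = r4 and latehalf[1] = r6; the overlap
     checks below decide which half must be moved first.  */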
2525 
2526   /* If the first move would clobber the source of the second one,
2527      do them in the other order.
2528 
2529      This can happen in two cases:
2530 
2531 	mem -> register where the first half of the destination register
2532  	is the same register used in the memory's address.  Reload
2533 	is the same register used in the memory's address.  Reload
2534 
2535 	mem in this case will be either register indirect or register
2536 	indirect plus a valid offset.
2537 
2538 	register -> register move where REGNO(dst) == REGNO(src) + 1
2539 	someone (Tim/Tege?) claimed this can happen for parameter loads.
2540 
2541      Handle mem -> register case first.  */
2542   if (optype0 == REGOP
2543       && (optype1 == MEMOP || optype1 == OFFSOP)
2544       && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2545 			    operands[1], 0))
2546     {
2547       /* Do the late half first.  */
2548       if (addreg1)
2549 	output_asm_insn ("ldo 4(%0),%0", &addreg1);
2550       output_asm_insn (singlemove_string (latehalf), latehalf);
2551 
2552       /* Then clobber.  */
2553       if (addreg1)
2554 	output_asm_insn ("ldo -4(%0),%0", &addreg1);
2555       return singlemove_string (operands);
2556     }
2557 
2558   /* Now handle register -> register case.  */
2559   if (optype0 == REGOP && optype1 == REGOP
2560       && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2561     {
2562       output_asm_insn (singlemove_string (latehalf), latehalf);
2563       return singlemove_string (operands);
2564     }
2565 
2566   /* Normal case: do the two words, low-numbered first.  */
2567 
2568   output_asm_insn (singlemove_string (operands), operands);
2569 
2570   /* Make any unoffsettable addresses point at high-numbered word.  */
2571   if (addreg0)
2572     output_asm_insn ("ldo 4(%0),%0", &addreg0);
2573   if (addreg1)
2574     output_asm_insn ("ldo 4(%0),%0", &addreg1);
2575 
2576   /* Do that word.  */
2577   output_asm_insn (singlemove_string (latehalf), latehalf);
2578 
2579   /* Undo the adds we just did.  */
2580   if (addreg0)
2581     output_asm_insn ("ldo -4(%0),%0", &addreg0);
2582   if (addreg1)
2583     output_asm_insn ("ldo -4(%0),%0", &addreg1);
2584 
2585   return "";
2586 }
2587 
2588 const char *
2589 output_fp_move_double (rtx *operands)
2590 {
2591   if (FP_REG_P (operands[0]))
2592     {
2593       if (FP_REG_P (operands[1])
2594 	  || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2595 	output_asm_insn ("fcpy,dbl %f1,%0", operands);
2596       else
2597 	output_asm_insn ("fldd%F1 %1,%0", operands);
2598     }
2599   else if (FP_REG_P (operands[1]))
2600     {
2601       output_asm_insn ("fstd%F0 %1,%0", operands);
2602     }
2603   else
2604     {
2605       rtx xoperands[2];
2606 
2607       gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2608 
2609       /* This is a pain.  You have to be prepared to deal with an
2610 	 arbitrary address here including pre/post increment/decrement.
2611 
2612 	 So avoid this in the MD.  */
2613       gcc_assert (GET_CODE (operands[0]) == REG);
2614 
2615       xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2616       xoperands[0] = operands[0];
2617       output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2618     }
2619   return "";
2620 }
2621 
2622 /* Return a REG that occurs in ADDR with coefficient 1.
2623    ADDR can be effectively incremented by incrementing REG.  */
2624 
2625 static rtx
2626 find_addr_reg (rtx addr)
2627 {
2628   while (GET_CODE (addr) == PLUS)
2629     {
2630       if (GET_CODE (XEXP (addr, 0)) == REG)
2631 	addr = XEXP (addr, 0);
2632       else if (GET_CODE (XEXP (addr, 1)) == REG)
2633 	addr = XEXP (addr, 1);
2634       else if (CONSTANT_P (XEXP (addr, 0)))
2635 	addr = XEXP (addr, 1);
2636       else if (CONSTANT_P (XEXP (addr, 1)))
2637 	addr = XEXP (addr, 0);
2638       else
2639 	gcc_unreachable ();
2640     }
2641   gcc_assert (GET_CODE (addr) == REG);
2642   return addr;
2643 }
2644 
2645 /* Emit code to perform a block move.
2646 
2647    OPERANDS[0] is the destination pointer as a REG, clobbered.
2648    OPERANDS[1] is the source pointer as a REG, clobbered.
2649    OPERANDS[2] is a register for temporary storage.
2650    OPERANDS[3] is a register for temporary storage.
2651    OPERANDS[4] is the size as a CONST_INT.
2652    OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2653    OPERANDS[6] is another temporary register.  */
2654 
2655 const char *
2656 output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2657 {
2658   HOST_WIDE_INT align = INTVAL (operands[5]);
2659   unsigned HOST_WIDE_INT n_bytes = INTVAL (operands[4]);
2660 
2661   /* We can't move more than a word at a time because the PA
2662      has no integer move insns longer than a word.  (Could use fp mem ops?)  */
2663   if (align > (TARGET_64BIT ? 8 : 4))
2664     align = (TARGET_64BIT ? 8 : 4);
2665 
2666   /* Note that we know each loop below will execute at least twice
2667      (else we would have open-coded the copy).  */
2668   switch (align)
2669     {
2670       case 8:
2671 	/* Pre-adjust the loop counter.  */
2672 	operands[4] = GEN_INT (n_bytes - 16);
2673 	output_asm_insn ("ldi %4,%2", operands);
2674 
2675 	/* Copying loop.  */
2676 	output_asm_insn ("ldd,ma 8(%1),%3", operands);
2677 	output_asm_insn ("ldd,ma 8(%1),%6", operands);
2678 	output_asm_insn ("std,ma %3,8(%0)", operands);
2679 	output_asm_insn ("addib,>= -16,%2,.-12", operands);
2680 	output_asm_insn ("std,ma %6,8(%0)", operands);
2681 
2682 	/* Handle the residual.  There could be up to 15 bytes of
2683 	   residual to copy!  */
2684 	if (n_bytes % 16 != 0)
2685 	  {
2686 	    operands[4] = GEN_INT (n_bytes % 8);
2687 	    if (n_bytes % 16 >= 8)
2688 	      output_asm_insn ("ldd,ma 8(%1),%3", operands);
2689 	    if (n_bytes % 8 != 0)
2690 	      output_asm_insn ("ldd 0(%1),%6", operands);
2691 	    if (n_bytes % 16 >= 8)
2692 	      output_asm_insn ("std,ma %3,8(%0)", operands);
2693 	    if (n_bytes % 8 != 0)
2694 	      output_asm_insn ("stdby,e %6,%4(%0)", operands);
2695 	  }
2696 	return "";
2697 
2698       case 4:
2699 	/* Pre-adjust the loop counter.  */
2700 	operands[4] = GEN_INT (n_bytes - 8);
2701 	output_asm_insn ("ldi %4,%2", operands);
2702 
2703 	/* Copying loop.  */
2704 	output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2705 	output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2706 	output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2707 	output_asm_insn ("addib,>= -8,%2,.-12", operands);
2708 	output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2709 
2710 	/* Handle the residual.  There could be up to 7 bytes of
2711 	   residual to copy!  */
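	/* E.g. (a worked sketch) n_bytes = 23 with this alignment:
	   the loop copies 16 bytes, the word move below copies 4 more,
	   and "stby,e" with %4 = 3 finishes the last 3 bytes.  */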
2712 	if (n_bytes % 8 != 0)
2713 	  {
2714 	    operands[4] = GEN_INT (n_bytes % 4);
2715 	    if (n_bytes % 8 >= 4)
2716 	      output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2717 	    if (n_bytes % 4 != 0)
2718 	      output_asm_insn ("ldw 0(%1),%6", operands);
2719 	    if (n_bytes % 8 >= 4)
2720 	      output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2721 	    if (n_bytes % 4 != 0)
2722 	      output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2723 	  }
2724 	return "";
2725 
2726       case 2:
2727 	/* Pre-adjust the loop counter.  */
2728 	operands[4] = GEN_INT (n_bytes - 4);
2729 	output_asm_insn ("ldi %4,%2", operands);
2730 
2731 	/* Copying loop.  */
2732 	output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2733 	output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2734 	output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2735 	output_asm_insn ("addib,>= -4,%2,.-12", operands);
2736 	output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2737 
2738 	/* Handle the residual.  */
2739 	if (n_bytes % 4 != 0)
2740 	  {
2741 	    if (n_bytes % 4 >= 2)
2742 	      output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2743 	    if (n_bytes % 2 != 0)
2744 	      output_asm_insn ("ldb 0(%1),%6", operands);
2745 	    if (n_bytes % 4 >= 2)
2746 	      output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2747 	    if (n_bytes % 2 != 0)
2748 	      output_asm_insn ("stb %6,0(%0)", operands);
2749 	  }
2750 	return "";
2751 
2752       case 1:
2753 	/* Pre-adjust the loop counter.  */
2754 	operands[4] = GEN_INT (n_bytes - 2);
2755 	output_asm_insn ("ldi %4,%2", operands);
2756 
2757 	/* Copying loop.  */
2758 	output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2759 	output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2760 	output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2761 	output_asm_insn ("addib,>= -2,%2,.-12", operands);
2762 	output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2763 
2764 	/* Handle the residual.  */
2765 	if (n_bytes % 2 != 0)
2766 	  {
2767 	    output_asm_insn ("ldb 0(%1),%3", operands);
2768 	    output_asm_insn ("stb %3,0(%0)", operands);
2769 	  }
2770 	return "";
2771 
2772       default:
2773 	gcc_unreachable ();
2774     }
2775 }
2776 
2777 /* Count the number of insns necessary to handle this block move.
2778 
2779    Basic structure is the same as output_block_move, except that we
2780    count insns rather than emit them.  */
2781 
2782 static int
2783 compute_movmem_length (rtx insn)
2784 {
2785   rtx pat = PATTERN (insn);
2786   unsigned HOST_WIDE_INT align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2787   unsigned HOST_WIDE_INT n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2788   unsigned int n_insns = 0;
2789 
2790   /* We can't move more than a word at a time because the PA
2791      has no integer move insns longer than a word.  (Could use fp mem ops?)  */
2792   if (align > (TARGET_64BIT ? 8 : 4))
2793     align = (TARGET_64BIT ? 8 : 4);
2794 
2795   /* The basic copying loop.  */
2796   n_insns = 6;
2797 
2798   /* Residuals.  */
2799   if (n_bytes % (2 * align) != 0)
2800     {
2801       if ((n_bytes % (2 * align)) >= align)
2802 	n_insns += 2;
2803 
2804       if ((n_bytes % align) != 0)
2805 	n_insns += 2;
2806     }
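  /* E.g. (a worked sketch) align = 4 and n_bytes = 23: six loop insns
     plus 2 + 2 residual insns give 10 insns, i.e. a length of 40.  */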
2807 
2808   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
2809   return n_insns * 4;
2810 }
2811 
2812 /* Emit code to perform a block clear.
2813 
2814    OPERANDS[0] is the destination pointer as a REG, clobbered.
2815    OPERANDS[1] is a register for temporary storage.
2816    OPERANDS[2] is the size as a CONST_INT.
2817    OPERANDS[3] is the alignment safe to use, as a CONST_INT.  */
2818 
2819 const char *
2820 output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2821 {
2822   HOST_WIDE_INT align = INTVAL (operands[3]);
2823   unsigned HOST_WIDE_INT n_bytes = INTVAL (operands[2]);
2824 
2825   /* We can't clear more than a word at a time because the PA
2826      has no integer move insns longer than a word.  */
2827   if (align > (TARGET_64BIT ? 8 : 4))
2828     align = (TARGET_64BIT ? 8 : 4);
2829 
2830   /* Note that we know each loop below will execute at least twice
2831      (else we would have open-coded the clear).  */
2832   switch (align)
2833     {
2834       case 8:
2835 	/* Pre-adjust the loop counter.  */
2836 	operands[2] = GEN_INT (n_bytes - 16);
2837 	output_asm_insn ("ldi %2,%1", operands);
2838 
2839 	/* Loop.  */
2840 	output_asm_insn ("std,ma %%r0,8(%0)", operands);
2841 	output_asm_insn ("addib,>= -16,%1,.-4", operands);
2842 	output_asm_insn ("std,ma %%r0,8(%0)", operands);
2843 
2844 	/* Handle the residual.  There could be up to 15 bytes of
2845 	   residual to clear!  */
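	/* E.g. (a worked sketch) n_bytes = 23: the loop clears 16
	   bytes, and "stdby,e" with %2 = 7 then clears the last 7.  */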
2846 	if (n_bytes % 16 != 0)
2847 	  {
2848 	    operands[2] = GEN_INT (n_bytes % 8);
2849 	    if (n_bytes % 16 >= 8)
2850 	      output_asm_insn ("std,ma %%r0,8(%0)", operands);
2851 	    if (n_bytes % 8 != 0)
2852 	      output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
2853 	  }
2854 	return "";
2855 
2856       case 4:
2857 	/* Pre-adjust the loop counter.  */
2858 	operands[2] = GEN_INT (n_bytes - 8);
2859 	output_asm_insn ("ldi %2,%1", operands);
2860 
2861 	/* Loop.  */
2862 	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2863 	output_asm_insn ("addib,>= -8,%1,.-4", operands);
2864 	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2865 
2866 	/* Handle the residual.  There could be up to 7 bytes of
2867 	   residual to clear!  */
2868 	if (n_bytes % 8 != 0)
2869 	  {
2870 	    operands[2] = GEN_INT (n_bytes % 4);
2871 	    if (n_bytes % 8 >= 4)
2872 	      output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2873 	    if (n_bytes % 4 != 0)
2874 	      output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
2875 	  }
2876 	return "";
2877 
2878       case 2:
2879 	/* Pre-adjust the loop counter.  */
2880 	operands[2] = GEN_INT (n_bytes - 4);
2881 	output_asm_insn ("ldi %2,%1", operands);
2882 
2883 	/* Loop.  */
2884 	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2885 	output_asm_insn ("addib,>= -4,%1,.-4", operands);
2886 	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2887 
2888 	/* Handle the residual.  */
2889 	if (n_bytes % 4 != 0)
2890 	  {
2891 	    if (n_bytes % 4 >= 2)
2892 	      output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2893 	    if (n_bytes % 2 != 0)
2894 	      output_asm_insn ("stb %%r0,0(%0)", operands);
2895 	  }
2896 	return "";
2897 
2898       case 1:
2899 	/* Pre-adjust the loop counter.  */
2900 	operands[2] = GEN_INT (n_bytes - 2);
2901 	output_asm_insn ("ldi %2,%1", operands);
2902 
2903 	/* Loop.  */
2904 	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2905 	output_asm_insn ("addib,>= -2,%1,.-4", operands);
2906 	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2907 
2908 	/* Handle the residual.  */
2909 	if (n_bytes % 2 != 0)
2910 	  output_asm_insn ("stb %%r0,0(%0)", operands);
2911 
2912 	return "";
2913 
2914       default:
2915 	gcc_unreachable ();
2916     }
2917 }
2918 
2919 /* Count the number of insns necessary to handle this block clear.
2920 
2921    Basic structure is the same as output_block_clear, except that we
2922    count insns rather than emit them.  */
2923 
2924 static int
2925 compute_clrmem_length (rtx insn)
2926 {
2927   rtx pat = PATTERN (insn);
2928   unsigned HOST_WIDE_INT align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
2929   unsigned HOST_WIDE_INT n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
2930   unsigned int n_insns = 0;
2931 
2932   /* We can't clear more than a word at a time because the PA
2933      has no integer move insns longer than a word.  */
2934   if (align > (TARGET_64BIT ? 8 : 4))
2935     align = (TARGET_64BIT ? 8 : 4);
2936 
2937   /* The basic loop.  */
2938   n_insns = 4;
2939 
2940   /* Residuals.  */
2941   if (n_bytes % (2 * align) != 0)
2942     {
2943       if ((n_bytes % (2 * align)) >= align)
2944 	n_insns++;
2945 
2946       if ((n_bytes % align) != 0)
2947 	n_insns++;
2948     }
2949 
2950   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
2951   return n_insns * 4;
2952 }
2953 
2954 
2955 const char *
2956 output_and (rtx *operands)
2957 {
2958   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2959     {
2960       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2961       int ls0, ls1, ms0, p, len;
2962 
2963       for (ls0 = 0; ls0 < 32; ls0++)
2964 	if ((mask & (1 << ls0)) == 0)
2965 	  break;
2966 
2967       for (ls1 = ls0; ls1 < 32; ls1++)
2968 	if ((mask & (1 << ls1)) != 0)
2969 	  break;
2970 
2971       for (ms0 = ls1; ms0 < 32; ms0++)
2972 	if ((mask & (1 << ms0)) == 0)
2973 	  break;
2974 
2975       gcc_assert (ms0 == 32);
2976 
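      /* At this point MASK is known to be a (possibly empty) run of
	 low ones, a hole of zeros, then ones up to bit 31.  E.g. (a
	 worked sketch) mask = 0xffff00ff gives ls0 = 8, ls1 = 16 and
	 ms0 = 32, so the hole is cleared below with "depwi 0,23,8"
	 (p = 31 - 8, len = 16 - 8).  */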
2977       if (ls1 == 32)
2978 	{
2979 	  len = ls0;
2980 
2981 	  gcc_assert (len);
2982 
2983 	  operands[2] = GEN_INT (len);
2984 	  return "{extru|extrw,u} %1,31,%2,%0";
2985 	}
2986       else
2987 	{
2988 	  /* We could use this `depi' for the case above as well, but `depi'
2989 	     requires one more register file access than an `extru'.  */
2990 
2991 	  p = 31 - ls0;
2992 	  len = ls1 - ls0;
2993 
2994 	  operands[2] = GEN_INT (p);
2995 	  operands[3] = GEN_INT (len);
2996 	  return "{depi|depwi} 0,%2,%3,%0";
2997 	}
2998     }
2999   else
3000     return "and %1,%2,%0";
3001 }
3002 
3003 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3004    storing the result in operands[0].  */
3005 const char *
3006 output_64bit_and (rtx *operands)
3007 {
3008   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3009     {
3010       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3011       int ls0, ls1, ms0, p, len;
3012 
3013       for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3014 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3015 	  break;
3016 
3017       for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3018 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3019 	  break;
3020 
3021       for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3022 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3023 	  break;
3024 
3025       gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3026 
3027       if (ls1 == HOST_BITS_PER_WIDE_INT)
3028 	{
3029 	  len = ls0;
3030 
3031 	  gcc_assert (len);
3032 
3033 	  operands[2] = GEN_INT (len);
3034 	  return "extrd,u %1,63,%2,%0";
3035 	}
3036       else
3037 	{
3038 	  /* We could use this `depdi' for the case above as well, but `depdi'
3039 	     requires one more register file access than an `extrd'.  */
3040 
3041 	  p = 63 - ls0;
3042 	  len = ls1 - ls0;
3043 
3044 	  operands[2] = GEN_INT (p);
3045 	  operands[3] = GEN_INT (len);
3046 	  return "depdi 0,%2,%3,%0";
3047 	}
3048     }
3049   else
3050     return "and %1,%2,%0";
3051 }
3052 
3053 const char *
3054 output_ior (rtx *operands)
3055 {
3056   unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3057   int bs0, bs1, p, len;
3058 
3059   if (INTVAL (operands[2]) == 0)
3060     return "copy %1,%0";
3061 
3062   for (bs0 = 0; bs0 < 32; bs0++)
3063     if ((mask & (1 << bs0)) != 0)
3064       break;
3065 
3066   for (bs1 = bs0; bs1 < 32; bs1++)
3067     if ((mask & (1 << bs1)) == 0)
3068       break;
3069 
3070   gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3071 
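  /* The mask is a single contiguous run of ones.  E.g. (a worked
     sketch) mask = 0x00000ff0 gives bs0 = 4 and bs1 = 12, so we emit
     "depwi -1,27,8" (p = 31 - 4, len = 12 - 4) to set bits 4..11.  */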
3072   p = 31 - bs0;
3073   len = bs1 - bs0;
3074 
3075   operands[2] = GEN_INT (p);
3076   operands[3] = GEN_INT (len);
3077   return "{depi|depwi} -1,%2,%3,%0";
3078 }
3079 
3080 /* Return a string to perform a bitwise-or of operands[1] with operands[2]
3081    storing the result in operands[0].  */
3082 const char *
3083 output_64bit_ior (rtx *operands)
3084 {
3085   unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3086   int bs0, bs1, p, len;
3087 
3088   if (INTVAL (operands[2]) == 0)
3089     return "copy %1,%0";
3090 
3091   for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3092     if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3093       break;
3094 
3095   for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3096     if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3097       break;
3098 
3099   gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3100 	      || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3101 
3102   p = 63 - bs0;
3103   len = bs1 - bs0;
3104 
3105   operands[2] = GEN_INT (p);
3106   operands[3] = GEN_INT (len);
3107   return "depdi -1,%2,%3,%0";
3108 }
3109 
3110 /* Target hook for assembling integer objects.  This code handles
3111    aligned SI and DI integers specially since function references
3112    must be preceded by P%.  */
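/* For example (a sketch), an aligned, word-sized reference to function
   foo is emitted as ".word P%foo" (".dword P%foo" on 64-bit targets)
   so that a plabel rather than a raw code address is produced.  */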
3113 
3114 static bool
3115 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3116 {
3117   if (size == UNITS_PER_WORD
3118       && aligned_p
3119       && function_label_operand (x, VOIDmode))
3120     {
3121       fputs (size == 8 ? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
3122       output_addr_const (asm_out_file, x);
3123       fputc ('\n', asm_out_file);
3124       return true;
3125     }
3126   return default_assemble_integer (x, size, aligned_p);
3127 }
3128 
3129 /* Output an ascii string.  */
3130 void
3131 output_ascii (FILE *file, const char *p, int size)
3132 {
3133   int i;
3134   int chars_output;
3135   unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */
3136 
3137   /* The HP assembler can only take strings of 256 characters at one
3138      time.  This is a limitation on input line length, *not* the
3139      length of the string.  Sigh.  Even worse, it seems that the
3140      restriction is in number of input characters (see \xnn &
3141      \whatever).  So we have to do this very carefully.  */
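  /* E.g. (a sketch) the input bytes 'a', '"', 0x0a come out as
     a\"\x0a, and the directive is restarted once roughly 243 escaped
     characters have been emitted on the current line.  */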
3142 
3143   fputs ("\t.STRING \"", file);
3144 
3145   chars_output = 0;
3146   for (i = 0; i < size; i += 4)
3147     {
3148       int co = 0;
3149       int io = 0;
3150       for (io = 0, co = 0; io < MIN (4, size - i); io++)
3151 	{
3152 	  register unsigned int c = (unsigned char) p[i + io];
3153 
3154 	  if (c == '\"' || c == '\\')
3155 	    partial_output[co++] = '\\';
3156 	  if (c >= ' ' && c < 0177)
3157 	    partial_output[co++] = c;
3158 	  else
3159 	    {
3160 	      unsigned int hexd;
3161 	      partial_output[co++] = '\\';
3162 	      partial_output[co++] = 'x';
3163 	      hexd = c / 16 + '0';
3164 	      if (hexd > '9')
3165 		hexd -= '9' - 'a' + 1;
3166 	      partial_output[co++] = hexd;
3167 	      hexd = c % 16 + '0';
3168 	      if (hexd > '9')
3169 		hexd -= '9' - 'a' + 1;
3170 	      partial_output[co++] = hexd;
3171 	    }
3172 	}
3173       if (chars_output + co > 243)
3174 	{
3175 	  fputs ("\"\n\t.STRING \"", file);
3176 	  chars_output = 0;
3177 	}
3178       fwrite (partial_output, 1, (size_t) co, file);
3179       chars_output += co;
3180       co = 0;
3181     }
3182   fputs ("\"\n", file);
3183 }
3184 
3185 /* Try to rewrite floating point comparisons & branches to avoid
3186    useless add,tr insns.
3187 
3188    CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3189    to see if FPCC is dead.  CHECK_NOTES is nonzero for the
3190    first attempt to remove useless add,tr insns.  It is zero
3191    for the second pass as reorg sometimes leaves bogus REG_DEAD
3192    notes lying around.
3193 
3194    When CHECK_NOTES is zero we can only eliminate add,tr insns
3195    when there's a 1:1 correspondence between fcmp and ftest/fbranch
3196    instructions.  */
3197 static void
3198 remove_useless_addtr_insns (int check_notes)
3199 {
3200   rtx insn;
3201   static int pass = 0;
3202 
3203   /* This is fairly cheap, so always run it when optimizing.  */
3204   if (optimize > 0)
3205     {
3206       int fcmp_count = 0;
3207       int fbranch_count = 0;
3208 
3209       /* Walk all the insns in this function looking for fcmp & fbranch
3210 	 instructions.  Keep track of how many of each we find.  */
3211       for (insn = get_insns (); insn; insn = next_insn (insn))
3212 	{
3213 	  rtx tmp;
3214 
3215 	  /* Ignore anything that isn't an INSN or a JUMP_INSN.  */
3216 	  if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
3217 	    continue;
3218 
3219 	  tmp = PATTERN (insn);
3220 
3221 	  /* It must be a set.  */
3222 	  if (GET_CODE (tmp) != SET)
3223 	    continue;
3224 
3225 	  /* If the destination is CCFP, then we've found an fcmp insn.  */
3226 	  tmp = SET_DEST (tmp);
3227 	  if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3228 	    {
3229 	      fcmp_count++;
3230 	      continue;
3231 	    }
3232 
3233 	  tmp = PATTERN (insn);
3234 	  /* If this is an fbranch instruction, bump the fbranch counter.  */
3235 	  if (GET_CODE (tmp) == SET
3236 	      && SET_DEST (tmp) == pc_rtx
3237 	      && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3238 	      && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3239 	      && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3240 	      && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3241 	    {
3242 	      fbranch_count++;
3243 	      continue;
3244 	    }
3245 	}
3246 
3247 
3248       /* Find all floating point compare + branch insns.  If possible,
3249 	 reverse the comparison & the branch to avoid add,tr insns.  */
3250       for (insn = get_insns (); insn; insn = next_insn (insn))
3251 	{
3252 	  rtx tmp, next;
3253 
3254 	  /* Ignore anything that isn't an INSN.  */
3255 	  if (GET_CODE (insn) != INSN)
3256 	    continue;
3257 
3258 	  tmp = PATTERN (insn);
3259 
3260 	  /* It must be a set.  */
3261 	  if (GET_CODE (tmp) != SET)
3262 	    continue;
3263 
3264 	  /* The destination must be CCFP, which is register zero.  */
3265 	  tmp = SET_DEST (tmp);
3266 	  if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3267 	    continue;
3268 
3269 	  /* INSN should be a set of CCFP.
3270 
3271 	     See if the result of this insn is used in a reversed FP
3272 	     conditional branch.  If so, reverse our condition and
3273 	     the branch.  Doing so avoids useless add,tr insns.  */
3274 	  next = next_insn (insn);
3275 	  while (next)
3276 	    {
3277 	      /* Jumps, calls and labels stop our search.  */
3278 	      if (GET_CODE (next) == JUMP_INSN
3279 		  || GET_CODE (next) == CALL_INSN
3280 		  || GET_CODE (next) == CODE_LABEL)
3281 		break;
3282 
3283 	      /* As does another fcmp insn.  */
3284 	      if (GET_CODE (next) == INSN
3285 		  && GET_CODE (PATTERN (next)) == SET
3286 		  && GET_CODE (SET_DEST (PATTERN (next))) == REG
3287 		  && REGNO (SET_DEST (PATTERN (next))) == 0)
3288 		break;
3289 
3290 	      next = next_insn (next);
3291 	    }
3292 
3293 	  /* Is NEXT a branch insn?  */
3294 	  if (next
3295 	      && GET_CODE (next) == JUMP_INSN)
3296 	    {
3297 	      rtx pattern = PATTERN (next);
3298 
3299 	      /* If it is a reversed fp conditional branch (e.g. one using
3300 		 add,tr) and CCFP dies, then reverse our condition and the
3301 		 branch to avoid the add,tr.  */
3302 	      if (GET_CODE (pattern) == SET
3303 		  && SET_DEST (pattern) == pc_rtx
3304 		  && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3305 		  && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3306 		  && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3307 		  && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3308 		  && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3309 		  && (fcmp_count == fbranch_count
3310 		      || (check_notes
3311 			  && find_regno_note (next, REG_DEAD, 0))))
3312 		{
3313 		  /* Reverse the branch.  */
3314 		  tmp = XEXP (SET_SRC (pattern), 1);
3315 		  XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3316 		  XEXP (SET_SRC (pattern), 2) = tmp;
3317 		  INSN_CODE (next) = -1;
3318 
3319 		  /* Reverse our condition.  */
3320 		  tmp = PATTERN (insn);
3321 		  PUT_CODE (XEXP (tmp, 1),
3322 			    (reverse_condition_maybe_unordered
3323 			     (GET_CODE (XEXP (tmp, 1)))));
3324 		}
3325 	    }
3326 	}
3327     }
3328 
3329   pass = !pass;
3330 
3331 }
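
/* Sketch of the transformation performed above on a hypothetical RTL
   fragment (illustrative only).  A reversed fbranch of the form

	(set (pc) (if_then_else (ne (reg:CCFP 0) (const_int 0))
				(pc)
				(label_ref L)))

   has its arms swapped to

	(set (pc) (if_then_else (ne (reg:CCFP 0) (const_int 0))
				(label_ref L)
				(pc)))

   while the comparison code of the feeding fcmp is replaced by
   reverse_condition_maybe_unordered of itself, so the pair no longer
   requires an add,tr to skip around the branch.  */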
3332 
3333 /* You may have trouble believing this, but this is the 32-bit HP-PA
3334    stack layout.  Wow.
3335 
3336    Offset		Contents
3337 
3338    Variable arguments	(optional; any number may be allocated)
3339 
3340    SP-(4*(N+9))		arg word N
3341    	:		    :
3342       SP-56		arg word 5
3343       SP-52		arg word 4
3344 
3345    Fixed arguments	(must be allocated; may remain unused)
3346 
3347       SP-48		arg word 3
3348       SP-44		arg word 2
3349       SP-40		arg word 1
3350       SP-36		arg word 0
3351 
3352    Frame Marker
3353 
3354       SP-32		External Data Pointer (DP)
3355       SP-28		External sr4
3356       SP-24		External/stub RP (RP')
3357       SP-20		Current RP
3358       SP-16		Static Link
3359       SP-12		Clean up
3360       SP-8		Calling Stub RP (RP'')
3361       SP-4		Previous SP
3362 
3363    Top of Frame
3364 
3365       SP-0		Stack Pointer (points to next available address)
3366 
3367 */
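
/* For example, arg word 4 lives at SP-(4*(4+9)) = SP-52 and arg word 5
   at SP-(4*(5+9)) = SP-56, matching the table above.  */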
3368 
3369 /* This function saves registers as follows.  Registers marked with ' are
3370    this function's registers (as opposed to the previous function's).
3371    If a frame_pointer isn't needed, r4 is saved as a general register;
3372    the space for the frame pointer is still allocated, though, to keep
3373    things simple.
3374 
3375 
3376    Top of Frame
3377 
3378        SP (FP')		Previous FP
3379        SP + 4		Alignment filler (sigh)
3380        SP + 8		Space for locals reserved here.
3381        .
3382        .
3383        .
3384        SP + n		All call saved registers used.
3385        .
3386        .
3387        .
3388        SP + o		All call saved fp registers used.
3389        .
3390        .
3391        .
3392        SP + p (SP')	points to next available address.
3393 
3394 */
3395 
3396 /* Global variables set by output_function_prologue().  */
3397 /* Size of frame.  Need to know this to emit return insns from
3398    leaf procedures.  */
3399 static HOST_WIDE_INT actual_fsize, local_fsize;
3400 static int save_fregs;
3401 
3402 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3403    Handle case where DISP > 8k by using the add_high_const patterns.
3404 
3405    Note that in the DISP > 8k case, we leave the high part of the address
3406    in %r1.  There is code in hppa_expand_{prologue,epilogue} that knows this.  */
3407 
3408 static void
3409 store_reg (int reg, HOST_WIDE_INT disp, int base)
3410 {
3411   rtx insn, dest, src, basereg;
3412 
3413   src = gen_rtx_REG (word_mode, reg);
3414   basereg = gen_rtx_REG (Pmode, base);
3415   if (VAL_14_BITS_P (disp))
3416     {
3417       dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3418       insn = emit_move_insn (dest, src);
3419     }
3420   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3421     {
3422       rtx delta = GEN_INT (disp);
3423       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3424 
3425       emit_move_insn (tmpreg, delta);
3426       insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3427       if (DO_FRAME_NOTES)
3428 	{
3429 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3430 			gen_rtx_SET (VOIDmode, tmpreg,
3431 				     gen_rtx_PLUS (Pmode, basereg, delta)));
3432 	  RTX_FRAME_RELATED_P (insn) = 1;
3433 	}
3434       dest = gen_rtx_MEM (word_mode, tmpreg);
3435       insn = emit_move_insn (dest, src);
3436     }
3437   else
3438     {
3439       rtx delta = GEN_INT (disp);
3440       rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3441       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3442 
3443       emit_move_insn (tmpreg, high);
3444       dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3445       insn = emit_move_insn (dest, src);
3446       if (DO_FRAME_NOTES)
3447 	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3448 		      gen_rtx_SET (VOIDmode,
3449 				   gen_rtx_MEM (word_mode,
3450 						gen_rtx_PLUS (word_mode,
3451 							      basereg,
3452 							      delta)),
3453 				   src));
3454     }
3455 
3456   if (DO_FRAME_NOTES)
3457     RTX_FRAME_RELATED_P (insn) = 1;
3458 }
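
/* Minimal sketch (illustrative, not used by the port) of the
   displacement classification above: PA load/store displacements are
   14-bit signed immediates, so VAL_14_BITS_P accepts -8192..8191.
   Anything wider takes the HIGH/LO_SUM path, or the long-immediate
   path on 64-bit targets.  */

static int ATTRIBUTE_UNUSED
fits_14_bit_disp_sketch (HOST_WIDE_INT disp)
{
  return disp >= -8192 && disp < 8192;
}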
3459 
3460 /* Emit RTL to store REG at the memory location specified by BASE and then
3461    add MOD to BASE.  MOD must be <= 8k.  */
3462 
3463 static void
3464 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3465 {
3466   rtx insn, basereg, srcreg, delta;
3467 
3468   gcc_assert (VAL_14_BITS_P (mod));
3469 
3470   basereg = gen_rtx_REG (Pmode, base);
3471   srcreg = gen_rtx_REG (word_mode, reg);
3472   delta = GEN_INT (mod);
3473 
3474   insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3475   if (DO_FRAME_NOTES)
3476     {
3477       RTX_FRAME_RELATED_P (insn) = 1;
3478 
3479       /* RTX_FRAME_RELATED_P must be set on each frame related set
3480 	 in a parallel with more than one element.  */
3481       RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3482       RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3483     }
3484 }
3485 
3486 /* Emit RTL to set REG to the value specified by BASE+DISP.  Handle case
3487    where DISP > 8k by using the add_high_const patterns.  NOTE indicates
3488    whether to add a frame note or not.
3489 
3490    In the DISP > 8k case, we leave the high part of the address in %r1.
3491    There is code in hppa_expand_{prologue,epilogue} that knows about this.  */
3492 
3493 static void
3494 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3495 {
3496   rtx insn;
3497 
3498   if (VAL_14_BITS_P (disp))
3499     {
3500       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3501 			     plus_constant (gen_rtx_REG (Pmode, base), disp));
3502     }
3503   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3504     {
3505       rtx basereg = gen_rtx_REG (Pmode, base);
3506       rtx delta = GEN_INT (disp);
3507       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3508 
3509       emit_move_insn (tmpreg, delta);
3510       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3511 			     gen_rtx_PLUS (Pmode, tmpreg, basereg));
3512       if (DO_FRAME_NOTES)
3513 	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3514 		      gen_rtx_SET (VOIDmode, tmpreg,
3515 				   gen_rtx_PLUS (Pmode, basereg, delta)));
3516     }
3517   else
3518     {
3519       rtx basereg = gen_rtx_REG (Pmode, base);
3520       rtx delta = GEN_INT (disp);
3521       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3522 
3523       emit_move_insn (tmpreg,
3524 		      gen_rtx_PLUS (Pmode, basereg,
3525 				    gen_rtx_HIGH (Pmode, delta)));
3526       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3527 			     gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3528     }
3529 
3530   if (DO_FRAME_NOTES && note)
3531     RTX_FRAME_RELATED_P (insn) = 1;
3532 }
3533 
3534 HOST_WIDE_INT
3535 compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3536 {
3537   int freg_saved = 0;
3538   int i, j;
3539 
3540   /* The code in hppa_expand_prologue and hppa_expand_epilogue must
3541      be consistent with the rounding and size calculation done here.
3542      Change them at the same time.  */
3543 
3544   /* We do our own stack alignment.  First, round the size of the
3545      stack locals up to a word boundary.  */
3546   size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3547 
3548   /* Space for previous frame pointer + filler.  If any frame is
3549      allocated, we need to add in the STARTING_FRAME_OFFSET.  We
3550      waste some space here for the sake of HP compatibility.  The
3551      first slot is only used when the frame pointer is needed.  */
3552   if (size || frame_pointer_needed)
3553     size += STARTING_FRAME_OFFSET;
3554 
3555   /* If the current function calls __builtin_eh_return, then we need
3556      to allocate stack space for registers that will hold data for
3557      the exception handler.  */
3558   if (DO_FRAME_NOTES && crtl->calls_eh_return)
3559     {
3560       unsigned int i;
3561 
3562       for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3563 	continue;
3564       size += i * UNITS_PER_WORD;
3565     }
3566 
3567   /* Account for space used by the callee general register saves.  */
3568   for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3569     if (df_regs_ever_live_p (i))
3570       size += UNITS_PER_WORD;
3571 
3572   /* Account for space used by the callee floating point register saves.  */
3573   for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3574     if (df_regs_ever_live_p (i)
3575 	|| (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3576       {
3577 	freg_saved = 1;
3578 
3579 	/* We always save both halves of the FP register, so always
3580 	   increment the frame size by 8 bytes.  */
3581 	size += 8;
3582       }
3583 
3584   /* If any of the floating registers are saved, account for the
3585      alignment needed for the floating point register save block.  */
3586   if (freg_saved)
3587     {
3588       size = (size + 7) & ~7;
3589       if (fregs_live)
3590 	*fregs_live = 1;
3591     }
3592 
3593   /* The various ABIs include space for the outgoing parameters in the
3594      size of the current function's stack frame.  We don't need to align
3595      for the outgoing arguments as their alignment is set by the final
3596      rounding for the frame as a whole.  */
3597   size += crtl->outgoing_args_size;
3598 
3599   /* Allocate space for the fixed frame marker.  This space must be
3600      allocated for any function that makes calls or allocates
3601      stack space.  */
3602   if (!current_function_is_leaf || size)
3603     size += TARGET_64BIT ? 48 : 32;
3604 
3605   /* Finally, round to the preferred stack boundary.  */
3606   return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3607 	  & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3608 }
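
/* Worked example with hypothetical numbers for a 32-bit non-leaf
   function, assuming STARTING_FRAME_OFFSET is 8 and the preferred
   stack boundary is 64 bytes: 10 bytes of locals round up to 12;
   adding 8 for the frame pointer slot and filler gives 20; one callee
   GR save gives 24; 16 bytes of outgoing arguments give 40; the fixed
   32-byte frame marker gives 72; and the final rounding,
   (72 + 63) & ~63, yields a 128-byte frame.  */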
3609 
3610 /* Generate the assembly code for function entry.  FILE is a stdio
3611    stream to output the code to.  SIZE is an int: how many units of
3612    temporary storage to allocate.
3613 
3614    Refer to the array `regs_ever_live' to determine which registers to
3615    save; `regs_ever_live[I]' is nonzero if register number I is ever
3616    used in the function.  This function is responsible for knowing
3617    which registers should not be saved even if used.  */
3618 
3619 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3620    of memory.  If any fpu reg is used in the function, we allocate
3621    such a block here, at the bottom of the frame, just in case it's needed.
3622 
3623    If this function is a leaf procedure, then we may choose not
3624    to do a "save" insn.  The decision about whether or not
3625    to do this is made in regclass.c.  */
3626 
3627 static void
3628 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3629 {
3630   /* The function's label and associated .PROC must never be
3631      separated and must be output *after* any profiling declarations
3632      to avoid changing spaces/subspaces within a procedure.  */
3633   ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3634   fputs ("\t.PROC\n", file);
3635 
3636   /* hppa_expand_prologue does the dirty work now.  We just need
3637      to output the assembler directives which denote the start
3638      of a function.  */
3639   fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3640   if (current_function_is_leaf)
3641     fputs (",NO_CALLS", file);
3642   else
3643     fputs (",CALLS", file);
3644   if (rp_saved)
3645     fputs (",SAVE_RP", file);
3646 
3647   /* The SAVE_SP flag is used to indicate that register %r3 is stored
3648      at the beginning of the frame and that it is used as the frame
3649      pointer for the frame.  We do this because our current frame
3650      layout doesn't conform to that specified in the HP runtime
3651      documentation and we need a way to indicate to programs such as
3652      GDB where %r3 is saved.  The SAVE_SP flag was chosen because it
3653      isn't used by HP compilers but is supported by the assembler.
3654      However, SAVE_SP is supposed to indicate that the previous stack
3655      pointer has been saved in the frame marker.  */
3656   if (frame_pointer_needed)
3657     fputs (",SAVE_SP", file);
3658 
3659   /* Pass on information about the number of callee register saves
3660      performed in the prologue.
3661 
3662      The compiler is supposed to pass the highest register number
3663      saved, the assembler then has to adjust that number before
3664      entering it into the unwind descriptor (to account for any
3665      caller saved registers with lower register numbers than the
3666      first callee saved register).  */
3667   if (gr_saved)
3668     fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3669 
3670   if (fr_saved)
3671     fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3672 
3673   fputs ("\n\t.ENTRY\n", file);
3674 
3675   remove_useless_addtr_insns (0);
3676 }
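
/* For illustration, a typical 32-bit prologue emitted by the code
   above might read (all values hypothetical):

	.PROC
	.CALLINFO FRAME=128,CALLS,SAVE_RP,SAVE_SP,ENTRY_GR=3
	.ENTRY

   ENTRY_GR=3 here corresponds to gr_saved == 1, following the
   gr_saved + 2 bias explained above.  */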
3677 
3678 void
3679 hppa_expand_prologue (void)
3680 {
3681   int merge_sp_adjust_with_store = 0;
3682   HOST_WIDE_INT size = get_frame_size ();
3683   HOST_WIDE_INT offset;
3684   int i;
3685   rtx insn, tmpreg;
3686 
3687   gr_saved = 0;
3688   fr_saved = 0;
3689   save_fregs = 0;
3690 
3691   /* Compute total size for frame pointer, filler, locals and rounding to
3692      the next word boundary.  Similar code appears in compute_frame_size
3693      and must be changed in tandem with this code.  */
3694   local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3695   if (local_fsize || frame_pointer_needed)
3696     local_fsize += STARTING_FRAME_OFFSET;
3697 
3698   actual_fsize = compute_frame_size (size, &save_fregs);
3699 
3700   /* Compute a few things we will use often.  */
3701   tmpreg = gen_rtx_REG (word_mode, 1);
3702 
3703   /* Save RP first.  The calling conventions manual states RP will
3704      always be stored into the caller's frame at sp - 20 or sp - 16
3705      depending on which ABI is in use.  */
3706   if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3707     {
3708       store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3709       rp_saved = true;
3710     }
3711   else
3712     rp_saved = false;
3713 
3714   /* Allocate the local frame and set up the frame pointer if needed.  */
3715   if (actual_fsize != 0)
3716     {
3717       if (frame_pointer_needed)
3718 	{
3719 	  /* Copy the old frame pointer temporarily into %r1.  Set up the
3720 	     new stack pointer, then store away the saved old frame pointer
3721 	     into the stack at sp and at the same time update the stack
3722 	     pointer by actual_fsize bytes.  There are two versions: the
3723 	     first handles small (<8k) frames, the second handles large
3724 	     (>=8k) frames.  */
3725 	  insn = emit_move_insn (tmpreg, frame_pointer_rtx);
3726 	  if (DO_FRAME_NOTES)
3727 	    RTX_FRAME_RELATED_P (insn) = 1;
3728 
3729 	  insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
3730 	  if (DO_FRAME_NOTES)
3731 	    RTX_FRAME_RELATED_P (insn) = 1;
3732 
3733 	  if (VAL_14_BITS_P (actual_fsize))
3734 	    store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3735 	  else
3736 	    {
3737 	      /* It is incorrect to store the saved frame pointer at *sp,
3738 		 then increment sp (writes beyond the current stack boundary).
3739 
3740 		 So instead use stwm to store at *sp and post-increment the
3741 		 stack pointer as an atomic operation.  Then increment sp to
3742 		 finish allocating the new frame.  */
3743 	      HOST_WIDE_INT adjust1 = 8192 - 64;
3744 	      HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3745 
3746 	      store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3747 	      set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3748 			      adjust2, 1);
3749 	    }
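	  /* Worked example with a hypothetical frame size: for
	     actual_fsize = 20000, adjust1 = 8192 - 64 = 8128 is
	     allocated by the atomic stwm above, and
	     adjust2 = 20000 - 8128 = 11872 completes the frame.  */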
3750 
3751 	  /* We set SAVE_SP in frames that need a frame pointer.  Thus,
3752 	     we need to store the previous stack pointer (frame pointer)
3753 	     into the frame marker on targets that use the HP unwind
3754 	     library.  This allows the HP unwind library to be used to
3755 	     unwind GCC frames.  However, we are not fully compatible
3756 	     with the HP library because our frame layout differs from
3757 	     that specified in the HP runtime specification.
3758 
3759 	     We don't want a frame note on this instruction as the frame
3760 	     marker moves during dynamic stack allocation.
3761 
3762 	     This instruction also serves as a blockage to prevent
3763 	     register spills from being scheduled before the stack
3764 	     pointer is raised.  This is necessary as we store
3765 	     registers using the frame pointer as a base register,
3766 	     and the frame pointer is set before sp is raised.  */
3767 	  if (TARGET_HPUX_UNWIND_LIBRARY)
3768 	    {
3769 	      rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3770 				       GEN_INT (TARGET_64BIT ? -8 : -4));
3771 
3772 	      emit_move_insn (gen_rtx_MEM (word_mode, addr),
3773 			      frame_pointer_rtx);
3774 	    }
3775 	  else
3776 	    emit_insn (gen_blockage ());
3777 	}
3778       /* no frame pointer needed.  */
3779       else
3780 	{
3781 	  /* In some cases we can perform the first callee register save
3782 	     and allocate the stack frame at the same time.  If so, just
3783 	     make a note of it and defer allocating the frame until saving
3784 	     the callee registers.  */
3785 	  if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
3786 	    merge_sp_adjust_with_store = 1;
3787 	  /* Cannot optimize.  Adjust the stack frame by actual_fsize
3788 	     bytes.  */
3789 	  else
3790 	    set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3791 			    actual_fsize, 1);
3792 	}
3793     }
3794 
3795   /* Normal register save.
3796 
3797      Do not save the frame pointer in the frame_pointer_needed case.  It
3798      was done earlier.  */
3799   if (frame_pointer_needed)
3800     {
3801       offset = local_fsize;
3802 
3803       /* Saving the EH return data registers in the frame is the simplest
3804 	 way to get the frame unwind information emitted.  We put them
3805 	 just before the general registers.  */
3806       if (DO_FRAME_NOTES && crtl->calls_eh_return)
3807 	{
3808 	  unsigned int i, regno;
3809 
3810 	  for (i = 0; ; ++i)
3811 	    {
3812 	      regno = EH_RETURN_DATA_REGNO (i);
3813 	      if (regno == INVALID_REGNUM)
3814 		break;
3815 
3816 	      store_reg (regno, offset, FRAME_POINTER_REGNUM);
3817 	      offset += UNITS_PER_WORD;
3818 	    }
3819 	}
3820 
3821       for (i = 18; i >= 4; i--)
3822 	if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3823 	  {
3824 	    store_reg (i, offset, FRAME_POINTER_REGNUM);
3825 	    offset += UNITS_PER_WORD;
3826 	    gr_saved++;
3827 	  }
3828       /* Account for %r3 which is saved in a special place.  */
3829       gr_saved++;
3830     }
3831   /* No frame pointer needed.  */
3832   else
3833     {
3834       offset = local_fsize - actual_fsize;
3835 
3836       /* Saving the EH return data registers in the frame is the simplest
3837          way to get the frame unwind information emitted.  */
3838       if (DO_FRAME_NOTES && crtl->calls_eh_return)
3839 	{
3840 	  unsigned int i, regno;
3841 
3842 	  for (i = 0; ; ++i)
3843 	    {
3844 	      regno = EH_RETURN_DATA_REGNO (i);
3845 	      if (regno == INVALID_REGNUM)
3846 		break;
3847 
3848 	      /* If merge_sp_adjust_with_store is nonzero, then we can
3849 		 optimize the first save.  */
3850 	      if (merge_sp_adjust_with_store)
3851 		{
3852 		  store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3853 		  merge_sp_adjust_with_store = 0;
3854 		}
3855 	      else
3856 		store_reg (regno, offset, STACK_POINTER_REGNUM);
3857 	      offset += UNITS_PER_WORD;
3858 	    }
3859 	}
3860 
3861       for (i = 18; i >= 3; i--)
3862       	if (df_regs_ever_live_p (i) && ! call_used_regs[i])
3863 	  {
3864 	    /* If merge_sp_adjust_with_store is nonzero, then we can
3865 	       optimize the first GR save.  */
3866 	    if (merge_sp_adjust_with_store)
3867 	      {
3868 		store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
3869 		merge_sp_adjust_with_store = 0;
3870 	      }
3871 	    else
3872 	      store_reg (i, offset, STACK_POINTER_REGNUM);
3873 	    offset += UNITS_PER_WORD;
3874 	    gr_saved++;
3875 	  }
3876 
3877       /* If we wanted to merge the SP adjustment with a GR save, but we never
3878 	 did any GR saves, then just emit the adjustment here.  */
3879       if (merge_sp_adjust_with_store)
3880 	set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3881 			actual_fsize, 1);
3882     }
3883 
3884   /* The hppa calling conventions say that %r19, the pic offset
3885      register, is saved at sp - 32 (in this function's frame)
3886      when generating PIC code.  FIXME:  What is the correct thing
3887      to do for functions which make no calls and allocate no
3888      frame?  Do we need to allocate a frame, or can we just omit
3889      the save?   For now we'll just omit the save.
3890 
3891      We don't want a note on this insn as the frame marker can
3892      move if there is a dynamic stack allocation.  */
3893   if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3894     {
3895       rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
3896 
3897       emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
3898 
3899     }
3900 
3901   /* Align pointer properly (doubleword boundary).  */
3902   offset = (offset + 7) & ~7;
3903 
3904   /* Floating point register store.  */
3905   if (save_fregs)
3906     {
3907       rtx base;
3908 
3909       /* First get the frame or stack pointer to the start of the FP register
3910 	 save area.  */
3911       if (frame_pointer_needed)
3912 	{
3913 	  set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
3914 	  base = frame_pointer_rtx;
3915 	}
3916       else
3917 	{
3918 	  set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3919 	  base = stack_pointer_rtx;
3920 	}
3921 
3922       /* Now actually save the FP registers.  */
3923       for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3924 	{
3925 	  if (df_regs_ever_live_p (i)
3926 	      || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3927 	    {
3928 	      rtx addr, insn, reg;
3929 	      addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3930 	      reg = gen_rtx_REG (DFmode, i);
3931 	      insn = emit_move_insn (addr, reg);
3932 	      if (DO_FRAME_NOTES)
3933 		{
3934 		  RTX_FRAME_RELATED_P (insn) = 1;
3935 		  if (TARGET_64BIT)
3936 		    {
3937 		      rtx mem = gen_rtx_MEM (DFmode,
3938 					     plus_constant (base, offset));
3939 		      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3940 				    gen_rtx_SET (VOIDmode, mem, reg));
3941 		    }
3942 		  else
3943 		    {
3944 		      rtx meml = gen_rtx_MEM (SFmode,
3945 					      plus_constant (base, offset));
3946 		      rtx memr = gen_rtx_MEM (SFmode,
3947 					      plus_constant (base, offset + 4));
3948 		      rtx regl = gen_rtx_REG (SFmode, i);
3949 		      rtx regr = gen_rtx_REG (SFmode, i + 1);
3950 		      rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
3951 		      rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
3952 		      rtvec vec;
3953 
3954 		      RTX_FRAME_RELATED_P (setl) = 1;
3955 		      RTX_FRAME_RELATED_P (setr) = 1;
3956 		      vec = gen_rtvec (2, setl, setr);
3957 		      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3958 				    gen_rtx_SEQUENCE (VOIDmode, vec));
3959 		    }
3960 		}
3961 	      offset += GET_MODE_SIZE (DFmode);
3962 	      fr_saved++;
3963 	    }
3964 	}
3965     }
3966 }
3967 
3968 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
3969    Handle case where DISP > 8k by using the add_high_const patterns.  */
3970 
3971 static void
3972 load_reg (int reg, HOST_WIDE_INT disp, int base)
3973 {
3974   rtx dest = gen_rtx_REG (word_mode, reg);
3975   rtx basereg = gen_rtx_REG (Pmode, base);
3976   rtx src;
3977 
3978   if (VAL_14_BITS_P (disp))
3979     src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3980   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3981     {
3982       rtx delta = GEN_INT (disp);
3983       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3984 
3985       emit_move_insn (tmpreg, delta);
3986       if (TARGET_DISABLE_INDEXING)
3987 	{
3988 	  emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3989 	  src = gen_rtx_MEM (word_mode, tmpreg);
3990 	}
3991       else
3992 	src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3993     }
3994   else
3995     {
3996       rtx delta = GEN_INT (disp);
3997       rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3998       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3999 
4000       emit_move_insn (tmpreg, high);
4001       src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4002     }
4003 
4004   emit_move_insn (dest, src);
4005 }
4006 
4007 /* Update the total code bytes output to the text section.  */
4008 
4009 static void
4010 update_total_code_bytes (unsigned int nbytes)
4011 {
4012   if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4013       && !IN_NAMED_SECTION_P (cfun->decl))
4014     {
4015       unsigned int old_total = total_code_bytes;
4016 
4017       total_code_bytes += nbytes;
4018 
4019       /* Be prepared to handle overflows.  */
4020       if (old_total > total_code_bytes)
4021         total_code_bytes = UINT_MAX;
4022     }
4023 }
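
/* Sketch of the wrap-around check above: with unsigned arithmetic the
   new total is smaller than the old one exactly when the addition
   overflowed; e.g. (hypothetically) UINT_MAX - 4 + 8 wraps to 3, and
   the running total then saturates at UINT_MAX.  */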
4024 
4025 /* This function generates the assembly code for function exit.
4026    Args are as for output_function_prologue ().
4027 
4028    The function epilogue should not depend on the current stack
4029    pointer!  It should use the frame pointer only.  This is mandatory
4030    because of alloca; we also take advantage of it to omit stack
4031    adjustments before returning.  */
4032 
4033 static void
4034 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4035 {
4036   rtx insn = get_last_insn ();
4037 
4038   last_address = 0;
4039 
4040   /* hppa_expand_epilogue does the dirty work now.  We just need
4041      to output the assembler directives which denote the end
4042      of a function.
4043 
4044      To make debuggers happy, emit a nop if the epilogue was completely
4045      eliminated due to a volatile call as the last insn in the
4046      current function.  That way the return address (in %r2) will
4047      always point to a valid instruction in the current function.  */
4048 
4049   /* Get the last real insn.  */
4050   if (GET_CODE (insn) == NOTE)
4051     insn = prev_real_insn (insn);
4052 
4053   /* If it is a sequence, then look inside.  */
4054   if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
4055     insn = XVECEXP (PATTERN (insn), 0, 0);
4056 
4057   /* If insn is a CALL_INSN, then it must be a call to a volatile
4058      function (otherwise there would be epilogue insns).  */
4059   if (insn && GET_CODE (insn) == CALL_INSN)
4060     {
4061       fputs ("\tnop\n", file);
4062       last_address += 4;
4063     }
4064 
4065   fputs ("\t.EXIT\n\t.PROCEND\n", file);
4066 
4067   if (TARGET_SOM && TARGET_GAS)
4068     {
4069       /* We are done with this subspace except possibly for some additional
4070 	 debug information.  Forget that we are in this subspace to ensure
4071 	 that the next function is output in its own subspace.  */
4072       in_section = NULL;
4073       cfun->machine->in_nsubspa = 2;
4074     }
4075 
4076   if (INSN_ADDRESSES_SET_P ())
4077     {
4078       insn = get_last_nonnote_insn ();
4079       last_address += INSN_ADDRESSES (INSN_UID (insn));
4080       if (INSN_P (insn))
4081 	last_address += insn_default_length (insn);
4082       last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4083 		      & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4084     }
4085   else
4086     last_address = UINT_MAX;
4087 
4088   /* Finally, update the total number of code bytes output so far.  */
4089   update_total_code_bytes (last_address);
4090 }
4091 
4092 void
4093 hppa_expand_epilogue (void)
4094 {
4095   rtx tmpreg;
4096   HOST_WIDE_INT offset;
4097   HOST_WIDE_INT ret_off = 0;
4098   int i;
4099   int merge_sp_adjust_with_load = 0;
4100 
4101   /* We will use this often.  */
4102   tmpreg = gen_rtx_REG (word_mode, 1);
4103 
4104   /* Try to restore RP early to avoid load/use interlocks when
4105      RP gets used in the return (bv) instruction.  This appears to still
4106      be necessary even when we schedule the prologue and epilogue.  */
4107   if (rp_saved)
4108     {
4109       ret_off = TARGET_64BIT ? -16 : -20;
4110       if (frame_pointer_needed)
4111 	{
4112 	  load_reg (2, ret_off, FRAME_POINTER_REGNUM);
4113 	  ret_off = 0;
4114 	}
4115       else
4116 	{
4117 	  /* No frame pointer, and stack is smaller than 8k.  */
4118 	  if (VAL_14_BITS_P (ret_off - actual_fsize))
4119 	    {
4120 	      load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4121 	      ret_off = 0;
4122 	    }
4123 	}
4124     }
4125 
4126   /* General register restores.  */
4127   if (frame_pointer_needed)
4128     {
4129       offset = local_fsize;
4130 
4131       /* If the current function calls __builtin_eh_return, then we need
4132          to restore the saved EH data registers.  */
4133       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4134 	{
4135 	  unsigned int i, regno;
4136 
4137 	  for (i = 0; ; ++i)
4138 	    {
4139 	      regno = EH_RETURN_DATA_REGNO (i);
4140 	      if (regno == INVALID_REGNUM)
4141 		break;
4142 
4143 	      load_reg (regno, offset, FRAME_POINTER_REGNUM);
4144 	      offset += UNITS_PER_WORD;
4145 	    }
4146 	}
4147 
4148       for (i = 18; i >= 4; i--)
4149 	if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4150 	  {
4151 	    load_reg (i, offset, FRAME_POINTER_REGNUM);
4152 	    offset += UNITS_PER_WORD;
4153 	  }
4154     }
4155   else
4156     {
4157       offset = local_fsize - actual_fsize;
4158 
4159       /* If the current function calls __builtin_eh_return, then we need
4160          to restore the saved EH data registers.  */
4161       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4162 	{
4163 	  unsigned int i, regno;
4164 
4165 	  for (i = 0; ; ++i)
4166 	    {
4167 	      regno = EH_RETURN_DATA_REGNO (i);
4168 	      if (regno == INVALID_REGNUM)
4169 		break;
4170 
4171 	      /* Only for the first load.
4172 	         merge_sp_adjust_with_load holds the register load
4173 	         with which we will merge the sp adjustment.  */
4174 	      if (merge_sp_adjust_with_load == 0
4175 		  && local_fsize == 0
4176 		  && VAL_14_BITS_P (-actual_fsize))
4177 	        merge_sp_adjust_with_load = regno;
4178 	      else
4179 		load_reg (regno, offset, STACK_POINTER_REGNUM);
4180 	      offset += UNITS_PER_WORD;
4181 	    }
4182 	}
4183 
4184       for (i = 18; i >= 3; i--)
4185 	{
4186 	  if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4187 	    {
4188 	      /* Only for the first load.
4189 	         merge_sp_adjust_with_load holds the register load
4190 	         with which we will merge the sp adjustment.  */
4191 	      if (merge_sp_adjust_with_load == 0
4192 		  && local_fsize == 0
4193 		  && VAL_14_BITS_P (-actual_fsize))
4194 	        merge_sp_adjust_with_load = i;
4195 	      else
4196 		load_reg (i, offset, STACK_POINTER_REGNUM);
4197 	      offset += UNITS_PER_WORD;
4198 	    }
4199 	}
4200     }
4201 
4202   /* Align pointer properly (doubleword boundary).  */
4203   offset = (offset + 7) & ~7;
4204 
4205   /* FP register restores.  */
4206   if (save_fregs)
4207     {
4208       /* Adjust the register to index off of.  */
4209       if (frame_pointer_needed)
4210 	set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
4211       else
4212 	set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4213 
4214       /* Actually do the restores now.  */
4215       for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4216 	if (df_regs_ever_live_p (i)
4217 	    || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4218 	  {
4219 	    rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
4220 	    rtx dest = gen_rtx_REG (DFmode, i);
4221 	    emit_move_insn (dest, src);
4222 	  }
4223     }
4224 
4225   /* Emit a blockage insn here to keep these insns from being moved to
4226      an earlier spot in the epilogue, or into the main instruction stream.
4227 
4228      This is necessary as we must not cut the stack back before all the
4229      restores are finished.  */
4230   emit_insn (gen_blockage ());
4231 
4232   /* Reset stack pointer (and possibly frame pointer).  The stack
4233      pointer is initially set to fp + 64 to avoid a race condition.  */
4234   if (frame_pointer_needed)
4235     {
4236       rtx delta = GEN_INT (-64);
4237 
4238       set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
4239       emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
4240     }
4241   /* If we were deferring a callee register restore, do it now.  */
4242   else if (merge_sp_adjust_with_load)
4243     {
4244       rtx delta = GEN_INT (-actual_fsize);
4245       rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4246 
4247       emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4248     }
4249   else if (actual_fsize != 0)
4250     set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4251 		    - actual_fsize, 0);
4252 
4253   /* If we haven't restored %r2 yet (no frame pointer, and a stack
4254      frame greater than 8k), do so now.  */
4255   if (ret_off != 0)
4256     load_reg (2, ret_off, STACK_POINTER_REGNUM);
4257 
4258   if (DO_FRAME_NOTES && crtl->calls_eh_return)
4259     {
4260       rtx sa = EH_RETURN_STACKADJ_RTX;
4261 
4262       emit_insn (gen_blockage ());
4263       emit_insn (TARGET_64BIT
4264 		 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4265 		 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4266     }
4267 }
4268 
4269 rtx
4270 hppa_pic_save_rtx (void)
4271 {
4272   return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4273 }
4274 
4275 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4276 #define NO_DEFERRED_PROFILE_COUNTERS 0
4277 #endif
4278 
4279 
4280 /* Vector of funcdef numbers.  */
4281 static VEC(int,heap) *funcdef_nos;
4282 
4283 /* Output deferred profile counters.  */
4284 static void
4285 output_deferred_profile_counters (void)
4286 {
4287   unsigned int i;
4288   int align, n;
4289 
4290   if (VEC_empty (int, funcdef_nos))
4291     return;
4292 
4293   switch_to_section (data_section);
4294   align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4295   ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4296 
4297   for (i = 0; VEC_iterate (int, funcdef_nos, i, n); i++)
4298     {
4299       targetm.asm_out.internal_label (asm_out_file, "LP", n);
4300       assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4301     }
4302 
4303   VEC_free (int, heap, funcdef_nos);
4304 }
4305 
4306 void
4307 hppa_profile_hook (int label_no)
4308 {
4309   /* We use SImode for the address of the function in both 32 and
4310      64-bit code to avoid having to provide DImode versions of the
4311      lcla2 and load_offset_label_address insn patterns.  */
4312   rtx reg = gen_reg_rtx (SImode);
4313   rtx label_rtx = gen_label_rtx ();
4314   rtx begin_label_rtx, call_insn;
4315   char begin_label_name[16];
4316 
4317   ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4318 			       label_no);
4319   begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4320 
4321   if (TARGET_64BIT)
4322     emit_move_insn (arg_pointer_rtx,
4323 		    gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4324 				  GEN_INT (64)));
4325 
4326   emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4327 
4328   /* The address of the function is loaded into %r25 with an instruction-
4329      relative sequence that avoids the use of relocations.  The sequence
4330      is split so that the load_offset_label_address instruction can
4331      occupy the delay slot of the call to _mcount.  */
4332   if (TARGET_PA_20)
4333     emit_insn (gen_lcla2 (reg, label_rtx));
4334   else
4335     emit_insn (gen_lcla1 (reg, label_rtx));
4336 
4337   emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4338 					    reg, begin_label_rtx, label_rtx));
4339 
4340 #if !NO_DEFERRED_PROFILE_COUNTERS
4341   {
4342     rtx count_label_rtx, addr, r24;
4343     char count_label_name[16];
4344 
4345     VEC_safe_push (int, heap, funcdef_nos, label_no);
4346     ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4347     count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
4348 
4349     addr = force_reg (Pmode, count_label_rtx);
4350     r24 = gen_rtx_REG (Pmode, 24);
4351     emit_move_insn (r24, addr);
4352 
4353     call_insn =
4354       emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4355 					     gen_rtx_SYMBOL_REF (Pmode,
4356 								 "_mcount")),
4357 				GEN_INT (TARGET_64BIT ? 24 : 12)));
4358 
4359     use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4360   }
4361 #else
4362 
4363   call_insn =
4364     emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4365 					   gen_rtx_SYMBOL_REF (Pmode,
4366 							       "_mcount")),
4367 			      GEN_INT (TARGET_64BIT ? 16 : 8)));
4368 
4369 #endif
4370 
4371   use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4372   use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4373 
4374   /* Indicate the _mcount call cannot throw, nor will it execute a
4375      non-local goto.  */
4376   make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4377 }
4378 
4379 /* Fetch the return address for the frame COUNT steps up from
4380    the current frame, after the prologue.  FRAMEADDR is the
4381    frame pointer of the COUNT frame.
4382 
4383    We want to ignore any export stub remnants here.  To handle this,
4384    we examine the code at the return address, and if it is an export
4385    stub, we return a memory rtx for the stub return address stored
4386    at frame-24.
4387 
4388    The value returned is used in two different ways:
4389 
4390 	1. To find a function's caller.
4391 
4392 	2. To change the return address for a function.
4393 
4394    This function handles most instances of case 1; however, it will
4395    fail if there are two levels of stubs to execute on the return
4396    path.  The only way I believe that can happen is if the return value
4397    needs a parameter relocation, which never happens for C code.
4398 
4399    This function handles most instances of case 2; however, it will
4400    fail if we did not originally have stub code on the return path
4401    but will need stub code on the new return path.  This can happen if
4402    the caller & callee are both in the main program, but the new
4403    return location is in a shared library.  */
4404 
4405 rtx
4406 return_addr_rtx (int count, rtx frameaddr)
4407 {
4408   rtx label;
4409   rtx rp;
4410   rtx saved_rp;
4411   rtx ins;
4412 
4413   /* Instruction stream at the normal return address for the export stub:
4414 
4415 	0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
4416 	0x004010a1 | stub+12:  ldsid (sr0,rp),r1
4417 	0x00011820 | stub+16:  mtsp r1,sr0
4418 	0xe0400002 | stub+20:  be,n 0(sr0,rp)
4419 
4420      0xe0400002 must be specified as its signed 32-bit value, -532676606
4421      (0xe0400002 - 2^32), so it isn't rejected as invalid on 64-bit hosts.  */
4422 
4423   HOST_WIDE_INT insns[4] = {0x4bc23fd1, 0x004010a1, 0x00011820, -532676606};
4424   int i;
4425 
4426   if (count != 0)
4427     return NULL_RTX;
4428 
4429   rp = get_hard_reg_initial_val (Pmode, 2);
4430 
4431   if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4432     return rp;
4433 
4434   /* If there is no export stub then just use the value saved from
4435      the return pointer register.  */
4436 
4437   saved_rp = gen_reg_rtx (Pmode);
4438   emit_move_insn (saved_rp, rp);
4439 
4440   /* Get pointer to the instruction stream.  We have to mask out the
4441      privilege level from the two low order bits of the return address
4442      pointer here so that ins will point to the start of the first
4443      instruction that would have been executed if we returned.  */
4444   ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4445   label = gen_label_rtx ();
4446 
4447   /* Check the instruction stream at the normal return address for the
4448      export stub.  If it is an export stub, then our return address is
4449      really in -24[frameaddr].  */
4450 
4451   for (i = 0; i < 3; i++)
4452     {
4453       rtx op0 = gen_rtx_MEM (SImode, plus_constant (ins, i * 4));
4454       rtx op1 = GEN_INT (insns[i]);
4455       emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4456     }
4457 
4458   /* Here we know that our return address points to an export
4459      stub.  We don't want to return the address of the export stub,
4460      but rather the return address of the export stub.  That return
4461      address is stored at -24[frameaddr].  */
4462 
4463   emit_move_insn (saved_rp,
4464 		  gen_rtx_MEM (Pmode,
4465 			       memory_address (Pmode,
4466 					       plus_constant (frameaddr,
4467 							      -24))));
4468 
4469   emit_label (label);
4470 
4471   return saved_rp;
4472 }
4473 
4474 void
4475 emit_bcond_fp (rtx operands[])
4476 {
4477   enum rtx_code code = GET_CODE (operands[0]);
4478   rtx operand0 = operands[1];
4479   rtx operand1 = operands[2];
4480   rtx label = operands[3];
4481 
4482   emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4483 		          gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4484 
4485   emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4486 			       gen_rtx_IF_THEN_ELSE (VOIDmode,
4487 						     gen_rtx_fmt_ee (NE,
4488 							      VOIDmode,
4489 							      gen_rtx_REG (CCFPmode, 0),
4490 							      const0_rtx),
4491 						     gen_rtx_LABEL_REF (VOIDmode, label),
4492 						     pc_rtx)));
4493 
4494 }
4495 
4496 /* Adjust the cost of a scheduling dependency.  Return the new cost of
4497    a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
4498 
4499 static int
4500 pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4501 {
4502   enum attr_type attr_type;
4503 
4504   /* Don't adjust costs for a pa8000 chip; also, do not adjust any
4505      true dependencies, as they are described with bypasses now.  */
4506   if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4507     return cost;
4508 
4509   if (! recog_memoized (insn))
4510     return 0;
4511 
4512   attr_type = get_attr_type (insn);
4513 
4514   switch (REG_NOTE_KIND (link))
4515     {
4516     case REG_DEP_ANTI:
4517       /* Anti dependency; DEP_INSN reads a register that INSN writes some
4518 	 cycles later.  */
4519 
4520       if (attr_type == TYPE_FPLOAD)
4521 	{
4522 	  rtx pat = PATTERN (insn);
4523 	  rtx dep_pat = PATTERN (dep_insn);
4524 	  if (GET_CODE (pat) == PARALLEL)
4525 	    {
4526 	      /* This happens for the fldXs,mb patterns.  */
4527 	      pat = XVECEXP (pat, 0, 0);
4528 	    }
4529 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4530 	    /* If this happens, we have to extend this to schedule
4531 	       optimally.  Return 0 for now.  */
4532 	    return 0;
4533 
4534 	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4535 	    {
4536 	      if (! recog_memoized (dep_insn))
4537 		return 0;
4538 	      switch (get_attr_type (dep_insn))
4539 		{
4540 		case TYPE_FPALU:
4541 		case TYPE_FPMULSGL:
4542 		case TYPE_FPMULDBL:
4543 		case TYPE_FPDIVSGL:
4544 		case TYPE_FPDIVDBL:
4545 		case TYPE_FPSQRTSGL:
4546 		case TYPE_FPSQRTDBL:
4547 		  /* A fpload can't be issued until one cycle before a
4548 		     preceding arithmetic operation has finished if
4549 		     the target of the fpload is any of the sources
4550 		     (or destination) of the arithmetic operation.  */
4551 		  return insn_default_latency (dep_insn) - 1;
4552 
4553 		default:
4554 		  return 0;
4555 		}
4556 	    }
4557 	}
4558       else if (attr_type == TYPE_FPALU)
4559 	{
4560 	  rtx pat = PATTERN (insn);
4561 	  rtx dep_pat = PATTERN (dep_insn);
4562 	  if (GET_CODE (pat) == PARALLEL)
4563 	    {
4564 	      /* This happens for the fldXs,mb patterns.  */
4565 	      pat = XVECEXP (pat, 0, 0);
4566 	    }
4567 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4568 	    /* If this happens, we have to extend this to schedule
4569 	       optimally.  Return 0 for now.  */
4570 	    return 0;
4571 
4572 	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4573 	    {
4574 	      if (! recog_memoized (dep_insn))
4575 		return 0;
4576 	      switch (get_attr_type (dep_insn))
4577 		{
4578 		case TYPE_FPDIVSGL:
4579 		case TYPE_FPDIVDBL:
4580 		case TYPE_FPSQRTSGL:
4581 		case TYPE_FPSQRTDBL:
4582 		  /* An ALU flop can't be issued until two cycles before a
4583 		     preceding divide or sqrt operation has finished if
4584 		     the target of the ALU flop is any of the sources
4585 		     (or destination) of the divide or sqrt operation.  */
4586 		  return insn_default_latency (dep_insn) - 2;
4587 
4588 		default:
4589 		  return 0;
4590 		}
4591 	    }
4592 	}
4593 
4594       /* For other anti dependencies, the cost is 0.  */
4595       return 0;
4596 
4597     case REG_DEP_OUTPUT:
4598       /* Output dependency; DEP_INSN writes a register that INSN writes some
4599 	 cycles later.  */
4600       if (attr_type == TYPE_FPLOAD)
4601 	{
4602 	  rtx pat = PATTERN (insn);
4603 	  rtx dep_pat = PATTERN (dep_insn);
4604 	  if (GET_CODE (pat) == PARALLEL)
4605 	    {
4606 	      /* This happens for the fldXs,mb patterns.  */
4607 	      pat = XVECEXP (pat, 0, 0);
4608 	    }
4609 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4610 	    /* If this happens, we have to extend this to schedule
4611 	       optimally.  Return 0 for now.  */
4612 	    return 0;
4613 
4614 	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4615 	    {
4616 	      if (! recog_memoized (dep_insn))
4617 		return 0;
4618 	      switch (get_attr_type (dep_insn))
4619 		{
4620 		case TYPE_FPALU:
4621 		case TYPE_FPMULSGL:
4622 		case TYPE_FPMULDBL:
4623 		case TYPE_FPDIVSGL:
4624 		case TYPE_FPDIVDBL:
4625 		case TYPE_FPSQRTSGL:
4626 		case TYPE_FPSQRTDBL:
4627 		  /* A fpload can't be issued until one cycle before a
4628 		     preceding arithmetic operation has finished if
4629 		     the target of the fpload is the destination of the
4630 		     arithmetic operation.
4631 
4632 		     Exception: For PA7100LC, PA7200 and PA7300, the cost
4633 		     is 3 cycles, unless they bundle together.  We also
4634 		     pay the penalty if the second insn is a fpload.  */
4635 		  return insn_default_latency (dep_insn) - 1;
4636 
4637 		default:
4638 		  return 0;
4639 		}
4640 	    }
4641 	}
4642       else if (attr_type == TYPE_FPALU)
4643 	{
4644 	  rtx pat = PATTERN (insn);
4645 	  rtx dep_pat = PATTERN (dep_insn);
4646 	  if (GET_CODE (pat) == PARALLEL)
4647 	    {
4648 	      /* This happens for the fldXs,mb patterns.  */
4649 	      pat = XVECEXP (pat, 0, 0);
4650 	    }
4651 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4652 	    /* If this happens, we have to extend this to schedule
4653 	       optimally.  Return 0 for now.  */
4654 	    return 0;
4655 
4656 	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4657 	    {
4658 	      if (! recog_memoized (dep_insn))
4659 		return 0;
4660 	      switch (get_attr_type (dep_insn))
4661 		{
4662 		case TYPE_FPDIVSGL:
4663 		case TYPE_FPDIVDBL:
4664 		case TYPE_FPSQRTSGL:
4665 		case TYPE_FPSQRTDBL:
4666 		  /* An ALU flop can't be issued until two cycles before a
4667 		     preceding divide or sqrt operation has finished if
4668 		     the target of the ALU flop is also the target of
4669 		     the divide or sqrt operation.  */
4670 		  return insn_default_latency (dep_insn) - 2;
4671 
4672 		default:
4673 		  return 0;
4674 		}
4675 	    }
4676 	}
4677 
4678       /* For other output dependencies, the cost is 0.  */
4679       return 0;
4680 
4681     default:
4682       gcc_unreachable ();
4683     }
4684 }
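
/* Worked example with hypothetical latencies: if an fpload is
   anti-dependent on an FP multiply whose default latency is 3, and
   the fpload's target register appears among the multiply's sources,
   the code above charges insn_default_latency (dep_insn) - 1 = 2
   cycles instead of the usual 0 for an anti dependency.  */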
4685 
4686 /* Adjust scheduling priorities.  We use this to try and keep addil
4687    and the next use of %r1 close together.  */
4688 static int
4689 pa_adjust_priority (rtx insn, int priority)
4690 {
4691   rtx set = single_set (insn);
4692   rtx src, dest;
4693   if (set)
4694     {
4695       src = SET_SRC (set);
4696       dest = SET_DEST (set);
4697       if (GET_CODE (src) == LO_SUM
4698 	  && symbolic_operand (XEXP (src, 1), VOIDmode)
4699 	  && ! read_only_operand (XEXP (src, 1), VOIDmode))
4700 	priority >>= 3;
4701 
4702       else if (GET_CODE (src) == MEM
4703 	       && GET_CODE (XEXP (src, 0)) == LO_SUM
4704 	       && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4705 	       && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4706 	priority >>= 1;
4707 
4708       else if (GET_CODE (dest) == MEM
4709 	       && GET_CODE (XEXP (dest, 0)) == LO_SUM
4710 	       && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4711 	       && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4712 	priority >>= 3;
4713     }
4714   return priority;
4715 }
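
/* For instance, an insn with priority 32 whose source is a LO_SUM of
   a writable symbolic operand drops to priority 4 (32 >> 3), while a
   load through such an address drops only to 16 (32 >> 1), which
   helps keep the addil that sets %r1 and its use close together, as
   noted above.  */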
4716 
4717 /* The 700 can only issue a single insn at a time.
4718    The 7XXX processors can issue two insns at a time.
4719    The 8000 can issue 4 insns at a time.  */
4720 static int
4721 pa_issue_rate (void)
4722 {
4723   switch (pa_cpu)
4724     {
4725     case PROCESSOR_700:		return 1;
4726     case PROCESSOR_7100:	return 2;
4727     case PROCESSOR_7100LC:	return 2;
4728     case PROCESSOR_7200:	return 2;
4729     case PROCESSOR_7300:	return 2;
4730     case PROCESSOR_8000:	return 4;
4731 
4732     default:
4733       gcc_unreachable ();
4734     }
4735 }
4736 
4737 
4738 
4739 /* Return any length adjustment needed by INSN which already has its length
4740    computed as LENGTH.  Return zero if no adjustment is necessary.
4741 
4742    For the PA: function calls, millicode calls, and backwards short
4743    conditional branches with unfilled delay slots need an adjustment of
4744    one instruction (4 bytes) for the NOP inserted into the instruction stream.
4745 
4746    Also compute the length of an inline block move here as it is too
4747    complicated to express as a length attribute in pa.md.  */
4748 int
4749 pa_adjust_insn_length (rtx insn, int length)
4750 {
4751   rtx pat = PATTERN (insn);
4752 
4753   /* Jumps inside switch tables which have unfilled delay slots need
4754      adjustment.  */
4755   if (GET_CODE (insn) == JUMP_INSN
4756       && GET_CODE (pat) == PARALLEL
4757       && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
4758     return 4;
4759   /* Millicode insn with an unfilled delay slot.  */
4760   else if (GET_CODE (insn) == INSN
4761 	   && GET_CODE (pat) != SEQUENCE
4762 	   && GET_CODE (pat) != USE
4763 	   && GET_CODE (pat) != CLOBBER
4764 	   && get_attr_type (insn) == TYPE_MILLI)
4765     return 4;
4766   /* Block move pattern.  */
4767   else if (GET_CODE (insn) == INSN
4768 	   && GET_CODE (pat) == PARALLEL
4769 	   && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4770 	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4771 	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4772 	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4773 	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4774     return compute_movmem_length (insn) - 4;
4775   /* Block clear pattern.  */
4776   else if (GET_CODE (insn) == INSN
4777 	   && GET_CODE (pat) == PARALLEL
4778 	   && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4779 	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4780 	   && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4781 	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
4782     return compute_clrmem_length (insn) - 4;
4783   /* Conditional branch with an unfilled delay slot.  */
4784   else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4785     {
4786       /* Adjust a short backwards conditional with an unfilled delay slot.  */
4787       if (GET_CODE (pat) == SET
4788 	  && length == 4
4789 	  && JUMP_LABEL (insn) != NULL_RTX
4790 	  && ! forward_branch_p (insn))
4791 	return 4;
4792       else if (GET_CODE (pat) == PARALLEL
4793 	       && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4794 	       && length == 4)
4795 	return 4;
4796       /* Adjust dbra insn with short backwards conditional branch with
4797 	 unfilled delay slot -- only for the case where the counter is in
4798 	 a general register.  */
4799       else if (GET_CODE (pat) == PARALLEL
4800 	       && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4801 	       && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4802  	       && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4803 	       && length == 4
4804 	       && ! forward_branch_p (insn))
4805 	return 4;
4806       else
4807 	return 0;
4808     }
4809   return 0;
4810 }
4811 
4812 /* Print operand X (an rtx) in assembler syntax to file FILE.
4813    CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4814    For `%' followed by punctuation, CODE is the punctuation and X is null.  */
4815 
4816 void
4817 print_operand (FILE *file, rtx x, int code)
4818 {
4819   switch (code)
4820     {
4821     case '#':
4822       /* Output a 'nop' if there's nothing for the delay slot.  */
4823       if (dbr_sequence_length () == 0)
4824 	fputs ("\n\tnop", file);
4825       return;
4826     case '*':
4827       /* Output a nullification completer if there's nothing for the
4828 	 delay slot or nullification is requested.  */
4829       if (dbr_sequence_length () == 0
4830 	  || (final_sequence
4831 	      && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4832         fputs (",n", file);
4833       return;
4834     case 'R':
4835       /* Print out the second register name of a register pair.
4836 	 I.e., R (6) => 7.  */
4837       fputs (reg_names[REGNO (x) + 1], file);
4838       return;
4839     case 'r':
4840       /* A register or zero.  */
4841       if (x == const0_rtx
4842 	  || (x == CONST0_RTX (DFmode))
4843 	  || (x == CONST0_RTX (SFmode)))
4844 	{
4845 	  fputs ("%r0", file);
4846 	  return;
4847 	}
4848       else
4849 	break;
4850     case 'f':
4851       /* A register or zero (floating point).  */
4852       if (x == const0_rtx
4853 	  || (x == CONST0_RTX (DFmode))
4854 	  || (x == CONST0_RTX (SFmode)))
4855 	{
4856 	  fputs ("%fr0", file);
4857 	  return;
4858 	}
4859       else
4860 	break;
4861     case 'A':
4862       {
4863 	rtx xoperands[2];
4864 
4865 	xoperands[0] = XEXP (XEXP (x, 0), 0);
4866 	xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4867 	output_global_address (file, xoperands[1], 0);
4868         fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4869 	return;
4870       }
4871 
4872     case 'C':			/* Plain (C)ondition */
4873     case 'X':
4874       switch (GET_CODE (x))
4875 	{
4876 	case EQ:
4877 	  fputs ("=", file);  break;
4878 	case NE:
4879 	  fputs ("<>", file);  break;
4880 	case GT:
4881 	  fputs (">", file);  break;
4882 	case GE:
4883 	  fputs (">=", file);  break;
4884 	case GEU:
4885 	  fputs (">>=", file);  break;
4886 	case GTU:
4887 	  fputs (">>", file);  break;
4888 	case LT:
4889 	  fputs ("<", file);  break;
4890 	case LE:
4891 	  fputs ("<=", file);  break;
4892 	case LEU:
4893 	  fputs ("<<=", file);  break;
4894 	case LTU:
4895 	  fputs ("<<", file);  break;
4896 	default:
4897 	  gcc_unreachable ();
4898 	}
4899       return;
4900     case 'N':			/* Condition, (N)egated */
4901       switch (GET_CODE (x))
4902 	{
4903 	case EQ:
4904 	  fputs ("<>", file);  break;
4905 	case NE:
4906 	  fputs ("=", file);  break;
4907 	case GT:
4908 	  fputs ("<=", file);  break;
4909 	case GE:
4910 	  fputs ("<", file);  break;
4911 	case GEU:
4912 	  fputs ("<<", file);  break;
4913 	case GTU:
4914 	  fputs ("<<=", file);  break;
4915 	case LT:
4916 	  fputs (">=", file);  break;
4917 	case LE:
4918 	  fputs (">", file);  break;
4919 	case LEU:
4920 	  fputs (">>", file);  break;
4921 	case LTU:
4922 	  fputs (">>=", file);  break;
4923 	default:
4924 	  gcc_unreachable ();
4925 	}
4926       return;
4927     /* For floating point comparisons.  Note that the output
4928        predicates are the complement of the desired mode.  The
4929        conditions for GT, GE, LT, LE and LTGT cause an invalid
4930        operation exception if the result is unordered and this
4931        exception is enabled in the floating-point status register.  */
4932     case 'Y':
4933       switch (GET_CODE (x))
4934 	{
4935 	case EQ:
4936 	  fputs ("!=", file);  break;
4937 	case NE:
4938 	  fputs ("=", file);  break;
4939 	case GT:
4940 	  fputs ("!>", file);  break;
4941 	case GE:
4942 	  fputs ("!>=", file);  break;
4943 	case LT:
4944 	  fputs ("!<", file);  break;
4945 	case LE:
4946 	  fputs ("!<=", file);  break;
4947 	case LTGT:
4948 	  fputs ("!<>", file);  break;
4949 	case UNLE:
4950 	  fputs ("!?<=", file);  break;
4951 	case UNLT:
4952 	  fputs ("!?<", file);  break;
4953 	case UNGE:
4954 	  fputs ("!?>=", file);  break;
4955 	case UNGT:
4956 	  fputs ("!?>", file);  break;
4957 	case UNEQ:
4958 	  fputs ("!?=", file);  break;
4959 	case UNORDERED:
4960 	  fputs ("!?", file);  break;
4961 	case ORDERED:
4962 	  fputs ("?", file);  break;
4963 	default:
4964 	  gcc_unreachable ();
4965 	}
4966       return;
4967     case 'S':			/* Condition, operands are (S)wapped.  */
4968       switch (GET_CODE (x))
4969 	{
4970 	case EQ:
4971 	  fputs ("=", file);  break;
4972 	case NE:
4973 	  fputs ("<>", file);  break;
4974 	case GT:
4975 	  fputs ("<", file);  break;
4976 	case GE:
4977 	  fputs ("<=", file);  break;
4978 	case GEU:
4979 	  fputs ("<<=", file);  break;
4980 	case GTU:
4981 	  fputs ("<<", file);  break;
4982 	case LT:
4983 	  fputs (">", file);  break;
4984 	case LE:
4985 	  fputs (">=", file);  break;
4986 	case LEU:
4987 	  fputs (">>=", file);  break;
4988 	case LTU:
4989 	  fputs (">>", file);  break;
4990 	default:
4991 	  gcc_unreachable ();
4992 	}
4993       return;
4994     case 'B':			/* Condition, (B)oth swapped and negate.  */
4995       switch (GET_CODE (x))
4996 	{
4997 	case EQ:
4998 	  fputs ("<>", file);  break;
4999 	case NE:
5000 	  fputs ("=", file);  break;
5001 	case GT:
5002 	  fputs (">=", file);  break;
5003 	case GE:
5004 	  fputs (">", file);  break;
5005 	case GEU:
5006 	  fputs (">>", file);  break;
5007 	case GTU:
5008 	  fputs (">>=", file);  break;
5009 	case LT:
5010 	  fputs ("<=", file);  break;
5011 	case LE:
5012 	  fputs ("<", file);  break;
5013 	case LEU:
5014 	  fputs ("<<", file);  break;
5015 	case LTU:
5016 	  fputs ("<<=", file);  break;
5017 	default:
5018 	  gcc_unreachable ();
5019 	}
5020       return;
5021     case 'k':
5022       gcc_assert (GET_CODE (x) == CONST_INT);
5023       fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5024       return;
5025     case 'Q':
5026       gcc_assert (GET_CODE (x) == CONST_INT);
5027       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5028       return;
5029     case 'L':
5030       gcc_assert (GET_CODE (x) == CONST_INT);
5031       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5032       return;
5033     case 'O':
5034       gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5035       fprintf (file, "%d", exact_log2 (INTVAL (x)));
5036       return;
5037     case 'p':
5038       gcc_assert (GET_CODE (x) == CONST_INT);
5039       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5040       return;
5041     case 'P':
5042       gcc_assert (GET_CODE (x) == CONST_INT);
5043       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5044       return;
5045     case 'I':
5046       if (GET_CODE (x) == CONST_INT)
5047 	fputs ("i", file);
5048       return;
5049     case 'M':
5050     case 'F':
5051       switch (GET_CODE (XEXP (x, 0)))
5052 	{
5053 	case PRE_DEC:
5054 	case PRE_INC:
5055 	  if (ASSEMBLER_DIALECT == 0)
5056 	    fputs ("s,mb", file);
5057 	  else
5058 	    fputs (",mb", file);
5059 	  break;
5060 	case POST_DEC:
5061 	case POST_INC:
5062 	  if (ASSEMBLER_DIALECT == 0)
5063 	    fputs ("s,ma", file);
5064 	  else
5065 	    fputs (",ma", file);
5066 	  break;
5067 	case PLUS:
5068 	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5069 	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5070 	    {
5071 	      if (ASSEMBLER_DIALECT == 0)
5072 		fputs ("x", file);
5073 	    }
5074 	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5075 		   || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5076 	    {
5077 	      if (ASSEMBLER_DIALECT == 0)
5078 		fputs ("x,s", file);
5079 	      else
5080 		fputs (",s", file);
5081 	    }
5082 	  else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5083 	    fputs ("s", file);
5084 	  break;
5085 	default:
5086 	  if (code == 'F' && ASSEMBLER_DIALECT == 0)
5087 	    fputs ("s", file);
5088 	  break;
5089 	}
5090       return;
5091     case 'G':
5092       output_global_address (file, x, 0);
5093       return;
5094     case 'H':
5095       output_global_address (file, x, 1);
5096       return;
5097     case 0:			/* Don't do anything special.  */
5098       break;
5099     case 'Z':
5100       {
5101 	unsigned op[3];
5102 	compute_zdepwi_operands (INTVAL (x), op);
5103 	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5104 	return;
5105       }
5106     case 'z':
5107       {
5108 	unsigned op[3];
5109 	compute_zdepdi_operands (INTVAL (x), op);
5110 	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5111 	return;
5112       }
5113     case 'c':
5114       /* We can get here from a .vtable_inherit due to our
5115 	 CONSTANT_ADDRESS_P rejecting perfectly good constant
5116 	 addresses.  */
5117       break;
5118     default:
5119       gcc_unreachable ();
5120     }
5121   if (GET_CODE (x) == REG)
5122     {
5123       fputs (reg_names [REGNO (x)], file);
5124       if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5125 	{
5126 	  fputs ("R", file);
5127 	  return;
5128 	}
5129       if (FP_REG_P (x)
5130 	  && GET_MODE_SIZE (GET_MODE (x)) <= 4
5131 	  && (REGNO (x) & 1) == 0)
5132 	fputs ("L", file);
5133     }
5134   else if (GET_CODE (x) == MEM)
5135     {
5136       int size = GET_MODE_SIZE (GET_MODE (x));
5137       rtx base = NULL_RTX;
5138       switch (GET_CODE (XEXP (x, 0)))
5139 	{
5140 	case PRE_DEC:
5141 	case POST_DEC:
5142           base = XEXP (XEXP (x, 0), 0);
5143 	  fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5144 	  break;
5145 	case PRE_INC:
5146 	case POST_INC:
5147           base = XEXP (XEXP (x, 0), 0);
5148 	  fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5149 	  break;
5150 	case PLUS:
5151 	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5152 	    fprintf (file, "%s(%s)",
5153 		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5154 		     reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5155 	  else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5156 	    fprintf (file, "%s(%s)",
5157 		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5158 		     reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5159 	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5160 		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5161 	    {
5162 	      /* Because the REG_POINTER flag can get lost during reload,
5163 		 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
5164 		 index and base registers in the combined move patterns.  */
5165 	      rtx base = XEXP (XEXP (x, 0), 1);
5166 	      rtx index = XEXP (XEXP (x, 0), 0);
5167 
5168 	      fprintf (file, "%s(%s)",
5169 		       reg_names [REGNO (index)], reg_names [REGNO (base)]);
5170 	    }
5171 	  else
5172 	    output_address (XEXP (x, 0));
5173 	  break;
5174 	default:
5175 	  output_address (XEXP (x, 0));
5176 	  break;
5177 	}
5178     }
5179   else
5180     output_addr_const (file, x);
5181 }
5182 
5183 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF.  */
5184 
5185 void
5186 output_global_address (FILE *file, rtx x, int round_constant)
5187 {
5188 
5189   /* Imagine (high (const (plus ...))).  */
5190   if (GET_CODE (x) == HIGH)
5191     x = XEXP (x, 0);
5192 
5193   if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5194     output_addr_const (file, x);
5195   else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5196     {
5197       output_addr_const (file, x);
5198       fputs ("-$global$", file);
5199     }
5200   else if (GET_CODE (x) == CONST)
5201     {
5202       const char *sep = "";
5203       int offset = 0;		/* assembler wants -$global$ at end */
5204       rtx base = NULL_RTX;
5205 
5206       switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5207 	{
5208 	case SYMBOL_REF:
5209 	  base = XEXP (XEXP (x, 0), 0);
5210 	  output_addr_const (file, base);
5211 	  break;
5212 	case CONST_INT:
5213 	  offset = INTVAL (XEXP (XEXP (x, 0), 0));
5214 	  break;
5215 	default:
5216 	  gcc_unreachable ();
5217 	}
5218 
5219       switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5220 	{
5221 	case SYMBOL_REF:
5222 	  base = XEXP (XEXP (x, 0), 1);
5223 	  output_addr_const (file, base);
5224 	  break;
5225 	case CONST_INT:
5226 	  offset = INTVAL (XEXP (XEXP (x, 0), 1));
5227 	  break;
5228 	default:
5229 	  gcc_unreachable ();
5230 	}
5231 
5232       /* How bogus.  The compiler is apparently responsible for
5233 	 rounding the constant if it uses an LR field selector.
5234 
5235 	 The linker and/or assembler seem a better place since
5236 	 they have to do this kind of thing already.
5237 
5238 	 If we fail to do this, HP's optimizing linker may eliminate
5239 	 an addil, but not update the ldw/stw/ldo instruction that
5240 	 uses the result of the addil.  */
5241       if (round_constant)
5242 	offset = ((offset + 0x1000) & ~0x1fff);
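	/* For example, an offset of 0x1fff rounds up to 0x2000 and an
	   offset of 0xfff rounds down to 0; that is, the offset is
	   rounded to the nearest multiple of 0x2000.  */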
5243 
5244       switch (GET_CODE (XEXP (x, 0)))
5245 	{
5246 	case PLUS:
5247 	  if (offset < 0)
5248 	    {
5249 	      offset = -offset;
5250 	      sep = "-";
5251 	    }
5252 	  else
5253 	    sep = "+";
5254 	  break;
5255 
5256 	case MINUS:
5257 	  gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5258 	  sep = "-";
5259 	  break;
5260 
5261 	default:
5262 	  gcc_unreachable ();
5263 	}
5264 
5265       if (!read_only_operand (base, VOIDmode) && !flag_pic)
5266 	fputs ("-$global$", file);
5267       if (offset)
5268 	fprintf (file, "%s%d", sep, offset);
5269     }
5270   else
5271     output_addr_const (file, x);
5272 }
5273 
5274 /* Output boilerplate text to appear at the beginning of the file.
5275    There are several possible versions.  */
5276 #define aputs(x) fputs(x, asm_out_file)
5277 static inline void
5278 pa_file_start_level (void)
5279 {
5280   if (TARGET_64BIT)
5281     aputs ("\t.LEVEL 2.0w\n");
5282   else if (TARGET_PA_20)
5283     aputs ("\t.LEVEL 2.0\n");
5284   else if (TARGET_PA_11)
5285     aputs ("\t.LEVEL 1.1\n");
5286   else
5287     aputs ("\t.LEVEL 1.0\n");
5288 }
5289 
5290 static inline void
5291 pa_file_start_space (int sortspace)
5292 {
5293   aputs ("\t.SPACE $PRIVATE$");
5294   if (sortspace)
5295     aputs (",SORT=16");
5296   aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
5297          "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5298          "\n\t.SPACE $TEXT$");
5299   if (sortspace)
5300     aputs (",SORT=8");
5301   aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5302          "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5303 }
5304 
5305 static inline void
5306 pa_file_start_file (int want_version)
5307 {
5308   if (write_symbols != NO_DEBUG)
5309     {
5310       output_file_directive (asm_out_file, main_input_filename);
5311       if (want_version)
5312 	aputs ("\t.version\t\"01.01\"\n");
5313     }
5314 }
5315 
5316 static inline void
5317 pa_file_start_mcount (const char *aswhat)
5318 {
5319   if (profile_flag)
5320     fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5321 }
5322 
5323 static void
5324 pa_elf_file_start (void)
5325 {
5326   pa_file_start_level ();
5327   pa_file_start_mcount ("ENTRY");
5328   pa_file_start_file (0);
5329 }
5330 
5331 static void
5332 pa_som_file_start (void)
5333 {
5334   pa_file_start_level ();
5335   pa_file_start_space (0);
5336   aputs ("\t.IMPORT $global$,DATA\n"
5337          "\t.IMPORT $$dyncall,MILLICODE\n");
5338   pa_file_start_mcount ("CODE");
5339   pa_file_start_file (0);
5340 }
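/* As a sketch of the boilerplate assembled above, a SOM PA 1.1 compilation
   without profiling would start roughly with:

	.LEVEL 1.1
	.SPACE $PRIVATE$
	.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31
	...
	.IMPORT $global$,DATA
	.IMPORT $$dyncall,MILLICODE  */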
5341 
5342 static void
5343 pa_linux_file_start (void)
5344 {
5345   pa_file_start_file (0);
5346   pa_file_start_level ();
5347   pa_file_start_mcount ("CODE");
5348 }
5349 
5350 static void
5351 pa_hpux64_gas_file_start (void)
5352 {
5353   pa_file_start_level ();
5354 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5355   if (profile_flag)
5356     ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5357 #endif
5358   pa_file_start_file (1);
5359 }
5360 
5361 static void
5362 pa_hpux64_hpas_file_start (void)
5363 {
5364   pa_file_start_level ();
5365   pa_file_start_space (1);
5366   pa_file_start_mcount ("CODE");
5367   pa_file_start_file (0);
5368 }
5369 #undef aputs
5370 
5371 /* Search the deferred plabel list for SYMBOL and return its internal
5372    label.  If an entry for SYMBOL is not found, a new entry is created.  */
5373 
5374 rtx
5375 get_deferred_plabel (rtx symbol)
5376 {
5377   const char *fname = XSTR (symbol, 0);
5378   size_t i;
5379 
5380   /* See if we have already put this function on the list of deferred
5381      plabels.  This list is generally small, so a linear search is not
5382      too ugly.  If it proves too slow, replace it with something faster.  */
5383   for (i = 0; i < n_deferred_plabels; i++)
5384     if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5385       break;
5386 
5387   /* If the deferred plabel list is empty, or this entry was not found
5388      on the list, create a new entry on the list.  */
5389   if (deferred_plabels == NULL || i == n_deferred_plabels)
5390     {
5391       tree id;
5392 
5393       if (deferred_plabels == 0)
5394 	deferred_plabels = (struct deferred_plabel *)
5395 	  ggc_alloc (sizeof (struct deferred_plabel));
5396       else
5397 	deferred_plabels = (struct deferred_plabel *)
5398 	  ggc_realloc (deferred_plabels,
5399 		       ((n_deferred_plabels + 1)
5400 			* sizeof (struct deferred_plabel)));
5401 
5402       i = n_deferred_plabels++;
5403       deferred_plabels[i].internal_label = gen_label_rtx ();
5404       deferred_plabels[i].symbol = symbol;
5405 
5406       /* Gross.  We have just implicitly taken the address of this
5407 	 function.  Mark it in the same manner as assemble_name.  */
5408       id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5409       if (id)
5410 	mark_referenced (id);
5411     }
5412 
5413   return deferred_plabels[i].internal_label;
5414 }
5415 
5416 static void
5417 output_deferred_plabels (void)
5418 {
5419   size_t i;
5420 
5421   /* If we have some deferred plabels, then we need to switch into the
5422      data or readonly data section, and align it to a 4 byte boundary
5423      before outputting the deferred plabels.  */
5424   if (n_deferred_plabels)
5425     {
5426       switch_to_section (flag_pic ? data_section : readonly_data_section);
5427       ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5428     }
5429 
5430   /* Now output the deferred plabels.  */
5431   for (i = 0; i < n_deferred_plabels; i++)
5432     {
5433       targetm.asm_out.internal_label (asm_out_file, "L",
5434 		 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5435       assemble_integer (deferred_plabels[i].symbol,
5436 			TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5437     }
5438 }
5439 
5440 #ifdef HPUX_LONG_DOUBLE_LIBRARY
5441 /* Initialize optabs to point to HPUX long double emulation routines.  */
5442 static void
5443 pa_hpux_init_libfuncs (void)
5444 {
5445   set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5446   set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5447   set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5448   set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5449   set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5450   set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5451   set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5452   set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5453   set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5454 
5455   set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5456   set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5457   set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5458   set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5459   set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5460   set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5461   set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5462 
5463   set_conv_libfunc (sext_optab,   TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5464   set_conv_libfunc (sext_optab,   TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5465   set_conv_libfunc (trunc_optab,  SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5466   set_conv_libfunc (trunc_optab,  DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5467 
5468   set_conv_libfunc (sfix_optab,   SImode, TFmode, TARGET_64BIT
5469 						  ? "__U_Qfcnvfxt_quad_to_sgl"
5470 						  : "_U_Qfcnvfxt_quad_to_sgl");
5471   set_conv_libfunc (sfix_optab,   DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
5472   set_conv_libfunc (ufix_optab,   SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
5473   set_conv_libfunc (ufix_optab,   DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");
5474 
5475   set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
5476   set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
5477   set_conv_libfunc (ufloat_optab, TFmode, SImode, "_U_Qfcnvxf_usgl_to_quad");
5478   set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxf_udbl_to_quad");
5479 }
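/* With these libfuncs registered, optabs expansion turns TFmode arithmetic
   into library calls; e.g., a TFmode addition becomes a call to _U_Qfadd
   rather than inline code.  */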
5480 #endif
5481 
5482 /* HP's millicode routines mean something special to the assembler.
5483    Keep track of which ones we have used.  */
5484 
5485 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5486 static void import_milli (enum millicodes);
5487 static char imported[(int) end1000];
5488 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5489 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5490 #define MILLI_START 10
5491 
5492 static void
5493 import_milli (enum millicodes code)
5494 {
5495   char str[sizeof (import_string)];
5496 
5497   if (!imported[(int) code])
5498     {
5499       imported[(int) code] = 1;
5500       strcpy (str, import_string);
5501       strncpy (str + MILLI_START, milli_names[(int) code], 4);
5502       output_asm_insn (str, 0);
5503     }
5504 }
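/* For example, import_milli (mulI) copies "mulI" over the "...." field at
   MILLI_START in import_string, emitting ".IMPORT $$mulI,MILLICODE", and
   does so only once per translation unit.  */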
5505 
5506 /* The register constraints have put the operands and return value in
5507    the proper registers.  */
5508 
5509 const char *
5510 output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
5511 {
5512   import_milli (mulI);
5513   return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5514 }
5515 
5516 /* Emit the rtl for doing a division by a constant.  */
5517 
5518 /* Do magic division millicodes exist for this value? */
5519 const int magic_milli[] = {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
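/* The array is indexed by divisor: e.g., magic_milli[3] and magic_milli[5]
   are 1, so $$divI_3 and $$divI_5 exist, while the power-of-two entries
   (2, 4, 8) are 0 since those divisions are presumably handled by shift
   patterns instead.  */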
5520 
5521 /* We'll use an array to keep track of the magic millicodes and
5522    whether or not we've used them already. [n][0] is signed, [n][1] is
5523    unsigned.  */
5524 
5525 static int div_milli[16][2];
5526 
5527 int
5528 emit_hpdiv_const (rtx *operands, int unsignedp)
5529 {
5530   if (GET_CODE (operands[2]) == CONST_INT
5531       && INTVAL (operands[2]) > 0
5532       && INTVAL (operands[2]) < 16
5533       && magic_milli[INTVAL (operands[2])])
5534     {
5535       rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5536 
5537       emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5538       emit
5539 	(gen_rtx_PARALLEL
5540 	 (VOIDmode,
5541 	  gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5542 				     gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5543 						     SImode,
5544 						     gen_rtx_REG (SImode, 26),
5545 						     operands[2])),
5546 		     gen_rtx_CLOBBER (VOIDmode, operands[4]),
5547 		     gen_rtx_CLOBBER (VOIDmode, operands[3]),
5548 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5549 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5550 		     gen_rtx_CLOBBER (VOIDmode, ret))));
5551       emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5552       return 1;
5553     }
5554   return 0;
5555 }
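/* A sketch of what the above emits for x / 7 (signed): %r26 is loaded with
   the dividend, the PARALLEL sets %r29 to (div:SI (reg:SI 26) (const_int 7))
   while clobbering %r25, %r26, the two scratch operands and the millicode
   return register (%r31, or %r2 in the 64-bit runtime), and the quotient is
   then copied out of %r29 into operands[0].  */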
5556 
5557 const char *
5558 output_div_insn (rtx *operands, int unsignedp, rtx insn)
5559 {
5560   HOST_WIDE_INT divisor;
5561 
5562   /* If the divisor is a constant, try to use one of the special
5563      opcodes.  */
5564   if (GET_CODE (operands[0]) == CONST_INT)
5565     {
5566       static char buf[100];
5567       divisor = INTVAL (operands[0]);
5568       if (!div_milli[divisor][unsignedp])
5569 	{
5570 	  div_milli[divisor][unsignedp] = 1;
5571 	  if (unsignedp)
5572 	    output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5573 	  else
5574 	    output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5575 	}
5576       if (unsignedp)
5577 	{
5578 	  sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5579 		   INTVAL (operands[0]));
5580 	  return output_millicode_call (insn,
5581 					gen_rtx_SYMBOL_REF (SImode, buf));
5582 	}
5583       else
5584 	{
5585 	  sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5586 		   INTVAL (operands[0]));
5587 	  return output_millicode_call (insn,
5588 					gen_rtx_SYMBOL_REF (SImode, buf));
5589 	}
5590     }
5591   /* Divisor isn't a special constant.  */
5592   else
5593     {
5594       if (unsignedp)
5595 	{
5596 	  import_milli (divU);
5597 	  return output_millicode_call (insn,
5598 					gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5599 	}
5600       else
5601 	{
5602 	  import_milli (divI);
5603 	  return output_millicode_call (insn,
5604 					gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5605 	}
5606     }
5607 }
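/* For example, an unsigned division by the magic constant 7 emits
   ".IMPORT $$divU_7,MILLICODE" the first time it is seen and then calls
   $$divU_7; a divisor without a magic millicode falls back to the plain
   $$divU or $$divI routine.  */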
5608 
5609 /* Output a $$rem millicode to do mod.  */
5610 
5611 const char *
5612 output_mod_insn (int unsignedp, rtx insn)
5613 {
5614   if (unsignedp)
5615     {
5616       import_milli (remU);
5617       return output_millicode_call (insn,
5618 				    gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5619     }
5620   else
5621     {
5622       import_milli (remI);
5623       return output_millicode_call (insn,
5624 				    gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5625     }
5626 }
5627 
5628 void
5629 output_arg_descriptor (rtx call_insn)
5630 {
5631   const char *arg_regs[4];
5632   enum machine_mode arg_mode;
5633   rtx link;
5634   int i, output_flag = 0;
5635   int regno;
5636 
5637   /* We neither need nor want argument location descriptors for the
5638      64bit runtime environment or the ELF32 environment.  */
5639   if (TARGET_64BIT || TARGET_ELF32)
5640     return;
5641 
5642   for (i = 0; i < 4; i++)
5643     arg_regs[i] = 0;
5644 
5645   /* Specify explicitly that no argument relocations should take place
5646      if using the portable runtime calling conventions.  */
5647   if (TARGET_PORTABLE_RUNTIME)
5648     {
5649       fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5650 	     asm_out_file);
5651       return;
5652     }
5653 
5654   gcc_assert (GET_CODE (call_insn) == CALL_INSN);
5655   for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5656        link; link = XEXP (link, 1))
5657     {
5658       rtx use = XEXP (link, 0);
5659 
5660       if (! (GET_CODE (use) == USE
5661 	     && GET_CODE (XEXP (use, 0)) == REG
5662 	     && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5663 	continue;
5664 
5665       arg_mode = GET_MODE (XEXP (use, 0));
5666       regno = REGNO (XEXP (use, 0));
5667       if (regno >= 23 && regno <= 26)
5668 	{
5669 	  arg_regs[26 - regno] = "GR";
5670 	  if (arg_mode == DImode)
5671 	    arg_regs[25 - regno] = "GR";
5672 	}
5673       else if (regno >= 32 && regno <= 39)
5674 	{
5675 	  if (arg_mode == SFmode)
5676 	    arg_regs[(regno - 32) / 2] = "FR";
5677 	  else
5678 	    {
5679 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5680 	      arg_regs[(regno - 34) / 2] = "FR";
5681 	      arg_regs[(regno - 34) / 2 + 1] = "FU";
5682 #else
5683 	      arg_regs[(regno - 34) / 2] = "FU";
5684 	      arg_regs[(regno - 34) / 2 + 1] = "FR";
5685 #endif
5686 	    }
5687 	}
5688     }
5689   fputs ("\t.CALL ", asm_out_file);
5690   for (i = 0; i < 4; i++)
5691     {
5692       if (arg_regs[i])
5693 	{
5694 	  if (output_flag++)
5695 	    fputc (',', asm_out_file);
5696 	  fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5697 	}
5698     }
5699   fputc ('\n', asm_out_file);
5700 }
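/* For example, a call whose CALL_INSN_FUNCTION_USAGE contains
   (use (reg:DI 25)) -- a doubleword argument in general registers --
   would be annotated with "\t.CALL ARGW0=GR,ARGW1=GR\n"; single FP words
   map to "FR" slots via (regno - 32) / 2.  */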
5701 
5702 /* Inform reload about cases where moving X with a mode MODE to a register in
5703    RCLASS requires an extra scratch or immediate register.  Return the class
5704    needed for the immediate register.  */
5705 
5706 static enum reg_class
5707 pa_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
5708 		     enum machine_mode mode, secondary_reload_info *sri)
5709 {
5710   int regno;
5711 
5712   /* Handle the easy stuff first.  */
5713   if (rclass == R1_REGS)
5714     return NO_REGS;
5715 
5716   if (REG_P (x))
5717     {
5718       regno = REGNO (x);
5719       if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
5720 	return NO_REGS;
5721     }
5722   else
5723     regno = -1;
5724 
5725   /* If we have something like (mem (mem (...)), we can safely assume the
5726      inner MEM will end up in a general register after reloading, so there's
5727      no need for a secondary reload.  */
5728   if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
5729     return NO_REGS;
5730 
5731   /* Trying to load a constant into a FP register during PIC code
5732      generation requires %r1 as a scratch register.  */
5733   if (flag_pic
5734       && (mode == SImode || mode == DImode)
5735       && FP_REG_CLASS_P (rclass)
5736       && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
5737     {
5738       sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5739 		    : CODE_FOR_reload_indi_r1);
5740       return NO_REGS;
5741     }
5742 
5743   /* Secondary reloads of symbolic operands require %r1 as a scratch
5744      register when we're generating PIC code and when the operand isn't
5745      readonly.  */
5746   if (symbolic_expression_p (x))
5747     {
5748       if (GET_CODE (x) == HIGH)
5749 	x = XEXP (x, 0);
5750 
5751       if (flag_pic || !read_only_operand (x, VOIDmode))
5752 	{
5753 	  gcc_assert (mode == SImode || mode == DImode);
5754 	  sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5755 			: CODE_FOR_reload_indi_r1);
5756 	  return NO_REGS;
5757 	}
5758     }
5759 
5760   /* Profiling showed the PA port spends about 1.3% of its compilation
5761      time in true_regnum from calls inside pa_secondary_reload_class.  */
5762   if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
5763     regno = true_regnum (x);
5764 
5765   /* In order to allow 14-bit displacements in integer loads and stores,
5766      we need to prevent reload from generating out of range integer mode
5767      loads and stores to the floating point registers.  Previously, we
5768      used to call for a secondary reload and have emit_move_sequence()
5769      fix the instruction sequence.  However, reload occasionally wouldn't
5770      generate the reload and we would end up with an invalid REG+D memory
5771      address.  So, now we use an intermediate general register for most
5772      memory loads and stores.  */
5773   if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5774       && GET_MODE_CLASS (mode) == MODE_INT
5775       && FP_REG_CLASS_P (rclass))
5776     {
5777       /* Reload passes (mem:SI (reg/f:DI 30 %r30) when it wants to check
5778 	 the secondary reload needed for a pseudo.  It never passes a
5779 	 REG+D address.  */
5780       if (GET_CODE (x) == MEM)
5781 	{
5782 	  x = XEXP (x, 0);
5783 
5784 	  /* We don't need an intermediate for indexed and LO_SUM DLT
5785 	     memory addresses.  When INT14_OK_STRICT is true, it might
5786 	     appear that we could directly allow register indirect
5787 	     memory addresses.  However, this doesn't work because we
5788 	     don't support SUBREGs in floating-point register copies
5789 	     and reload doesn't tell us when it's going to use a SUBREG.  */
5790 	  if (IS_INDEX_ADDR_P (x)
5791 	      || IS_LO_SUM_DLT_ADDR_P (x))
5792 	    return NO_REGS;
5793 
5794 	  /* Otherwise, we need an intermediate general register.  */
5795 	  return GENERAL_REGS;
5796 	}
5797 
5798       /* Request a secondary reload with a general scratch register
5799 	 for everything else.  ??? Could symbolic operands be handled
5800 	 directly when generating non-pic PA 2.0 code?  */
5801       sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
5802       return NO_REGS;
5803     }
5804 
5805   /* A SAR<->FP register copy requires an intermediate general register
5806      and secondary memory.  We need a secondary reload with a general
5807      scratch register for spills.  */
5808   if (rclass == SHIFT_REGS)
5809     {
5810       /* Handle spill.  */
5811       if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
5812 	{
5813 	  sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
5814 	  return NO_REGS;
5815 	}
5816 
5817       /* Handle FP copy.  */
5818       if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
5819 	return GENERAL_REGS;
5820     }
5821 
5822   if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5823       && REGNO_REG_CLASS (regno) == SHIFT_REGS
5824       && FP_REG_CLASS_P (rclass))
5825     return GENERAL_REGS;
5826 
5827   return NO_REGS;
5828 }
5829 
5830 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY.  The argument pointer
5831    is only marked as live on entry by df-scan when it is a fixed
5832    register.  It isn't a fixed register in the 64-bit runtime,
5833    so we need to mark it here.  */
5834 
5835 static void
5836 pa_extra_live_on_entry (bitmap regs)
5837 {
5838   if (TARGET_64BIT)
5839     bitmap_set_bit (regs, ARG_POINTER_REGNUM);
5840 }
5841 
5842 /* Implement EH_RETURN_HANDLER_RTX.  The MEM needs to be volatile
5843    to prevent it from being deleted.  */
5844 
5845 rtx
5846 pa_eh_return_handler_rtx (void)
5847 {
5848   rtx tmp;
5849 
5850   tmp = gen_rtx_PLUS (word_mode, frame_pointer_rtx,
5851 		      TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
5852   tmp = gen_rtx_MEM (word_mode, tmp);
5853   tmp->volatil = 1;
5854   return tmp;
5855 }
5856 
5857 /* In the 32-bit runtime, arguments larger than eight bytes are passed
5858    by invisible reference.  As a GCC extension, we also pass anything
5859    with a zero or variable size by reference.
5860 
5861    The 64-bit runtime does not describe passing any types by invisible
5862    reference.  The internals of GCC can't currently handle passing
5863    empty structures, and zero or variable length arrays when they are
5864    not passed entirely on the stack or by reference.  Thus, as a GCC
5865    extension, we pass these types by reference.  The HP compiler doesn't
5866    support these types, so hopefully there shouldn't be any compatibility
5867    issues.  This may have to be revisited when HP releases a C99 compiler
5868    or updates the ABI.  */
5869 
5870 static bool
5871 pa_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
5872 		      enum machine_mode mode, const_tree type,
5873 		      bool named ATTRIBUTE_UNUSED)
5874 {
5875   HOST_WIDE_INT size;
5876 
5877   if (type)
5878     size = int_size_in_bytes (type);
5879   else
5880     size = GET_MODE_SIZE (mode);
5881 
5882   if (TARGET_64BIT)
5883     return size <= 0;
5884   else
5885     return size <= 0 || size > 8;
5886 }
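/* Consequently, a 16-byte struct is passed by reference in the 32-bit
   runtime (size > 8) but directly in the 64-bit runtime, while zero and
   variable sized types are passed by reference in both.  */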
5887 
5888 enum direction
5889 function_arg_padding (enum machine_mode mode, const_tree type)
5890 {
5891   if (mode == BLKmode
5892       || (TARGET_64BIT
5893 	  && type
5894 	  && (AGGREGATE_TYPE_P (type)
5895 	      || TREE_CODE (type) == COMPLEX_TYPE
5896 	      || TREE_CODE (type) == VECTOR_TYPE)))
5897     {
5898       /* Return none if justification is not required.  */
5899       if (type
5900 	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
5901 	  && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
5902 	return none;
5903 
5904       /* The directions set here are ignored when a BLKmode argument larger
5905 	 than a word is placed in a register.  Different code is used for
5906 	 the stack and registers.  This makes it difficult to have a
5907 	 consistent data representation for both the stack and registers.
5908 	 For both runtimes, the justification and padding for arguments on
5909 	 the stack and in registers should be identical.  */
5910       if (TARGET_64BIT)
5911 	/* The 64-bit runtime specifies left justification for aggregates.  */
5912         return upward;
5913       else
5914 	/* The 32-bit runtime architecture specifies right justification.
5915 	   When the argument is passed on the stack, the argument is padded
5916 	   with garbage on the left.  The HP compiler pads with zeros.  */
5917 	return downward;
5918     }
5919 
5920   if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
5921     return downward;
5922   else
5923     return none;
5924 }
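/* For example, an HImode argument (16 bits, smaller than PARM_BOUNDARY)
   pads downward and is thus right justified in its word, whereas a full
   SImode word in the 32-bit runtime needs no padding and returns none.  */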
5925 
5926 
5927 /* Do what is necessary for `va_start'.  We look at the current function
5928    to determine if stdargs or varargs is used and fill in an initial
5929    va_list.  A pointer to this constructor is returned.  */
5930 
5931 static rtx
5932 hppa_builtin_saveregs (void)
5933 {
5934   rtx offset, dest;
5935   tree fntype = TREE_TYPE (current_function_decl);
5936   int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
5937 		   && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
5938 		       != void_type_node)))
5939 		? UNITS_PER_WORD : 0);
5940 
5941   if (argadj)
5942     offset = plus_constant (crtl->args.arg_offset_rtx, argadj);
5943   else
5944     offset = crtl->args.arg_offset_rtx;
5945 
5946   if (TARGET_64BIT)
5947     {
5948       int i, off;
5949 
5950       /* Adjust for varargs/stdarg differences.  */
5951       if (argadj)
5952 	offset = plus_constant (crtl->args.arg_offset_rtx, -argadj);
5953       else
5954 	offset = crtl->args.arg_offset_rtx;
5955 
5956       /* We need to save %r26 .. %r19 inclusive starting at offset -64
5957 	 from the incoming arg pointer and growing to larger addresses.  */
5958       for (i = 26, off = -64; i >= 19; i--, off += 8)
5959 	emit_move_insn (gen_rtx_MEM (word_mode,
5960 				     plus_constant (arg_pointer_rtx, off)),
5961 			gen_rtx_REG (word_mode, i));
5962 
5963       /* The incoming args pointer points just beyond the flushback area;
5964 	 normally this is not a serious concern.  However, when we are doing
5965 	 varargs/stdargs we want to make the arg pointer point to the start
5966 	 of the incoming argument area.  */
5967       emit_move_insn (virtual_incoming_args_rtx,
5968 		      plus_constant (arg_pointer_rtx, -64));
5969 
5970       /* Now return a pointer to the first anonymous argument.  */
5971       return copy_to_reg (expand_binop (Pmode, add_optab,
5972 					virtual_incoming_args_rtx,
5973 					offset, 0, 0, OPTAB_LIB_WIDEN));
5974     }
5975 
5976   /* Store general registers on the stack.  */
5977   dest = gen_rtx_MEM (BLKmode,
5978 		      plus_constant (crtl->args.internal_arg_pointer,
5979 				     -16));
5980   set_mem_alias_set (dest, get_varargs_alias_set ());
5981   set_mem_align (dest, BITS_PER_WORD);
5982   move_block_from_reg (23, dest, 4);
5983 
5984   /* move_block_from_reg will emit code to store the argument registers
5985      individually as scalar stores.
5986 
5987      However, other insns may later load from the same addresses for
5988      a structure load (passing a struct to a varargs routine).
5989 
5990      The alias code assumes that such aliasing can never happen, so we
5991      have to keep memory referencing insns from moving up beyond the
5992      last argument register store.  So we emit a blockage insn here.  */
5993   emit_insn (gen_blockage ());
5994 
5995   return copy_to_reg (expand_binop (Pmode, add_optab,
5996 				    crtl->args.internal_arg_pointer,
5997 				    offset, 0, 0, OPTAB_LIB_WIDEN));
5998 }
5999 
6000 static void
6001 hppa_va_start (tree valist, rtx nextarg)
6002 {
6003   nextarg = expand_builtin_saveregs ();
6004   std_expand_builtin_va_start (valist, nextarg);
6005 }
6006 
6007 static tree
6008 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6009 			   gimple_seq *post_p)
6010 {
6011   if (TARGET_64BIT)
6012     {
6013       /* Args grow upward.  We can use the generic routines.  */
6014       return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6015     }
6016   else /* !TARGET_64BIT */
6017     {
6018       tree ptr = build_pointer_type (type);
6019       tree valist_type;
6020       tree t, u;
6021       unsigned int size, ofs;
6022       bool indirect;
6023 
6024       indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
6025       if (indirect)
6026 	{
6027 	  type = ptr;
6028 	  ptr = build_pointer_type (type);
6029 	}
6030       size = int_size_in_bytes (type);
6031       valist_type = TREE_TYPE (valist);
6032 
6033       /* Args grow down.  Not handled by generic routines.  */
6034 
6035       u = fold_convert (sizetype, size_in_bytes (type));
6036       u = fold_build1 (NEGATE_EXPR, sizetype, u);
6037       t = build2 (POINTER_PLUS_EXPR, valist_type, valist, u);
6038 
6039       /* Copied from va-pa.h, but we probably don't need to align to
6040 	 word size, since we generate and preserve that invariant.  */
6041       u = size_int (size > 4 ? -8 : -4);
6042       t = fold_convert (sizetype, t);
6043       t = build2 (BIT_AND_EXPR, sizetype, t, u);
6044       t = fold_convert (valist_type, t);
6045 
6046       t = build2 (MODIFY_EXPR, valist_type, valist, t);
6047 
6048       ofs = (8 - size) % 4;
6049       if (ofs != 0)
6050 	{
6051 	  u = size_int (ofs);
6052 	  t = build2 (POINTER_PLUS_EXPR, valist_type, t, u);
6053 	}
6054 
6055       t = fold_convert (ptr, t);
6056       t = build_va_arg_indirect_ref (t);
6057 
6058       if (indirect)
6059 	t = build_va_arg_indirect_ref (t);
6060 
6061       return t;
6062     }
6063 }
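/* A worked 32-bit example: fetching a 1-byte type moves valist down by 1,
   rounds it down to a 4-byte boundary (size <= 4), and then adds
   ofs = (8 - 1) % 4 = 3 so the result points at the right justified byte
   within the word.  An 8-byte double instead rounds down to an 8-byte
   boundary and needs no offset, since (8 - 8) % 4 == 0.  */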
6064 
6065 /* True if MODE is valid for the target.  By "valid", we mean able to
6066    be manipulated in non-trivial ways.  In particular, this means all
6067    the arithmetic is supported.
6068 
6069    Currently, TImode is not valid as the HP 64-bit runtime documentation
6070    doesn't document the alignment and calling conventions for this type.
6071    Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6072    2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE.  */
6073 
6074 static bool
6075 pa_scalar_mode_supported_p (enum machine_mode mode)
6076 {
6077   int precision = GET_MODE_PRECISION (mode);
6078 
6079   switch (GET_MODE_CLASS (mode))
6080     {
6081     case MODE_PARTIAL_INT:
6082     case MODE_INT:
6083       if (precision == CHAR_TYPE_SIZE)
6084 	return true;
6085       if (precision == SHORT_TYPE_SIZE)
6086 	return true;
6087       if (precision == INT_TYPE_SIZE)
6088 	return true;
6089       if (precision == LONG_TYPE_SIZE)
6090 	return true;
6091       if (precision == LONG_LONG_TYPE_SIZE)
6092 	return true;
6093       return false;
6094 
6095     case MODE_FLOAT:
6096       if (precision == FLOAT_TYPE_SIZE)
6097 	return true;
6098       if (precision == DOUBLE_TYPE_SIZE)
6099 	return true;
6100       if (precision == LONG_DOUBLE_TYPE_SIZE)
6101 	return true;
6102       return false;
6103 
6104     case MODE_DECIMAL_FLOAT:
6105       return false;
6106 
6107     default:
6108       gcc_unreachable ();
6109     }
6110 }
6111 
6112 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6113    it branches into the delay slot.  Otherwise, return FALSE.  */
6114 
6115 static bool
6116 branch_to_delay_slot_p (rtx insn)
6117 {
6118   rtx jump_insn;
6119 
6120   if (dbr_sequence_length ())
6121     return FALSE;
6122 
6123   jump_insn = next_active_insn (JUMP_LABEL (insn));
6124   while (insn)
6125     {
6126       insn = next_active_insn (insn);
6127       if (jump_insn == insn)
6128 	return TRUE;
6129 
6130       /* We can't rely on the length of asms.  So, we return FALSE when
6131 	 the branch is followed by an asm.  */
6132       if (!insn
6133 	  || GET_CODE (PATTERN (insn)) == ASM_INPUT
6134 	  || extract_asm_operands (PATTERN (insn)) != NULL_RTX
6135 	  || get_attr_length (insn) > 0)
6136 	break;
6137     }
6138 
6139   return FALSE;
6140 }
6141 
6142 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6143 
6144    This occurs when INSN has an unfilled delay slot and is followed
6145    by an asm.  Disaster can occur if the asm is empty and the jump
6146    branches into the delay slot.  So, we add a nop in the delay slot
6147    when this occurs.  */
6148 
6149 static bool
6150 branch_needs_nop_p (rtx insn)
6151 {
6152   rtx jump_insn;
6153 
6154   if (dbr_sequence_length ())
6155     return FALSE;
6156 
6157   jump_insn = next_active_insn (JUMP_LABEL (insn));
6158   while (insn)
6159     {
6160       insn = next_active_insn (insn);
6161       if (!insn || jump_insn == insn)
6162 	return TRUE;
6163 
6164       if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6165 	   || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6166 	  && get_attr_length (insn) > 0)
6167 	break;
6168     }
6169 
6170   return FALSE;
6171 }
6172 
6173 /* Return TRUE if INSN, a forward jump insn, can use nullification
6174    to skip the following instruction.  This avoids an extra cycle due
6175    to a mis-predicted branch when we fall through.  */
6176 
6177 static bool
6178 use_skip_p (rtx insn)
6179 {
6180   rtx jump_insn = next_active_insn (JUMP_LABEL (insn));
6181 
6182   while (insn)
6183     {
6184       insn = next_active_insn (insn);
6185 
6186       /* We can't rely on the length of asms, so we can't skip asms.  */
6187       if (!insn
6188 	  || GET_CODE (PATTERN (insn)) == ASM_INPUT
6189 	  || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6190 	break;
6191       if (get_attr_length (insn) == 4
6192 	  && jump_insn == next_active_insn (insn))
6193 	return TRUE;
6194       if (get_attr_length (insn) > 0)
6195 	break;
6196     }
6197 
6198   return FALSE;
6199 }
6200 
6201 /* This routine handles all the normal conditional branch sequences we
6202    might need to generate.  It handles compare immediate vs compare
6203    register, nullification of delay slots, varying length branches,
6204    negated branches, and all combinations of the above.  It returns the
6205    output appropriate to emit the branch corresponding to all given
6206    parameters.  */
6207 
6208 const char *
6209 output_cbranch (rtx *operands, int negated, rtx insn)
6210 {
6211   static char buf[100];
6212   bool useskip;
6213   int nullify = INSN_ANNULLED_BRANCH_P (insn);
6214   int length = get_attr_length (insn);
6215   int xdelay;
6216 
6217   /* A conditional branch to the following instruction (e.g. the delay slot)
6218      is asking for a disaster.  This can happen when not optimizing and
6219      when jump optimization fails.
6220 
6221      While it is usually safe to emit nothing, this can fail if the
6222      preceding instruction is a nullified branch with an empty delay
6223      slot and the same branch target as this branch.  We could check
6224      for this but jump optimization should eliminate nop jumps.  It
6225      is always safe to emit a nop.  */
6226   if (branch_to_delay_slot_p (insn))
6227     return "nop";
6228 
6229   /* The doubleword form of the cmpib instruction doesn't have the LEU
6230      and GTU conditions while the cmpb instruction does.  Since we accept
6231      zero for cmpb, we must ensure that we use cmpb for the comparison.  */
6232   if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6233     operands[2] = gen_rtx_REG (DImode, 0);
6234   if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6235     operands[1] = gen_rtx_REG (DImode, 0);
6236 
6237   /* If this is a long branch with its delay slot unfilled, set `nullify'
6238      as it can nullify the delay slot and save a nop.  */
6239   if (length == 8 && dbr_sequence_length () == 0)
6240     nullify = 1;
6241 
6242   /* If this is a short forward conditional branch which did not get
6243      its delay slot filled, the delay slot can still be nullified.  */
6244   if (! nullify && length == 4 && dbr_sequence_length () == 0)
6245     nullify = forward_branch_p (insn);
6246 
6247   /* A forward branch over a single nullified insn can be done with a
6248      comclr instruction.  This avoids a single cycle penalty due to
6249      mis-predicted branch if we fall through (branch not taken).  */
6250   useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6251 
6252   switch (length)
6253     {
6254       /* All short conditional branches except backwards with an unfilled
6255 	 delay slot.  */
6256       case 4:
6257 	if (useskip)
6258 	  strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6259 	else
6260 	  strcpy (buf, "{com%I2b,|cmp%I2b,}");
6261 	if (GET_MODE (operands[1]) == DImode)
6262 	  strcat (buf, "*");
6263 	if (negated)
6264 	  strcat (buf, "%B3");
6265 	else
6266 	  strcat (buf, "%S3");
6267 	if (useskip)
6268 	  strcat (buf, " %2,%r1,%%r0");
6269 	else if (nullify)
6270 	  {
6271 	    if (branch_needs_nop_p (insn))
6272 	      strcat (buf, ",n %2,%r1,%0%#");
6273 	    else
6274 	      strcat (buf, ",n %2,%r1,%0");
6275 	  }
6276 	else
6277 	  strcat (buf, " %2,%r1,%0");
6278 	break;
6279 
6280      /* All long conditionals.  Note a short backward branch with an
6281 	unfilled delay slot is treated just like a long backward branch
6282 	with an unfilled delay slot.  */
6283       case 8:
6284 	/* Handle weird backwards branch with a filled delay slot
6285 	   which is nullified.  */
6286 	if (dbr_sequence_length () != 0
6287 	    && ! forward_branch_p (insn)
6288 	    && nullify)
6289 	  {
6290 	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
6291 	    if (GET_MODE (operands[1]) == DImode)
6292 	      strcat (buf, "*");
6293 	    if (negated)
6294 	      strcat (buf, "%S3");
6295 	    else
6296 	      strcat (buf, "%B3");
6297 	    strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6298 	  }
6299 	/* Handle short backwards branch with an unfilled delay slot.
6300 	   Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6301 	   taken and untaken branches.  */
6302 	else if (dbr_sequence_length () == 0
6303 		 && ! forward_branch_p (insn)
6304 		 && INSN_ADDRESSES_SET_P ()
6305 		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6306 				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6307 	  {
6308 	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
6309 	    if (GET_MODE (operands[1]) == DImode)
6310 	      strcat (buf, "*");
6311 	    if (negated)
6312 	      strcat (buf, "%B3 %2,%r1,%0%#");
6313 	    else
6314 	      strcat (buf, "%S3 %2,%r1,%0%#");
6315 	  }
6316 	else
6317 	  {
6318 	    strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6319 	    if (GET_MODE (operands[1]) == DImode)
6320 	      strcat (buf, "*");
6321 	    if (negated)
6322 	      strcat (buf, "%S3");
6323 	    else
6324 	      strcat (buf, "%B3");
6325 	    if (nullify)
6326 	      strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6327 	    else
6328 	      strcat (buf, " %2,%r1,%%r0\n\tb %0");
6329 	  }
6330 	break;
6331 
6332       default:
6333 	/* The reversed conditional branch must branch over one additional
6334 	   instruction if the delay slot is filled and needs to be extracted
6335 	   by output_lbranch.  If the delay slot is empty or this is a
6336 	   nullified forward branch, the instruction after the reversed
6337 	   condition branch must be nullified.  */
6338 	if (dbr_sequence_length () == 0
6339 	    || (nullify && forward_branch_p (insn)))
6340 	  {
6341 	    nullify = 1;
6342 	    xdelay = 0;
6343 	    operands[4] = GEN_INT (length);
6344 	  }
6345 	else
6346 	  {
6347 	    xdelay = 1;
6348 	    operands[4] = GEN_INT (length + 4);
6349 	  }
6350 
6351 	/* Create a reversed conditional branch which branches around
6352 	   the following insns.  */
6353 	if (GET_MODE (operands[1]) != DImode)
6354 	  {
6355 	    if (nullify)
6356 	      {
6357 		if (negated)
6358 		  strcpy (buf,
6359 		    "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6360 		else
6361 		  strcpy (buf,
6362 		    "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6363 	      }
6364 	    else
6365 	      {
6366 		if (negated)
6367 		  strcpy (buf,
6368 		    "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6369 		else
6370 		  strcpy (buf,
6371 		    "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6372 	      }
6373 	  }
6374 	else
6375 	  {
6376 	    if (nullify)
6377 	      {
6378 		if (negated)
6379 		  strcpy (buf,
6380 		    "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6381 		else
6382 		  strcpy (buf,
6383 		    "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6384 	      }
6385 	    else
6386 	      {
6387 		if (negated)
6388 		  strcpy (buf,
6389 		    "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6390 		else
6391 		  strcpy (buf,
6392 		    "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6393 	      }
6394 	  }
6395 
6396 	output_asm_insn (buf, operands);
6397 	return output_lbranch (operands[0], insn, xdelay);
6398     }
6399   return buf;
6400 }
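/* For instance, a short (length 4) forward branch-on-equal whose delay slot
   was filled uses the template "{com%I2b,|cmp%I2b,}%S3 %2,%r1,%0" built
   above, which under the PA 2.0 assembler dialect renders as something like
   "cmpb,= %r5,%r4,L$0012" (the label here is hypothetical).  */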
6401 
6402 /* This routine handles output of long unconditional branches that
6403    exceed the maximum range of a simple branch instruction.  Since
6404    we don't have a register available for the branch, we save register
6405    %r1 in the frame marker, load the branch destination DEST into %r1,
6406    execute the branch, and restore %r1 in the delay slot of the branch.
6407 
6408    Since long branches may have an insn in the delay slot and the
6409    delay slot is used to restore %r1, we in general need to extract
6410    this insn and execute it before the branch.  However, to facilitate
6411    use of this function by conditional branches, we also provide an
6412    option to not extract the delay insn so that it will be emitted
6413    after the long branch.  So, if there is an insn in the delay slot,
6414    it is extracted if XDELAY is nonzero.
6415 
6416    The lengths of the various long-branch sequences are 20, 16 and 24
6417    bytes for the portable runtime, non-PIC and PIC cases, respectively.  */
6418 
6419 const char *
6420 output_lbranch (rtx dest, rtx insn, int xdelay)
6421 {
6422   rtx xoperands[2];
6423 
6424   xoperands[0] = dest;
6425 
6426   /* First, free up the delay slot.  */
6427   if (xdelay && dbr_sequence_length () != 0)
6428     {
6429       /* We can't handle a jump in the delay slot.  */
6430       gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN);
6431 
6432       final_scan_insn (NEXT_INSN (insn), asm_out_file,
6433 		       optimize, 0, NULL);
6434 
6435       /* Now delete the delay insn.  */
6436       SET_INSN_DELETED (NEXT_INSN (insn));
6437     }
6438 
6439   /* Output an insn to save %r1.  The runtime documentation doesn't
6440      specify whether the "Clean Up" slot in the caller's frame can
6441      be clobbered by the callee.  It isn't copied by HP's builtin
6442      alloca, so this suggests that it can be clobbered if necessary.
6443      The "Static Link" location is copied by HP builtin alloca, so
6444      we avoid using it.  Using the cleanup slot might be a problem
6445      if we have to interoperate with languages that pass cleanup
6446      information.  However, it should be possible to handle these
6447      situations with GCC's asm feature.
6448 
6449      The "Current RP" slot is reserved for the called procedure, so
6450      we try to use it when we don't have a frame of our own.  It's
6451      rather unlikely that we won't have a frame when we need to emit
6452      a very long branch.
6453 
6454      Really the way to go long term is a register scavenger; go to
6455      the target of the jump and find a register which we can use
6456      as a scratch to hold the value in %r1.  Then, we wouldn't have
6457      to free up the delay slot or clobber a slot that may be needed
6458      for other purposes.  */
6459   if (TARGET_64BIT)
6460     {
6461       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6462 	/* Use the return pointer slot in the frame marker.  */
6463 	output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6464       else
6465 	/* Use the slot at -40 in the frame marker since HP builtin
6466 	   alloca doesn't copy it.  */
6467 	output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6468     }
6469   else
6470     {
6471       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6472 	/* Use the return pointer slot in the frame marker.  */
6473 	output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6474       else
6475 	/* Use the "Clean Up" slot in the frame marker.  In GCC,
6476 	   the only other use of this location is for copying a
6477 	   floating point double argument from a floating-point
6478 	   register to two general registers.  The copy is done
6479 	   as an "atomic" operation when outputting a call, so it
6480 	   won't interfere with our using the location here.  */
6481 	output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6482     }
6483 
6484   if (TARGET_PORTABLE_RUNTIME)
6485     {
6486       output_asm_insn ("ldil L'%0,%%r1", xoperands);
6487       output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6488       output_asm_insn ("bv %%r0(%%r1)", xoperands);
6489     }
6490   else if (flag_pic)
6491     {
6492       output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6493       if (TARGET_SOM || !TARGET_GAS)
6494 	{
6495 	  xoperands[1] = gen_label_rtx ();
6496 	  output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6497 	  targetm.asm_out.internal_label (asm_out_file, "L",
6498 					  CODE_LABEL_NUMBER (xoperands[1]));
6499 	  output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6500 	}
6501       else
6502 	{
6503 	  output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6504 	  output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6505 	}
6506       output_asm_insn ("bv %%r0(%%r1)", xoperands);
6507     }
6508   else
6509     /* Now output a very long branch to the original target.  */
6510     output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6511 
6512   /* Now restore the value of %r1 in the delay slot.  */
6513   if (TARGET_64BIT)
6514     {
6515       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6516 	return "ldd -16(%%r30),%%r1";
6517       else
6518 	return "ldd -40(%%r30),%%r1";
6519     }
6520   else
6521     {
6522       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6523 	return "ldw -20(%%r30),%%r1";
6524       else
6525 	return "ldw -12(%%r30),%%r1";
6526     }
6527 }
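/* Putting the pieces together, the 16-byte non-PIC sequence for a 32-bit
   function with a frame is:

	stw %r1,-12(%r30)
	ldil L'target,%r1
	be R'target(%sr4,%r1)
	ldw -12(%r30),%r1

   where the final load, placed in the delay slot of the branch, restores
   %r1 and "target" stands for the actual destination.  */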
6528 
6529 /* This routine handles all the branch-on-bit conditional branch sequences we
6530    might need to generate.  It handles nullification of delay slots,
6531    varying length branches, negated branches and all combinations of the
6532    above.  It returns the appropriate output template to emit the branch.  */
6533 
6534 const char *
6535 output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6536 {
6537   static char buf[100];
6538   bool useskip;
6539   int nullify = INSN_ANNULLED_BRANCH_P (insn);
6540   int length = get_attr_length (insn);
6541   int xdelay;
6542 
6543   /* A conditional branch to the following instruction (e.g. the delay slot) is
6544      asking for a disaster.  I do not think this can happen as this pattern
6545      is only used when optimizing; jump optimization should eliminate the
6546      jump.  But be prepared just in case.  */
6547 
6548   if (branch_to_delay_slot_p (insn))
6549     return "nop";
6550 
6551   /* If this is a long branch with its delay slot unfilled, set `nullify'
6552      as it can nullify the delay slot and save a nop.  */
6553   if (length == 8 && dbr_sequence_length () == 0)
6554     nullify = 1;
6555 
6556   /* If this is a short forward conditional branch which did not get
6557      its delay slot filled, the delay slot can still be nullified.  */
6558   if (! nullify && length == 4 && dbr_sequence_length () == 0)
6559     nullify = forward_branch_p (insn);
6560 
6561   /* A forward branch over a single nullified insn can be done with an
6562      extrs instruction.  This avoids a single-cycle penalty due to a
6563      mispredicted branch if we fall through (branch not taken).  */
6564   useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6565 
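  /* A note on the templates below: a ",n" completer nullifies the
     insn in the delay slot, and "%#" makes the output machinery
     append a "nop" when the delay slot is empty.  So a short branch
     comes out roughly as either

	bb,< reg,bit,target
	nop			; unfilled delay slot

     or the nullified "bb,<,n reg,bit,target" with no nop (register
     and bit here are only placeholders).  */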
6566   switch (length)
6567     {
6568 
6569       /* All short conditional branches except backwards with an unfilled
6570 	 delay slot.  */
6571       case 4:
6572 	if (useskip)
6573 	  strcpy (buf, "{extrs,|extrw,s,}");
6574 	else
6575 	  strcpy (buf, "bb,");
6576 	if (useskip && GET_MODE (operands[0]) == DImode)
6577 	  strcpy (buf, "extrd,s,*");
6578 	else if (GET_MODE (operands[0]) == DImode)
6579 	  strcpy (buf, "bb,*");
6580 	if ((which == 0 && negated)
6581 	     || (which == 1 && ! negated))
6582 	  strcat (buf, ">=");
6583 	else
6584 	  strcat (buf, "<");
6585 	if (useskip)
6586 	  strcat (buf, " %0,%1,1,%%r0");
6587 	else if (nullify && negated)
6588 	  {
6589 	    if (branch_needs_nop_p (insn))
6590 	      strcat (buf, ",n %0,%1,%3%#");
6591 	    else
6592 	      strcat (buf, ",n %0,%1,%3");
6593 	  }
6594 	else if (nullify && ! negated)
6595 	  {
6596 	    if (branch_needs_nop_p (insn))
6597 	      strcat (buf, ",n %0,%1,%2%#");
6598 	    else
6599 	      strcat (buf, ",n %0,%1,%2");
6600 	  }
6601 	else if (! nullify && negated)
6602 	  strcat (buf, " %0,%1,%3");
6603 	else if (! nullify && ! negated)
6604 	  strcat (buf, " %0,%1,%2");
6605 	break;
6606 
6607      /* All long conditionals.  Note a short backward branch with an
6608 	unfilled delay slot is treated just like a long backward branch
6609 	with an unfilled delay slot.  */
6610       case 8:
6611 	/* Handle weird backwards branch with a filled delay slot
6612 	   which is nullified.  */
6613 	if (dbr_sequence_length () != 0
6614 	    && ! forward_branch_p (insn)
6615 	    && nullify)
6616 	  {
6617 	    strcpy (buf, "bb,");
6618 	    if (GET_MODE (operands[0]) == DImode)
6619 	      strcat (buf, "*");
6620 	    if ((which == 0 && negated)
6621 		|| (which == 1 && ! negated))
6622 	      strcat (buf, "<");
6623 	    else
6624 	      strcat (buf, ">=");
6625 	    if (negated)
6626 	      strcat (buf, ",n %0,%1,.+12\n\tb %3");
6627 	    else
6628 	      strcat (buf, ",n %0,%1,.+12\n\tb %2");
6629 	  }
6630 	/* Handle short backwards branch with an unfilled delay slot.
6631 	   Using a bb;nop rather than extrs;bl saves 1 cycle for both
6632 	   taken and untaken branches.  */
6633 	else if (dbr_sequence_length () == 0
6634 		 && ! forward_branch_p (insn)
6635 		 && INSN_ADDRESSES_SET_P ()
6636 		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6637 				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6638 	  {
6639 	    strcpy (buf, "bb,");
6640 	    if (GET_MODE (operands[0]) == DImode)
6641 	      strcat (buf, "*");
6642 	    if ((which == 0 && negated)
6643 		|| (which == 1 && ! negated))
6644 	      strcat (buf, ">=");
6645 	    else
6646 	      strcat (buf, "<");
6647 	    if (negated)
6648 	      strcat (buf, " %0,%1,%3%#");
6649 	    else
6650 	      strcat (buf, " %0,%1,%2%#");
6651 	  }
6652 	else
6653 	  {
6654 	    if (GET_MODE (operands[0]) == DImode)
6655 	      strcpy (buf, "extrd,s,*");
6656 	    else
6657 	      strcpy (buf, "{extrs,|extrw,s,}");
6658 	    if ((which == 0 && negated)
6659 		|| (which == 1 && ! negated))
6660 	      strcat (buf, "<");
6661 	    else
6662 	      strcat (buf, ">=");
6663 	    if (nullify && negated)
6664 	      strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6665 	    else if (nullify && ! negated)
6666 	      strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6667 	    else if (negated)
6668 	      strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6669 	    else
6670 	      strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6671 	  }
6672 	break;
6673 
6674       default:
6675 	/* The reversed conditional branch must branch over one additional
6676 	   instruction if the delay slot is filled and needs to be extracted
6677 	   by output_lbranch.  If the delay slot is empty or this is a
6678 	   nullified forward branch, the instruction after the reversed
6679 	   condition branch must be nullified.  */
6680 	if (dbr_sequence_length () == 0
6681 	    || (nullify && forward_branch_p (insn)))
6682 	  {
6683 	    nullify = 1;
6684 	    xdelay = 0;
6685 	    operands[4] = GEN_INT (length);
6686 	  }
6687 	else
6688 	  {
6689 	    xdelay = 1;
6690 	    operands[4] = GEN_INT (length + 4);
6691 	  }
6692 
6693 	if (GET_MODE (operands[0]) == DImode)
6694 	  strcpy (buf, "bb,*");
6695 	else
6696 	  strcpy (buf, "bb,");
6697 	if ((which == 0 && negated)
6698 	    || (which == 1 && !negated))
6699 	  strcat (buf, "<");
6700 	else
6701 	  strcat (buf, ">=");
6702 	if (nullify)
6703 	  strcat (buf, ",n %0,%1,.+%4");
6704 	else
6705 	  strcat (buf, " %0,%1,.+%4");
6706 	output_asm_insn (buf, operands);
6707 	return output_lbranch (negated ? operands[3] : operands[2],
6708 			       insn, xdelay);
6709     }
6710   return buf;
6711 }
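/* For example (illustrative): with WHICH == 0, NEGATED == 0, a 4-byte
   branch, no nullification and a word-mode operand, output_bb returns
   "bb,< %0,%1,%2", i.e. branch to %2 when the selected bit is set;
   DImode operands get the PA 2.0 "bb,*" forms instead.  */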
6712 
6713 /* This routine handles all the branch-on-variable-bit conditional branch
6714    sequences we might need to generate.  It handles nullification of delay
6715    slots, varying length branches, negated branches and all combinations
6716    of the above.  it returns the appropriate output template to emit the
6717    branch.  */
6718 
6719 const char *
6720 output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6721 {
6722   static char buf[100];
6723   bool useskip;
6724   int nullify = INSN_ANNULLED_BRANCH_P (insn);
6725   int length = get_attr_length (insn);
6726   int xdelay;
6727 
6728   /* A conditional branch to the following instruction (e.g. the delay slot) is
6729      asking for a disaster.  I do not think this can happen as this pattern
6730      is only used when optimizing; jump optimization should eliminate the
6731      jump.  But be prepared just in case.  */
6732 
6733   if (branch_to_delay_slot_p (insn))
6734     return "nop";
6735 
6736   /* If this is a long branch with its delay slot unfilled, set `nullify'
6737      as it can nullify the delay slot and save a nop.  */
6738   if (length == 8 && dbr_sequence_length () == 0)
6739     nullify = 1;
6740 
6741   /* If this is a short forward conditional branch which did not get
6742      its delay slot filled, the delay slot can still be nullified.  */
6743   if (! nullify && length == 4 && dbr_sequence_length () == 0)
6744     nullify = forward_branch_p (insn);
6745 
6746   /* A forward branch over a single nullified insn can be done with an
6747      extrs instruction.  This avoids a single-cycle penalty due to a
6748      mispredicted branch if we fall through (branch not taken).  */
6749   useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6750 
6751   switch (length)
6752     {
6753 
6754       /* All short conditional branches except backwards with an unfilled
6755 	 delay slot.  */
6756       case 4:
6757 	if (useskip)
6758 	  strcpy (buf, "{vextrs,|extrw,s,}");
6759 	else
6760 	  strcpy (buf, "{bvb,|bb,}");
6761 	if (useskip && GET_MODE (operands[0]) == DImode)
6762 	  strcpy (buf, "extrd,s,*");
6763 	else if (GET_MODE (operands[0]) == DImode)
6764 	  strcpy (buf, "bb,*");
6765 	if ((which == 0 && negated)
6766 	     || (which == 1 && ! negated))
6767 	  strcat (buf, ">=");
6768 	else
6769 	  strcat (buf, "<");
6770 	if (useskip)
6771 	  strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6772 	else if (nullify && negated)
6773 	  {
6774 	    if (branch_needs_nop_p (insn))
6775 	      strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
6776 	    else
6777 	      strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6778 	  }
6779 	else if (nullify && ! negated)
6780 	  {
6781 	    if (branch_needs_nop_p (insn))
6782 	      strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
6783 	    else
6784 	      strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6785 	  }
6786 	else if (! nullify && negated)
6787 	  strcat (buf, "{ %0,%3| %0,%%sar,%3}");
6788 	else if (! nullify && ! negated)
6789 	  strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6790 	break;
6791 
6792      /* All long conditionals.  Note a short backward branch with an
6793 	unfilled delay slot is treated just like a long backward branch
6794 	with an unfilled delay slot.  */
6795       case 8:
6796 	/* Handle weird backwards branch with a filled delay slot
6797 	   which is nullified.  */
6798 	if (dbr_sequence_length () != 0
6799 	    && ! forward_branch_p (insn)
6800 	    && nullify)
6801 	  {
6802 	    strcpy (buf, "{bvb,|bb,}");
6803 	    if (GET_MODE (operands[0]) == DImode)
6804 	      strcat (buf, "*");
6805 	    if ((which == 0 && negated)
6806 		|| (which == 1 && ! negated))
6807 	      strcat (buf, "<");
6808 	    else
6809 	      strcat (buf, ">=");
6810 	    if (negated)
6811 	      strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6812 	    else
6813 	      strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6814 	  }
6815 	/* Handle short backwards branch with an unfilled delay slot.
6816 	   Using a bb;nop rather than extrs;bl saves 1 cycle for both
6817 	   taken and untaken branches.  */
6818 	else if (dbr_sequence_length () == 0
6819 		 && ! forward_branch_p (insn)
6820 		 && INSN_ADDRESSES_SET_P ()
6821 		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6822 				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6823 	  {
6824 	    strcpy (buf, "{bvb,|bb,}");
6825 	    if (GET_MODE (operands[0]) == DImode)
6826 	      strcat (buf, "*");
6827 	    if ((which == 0 && negated)
6828 		|| (which == 1 && ! negated))
6829 	      strcat (buf, ">=");
6830 	    else
6831 	      strcat (buf, "<");
6832 	    if (negated)
6833 	      strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6834 	    else
6835 	      strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6836 	  }
6837 	else
6838 	  {
6839 	    strcpy (buf, "{vextrs,|extrw,s,}");
6840 	    if (GET_MODE (operands[0]) == DImode)
6841 	      strcpy (buf, "extrd,s,*");
6842 	    if ((which == 0 && negated)
6843 		|| (which == 1 && ! negated))
6844 	      strcat (buf, "<");
6845 	    else
6846 	      strcat (buf, ">=");
6847 	    if (nullify && negated)
6848 	      strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6849 	    else if (nullify && ! negated)
6850 	      strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6851 	    else if (negated)
6852 	      strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6853 	    else
6854 	      strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6855 	  }
6856 	break;
6857 
6858       default:
6859 	/* The reversed conditional branch must branch over one additional
6860 	   instruction if the delay slot is filled and needs to be extracted
6861 	   by output_lbranch.  If the delay slot is empty or this is a
6862 	   nullified forward branch, the instruction after the reversed
6863 	   condition branch must be nullified.  */
6864 	if (dbr_sequence_length () == 0
6865 	    || (nullify && forward_branch_p (insn)))
6866 	  {
6867 	    nullify = 1;
6868 	    xdelay = 0;
6869 	    operands[4] = GEN_INT (length);
6870 	  }
6871 	else
6872 	  {
6873 	    xdelay = 1;
6874 	    operands[4] = GEN_INT (length + 4);
6875 	  }
6876 
6877 	if (GET_MODE (operands[0]) == DImode)
6878 	  strcpy (buf, "bb,*");
6879 	else
6880 	  strcpy (buf, "{bvb,|bb,}");
6881 	if ((which == 0 && negated)
6882 	    || (which == 1 && !negated))
6883 	  strcat (buf, "<");
6884 	else
6885 	  strcat (buf, ">=");
6886 	if (nullify)
6887 	  strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
6888 	else
6889 	  strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
6890 	output_asm_insn (buf, operands);
6891 	return output_lbranch (negated ? operands[3] : operands[2],
6892 			       insn, xdelay);
6893     }
6894   return buf;
6895 }
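/* A note on the "{...|...}" braces in the templates above and below:
   output_asm_insn emits one alternative per assembler dialect, here
   the PA 1.x mnemonics (bvb, comb, ...) versus the PA 2.0 mnemonics
   (bb with %sar, cmpb, ...), so one template serves both instruction
   sets.  */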
6896 
6897 /* Return the output template for emitting a dbra type insn.
6898 
6899    Note it may perform some output operations on its own before
6900    returning the final output string.  */
6901 const char *
6902 output_dbra (rtx *operands, rtx insn, int which_alternative)
6903 {
6904   int length = get_attr_length (insn);
6905 
6906   /* A conditional branch to the following instruction (e.g. the delay slot) is
6907      asking for a disaster.  Be prepared!  */
6908 
6909   if (branch_to_delay_slot_p (insn))
6910     {
6911       if (which_alternative == 0)
6912 	return "ldo %1(%0),%0";
6913       else if (which_alternative == 1)
6914 	{
6915 	  output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
6916 	  output_asm_insn ("ldw -16(%%r30),%4", operands);
6917 	  output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6918 	  return "{fldws|fldw} -16(%%r30),%0";
6919 	}
6920       else
6921 	{
6922 	  output_asm_insn ("ldw %0,%4", operands);
6923 	  return "ldo %1(%4),%4\n\tstw %4,%0";
6924 	}
6925     }
6926 
6927   if (which_alternative == 0)
6928     {
6929       int nullify = INSN_ANNULLED_BRANCH_P (insn);
6930       int xdelay;
6931 
6932       /* If this is a long branch with its delay slot unfilled, set `nullify'
6933 	 as it can nullify the delay slot and save a nop.  */
6934       if (length == 8 && dbr_sequence_length () == 0)
6935 	nullify = 1;
6936 
6937       /* If this is a short forward conditional branch which did not get
6938 	 its delay slot filled, the delay slot can still be nullified.  */
6939       if (! nullify && length == 4 && dbr_sequence_length () == 0)
6940 	nullify = forward_branch_p (insn);
6941 
6942       switch (length)
6943 	{
6944 	case 4:
6945 	  if (nullify)
6946 	    {
6947 	      if (branch_needs_nop_p (insn))
6948 		return "addib,%C2,n %1,%0,%3%#";
6949 	      else
6950 		return "addib,%C2,n %1,%0,%3";
6951 	    }
6952 	  else
6953 	    return "addib,%C2 %1,%0,%3";
6954 
6955 	case 8:
6956 	  /* Handle weird backwards branch with a filled delay slot
6957 	     which is nullified.  */
6958 	  if (dbr_sequence_length () != 0
6959 	      && ! forward_branch_p (insn)
6960 	      && nullify)
6961 	    return "addib,%N2,n %1,%0,.+12\n\tb %3";
6962 	  /* Handle short backwards branch with an unfilled delay slot.
6963 	     Using an addb;nop rather than addi;bl saves 1 cycle for both
6964 	     taken and untaken branches.  */
6965 	  else if (dbr_sequence_length () == 0
6966 		   && ! forward_branch_p (insn)
6967 		   && INSN_ADDRESSES_SET_P ()
6968 		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6969 				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6970 	      return "addib,%C2 %1,%0,%3%#";
6971 
6972 	  /* Handle normal cases.  */
6973 	  if (nullify)
6974 	    return "addi,%N2 %1,%0,%0\n\tb,n %3";
6975 	  else
6976 	    return "addi,%N2 %1,%0,%0\n\tb %3";
6977 
6978 	default:
6979 	  /* The reversed conditional branch must branch over one additional
6980 	     instruction if the delay slot is filled and needs to be extracted
6981 	     by output_lbranch.  If the delay slot is empty or this is a
6982 	     nullified forward branch, the instruction after the reversed
6983 	     condition branch must be nullified.  */
6984 	  if (dbr_sequence_length () == 0
6985 	      || (nullify && forward_branch_p (insn)))
6986 	    {
6987 	      nullify = 1;
6988 	      xdelay = 0;
6989 	      operands[4] = GEN_INT (length);
6990 	    }
6991 	  else
6992 	    {
6993 	      xdelay = 1;
6994 	      operands[4] = GEN_INT (length + 4);
6995 	    }
6996 
6997 	  if (nullify)
6998 	    output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
6999 	  else
7000 	    output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7001 
7002 	  return output_lbranch (operands[3], insn, xdelay);
7003 	}
7004 
7005     }
7006   /* Deal with gross reload from FP register case.  */
7007   else if (which_alternative == 1)
7008     {
7009       /* Move loop counter from FP register to MEM, then into a GR,
7010 	 increment the GR, store the GR into MEM, and finally reload
7011 	 the FP register from MEM within the branch's delay slot.  */
7012       output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7013 		       operands);
7014       output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7015       if (length == 24)
7016 	return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7017       else if (length == 28)
7018 	return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7019       else
7020 	{
7021 	  operands[5] = GEN_INT (length - 16);
7022 	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7023 	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7024 	  return output_lbranch (operands[3], insn, 0);
7025 	}
7026     }
7027   /* Deal with gross reload from memory case.  */
7028   else
7029     {
7030       /* Reload the loop counter from memory; the store back to memory
7031 	 happens in the branch's delay slot.  */
7032       output_asm_insn ("ldw %0,%4", operands);
7033       if (length == 12)
7034 	return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7035       else if (length == 16)
7036 	return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7037       else
7038 	{
7039 	  operands[5] = GEN_INT (length - 4);
7040 	  output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7041 	  return output_lbranch (operands[3], insn, 0);
7042 	}
7043     }
7044 }
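/* A sketch of the common case: with the counter in a GR and the
   target in reach, output_dbra returns "addib,%C2 %1,%0,%3", which
   for a typical counted loop might assemble as

	addib,<> -1,%r4,L$loop	; decrement and branch while nonzero

   (registers and condition here are illustrative only).  */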
7045 
7046 /* Return the output template for emitting a movb type insn.
7047 
7048    Note it may perform some output operations on its own before
7049    returning the final output string.  */
7050 const char *
7051 output_movb (rtx *operands, rtx insn, int which_alternative,
7052 	     int reverse_comparison)
7053 {
7054   int length = get_attr_length (insn);
7055 
7056   /* A conditional branch to the following instruction (e.g. the delay slot) is
7057      asking for a disaster.  Be prepared!  */
7058 
7059   if (branch_to_delay_slot_p (insn))
7060     {
7061       if (which_alternative == 0)
7062 	return "copy %1,%0";
7063       else if (which_alternative == 1)
7064 	{
7065 	  output_asm_insn ("stw %1,-16(%%r30)", operands);
7066 	  return "{fldws|fldw} -16(%%r30),%0";
7067 	}
7068       else if (which_alternative == 2)
7069 	return "stw %1,%0";
7070       else
7071 	return "mtsar %r1";
7072     }
7073 
7074   /* Support the second variant.  */
7075   if (reverse_comparison)
7076     PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7077 
7078   if (which_alternative == 0)
7079     {
7080       int nullify = INSN_ANNULLED_BRANCH_P (insn);
7081       int xdelay;
7082 
7083       /* If this is a long branch with its delay slot unfilled, set `nullify'
7084 	 as it can nullify the delay slot and save a nop.  */
7085       if (length == 8 && dbr_sequence_length () == 0)
7086 	nullify = 1;
7087 
7088       /* If this is a short forward conditional branch which did not get
7089 	 its delay slot filled, the delay slot can still be nullified.  */
7090       if (! nullify && length == 4 && dbr_sequence_length () == 0)
7091 	nullify = forward_branch_p (insn);
7092 
7093       switch (length)
7094 	{
7095 	case 4:
7096 	  if (nullify)
7097 	    {
7098 	      if (branch_needs_nop_p (insn))
7099 		return "movb,%C2,n %1,%0,%3%#";
7100 	      else
7101 		return "movb,%C2,n %1,%0,%3";
7102 	    }
7103 	  else
7104 	    return "movb,%C2 %1,%0,%3";
7105 
7106 	case 8:
7107 	  /* Handle weird backwards branch with a filled delay slot
7108 	     which is nullified.  */
7109 	  if (dbr_sequence_length () != 0
7110 	      && ! forward_branch_p (insn)
7111 	      && nullify)
7112 	    return "movb,%N2,n %1,%0,.+12\n\tb %3";
7113 
7114 	  /* Handle short backwards branch with an unfilled delay slot.
7115 	     Using a movb;nop rather than or;bl saves 1 cycle for both
7116 	     taken and untaken branches.  */
7117 	  else if (dbr_sequence_length () == 0
7118 		   && ! forward_branch_p (insn)
7119 		   && INSN_ADDRESSES_SET_P ()
7120 		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7121 				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7122 	    return "movb,%C2 %1,%0,%3%#";
7123 	  /* Handle normal cases.  */
7124 	  if (nullify)
7125 	    return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7126 	  else
7127 	    return "or,%N2 %1,%%r0,%0\n\tb %3";
7128 
7129 	default:
7130 	  /* The reversed conditional branch must branch over one additional
7131 	     instruction if the delay slot is filled and needs to be extracted
7132 	     by output_lbranch.  If the delay slot is empty or this is a
7133 	     nullified forward branch, the instruction after the reversed
7134 	     condition branch must be nullified.  */
7135 	  if (dbr_sequence_length () == 0
7136 	      || (nullify && forward_branch_p (insn)))
7137 	    {
7138 	      nullify = 1;
7139 	      xdelay = 0;
7140 	      operands[4] = GEN_INT (length);
7141 	    }
7142 	  else
7143 	    {
7144 	      xdelay = 1;
7145 	      operands[4] = GEN_INT (length + 4);
7146 	    }
7147 
7148 	  if (nullify)
7149 	    output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7150 	  else
7151 	    output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7152 
7153 	  return output_lbranch (operands[3], insn, xdelay);
7154 	}
7155     }
7156   /* Deal with gross reload for FP destination register case.  */
7157   else if (which_alternative == 1)
7158     {
7159       /* Move source register to MEM, perform the branch test, then
7160 	 finally load the FP register from MEM within the branch's
7161 	 delay slot.  */
7162       output_asm_insn ("stw %1,-16(%%r30)", operands);
7163       if (length == 12)
7164 	return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7165       else if (length == 16)
7166 	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7167       else
7168 	{
7169 	  operands[4] = GEN_INT (length - 4);
7170 	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7171 	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7172 	  return output_lbranch (operands[3], insn, 0);
7173 	}
7174     }
7175   /* Deal with gross reload from memory case.  */
7176   else if (which_alternative == 2)
7177     {
7178       /* Reload the loop counter from memory; the store back to memory
7179 	 happens in the branch's delay slot.  */
7180       if (length == 8)
7181 	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7182       else if (length == 12)
7183 	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7184       else
7185 	{
7186 	  operands[4] = GEN_INT (length);
7187 	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7188 			   operands);
7189 	  return output_lbranch (operands[3], insn, 0);
7190 	}
7191     }
7192   /* Handle SAR as a destination.  */
7193   else
7194     {
7195       if (length == 8)
7196 	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7197       else if (length == 12)
7198 	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7199       else
7200 	{
7201 	  operands[4] = GEN_INT (length);
7202 	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7203 			   operands);
7204 	  return output_lbranch (operands[3], insn, 0);
7205 	}
7206     }
7207 }
7208 
7209 /* Copy any FP arguments in INSN into integer registers.  */
7210 static void
7211 copy_fp_args (rtx insn)
7212 {
7213   rtx link;
7214   rtx xoperands[2];
7215 
7216   for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7217     {
7218       int arg_mode, regno;
7219       rtx use = XEXP (link, 0);
7220 
7221       if (! (GET_CODE (use) == USE
7222 	  && GET_CODE (XEXP (use, 0)) == REG
7223 	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7224 	continue;
7225 
7226       arg_mode = GET_MODE (XEXP (use, 0));
7227       regno = REGNO (XEXP (use, 0));
7228 
7229       /* Is it a floating point register?  */
7230       if (regno >= 32 && regno <= 39)
7231 	{
7232 	  /* Copy the FP register into an integer register via memory.  */
7233 	  if (arg_mode == SFmode)
7234 	    {
7235 	      xoperands[0] = XEXP (use, 0);
7236 	      xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7237 	      output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7238 	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7239 	    }
7240 	  else
7241 	    {
7242 	      xoperands[0] = XEXP (use, 0);
7243 	      xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7244 	      output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7245 	      output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7246 	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7247 	    }
7248 	}
7249     }
7250 }
7251 
7252 /* Compute length of the FP argument copy sequence for INSN.  */
7253 static int
7254 length_fp_args (rtx insn)
7255 {
7256   int length = 0;
7257   rtx link;
7258 
7259   for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7260     {
7261       int arg_mode, regno;
7262       rtx use = XEXP (link, 0);
7263 
7264       if (! (GET_CODE (use) == USE
7265 	  && GET_CODE (XEXP (use, 0)) == REG
7266 	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7267 	continue;
7268 
7269       arg_mode = GET_MODE (XEXP (use, 0));
7270       regno = REGNO (XEXP (use, 0));
7271 
7272       /* Is it a floating point register?  */
7273       if (regno >= 32 && regno <= 39)
7274 	{
7275 	  if (arg_mode == SFmode)
7276 	    length += 8;
7277 	  else
7278 	    length += 12;
7279 	}
7280     }
7281 
7282   return length;
7283 }
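/* These lengths mirror copy_fp_args: an SFmode argument costs a store
   plus one load (two insns, 8 bytes); a DFmode argument costs a store
   plus two loads (three insns, 12 bytes).  */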
7284 
7285 /* Return the attribute length for the millicode call instruction INSN.
7286    The length must match the code generated by output_millicode_call.
7287    We include the delay slot in the returned length as it is better to
7288    overestimate the length than to underestimate it.  */
7289 
7290 int
7291 attr_length_millicode_call (rtx insn)
7292 {
7293   unsigned long distance = -1;
7294   unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7295 
7296   if (INSN_ADDRESSES_SET_P ())
7297     {
7298       distance = (total + insn_current_reference_address (insn));
7299       if (distance < total)
7300 	distance = -1;
7301     }
7302 
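  /* DISTANCE is a conservative upper bound on how far the call might
     have to reach: the code bytes already output (zero when this
     function has its own named section) plus the insn's current
     reference address.  The "distance < total" test detects unsigned
     overflow, in which case we keep the worst case (-1).  */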
7303   if (TARGET_64BIT)
7304     {
7305       if (!TARGET_LONG_CALLS && distance < 7600000)
7306 	return 8;
7307 
7308       return 20;
7309     }
7310   else if (TARGET_PORTABLE_RUNTIME)
7311     return 24;
7312   else
7313     {
7314       if (!TARGET_LONG_CALLS && distance < 240000)
7315 	return 8;
7316 
7317       if (TARGET_LONG_ABS_CALL && !flag_pic)
7318 	return 12;
7319 
7320       return 24;
7321     }
7322 }
7323 
7324 /* INSN is a function call.  It may have an unconditional jump
7325    in its delay slot.
7326 
7327    CALL_DEST is the routine we are calling.  */
7328 
7329 const char *
7330 output_millicode_call (rtx insn, rtx call_dest)
7331 {
7332   int attr_length = get_attr_length (insn);
7333   int seq_length = dbr_sequence_length ();
7334   int distance;
7335   rtx seq_insn;
7336   rtx xoperands[3];
7337 
7338   xoperands[0] = call_dest;
7339   xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7340 
7341   /* Handle the common case where we are sure that the branch will
7342      reach the beginning of the $CODE$ subspace.  The within-reach
7343      form of the $$sh_func_adrs call has a length of 28.  Because
7344      it has an attribute type of multi, it never has a nonzero
7345      sequence length.  The length of the $$sh_func_adrs is the same
7346      as certain out of reach PIC calls to other routines.  */
7347   if (!TARGET_LONG_CALLS
7348       && ((seq_length == 0
7349 	   && (attr_length == 12
7350 	       || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
7351 	  || (seq_length != 0 && attr_length == 8)))
7352     {
7353       output_asm_insn ("{bl|b,l} %0,%2", xoperands);
7354     }
7355   else
7356     {
7357       if (TARGET_64BIT)
7358 	{
7359 	  /* It might seem that one insn could be saved by accessing
7360 	     the millicode function using the linkage table.  However,
7361 	     this doesn't work in shared libraries and other dynamically
7362 	     loaded objects.  Using a pc-relative sequence also avoids
7363 	     problems related to the implicit use of the gp register.  */
7364 	  output_asm_insn ("b,l .+8,%%r1", xoperands);
7365 
7366 	  if (TARGET_GAS)
7367 	    {
7368 	      output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7369 	      output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7370 	    }
7371 	  else
7372 	    {
7373 	      xoperands[1] = gen_label_rtx ();
7374 	      output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7375 	      targetm.asm_out.internal_label (asm_out_file, "L",
7376 					 CODE_LABEL_NUMBER (xoperands[1]));
7377 	      output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7378 	    }
7379 
7380 	  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7381 	}
7382       else if (TARGET_PORTABLE_RUNTIME)
7383 	{
7384 	  /* Pure portable runtime doesn't allow be/ble; we also don't
7385 	     have PIC support in the assembler/linker, so this sequence
7386 	     is needed.  */
7387 
7388 	  /* Get the address of our target into %r1.  */
7389 	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
7390 	  output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7391 
7392 	  /* Get our return address into %r31.  */
7393 	  output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7394 	  output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7395 
7396 	  /* Jump to our target address in %r1.  */
7397 	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
7398 	}
7399       else if (!flag_pic)
7400 	{
7401 	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
7402 	  if (TARGET_PA_20)
7403 	    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7404 	  else
7405 	    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7406 	}
7407       else
7408 	{
7409 	  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7410 	  output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7411 
7412 	  if (TARGET_SOM || !TARGET_GAS)
7413 	    {
7414 	      /* The HP assembler can generate relocations for the
7415 		 difference of two symbols.  GAS can do this for a
7416 		 millicode symbol but not an arbitrary external
7417 		 symbol when generating SOM output.  */
7418 	      xoperands[1] = gen_label_rtx ();
7419 	      targetm.asm_out.internal_label (asm_out_file, "L",
7420 					 CODE_LABEL_NUMBER (xoperands[1]));
7421 	      output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7422 	      output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7423 	    }
7424 	  else
7425 	    {
7426 	      output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7427 	      output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7428 			       xoperands);
7429 	    }
7430 
7431 	  /* Jump to our target address in %r1.  */
7432 	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
7433 	}
7434     }
7435 
7436   if (seq_length == 0)
7437     output_asm_insn ("nop", xoperands);
7438 
7439   /* We are done if there isn't a jump in the delay slot.  */
7440   if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7441     return "";
7442 
7443   /* This call has an unconditional jump in its delay slot.  */
7444   xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7445 
7446   /* See if the return address can be adjusted.  Use the containing
7447      sequence insn's address.  */
7448   if (INSN_ADDRESSES_SET_P ())
7449     {
7450       seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7451       distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7452 		  - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7453 
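      /* If the jump target is within reach, redirect the return
	 address rather than branching: %2 is the link register, and
	 the return point is exactly where the label below is output,
	 so "ldo %0-%1(%2),%2" adds (target - label) and the millicode
	 routine returns straight to the jump target.  */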
7454       if (VAL_14_BITS_P (distance))
7455 	{
7456 	  xoperands[1] = gen_label_rtx ();
7457 	  output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
7458 	  targetm.asm_out.internal_label (asm_out_file, "L",
7459 					  CODE_LABEL_NUMBER (xoperands[1]));
7460 	}
7461       else
7462 	/* ??? This branch may not reach its target.  */
7463 	output_asm_insn ("nop\n\tb,n %0", xoperands);
7464     }
7465   else
7466     /* ??? This branch may not reach its target.  */
7467     output_asm_insn ("nop\n\tb,n %0", xoperands);
7468 
7469   /* Delete the jump.  */
7470   SET_INSN_DELETED (NEXT_INSN (insn));
7471 
7472   return "";
7473 }
7474 
7475 /* Return the attribute length of the call instruction INSN.  The SIBCALL
7476    flag indicates whether INSN is a regular call or a sibling call.  The
7477    length returned must be longer than the code actually generated by
7478    output_call.  Since branch shortening is done before delay branch
7479    sequencing, there is no way to determine whether or not the delay
7480    slot will be filled during branch shortening.  Even when the delay
7481    slot is filled, we may have to add a nop if the delay slot contains
7482    a branch that can't reach its target.  Thus, we always have to include
7483    the delay slot in the length estimate.  This used to be done in
7484    pa_adjust_insn_length but we do it here now as some sequences always
7485    fill the delay slot and we can save four bytes in the estimate for
7486    these sequences.  */
7487 
7488 int
7489 attr_length_call (rtx insn, int sibcall)
7490 {
7491   int local_call;
7492   rtx call, call_dest;
7493   tree call_decl;
7494   int length = 0;
7495   rtx pat = PATTERN (insn);
7496   unsigned long distance = -1;
7497 
7498   gcc_assert (GET_CODE (insn) == CALL_INSN);
7499 
7500   if (INSN_ADDRESSES_SET_P ())
7501     {
7502       unsigned long total;
7503 
7504       total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7505       distance = (total + insn_current_reference_address (insn));
7506       if (distance < total)
7507 	distance = -1;
7508     }
7509 
7510   gcc_assert (GET_CODE (pat) == PARALLEL);
7511 
7512   /* Get the call rtx.  */
7513   call = XVECEXP (pat, 0, 0);
7514   if (GET_CODE (call) == SET)
7515     call = SET_SRC (call);
7516 
7517   gcc_assert (GET_CODE (call) == CALL);
7518 
7519   /* Determine if this is a local call.  */
7520   call_dest = XEXP (XEXP (call, 0), 0);
7521   call_decl = SYMBOL_REF_DECL (call_dest);
7522   local_call = call_decl && targetm.binds_local_p (call_decl);
7523 
7524   /* pc-relative branch.  */
7525   if (!TARGET_LONG_CALLS
7526       && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7527 	  || distance < 240000))
7528     length += 8;
7529 
7530   /* 64-bit plabel sequence.  */
7531   else if (TARGET_64BIT && !local_call)
7532     length += sibcall ? 28 : 24;
7533 
7534   /* non-pic long absolute branch sequence.  */
7535   else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7536     length += 12;
7537 
7538   /* long pc-relative branch sequence.  */
7539   else if (TARGET_LONG_PIC_SDIFF_CALL
7540 	   || (TARGET_GAS && !TARGET_SOM
7541 	       && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7542     {
7543       length += 20;
7544 
7545       if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7546 	length += 8;
7547     }
7548 
7549   /* 32-bit plabel sequence.  */
7550   else
7551     {
7552       length += 32;
7553 
7554       if (TARGET_SOM)
7555 	length += length_fp_args (insn);
7556 
7557       if (flag_pic)
7558 	length += 4;
7559 
7560       if (!TARGET_PA_20)
7561 	{
7562 	  if (!sibcall)
7563 	    length += 8;
7564 
7565 	  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7566 	    length += 8;
7567 	}
7568     }
7569 
7570   return length;
7571 }
7572 
7573 /* INSN is a function call.  It may have an unconditional jump
7574    in its delay slot.
7575 
7576    CALL_DEST is the routine we are calling.  */
7577 
7578 const char *
7579 output_call (rtx insn, rtx call_dest, int sibcall)
7580 {
7581   int delay_insn_deleted = 0;
7582   int delay_slot_filled = 0;
7583   int seq_length = dbr_sequence_length ();
7584   tree call_decl = SYMBOL_REF_DECL (call_dest);
7585   int local_call = call_decl && targetm.binds_local_p (call_decl);
7586   rtx xoperands[2];
7587 
7588   xoperands[0] = call_dest;
7589 
7590   /* Handle the common case where we're sure that the branch will reach
7591      the beginning of the "$CODE$" subspace.  This is the beginning of
7592      the current function if we are in a named section.  */
7593   if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
7594     {
7595       xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7596       output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7597     }
7598   else
7599     {
7600       if (TARGET_64BIT && !local_call)
7601 	{
7602 	  /* ??? As far as I can tell, the HP linker doesn't support the
7603 	     long pc-relative sequence described in the 64-bit runtime
7604 	     architecture.  So, we use a slightly longer indirect call.  */
7605 	  xoperands[0] = get_deferred_plabel (call_dest);
7606 	  xoperands[1] = gen_label_rtx ();
7607 
7608 	  /* If this isn't a sibcall, we put the load of %r27 into the
7609 	     delay slot.  We can't do this in a sibcall as we don't
7610 	     have a second call-clobbered scratch register available.  */
7611 	  if (seq_length != 0
7612 	      && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7613 	      && !sibcall)
7614 	    {
7615 	      final_scan_insn (NEXT_INSN (insn), asm_out_file,
7616 			       optimize, 0, NULL);
7617 
7618 	      /* Now delete the delay insn.  */
7619 	      SET_INSN_DELETED (NEXT_INSN (insn));
7620 	      delay_insn_deleted = 1;
7621 	    }
7622 
7623 	  output_asm_insn ("addil LT'%0,%%r27", xoperands);
7624 	  output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7625 	  output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7626 
7627 	  if (sibcall)
7628 	    {
7629 	      output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7630 	      output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7631 	      output_asm_insn ("bve (%%r1)", xoperands);
7632 	    }
7633 	  else
7634 	    {
7635 	      output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7636 	      output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7637 	      output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7638 	      delay_slot_filled = 1;
7639 	    }
7640 	}
7641       else
7642 	{
7643 	  int indirect_call = 0;
7644 
7645 	  /* Emit a long call.  There are several different sequences
7646 	     of increasing length and complexity.  In most cases,
7647              they don't allow an instruction in the delay slot.  */
7648 	  if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7649 	      && !TARGET_LONG_PIC_SDIFF_CALL
7650 	      && !(TARGET_GAS && !TARGET_SOM
7651 		   && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7652 	      && !TARGET_64BIT)
7653 	    indirect_call = 1;
7654 
7655 	  if (seq_length != 0
7656 	      && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7657 	      && !sibcall
7658 	      && (!TARGET_PA_20
7659 		  || indirect_call
7660 		  || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
7661 	    {
7662 	      /* A non-jump insn in the delay slot.  By definition we can
7663 		 emit this insn before the call (and in fact before argument
7664 		 relocation).  */
7665 	      final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7666 			       NULL);
7667 
7668 	      /* Now delete the delay insn.  */
7669 	      SET_INSN_DELETED (NEXT_INSN (insn));
7670 	      delay_insn_deleted = 1;
7671 	    }
7672 
7673 	  if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7674 	    {
7675 	      /* This is the best sequence for making long calls in
7676 		 non-pic code.  Unfortunately, GNU ld doesn't provide
7677 		 the stub needed for external calls, and GAS's support
7678 		 for this with the SOM linker is buggy.  It is safe
7679 		 to use this for local calls.  */
7680 	      output_asm_insn ("ldil L'%0,%%r1", xoperands);
7681 	      if (sibcall)
7682 		output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7683 	      else
7684 		{
7685 		  if (TARGET_PA_20)
7686 		    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7687 				     xoperands);
7688 		  else
7689 		    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7690 
7691 		  output_asm_insn ("copy %%r31,%%r2", xoperands);
7692 		  delay_slot_filled = 1;
7693 		}
7694 	    }
7695 	  else
7696 	    {
7697 	      if (TARGET_LONG_PIC_SDIFF_CALL)
7698 		{
7699 		  /* The HP assembler and linker can handle relocations
7700 		     for the difference of two symbols.  The HP assembler
7701 		     recognizes the sequence as a pc-relative call and
7702 		     the linker provides stubs when needed.  */
7703 		  xoperands[1] = gen_label_rtx ();
7704 		  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7705 		  output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7706 		  targetm.asm_out.internal_label (asm_out_file, "L",
7707 					     CODE_LABEL_NUMBER (xoperands[1]));
7708 		  output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7709 		}
7710 	      else if (TARGET_GAS && !TARGET_SOM
7711 		       && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7712 		{
7713 		  /*  GAS currently can't generate the relocations that
7714 		      are needed for the SOM linker under HP-UX using this
7715 		      sequence.  The GNU linker doesn't generate the stubs
7716 		      that are needed for external calls on TARGET_ELF32
7717 		      with this sequence.  For now, we have to use a
7718 		      longer plabel sequence when using GAS.  */
7719 		  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7720 		  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7721 				   xoperands);
7722 		  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7723 				   xoperands);
7724 		}
7725 	      else
7726 		{
7727 		  /* Emit a long plabel-based call sequence.  This is
7728 		     essentially an inline implementation of $$dyncall.
7729 		     We don't actually try to call $$dyncall as this is
7730 		     as difficult as calling the function itself.  */
7731 		  xoperands[0] = get_deferred_plabel (call_dest);
7732 		  xoperands[1] = gen_label_rtx ();
7733 
7734 		  /* Since the call is indirect, FP arguments in registers
7735 		     need to be copied to the general registers.  Then, the
7736 		     argument relocation stub will copy them back.  */
7737 		  if (TARGET_SOM)
7738 		    copy_fp_args (insn);
7739 
7740 		  if (flag_pic)
7741 		    {
7742 		      output_asm_insn ("addil LT'%0,%%r19", xoperands);
7743 		      output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7744 		      output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7745 		    }
7746 		  else
7747 		    {
7748 		      output_asm_insn ("addil LR'%0-$global$,%%r27",
7749 				       xoperands);
7750 		      output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7751 				       xoperands);
7752 		    }
7753 
7754 		  output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7755 		  output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7756 		  output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7757 		  output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
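		  /* The four insns above are the plabel check: bit 30
		     of %r1 flags a plabel; if set, clear the two low
		     flag bits, load the callee's GP from word 1 into
		     %r19, and load the real entry point from word 0
		     into %r1.  This matches the layout $$dyncall
		     expects.  */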
7758 
7759 		  if (!sibcall && !TARGET_PA_20)
7760 		    {
7761 		      output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
7762 		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7763 			output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7764 		      else
7765 			output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
7766 		    }
7767 		}
7768 
7769 	      if (TARGET_PA_20)
7770 		{
7771 		  if (sibcall)
7772 		    output_asm_insn ("bve (%%r1)", xoperands);
7773 		  else
7774 		    {
7775 		      if (indirect_call)
7776 			{
7777 			  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7778 			  output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
7779 			  delay_slot_filled = 1;
7780 			}
7781 		      else
7782 			output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7783 		    }
7784 		}
7785 	      else
7786 		{
7787 		  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7788 		    output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7789 				     xoperands);
7790 
7791 		  if (sibcall)
7792 		    {
7793 		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7794 			output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
7795 		      else
7796 			output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
7797 		    }
7798 		  else
7799 		    {
7800 		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
7801 			output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
7802 		      else
7803 			output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
7804 
7805 		      if (indirect_call)
7806 			output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
7807 		      else
7808 			output_asm_insn ("copy %%r31,%%r2", xoperands);
7809 		      delay_slot_filled = 1;
7810 		    }
7811 		}
7812 	    }
7813 	}
7814     }
7815 
7816   if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
7817     output_asm_insn ("nop", xoperands);
7818 
7819   /* We are done if there isn't a jump in the delay slot.  */
7820   if (seq_length == 0
7821       || delay_insn_deleted
7822       || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7823     return "";
7824 
7825   /* A sibcall should never have a branch in the delay slot.  */
7826   gcc_assert (!sibcall);
7827 
7828   /* This call has an unconditional jump in its delay slot.  */
7829   xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7830 
7831   if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
7832     {
7833       /* See if the return address can be adjusted.  Use the containing
7834          sequence insn's address.  This would break the regular call/return
7835          relationship assumed by the table-based EH unwinder, so only do that
7836          if the call is not possibly throwing.  */
7837       rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7838       int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7839 		      - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7840 
7841       if (VAL_14_BITS_P (distance)
7842 	  && !(can_throw_internal (insn) || can_throw_external (insn)))
7843 	{
7844 	  xoperands[1] = gen_label_rtx ();
7845 	  output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
7846 	  targetm.asm_out.internal_label (asm_out_file, "L",
7847 					  CODE_LABEL_NUMBER (xoperands[1]));
7848 	}
7849       else
7850 	output_asm_insn ("nop\n\tb,n %0", xoperands);
7851     }
7852   else
7853     output_asm_insn ("b,n %0", xoperands);
7854 
7855   /* Delete the jump.  */
7856   SET_INSN_DELETED (NEXT_INSN (insn));
7857 
7858   return "";
7859 }
7860 
7861 /* Return the attribute length of the indirect call instruction INSN.
7862    The length must match the code generated by output_indirect_call.
7863    The returned length includes the delay slot.  Currently, the delay
7864    slot of an indirect call sequence is not exposed and it is used by
7865    the sequence itself.  */
7866 
7867 int
7868 attr_length_indirect_call (rtx insn)
7869 {
7870   unsigned long distance = -1;
7871   unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7872 
7873   if (INSN_ADDRESSES_SET_P ())
7874     {
7875       distance = (total + insn_current_reference_address (insn));
7876       if (distance < total)
7877 	distance = -1;
7878     }
7879 
7880   if (TARGET_64BIT)
7881     return 12;
7882 
7883   if (TARGET_FAST_INDIRECT_CALLS
7884       || (!TARGET_PORTABLE_RUNTIME
7885 	  && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
7886 	      || distance < 240000)))
7887     return 8;
7888 
7889   if (flag_pic)
7890     return 24;
7891 
7892   if (TARGET_PORTABLE_RUNTIME)
7893     return 20;
7894 
7895   /* Out of reach; we can use ble.  */
7896   return 12;
7897 }
7898 
7899 const char *
7900 output_indirect_call (rtx insn, rtx call_dest)
7901 {
7902   rtx xoperands[1];
7903 
7904   if (TARGET_64BIT)
7905     {
7906       xoperands[0] = call_dest;
7907       output_asm_insn ("ldd 16(%0),%%r2", xoperands);
7908       output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
7909       return "";
7910     }
7911 
7912   /* First the special case for kernels, level 0 systems, etc.  */
7913   if (TARGET_FAST_INDIRECT_CALLS)
7914     return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
7915 
7916   /* Now the normal case -- we can reach $$dyncall directly or
7917      we're sure that we can get there via a long-branch stub.
7918 
7919      No need to check target flags as the length uniquely identifies
7920      the remaining cases.  */
7921   if (attr_length_indirect_call (insn) == 8)
7922     {
7923       /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
7924 	 $$dyncall.  Since BLE uses %r31 as the link register, the 22-bit
7925 	 variant of the B,L instruction can't be used on the SOM target.  */
7926       if (TARGET_PA_20 && !TARGET_SOM)
7927 	return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
7928       else
7929 	return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
7930     }
7931 
7932   /* Long millicode call, but we are not generating PIC or portable runtime
7933      code.  */
7934   if (attr_length_indirect_call (insn) == 12)
7935     return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
7936 
7937   /* Long millicode call for portable runtime.  */
7938   if (attr_length_indirect_call (insn) == 20)
7939     return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
7940 
7941   /* We need a long PIC call to $$dyncall.  */
7942   xoperands[0] = NULL_RTX;
7943   output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7944   if (TARGET_SOM || !TARGET_GAS)
7945     {
7946       xoperands[0] = gen_label_rtx ();
7947       output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
7948       targetm.asm_out.internal_label (asm_out_file, "L",
7949 				      CODE_LABEL_NUMBER (xoperands[0]));
7950       output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
7951     }
7952   else
7953     {
7954       output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
7955       output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
7956 		       xoperands);
7957     }
7958   output_asm_insn ("blr %%r0,%%r2", xoperands);
7959   output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
7960   return "";
7961 }
7962 
7963 /* Return the total length of the save and restore instructions needed for
7964    the data linkage table pointer (i.e., the PIC register) across the call
7965    instruction INSN.  No-return calls do not require a save and restore.
7966    In addition, we may be able to avoid the save and restore for calls
7967    within the same translation unit.  */
7968 
7969 int
7970 attr_length_save_restore_dltp (rtx insn)
7971 {
7972   if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
7973     return 0;
7974 
7975   return 8;
7976 }
7977 
7978 /* In HP-UX 8.0's shared library scheme, special relocations are needed
7979    for function labels if they might be passed to a function
7980    in a shared library (because shared libraries don't live in code
7981    space), and special magic is needed to construct their address.  */
7982 
7983 void
7984 hppa_encode_label (rtx sym)
7985 {
7986   const char *str = XSTR (sym, 0);
7987   int len = strlen (str) + 1;
7988   char *newstr, *p;
7989 
7990   p = newstr = XALLOCAVEC (char, len + 1);
7991   *p++ = '@';
7992   strcpy (p, str);
7993 
7994   XSTR (sym, 0) = ggc_alloc_string (newstr, len);
7995 }
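/* For example, this turns "foo" into "@foo"; the '@' marks a function
   label (tested via FUNCTION_NAME_P) and is stripped again by
   pa_strip_name_encoding below before the name is written out.  */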
7996 
7997 static void
7998 pa_encode_section_info (tree decl, rtx rtl, int first)
7999 {
8000   int old_referenced = 0;
8001 
8002   if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8003     old_referenced
8004       = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8005 
8006   default_encode_section_info (decl, rtl, first);
8007 
8008   if (first && TEXT_SPACE_P (decl))
8009     {
8010       SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8011       if (TREE_CODE (decl) == FUNCTION_DECL)
8012 	hppa_encode_label (XEXP (rtl, 0));
8013     }
8014   else if (old_referenced)
8015     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8016 }
8017 
8018 /* This is the rough inverse of pa_encode_section_info.  */
8019 
8020 static const char *
8021 pa_strip_name_encoding (const char *str)
8022 {
8023   str += (*str == '@');
8024   str += (*str == '*');
8025   return str;
8026 }
8027 
8028 int
8029 function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8030 {
8031   return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
8032 }
8033 
8034 /* Returns 1 if OP is a function label involved in a simple addition
8035    with a constant.  Used to keep certain patterns from matching
8036    during instruction combination.  */
8037 int
8038 is_function_label_plus_const (rtx op)
8039 {
8040   /* Strip off any CONST.  */
8041   if (GET_CODE (op) == CONST)
8042     op = XEXP (op, 0);
8043 
8044   return (GET_CODE (op) == PLUS
8045 	  && function_label_operand (XEXP (op, 0), Pmode)
8046 	  && GET_CODE (XEXP (op, 1)) == CONST_INT);
8047 }
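/* E.g., this accepts RTL of the form
   (const (plus (symbol_ref "@foo") (const_int 4))).  */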
8048 
8049 /* Output assembly code for a thunk to FUNCTION.  */
8050 
8051 static void
8052 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8053 			HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8054 			tree function)
8055 {
8056   static unsigned int current_thunk_number;
8057   int val_14 = VAL_14_BITS_P (delta);
8058   unsigned int old_last_address = last_address, nbytes = 0;
8059   char label[16];
8060   rtx xoperands[4];
8061 
8062   xoperands[0] = XEXP (DECL_RTL (function), 0);
8063   xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8064   xoperands[2] = GEN_INT (delta);
8065 
8066   ASM_OUTPUT_LABEL (file, XSTR (xoperands[1], 0));
8067   fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
8068 
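  /* Conceptually, the thunk only adjusts the first argument (the
     'this' pointer, in %r26) by DELTA and tail-branches to FUNCTION;
     the simplest case below comes out as

	b function
	ldo delta(%r26),%r26	; delay slot

     and the remaining cases differ only in how the branch reaches
     FUNCTION.  */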
8069   /* Output the thunk.  We know that the function is in the same
8070      translation unit (i.e., the same space) as the thunk, and that
8071      thunks are output after their method.  Thus, we don't need an
8072      external branch to reach the function.  With SOM and GAS,
8073      functions and thunks are effectively in different sections.
8074      Thus, we can always use an IA-relative branch and the linker
8075      will add a long branch stub if necessary.
8076 
8077      However, we have to be careful when generating PIC code on the
8078      SOM port to ensure that the sequence does not transfer to an
8079      import stub for the target function as this could clobber the
8080      return value saved at SP-24.  This would also apply to the
8081      32-bit linux port if the multi-space model is implemented.  */
8082   if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8083        && !(flag_pic && TREE_PUBLIC (function))
8084        && (TARGET_GAS || last_address < 262132))
8085       || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8086 	  && ((targetm.have_named_sections
8087 	       && DECL_SECTION_NAME (thunk_fndecl) != NULL
8088 	       /* The GNU 64-bit linker has rather poor stub management.
8089 		  So, we use a long branch from thunks that aren't in
8090 		  the same section as the target function.  */
8091 	       && ((!TARGET_64BIT
8092 		    && (DECL_SECTION_NAME (thunk_fndecl)
8093 			!= DECL_SECTION_NAME (function)))
8094 		   || ((DECL_SECTION_NAME (thunk_fndecl)
8095 			== DECL_SECTION_NAME (function))
8096 		       && last_address < 262132)))
8097 	      || (targetm.have_named_sections
8098 		  && DECL_SECTION_NAME (thunk_fndecl) == NULL
8099 		  && DECL_SECTION_NAME (function) == NULL
8100 		  && last_address < 262132)
8101 	      || (!targetm.have_named_sections && last_address < 262132))))
8102     {
8103       if (!val_14)
8104 	output_asm_insn ("addil L'%2,%%r26", xoperands);
8105 
8106       output_asm_insn ("b %0", xoperands);
8107 
8108       if (val_14)
8109 	{
8110 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8111 	  nbytes += 8;
8112 	}
8113       else
8114 	{
8115 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8116 	  nbytes += 12;
8117 	}
8118     }
8119   else if (TARGET_64BIT)
8120     {
8121       /* We only have one call-clobbered scratch register, so we can't
8122          make use of the delay slot if delta doesn't fit in 14 bits.  */
8123       if (!val_14)
8124 	{
8125 	  output_asm_insn ("addil L'%2,%%r26", xoperands);
8126 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8127 	}
8128 
8129       output_asm_insn ("b,l .+8,%%r1", xoperands);
8130 
8131       if (TARGET_GAS)
8132 	{
8133 	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8134 	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
8135 	}
8136       else
8137 	{
8138 	  xoperands[3] = GEN_INT (val_14 ? 8 : 16);
8139 	  output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
8140 	}
8141 
8142       if (val_14)
8143 	{
8144 	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
8145 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8146 	  nbytes += 20;
8147 	}
8148       else
8149 	{
8150 	  output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8151 	  nbytes += 24;
8152 	}
8153     }
8154   else if (TARGET_PORTABLE_RUNTIME)
8155     {
8156       output_asm_insn ("ldil L'%0,%%r1", xoperands);
8157       output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8158 
8159       if (!val_14)
8160 	output_asm_insn ("addil L'%2,%%r26", xoperands);
8161 
8162       output_asm_insn ("bv %%r0(%%r22)", xoperands);
8163 
8164       if (val_14)
8165 	{
8166 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8167 	  nbytes += 16;
8168 	}
8169       else
8170 	{
8171 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8172 	  nbytes += 20;
8173 	}
8174     }
8175   else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8176     {
8177       /* The function is accessible from outside this module.  The only
8178 	 way to avoid an import stub between the thunk and function is to
8179 	 call the function directly with an indirect sequence similar to
8180 	 that used by $$dyncall.  This is possible because $$dyncall acts
8181 	 as the import stub in an indirect call.  */
8182       ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8183       xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8184       output_asm_insn ("addil LT'%3,%%r19", xoperands);
8185       output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8186       output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8187       output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8188       output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8189       output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8190       output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8191 
8192       if (!val_14)
8193 	{
8194 	  output_asm_insn ("addil L'%2,%%r26", xoperands);
8195 	  nbytes += 4;
8196 	}
8197 
8198       if (TARGET_PA_20)
8199 	{
8200 	  output_asm_insn ("bve (%%r22)", xoperands);
8201 	  nbytes += 36;
8202 	}
8203       else if (TARGET_NO_SPACE_REGS)
8204 	{
8205 	  output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8206 	  nbytes += 36;
8207 	}
8208       else
8209 	{
8210 	  output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8211 	  output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8212 	  output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8213 	  nbytes += 44;
8214 	}
8215 
8216       if (val_14)
8217 	output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8218       else
8219 	output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8220     }
8221   else if (flag_pic)
8222     {
8223       output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8224 
8225       if (TARGET_SOM || !TARGET_GAS)
8226 	{
8227 	  output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
8228 	  output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
8229 	}
8230       else
8231 	{
8232 	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8233 	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
8234 	}
8235 
8236       if (!val_14)
8237 	output_asm_insn ("addil L'%2,%%r26", xoperands);
8238 
8239       output_asm_insn ("bv %%r0(%%r22)", xoperands);
8240 
8241       if (val_14)
8242 	{
8243 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8244 	  nbytes += 20;
8245 	}
8246       else
8247 	{
8248 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8249 	  nbytes += 24;
8250 	}
8251     }
8252   else
8253     {
8254       if (!val_14)
8255 	output_asm_insn ("addil L'%2,%%r26", xoperands);
8256 
8257       output_asm_insn ("ldil L'%0,%%r22", xoperands);
8258       output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8259 
8260       if (val_14)
8261 	{
8262 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8263 	  nbytes += 12;
8264 	}
8265       else
8266 	{
8267 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8268 	  nbytes += 16;
8269 	}
8270     }
8271 
8272   fprintf (file, "\t.EXIT\n\t.PROCEND\n");
8273 
8274   if (TARGET_SOM && TARGET_GAS)
8275     {
8276       /* We're done with this subspace except possibly for some additional
8277 	 debug information.  Forget that we are in this subspace to ensure
8278 	 that the next function is output in its own subspace.  */
8279       in_section = NULL;
8280       cfun->machine->in_nsubspa = 2;
8281     }
8282 
8283   if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8284     {
8285       switch_to_section (data_section);
8286       output_asm_insn (".align 4", xoperands);
8287       ASM_OUTPUT_LABEL (file, label);
8288       output_asm_insn (".word P'%0", xoperands);
8289     }
8290 
8291   current_thunk_number++;
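  /* Round the thunk size up to the function boundary.  For example
     (illustrative numbers), with FUNCTION_BOUNDARY == 32 the boundary
     is 4 bytes, so nbytes == 22 becomes (22 + 3) & ~3 == 24.  */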
8292   nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8293 	    & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8294   last_address += nbytes;
8295   if (old_last_address > last_address)
8296     last_address = UINT_MAX;
8297   update_total_code_bytes (nbytes);
8298 }
8299 
8300 /* Only direct calls to static functions are allowed to be sibling (tail)
8301    call optimized.
8302 
8303    This restriction is necessary because some linker generated stubs will
8304    store return pointers into rp' in some cases which might clobber a
8305    live value already in rp'.
8306 
8307    In a sibcall the current function and the target function share stack
8308    space.  Thus if the path to the current function and the path to the
8309    target function save a value in rp', they save the value into the
8310    same stack slot, which has undesirable consequences.
8311 
8312    Because of the deferred binding nature of shared libraries any function
8313    with external scope could be in a different load module and thus require
8314    rp' to be saved when calling that function.  So sibcall optimizations
8315    can only be safe for static functions.
8316 
8317    Note that GCC never needs return value relocations, so we don't have to
8318    worry about static calls with return value relocations (which require
8319    saving rp').
8320 
8321    It is safe to perform a sibcall optimization when the target function
8322    will never return.  */
8323 static bool
8324 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8325 {
8326   if (TARGET_PORTABLE_RUNTIME)
8327     return false;
8328 
8329   /* Sibcalls are ok for TARGET_ELF32 as long as the linker is used in
8330      single subspace mode and the call is not indirect.  As far as I know,
8331      there is no operating system support for the multiple subspace mode.
8332      It might be possible to support indirect calls if we didn't use
8333      $$dyncall (see the indirect sequence generated in output_call).  */
8334   if (TARGET_ELF32)
8335     return (decl != NULL_TREE);
8336 
8337   /* Sibcalls are not ok because the arg pointer register is not a fixed
8338      register.  This prevents the sibcall optimization from occurring.  In
8339      addition, there are problems with stub placement using GNU ld.  This
8340      is because a normal sibcall branch uses a 17-bit relocation while
8341      a regular call branch uses a 22-bit relocation.  As a result, more
8342      care needs to be taken in the placement of long-branch stubs.  */
8343   if (TARGET_64BIT)
8344     return false;
8345 
8346   /* Sibcalls are only ok within a translation unit.  */
8347   return (decl && !TREE_PUBLIC (decl));
8348 }
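
/* Illustrative sketch (hypothetical functions, not from this file) of
   what the rules above allow on the 32-bit SOM port:

       static int helper (int x) { return x + 1; }
       extern int external_fn (int);

       int f (int x) { return helper (x); }       -- eligible: static
       int g (int x) { return external_fn (x); }  -- ineligible: public

   Calls through external_fn may go via an import stub that uses the
   rp' save slot shared with the caller, hence the TREE_PUBLIC test.  */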
8349 
8350 /* ??? Addition is not commutative on the PA due to the weird implicit
8351    space register selection rules for memory addresses.  Therefore, we
8352    don't consider a + b == b + a, as this might be inside a MEM.  */
8353 static bool
8354 pa_commutative_p (const_rtx x, int outer_code)
8355 {
8356   return (COMMUTATIVE_P (x)
8357 	  && (TARGET_NO_SPACE_REGS
8358 	      || (outer_code != UNKNOWN && outer_code != MEM)
8359 	      || GET_CODE (x) != PLUS));
8360 }
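
/* For example, (mem:SI (plus:SI (reg:SI 4) (reg:SI 5))) must not have
   the PLUS operands swapped: the implicit space register is selected
   from the base operand, so the two orderings can address different
   spaces unless TARGET_NO_SPACE_REGS.  */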
8361 
8362 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8363    use in fmpyadd instructions.  */
8364 int
8365 fmpyaddoperands (rtx *operands)
8366 {
8367   enum machine_mode mode = GET_MODE (operands[0]);
8368 
8369   /* Must be a floating point mode.  */
8370   if (mode != SFmode && mode != DFmode)
8371     return 0;
8372 
8373   /* All modes must be the same.  */
8374   if (! (mode == GET_MODE (operands[1])
8375 	 && mode == GET_MODE (operands[2])
8376 	 && mode == GET_MODE (operands[3])
8377 	 && mode == GET_MODE (operands[4])
8378 	 && mode == GET_MODE (operands[5])))
8379     return 0;
8380 
8381   /* All operands must be registers.  */
8382   if (! (GET_CODE (operands[1]) == REG
8383 	 && GET_CODE (operands[2]) == REG
8384 	 && GET_CODE (operands[3]) == REG
8385 	 && GET_CODE (operands[4]) == REG
8386 	 && GET_CODE (operands[5]) == REG))
8387     return 0;
8388 
8389   /* Only 2 real operands to the addition.  One of the input operands must
8390      be the same as the output operand.  */
8391   if (! rtx_equal_p (operands[3], operands[4])
8392       && ! rtx_equal_p (operands[3], operands[5]))
8393     return 0;
8394 
8395   /* Inout operand of add cannot conflict with any operands from multiply.  */
8396   if (rtx_equal_p (operands[3], operands[0])
8397      || rtx_equal_p (operands[3], operands[1])
8398      || rtx_equal_p (operands[3], operands[2]))
8399     return 0;
8400 
8401   /* multiply cannot feed into addition operands.  */
8402   if (rtx_equal_p (operands[4], operands[0])
8403       || rtx_equal_p (operands[5], operands[0]))
8404     return 0;
8405 
8406   /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
8407   if (mode == SFmode
8408       && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8409 	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8410 	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8411 	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8412 	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8413 	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8414     return 0;
8415 
8416   /* Passed.  Operands are suitable for fmpyadd.  */
8417   return 1;
8418 }
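
/* Illustrative sketch (hypothetical DFmode registers) of a combination
   the checks above accept:

       fmpy:  operands[0] = operands[1] * operands[2]
       fadd:  operands[3] = operands[4] + operands[5]

   where operands[4] == operands[3] (the add is two-address), operands[3]
   differs from operands[0..2], and neither add input is the multiply
   result operands[0], so the two halves are independent.  */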
8419 
8420 #if !defined(USE_COLLECT2)
8421 static void
8422 pa_asm_out_constructor (rtx symbol, int priority)
8423 {
8424   if (!function_label_operand (symbol, VOIDmode))
8425     hppa_encode_label (symbol);
8426 
8427 #ifdef CTORS_SECTION_ASM_OP
8428   default_ctor_section_asm_out_constructor (symbol, priority);
8429 #else
8430 # ifdef TARGET_ASM_NAMED_SECTION
8431   default_named_section_asm_out_constructor (symbol, priority);
8432 # else
8433   default_stabs_asm_out_constructor (symbol, priority);
8434 # endif
8435 #endif
8436 }
8437 
8438 static void
8439 pa_asm_out_destructor (rtx symbol, int priority)
8440 {
8441   if (!function_label_operand (symbol, VOIDmode))
8442     hppa_encode_label (symbol);
8443 
8444 #ifdef DTORS_SECTION_ASM_OP
8445   default_dtor_section_asm_out_destructor (symbol, priority);
8446 #else
8447 # ifdef TARGET_ASM_NAMED_SECTION
8448   default_named_section_asm_out_destructor (symbol, priority);
8449 # else
8450   default_stabs_asm_out_destructor (symbol, priority);
8451 # endif
8452 #endif
8453 }
8454 #endif
8455 
8456 /* This function places uninitialized global data in the bss section.
8457    The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8458    function on the SOM port to prevent uninitialized global data from
8459    being placed in the data section.  */
8460 
8461 void
8462 pa_asm_output_aligned_bss (FILE *stream,
8463 			   const char *name,
8464 			   unsigned HOST_WIDE_INT size,
8465 			   unsigned int align)
8466 {
8467   switch_to_section (bss_section);
8468   fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8469 
8470 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8471   ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8472 #endif
8473 
8474 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8475   ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8476 #endif
8477 
8478   fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8479   ASM_OUTPUT_LABEL (stream, name);
8480   fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8481 }
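
/* For example, a 16-byte uninitialized object with 64-bit alignment
   (align == 64) produces ".align 8" twice, bracketing the optional
   .type/.size directives, followed by the label and ".block 16".  */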
8482 
8483 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8484    that doesn't allow the alignment of global common storage to be directly
8485    specified.  The SOM linker aligns common storage based on the rounded
8486    value of the NUM_BYTES parameter in the .comm directive.  It's not
8487    possible to use the .align directive as it doesn't affect the alignment
8488    of the label associated with a .comm directive.  */
8489 
8490 void
8491 pa_asm_output_aligned_common (FILE *stream,
8492 			      const char *name,
8493 			      unsigned HOST_WIDE_INT size,
8494 			      unsigned int align)
8495 {
8496   unsigned int max_common_align;
8497 
8498   max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8499   if (align > max_common_align)
8500     {
8501       warning (0, "alignment (%u) for %s exceeds maximum alignment "
8502 	       "for global common data.  Using %u",
8503 	       align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8504       align = max_common_align;
8505     }
8506 
8507   switch_to_section (bss_section);
8508 
8509   assemble_name (stream, name);
8510   fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8511            MAX (size, align / BITS_PER_UNIT));
8512 }
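
/* For example, a 2-byte common object with 64-bit alignment (align == 64)
   is emitted as ".comm 8"; rounding the size up to the alignment makes
   the SOM linker, which aligns common storage on the rounded size,
   place the symbol with the requested alignment.  */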
8513 
8514 /* We can't use .comm for local common storage as the SOM linker effectively
8515    treats the symbol as universal and uses the same storage for local symbols
8516    with the same name in different object files.  The .block directive
8517    reserves an uninitialized block of storage.  However, it's not common
8518    storage.  Fortunately, GCC never requests common storage with the same
8519    name in any given translation unit.  */
8520 
8521 void
8522 pa_asm_output_aligned_local (FILE *stream,
8523 			     const char *name,
8524 			     unsigned HOST_WIDE_INT size,
8525 			     unsigned int align)
8526 {
8527   switch_to_section (bss_section);
8528   fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8529 
8530 #ifdef LOCAL_ASM_OP
8531   fprintf (stream, "%s", LOCAL_ASM_OP);
8532   assemble_name (stream, name);
8533   fprintf (stream, "\n");
8534 #endif
8535 
8536   ASM_OUTPUT_LABEL (stream, name);
8537   fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8538 }
8539 
8540 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8541    use in fmpysub instructions.  */
8542 int
8543 fmpysuboperands (rtx *operands)
8544 {
8545   enum machine_mode mode = GET_MODE (operands[0]);
8546 
8547   /* Must be a floating point mode.  */
8548   if (mode != SFmode && mode != DFmode)
8549     return 0;
8550 
8551   /* All modes must be the same.  */
8552   if (! (mode == GET_MODE (operands[1])
8553 	 && mode == GET_MODE (operands[2])
8554 	 && mode == GET_MODE (operands[3])
8555 	 && mode == GET_MODE (operands[4])
8556 	 && mode == GET_MODE (operands[5])))
8557     return 0;
8558 
8559   /* All operands must be registers.  */
8560   if (! (GET_CODE (operands[1]) == REG
8561 	 && GET_CODE (operands[2]) == REG
8562 	 && GET_CODE (operands[3]) == REG
8563 	 && GET_CODE (operands[4]) == REG
8564 	 && GET_CODE (operands[5]) == REG))
8565     return 0;
8566 
8567   /* Only 2 real operands to the subtraction.  Subtraction is not a commutative
8568      operation, so operands[4] must be the same as operands[3].  */
8569   if (! rtx_equal_p (operands[3], operands[4]))
8570     return 0;
8571 
8572   /* multiply cannot feed into subtraction.  */
8573   if (rtx_equal_p (operands[5], operands[0]))
8574     return 0;
8575 
8576   /* Inout operand of sub cannot conflict with any operands from multiply.  */
8577   if (rtx_equal_p (operands[3], operands[0])
8578      || rtx_equal_p (operands[3], operands[1])
8579      || rtx_equal_p (operands[3], operands[2]))
8580     return 0;
8581 
8582   /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
8583   if (mode == SFmode
8584       && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8585 	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8586 	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8587 	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8588 	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8589 	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8590     return 0;
8591 
8592   /* Passed.  Operands are suitable for fmpysub.  */
8593   return 1;
8594 }
8595 
8596 /* Return 1 if the given constant is 2, 4, or 8.  These are the valid
8597    constants for shadd instructions.  */
8598 int
8599 shadd_constant_p (int val)
8600 {
8601   if (val == 2 || val == 4 || val == 8)
8602     return 1;
8603   else
8604     return 0;
8605 }
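
/* E.g., an address computation such as x + y * 4 can use a single
   shift-and-add (illustrative PA assembly):

       sh2add %r5,%r4,%r6        ; %r6 = (%r5 << 2) + %r4

   The shadd constants 2, 4 and 8 correspond to shift counts of
   1, 2 and 3.  */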
8606 
8607 /* Return 1 if OP is valid as a base or index register in a
8608    REG+REG address.  */
8609 
8610 int
8611 borx_reg_operand (rtx op, enum machine_mode mode)
8612 {
8613   if (GET_CODE (op) != REG)
8614     return 0;
8615 
8616   /* We must reject virtual registers as the only expressions that
8617      can be instantiated are REG and REG+CONST.  */
8618   if (op == virtual_incoming_args_rtx
8619       || op == virtual_stack_vars_rtx
8620       || op == virtual_stack_dynamic_rtx
8621       || op == virtual_outgoing_args_rtx
8622       || op == virtual_cfa_rtx)
8623     return 0;
8624 
8625   /* While it's always safe to index off the frame pointer, it's not
8626      profitable to do so when the frame pointer is being eliminated.  */
8627   if (!reload_completed
8628       && flag_omit_frame_pointer
8629       && !cfun->calls_alloca
8630       && op == frame_pointer_rtx)
8631     return 0;
8632 
8633   return register_operand (op, mode);
8634 }
8635 
8636 /* Return 1 if this operand is anything other than a hard register.  */
8637 
8638 int
8639 non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8640 {
8641   return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
8642 }
8643 
8644 /* Return TRUE if INSN branches forward.  */
8645 
8646 static bool
8647 forward_branch_p (rtx insn)
8648 {
8649   rtx lab = JUMP_LABEL (insn);
8650 
8651   /* The INSN must have a jump label.  */
8652   gcc_assert (lab != NULL_RTX);
8653 
8654   if (INSN_ADDRESSES_SET_P ())
8655     return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
8656 
8657   while (insn)
8658     {
8659       if (insn == lab)
8660 	return true;
8661       else
8662 	insn = NEXT_INSN (insn);
8663     }
8664 
8665   return false;
8666 }
8667 
8668 /* Return 1 if OP is an equality comparison, else return 0.  */
8669 int
8670 eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8671 {
8672   return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
8673 }
8674 
8675 /* Return 1 if INSN is in the delay slot of a call instruction.  */
8676 int
8677 jump_in_call_delay (rtx insn)
8678 {
8679 
8680   if (GET_CODE (insn) != JUMP_INSN)
8681     return 0;
8682 
8683   if (PREV_INSN (insn)
8684       && PREV_INSN (PREV_INSN (insn))
8685       && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
8686     {
8687       rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
8688 
8689       return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8690 	      && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
8691 
8692     }
8693   else
8694     return 0;
8695 }
8696 
8697 /* Output an unconditional move and branch insn.  */
8698 
8699 const char *
8700 output_parallel_movb (rtx *operands, rtx insn)
8701 {
8702   int length = get_attr_length (insn);
8703 
8704   /* These are the cases in which we win.  */
8705   if (length == 4)
8706     return "mov%I1b,tr %1,%0,%2";
8707 
8708   /* None of the following cases win, but they don't lose either.  */
8709   if (length == 8)
8710     {
8711       if (dbr_sequence_length () == 0)
8712 	{
8713 	  /* Nothing in the delay slot, fake it by putting the combined
8714 	     insn (the copy or add) in the delay slot of a bl.  */
8715 	  if (GET_CODE (operands[1]) == CONST_INT)
8716 	    return "b %2\n\tldi %1,%0";
8717 	  else
8718 	    return "b %2\n\tcopy %1,%0";
8719 	}
8720       else
8721 	{
8722 	  /* Something in the delay slot, but we've got a long branch.  */
8723 	  if (GET_CODE (operands[1]) == CONST_INT)
8724 	    return "ldi %1,%0\n\tb %2";
8725 	  else
8726 	    return "copy %1,%0\n\tb %2";
8727 	}
8728     }
8729 
8730   if (GET_CODE (operands[1]) == CONST_INT)
8731     output_asm_insn ("ldi %1,%0", operands);
8732   else
8733     output_asm_insn ("copy %1,%0", operands);
8734   return output_lbranch (operands[2], insn, 1);
8735 }
8736 
8737 /* Output an unconditional add and branch insn.  */
8738 
8739 const char *
8740 output_parallel_addb (rtx *operands, rtx insn)
8741 {
8742   int length = get_attr_length (insn);
8743 
8744   /* To make life easy we want operand0 to be the shared input/output
8745      operand and operand1 to be the readonly operand.  */
8746   if (operands[0] == operands[1])
8747     operands[1] = operands[2];
8748 
8749   /* These are the cases in which we win.  */
8750   if (length == 4)
8751     return "add%I1b,tr %1,%0,%3";
8752 
8753   /* None of the following cases win, but they don't lose either.  */
8754   if (length == 8)
8755     {
8756       if (dbr_sequence_length () == 0)
8757 	/* Nothing in the delay slot, fake it by putting the combined
8758 	   insn (the copy or add) in the delay slot of a bl.  */
8759 	return "b %3\n\tadd%I1 %1,%0,%0";
8760       else
8761 	/* Something in the delay slot, but we've got a long branch.  */
8762 	return "add%I1 %1,%0,%0\n\tb %3";
8763     }
8764 
8765   output_asm_insn ("add%I1 %1,%0,%0", operands);
8766   return output_lbranch (operands[3], insn, 1);
8767 }
8768 
8769 /* Return nonzero if INSN (a jump insn) immediately follows a call
8770    to a named function.  This is used to avoid filling the delay slot
8771    of the jump since it can usually be eliminated by modifying RP in
8772    the delay slot of the call.  */
8773 
8774 int
8775 following_call (rtx insn)
8776 {
8777   if (! TARGET_JUMP_IN_DELAY)
8778     return 0;
8779 
8780   /* Find the previous real insn, skipping NOTEs.  */
8781   insn = PREV_INSN (insn);
8782   while (insn && GET_CODE (insn) == NOTE)
8783     insn = PREV_INSN (insn);
8784 
8785   /* Check for CALL_INSNs and millicode calls.  */
8786   if (insn
8787       && ((GET_CODE (insn) == CALL_INSN
8788 	   && get_attr_type (insn) != TYPE_DYNCALL)
8789 	  || (GET_CODE (insn) == INSN
8790 	      && GET_CODE (PATTERN (insn)) != SEQUENCE
8791 	      && GET_CODE (PATTERN (insn)) != USE
8792 	      && GET_CODE (PATTERN (insn)) != CLOBBER
8793 	      && get_attr_type (insn) == TYPE_MILLI)))
8794     return 1;
8795 
8796   return 0;
8797 }
8798 
8799 /* We use this hook to perform a PA-specific optimization which is difficult
8800    to do in earlier passes.
8801 
8802    We want the delay slots of branches within jump tables to be filled.
8803    None of the compiler passes at the moment even has the notion that a
8804    PA jump table doesn't contain addresses, but instead contains actual
8805    instructions!
8806 
8807    Because we actually jump into the table, the addresses of each entry
8808    must stay constant in relation to the beginning of the table (which
8809    itself must stay constant relative to the instruction to jump into
8810    it).  I don't believe we can guarantee earlier passes of the compiler
8811    will adhere to those rules.
8812 
8813    So, late in the compilation process we find all the jump tables, and
8814    expand them into real code -- e.g. each entry in the jump table vector
8815    will get an appropriate label followed by a jump to the final target.
8816 
8817    Reorg and the final jump pass can then optimize these branches and
8818    fill their delay slots.  We end up with smaller, more efficient code.
8819 
8820    The jump instructions within the table are special; we must be able
8821    to identify them during assembly output (if the jumps don't get filled
8822    we need to emit a nop rather than nullifying the delay slot).  We
8823    identify jumps in switch tables by using insns with the attribute
8824    type TYPE_BTABLE_BRANCH.
8825 
8826    We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
8827    insns.  This serves two purposes, first it prevents jump.c from
8828    noticing that the last N entries in the table jump to the instruction
8829    immediately after the table and deleting the jumps.  Second, those
8830    insns mark where we should emit .begin_brtab and .end_brtab directives
8831    when using GAS (allows for better link time optimizations).  */
8832 
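/* For example (illustrative sketch), a three-entry branch table that
   would normally be emitted as data,

       .word L$1
       .word L$2
       .word L$3

   is expanded into real instructions between the brtab markers,

       L$e1:  b L$1
       L$e2:  b L$2
       L$e3:  b L$3

   each followed by a barrier, so the dispatch sequence can branch
   directly into the table and reorg can fill the delay slots.  */
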
8833 static void
8834 pa_reorg (void)
8835 {
8836   rtx insn;
8837 
8838   remove_useless_addtr_insns (1);
8839 
8840   if (pa_cpu < PROCESSOR_8000)
8841     pa_combine_instructions ();
8842 
8843 
8844   /* This is fairly cheap, so always run it if optimizing.  */
8845   if (optimize > 0 && !TARGET_BIG_SWITCH)
8846     {
8847       /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns.  */
8848       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8849 	{
8850 	  rtx pattern, tmp, location, label;
8851 	  unsigned int length, i;
8852 
8853 	  /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode.  */
8854 	  if (GET_CODE (insn) != JUMP_INSN
8855 	      || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8856 		  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8857 	    continue;
8858 
8859 	  /* Emit marker for the beginning of the branch table.  */
8860 	  emit_insn_before (gen_begin_brtab (), insn);
8861 
8862 	  pattern = PATTERN (insn);
8863 	  location = PREV_INSN (insn);
8864           length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
8865 
8866 	  for (i = 0; i < length; i++)
8867 	    {
8868 	      /* Emit a label before each jump to keep jump.c from
8869 		 removing this code.  */
8870 	      tmp = gen_label_rtx ();
8871 	      LABEL_NUSES (tmp) = 1;
8872 	      emit_label_after (tmp, location);
8873 	      location = NEXT_INSN (location);
8874 
8875 	      if (GET_CODE (pattern) == ADDR_VEC)
8876 		label = XEXP (XVECEXP (pattern, 0, i), 0);
8877 	      else
8878 		label = XEXP (XVECEXP (pattern, 1, i), 0);
8879 
8880 	      tmp = gen_short_jump (label);
8881 
8882 	      /* Emit the jump itself.  */
8883 	      tmp = emit_jump_insn_after (tmp, location);
8884 	      JUMP_LABEL (tmp) = label;
8885 	      LABEL_NUSES (label)++;
8886 	      location = NEXT_INSN (location);
8887 
8888 	      /* Emit a BARRIER after the jump.  */
8889 	      emit_barrier_after (location);
8890 	      location = NEXT_INSN (location);
8891 	    }
8892 
8893 	  /* Emit marker for the end of the branch table.  */
8894 	  emit_insn_before (gen_end_brtab (), location);
8895 	  location = NEXT_INSN (location);
8896 	  emit_barrier_after (location);
8897 
8898 	  /* Delete the ADDR_VEC or ADDR_DIFF_VEC.  */
8899 	  delete_insn (insn);
8900 	}
8901     }
8902   else
8903     {
8904       /* Still need brtab marker insns.  FIXME: the presence of these
8905 	 markers disables output of the branch table to readonly memory,
8906 	 and any alignment directives that might be needed.  Possibly,
8907 	 the begin_brtab insn should be output before the label for the
8908 	 table.  This doesn't matter at the moment since the tables are
8909 	 always output in the text section.  */
8910       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8911 	{
8912 	  /* Find an ADDR_VEC insn.  */
8913 	  if (GET_CODE (insn) != JUMP_INSN
8914 	      || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8915 		  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
8916 	    continue;
8917 
8918 	  /* Now generate markers for the beginning and end of the
8919 	     branch table.  */
8920 	  emit_insn_before (gen_begin_brtab (), insn);
8921 	  emit_insn_after (gen_end_brtab (), insn);
8922 	}
8923     }
8924 }
8925 
8926 /* The PA has a number of odd instructions which can perform multiple
8927    tasks at once.  On first generation PA machines (PA1.0 and PA1.1)
8928    it may be profitable to combine two instructions into one instruction
8929    with two outputs.  It's not profitable on PA2.0 machines because the
8930    two outputs would take two slots in the reorder buffers.
8931 
8932    This routine finds instructions which can be combined and combines
8933    them.  We only support some of the potential combinations, and we
8934    only try common ways to find suitable instructions.
8935 
8936       * addb can add two registers or a register and a small integer
8937       and jump to a nearby (+-8k) location.  Normally the jump to the
8938       nearby location is conditional on the result of the add, but by
8939       using the "true" condition we can make the jump unconditional.
8940       Thus addb can perform two independent operations in one insn.
8941 
8942       * movb is similar to addb in that it can perform a reg->reg
8943       or small immediate->reg copy and jump to a nearby (+-8k) location.
8944 
8945       * fmpyadd and fmpysub can perform a FP multiply and either an
8946       FP add or FP sub if the operands of the multiply and add/sub are
8947       independent (there are other minor restrictions).  Note both
8948       the fmpy and fadd/fsub can in theory move to better spots according
8949       to data dependencies, but for now we require the fmpy stay at a
8950       fixed location.
8951 
8952       * Many of the memory operations can perform pre & post updates
8953       of index registers.  GCC's pre/post increment/decrement addressing
8954       is far too simple to take advantage of all the possibilities.  This
8955       pass may not be suitable since those insns may not be independent.
8956 
8957       * comclr can compare two ints or an int and a register, nullify
8958       the following instruction and zero some other register.  This
8959       is more difficult to use as it's harder to find an insn which
8960       will generate a comclr than finding something like an unconditional
8961       branch.  (conditional moves & long branches create comclr insns).
8962 
8963       * Most arithmetic operations can conditionally skip the next
8964       instruction.  They can be viewed as "perform this operation
8965       and conditionally jump to this nearby location" (where nearby
8966       is a few insns away).  These are difficult to use due to the
8967       branch length restrictions.  */
8968 
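/* Illustrative sketch: on a PA1.1 machine the independent pair

       fmpy,dbl fr4,fr5,fr6
       fadd,dbl fr7,fr8,fr7

   can be merged by this pass into a single two-output fmpyadd insn,
   provided the operand checks in fmpyaddoperands above are met.  */
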
8969 static void
8970 pa_combine_instructions (void)
8971 {
8972   rtx anchor, new_rtx;
8973 
8974   /* This can get expensive since the basic algorithm is on the
8975      order of O(n^2) (or worse).  Only do it for -O2 or higher
8976      levels of optimization.  */
8977   if (optimize < 2)
8978     return;
8979 
8980   /* Walk down the list of insns looking for "anchor" insns which
8981      may be combined with "floating" insns.  As the name implies,
8982      "anchor" instructions don't move, while "floating" insns may
8983      move around.  */
8984   new_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
8985   new_rtx = make_insn_raw (new_rtx);
8986 
8987   for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
8988     {
8989       enum attr_pa_combine_type anchor_attr;
8990       enum attr_pa_combine_type floater_attr;
8991 
8992       /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
8993 	 Also ignore any special USE insns.  */
8994       if ((GET_CODE (anchor) != INSN
8995 	  && GET_CODE (anchor) != JUMP_INSN
8996 	  && GET_CODE (anchor) != CALL_INSN)
8997 	  || GET_CODE (PATTERN (anchor)) == USE
8998 	  || GET_CODE (PATTERN (anchor)) == CLOBBER
8999 	  || GET_CODE (PATTERN (anchor)) == ADDR_VEC
9000 	  || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
9001 	continue;
9002 
9003       anchor_attr = get_attr_pa_combine_type (anchor);
9004       /* See if anchor is an insn suitable for combination.  */
9005       if (anchor_attr == PA_COMBINE_TYPE_FMPY
9006 	  || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9007 	  || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9008 	      && ! forward_branch_p (anchor)))
9009 	{
9010 	  rtx floater;
9011 
9012 	  for (floater = PREV_INSN (anchor);
9013 	       floater;
9014 	       floater = PREV_INSN (floater))
9015 	    {
9016 	      if (GET_CODE (floater) == NOTE
9017 		  || (GET_CODE (floater) == INSN
9018 		      && (GET_CODE (PATTERN (floater)) == USE
9019 			  || GET_CODE (PATTERN (floater)) == CLOBBER)))
9020 		continue;
9021 
9022 	      /* Anything except a regular INSN will stop our search.  */
9023 	      if (GET_CODE (floater) != INSN
9024 		  || GET_CODE (PATTERN (floater)) == ADDR_VEC
9025 		  || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
9026 		{
9027 		  floater = NULL_RTX;
9028 		  break;
9029 		}
9030 
9031 	      /* See if FLOATER is suitable for combination with the
9032 		 anchor.  */
9033 	      floater_attr = get_attr_pa_combine_type (floater);
9034 	      if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9035 		   && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9036 		  || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9037 		      && floater_attr == PA_COMBINE_TYPE_FMPY))
9038 		{
9039 		  /* If ANCHOR and FLOATER can be combined, then we're
9040 		     done with this pass.  */
9041 		  if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9042 					SET_DEST (PATTERN (floater)),
9043 					XEXP (SET_SRC (PATTERN (floater)), 0),
9044 					XEXP (SET_SRC (PATTERN (floater)), 1)))
9045 		    break;
9046 		}
9047 
9048 	      else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9049 		       && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9050 		{
9051 		  if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9052 		    {
9053 		      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9054 					    SET_DEST (PATTERN (floater)),
9055 					XEXP (SET_SRC (PATTERN (floater)), 0),
9056 					XEXP (SET_SRC (PATTERN (floater)), 1)))
9057 			break;
9058 		    }
9059 		  else
9060 		    {
9061 		      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9062 					    SET_DEST (PATTERN (floater)),
9063 					    SET_SRC (PATTERN (floater)),
9064 					    SET_SRC (PATTERN (floater))))
9065 			break;
9066 		    }
9067 		}
9068 	    }
9069 
9070 	  /* If we didn't find anything on the backwards scan try forwards.  */
9071 	  if (!floater
9072 	      && (anchor_attr == PA_COMBINE_TYPE_FMPY
9073 		  || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9074 	    {
9075 	      for (floater = anchor; floater; floater = NEXT_INSN (floater))
9076 		{
9077 		  if (GET_CODE (floater) == NOTE
9078 		      || (GET_CODE (floater) == INSN
9079 			  && (GET_CODE (PATTERN (floater)) == USE
9080 			      || GET_CODE (PATTERN (floater)) == CLOBBER)))
9081 
9082 		    continue;
9083 
9084 		  /* Anything except a regular INSN will stop our search.  */
9085 		  if (GET_CODE (floater) != INSN
9086 		      || GET_CODE (PATTERN (floater)) == ADDR_VEC
9087 		      || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
9088 		    {
9089 		      floater = NULL_RTX;
9090 		      break;
9091 		    }
9092 
9093 		  /* See if FLOATER is suitable for combination with the
9094 		     anchor.  */
9095 		  floater_attr = get_attr_pa_combine_type (floater);
9096 		  if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9097 		       && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9098 		      || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9099 			  && floater_attr == PA_COMBINE_TYPE_FMPY))
9100 		    {
9101 		      /* If ANCHOR and FLOATER can be combined, then we're
9102 			 done with this pass.  */
9103 		      if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9104 					    SET_DEST (PATTERN (floater)),
9105 					    XEXP (SET_SRC (PATTERN (floater)),
9106 						  0),
9107 					    XEXP (SET_SRC (PATTERN (floater)),
9108 						  1)))
9109 			break;
9110 		    }
9111 		}
9112 	    }
9113 
9114 	  /* FLOATER will be nonzero if we found a suitable floating
9115 	     insn for combination with ANCHOR.  */
9116 	  if (floater
9117 	      && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9118 		  || anchor_attr == PA_COMBINE_TYPE_FMPY))
9119 	    {
9120 	      /* Emit the new instruction and delete the old anchor.  */
9121 	      emit_insn_before (gen_rtx_PARALLEL
9122 				(VOIDmode,
9123 				 gen_rtvec (2, PATTERN (anchor),
9124 					    PATTERN (floater))),
9125 				anchor);
9126 
9127 	      SET_INSN_DELETED (anchor);
9128 
9129 	      /* Emit a special USE insn for FLOATER, then delete
9130 		 the floating insn.  */
9131 	      emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9132 	      delete_insn (floater);
9133 
9134 	      continue;
9135 	    }
9136 	  else if (floater
9137 		   && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9138 	    {
9139 	      rtx temp;
9140 	      /* Emit the new_jump instruction and delete the old anchor.  */
9141 	      temp
9142 		= emit_jump_insn_before (gen_rtx_PARALLEL
9143 					 (VOIDmode,
9144 					  gen_rtvec (2, PATTERN (anchor),
9145 						     PATTERN (floater))),
9146 					 anchor);
9147 
9148 	      JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9149 	      SET_INSN_DELETED (anchor);
9150 
9151 	      /* Emit a special USE insn for FLOATER, then delete
9152 		 the floating insn.  */
9153 	      emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9154 	      delete_insn (floater);
9155 	      continue;
9156 	    }
9157 	}
9158     }
9159 }
9160 
9161 static int
9162 pa_can_combine_p (rtx new_rtx, rtx anchor, rtx floater, int reversed, rtx dest,
9163 		  rtx src1, rtx src2)
9164 {
9165   int insn_code_number;
9166   rtx start, end;
9167 
9168   /* Create a PARALLEL with the patterns of ANCHOR and
9169      FLOATER, try to recognize it, then test constraints
9170      for the resulting pattern.
9171 
9172      If the pattern doesn't match or the constraints
9173      aren't met keep searching for a suitable floater
9174      insn.  */
9175   XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9176   XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9177   INSN_CODE (new_rtx) = -1;
9178   insn_code_number = recog_memoized (new_rtx);
9179   if (insn_code_number < 0
9180       || (extract_insn (new_rtx), ! constrain_operands (1)))
9181     return 0;
9182 
9183   if (reversed)
9184     {
9185       start = anchor;
9186       end = floater;
9187     }
9188   else
9189     {
9190       start = floater;
9191       end = anchor;
9192     }
9193 
9194   /* There are up to three operands to consider: one
9195      output and two inputs.
9196 
9197      The output must not be used between FLOATER & ANCHOR
9198      exclusive.  The inputs must not be set between
9199      FLOATER and ANCHOR exclusive.  */
9200 
9201   if (reg_used_between_p (dest, start, end))
9202     return 0;
9203 
9204   if (reg_set_between_p (src1, start, end))
9205     return 0;
9206 
9207   if (reg_set_between_p (src2, start, end))
9208     return 0;
9209 
9210   /* If we get here, then everything is good.  */
9211   return 1;
9212 }
9213 
9214 /* Return nonzero if references for INSN are delayed.
9215 
9216    Millicode insns are actually function calls with some special
9217    constraints on arguments and register usage.
9218 
9219    Millicode calls always expect their arguments in the integer argument
9220    registers, and always return their result in %r29 (ret1).  They
9221    are expected to clobber their arguments, %r1, %r29, and the return
9222    pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9223 
9224    This function tells reorg that the references to arguments and
9225    millicode calls do not appear to happen until after the millicode call.
9226    This allows reorg to put insns which set the argument registers into the
9227    delay slot of the millicode call -- thus they act more like traditional
9228    CALL_INSNs.
9229 
9230    Note we cannot consider side effects of the insn to be delayed because
9231    the branch and link insn will clobber the return pointer.  If we happened
9232    to use the return pointer in the delay slot of the call, then we lose.
9233 
9234    get_attr_type will try to recognize the given insn, so make sure to
9235    filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9236    in particular.  */
9237 int
9238 insn_refs_are_delayed (rtx insn)
9239 {
9240   return ((GET_CODE (insn) == INSN
9241 	   && GET_CODE (PATTERN (insn)) != SEQUENCE
9242 	   && GET_CODE (PATTERN (insn)) != USE
9243 	   && GET_CODE (PATTERN (insn)) != CLOBBER
9244 	   && get_attr_type (insn) == TYPE_MILLI));
9245 }
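
/* Illustrative sketch (hypothetical constant): for a millicode multiply,

       ldi 7,%r26          ; argument setup
       bl $$mulI,%r31      ; millicode call
       nop

   reorg may instead place the setup in the delay slot,

       bl $$mulI,%r31
       ldi 7,%r26

   which is safe precisely because the call's argument references are
   reported as delayed.  */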
9246 
9247 /* Promote the return value, but not the arguments.  */
9248 
9249 static enum machine_mode
9250 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9251                           enum machine_mode mode,
9252                           int *punsignedp ATTRIBUTE_UNUSED,
9253                           const_tree fntype ATTRIBUTE_UNUSED,
9254                           int for_return)
9255 {
9256   if (for_return == 0)
9257     return mode;
9258   return promote_mode (type, mode, punsignedp);
9259 }
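
/* For example, a function returning a signed char has its return value
   promoted to a full word, while a signed char parameter is left in
   QImode by this hook.  */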
9260 
9261 /* On the HP-PA the value is found in register(s) 28(-29), unless
9262    the mode is SF or DF. Then the value is returned in fr4 (32).
9263 
9264    This must perform the same promotions as PROMOTE_MODE, else promoting
9265    return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9266 
9267    Small structures must be returned in a PARALLEL on PA64 in order
9268    to match the HP Compiler ABI.  */
9269 
9270 rtx
9271 pa_function_value (const_tree valtype,
9272                    const_tree func ATTRIBUTE_UNUSED,
9273                    bool outgoing ATTRIBUTE_UNUSED)
9274 {
9275   enum machine_mode valmode;
9276 
9277   if (AGGREGATE_TYPE_P (valtype)
9278       || TREE_CODE (valtype) == COMPLEX_TYPE
9279       || TREE_CODE (valtype) == VECTOR_TYPE)
9280     {
9281       if (TARGET_64BIT)
9282 	{
9283           /* Aggregates with a size less than or equal to 128 bits are
9284 	     returned in GR 28(-29).  They are left justified.  The pad
9285 	     bits are undefined.  Larger aggregates are returned in
9286 	     memory.  */
9287 	  rtx loc[2];
9288 	  int i, offset = 0;
9289 	  int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
9290 
9291 	  for (i = 0; i < ub; i++)
9292 	    {
9293 	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9294 					  gen_rtx_REG (DImode, 28 + i),
9295 					  GEN_INT (offset));
9296 	      offset += 8;
9297 	    }
9298 
9299 	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9300 	}
9301       else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
9302 	{
9303 	  /* Aggregates 5 to 8 bytes in size are returned in general
9304 	     registers r28-r29 in the same manner as other non
9305 	     floating-point objects.  The data is right-justified and
9306 	     zero-extended to 64 bits.  This is opposite to the normal
9307 	     justification used on big endian targets and requires
9308 	     special treatment.  */
9309 	  rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9310 				       gen_rtx_REG (DImode, 28), const0_rtx);
9311 	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9312 	}
9313     }
9314 
9315   if ((INTEGRAL_TYPE_P (valtype)
9316        && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9317       || POINTER_TYPE_P (valtype))
9318     valmode = word_mode;
9319   else
9320     valmode = TYPE_MODE (valtype);
9321 
9322   if (TREE_CODE (valtype) == REAL_TYPE
9323       && !AGGREGATE_TYPE_P (valtype)
9324       && TYPE_MODE (valtype) != TFmode
9325       && !TARGET_SOFT_FLOAT)
9326     return gen_rtx_REG (valmode, 32);
9327 
9328   return gen_rtx_REG (valmode, 28);
9329 }
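
/* Worked examples of the conventions above (illustrative):

       int f (void);       -- %r28, promoted to word_mode
       double g (void);    -- fr4 (register 32), unless TARGET_SOFT_FLOAT
       16-byte struct      -- on PA64, a PARALLEL of %r28 and %r29,
                              left justified  */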
9330 
9331 /* Return the location of a parameter that is passed in a register or NULL
9332    if the parameter has any component that is passed in memory.
9333 
9334    This is new code and will be pushed into the net sources after
9335    further testing.
9336 
9337    ??? We might want to restructure this so that it looks more like other
9338    ports.  */
9339 rtx
9340 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
9341 	      int named ATTRIBUTE_UNUSED)
9342 {
9343   int max_arg_words = (TARGET_64BIT ? 8 : 4);
9344   int alignment = 0;
9345   int arg_size;
9346   int fpr_reg_base;
9347   int gpr_reg_base;
9348   rtx retval;
9349 
9350   if (mode == VOIDmode)
9351     return NULL_RTX;
9352 
9353   arg_size = FUNCTION_ARG_SIZE (mode, type);
9354 
9355   /* If this arg would be passed partially or totally on the stack, then
9356      this routine should return zero.  pa_arg_partial_bytes will
9357      handle arguments which are split between regs and stack slots if
9358      the ABI mandates split arguments.  */
9359   if (!TARGET_64BIT)
9360     {
9361       /* The 32-bit ABI does not split arguments.  */
9362       if (cum->words + arg_size > max_arg_words)
9363 	return NULL_RTX;
9364     }
9365   else
9366     {
9367       if (arg_size > 1)
9368 	alignment = cum->words & 1;
9369       if (cum->words + alignment >= max_arg_words)
9370 	return NULL_RTX;
9371     }
9372 
9373   /* The 32bit ABIs and the 64bit ABIs are rather different,
9374      particularly in their handling of FP registers.  We might
9375      be able to cleverly share code between them, but I'm not
9376      going to bother in the hope that splitting them up results
9377      in code that is more easily understood.  */
9378 
9379   if (TARGET_64BIT)
9380     {
9381       /* Advance the base registers to their current locations.
9382 
9383          Remember, gprs grow towards smaller register numbers while
9384 	 fprs grow to higher register numbers.  Also remember that
9385 	 although FP regs are 32-bit addressable, we pretend that
9386 	 the registers are 64-bits wide.  */
9387       gpr_reg_base = 26 - cum->words;
9388       fpr_reg_base = 32 + cum->words;
9389 
9390       /* Arguments wider than one word and small aggregates need special
9391 	 treatment.  */
9392       if (arg_size > 1
9393 	  || mode == BLKmode
9394 	  || (type && (AGGREGATE_TYPE_P (type)
9395 		       || TREE_CODE (type) == COMPLEX_TYPE
9396 		       || TREE_CODE (type) == VECTOR_TYPE)))
9397 	{
9398 	  /* Double-extended precision (80-bit), quad-precision (128-bit)
9399 	     and aggregates including complex numbers are aligned on
9400 	     128-bit boundaries.  The first eight 64-bit argument slots
9401 	     are associated one-to-one, with general registers r26
9402 	     through r19, and also with floating-point registers fr4
9403 	     through fr11.  Arguments larger than one word are always
9404 	     passed in general registers.
9405 
9406 	     Using a PARALLEL with a word mode register results in left
9407 	     justified data on a big-endian target.  */
9408 
9409 	  rtx loc[8];
9410 	  int i, offset = 0, ub = arg_size;
9411 
9412 	  /* Align the base register.  */
9413 	  gpr_reg_base -= alignment;
9414 
9415 	  ub = MIN (ub, max_arg_words - cum->words - alignment);
9416 	  for (i = 0; i < ub; i++)
9417 	    {
9418 	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9419 					  gen_rtx_REG (DImode, gpr_reg_base),
9420 					  GEN_INT (offset));
9421 	      gpr_reg_base -= 1;
9422 	      offset += 8;
9423 	    }
9424 
9425 	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9426 	}
9427     }
9428   else
9429     {
9430       /* If the argument is larger than a word, then we know precisely
9431 	 which registers we must use.  */
9432       if (arg_size > 1)
9433 	{
9434 	  if (cum->words)
9435 	    {
9436 	      gpr_reg_base = 23;
9437 	      fpr_reg_base = 38;
9438 	    }
9439 	  else
9440 	    {
9441 	      gpr_reg_base = 25;
9442 	      fpr_reg_base = 34;
9443 	    }
9444 
9445 	  /* Structures 5 to 8 bytes in size are passed in the general
9446 	     registers in the same manner as other non floating-point
9447 	     objects.  The data is right-justified and zero-extended
9448 	     to 64 bits.  This is opposite to the normal justification
9449 	     used on big endian targets and requires special treatment.
9450 	     We now define BLOCK_REG_PADDING to pad these objects.
9451 	     Aggregates, complex and vector types are passed in the same
9452 	     manner as structures.  */
9453 	  if (mode == BLKmode
9454 	      || (type && (AGGREGATE_TYPE_P (type)
9455 			   || TREE_CODE (type) == COMPLEX_TYPE
9456 			   || TREE_CODE (type) == VECTOR_TYPE)))
9457 	    {
9458 	      rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9459 					   gen_rtx_REG (DImode, gpr_reg_base),
9460 					   const0_rtx);
9461 	      return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9462 	    }
9463 	}
9464       else
9465         {
9466 	   /* We have a single word (32 bits).  A simple computation
9467 	      will get us the register #s we need.  */
9468 	   gpr_reg_base = 26 - cum->words;
9469 	   fpr_reg_base = 32 + 2 * cum->words;
9470 	}
9471     }
9472 
9473   /* Determine if the argument needs to be passed in both general and
9474      floating point registers.  */
9475   if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9476        /* If we are doing soft-float with portable runtime, then there
9477 	  is no need to worry about FP regs.  */
9478        && !TARGET_SOFT_FLOAT
9479        /* The parameter must be some kind of scalar float, else we just
9480 	  pass it in integer registers.  */
9481        && GET_MODE_CLASS (mode) == MODE_FLOAT
9482        /* The target function must not have a prototype.  */
9483        && cum->nargs_prototype <= 0
9484        /* libcalls do not need to pass items in both FP and general
9485 	  registers.  */
9486        && type != NULL_TREE
9487        /* All this hair applies to "outgoing" args only.  This includes
9488 	  sibcall arguments setup with FUNCTION_INCOMING_ARG.  */
9489        && !cum->incoming)
9490       /* Also pass outgoing floating arguments in both registers in indirect
9491 	 calls with the 32 bit ABI and the HP assembler since there is no
9492 	 way to specify the argument locations in static functions.  */
9493       || (!TARGET_64BIT
9494 	  && !TARGET_GAS
9495 	  && !cum->incoming
9496 	  && cum->indirect
9497 	  && GET_MODE_CLASS (mode) == MODE_FLOAT))
9498     {
9499       retval
9500 	= gen_rtx_PARALLEL
9501 	    (mode,
9502 	     gen_rtvec (2,
9503 			gen_rtx_EXPR_LIST (VOIDmode,
9504 					   gen_rtx_REG (mode, fpr_reg_base),
9505 					   const0_rtx),
9506 			gen_rtx_EXPR_LIST (VOIDmode,
9507 					   gen_rtx_REG (mode, gpr_reg_base),
9508 					   const0_rtx)));
9509     }
9510   else
9511     {
9512       /* See if we should pass this parameter in a general register.  */
9513       if (TARGET_SOFT_FLOAT
9514 	  /* Indirect calls in the normal 32bit ABI require all arguments
9515 	     to be passed in general registers.  */
9516 	  || (!TARGET_PORTABLE_RUNTIME
9517 	      && !TARGET_64BIT
9518 	      && !TARGET_ELF32
9519 	      && cum->indirect)
9520 	  /* If the parameter is not a scalar floating-point parameter,
9521 	     then it belongs in GPRs.  */
9522 	  || GET_MODE_CLASS (mode) != MODE_FLOAT
9523 	  /* Structure with single SFmode field belongs in GPR.  */
9524 	  || (type && AGGREGATE_TYPE_P (type)))
9525 	retval = gen_rtx_REG (mode, gpr_reg_base);
9526       else
9527 	retval = gen_rtx_REG (mode, fpr_reg_base);
9528     }
9529   return retval;
9530 }
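
/* Illustrative examples for the 32-bit ABI, following the computations
   above:

       f (int a, int b)    -- a in %r26 (26 - 0), b in %r25 (26 - 1)
       a leading double    -- register pair based at %r25 (cum->words == 0)

   For an unprototyped call, a scalar float argument is described by a
   PARALLEL naming both an FP and a general register, so the value is
   passed in both places.  */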
9531 
9532 
9533 /* If this arg would be passed totally in registers or totally on the stack,
9534    then this routine should return zero.  */
9535 
9536 static int
9537 pa_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
9538 		      tree type, bool named ATTRIBUTE_UNUSED)
9539 {
9540   unsigned int max_arg_words = 8;
9541   unsigned int offset = 0;
9542 
9543   if (!TARGET_64BIT)
9544     return 0;
9545 
9546   if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9547     offset = 1;
9548 
9549   if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9550     /* Arg fits fully into registers.  */
9551     return 0;
9552   else if (cum->words + offset >= max_arg_words)
9553     /* Arg fully on the stack.  */
9554     return 0;
9555   else
9556     /* Arg is split.  */
9557     return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
9558 }
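
/* Worked example (PA64): with cum->words == 6 and an argument occupying
   four words, offset is 0; 6 + 0 + 4 > 8 while 6 + 0 < 8, so the
   argument is split and (8 - 6 - 0) * UNITS_PER_WORD == 16 bytes are
   passed in registers, the remainder on the stack.  */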
9559 
9560 
9561 /* A get_unnamed_section callback for switching to the text section.
9562 
9563    This function is only used with SOM.  Because we don't support
9564    named subspaces, we can only create a new subspace or switch back
9565    to the default text subspace.  */
9566 
9567 static void
9568 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9569 {
9570   gcc_assert (TARGET_SOM);
9571   if (TARGET_GAS)
9572     {
9573       if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9574 	{
9575 	  /* We only want to emit a .nsubspa directive once at the
9576 	     start of the function.  */
9577 	  cfun->machine->in_nsubspa = 1;
9578 
9579 	  /* Create a new subspace for the text.  This provides
9580 	     better stub placement and one-only functions.  */
9581 	  if (cfun->decl
9582 	      && DECL_ONE_ONLY (cfun->decl)
9583 	      && !DECL_WEAK (cfun->decl))
9584 	    {
9585 	      output_section_asm_op ("\t.SPACE $TEXT$\n"
9586 				     "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9587 				     "ACCESS=44,SORT=24,COMDAT");
9588 	      return;
9589 	    }
9590 	}
9591       else
9592 	{
9593 	  /* There isn't a current function or the body of the current
9594 	     function has been completed.  So, we are changing to the
9595 	     text section to output debugging information.  Thus, we
9596 	     need to forget that we are in the text section so that
9597 	     varasm.c will call us when text_section is selected again.  */
9598 	  gcc_assert (!cfun || !cfun->machine
9599 		      || cfun->machine->in_nsubspa == 2);
9600 	  in_section = NULL;
9601 	}
9602       output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9603       return;
9604     }
9605   output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9606 }
9607 
9608 /* A get_unnamed_section callback for switching to comdat data
9609    sections.  This function is only used with SOM.  */
9610 
9611 static void
9612 som_output_comdat_data_section_asm_op (const void *data)
9613 {
9614   in_section = NULL;
9615   output_section_asm_op (data);
9616 }
9617 
9618 /* Implement TARGET_ASM_INITIALIZE_SECTIONS  */
9619 
9620 static void
9621 pa_som_asm_init_sections (void)
9622 {
9623   text_section
9624     = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9625 
9626   /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9627      is not being generated.  */
9628   som_readonly_data_section
9629     = get_unnamed_section (0, output_section_asm_op,
9630 			   "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9631 
9632   /* When secondary definitions are not supported, SOM makes readonly
9633      data one-only by creating a new $LIT$ subspace in $TEXT$ with
9634      the comdat flag.  */
9635   som_one_only_readonly_data_section
9636     = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
9637 			   "\t.SPACE $TEXT$\n"
9638 			   "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9639 			   "ACCESS=0x2c,SORT=16,COMDAT");
9640 
9641 
9642   /* When secondary definitions are not supported, SOM makes data one-only
9643      by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag.  */
9644   som_one_only_data_section
9645     = get_unnamed_section (SECTION_WRITE,
9646 			   som_output_comdat_data_section_asm_op,
9647 			   "\t.SPACE $PRIVATE$\n"
9648 			   "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9649 			   "ACCESS=31,SORT=24,COMDAT");
9650 
9651   /* FIXME: HP-UX ld generates incorrect GOT entries for "T" fixups
9652      which reference data within the $TEXT$ space (for example, constant
9653      strings in the $LIT$ subspace).
9654 
9655      The assemblers (GAS and HP as) both have problems handling the
9656      difference of two symbols, which is the other correct way to
9657      reference constant data during PIC code generation.
9658 
9659      So, there is no way to reference constant data that is in the
9660      $TEXT$ space during PIC generation.  Instead, place all constant
9661      data in the $PRIVATE$ space (this reduces sharing, but it
9662      works correctly).  */
9663   readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9664 
9665   /* We must not have a reference in a read-only section to an
9666      external symbol that is defined in a shared library, or the
9667      SOM linker will complain.
9668 
9669      So, we force exception information into the data section.  */
9670   exception_section = data_section;
9671 }
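
/* For illustration (a sketch; the variable name is hypothetical): given

     const char msg[] = "hello";

   non-PIC code places msg in the $TEXT$/$LIT$ subspace via
   som_readonly_data_section, while PIC code places it in the writable
   $PRIVATE$ space via data_section, as explained in the FIXME above.  */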
9672 
9673 /* On hpux10, the linker will give an error if we have a reference
9674    in the read-only data section to a symbol defined in a shared
9675    library.  Therefore, expressions that might require a reloc
9676    cannot be placed in the read-only data section.  */
9677 
9678 static section *
9679 pa_select_section (tree exp, int reloc,
9680 		   unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9681 {
9682   if (TREE_CODE (exp) == VAR_DECL
9683       && TREE_READONLY (exp)
9684       && !TREE_THIS_VOLATILE (exp)
9685       && DECL_INITIAL (exp)
9686       && (DECL_INITIAL (exp) == error_mark_node
9687           || TREE_CONSTANT (DECL_INITIAL (exp)))
9688       && !reloc)
9689     {
9690       if (TARGET_SOM
9691 	  && DECL_ONE_ONLY (exp)
9692 	  && !DECL_WEAK (exp))
9693 	return som_one_only_readonly_data_section;
9694       else
9695 	return readonly_data_section;
9696     }
9697   else if (CONSTANT_CLASS_P (exp) && !reloc)
9698     return readonly_data_section;
9699   else if (TARGET_SOM
9700 	   && TREE_CODE (exp) == VAR_DECL
9701 	   && DECL_ONE_ONLY (exp)
9702 	   && !DECL_WEAK (exp))
9703     return som_one_only_data_section;
9704   else
9705     return data_section;
9706 }
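
/* For illustration (a sketch; the declarations are hypothetical):

     const int tbl[4] = { 1, 2, 3, 4 };   reloc == 0, readonly_data_section
     char *const p = &some_var;           reloc != 0, data_section

   On SOM, a one-only, non-weak read-only variable would instead select
   som_one_only_readonly_data_section, and a one-only writable variable
   som_one_only_data_section.  */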
9707 
9708 static void
9709 pa_globalize_label (FILE *stream, const char *name)
9710 {
9711   /* We only handle DATA objects here; functions are globalized in
9712      ASM_DECLARE_FUNCTION_NAME.  */
9713   if (! FUNCTION_NAME_P (name))
9714     {
9715       fputs ("\t.EXPORT ", stream);
9716       assemble_name (stream, name);
9717       fputs (",DATA\n", stream);
9718     }
9719 }
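
/* For illustration (a sketch; the symbol is hypothetical): globalizing
   a data symbol such as "counter" emits

	.EXPORT counter,DATA

   while function names are skipped here and exported by
   ASM_DECLARE_FUNCTION_NAME instead.  */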
9720 
9721 /* Worker function for TARGET_STRUCT_VALUE_RTX.  */
9722 
9723 static rtx
9724 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9725 		     int incoming ATTRIBUTE_UNUSED)
9726 {
9727   return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9728 }
9729 
9730 /* Worker function for TARGET_RETURN_IN_MEMORY.  */
9731 
9732 bool
9733 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9734 {
9735   /* The SOM ABI says that objects larger than 64 bits are returned in
9736      memory.  The PA64 ABI says that objects larger than 128 bits are
9737      returned in memory.  Note that int_size_in_bytes can return -1 if
9738      the size of the object is variable or larger than the maximum value
9739      that can be expressed as a HOST_WIDE_INT.  It can also return zero
9740      for an empty type.  The simplest way to handle variable and empty
9741      types is to pass them in memory.  This avoids problems in defining
9742      the boundaries of argument slots, allocating registers, etc.  */
9743   return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9744 	  || int_size_in_bytes (type) <= 0);
9745 }
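
/* For illustration (a sketch; the structs are hypothetical):

     struct s8  { int a, b; };      int_size_in_bytes == 8
     struct s12 { int a, b, c; };   int_size_in_bytes == 12

   s8 is returned in registers on both targets; s12 is returned in
   memory on the 32-bit target but in registers on the 64-bit target,
   whose cutoff is 16 bytes.  */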
9746 
9747 /* Structure to hold the declaration and name of external symbols that
9748    are emitted by GCC.  We generate a vector of these symbols and output
9749    them at the end of the file if and only if SYMBOL_REF_REFERENCED_P is
9750    true.  This avoids emitting names that are never really used.  */
9751 
9752 typedef struct GTY(()) extern_symbol
9753 {
9754   tree decl;
9755   const char *name;
9756 } extern_symbol;
9757 
9758 /* Define gc'd vector type for extern_symbol.  */
9759 DEF_VEC_O(extern_symbol);
9760 DEF_VEC_ALLOC_O(extern_symbol,gc);
9761 
9762 /* Vector of extern_symbol entries.  */
9763 static GTY(()) VEC(extern_symbol,gc) *extern_symbols;
9764 
9765 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9766 /* Mark DECL (name NAME) as an external reference (assembler output
9767    file FILE).  This saves the name so that it can be output at the
9768    end of the file, if the symbol is actually referenced.  */
9769 
9770 void
9771 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9772 {
9773   extern_symbol *p = VEC_safe_push (extern_symbol, gc, extern_symbols, NULL);
9774 
9775   gcc_assert (file == asm_out_file);
9776   p->decl = decl;
9777   p->name = name;
9778 }
9779 
9780 /* Output text required at the end of an assembler file.
9781    This includes deferred plabels and .import directives for
9782    all external symbols that were actually referenced.  */
9783 
9784 static void
9785 pa_hpux_file_end (void)
9786 {
9787   unsigned int i;
9788   extern_symbol *p;
9789 
9790   if (!NO_DEFERRED_PROFILE_COUNTERS)
9791     output_deferred_profile_counters ();
9792 
9793   output_deferred_plabels ();
9794 
9795   for (i = 0; VEC_iterate (extern_symbol, extern_symbols, i, p); i++)
9796     {
9797       tree decl = p->decl;
9798 
9799       if (!TREE_ASM_WRITTEN (decl)
9800 	  && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9801 	ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9802     }
9803 
9804   VEC_free (extern_symbol, gc, extern_symbols);
9805 }
9806 #endif
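
/* For illustration (a sketch; the names are hypothetical): compiling

     extern int foo (void);
     int bar (void) { return foo (); }

   leaves foo referenced but not defined, so pa_hpux_file_end emits an
   .import directive for foo through ASM_OUTPUT_EXTERNAL_REAL.  An
   extern declaration that is never referenced produces no directive.  */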
9807 
9808 /* Return true if a change from mode FROM to mode TO for a register
9809    in register class RCLASS is invalid.  */
9810 
9811 bool
9812 pa_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9813 			     enum reg_class rclass)
9814 {
9815   if (from == to)
9816     return false;
9817 
9818   /* Reject changes to/from complex and vector modes.  */
9819   if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
9820       || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
9821     return true;
9822 
9823   if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
9824     return false;
9825 
9826   /* There is no way to load QImode or HImode values directly from
9827      memory into the floating-point registers.  SImode loads to the
9828      FP registers are not zero extended.  On the 64-bit target, this
9829      conflicts with the definition of LOAD_EXTEND_OP.  Thus, we can't
9830      allow changes between modes of different sizes in the FP registers.  */
9831   if (MAYBE_FP_REG_CLASS_P (rclass))
9832     return true;
9833 
9834   /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
9835      in specific sets of registers.  Thus, we cannot allow a change
9836      to a mode that is both wider than a word and wider than FROM.  */
9837   if (GET_MODE_SIZE (to) > UNITS_PER_WORD
9838       && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
9839     return true;
9840 
9841   return false;
9842 }
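
/* For illustration (a sketch of the checks above on the 32-bit target):
   SImode <-> SFmode is allowed anywhere (equal 4-byte sizes); any
   size-changing transition is rejected in a class that may contain FP
   registers; and SImode -> DImode is rejected even in the general
   registers, since DImode is wider than both a word and SImode (the
   narrowing DImode -> SImode direction is allowed there).  */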
9843 
9844 /* Returns TRUE if it is a good idea to tie two pseudo registers
9845    when one has mode MODE1 and one has mode MODE2.
9846    If HARD_REGNO_MODE_OK could produce different values for MODE1 and
9847    MODE2 for any hard reg, then this must be FALSE for correct output.
9848 
9849    We should return FALSE for QImode and HImode because these modes
9850    are not valid in the floating-point registers.  However, that would
9851    also prevent tying these modes to SImode and DImode in the general
9852    registers, so it isn't a good idea.  Instead, we rely on
9853    HARD_REGNO_MODE_OK and CANNOT_CHANGE_MODE_CLASS to keep these modes
9854    out of the floating-point registers.  */
9855 
9856 bool
9857 pa_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
9858 {
9859   /* Don't tie modes in different classes.  */
9860   if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
9861     return false;
9862 
9863   return true;
9864 }
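
/* For illustration: SImode and DImode are tieable (both MODE_INT), as
   are SFmode and DFmode (both MODE_FLOAT), but SImode and SFmode are
   not, since their mode classes differ.  */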
9865 
9866 
9867 /* Length in units of the trampoline instruction code.  */
9868 
9869 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
9870 
9871 
9872 /* Output assembler code for a block containing the constant parts
9873    of a trampoline, leaving space for the variable parts.
9874 
9875    The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
9876    and then branches to the specified routine.
9877 
9878    This code template is copied from the text segment to a stack
9879    location, patched by pa_trampoline_init to contain valid values,
9880    and then entered as a subroutine.
9881 
9882    It is best to keep this as small as possible to avoid having to
9883    flush multiple lines in the cache.  */
9884 
9885 static void
9886 pa_asm_trampoline_template (FILE *f)
9887 {
9888   if (!TARGET_64BIT)
9889     {
9890       fputs ("\tldw	36(%r22),%r21\n", f);
9891       fputs ("\tbb,>=,n	%r21,30,.+16\n", f);
9892       if (ASSEMBLER_DIALECT == 0)
9893 	fputs ("\tdepi	0,31,2,%r21\n", f);
9894       else
9895 	fputs ("\tdepwi	0,31,2,%r21\n", f);
9896       fputs ("\tldw	4(%r21),%r19\n", f);
9897       fputs ("\tldw	0(%r21),%r21\n", f);
9898       if (TARGET_PA_20)
9899 	{
9900 	  fputs ("\tbve	(%r21)\n", f);
9901 	  fputs ("\tldw	40(%r22),%r29\n", f);
9902 	  fputs ("\t.word	0\n", f);
9903 	  fputs ("\t.word	0\n", f);
9904 	}
9905       else
9906 	{
9907 	  fputs ("\tldsid	(%r21),%r1\n", f);
9908 	  fputs ("\tmtsp	%r1,%sr0\n", f);
9909 	  fputs ("\tbe	0(%sr0,%r21)\n", f);
9910 	  fputs ("\tldw	40(%r22),%r29\n", f);
9911 	}
9912       fputs ("\t.word	0\n", f);
9913       fputs ("\t.word	0\n", f);
9914       fputs ("\t.word	0\n", f);
9915       fputs ("\t.word	0\n", f);
9916     }
9917   else
9918     {
9919       fputs ("\t.dword 0\n", f);
9920       fputs ("\t.dword 0\n", f);
9921       fputs ("\t.dword 0\n", f);
9922       fputs ("\t.dword 0\n", f);
9923       fputs ("\tmfia	%r31\n", f);
9924       fputs ("\tldd	24(%r31),%r1\n", f);
9925       fputs ("\tldd	24(%r1),%r27\n", f);
9926       fputs ("\tldd	16(%r1),%r1\n", f);
9927       fputs ("\tbve	(%r1)\n", f);
9928       fputs ("\tldd	32(%r31),%r31\n", f);
9929       fputs ("\t.dword 0  ; fptr\n", f);
9930       fputs ("\t.dword 0  ; static link\n", f);
9931     }
9932 }
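
/* For illustration (a sketch derived from the template above and the
   offsets used by pa_trampoline_init): the 32-bit trampoline is laid
   out as

     offset  0..35   code emitted above (instructions, plus the two
                     padding words on PA 2.0)
     offset 36       function address
     offset 40       static chain value
     offset 44       trampoline address (first plabel word)
     offset 48       %r19               (second plabel word)

   The 64-bit layout differs: the plabel words come first and the
   function address and static chain sit at offsets 56 and 64.
   TRAMPOLINE_CODE_SIZE (40 bytes on PA 1.x, 32 on PA 2.0, 24 on the
   64-bit port) bounds the region flushed from the caches below.  */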
9933 
9934 /* Emit RTL insns to initialize the variable parts of a trampoline.
9935    M_TRAMP is a memory reference for the trampoline block, FNDECL is
9936    the nested function, and CHAIN_VALUE is an RTX for its static chain.
9937 
9938    Move the function address to the trampoline template at offset 36.
9939    Move the static chain value to the trampoline template at offset 40.
9940    Move the trampoline address to the trampoline template at offset 44.
9941    Move r19 to the trampoline template at offset 48.  The latter two
9942    words create a plabel for the indirect call to the trampoline.
9943 
9944    A similar sequence is used for the 64-bit port, but the plabel is
9945    at the beginning of the trampoline.
9946 
9947    Finally, the cache entries for the trampoline code are flushed.
9948    This is necessary to ensure that the trampoline instruction sequence
9949    is written to memory prior to any attempts at prefetching the code
9950    sequence.  */
9951 
9952 static void
9953 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
9954 {
9955   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
9956   rtx start_addr = gen_reg_rtx (Pmode);
9957   rtx end_addr = gen_reg_rtx (Pmode);
9958   rtx line_length = gen_reg_rtx (Pmode);
9959   rtx r_tramp, tmp;
9960 
9961   emit_block_move (m_tramp, assemble_trampoline_template (),
9962 		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
9963   r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
9964 
9965   if (!TARGET_64BIT)
9966     {
9967       tmp = adjust_address (m_tramp, Pmode, 36);
9968       emit_move_insn (tmp, fnaddr);
9969       tmp = adjust_address (m_tramp, Pmode, 40);
9970       emit_move_insn (tmp, chain_value);
9971 
9972       /* Create a fat pointer for the trampoline.  */
9973       tmp = adjust_address (m_tramp, Pmode, 44);
9974       emit_move_insn (tmp, r_tramp);
9975       tmp = adjust_address (m_tramp, Pmode, 48);
9976       emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
9977 
9978       /* fdc and fic only use registers for the address to flush,
9979 	 they do not accept integer displacements.  We align the
9980 	 start and end addresses to the beginning of their respective
9981 	 cache lines to minimize the number of lines flushed.  */
9982       emit_insn (gen_andsi3 (start_addr, r_tramp,
9983 			     GEN_INT (-MIN_CACHELINE_SIZE)));
9984       tmp = force_reg (Pmode, plus_constant (r_tramp, TRAMPOLINE_CODE_SIZE - 1));
9985       emit_insn (gen_andsi3 (end_addr, tmp,
9986 			     GEN_INT (-MIN_CACHELINE_SIZE)));
9987       emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
9988       emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
9989       emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
9990 				    gen_reg_rtx (Pmode),
9991 				    gen_reg_rtx (Pmode)));
9992     }
9993   else
9994     {
9995       tmp = adjust_address (m_tramp, Pmode, 56);
9996       emit_move_insn (tmp, fnaddr);
9997       tmp = adjust_address (m_tramp, Pmode, 64);
9998       emit_move_insn (tmp, chain_value);
9999 
10000       /* Create a fat pointer for the trampoline.  */
10001       tmp = adjust_address (m_tramp, Pmode, 16);
10002       emit_move_insn (tmp, force_reg (Pmode, plus_constant (r_tramp, 32)));
10003       tmp = adjust_address (m_tramp, Pmode, 24);
10004       emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10005 
10006       /* fdc and fic only use registers for the address to flush,
10007 	 they do not accept integer displacements.  We align the
10008 	 start and end addresses to the beginning of their respective
10009 	 cache lines to minimize the number of lines flushed.  */
10010       tmp = force_reg (Pmode, plus_constant (r_tramp, 32));
10011       emit_insn (gen_anddi3 (start_addr, tmp,
10012 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10013       tmp = force_reg (Pmode, plus_constant (tmp, TRAMPOLINE_CODE_SIZE - 1));
10014       emit_insn (gen_anddi3 (end_addr, tmp,
10015 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10016       emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10017       emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10018       emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10019 				    gen_reg_rtx (Pmode),
10020 				    gen_reg_rtx (Pmode)));
10021     }
10022 }
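
/* For illustration (a sketch; assumes MIN_CACHELINE_SIZE is 32 and a
   hypothetical trampoline address): with the trampoline at 0x7b03a8c4,
   the 32-bit flush bounds computed above are

     start_addr = 0x7b03a8c4 & -32             = 0x7b03a8c0
     end_addr   = (0x7b03a8c4 + 40 - 1) & -32  = 0x7b03a8e0

   so the fdc/fic flush loops walk 32-byte lines from 0x7b03a8c0
   through 0x7b03a8e0 inclusive.  */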
10023 
10024 /* Perform any machine-specific adjustment to the address of the trampoline.
10025    ADDR contains the address that was passed to pa_trampoline_init.
10026    Adjust ADDR to the plabel at offset 44, plus 2 to mark it as a plabel pointer.  */
10027 
10028 static rtx
10029 pa_trampoline_adjust_address (rtx addr)
10030 {
10031   if (!TARGET_64BIT)
10032     addr = memory_address (Pmode, plus_constant (addr, 46));
10033   return addr;
10034 }
10035 
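/* Worker function for TARGET_DELEGITIMIZE_ADDRESS.  Undo the effect of
   PIC address legitimization so that the underlying symbol becomes
   visible again, mainly for the benefit of debug-info generation.  */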
10036 static rtx
10037 pa_delegitimize_address (rtx orig_x)
10038 {
10039   rtx x = delegitimize_mem_from_attrs (orig_x);
10040 
10041   if (GET_CODE (x) == LO_SUM
10042       && GET_CODE (XEXP (x, 1)) == UNSPEC
10043       && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10044     return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10045   return x;
10046 }
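
/* For illustration (a sketch): a PIC address of the form

     (lo_sum (reg) (unspec [(symbol_ref "x")] UNSPEC_DLTIND14R))

   is collapsed back to a constant mem based directly on
   (symbol_ref "x"), so that consumers such as the debug-info
   machinery can recognize which symbol is being referenced.  */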
10047 
10048 #include "gt-pa.h"
10049