xref: /openbsd-src/gnu/gcc/gcc/config/pa/pa.c (revision ee76c7cda092e5d4730025132d61df935e82cee3)
1 /* Subroutines for insn-output.c for HPPA.
2    Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3    2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
4    Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12 
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING.  If not, write to
20 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
21 Boston, MA 02110-1301, USA.  */
22 
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "rtl.h"
28 #include "regs.h"
29 #include "hard-reg-set.h"
30 #include "real.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "tree.h"
36 #include "output.h"
37 #include "except.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "reload.h"
41 #include "integrate.h"
42 #include "function.h"
43 #include "toplev.h"
44 #include "ggc.h"
45 #include "recog.h"
46 #include "predict.h"
47 #include "tm_p.h"
48 #include "target.h"
49 #include "target-def.h"
50 
51 /* Return nonzero if there is a bypass for the output of
52    OUT_INSN and the fp store IN_INSN.  */
53 int
hppa_fpstore_bypass_p(rtx out_insn,rtx in_insn)54 hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
55 {
56   enum machine_mode store_mode;
57   enum machine_mode other_mode;
58   rtx set;
59 
60   if (recog_memoized (in_insn) < 0
61       || get_attr_type (in_insn) != TYPE_FPSTORE
62       || recog_memoized (out_insn) < 0)
63     return 0;
64 
65   store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
66 
67   set = single_set (out_insn);
68   if (!set)
69     return 0;
70 
71   other_mode = GET_MODE (SET_SRC (set));
72 
73   return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
74 }
75 
76 
77 #ifndef DO_FRAME_NOTES
78 #ifdef INCOMING_RETURN_ADDR_RTX
79 #define DO_FRAME_NOTES 1
80 #else
81 #define DO_FRAME_NOTES 0
82 #endif
83 #endif
84 
85 static void copy_reg_pointer (rtx, rtx);
86 static void fix_range (const char *);
87 static bool pa_handle_option (size_t, const char *, int);
88 static int hppa_address_cost (rtx);
89 static bool hppa_rtx_costs (rtx, int, int, int *);
90 static inline rtx force_mode (enum machine_mode, rtx);
91 static void pa_reorg (void);
92 static void pa_combine_instructions (void);
93 static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
94 static int forward_branch_p (rtx);
95 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
96 static int compute_movmem_length (rtx);
97 static int compute_clrmem_length (rtx);
98 static bool pa_assemble_integer (rtx, unsigned int, int);
99 static void remove_useless_addtr_insns (int);
100 static void store_reg (int, HOST_WIDE_INT, int);
101 static void store_reg_modify (int, int, HOST_WIDE_INT);
102 static void load_reg (int, HOST_WIDE_INT, int);
103 static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
104 static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
105 static void update_total_code_bytes (int);
106 static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
107 static int pa_adjust_cost (rtx, rtx, rtx, int);
108 static int pa_adjust_priority (rtx, int);
109 static int pa_issue_rate (void);
110 static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
111 static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
112      ATTRIBUTE_UNUSED;
113 static void pa_encode_section_info (tree, rtx, int);
114 static const char *pa_strip_name_encoding (const char *);
115 static bool pa_function_ok_for_sibcall (tree, tree);
116 static void pa_globalize_label (FILE *, const char *)
117      ATTRIBUTE_UNUSED;
118 static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
119 				    HOST_WIDE_INT, tree);
120 #if !defined(USE_COLLECT2)
121 static void pa_asm_out_constructor (rtx, int);
122 static void pa_asm_out_destructor (rtx, int);
123 #endif
124 static void pa_init_builtins (void);
125 static rtx hppa_builtin_saveregs (void);
126 static tree hppa_gimplify_va_arg_expr (tree, tree, tree *, tree *);
127 static bool pa_scalar_mode_supported_p (enum machine_mode);
128 static bool pa_commutative_p (rtx x, int outer_code);
129 static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
130 static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
131 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
132 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
133 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
134 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
135 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
136 static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
137 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
138 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
139 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
140 static void output_deferred_plabels (void);
141 static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
142 #ifdef ASM_OUTPUT_EXTERNAL_REAL
143 static void pa_hpux_file_end (void);
144 #endif
145 #ifdef HPUX_LONG_DOUBLE_LIBRARY
146 static void pa_hpux_init_libfuncs (void);
147 #endif
148 static rtx pa_struct_value_rtx (tree, int);
149 static bool pa_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
150 				  tree, bool);
151 static int pa_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
152 				 tree, bool);
153 static struct machine_function * pa_init_machine_status (void);
154 static enum reg_class pa_secondary_reload (bool, rtx, enum reg_class,
155 					   enum machine_mode,
156 					   secondary_reload_info *);
157 
158 
159 /* The following extra sections are only used for SOM.  */
160 static GTY(()) section *som_readonly_data_section;
161 static GTY(()) section *som_one_only_readonly_data_section;
162 static GTY(()) section *som_one_only_data_section;
163 
164 /* Save the operands last given to a compare for use when we
165    generate a scc or bcc insn.  */
166 rtx hppa_compare_op0, hppa_compare_op1;
167 enum cmp_type hppa_branch_type;
168 
169 /* Which cpu we are scheduling for.  */
170 enum processor_type pa_cpu = TARGET_SCHED_DEFAULT;
171 
172 /* The UNIX standard to use for predefines and linking.  */
173 int flag_pa_unix = TARGET_HPUX_11_11 ? 1998 : TARGET_HPUX_10_10 ? 1995 : 1993;
174 
175 /* Counts for the number of callee-saved general and floating point
176    registers which were saved by the current function's prologue.  */
177 static int gr_saved, fr_saved;
178 
179 static rtx find_addr_reg (rtx);
180 
181 /* Keep track of the number of bytes we have output in the CODE subspace
182    during this compilation so we'll know when to emit inline long-calls.  */
183 unsigned long total_code_bytes;
184 
185 /* The last address of the previous function plus the number of bytes in
186    associated thunks that have been output.  This is used to determine if
187    a thunk can use an IA-relative branch to reach its target function.  */
188 static int last_address;
189 
190 /* Variables to handle plabels that we discover are necessary at assembly
191    output time.  They are output after the current function.  */
192 struct deferred_plabel GTY(())
193 {
194   rtx internal_label;
195   rtx symbol;
196 };
197 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
198   deferred_plabels;
199 static size_t n_deferred_plabels = 0;
200 
201 
202 /* Initialize the GCC target structure.  */
203 
204 #undef TARGET_ASM_ALIGNED_HI_OP
205 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
206 #undef TARGET_ASM_ALIGNED_SI_OP
207 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
208 #undef TARGET_ASM_ALIGNED_DI_OP
209 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
210 #undef TARGET_ASM_UNALIGNED_HI_OP
211 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
212 #undef TARGET_ASM_UNALIGNED_SI_OP
213 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
214 #undef TARGET_ASM_UNALIGNED_DI_OP
215 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
216 #undef TARGET_ASM_INTEGER
217 #define TARGET_ASM_INTEGER pa_assemble_integer
218 
219 #undef TARGET_ASM_FUNCTION_PROLOGUE
220 #define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
221 #undef TARGET_ASM_FUNCTION_EPILOGUE
222 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
223 
224 #undef TARGET_SCHED_ADJUST_COST
225 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
226 #undef TARGET_SCHED_ADJUST_PRIORITY
227 #define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
228 #undef TARGET_SCHED_ISSUE_RATE
229 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
230 
231 #undef TARGET_ENCODE_SECTION_INFO
232 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
233 #undef TARGET_STRIP_NAME_ENCODING
234 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
235 
236 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
237 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
238 
239 #undef TARGET_COMMUTATIVE_P
240 #define TARGET_COMMUTATIVE_P pa_commutative_p
241 
242 #undef TARGET_ASM_OUTPUT_MI_THUNK
243 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
244 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
245 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
246 
247 #undef TARGET_ASM_FILE_END
248 #ifdef ASM_OUTPUT_EXTERNAL_REAL
249 #define TARGET_ASM_FILE_END pa_hpux_file_end
250 #else
251 #define TARGET_ASM_FILE_END output_deferred_plabels
252 #endif
253 
254 #if !defined(USE_COLLECT2)
255 #undef TARGET_ASM_CONSTRUCTOR
256 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
257 #undef TARGET_ASM_DESTRUCTOR
258 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
259 #endif
260 
261 #undef TARGET_DEFAULT_TARGET_FLAGS
262 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
263 #undef TARGET_HANDLE_OPTION
264 #define TARGET_HANDLE_OPTION pa_handle_option
265 
266 #undef TARGET_INIT_BUILTINS
267 #define TARGET_INIT_BUILTINS pa_init_builtins
268 
269 #undef TARGET_RTX_COSTS
270 #define TARGET_RTX_COSTS hppa_rtx_costs
271 #undef TARGET_ADDRESS_COST
272 #define TARGET_ADDRESS_COST hppa_address_cost
273 
274 #undef TARGET_MACHINE_DEPENDENT_REORG
275 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
276 
277 #ifdef HPUX_LONG_DOUBLE_LIBRARY
278 #undef TARGET_INIT_LIBFUNCS
279 #define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
280 #endif
281 
282 #undef TARGET_PROMOTE_FUNCTION_RETURN
283 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
284 #undef TARGET_PROMOTE_PROTOTYPES
285 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
286 
287 #undef TARGET_STRUCT_VALUE_RTX
288 #define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
289 #undef TARGET_RETURN_IN_MEMORY
290 #define TARGET_RETURN_IN_MEMORY pa_return_in_memory
291 #undef TARGET_MUST_PASS_IN_STACK
292 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
293 #undef TARGET_PASS_BY_REFERENCE
294 #define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
295 #undef TARGET_CALLEE_COPIES
296 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
297 #undef TARGET_ARG_PARTIAL_BYTES
298 #define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
299 
300 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
301 #define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
302 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
303 #define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
304 
305 #undef TARGET_SCALAR_MODE_SUPPORTED_P
306 #define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
307 
308 #undef TARGET_CANNOT_FORCE_CONST_MEM
309 #define TARGET_CANNOT_FORCE_CONST_MEM pa_tls_referenced_p
310 
311 #undef TARGET_SECONDARY_RELOAD
312 #define TARGET_SECONDARY_RELOAD pa_secondary_reload
313 
314 struct gcc_target targetm = TARGET_INITIALIZER;
315 
/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  /* Work on a writable stack copy of the option string, since we
     temporarily overwrite the '-' and ',' separators below.  */
  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of -mfixed-range must have form REG1-REG2");
	  return;
	}
      /* Terminate REG1 so it can be decoded on its own.  */
      *dash = '\0';

      /* Isolate the current REG1-REG2 pair by terminating it at the
	 next comma, if any.  */
      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      /* Restore the dash so STR prints as the full "REG1-REG2" text
	 in the empty-range diagnostic below.  */
      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      /* Mark every register in the range as fixed and call-used.  */
      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      /* Restore the comma and advance to the next range.  */
      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  /* If the loop above ran to completion, every FP register is fixed,
     so disable use of the FP register set entirely.  */
  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}
388 
/* Implement TARGET_HANDLE_OPTION.  Returns false only for an invalid
   argument to a recognized option (e.g. a bad -mschedule= value).  */

static bool
pa_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    /* Select the PA 1.0 architecture: clear both later-level flags.  */
    case OPT_mnosnake:
    case OPT_mpa_risc_1_0:
    case OPT_march_1_0:
      target_flags &= ~(MASK_PA_11 | MASK_PA_20);
      return true;

    /* Select PA 1.1: enable the 1.1 feature flag but not 2.0.  */
    case OPT_msnake:
    case OPT_mpa_risc_1_1:
    case OPT_march_1_1:
      target_flags &= ~MASK_PA_20;
      target_flags |= MASK_PA_11;
      return true;

    /* Select PA 2.0, which implies the 1.1 feature set as well.  */
    case OPT_mpa_risc_2_0:
    case OPT_march_2_0:
      target_flags |= MASK_PA_11 | MASK_PA_20;
      return true;

    /* -mschedule=: choose the processor model to schedule for.  An
       unrecognized model name makes the option invalid.  */
    case OPT_mschedule_:
      if (strcmp (arg, "8000") == 0)
	pa_cpu = PROCESSOR_8000;
      else if (strcmp (arg, "7100") == 0)
	pa_cpu = PROCESSOR_7100;
      else if (strcmp (arg, "700") == 0)
	pa_cpu = PROCESSOR_700;
      else if (strcmp (arg, "7100LC") == 0)
	pa_cpu = PROCESSOR_7100LC;
      else if (strcmp (arg, "7200") == 0)
	pa_cpu = PROCESSOR_7200;
      else if (strcmp (arg, "7300") == 0)
	pa_cpu = PROCESSOR_7300;
      else
	return false;
      return true;

    /* -mfixed-range=REG1-REG2: mark a register range as fixed.  */
    case OPT_mfixed_range_:
      fix_range (arg);
      return true;

    /* The -munix= variants select the UNIX standard used for
       predefines and linking; each is only compiled in on targets
       supporting the corresponding HP-UX release.  */
#if TARGET_HPUX
    case OPT_munix_93:
      flag_pa_unix = 1993;
      return true;
#endif

#if TARGET_HPUX_10_10
    case OPT_munix_95:
      flag_pa_unix = 1995;
      return true;
#endif

#if TARGET_HPUX_11_11
    case OPT_munix_98:
      flag_pa_unix = 1998;
      return true;
#endif

    /* All remaining options need no extra processing here.  */
    default:
      return true;
    }
}
457 
/* Validate and reconcile the target flags once all options have been
   processed.  */
void
override_options (void)
{
  /* Unconditional branches in the delay slot are not compatible with dwarf2
     call frame information.  There is no benefit in using this optimization
     on PA8000 and later processors.  */
  if (pa_cpu >= PROCESSOR_8000
      || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
      || flag_unwind_tables)
    target_flags &= ~MASK_JUMP_IN_DELAY;

  /* Warn about (but do not reject) incompatible option combinations.  */
  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
   {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
   }

  /* Debug info requires the GNU assembler on this target; otherwise
     silently drop -g after warning.  */
  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  /* Arrange for per-function machine state to be allocated on demand.  */
  init_machine_status = pa_init_machine_status;
}
505 
/* Implement TARGET_INIT_BUILTINS.  */
static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  /* The target lacks fputc_unlocked, so redirect both the explicit and
     implicit fputc_unlocked builtins to the putc_unlocked decls.  */
  built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] =
    built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
  implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED]
    = implicit_built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
#endif
}
516 
/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  /* Return a zero-initialized machine_function from GC-managed memory.  */
  return ggc_alloc_cleared (sizeof (machine_function));
}
526 
527 /* If FROM is a probable pointer register, mark TO as a probable
528    pointer register with the same pointer alignment as FROM.  */
529 
530 static void
copy_reg_pointer(rtx to,rtx from)531 copy_reg_pointer (rtx to, rtx from)
532 {
533   if (REG_POINTER (from))
534     mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
535 }
536 
537 /* Return 1 if X contains a symbolic expression.  We know these
538    expressions will have one of a few well defined forms, so
539    we need only check those forms.  */
540 int
symbolic_expression_p(rtx x)541 symbolic_expression_p (rtx x)
542 {
543 
544   /* Strip off any HIGH.  */
545   if (GET_CODE (x) == HIGH)
546     x = XEXP (x, 0);
547 
548   return (symbolic_operand (x, VOIDmode));
549 }
550 
551 /* Accept any constant that can be moved in one instruction into a
552    general register.  */
553 int
cint_ok_for_move(HOST_WIDE_INT intval)554 cint_ok_for_move (HOST_WIDE_INT intval)
555 {
556   /* OK if ldo, ldil, or zdepi, can be used.  */
557   return (CONST_OK_FOR_LETTER_P (intval, 'J')
558 	  || CONST_OK_FOR_LETTER_P (intval, 'N')
559 	  || CONST_OK_FOR_LETTER_P (intval, 'K'));
560 }
561 
562 /* Return truth value of whether OP can be used as an operand in a
563    adddi3 insn.  */
564 int
adddi3_operand(rtx op,enum machine_mode mode)565 adddi3_operand (rtx op, enum machine_mode mode)
566 {
567   return (register_operand (op, mode)
568 	  || (GET_CODE (op) == CONST_INT
569 	      && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
570 }
571 
572 /* True iff zdepi can be used to generate this CONST_INT.
573    zdepi first sign extends a 5 bit signed number to a given field
574    length, then places this field anywhere in a zero.  */
575 int
zdepi_cint_p(unsigned HOST_WIDE_INT x)576 zdepi_cint_p (unsigned HOST_WIDE_INT x)
577 {
578   unsigned HOST_WIDE_INT lsb_mask, t;
579 
580   /* This might not be obvious, but it's at least fast.
581      This function is critical; we don't have the time loops would take.  */
582   lsb_mask = x & -x;
583   t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
584   /* Return true iff t is a power of two.  */
585   return ((t & (t - 1)) == 0);
586 }
587 
588 /* True iff depi or extru can be used to compute (reg & mask).
589    Accept bit pattern like these:
590    0....01....1
591    1....10....0
592    1..10..01..1  */
593 int
and_mask_p(unsigned HOST_WIDE_INT mask)594 and_mask_p (unsigned HOST_WIDE_INT mask)
595 {
596   mask = ~mask;
597   mask += mask & -mask;
598   return (mask & (mask - 1)) == 0;
599 }
600 
601 /* True iff depi can be used to compute (reg | MASK).  */
602 int
ior_mask_p(unsigned HOST_WIDE_INT mask)603 ior_mask_p (unsigned HOST_WIDE_INT mask)
604 {
605   mask += mask & -mask;
606   return (mask & (mask - 1)) == 0;
607 }
608 
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

rtx
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  /* TLS references must be legitimized by legitimize_tls_address.  */
  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 multiple word sizes.

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
      current_function_uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx insn, tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
	 result.  This allows the sequence to be deleted when the final
	 result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
		 ? reg : gen_reg_rtx (Pmode));

      /* Form the indirect reference: add HIGH (ORIG) to the PIC offset
	 table register, then load through the LO_SUM of that with an
	 UNSPEC_DLTIND14R wrapper around ORIG.  */
      emit_move_insn (tmp_reg,
		      gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
				    gen_rtx_HIGH (word_mode, orig)));
      pic_ref
	= gen_const_mem (Pmode,
		         gen_rtx_LO_SUM (Pmode, tmp_reg,
				         gen_rtx_UNSPEC (Pmode,
						         gen_rtvec (1, orig),
						         UNSPEC_DLTIND14R)));

      current_function_uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, orig, REG_NOTES (insn));

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      /* Already legitimized: a PLUS whose base is the PIC register.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      /* Legitimize each operand of the PLUS separately, reusing REG
	 for the second operand only when the first did not claim it.  */
      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				     base == reg ? 0 : reg);

      /* A small constant offset can be folded directly; otherwise force
	 it into a register before forming the sum.  */
      if (GET_CODE (orig) == CONST_INT)
	{
	  if (INT_14_BITS (orig))
	    return plus_constant (base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}
696 
697 static GTY(()) rtx gen_tls_tga;
698 
699 static rtx
gen_tls_get_addr(void)700 gen_tls_get_addr (void)
701 {
702   if (!gen_tls_tga)
703     gen_tls_tga = init_one_libfunc ("__tls_get_addr");
704   return gen_tls_tga;
705 }
706 
/* Emit a call to __tls_get_addr with ARG as its argument and return
   the pseudo register holding the result.  */
static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  /* LCT_CONST: the call's result depends only on its argument.  */
  emit_library_call_value (gen_tls_get_addr (), ret,
		  	   LCT_CONST, Pmode, 1, arg, Pmode);

  return ret;
}
718 
/* Emit RTL to compute the address of the thread-local object ADDR
   according to its TLS access model, returning the register that
   holds the result.  */
static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, insn, tmp, t1, t2, tp;
  enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);

  switch (model)
    {
      /* Global dynamic: load the GD descriptor for ADDR and call
	 __tls_get_addr with it.  */
      case TLS_MODEL_GLOBAL_DYNAMIC:
	tmp = gen_reg_rtx (Pmode);
	if (flag_pic)
	  emit_insn (gen_tgd_load_pic (tmp, addr));
	else
	  emit_insn (gen_tgd_load (tmp, addr));
	ret = hppa_tls_call (tmp);
	break;

      /* Local dynamic: obtain the module base via __tls_get_addr once
	 (wrapped in a libcall block keyed by UNSPEC_TLSLDBASE so it can
	 be CSEd), then add the variable's offset within the module.  */
      case TLS_MODEL_LOCAL_DYNAMIC:
	ret = gen_reg_rtx (Pmode);
	tmp = gen_reg_rtx (Pmode);
	start_sequence ();
	if (flag_pic)
	  emit_insn (gen_tld_load_pic (tmp, addr));
	else
	  emit_insn (gen_tld_load (tmp, addr));
	t1 = hppa_tls_call (tmp);
	insn = get_insns ();
	end_sequence ();
	t2 = gen_reg_rtx (Pmode);
	emit_libcall_block (insn, t2, t1,
			    gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				            UNSPEC_TLSLDBASE));
	emit_insn (gen_tld_offset_load (ret, addr, t2));
	break;

      /* Initial exec: read the thread pointer and add the offset
	 loaded for ADDR.  */
      case TLS_MODEL_INITIAL_EXEC:
	tp = gen_reg_rtx (Pmode);
	tmp = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	emit_insn (gen_tp_load (tp));
	if (flag_pic)
	  emit_insn (gen_tie_load_pic (tmp, addr));
	else
	  emit_insn (gen_tie_load (tmp, addr));
	emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
	break;

      /* Local exec: the offset is known at link time; combine it with
	 the thread pointer directly.  */
      case TLS_MODEL_LOCAL_EXEC:
	tp = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	emit_insn (gen_tp_load (tp));
	emit_insn (gen_tle_load (ret, addr, tp));
	break;

      default:
	gcc_unreachable ();
    }

  return ret;
}
779 
780 /* Try machine-dependent ways of modifying an illegitimate address
781    to be legitimate.  If we find one, return the new, valid address.
782    This macro is used in only one place: `memory_address' in explow.c.
783 
784    OLDX is the address as it was before break_out_memory_refs was called.
785    In some cases it is useful to look at this to decide what needs to be done.
786 
787    MODE and WIN are passed so that this macro can use
788    GO_IF_LEGITIMATE_ADDRESS.
789 
790    It is always safe for this macro to do nothing.  It exists to recognize
791    opportunities to optimize the output.
792 
793    For the PA, transform:
794 
795 	memory(X + <large int>)
796 
797    into:
798 
799 	if (<large int> & mask) >= 16
800 	  Y = (<large int> & ~mask) + mask + 1	Round up.
801 	else
802 	  Y = (<large int> & ~mask)		Round down.
803 	Z = X + Y
804 	memory (Z + (<large int> - Y));
805 
806    This is for CSE to find several similar references, and only use one Z.
807 
808    X can either be a SYMBOL_REF or REG, but because combine cannot
809    perform a 4->2 combination we do nothing for SYMBOL_REF + D where
810    D will not fit in 14 bits.
811 
812    MODE_FLOAT references allow displacements which fit in 5 bits, so use
813    0x1f as the mask.
814 
815    MODE_INT references allow displacements which fit in 14 bits, so use
816    0x3fff as the mask.
817 
818    This relies on the fact that most mode MODE_FLOAT references will use FP
819    registers and most mode MODE_INT references will use integer registers.
820    (In the rare case of an FP register used in an integer MODE, we depend
821    on secondary reloads to clean things up.)
822 
823 
824    It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
825    manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
826    addressing modes to be used).
827 
828    Put X and Z into registers.  Then put the entire expression into
829    a register.  */
830 
831 rtx
hppa_legitimize_address(rtx x,rtx oldx ATTRIBUTE_UNUSED,enum machine_mode mode)832 hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
833 			 enum machine_mode mode)
834 {
835   rtx orig = x;
836 
837   /* We need to canonicalize the order of operands in unscaled indexed
838      addresses since the code that checks if an address is valid doesn't
839      always try both orders.  */
840   if (!TARGET_NO_SPACE_REGS
841       && GET_CODE (x) == PLUS
842       && GET_MODE (x) == Pmode
843       && REG_P (XEXP (x, 0))
844       && REG_P (XEXP (x, 1))
845       && REG_POINTER (XEXP (x, 0))
846       && !REG_POINTER (XEXP (x, 1)))
847     return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));
848 
849   if (PA_SYMBOL_REF_TLS_P (x))
850     return legitimize_tls_address (x);
851   else if (flag_pic)
852     return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
853 
854   /* Strip off CONST.  */
855   if (GET_CODE (x) == CONST)
856     x = XEXP (x, 0);
857 
858   /* Special case.  Get the SYMBOL_REF into a register and use indexing.
859      That should always be safe.  */
860   if (GET_CODE (x) == PLUS
861       && GET_CODE (XEXP (x, 0)) == REG
862       && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
863     {
864       rtx reg = force_reg (Pmode, XEXP (x, 1));
865       return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
866     }
867 
868   /* Note we must reject symbols which represent function addresses
869      since the assembler/linker can't handle arithmetic on plabels.  */
870   if (GET_CODE (x) == PLUS
871       && GET_CODE (XEXP (x, 1)) == CONST_INT
872       && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
873 	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
874 	  || GET_CODE (XEXP (x, 0)) == REG))
875     {
876       rtx int_part, ptr_reg;
877       int newoffset;
878       int offset = INTVAL (XEXP (x, 1));
879       int mask;
880 
881       mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
882 	      ? (TARGET_PA_20 ? 0x3fff : 0x1f) : 0x3fff);
883 
884       /* Choose which way to round the offset.  Round up if we
885 	 are >= halfway to the next boundary.  */
886       if ((offset & mask) >= ((mask + 1) / 2))
887 	newoffset = (offset & ~ mask) + mask + 1;
888       else
889 	newoffset = (offset & ~ mask);
890 
891       /* If the newoffset will not fit in 14 bits (ldo), then
892 	 handling this would take 4 or 5 instructions (2 to load
893 	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
894 	 add the new offset and the SYMBOL_REF.)  Combine can
895 	 not handle 4->2 or 5->2 combinations, so do not create
896 	 them.  */
897       if (! VAL_14_BITS_P (newoffset)
898 	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
899 	{
900 	  rtx const_part = plus_constant (XEXP (x, 0), newoffset);
901 	  rtx tmp_reg
902 	    = force_reg (Pmode,
903 			 gen_rtx_HIGH (Pmode, const_part));
904 	  ptr_reg
905 	    = force_reg (Pmode,
906 			 gen_rtx_LO_SUM (Pmode,
907 					 tmp_reg, const_part));
908 	}
909       else
910 	{
911 	  if (! VAL_14_BITS_P (newoffset))
912 	    int_part = force_reg (Pmode, GEN_INT (newoffset));
913 	  else
914 	    int_part = GEN_INT (newoffset);
915 
916 	  ptr_reg = force_reg (Pmode,
917 			       gen_rtx_PLUS (Pmode,
918 					     force_reg (Pmode, XEXP (x, 0)),
919 					     int_part));
920 	}
921       return plus_constant (ptr_reg, offset - newoffset);
922     }
923 
924   /* Handle (plus (mult (a) (shadd_constant)) (b)).  */
925 
926   if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
927       && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
928       && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
929       && (OBJECT_P (XEXP (x, 1))
930 	  || GET_CODE (XEXP (x, 1)) == SUBREG)
931       && GET_CODE (XEXP (x, 1)) != CONST)
932     {
933       int val = INTVAL (XEXP (XEXP (x, 0), 1));
934       rtx reg1, reg2;
935 
936       reg1 = XEXP (x, 1);
937       if (GET_CODE (reg1) != REG)
938 	reg1 = force_reg (Pmode, force_operand (reg1, 0));
939 
940       reg2 = XEXP (XEXP (x, 0), 0);
941       if (GET_CODE (reg2) != REG)
942         reg2 = force_reg (Pmode, force_operand (reg2, 0));
943 
944       return force_reg (Pmode, gen_rtx_PLUS (Pmode,
945 					     gen_rtx_MULT (Pmode,
946 							   reg2,
947 							   GEN_INT (val)),
948 					     reg1));
949     }
950 
951   /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
952 
953      Only do so for floating point modes since this is more speculative
954      and we lose if it's an integer store.  */
955   if (GET_CODE (x) == PLUS
956       && GET_CODE (XEXP (x, 0)) == PLUS
957       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
958       && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
959       && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
960       && (mode == SFmode || mode == DFmode))
961     {
962 
963       /* First, try and figure out what to use as a base register.  */
964       rtx reg1, reg2, base, idx, orig_base;
965 
966       reg1 = XEXP (XEXP (x, 0), 1);
967       reg2 = XEXP (x, 1);
968       base = NULL_RTX;
969       idx = NULL_RTX;
970 
971       /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
972 	 then emit_move_sequence will turn on REG_POINTER so we'll know
973 	 it's a base register below.  */
974       if (GET_CODE (reg1) != REG)
975 	reg1 = force_reg (Pmode, force_operand (reg1, 0));
976 
977       if (GET_CODE (reg2) != REG)
978 	reg2 = force_reg (Pmode, force_operand (reg2, 0));
979 
980       /* Figure out what the base and index are.  */
981 
982       if (GET_CODE (reg1) == REG
983 	  && REG_POINTER (reg1))
984 	{
985 	  base = reg1;
986 	  orig_base = XEXP (XEXP (x, 0), 1);
987 	  idx = gen_rtx_PLUS (Pmode,
988 			      gen_rtx_MULT (Pmode,
989 					    XEXP (XEXP (XEXP (x, 0), 0), 0),
990 					    XEXP (XEXP (XEXP (x, 0), 0), 1)),
991 			      XEXP (x, 1));
992 	}
993       else if (GET_CODE (reg2) == REG
994 	       && REG_POINTER (reg2))
995 	{
996 	  base = reg2;
997 	  orig_base = XEXP (x, 1);
998 	  idx = XEXP (x, 0);
999 	}
1000 
1001       if (base == 0)
1002 	return orig;
1003 
1004       /* If the index adds a large constant, try to scale the
1005 	 constant so that it can be loaded with only one insn.  */
1006       if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1007 	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
1008 			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
1009 	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
1010 	{
1011 	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
1012 	  int val = INTVAL (XEXP (idx, 1));
1013 
1014 	  val /= INTVAL (XEXP (XEXP (idx, 0), 1));
1015 	  reg1 = XEXP (XEXP (idx, 0), 0);
1016 	  if (GET_CODE (reg1) != REG)
1017 	    reg1 = force_reg (Pmode, force_operand (reg1, 0));
1018 
1019 	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
1020 
1021 	  /* We can now generate a simple scaled indexed address.  */
1022 	  return
1023 	    force_reg
1024 	      (Pmode, gen_rtx_PLUS (Pmode,
1025 				    gen_rtx_MULT (Pmode, reg1,
1026 						  XEXP (XEXP (idx, 0), 1)),
1027 				    base));
1028 	}
1029 
1030       /* If B + C is still a valid base register, then add them.  */
1031       if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1032 	  && INTVAL (XEXP (idx, 1)) <= 4096
1033 	  && INTVAL (XEXP (idx, 1)) >= -4096)
1034 	{
1035 	  int val = INTVAL (XEXP (XEXP (idx, 0), 1));
1036 	  rtx reg1, reg2;
1037 
1038 	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
1039 
1040 	  reg2 = XEXP (XEXP (idx, 0), 0);
1041 	  if (GET_CODE (reg2) != CONST_INT)
1042 	    reg2 = force_reg (Pmode, force_operand (reg2, 0));
1043 
1044 	  return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1045 						 gen_rtx_MULT (Pmode,
1046 							       reg2,
1047 							       GEN_INT (val)),
1048 						 reg1));
1049 	}
1050 
1051       /* Get the index into a register, then add the base + index and
1052 	 return a register holding the result.  */
1053 
1054       /* First get A into a register.  */
1055       reg1 = XEXP (XEXP (idx, 0), 0);
1056       if (GET_CODE (reg1) != REG)
1057 	reg1 = force_reg (Pmode, force_operand (reg1, 0));
1058 
1059       /* And get B into a register.  */
1060       reg2 = XEXP (idx, 1);
1061       if (GET_CODE (reg2) != REG)
1062 	reg2 = force_reg (Pmode, force_operand (reg2, 0));
1063 
1064       reg1 = force_reg (Pmode,
1065 			gen_rtx_PLUS (Pmode,
1066 				      gen_rtx_MULT (Pmode, reg1,
1067 						    XEXP (XEXP (idx, 0), 1)),
1068 				      reg2));
1069 
1070       /* Add the result to our base register and return.  */
1071       return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
1072 
1073     }
1074 
1075   /* Uh-oh.  We might have an address for x[n-100000].  This needs
1076      special handling to avoid creating an indexed memory address
1077      with x-100000 as the base.
1078 
1079      If the constant part is small enough, then it's still safe because
1080      there is a guard page at the beginning and end of the data segment.
1081 
1082      Scaled references are common enough that we want to try and rearrange the
1083      terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */
1085 
1086   if (GET_CODE (x) == PLUS
1087       && symbolic_expression_p (XEXP (x, 1)))
1088     {
1089       /* Ugly.  We modify things here so that the address offset specified
1090 	 by the index expression is computed first, then added to x to form
1091 	 the entire address.  */
1092 
1093       rtx regx1, regx2, regy1, regy2, y;
1094 
1095       /* Strip off any CONST.  */
1096       y = XEXP (x, 1);
1097       if (GET_CODE (y) == CONST)
1098 	y = XEXP (y, 0);
1099 
1100       if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1101 	{
1102 	  /* See if this looks like
1103 		(plus (mult (reg) (shadd_const))
1104 		      (const (plus (symbol_ref) (const_int))))
1105 
1106 	     Where const_int is small.  In that case the const
1107 	     expression is a valid pointer for indexing.
1108 
1109 	     If const_int is big, but can be divided evenly by shadd_const
1110 	     and added to (reg).  This allows more scaled indexed addresses.  */
1111 	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1112 	      && GET_CODE (XEXP (x, 0)) == MULT
1113 	      && GET_CODE (XEXP (y, 1)) == CONST_INT
1114 	      && INTVAL (XEXP (y, 1)) >= -4096
1115 	      && INTVAL (XEXP (y, 1)) <= 4095
1116 	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1117 	      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1118 	    {
1119 	      int val = INTVAL (XEXP (XEXP (x, 0), 1));
1120 	      rtx reg1, reg2;
1121 
1122 	      reg1 = XEXP (x, 1);
1123 	      if (GET_CODE (reg1) != REG)
1124 		reg1 = force_reg (Pmode, force_operand (reg1, 0));
1125 
1126 	      reg2 = XEXP (XEXP (x, 0), 0);
1127 	      if (GET_CODE (reg2) != REG)
1128 	        reg2 = force_reg (Pmode, force_operand (reg2, 0));
1129 
1130 	      return force_reg (Pmode,
1131 				gen_rtx_PLUS (Pmode,
1132 					      gen_rtx_MULT (Pmode,
1133 							    reg2,
1134 							    GEN_INT (val)),
1135 					      reg1));
1136 	    }
1137 	  else if ((mode == DFmode || mode == SFmode)
1138 		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1139 		   && GET_CODE (XEXP (x, 0)) == MULT
1140 		   && GET_CODE (XEXP (y, 1)) == CONST_INT
1141 		   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
1142 		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1143 		   && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1144 	    {
1145 	      regx1
1146 		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1147 					     / INTVAL (XEXP (XEXP (x, 0), 1))));
1148 	      regx2 = XEXP (XEXP (x, 0), 0);
1149 	      if (GET_CODE (regx2) != REG)
1150 		regx2 = force_reg (Pmode, force_operand (regx2, 0));
1151 	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1152 							regx2, regx1));
1153 	      return
1154 		force_reg (Pmode,
1155 			   gen_rtx_PLUS (Pmode,
1156 					 gen_rtx_MULT (Pmode, regx2,
1157 						       XEXP (XEXP (x, 0), 1)),
1158 					 force_reg (Pmode, XEXP (y, 0))));
1159 	    }
1160 	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1161 		   && INTVAL (XEXP (y, 1)) >= -4096
1162 		   && INTVAL (XEXP (y, 1)) <= 4095)
1163 	    {
1164 	      /* This is safe because of the guard page at the
1165 		 beginning and end of the data space.  Just
1166 		 return the original address.  */
1167 	      return orig;
1168 	    }
1169 	  else
1170 	    {
1171 	      /* Doesn't look like one we can optimize.  */
1172 	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1173 	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1174 	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1175 	      regx1 = force_reg (Pmode,
1176 				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1177 						 regx1, regy2));
1178 	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
1179 	    }
1180 	}
1181     }
1182 
1183   return orig;
1184 }
1185 
1186 /* For the HPPA, REG and REG+CONST is cost 0
1187    and addresses involving symbolic constants are cost 2.
1188 
1189    PIC addresses are very expensive.
1190 
1191    It is no coincidence that this has the same structure
1192    as GO_IF_LEGITIMATE_ADDRESS.  */
1193 
1194 static int
hppa_address_cost(rtx X)1195 hppa_address_cost (rtx X)
1196 {
1197   switch (GET_CODE (X))
1198     {
1199     case REG:
1200     case PLUS:
1201     case LO_SUM:
1202       return 1;
1203     case HIGH:
1204       return 2;
1205     default:
1206       return 4;
1207     }
1208 }
1209 
1210 /* Compute a (partial) cost for rtx X.  Return true if the complete
1211    cost has been computed, and false if subexpressions should be
1212    scanned.  In either case, *TOTAL contains the cost result.  */
1213 
1214 static bool
hppa_rtx_costs(rtx x,int code,int outer_code,int * total)1215 hppa_rtx_costs (rtx x, int code, int outer_code, int *total)
1216 {
1217   switch (code)
1218     {
1219     case CONST_INT:
1220       if (INTVAL (x) == 0)
1221 	*total = 0;
1222       else if (INT_14_BITS (x))
1223 	*total = 1;
1224       else
1225 	*total = 2;
1226       return true;
1227 
1228     case HIGH:
1229       *total = 2;
1230       return true;
1231 
1232     case CONST:
1233     case LABEL_REF:
1234     case SYMBOL_REF:
1235       *total = 4;
1236       return true;
1237 
1238     case CONST_DOUBLE:
1239       if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1240 	  && outer_code != SET)
1241 	*total = 0;
1242       else
1243         *total = 8;
1244       return true;
1245 
1246     case MULT:
1247       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1248         *total = COSTS_N_INSNS (3);
1249       else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1250 	*total = COSTS_N_INSNS (8);
1251       else
1252 	*total = COSTS_N_INSNS (20);
1253       return true;
1254 
1255     case DIV:
1256       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1257 	{
1258 	  *total = COSTS_N_INSNS (14);
1259 	  return true;
1260 	}
1261       /* FALLTHRU */
1262 
1263     case UDIV:
1264     case MOD:
1265     case UMOD:
1266       *total = COSTS_N_INSNS (60);
1267       return true;
1268 
1269     case PLUS: /* this includes shNadd insns */
1270     case MINUS:
1271       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1272 	*total = COSTS_N_INSNS (3);
1273       else
1274         *total = COSTS_N_INSNS (1);
1275       return true;
1276 
1277     case ASHIFT:
1278     case ASHIFTRT:
1279     case LSHIFTRT:
1280       *total = COSTS_N_INSNS (1);
1281       return true;
1282 
1283     default:
1284       return false;
1285     }
1286 }
1287 
1288 /* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
1289    new rtx with the correct mode.  */
1290 static inline rtx
force_mode(enum machine_mode mode,rtx orig)1291 force_mode (enum machine_mode mode, rtx orig)
1292 {
1293   if (mode == GET_MODE (orig))
1294     return orig;
1295 
1296   gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
1297 
1298   return gen_rtx_REG (mode, REGNO (orig));
1299 }
1300 
1301 /* Return 1 if *X is a thread-local symbol.  */
1302 
static int
pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  /* for_each_rtx callback used by pa_tls_referenced_p below; DATA is
     unused.  Returns nonzero when *X is a thread-local symbol.  */
  return PA_SYMBOL_REF_TLS_P (*x);
}
1308 
1309 /* Return 1 if X contains a thread-local symbol.  */
1310 
1311 bool
pa_tls_referenced_p(rtx x)1312 pa_tls_referenced_p (rtx x)
1313 {
1314   if (!TARGET_HAVE_TLS)
1315     return false;
1316 
1317   return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
1318 }
1319 
1320 /* Emit insns to move operands[1] into operands[0].
1321 
1322    Return 1 if we have written out everything that needs to be done to
1323    do the move.  Otherwise, return 0 and the caller will emit the move
1324    normally.
1325 
1326    Note SCRATCH_REG may not be in the proper mode depending on how it
1327    will be used.  This routine is responsible for creating a new copy
1328    of SCRATCH_REG in the proper mode.  */
1329 
1330 int
emit_move_sequence(rtx * operands,enum machine_mode mode,rtx scratch_reg)1331 emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
1332 {
1333   register rtx operand0 = operands[0];
1334   register rtx operand1 = operands[1];
1335   register rtx tem;
1336 
1337   /* We can only handle indexed addresses in the destination operand
1338      of floating point stores.  Thus, we need to break out indexed
1339      addresses from the destination operand.  */
1340   if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1341     {
1342       /* This is only safe up to the beginning of life analysis.  */
1343       gcc_assert (!no_new_pseudos);
1344 
1345       tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1346       operand0 = replace_equiv_address (operand0, tem);
1347     }
1348 
1349   /* On targets with non-equivalent space registers, break out unscaled
1350      indexed addresses from the source operand before the final CSE.
1351      We have to do this because the REG_POINTER flag is not correctly
1352      carried through various optimization passes and CSE may substitute
1353      a pseudo without the pointer set for one with the pointer set.  As
     a result, we lose various opportunities to create insns with
1355      unscaled indexed addresses.  */
1356   if (!TARGET_NO_SPACE_REGS
1357       && !cse_not_expected
1358       && GET_CODE (operand1) == MEM
1359       && GET_CODE (XEXP (operand1, 0)) == PLUS
1360       && REG_P (XEXP (XEXP (operand1, 0), 0))
1361       && REG_P (XEXP (XEXP (operand1, 0), 1)))
1362     operand1
1363       = replace_equiv_address (operand1,
1364 			       copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1365 
1366   if (scratch_reg
1367       && reload_in_progress && GET_CODE (operand0) == REG
1368       && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1369     operand0 = reg_equiv_mem[REGNO (operand0)];
1370   else if (scratch_reg
1371 	   && reload_in_progress && GET_CODE (operand0) == SUBREG
1372 	   && GET_CODE (SUBREG_REG (operand0)) == REG
1373 	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1374     {
1375      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1376 	the code which tracks sets/uses for delete_output_reload.  */
1377       rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1378 				 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
1379 				 SUBREG_BYTE (operand0));
1380       operand0 = alter_subreg (&temp);
1381     }
1382 
1383   if (scratch_reg
1384       && reload_in_progress && GET_CODE (operand1) == REG
1385       && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1386     operand1 = reg_equiv_mem[REGNO (operand1)];
1387   else if (scratch_reg
1388 	   && reload_in_progress && GET_CODE (operand1) == SUBREG
1389 	   && GET_CODE (SUBREG_REG (operand1)) == REG
1390 	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1391     {
1392      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1393 	the code which tracks sets/uses for delete_output_reload.  */
1394       rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1395 				 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
1396 				 SUBREG_BYTE (operand1));
1397       operand1 = alter_subreg (&temp);
1398     }
1399 
1400   if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1401       && ((tem = find_replacement (&XEXP (operand0, 0)))
1402 	  != XEXP (operand0, 0)))
1403     operand0 = replace_equiv_address (operand0, tem);
1404 
1405   if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1406       && ((tem = find_replacement (&XEXP (operand1, 0)))
1407 	  != XEXP (operand1, 0)))
1408     operand1 = replace_equiv_address (operand1, tem);
1409 
1410   /* Handle secondary reloads for loads/stores of FP registers from
1411      REG+D addresses where D does not fit in 5 or 14 bits, including
1412      (subreg (mem (addr))) cases.  */
1413   if (scratch_reg
1414       && fp_reg_operand (operand0, mode)
1415       && ((GET_CODE (operand1) == MEM
1416 	   && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode),
1417 				 XEXP (operand1, 0)))
1418 	  || ((GET_CODE (operand1) == SUBREG
1419 	       && GET_CODE (XEXP (operand1, 0)) == MEM
1420 	       && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1421 				      ? SFmode : DFmode),
1422 				     XEXP (XEXP (operand1, 0), 0))))))
1423     {
1424       if (GET_CODE (operand1) == SUBREG)
1425 	operand1 = XEXP (operand1, 0);
1426 
1427       /* SCRATCH_REG will hold an address and maybe the actual data.  We want
1428 	 it in WORD_MODE regardless of what mode it was originally given
1429 	 to us.  */
1430       scratch_reg = force_mode (word_mode, scratch_reg);
1431 
1432       /* D might not fit in 14 bits either; for such cases load D into
1433 	 scratch reg.  */
1434       if (!memory_address_p (Pmode, XEXP (operand1, 0)))
1435 	{
1436 	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1437 	  emit_move_insn (scratch_reg,
1438 			  gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
1439 					  Pmode,
1440 					  XEXP (XEXP (operand1, 0), 0),
1441 					  scratch_reg));
1442 	}
1443       else
1444 	emit_move_insn (scratch_reg, XEXP (operand1, 0));
1445       emit_insn (gen_rtx_SET (VOIDmode, operand0,
1446 			      replace_equiv_address (operand1, scratch_reg)));
1447       return 1;
1448     }
1449   else if (scratch_reg
1450 	   && fp_reg_operand (operand1, mode)
1451 	   && ((GET_CODE (operand0) == MEM
1452 		&& !memory_address_p ((GET_MODE_SIZE (mode) == 4
1453 					? SFmode : DFmode),
1454 				       XEXP (operand0, 0)))
1455 	       || ((GET_CODE (operand0) == SUBREG)
1456 		   && GET_CODE (XEXP (operand0, 0)) == MEM
1457 		   && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1458 					  ? SFmode : DFmode),
1459 			   		 XEXP (XEXP (operand0, 0), 0)))))
1460     {
1461       if (GET_CODE (operand0) == SUBREG)
1462 	operand0 = XEXP (operand0, 0);
1463 
1464       /* SCRATCH_REG will hold an address and maybe the actual data.  We want
1465 	 it in WORD_MODE regardless of what mode it was originally given
1466 	 to us.  */
1467       scratch_reg = force_mode (word_mode, scratch_reg);
1468 
1469       /* D might not fit in 14 bits either; for such cases load D into
1470 	 scratch reg.  */
1471       if (!memory_address_p (Pmode, XEXP (operand0, 0)))
1472 	{
1473 	  emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1474 	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
1475 								        0)),
1476 						       Pmode,
1477 						       XEXP (XEXP (operand0, 0),
1478 								   0),
1479 						       scratch_reg));
1480 	}
1481       else
1482 	emit_move_insn (scratch_reg, XEXP (operand0, 0));
1483       emit_insn (gen_rtx_SET (VOIDmode,
1484 			      replace_equiv_address (operand0, scratch_reg),
1485 			      operand1));
1486       return 1;
1487     }
1488   /* Handle secondary reloads for loads of FP registers from constant
1489      expressions by forcing the constant into memory.
1490 
1491      Use scratch_reg to hold the address of the memory location.
1492 
1493      The proper fix is to change PREFERRED_RELOAD_CLASS to return
1494      NO_REGS when presented with a const_int and a register class
1495      containing only FP registers.  Doing so unfortunately creates
1496      more problems than it solves.   Fix this for 2.5.  */
1497   else if (scratch_reg
1498 	   && CONSTANT_P (operand1)
1499 	   && fp_reg_operand (operand0, mode))
1500     {
1501       rtx const_mem, xoperands[2];
1502 
1503       /* SCRATCH_REG will hold an address and maybe the actual data.  We want
1504 	 it in WORD_MODE regardless of what mode it was originally given
1505 	 to us.  */
1506       scratch_reg = force_mode (word_mode, scratch_reg);
1507 
1508       /* Force the constant into memory and put the address of the
1509 	 memory location into scratch_reg.  */
1510       const_mem = force_const_mem (mode, operand1);
1511       xoperands[0] = scratch_reg;
1512       xoperands[1] = XEXP (const_mem, 0);
1513       emit_move_sequence (xoperands, Pmode, 0);
1514 
1515       /* Now load the destination register.  */
1516       emit_insn (gen_rtx_SET (mode, operand0,
1517 			      replace_equiv_address (const_mem, scratch_reg)));
1518       return 1;
1519     }
1520   /* Handle secondary reloads for SAR.  These occur when trying to load
1521      the SAR from memory, FP register, or with a constant.  */
1522   else if (scratch_reg
1523 	   && GET_CODE (operand0) == REG
1524 	   && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1525 	   && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1526 	   && (GET_CODE (operand1) == MEM
1527 	       || GET_CODE (operand1) == CONST_INT
1528 	       || (GET_CODE (operand1) == REG
1529 		   && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1))))))
1530     {
1531       /* D might not fit in 14 bits either; for such cases load D into
1532 	 scratch reg.  */
1533       if (GET_CODE (operand1) == MEM
1534 	  && !memory_address_p (Pmode, XEXP (operand1, 0)))
1535 	{
1536 	  /* We are reloading the address into the scratch register, so we
1537 	     want to make sure the scratch register is a full register.  */
1538 	  scratch_reg = force_mode (word_mode, scratch_reg);
1539 
1540 	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1541 	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1542 								        0)),
1543 						       Pmode,
1544 						       XEXP (XEXP (operand1, 0),
1545 						       0),
1546 						       scratch_reg));
1547 
1548 	  /* Now we are going to load the scratch register from memory,
1549 	     we want to load it in the same width as the original MEM,
1550 	     which must be the same as the width of the ultimate destination,
1551 	     OPERAND0.  */
1552 	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1553 
1554 	  emit_move_insn (scratch_reg,
1555 			  replace_equiv_address (operand1, scratch_reg));
1556 	}
1557       else
1558 	{
1559 	  /* We want to load the scratch register using the same mode as
1560 	     the ultimate destination.  */
1561 	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1562 
1563 	  emit_move_insn (scratch_reg, operand1);
1564 	}
1565 
1566       /* And emit the insn to set the ultimate destination.  We know that
1567 	 the scratch register has the same mode as the destination at this
1568 	 point.  */
1569       emit_move_insn (operand0, scratch_reg);
1570       return 1;
1571     }
1572   /* Handle the most common case: storing into a register.  */
1573   else if (register_operand (operand0, mode))
1574     {
1575       if (register_operand (operand1, mode)
1576 	  || (GET_CODE (operand1) == CONST_INT
1577 	      && cint_ok_for_move (INTVAL (operand1)))
1578 	  || (operand1 == CONST0_RTX (mode))
1579 	  || (GET_CODE (operand1) == HIGH
1580 	      && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1581 	  /* Only `general_operands' can come here, so MEM is ok.  */
1582 	  || GET_CODE (operand1) == MEM)
1583 	{
1584 	  /* Various sets are created during RTL generation which don't
1585 	     have the REG_POINTER flag correctly set.  After the CSE pass,
1586 	     instruction recognition can fail if we don't consistently
1587 	     set this flag when performing register copies.  This should
1588 	     also improve the opportunities for creating insns that use
1589 	     unscaled indexing.  */
1590 	  if (REG_P (operand0) && REG_P (operand1))
1591 	    {
1592 	      if (REG_POINTER (operand1)
1593 		  && !REG_POINTER (operand0)
1594 		  && !HARD_REGISTER_P (operand0))
1595 		copy_reg_pointer (operand0, operand1);
1596 	      else if (REG_POINTER (operand0)
1597 		       && !REG_POINTER (operand1)
1598 		       && !HARD_REGISTER_P (operand1))
1599 		copy_reg_pointer (operand1, operand0);
1600 	    }
1601 
1602 	  /* When MEMs are broken out, the REG_POINTER flag doesn't
1603 	     get set.  In some cases, we can set the REG_POINTER flag
1604 	     from the declaration for the MEM.  */
1605 	  if (REG_P (operand0)
1606 	      && GET_CODE (operand1) == MEM
1607 	      && !REG_POINTER (operand0))
1608 	    {
1609 	      tree decl = MEM_EXPR (operand1);
1610 
1611 	      /* Set the register pointer flag and register alignment
1612 		 if the declaration for this memory reference is a
1613 		 pointer type.  Fortran indirect argument references
1614 		 are ignored.  */
1615 	      if (decl
1616 		  && !(flag_argument_noalias > 1
1617 		       && TREE_CODE (decl) == INDIRECT_REF
1618 		       && TREE_CODE (TREE_OPERAND (decl, 0)) == PARM_DECL))
1619 		{
1620 		  tree type;
1621 
1622 		  /* If this is a COMPONENT_REF, use the FIELD_DECL from
1623 		     tree operand 1.  */
1624 		  if (TREE_CODE (decl) == COMPONENT_REF)
1625 		    decl = TREE_OPERAND (decl, 1);
1626 
1627 		  type = TREE_TYPE (decl);
1628 		  if (TREE_CODE (type) == ARRAY_TYPE)
1629 		    type = get_inner_array_type (type);
1630 
1631 		  if (POINTER_TYPE_P (type))
1632 		    {
1633 		      int align;
1634 
1635 		      type = TREE_TYPE (type);
1636 		      /* Using TYPE_ALIGN_OK is rather conservative as
1637 			 only the ada frontend actually sets it.  */
1638 		      align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
1639 			       : BITS_PER_UNIT);
1640 		      mark_reg_pointer (operand0, align);
1641 		    }
1642 		}
1643 	    }
1644 
1645 	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1646 	  return 1;
1647 	}
1648     }
1649   else if (GET_CODE (operand0) == MEM)
1650     {
1651       if (mode == DFmode && operand1 == CONST0_RTX (mode)
1652 	  && !(reload_in_progress || reload_completed))
1653 	{
1654 	  rtx temp = gen_reg_rtx (DFmode);
1655 
1656 	  emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1657 	  emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
1658 	  return 1;
1659 	}
1660       if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1661 	{
1662 	  /* Run this case quickly.  */
1663 	  emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1664 	  return 1;
1665 	}
1666       if (! (reload_in_progress || reload_completed))
1667 	{
1668 	  operands[0] = validize_mem (operand0);
1669 	  operands[1] = operand1 = force_reg (mode, operand1);
1670 	}
1671     }
1672 
1673   /* Simplify the source if we need to.
1674      Note we do have to handle function labels here, even though we do
1675      not consider them legitimate constants.  Loop optimizations can
1676      call the emit_move_xxx with one as a source.  */
1677   if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1678       || function_label_operand (operand1, mode)
1679       || (GET_CODE (operand1) == HIGH
1680 	  && symbolic_operand (XEXP (operand1, 0), mode)))
1681     {
1682       int ishighonly = 0;
1683 
1684       if (GET_CODE (operand1) == HIGH)
1685 	{
1686 	  ishighonly = 1;
1687 	  operand1 = XEXP (operand1, 0);
1688 	}
1689       if (symbolic_operand (operand1, mode))
1690 	{
1691 	  /* Argh.  The assembler and linker can't handle arithmetic
1692 	     involving plabels.
1693 
1694 	     So we force the plabel into memory, load operand0 from
1695 	     the memory location, then add in the constant part.  */
1696 	  if ((GET_CODE (operand1) == CONST
1697 	       && GET_CODE (XEXP (operand1, 0)) == PLUS
1698 	       && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
1699 	      || function_label_operand (operand1, mode))
1700 	    {
1701 	      rtx temp, const_part;
1702 
1703 	      /* Figure out what (if any) scratch register to use.  */
1704 	      if (reload_in_progress || reload_completed)
1705 		{
1706 		  scratch_reg = scratch_reg ? scratch_reg : operand0;
1707 		  /* SCRATCH_REG will hold an address and maybe the actual
1708 		     data.  We want it in WORD_MODE regardless of what mode it
1709 		     was originally given to us.  */
1710 		  scratch_reg = force_mode (word_mode, scratch_reg);
1711 		}
1712 	      else if (flag_pic)
1713 		scratch_reg = gen_reg_rtx (Pmode);
1714 
1715 	      if (GET_CODE (operand1) == CONST)
1716 		{
1717 		  /* Save away the constant part of the expression.  */
1718 		  const_part = XEXP (XEXP (operand1, 0), 1);
1719 		  gcc_assert (GET_CODE (const_part) == CONST_INT);
1720 
1721 		  /* Force the function label into memory.  */
1722 		  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1723 		}
1724 	      else
1725 		{
1726 		  /* No constant part.  */
1727 		  const_part = NULL_RTX;
1728 
1729 		  /* Force the function label into memory.  */
1730 		  temp = force_const_mem (mode, operand1);
1731 		}
1732 
1733 
1734 	      /* Get the address of the memory location.  PIC-ify it if
1735 		 necessary.  */
1736 	      temp = XEXP (temp, 0);
1737 	      if (flag_pic)
1738 		temp = legitimize_pic_address (temp, mode, scratch_reg);
1739 
1740 	      /* Put the address of the memory location into our destination
1741 		 register.  */
1742 	      operands[1] = temp;
1743 	      emit_move_sequence (operands, mode, scratch_reg);
1744 
1745 	      /* Now load from the memory location into our destination
1746 		 register.  */
1747 	      operands[1] = gen_rtx_MEM (Pmode, operands[0]);
1748 	      emit_move_sequence (operands, mode, scratch_reg);
1749 
1750 	      /* And add back in the constant part.  */
1751 	      if (const_part != NULL_RTX)
1752 		expand_inc (operand0, const_part);
1753 
1754 	      return 1;
1755 	    }
1756 
1757 	  if (flag_pic)
1758 	    {
1759 	      rtx temp;
1760 
1761 	      if (reload_in_progress || reload_completed)
1762 		{
1763 		  temp = scratch_reg ? scratch_reg : operand0;
1764 		  /* TEMP will hold an address and maybe the actual
1765 		     data.  We want it in WORD_MODE regardless of what mode it
1766 		     was originally given to us.  */
1767 		  temp = force_mode (word_mode, temp);
1768 		}
1769 	      else
1770 		temp = gen_reg_rtx (Pmode);
1771 
1772 	      /* (const (plus (symbol) (const_int))) must be forced to
1773 		 memory during/after reload if the const_int will not fit
1774 		 in 14 bits.  */
1775 	      if (GET_CODE (operand1) == CONST
1776 		       && GET_CODE (XEXP (operand1, 0)) == PLUS
1777 		       && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1778 		       && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1779 		       && (reload_completed || reload_in_progress)
1780 		       && flag_pic)
1781 		{
1782 		  rtx const_mem = force_const_mem (mode, operand1);
1783 		  operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
1784 							mode, temp);
1785 		  operands[1] = replace_equiv_address (const_mem, operands[1]);
1786 		  emit_move_sequence (operands, mode, temp);
1787 		}
1788 	      else
1789 		{
1790 		  operands[1] = legitimize_pic_address (operand1, mode, temp);
1791 		  if (REG_P (operand0) && REG_P (operands[1]))
1792 		    copy_reg_pointer (operand0, operands[1]);
1793 		  emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
1794 		}
1795 	    }
1796 	  /* On the HPPA, references to data space are supposed to use dp,
1797 	     register 27, but showing it in the RTL inhibits various cse
1798 	     and loop optimizations.  */
1799 	  else
1800 	    {
1801 	      rtx temp, set;
1802 
1803 	      if (reload_in_progress || reload_completed)
1804 		{
1805 		  temp = scratch_reg ? scratch_reg : operand0;
1806 		  /* TEMP will hold an address and maybe the actual
1807 		     data.  We want it in WORD_MODE regardless of what mode it
1808 		     was originally given to us.  */
1809 		  temp = force_mode (word_mode, temp);
1810 		}
1811 	      else
1812 		temp = gen_reg_rtx (mode);
1813 
1814 	      /* Loading a SYMBOL_REF into a register makes that register
1815 		 safe to be used as the base in an indexed address.
1816 
1817 		 Don't mark hard registers though.  That loses.  */
1818 	      if (GET_CODE (operand0) == REG
1819 		  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1820 		mark_reg_pointer (operand0, BITS_PER_UNIT);
1821 	      if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
1822 		mark_reg_pointer (temp, BITS_PER_UNIT);
1823 
1824 	      if (ishighonly)
1825 		set = gen_rtx_SET (mode, operand0, temp);
1826 	      else
1827 		set = gen_rtx_SET (VOIDmode,
1828 				   operand0,
1829 				   gen_rtx_LO_SUM (mode, temp, operand1));
1830 
1831 	      emit_insn (gen_rtx_SET (VOIDmode,
1832 				      temp,
1833 				      gen_rtx_HIGH (mode, operand1)));
1834 	      emit_insn (set);
1835 
1836 	    }
1837 	  return 1;
1838 	}
1839       else if (pa_tls_referenced_p (operand1))
1840 	{
1841 	  rtx tmp = operand1;
1842 	  rtx addend = NULL;
1843 
1844 	  if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
1845 	    {
1846 	      addend = XEXP (XEXP (tmp, 0), 1);
1847 	      tmp = XEXP (XEXP (tmp, 0), 0);
1848 	    }
1849 
1850 	  gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
1851 	  tmp = legitimize_tls_address (tmp);
1852 	  if (addend)
1853 	    {
1854 	      tmp = gen_rtx_PLUS (mode, tmp, addend);
1855 	      tmp = force_operand (tmp, operands[0]);
1856 	    }
1857 	  operands[1] = tmp;
1858 	}
1859       else if (GET_CODE (operand1) != CONST_INT
1860 	       || !cint_ok_for_move (INTVAL (operand1)))
1861 	{
1862 	  rtx insn, temp;
1863 	  rtx op1 = operand1;
1864 	  HOST_WIDE_INT value = 0;
1865 	  HOST_WIDE_INT insv = 0;
1866 	  int insert = 0;
1867 
1868 	  if (GET_CODE (operand1) == CONST_INT)
1869 	    value = INTVAL (operand1);
1870 
1871 	  if (TARGET_64BIT
1872 	      && GET_CODE (operand1) == CONST_INT
1873 	      && HOST_BITS_PER_WIDE_INT > 32
1874 	      && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
1875 	    {
1876 	      HOST_WIDE_INT nval;
1877 
1878 	      /* Extract the low order 32 bits of the value and sign extend.
1879 		 If the new value is the same as the original value, we can
1880 		 can use the original value as-is.  If the new value is
1881 		 different, we use it and insert the most-significant 32-bits
1882 		 of the original value into the final result.  */
1883 	      nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
1884 		      ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
1885 	      if (value != nval)
1886 		{
1887 #if HOST_BITS_PER_WIDE_INT > 32
1888 		  insv = value >= 0 ? value >> 32 : ~(~value >> 32);
1889 #endif
1890 		  insert = 1;
1891 		  value = nval;
1892 		  operand1 = GEN_INT (nval);
1893 		}
1894 	    }
1895 
1896 	  if (reload_in_progress || reload_completed)
1897 	    temp = scratch_reg ? scratch_reg : operand0;
1898 	  else
1899 	    temp = gen_reg_rtx (mode);
1900 
1901 	  /* We don't directly split DImode constants on 32-bit targets
1902 	     because PLUS uses an 11-bit immediate and the insn sequence
1903 	     generated is not as efficient as the one using HIGH/LO_SUM.  */
1904 	  if (GET_CODE (operand1) == CONST_INT
1905 	      && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
1906 	      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
1907 	      && !insert)
1908 	    {
1909 	      /* Directly break constant into high and low parts.  This
1910 		 provides better optimization opportunities because various
1911 		 passes recognize constants split with PLUS but not LO_SUM.
1912 		 We use a 14-bit signed low part except when the addition
1913 		 of 0x4000 to the high part might change the sign of the
1914 		 high part.  */
1915 	      HOST_WIDE_INT low = value & 0x3fff;
1916 	      HOST_WIDE_INT high = value & ~ 0x3fff;
1917 
1918 	      if (low >= 0x2000)
1919 		{
1920 		  if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
1921 		    high += 0x2000;
1922 		  else
1923 		    high += 0x4000;
1924 		}
1925 
1926 	      low = value - high;
1927 
1928 	      emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
1929 	      operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1930 	    }
1931 	  else
1932 	    {
1933 	      emit_insn (gen_rtx_SET (VOIDmode, temp,
1934 				      gen_rtx_HIGH (mode, operand1)));
1935 	      operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
1936 	    }
1937 
1938 	  insn = emit_move_insn (operands[0], operands[1]);
1939 
1940 	  /* Now insert the most significant 32 bits of the value
1941 	     into the register.  When we don't have a second register
1942 	     available, it could take up to nine instructions to load
1943 	     a 64-bit integer constant.  Prior to reload, we force
1944 	     constants that would take more than three instructions
1945 	     to load to the constant pool.  During and after reload,
1946 	     we have to handle all possible values.  */
1947 	  if (insert)
1948 	    {
1949 	      /* Use a HIGH/LO_SUM/INSV sequence if we have a second
1950 		 register and the value to be inserted is outside the
1951 		 range that can be loaded with three depdi instructions.  */
1952 	      if (temp != operand0 && (insv >= 16384 || insv < -16384))
1953 		{
1954 		  operand1 = GEN_INT (insv);
1955 
1956 		  emit_insn (gen_rtx_SET (VOIDmode, temp,
1957 					  gen_rtx_HIGH (mode, operand1)));
1958 		  emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
1959 		  emit_insn (gen_insv (operand0, GEN_INT (32),
1960 				       const0_rtx, temp));
1961 		}
1962 	      else
1963 		{
1964 		  int len = 5, pos = 27;
1965 
1966 		  /* Insert the bits using the depdi instruction.  */
1967 		  while (pos >= 0)
1968 		    {
1969 		      HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
1970 		      HOST_WIDE_INT sign = v5 < 0;
1971 
1972 		      /* Left extend the insertion.  */
1973 		      insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
1974 		      while (pos > 0 && (insv & 1) == sign)
1975 			{
1976 			  insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
1977 			  len += 1;
1978 			  pos -= 1;
1979 			}
1980 
1981 		      emit_insn (gen_insv (operand0, GEN_INT (len),
1982 					   GEN_INT (pos), GEN_INT (v5)));
1983 
1984 		      len = pos > 0 && pos < 5 ? pos : 5;
1985 		      pos -= len;
1986 		    }
1987 		}
1988 	    }
1989 
1990 	  REG_NOTES (insn)
1991 	    = gen_rtx_EXPR_LIST (REG_EQUAL, op1, REG_NOTES (insn));
1992 
1993 	  return 1;
1994 	}
1995     }
1996   /* Now have insn-emit do whatever it normally does.  */
1997   return 0;
1998 }
1999 
2000 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2001    it will need a link/runtime reloc).  */
2002 
2003 int
reloc_needed(tree exp)2004 reloc_needed (tree exp)
2005 {
2006   int reloc = 0;
2007 
2008   switch (TREE_CODE (exp))
2009     {
2010     case ADDR_EXPR:
2011       return 1;
2012 
2013     case PLUS_EXPR:
2014     case MINUS_EXPR:
2015       reloc = reloc_needed (TREE_OPERAND (exp, 0));
2016       reloc |= reloc_needed (TREE_OPERAND (exp, 1));
2017       break;
2018 
2019     case NOP_EXPR:
2020     case CONVERT_EXPR:
2021     case NON_LVALUE_EXPR:
2022       reloc = reloc_needed (TREE_OPERAND (exp, 0));
2023       break;
2024 
2025     case CONSTRUCTOR:
2026       {
2027 	tree value;
2028 	unsigned HOST_WIDE_INT ix;
2029 
2030 	FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2031 	  if (value)
2032 	    reloc |= reloc_needed (value);
2033       }
2034       break;
2035 
2036     case ERROR_MARK:
2037       break;
2038 
2039     default:
2040       break;
2041     }
2042   return reloc;
2043 }
2044 
2045 /* Does operand (which is a symbolic_operand) live in text space?
2046    If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
2047    will be true.  */
2048 
2049 int
read_only_operand(rtx operand,enum machine_mode mode ATTRIBUTE_UNUSED)2050 read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
2051 {
2052   if (GET_CODE (operand) == CONST)
2053     operand = XEXP (XEXP (operand, 0), 0);
2054   if (flag_pic)
2055     {
2056       if (GET_CODE (operand) == SYMBOL_REF)
2057 	return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
2058     }
2059   else
2060     {
2061       if (GET_CODE (operand) == SYMBOL_REF)
2062 	return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
2063     }
2064   return 1;
2065 }
2066 
2067 
2068 /* Return the best assembler insn template
2069    for moving operands[1] into operands[0] as a fullword.  */
2070 const char *
singlemove_string(rtx * operands)2071 singlemove_string (rtx *operands)
2072 {
2073   HOST_WIDE_INT intval;
2074 
2075   if (GET_CODE (operands[0]) == MEM)
2076     return "stw %r1,%0";
2077   if (GET_CODE (operands[1]) == MEM)
2078     return "ldw %1,%0";
2079   if (GET_CODE (operands[1]) == CONST_DOUBLE)
2080     {
2081       long i;
2082       REAL_VALUE_TYPE d;
2083 
2084       gcc_assert (GET_MODE (operands[1]) == SFmode);
2085 
2086       /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2087 	 bit pattern.  */
2088       REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2089       REAL_VALUE_TO_TARGET_SINGLE (d, i);
2090 
2091       operands[1] = GEN_INT (i);
2092       /* Fall through to CONST_INT case.  */
2093     }
2094   if (GET_CODE (operands[1]) == CONST_INT)
2095     {
2096       intval = INTVAL (operands[1]);
2097 
2098       if (VAL_14_BITS_P (intval))
2099 	return "ldi %1,%0";
2100       else if ((intval & 0x7ff) == 0)
2101 	return "ldil L'%1,%0";
2102       else if (zdepi_cint_p (intval))
2103 	return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2104       else
2105 	return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2106     }
2107   return "copy %1,%0";
2108 }
2109 
2110 
2111 /* Compute position (in OP[1]) and width (in OP[2])
2112    useful for copying IMM to a register using the zdepi
2113    instructions.  Store the immediate value to insert in OP[0].  */
2114 static void
compute_zdepwi_operands(unsigned HOST_WIDE_INT imm,unsigned * op)2115 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2116 {
2117   int lsb, len;
2118 
2119   /* Find the least significant set bit in IMM.  */
2120   for (lsb = 0; lsb < 32; lsb++)
2121     {
2122       if ((imm & 1) != 0)
2123         break;
2124       imm >>= 1;
2125     }
2126 
2127   /* Choose variants based on *sign* of the 5-bit field.  */
2128   if ((imm & 0x10) == 0)
2129     len = (lsb <= 28) ? 4 : 32 - lsb;
2130   else
2131     {
2132       /* Find the width of the bitstring in IMM.  */
2133       for (len = 5; len < 32; len++)
2134 	{
2135 	  if ((imm & (1 << len)) == 0)
2136 	    break;
2137 	}
2138 
2139       /* Sign extend IMM as a 5-bit value.  */
2140       imm = (imm & 0xf) - 0x10;
2141     }
2142 
2143   op[0] = imm;
2144   op[1] = 31 - lsb;
2145   op[2] = len;
2146 }
2147 
2148 /* Compute position (in OP[1]) and width (in OP[2])
2149    useful for copying IMM to a register using the depdi,z
2150    instructions.  Store the immediate value to insert in OP[0].  */
2151 void
compute_zdepdi_operands(unsigned HOST_WIDE_INT imm,unsigned * op)2152 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2153 {
2154   HOST_WIDE_INT lsb, len;
2155 
2156   /* Find the least significant set bit in IMM.  */
2157   for (lsb = 0; lsb < HOST_BITS_PER_WIDE_INT; lsb++)
2158     {
2159       if ((imm & 1) != 0)
2160         break;
2161       imm >>= 1;
2162     }
2163 
2164   /* Choose variants based on *sign* of the 5-bit field.  */
2165   if ((imm & 0x10) == 0)
2166     len = ((lsb <= HOST_BITS_PER_WIDE_INT - 4)
2167 	   ? 4 : HOST_BITS_PER_WIDE_INT - lsb);
2168   else
2169     {
2170       /* Find the width of the bitstring in IMM.  */
2171       for (len = 5; len < HOST_BITS_PER_WIDE_INT; len++)
2172 	{
2173 	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2174 	    break;
2175 	}
2176 
2177       /* Sign extend IMM as a 5-bit value.  */
2178       imm = (imm & 0xf) - 0x10;
2179     }
2180 
2181   op[0] = imm;
2182   op[1] = 63 - lsb;
2183   op[2] = len;
2184 }
2185 
/* Output assembler code to perform a doubleword move insn
   with operands OPERANDS.  Earlier insns of a multi-insn sequence are
   emitted here directly via output_asm_insn; the returned string is
   the template for the final (or only) insn.  */

const char *
output_move_double (rtx *operands)
{
  /* Operand classes: register, offsettable memory, other memory,
     constant, or none of the above.  */
  enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
  /* Operands addressing the high-numbered (second) word of each side.  */
  rtx latehalf[2];
  /* Address registers to bump temporarily when a side is an
     unoffsettable memory reference.  */
  rtx addreg0 = 0, addreg1 = 0;

  /* First classify both operands.  */

  if (REG_P (operands[0]))
    optype0 = REGOP;
  else if (offsettable_memref_p (operands[0]))
    optype0 = OFFSOP;
  else if (GET_CODE (operands[0]) == MEM)
    optype0 = MEMOP;
  else
    optype0 = RNDOP;

  if (REG_P (operands[1]))
    optype1 = REGOP;
  else if (CONSTANT_P (operands[1]))
    optype1 = CNSTOP;
  else if (offsettable_memref_p (operands[1]))
    optype1 = OFFSOP;
  else if (GET_CODE (operands[1]) == MEM)
    optype1 = MEMOP;
  else
    optype1 = RNDOP;

  /* Check for the cases that the operand constraints are not
     supposed to allow to happen.  */
  gcc_assert (optype0 == REGOP || optype1 == REGOP);

  /* Handle copies between general and floating registers.  The value
     is bounced through a scratch slot at -16(%sp) since there is no
     direct doubleword copy between the two register files.  */

  if (optype0 == REGOP && optype1 == REGOP
      && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
    {
      if (FP_REG_P (operands[0]))
	{
	  output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
	  output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
	  return "{fldds|fldd} -16(%%sp),%0";
	}
      else
	{
	  output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
	  output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
	  return "{ldws|ldw} -12(%%sp),%R0";
	}
    }

  /* Handle auto decrementing and incrementing loads and stores
     specifically, since the structure of the function doesn't work
     for them without major modification.  Do it better when we learn
     this port about the general inc/dec addressing of PA.
     (This was written by tege.  Chide him if it doesn't work.)  */

  if (optype0 == MEMOP)
    {
      /* We have to output the address syntax ourselves, since print_operand
	 doesn't deal with the addresses we want to use.  Fix this later.  */

      rtx addr = XEXP (operands[0], 0);
      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
	{
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);

	  operands[0] = XEXP (addr, 0);
	  gcc_assert (GET_CODE (operands[1]) == REG
		      && GET_CODE (operands[0]) == REG);

	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));

	  /* No overlap between high target register and address
	     register.  (We do this in a non-obvious way to
	     save a register file writeback.)  The second store's
	     offset compensates for the base update done by the
	     first, modify-after store.  */
	  if (GET_CODE (addr) == POST_INC)
	    return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
	  return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
	}
      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
	{
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);

	  operands[0] = XEXP (addr, 0);
	  gcc_assert (GET_CODE (operands[1]) == REG
		      && GET_CODE (operands[0]) == REG);

	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
	  /* No overlap between high target register and address
	     register.  (We do this in a non-obvious way to save a
	     register file writeback.)  */
	  if (GET_CODE (addr) == PRE_INC)
	    return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
	  return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
	}
    }
  if (optype1 == MEMOP)
    {
      /* We have to output the address syntax ourselves, since print_operand
	 doesn't deal with the addresses we want to use.  Fix this later.  */

      rtx addr = XEXP (operands[1], 0);
      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
	{
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);

	  operands[1] = XEXP (addr, 0);
	  gcc_assert (GET_CODE (operands[0]) == REG
		      && GET_CODE (operands[1]) == REG);

	  if (!reg_overlap_mentioned_p (high_reg, addr))
	    {
	      /* No overlap between high target register and address
		 register.  (We do this in a non-obvious way to
		 save a register file writeback.)  */
	      if (GET_CODE (addr) == POST_INC)
		return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
	      return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
	    }
	  else
	    {
	      /* This is an undefined situation.  We should load into the
		 address register *and* update that register.  Probably
		 we don't need to handle this at all.  */
	      if (GET_CODE (addr) == POST_INC)
		return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
	      return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
	    }
	}
      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
	{
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);

	  operands[1] = XEXP (addr, 0);
	  gcc_assert (GET_CODE (operands[0]) == REG
		      && GET_CODE (operands[1]) == REG);

	  if (!reg_overlap_mentioned_p (high_reg, addr))
	    {
	      /* No overlap between high target register and address
		 register.  (We do this in a non-obvious way to
		 save a register file writeback.)  */
	      if (GET_CODE (addr) == PRE_INC)
		return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
	      return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
	    }
	  else
	    {
	      /* This is an undefined situation.  We should load into the
		 address register *and* update that register.  Probably
		 we don't need to handle this at all.  */
	      if (GET_CODE (addr) == PRE_INC)
		return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
	      return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
	    }
	}
      else if (GET_CODE (addr) == PLUS
	       && GET_CODE (XEXP (addr, 0)) == MULT)
	{
	  /* Scaled-index address: materialize the effective address
	     with a shift-and-add into whichever half of the target is
	     free, then load both words relative to it.  */
	  rtx xoperands[4];
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);

	  if (!reg_overlap_mentioned_p (high_reg, addr))
	    {
	      xoperands[0] = high_reg;
	      xoperands[1] = XEXP (addr, 1);
	      xoperands[2] = XEXP (XEXP (addr, 0), 0);
	      xoperands[3] = XEXP (XEXP (addr, 0), 1);
	      output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
			       xoperands);
	      return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
	    }
	  else
	    {
	      xoperands[0] = high_reg;
	      xoperands[1] = XEXP (addr, 1);
	      xoperands[2] = XEXP (XEXP (addr, 0), 0);
	      xoperands[3] = XEXP (XEXP (addr, 0), 1);
	      output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
			       xoperands);
	      return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
	    }
	}
    }

  /* If an operand is an unoffsettable memory ref, find a register
     we can increment temporarily to make it refer to the second word.  */

  if (optype0 == MEMOP)
    addreg0 = find_addr_reg (XEXP (operands[0], 0));

  if (optype1 == MEMOP)
    addreg1 = find_addr_reg (XEXP (operands[1], 0));

  /* Ok, we can do one word at a time.
     Normally we do the low-numbered word first.

     In either case, set up in LATEHALF the operands to use
     for the high-numbered word and in some cases alter the
     operands in OPERANDS to be suitable for the low-numbered word.  */

  if (optype0 == REGOP)
    latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
  else if (optype0 == OFFSOP)
    latehalf[0] = adjust_address (operands[0], SImode, 4);
  else
    latehalf[0] = operands[0];

  if (optype1 == REGOP)
    latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
  else if (optype1 == OFFSOP)
    latehalf[1] = adjust_address (operands[1], SImode, 4);
  else if (optype1 == CNSTOP)
    split_double (operands[1], &operands[1], &latehalf[1]);
  else
    latehalf[1] = operands[1];

  /* If the first move would clobber the source of the second one,
     do them in the other order.

     This can happen in two cases:

	mem -> register where the first half of the destination register
	is the same register used in the memory's address.  Reload
	can create such insns.

	mem in this case will be either register indirect or register
	indirect plus a valid offset.

	register -> register move where REGNO(dst) == REGNO(src + 1)
	someone (Tim/Tege?) claimed this can happen for parameter loads.

     Handle mem -> register case first.  */
  if (optype0 == REGOP
      && (optype1 == MEMOP || optype1 == OFFSOP)
      && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
			    operands[1], 0))
    {
      /* Do the late half first.  */
      if (addreg1)
	output_asm_insn ("ldo 4(%0),%0", &addreg1);
      output_asm_insn (singlemove_string (latehalf), latehalf);

      /* Then clobber.  */
      if (addreg1)
	output_asm_insn ("ldo -4(%0),%0", &addreg1);
      return singlemove_string (operands);
    }

  /* Now handle register -> register case.  */
  if (optype0 == REGOP && optype1 == REGOP
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    {
      output_asm_insn (singlemove_string (latehalf), latehalf);
      return singlemove_string (operands);
    }

  /* Normal case: do the two words, low-numbered first.  */

  output_asm_insn (singlemove_string (operands), operands);

  /* Make any unoffsettable addresses point at high-numbered word.  */
  if (addreg0)
    output_asm_insn ("ldo 4(%0),%0", &addreg0);
  if (addreg1)
    output_asm_insn ("ldo 4(%0),%0", &addreg1);

  /* Do that word.  */
  output_asm_insn (singlemove_string (latehalf), latehalf);

  /* Undo the adds we just did.  */
  if (addreg0)
    output_asm_insn ("ldo -4(%0),%0", &addreg0);
  if (addreg1)
    output_asm_insn ("ldo -4(%0),%0", &addreg1);

  return "";
}
2469 
2470 const char *
output_fp_move_double(rtx * operands)2471 output_fp_move_double (rtx *operands)
2472 {
2473   if (FP_REG_P (operands[0]))
2474     {
2475       if (FP_REG_P (operands[1])
2476 	  || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2477 	output_asm_insn ("fcpy,dbl %f1,%0", operands);
2478       else
2479 	output_asm_insn ("fldd%F1 %1,%0", operands);
2480     }
2481   else if (FP_REG_P (operands[1]))
2482     {
2483       output_asm_insn ("fstd%F0 %1,%0", operands);
2484     }
2485   else
2486     {
2487       rtx xoperands[2];
2488 
2489       gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2490 
2491       /* This is a pain.  You have to be prepared to deal with an
2492 	 arbitrary address here including pre/post increment/decrement.
2493 
2494 	 so avoid this in the MD.  */
2495       gcc_assert (GET_CODE (operands[0]) == REG);
2496 
2497       xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2498       xoperands[0] = operands[0];
2499       output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2500     }
2501   return "";
2502 }
2503 
2504 /* Return a REG that occurs in ADDR with coefficient 1.
2505    ADDR can be effectively incremented by incrementing REG.  */
2506 
2507 static rtx
find_addr_reg(rtx addr)2508 find_addr_reg (rtx addr)
2509 {
2510   while (GET_CODE (addr) == PLUS)
2511     {
2512       if (GET_CODE (XEXP (addr, 0)) == REG)
2513 	addr = XEXP (addr, 0);
2514       else if (GET_CODE (XEXP (addr, 1)) == REG)
2515 	addr = XEXP (addr, 1);
2516       else if (CONSTANT_P (XEXP (addr, 0)))
2517 	addr = XEXP (addr, 1);
2518       else if (CONSTANT_P (XEXP (addr, 1)))
2519 	addr = XEXP (addr, 0);
2520       else
2521 	gcc_unreachable ();
2522     }
2523   gcc_assert (GET_CODE (addr) == REG);
2524   return addr;
2525 }
2526 
/* Emit code to perform a block move.

   OPERANDS[0] is the destination pointer as a REG, clobbered.
   OPERANDS[1] is the source pointer as a REG, clobbered.
   OPERANDS[2] is a register for temporary storage.
   OPERANDS[3] is a register for temporary storage.
   OPERANDS[4] is the size as a CONST_INT
   OPERANDS[5] is the alignment safe to use, as a CONST_INT.
   OPERANDS[6] is another temporary register.

   Each copying loop below moves two ALIGN-sized pieces per iteration;
   the "addib,>= ...,.-12" branches back three insns (each insn is 4
   bytes) to the first load.  */

const char *
output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
{
  int align = INTVAL (operands[5]);
  unsigned long n_bytes = INTVAL (operands[4]);

  /* We can't move more than a word at a time because the PA
     has no longer integer move insns.  (Could use fp mem ops?)  */
  if (align > (TARGET_64BIT ? 8 : 4))
    align = (TARGET_64BIT ? 8 : 4);

  /* Note that we know each loop below will execute at least twice
     (else we would have open-coded the copy).  */
  switch (align)
    {
      case 8:
	/* Pre-adjust the loop counter.  */
	operands[4] = GEN_INT (n_bytes - 16);
	output_asm_insn ("ldi %4,%2", operands);

	/* Copying loop.  */
	output_asm_insn ("ldd,ma 8(%1),%3", operands);
	output_asm_insn ("ldd,ma 8(%1),%6", operands);
	output_asm_insn ("std,ma %3,8(%0)", operands);
	output_asm_insn ("addib,>= -16,%2,.-12", operands);
	output_asm_insn ("std,ma %6,8(%0)", operands);

	/* Handle the residual.  There could be up to 15 bytes of
	   residual to copy: an aligned doubleword plus up to 7 odd
	   bytes finished with a stdby.  */
	if (n_bytes % 16 != 0)
	  {
	    operands[4] = GEN_INT (n_bytes % 8);
	    if (n_bytes % 16 >= 8)
	      output_asm_insn ("ldd,ma 8(%1),%3", operands);
	    if (n_bytes % 8 != 0)
	      output_asm_insn ("ldd 0(%1),%6", operands);
	    if (n_bytes % 16 >= 8)
	      output_asm_insn ("std,ma %3,8(%0)", operands);
	    if (n_bytes % 8 != 0)
	      output_asm_insn ("stdby,e %6,%4(%0)", operands);
	  }
	return "";

      case 4:
	/* Pre-adjust the loop counter.  */
	operands[4] = GEN_INT (n_bytes - 8);
	output_asm_insn ("ldi %4,%2", operands);

	/* Copying loop.  */
	output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
	output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
	output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
	output_asm_insn ("addib,>= -8,%2,.-12", operands);
	output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);

	/* Handle the residual.  There could be up to 7 bytes of
	   residual to copy!  */
	if (n_bytes % 8 != 0)
	  {
	    operands[4] = GEN_INT (n_bytes % 4);
	    if (n_bytes % 8 >= 4)
	      output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
	    if (n_bytes % 4 != 0)
	      output_asm_insn ("ldw 0(%1),%6", operands);
	    if (n_bytes % 8 >= 4)
	      output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
	    if (n_bytes % 4 != 0)
	      output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
	  }
	return "";

      case 2:
	/* Pre-adjust the loop counter.  */
	operands[4] = GEN_INT (n_bytes - 4);
	output_asm_insn ("ldi %4,%2", operands);

	/* Copying loop.  */
	output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
	output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
	output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
	output_asm_insn ("addib,>= -4,%2,.-12", operands);
	output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);

	/* Handle the residual: a halfword and/or a final byte.  */
	if (n_bytes % 4 != 0)
	  {
	    if (n_bytes % 4 >= 2)
	      output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
	    if (n_bytes % 2 != 0)
	      output_asm_insn ("ldb 0(%1),%6", operands);
	    if (n_bytes % 4 >= 2)
	      output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
	    if (n_bytes % 2 != 0)
	      output_asm_insn ("stb %6,0(%0)", operands);
	  }
	return "";

      case 1:
	/* Pre-adjust the loop counter.  */
	operands[4] = GEN_INT (n_bytes - 2);
	output_asm_insn ("ldi %4,%2", operands);

	/* Copying loop.  */
	output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
	output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
	output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
	output_asm_insn ("addib,>= -2,%2,.-12", operands);
	output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);

	/* Handle the residual: at most one odd byte.  */
	if (n_bytes % 2 != 0)
	  {
	    output_asm_insn ("ldb 0(%1),%3", operands);
	    output_asm_insn ("stb %3,0(%0)", operands);
	  }
	return "";

      default:
	gcc_unreachable ();
    }
}
2658 
2659 /* Count the number of insns necessary to handle this block move.
2660 
2661    Basic structure is the same as emit_block_move, except that we
2662    count insns rather than emit them.  */
2663 
2664 static int
compute_movmem_length(rtx insn)2665 compute_movmem_length (rtx insn)
2666 {
2667   rtx pat = PATTERN (insn);
2668   unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2669   unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2670   unsigned int n_insns = 0;
2671 
2672   /* We can't move more than four bytes at a time because the PA
2673      has no longer integer move insns.  (Could use fp mem ops?)  */
2674   if (align > (TARGET_64BIT ? 8 : 4))
2675     align = (TARGET_64BIT ? 8 : 4);
2676 
2677   /* The basic copying loop.  */
2678   n_insns = 6;
2679 
2680   /* Residuals.  */
2681   if (n_bytes % (2 * align) != 0)
2682     {
2683       if ((n_bytes % (2 * align)) >= align)
2684 	n_insns += 2;
2685 
2686       if ((n_bytes % align) != 0)
2687 	n_insns += 2;
2688     }
2689 
2690   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
2691   return n_insns * 4;
2692 }
2693 
/* Emit code to perform a block clear.

   OPERANDS[0] is the destination pointer as a REG, clobbered.
   OPERANDS[1] is a register for temporary storage.
   OPERANDS[2] is the size as a CONST_INT
   OPERANDS[3] is the alignment safe to use, as a CONST_INT.

   All code is emitted directly via output_asm_insn; the returned
   template string is empty.  Each case emits a two-store clearing
   loop (the second store sits in the addib delay slot) followed by
   stores for any residual bytes.  */

const char *
output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
{
  int align = INTVAL (operands[3]);
  unsigned long n_bytes = INTVAL (operands[2]);

  /* We can't clear more than a word at a time because the PA
     has no longer integer move insns.  */
  if (align > (TARGET_64BIT ? 8 : 4))
    align = (TARGET_64BIT ? 8 : 4);

  /* Note that we know each loop below will execute at least twice
     (else we would have open-coded the copy).  */
  switch (align)
    {
      case 8:
	/* Pre-adjust the loop counter.  The loop clears 16 bytes per
	   iteration and addib decrements by 16, so start at
	   n_bytes - 16.  */
	operands[2] = GEN_INT (n_bytes - 16);
	output_asm_insn ("ldi %2,%1", operands);

	/* Loop.  The ,ma completer post-increments the destination
	   pointer by 8 on each store.  */
	output_asm_insn ("std,ma %%r0,8(%0)", operands);
	output_asm_insn ("addib,>= -16,%1,.-4", operands);
	output_asm_insn ("std,ma %%r0,8(%0)", operands);

	/* Handle the residual.  There could be up to 7 bytes of
	   residual to copy!  */
	if (n_bytes % 16 != 0)
	  {
	    /* stdby,e below stores the leftmost (n_bytes % 8) bytes.  */
	    operands[2] = GEN_INT (n_bytes % 8);
	    if (n_bytes % 16 >= 8)
	      output_asm_insn ("std,ma %%r0,8(%0)", operands);
	    if (n_bytes % 8 != 0)
	      output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
	  }
	return "";

      case 4:
	/* Pre-adjust the loop counter (8 bytes cleared per iteration).  */
	operands[2] = GEN_INT (n_bytes - 8);
	output_asm_insn ("ldi %2,%1", operands);

	/* Loop.  */
	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
	output_asm_insn ("addib,>= -8,%1,.-4", operands);
	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);

	/* Handle the residual.  There could be up to 7 bytes of
	   residual to copy!  */
	if (n_bytes % 8 != 0)
	  {
	    operands[2] = GEN_INT (n_bytes % 4);
	    if (n_bytes % 8 >= 4)
	      output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
	    if (n_bytes % 4 != 0)
	      output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
	  }
	return "";

      case 2:
	/* Pre-adjust the loop counter (4 bytes cleared per iteration).  */
	operands[2] = GEN_INT (n_bytes - 4);
	output_asm_insn ("ldi %2,%1", operands);

	/* Loop.  */
	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
	output_asm_insn ("addib,>= -4,%1,.-4", operands);
	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);

	/* Handle the residual.  At most one halfword store and one
	   byte store are needed.  */
	if (n_bytes % 4 != 0)
	  {
	    if (n_bytes % 4 >= 2)
	      output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
	    if (n_bytes % 2 != 0)
	      output_asm_insn ("stb %%r0,0(%0)", operands);
	  }
	return "";

      case 1:
	/* Pre-adjust the loop counter (2 bytes cleared per iteration).  */
	operands[2] = GEN_INT (n_bytes - 2);
	output_asm_insn ("ldi %2,%1", operands);

	/* Loop.  */
	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
	output_asm_insn ("addib,>= -2,%1,.-4", operands);
	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);

	/* Handle the residual.  */
	if (n_bytes % 2 != 0)
	  output_asm_insn ("stb %%r0,0(%0)", operands);

	return "";

      default:
	gcc_unreachable ();
    }
}
2800 
2801 /* Count the number of insns necessary to handle this block move.
2802 
2803    Basic structure is the same as emit_block_move, except that we
2804    count insns rather than emit them.  */
2805 
2806 static int
compute_clrmem_length(rtx insn)2807 compute_clrmem_length (rtx insn)
2808 {
2809   rtx pat = PATTERN (insn);
2810   unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
2811   unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
2812   unsigned int n_insns = 0;
2813 
2814   /* We can't clear more than a word at a time because the PA
2815      has no longer integer move insns.  */
2816   if (align > (TARGET_64BIT ? 8 : 4))
2817     align = (TARGET_64BIT ? 8 : 4);
2818 
2819   /* The basic loop.  */
2820   n_insns = 4;
2821 
2822   /* Residuals.  */
2823   if (n_bytes % (2 * align) != 0)
2824     {
2825       if ((n_bytes % (2 * align)) >= align)
2826 	n_insns++;
2827 
2828       if ((n_bytes % align) != 0)
2829 	n_insns++;
2830     }
2831 
2832   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
2833   return n_insns * 4;
2834 }
2835 
2836 
2837 const char *
output_and(rtx * operands)2838 output_and (rtx *operands)
2839 {
2840   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2841     {
2842       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2843       int ls0, ls1, ms0, p, len;
2844 
2845       for (ls0 = 0; ls0 < 32; ls0++)
2846 	if ((mask & (1 << ls0)) == 0)
2847 	  break;
2848 
2849       for (ls1 = ls0; ls1 < 32; ls1++)
2850 	if ((mask & (1 << ls1)) != 0)
2851 	  break;
2852 
2853       for (ms0 = ls1; ms0 < 32; ms0++)
2854 	if ((mask & (1 << ms0)) == 0)
2855 	  break;
2856 
2857       gcc_assert (ms0 == 32);
2858 
2859       if (ls1 == 32)
2860 	{
2861 	  len = ls0;
2862 
2863 	  gcc_assert (len);
2864 
2865 	  operands[2] = GEN_INT (len);
2866 	  return "{extru|extrw,u} %1,31,%2,%0";
2867 	}
2868       else
2869 	{
2870 	  /* We could use this `depi' for the case above as well, but `depi'
2871 	     requires one more register file access than an `extru'.  */
2872 
2873 	  p = 31 - ls0;
2874 	  len = ls1 - ls0;
2875 
2876 	  operands[2] = GEN_INT (p);
2877 	  operands[3] = GEN_INT (len);
2878 	  return "{depi|depwi} 0,%2,%3,%0";
2879 	}
2880     }
2881   else
2882     return "and %1,%2,%0";
2883 }
2884 
2885 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
2886    storing the result in operands[0].  */
2887 const char *
output_64bit_and(rtx * operands)2888 output_64bit_and (rtx *operands)
2889 {
2890   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2891     {
2892       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2893       int ls0, ls1, ms0, p, len;
2894 
2895       for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
2896 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
2897 	  break;
2898 
2899       for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
2900 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
2901 	  break;
2902 
2903       for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
2904 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
2905 	  break;
2906 
2907       gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
2908 
2909       if (ls1 == HOST_BITS_PER_WIDE_INT)
2910 	{
2911 	  len = ls0;
2912 
2913 	  gcc_assert (len);
2914 
2915 	  operands[2] = GEN_INT (len);
2916 	  return "extrd,u %1,63,%2,%0";
2917 	}
2918       else
2919 	{
2920 	  /* We could use this `depi' for the case above as well, but `depi'
2921 	     requires one more register file access than an `extru'.  */
2922 
2923 	  p = 63 - ls0;
2924 	  len = ls1 - ls0;
2925 
2926 	  operands[2] = GEN_INT (p);
2927 	  operands[3] = GEN_INT (len);
2928 	  return "depdi 0,%2,%3,%0";
2929 	}
2930     }
2931   else
2932     return "and %1,%2,%0";
2933 }
2934 
2935 const char *
output_ior(rtx * operands)2936 output_ior (rtx *operands)
2937 {
2938   unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2939   int bs0, bs1, p, len;
2940 
2941   if (INTVAL (operands[2]) == 0)
2942     return "copy %1,%0";
2943 
2944   for (bs0 = 0; bs0 < 32; bs0++)
2945     if ((mask & (1 << bs0)) != 0)
2946       break;
2947 
2948   for (bs1 = bs0; bs1 < 32; bs1++)
2949     if ((mask & (1 << bs1)) == 0)
2950       break;
2951 
2952   gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
2953 
2954   p = 31 - bs0;
2955   len = bs1 - bs0;
2956 
2957   operands[2] = GEN_INT (p);
2958   operands[3] = GEN_INT (len);
2959   return "{depi|depwi} -1,%2,%3,%0";
2960 }
2961 
/* Return a string to perform a bitwise-or of operands[1] with operands[2]
   (a CONST_INT whose set bits form a single contiguous field), storing
   the result in operands[0].  The OR is performed by depositing -1 into
   the field with depdi; a zero mask is a plain copy.  */
const char *
output_64bit_ior (rtx *operands)
{
  unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
  int bs0, bs1, p, len;

  /* OR with zero is just a register copy.  */
  if (INTVAL (operands[2]) == 0)
    return "copy %1,%0";

  /* Find the first set bit (BS0) and the first clear bit above it (BS1).  */
  for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
      break;

  for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
      break;

  /* The mask must be a single field of ones; otherwise one depdi
     cannot represent it.  */
  gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
	      || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);

  /* Convert bit numbers to the (position, length) form depdi wants.  */
  p = 63 - bs0;
  len = bs1 - bs0;

  operands[2] = GEN_INT (p);
  operands[3] = GEN_INT (len);
  return "depdi -1,%2,%3,%0";
}
2991 
2992 /* Target hook for assembling integer objects.  This code handles
2993    aligned SI and DI integers specially since function references
2994    must be preceded by P%.  */
2995 
2996 static bool
pa_assemble_integer(rtx x,unsigned int size,int aligned_p)2997 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
2998 {
2999   if (size == UNITS_PER_WORD
3000       && aligned_p
3001       && function_label_operand (x, VOIDmode))
3002     {
3003       fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
3004       output_addr_const (asm_out_file, x);
3005       fputc ('\n', asm_out_file);
3006       return true;
3007     }
3008   return default_assemble_integer (x, size, aligned_p);
3009 }
3010 
/* Output the string P of length SIZE on FILE as .STRING directives.

   The HP assembler can only take strings of 256 characters at one
   time.  This is a limitation on input line length, *not* the
   length of the string; escape sequences like \xnn each count as
   several input characters.  So the string is emitted in pieces,
   opening a fresh .STRING directive before a piece would push the
   current line past the limit.  */
void
output_ascii (FILE *file, const char *p, int size)
{
  static const char hexdig[] = "0123456789abcdef";
  unsigned char encoded[16];	/* Max space 4 chars can occupy.  */
  int pos;
  int emitted = 0;

  fputs ("\t.STRING \"", file);

  /* Encode and emit the string four source characters at a time.  */
  for (pos = 0; pos < size; pos += 4)
    {
      int n = 0;
      int k;

      for (k = 0; k < MIN (4, size - pos); k++)
	{
	  unsigned int c = (unsigned char) p[pos + k];

	  /* Quote and backslash must be escaped; other printable
	     characters pass through; everything else becomes \xnn.  */
	  if (c == '\"' || c == '\\')
	    encoded[n++] = '\\';
	  if (c >= ' ' && c < 0177)
	    encoded[n++] = c;
	  else
	    {
	      encoded[n++] = '\\';
	      encoded[n++] = 'x';
	      encoded[n++] = hexdig[c / 16];
	      encoded[n++] = hexdig[c % 16];
	    }
	}

      /* Start a new directive before overflowing the input line.  */
      if (emitted + n > 243)
	{
	  fputs ("\"\n\t.STRING \"", file);
	  emitted = 0;
	}
      fwrite (encoded, 1, (size_t) n, file);
      emitted += n;
    }
  fputs ("\"\n", file);
}
3066 
/* Try to rewrite floating point comparisons & branches to avoid
   useless add,tr insns.

   CHECK_NOTES is nonzero if we should examine REG_DEAD notes
   to see if FPCC is dead.  CHECK_NOTES is nonzero for the
   first attempt to remove useless add,tr insns.  It is zero
   for the second pass as reorg sometimes leaves bogus REG_DEAD
   notes lying around.

   When CHECK_NOTES is zero we can only eliminate add,tr insns
   when there's a 1:1 correspondence between fcmp and ftest/fbranch
   instructions.  */
static void
remove_useless_addtr_insns (int check_notes)
{
  rtx insn;
  /* Toggled at the end of every call but never otherwise read here;
     NOTE(review): apparently vestigial — confirm before removing.  */
  static int pass = 0;

  /* This is fairly cheap, so always run it when optimizing.  */
  if (optimize > 0)
    {
      int fcmp_count = 0;
      int fbranch_count = 0;

      /* Walk all the insns in this function looking for fcmp & fbranch
	 instructions.  Keep track of how many of each we find.  */
      for (insn = get_insns (); insn; insn = next_insn (insn))
	{
	  rtx tmp;

	  /* Ignore anything that isn't an INSN or a JUMP_INSN.  */
	  if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
	    continue;

	  tmp = PATTERN (insn);

	  /* It must be a set.  */
	  if (GET_CODE (tmp) != SET)
	    continue;

	  /* If the destination is CCFP (hard register 0), then we've
	     found an fcmp insn.  */
	  tmp = SET_DEST (tmp);
	  if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
	    {
	      fcmp_count++;
	      continue;
	    }

	  tmp = PATTERN (insn);
	  /* If this is an fbranch instruction (a conditional jump on
	     CCFP != 0), bump the fbranch counter.  */
	  if (GET_CODE (tmp) == SET
	      && SET_DEST (tmp) == pc_rtx
	      && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
	      && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
	      && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
	      && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
	    {
	      fbranch_count++;
	      continue;
	    }
	}


      /* Find all floating point compare + branch insns.  If possible,
	 reverse the comparison & the branch to avoid add,tr insns.  */
      for (insn = get_insns (); insn; insn = next_insn (insn))
	{
	  rtx tmp, next;

	  /* Ignore anything that isn't an INSN.  */
	  if (GET_CODE (insn) != INSN)
	    continue;

	  tmp = PATTERN (insn);

	  /* It must be a set.  */
	  if (GET_CODE (tmp) != SET)
	    continue;

	  /* The destination must be CCFP, which is register zero.  */
	  tmp = SET_DEST (tmp);
	  if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
	    continue;

	  /* INSN should be a set of CCFP.

	     See if the result of this insn is used in a reversed FP
	     conditional branch.  If so, reverse our condition and
	     the branch.  Doing so avoids useless add,tr insns.  */
	  next = next_insn (insn);
	  while (next)
	    {
	      /* Jumps, calls and labels stop our search.  */
	      if (GET_CODE (next) == JUMP_INSN
		  || GET_CODE (next) == CALL_INSN
		  || GET_CODE (next) == CODE_LABEL)
		break;

	      /* As does another fcmp insn.  */
	      if (GET_CODE (next) == INSN
		  && GET_CODE (PATTERN (next)) == SET
		  && GET_CODE (SET_DEST (PATTERN (next))) == REG
		  && REGNO (SET_DEST (PATTERN (next))) == 0)
		break;

	      next = next_insn (next);
	    }

	  /* Is NEXT_INSN a branch?  */
	  if (next
	      && GET_CODE (next) == JUMP_INSN)
	    {
	      rtx pattern = PATTERN (next);

	      /* If it a reversed fp conditional branch (e.g. uses add,tr)
		 and CCFP dies, then reverse our conditional and the branch
		 to avoid the add,tr.  The XEXP (..., 1) == PC test is what
		 identifies the branch as reversed (fall-through in the
		 "taken" arm).  */
	      if (GET_CODE (pattern) == SET
		  && SET_DEST (pattern) == pc_rtx
		  && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
		  && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
		  && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
		  && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
		  && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
		  && (fcmp_count == fbranch_count
		      || (check_notes
			  && find_regno_note (next, REG_DEAD, 0))))
		{
		  /* Reverse the branch: swap its two arms and force
		     re-recognition of the modified insn.  */
		  tmp = XEXP (SET_SRC (pattern), 1);
		  XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
		  XEXP (SET_SRC (pattern), 2) = tmp;
		  INSN_CODE (next) = -1;

		  /* Reverse our condition.  */
		  tmp = PATTERN (insn);
		  PUT_CODE (XEXP (tmp, 1),
			    (reverse_condition_maybe_unordered
			     (GET_CODE (XEXP (tmp, 1)))));
		}
	    }
	}
    }

  pass = !pass;

}
3214 
3215 /* You may have trouble believing this, but this is the 32 bit HP-PA
3216    stack layout.  Wow.
3217 
3218    Offset		Contents
3219 
3220    Variable arguments	(optional; any number may be allocated)
3221 
3222    SP-(4*(N+9))		arg word N
3223    	:		    :
3224       SP-56		arg word 5
3225       SP-52		arg word 4
3226 
3227    Fixed arguments	(must be allocated; may remain unused)
3228 
3229       SP-48		arg word 3
3230       SP-44		arg word 2
3231       SP-40		arg word 1
3232       SP-36		arg word 0
3233 
3234    Frame Marker
3235 
3236       SP-32		External Data Pointer (DP)
3237       SP-28		External sr4
3238       SP-24		External/stub RP (RP')
3239       SP-20		Current RP
3240       SP-16		Static Link
3241       SP-12		Clean up
3242       SP-8		Calling Stub RP (RP'')
3243       SP-4		Previous SP
3244 
3245    Top of Frame
3246 
3247       SP-0		Stack Pointer (points to next available address)
3248 
3249 */
3250 
3251 /* This function saves registers as follows.  Registers marked with ' are
3252    this function's registers (as opposed to the previous function's).
3253    If a frame_pointer isn't needed, r4 is saved as a general register;
3254    the space for the frame pointer is still allocated, though, to keep
3255    things simple.
3256 
3257 
3258    Top of Frame
3259 
3260        SP (FP')		Previous FP
3261        SP + 4		Alignment filler (sigh)
3262        SP + 8		Space for locals reserved here.
3263        .
3264        .
3265        .
3266        SP + n		All call saved register used.
3267        .
3268        .
3269        .
3270        SP + o		All call saved fp registers used.
3271        .
3272        .
3273        .
3274        SP + p (SP')	points to next available address.
3275 
3276 */
3277 
3278 /* Global variables set by output_function_prologue().  */
3279 /* Size of frame.  Need to know this to emit return insns from
3280    leaf procedures.  */
3281 static HOST_WIDE_INT actual_fsize, local_fsize;
3282 static int save_fregs;
3283 
/* Emit RTL to store REG at the memory location specified by BASE+DISP.
   Handle case where DISP > 8k by using the add_high_const patterns.

   Note in DISP > 8k case, we will leave the high part of the address
   in %r1.  There is code in expand_hppa_{prologue,epilogue} that knows
   this.  When frame notes are enabled, the final store is marked
   frame-related so the unwinder can track the save.  */

static void
store_reg (int reg, HOST_WIDE_INT disp, int base)
{
  rtx insn, dest, src, basereg;

  src = gen_rtx_REG (word_mode, reg);
  basereg = gen_rtx_REG (Pmode, base);
  if (VAL_14_BITS_P (disp))
    {
      /* Displacement fits in a 14-bit immediate: one store insn.  */
      dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
      insn = emit_move_insn (dest, src);
    }
  else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
    {
      /* 64-bit displacement too large even for HIGH/LO_SUM: load it
	 into %r1, add the base, and store through %r1.  */
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, delta);
      insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
      if (DO_FRAME_NOTES)
	{
	  /* Describe the address computation to the unwinder as
	     tmpreg = base + delta, since the two-insn sequence above
	     isn't directly interpretable.  */
	  REG_NOTES (insn)
	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
		gen_rtx_SET (VOIDmode, tmpreg,
			     gen_rtx_PLUS (Pmode, basereg, delta)),
                REG_NOTES (insn));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      dest = gen_rtx_MEM (word_mode, tmpreg);
      insn = emit_move_insn (dest, src);
    }
  else
    {
      /* DISP > 8k but representable: form the high part in %r1 and
	 store through a LO_SUM address.  */
      rtx delta = GEN_INT (disp);
      rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, high);
      dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
      insn = emit_move_insn (dest, src);
      if (DO_FRAME_NOTES)
	{
	  /* Describe the store to the unwinder as a plain
	     *(base + delta) = src.  */
	  REG_NOTES (insn)
	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
		gen_rtx_SET (VOIDmode,
			     gen_rtx_MEM (word_mode,
					  gen_rtx_PLUS (word_mode, basereg,
							delta)),
                             src),
                REG_NOTES (insn));
	}
    }

  if (DO_FRAME_NOTES)
    RTX_FRAME_RELATED_P (insn) = 1;
}
3346 
/* Emit RTL to store REG at the memory location specified by BASE and then
   add MOD to BASE.  MOD must be <= 8k (it must fit a 14-bit immediate).
   The store and the base update are emitted as a single post-modify
   store insn (a PARALLEL of the two sets).  */

static void
store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
{
  rtx insn, basereg, srcreg, delta;

  gcc_assert (VAL_14_BITS_P (mod));

  basereg = gen_rtx_REG (Pmode, base);
  srcreg = gen_rtx_REG (word_mode, reg);
  delta = GEN_INT (mod);

  insn = emit_insn (gen_post_store (basereg, srcreg, delta));
  if (DO_FRAME_NOTES)
    {
      RTX_FRAME_RELATED_P (insn) = 1;

      /* RTX_FRAME_RELATED_P must be set on each frame related set
	 in a parallel with more than one element.  */
      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
    }
}
3372 
/* Emit RTL to set REG to the value specified by BASE+DISP.  Handle case
   where DISP > 8k by using the add_high_const patterns.  NOTE indicates
   whether to add a frame note or not.

   In the DISP > 8k case, we leave the high part of the address in %r1.
   There is code in expand_hppa_{prologue,epilogue} that knows about this.  */

static void
set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
{
  rtx insn;

  if (VAL_14_BITS_P (disp))
    {
      /* Displacement fits in a 14-bit immediate: a single add.  */
      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
			     plus_constant (gen_rtx_REG (Pmode, base), disp));
    }
  else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
    {
      /* 64-bit displacement too large for HIGH/LO_SUM: load it into
	 %r1 and add the base register.  */
      rtx basereg = gen_rtx_REG (Pmode, base);
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, delta);
      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
			     gen_rtx_PLUS (Pmode, tmpreg, basereg));
      if (DO_FRAME_NOTES)
	/* Describe the two-insn sequence to the unwinder as a single
	   tmpreg = base + delta computation.  */
	REG_NOTES (insn)
	  = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
	      gen_rtx_SET (VOIDmode, tmpreg,
			   gen_rtx_PLUS (Pmode, basereg, delta)),
	      REG_NOTES (insn));
    }
  else
    {
      /* DISP > 8k but representable: form base + %hi(disp) in %r1,
	 then add the low part with LO_SUM.  */
      rtx basereg = gen_rtx_REG (Pmode, base);
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg,
		      gen_rtx_PLUS (Pmode, basereg,
				    gen_rtx_HIGH (Pmode, delta)));
      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
			     gen_rtx_LO_SUM (Pmode, tmpreg, delta));
    }

  if (DO_FRAME_NOTES && note)
    RTX_FRAME_RELATED_P (insn) = 1;
}
3422 
/* Return the total stack frame size for the current function, given
   SIZE bytes of stack locals.  If FREGS_LIVE is nonnull, set
   *FREGS_LIVE to 1 when any callee-saved floating point register
   must be saved.  */
HOST_WIDE_INT
compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
{
  int freg_saved = 0;
  int i, j;

  /* The code in hppa_expand_prologue and hppa_expand_epilogue must
     be consistent with the rounding and size calculation done here.
     Change them at the same time.  */

  /* We do our own stack alignment.  First, round the size of the
     stack locals up to a word boundary.  */
  size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);

  /* Space for previous frame pointer + filler.  If any frame is
     allocated, we need to add in the STARTING_FRAME_OFFSET.  We
     waste some space here for the sake of HP compatibility.  The
     first slot is only used when the frame pointer is needed.  */
  if (size || frame_pointer_needed)
    size += STARTING_FRAME_OFFSET;

  /* If the current function calls __builtin_eh_return, then we need
     to allocate stack space for registers that will hold data for
     the exception handler.  */
  if (DO_FRAME_NOTES && current_function_calls_eh_return)
    {
      unsigned int i;

      /* Count the EH data registers; the loop body is empty.  */
      for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
	continue;
      size += i * UNITS_PER_WORD;
    }

  /* Account for space used by the callee general register saves.
     %r3 is skipped when it serves as the frame pointer, since it is
     handled separately.  */
  for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
    if (regs_ever_live[i])
      size += UNITS_PER_WORD;

  /* Account for space used by the callee floating point register saves.
     On 32-bit targets each save covers a register pair, so the odd
     half is checked too.  */
  for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
    if (regs_ever_live[i]
	|| (!TARGET_64BIT && regs_ever_live[i + 1]))
      {
	freg_saved = 1;

	/* We always save both halves of the FP register, so always
	   increment the frame size by 8 bytes.  */
	size += 8;
      }

  /* If any of the floating registers are saved, account for the
     alignment needed for the floating point register save block.  */
  if (freg_saved)
    {
      size = (size + 7) & ~7;
      if (fregs_live)
	*fregs_live = 1;
    }

  /* The various ABIs include space for the outgoing parameters in the
     size of the current function's stack frame.  We don't need to align
     for the outgoing arguments as their alignment is set by the final
     rounding for the frame as a whole.  */
  size += current_function_outgoing_args_size;

  /* Allocate space for the fixed frame marker.  This space must be
     allocated for any function that makes calls or allocates
     stack space.  */
  if (!current_function_is_leaf || size)
    size += TARGET_64BIT ? 48 : 32;

  /* Finally, round to the preferred stack boundary.  */
  return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
	  & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
}
3498 
/* Generate the assembly code for function entry.  FILE is a stdio
   stream to output the code to.  SIZE is an int: how many units of
   temporary storage to allocate (unused here; hppa_expand_prologue
   already emitted the prologue RTL and recorded the frame size in
   the file-static actual_fsize).

   Refer to the array `regs_ever_live' to determine which registers to
   save; `regs_ever_live[I]' is nonzero if register number I is ever
   used in the function.  This function is responsible for knowing
   which registers should not be saved even if used.  */

/* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
   of memory.  If any fpu reg is used in the function, we allocate
   such a block here, at the bottom of the frame, just in case it's needed.

   If this function is a leaf procedure, then we may choose not
   to do a "save" insn.  The decision about whether or not
   to do this is made in regclass.c.  */

static void
pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  /* The function's label and associated .PROC must never be
     separated and must be output *after* any profiling declarations
     to avoid changing spaces/subspaces within a procedure.  */
  ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
  fputs ("\t.PROC\n", file);

  /* hppa_expand_prologue does the dirty work now.  We just need
     to output the assembler directives which denote the start
     of a function.  */
  fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
  if (regs_ever_live[2])
    fputs (",CALLS,SAVE_RP", file);
  else
    fputs (",NO_CALLS", file);

  /* The SAVE_SP flag is used to indicate that register %r3 is stored
     at the beginning of the frame and that it is used as the frame
     pointer for the frame.  We do this because our current frame
     layout doesn't conform to that specified in the HP runtime
     documentation and we need a way to indicate to programs such as
     GDB where %r3 is saved.  The SAVE_SP flag was chosen because it
     isn't used by HP compilers but is supported by the assembler.
     However, SAVE_SP is supposed to indicate that the previous stack
     pointer has been saved in the frame marker.  */
  if (frame_pointer_needed)
    fputs (",SAVE_SP", file);

  /* Pass on information about the number of callee register saves
     performed in the prologue.

     The compiler is supposed to pass the highest register number
     saved, the assembler then has to adjust that number before
     entering it into the unwind descriptor (to account for any
     caller saved registers with lower register numbers than the
     first callee saved register).  */
  if (gr_saved)
    fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);

  if (fr_saved)
    fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);

  fputs ("\n\t.ENTRY\n", file);

  /* Second pass over the add,tr cleanup, with REG_DEAD note checking
     disabled since reorg may have left stale notes.  */
  remove_useless_addtr_insns (0);
}
3564 
/* Expand the function prologue to RTL: save the return pointer (%r2),
   allocate the local frame (setting up the frame pointer when one is
   needed), save the callee-saved general registers, and save the
   callee-saved floating point registers.  The frame layout computed
   here must stay in sync with compute_frame_size.  */
void
hppa_expand_prologue (void)
{
  int merge_sp_adjust_with_store = 0;
  HOST_WIDE_INT size = get_frame_size ();
  HOST_WIDE_INT offset;
  int i;
  rtx insn, tmpreg;

  /* Reset the per-function save counts; they are filled in below and
     later reported by the .CALLINFO directives.  */
  gr_saved = 0;
  fr_saved = 0;
  save_fregs = 0;

  /* Compute total size for frame pointer, filler, locals and rounding to
     the next word boundary.  Similar code appears in compute_frame_size
     and must be changed in tandem with this code.  */
  local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
  if (local_fsize || frame_pointer_needed)
    local_fsize += STARTING_FRAME_OFFSET;

  actual_fsize = compute_frame_size (size, &save_fregs);

  if (warn_stack_larger_than && actual_fsize > stack_larger_than_size)
    warning (0, "stack usage is %d bytes", actual_fsize);

  /* Compute a few things we will use often.  %r1 serves as the scratch
     register throughout the prologue.  */
  tmpreg = gen_rtx_REG (word_mode, 1);

  /* Save RP first.  The calling conventions manual states RP will
     always be stored into the caller's frame at sp - 20 or sp - 16
     depending on which ABI is in use.  */
  if (regs_ever_live[2] || current_function_calls_eh_return)
    store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);

  /* Allocate the local frame and set up the frame pointer if needed.  */
  if (actual_fsize != 0)
    {
      if (frame_pointer_needed)
	{
	  /* Copy the old frame pointer temporarily into %r1.  Set up the
	     new stack pointer, then store away the saved old frame pointer
	     into the stack at sp and at the same time update the stack
	     pointer by actual_fsize bytes.  Two versions, first
	     handles small (<8k) frames.  The second handles large (>=8k)
	     frames.  */
	  insn = emit_move_insn (tmpreg, frame_pointer_rtx);
	  if (DO_FRAME_NOTES)
	    RTX_FRAME_RELATED_P (insn) = 1;

	  insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
	  if (DO_FRAME_NOTES)
	    RTX_FRAME_RELATED_P (insn) = 1;

	  if (VAL_14_BITS_P (actual_fsize))
	    store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
	  else
	    {
	      /* It is incorrect to store the saved frame pointer at *sp,
		 then increment sp (writes beyond the current stack boundary).

		 So instead use stwm to store at *sp and post-increment the
		 stack pointer as an atomic operation.  Then increment sp to
		 finish allocating the new frame.  */
	      HOST_WIDE_INT adjust1 = 8192 - 64;
	      HOST_WIDE_INT adjust2 = actual_fsize - adjust1;

	      store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
	      set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
			      adjust2, 1);
	    }

	  /* We set SAVE_SP in frames that need a frame pointer.  Thus,
	     we need to store the previous stack pointer (frame pointer)
	     into the frame marker on targets that use the HP unwind
	     library.  This allows the HP unwind library to be used to
	     unwind GCC frames.  However, we are not fully compatible
	     with the HP library because our frame layout differs from
	     that specified in the HP runtime specification.

	     We don't want a frame note on this instruction as the frame
	     marker moves during dynamic stack allocation.

	     This instruction also serves as a blockage to prevent
	     register spills from being scheduled before the stack
	     pointer is raised.  This is necessary as we store
	     registers using the frame pointer as a base register,
	     and the frame pointer is set before sp is raised.  */
	  if (TARGET_HPUX_UNWIND_LIBRARY)
	    {
	      rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
				       GEN_INT (TARGET_64BIT ? -8 : -4));

	      emit_move_insn (gen_rtx_MEM (word_mode, addr),
			      frame_pointer_rtx);
	    }
	  else
	    emit_insn (gen_blockage ());
	}
      /* no frame pointer needed.  */
      else
	{
	  /* In some cases we can perform the first callee register save
	     and allocating the stack frame at the same time.   If so, just
	     make a note of it and defer allocating the frame until saving
	     the callee registers.  */
	  if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
	    merge_sp_adjust_with_store = 1;
	  /* Can not optimize.  Adjust the stack frame by actual_fsize
	     bytes.  */
	  else
	    set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
			    actual_fsize, 1);
	}
    }

  /* Normal register save.

     Do not save the frame pointer in the frame_pointer_needed case.  It
     was done earlier.  */
  if (frame_pointer_needed)
    {
      offset = local_fsize;

      /* Saving the EH return data registers in the frame is the simplest
	 way to get the frame unwind information emitted.  We put them
	 just before the general registers.  */
      if (DO_FRAME_NOTES && current_function_calls_eh_return)
	{
	  unsigned int i, regno;

	  for (i = 0; ; ++i)
	    {
	      regno = EH_RETURN_DATA_REGNO (i);
	      if (regno == INVALID_REGNUM)
		break;

	      store_reg (regno, offset, FRAME_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}

      /* Callee-saved GRs %r4..%r18, saved relative to the frame pointer.  */
      for (i = 18; i >= 4; i--)
	if (regs_ever_live[i] && ! call_used_regs[i])
	  {
	    store_reg (i, offset, FRAME_POINTER_REGNUM);
	    offset += UNITS_PER_WORD;
	    gr_saved++;
	  }
      /* Account for %r3 which is saved in a special place.  */
      gr_saved++;
    }
  /* No frame pointer needed.  */
  else
    {
      offset = local_fsize - actual_fsize;

      /* Saving the EH return data registers in the frame is the simplest
         way to get the frame unwind information emitted.  */
      if (DO_FRAME_NOTES && current_function_calls_eh_return)
	{
	  unsigned int i, regno;

	  for (i = 0; ; ++i)
	    {
	      regno = EH_RETURN_DATA_REGNO (i);
	      if (regno == INVALID_REGNUM)
		break;

	      /* If merge_sp_adjust_with_store is nonzero, then we can
		 optimize the first save.  */
	      if (merge_sp_adjust_with_store)
		{
		  store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
		  merge_sp_adjust_with_store = 0;
		}
	      else
		store_reg (regno, offset, STACK_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}

      /* Callee-saved GRs %r3..%r18, saved relative to the stack pointer.  */
      for (i = 18; i >= 3; i--)
      	if (regs_ever_live[i] && ! call_used_regs[i])
	  {
	    /* If merge_sp_adjust_with_store is nonzero, then we can
	       optimize the first GR save.  */
	    if (merge_sp_adjust_with_store)
	      {
		store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
		merge_sp_adjust_with_store = 0;
	      }
	    else
	      store_reg (i, offset, STACK_POINTER_REGNUM);
	    offset += UNITS_PER_WORD;
	    gr_saved++;
	  }

      /* If we wanted to merge the SP adjustment with a GR save, but we never
	 did any GR saves, then just emit the adjustment here.  */
      if (merge_sp_adjust_with_store)
	set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
			actual_fsize, 1);
    }

  /* The hppa calling conventions say that %r19, the pic offset
     register, is saved at sp - 32 (in this function's frame)
     when generating PIC code.  FIXME:  What is the correct thing
     to do for functions which make no calls and allocate no
     frame?  Do we need to allocate a frame, or can we just omit
     the save?   For now we'll just omit the save.

     We don't want a note on this insn as the frame marker can
     move if there is a dynamic stack allocation.  */
  if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
    {
      rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));

      emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);

    }

  /* Align pointer properly (doubleword boundary).  */
  offset = (offset + 7) & ~7;

  /* Floating point register store.  */
  if (save_fregs)
    {
      rtx base;

      /* First get the frame or stack pointer to the start of the FP register
	 save area.  */
      if (frame_pointer_needed)
	{
	  set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
	  base = frame_pointer_rtx;
	}
      else
	{
	  set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
	  base = stack_pointer_rtx;
	}

      /* Now actually save the FP registers.  Each store post-increments
	 %r1 (tmpreg) so the saves walk up the save area.  */
      for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
	{
	  if (regs_ever_live[i]
	      || (! TARGET_64BIT && regs_ever_live[i + 1]))
	    {
	      rtx addr, insn, reg;
	      addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
	      reg = gen_rtx_REG (DFmode, i);
	      insn = emit_move_insn (addr, reg);
	      if (DO_FRAME_NOTES)
		{
		  RTX_FRAME_RELATED_P (insn) = 1;
		  if (TARGET_64BIT)
		    {
		      rtx mem = gen_rtx_MEM (DFmode,
					     plus_constant (base, offset));
		      REG_NOTES (insn)
			= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
					     gen_rtx_SET (VOIDmode, mem, reg),
					     REG_NOTES (insn));
		    }
		  else
		    {
		      /* In 32-bit mode, describe the double save as two
			 single-word saves so the unwind info is exact.  */
		      rtx meml = gen_rtx_MEM (SFmode,
					      plus_constant (base, offset));
		      rtx memr = gen_rtx_MEM (SFmode,
					      plus_constant (base, offset + 4));
		      rtx regl = gen_rtx_REG (SFmode, i);
		      rtx regr = gen_rtx_REG (SFmode, i + 1);
		      rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
		      rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
		      rtvec vec;

		      RTX_FRAME_RELATED_P (setl) = 1;
		      RTX_FRAME_RELATED_P (setr) = 1;
		      vec = gen_rtvec (2, setl, setr);
		      REG_NOTES (insn)
			= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
					     gen_rtx_SEQUENCE (VOIDmode, vec),
					     REG_NOTES (insn));
		    }
		}
	      offset += GET_MODE_SIZE (DFmode);
	      fr_saved++;
	    }
	}
    }
}
3856 
3857 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
3858    Handle case where DISP > 8k by using the add_high_const patterns.  */
3859 
3860 static void
load_reg(int reg,HOST_WIDE_INT disp,int base)3861 load_reg (int reg, HOST_WIDE_INT disp, int base)
3862 {
3863   rtx dest = gen_rtx_REG (word_mode, reg);
3864   rtx basereg = gen_rtx_REG (Pmode, base);
3865   rtx src;
3866 
3867   if (VAL_14_BITS_P (disp))
3868     src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
3869   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3870     {
3871       rtx delta = GEN_INT (disp);
3872       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3873 
3874       emit_move_insn (tmpreg, delta);
3875       if (TARGET_DISABLE_INDEXING)
3876 	{
3877 	  emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3878 	  src = gen_rtx_MEM (word_mode, tmpreg);
3879 	}
3880       else
3881 	src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3882     }
3883   else
3884     {
3885       rtx delta = GEN_INT (disp);
3886       rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3887       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3888 
3889       emit_move_insn (tmpreg, high);
3890       src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3891     }
3892 
3893   emit_move_insn (dest, src);
3894 }
3895 
3896 /* Update the total code bytes output to the text section.  */
3897 
3898 static void
update_total_code_bytes(int nbytes)3899 update_total_code_bytes (int nbytes)
3900 {
3901   if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
3902       && !IN_NAMED_SECTION_P (cfun->decl))
3903     {
3904       if (INSN_ADDRESSES_SET_P ())
3905 	{
3906 	  unsigned long old_total = total_code_bytes;
3907 
3908 	  total_code_bytes += nbytes;
3909 
3910 	  /* Be prepared to handle overflows.  */
3911 	  if (old_total > total_code_bytes)
3912 	    total_code_bytes = -1;
3913 	}
3914       else
3915 	total_code_bytes = -1;
3916     }
3917 }
3918 
/* This function generates the assembly code for function exit.
   Args are as for output_function_prologue ().

   The function epilogue should not depend on the current stack
   pointer!  It should use the frame pointer only.  This is mandatory
   because of alloca; we also take advantage of it to omit stack
   adjustments before returning.  */

static void
pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  rtx insn = get_last_insn ();

  last_address = 0;

  /* hppa_expand_epilogue does the dirty work now.  We just need
     to output the assembler directives which denote the end
     of a function.

     To make debuggers happy, emit a nop if the epilogue was completely
     eliminated due to a volatile call as the last insn in the
     current function.  That way the return address (in %r2) will
     always point to a valid instruction in the current function.  */

  /* Get the last real insn.  */
  if (GET_CODE (insn) == NOTE)
    insn = prev_real_insn (insn);

  /* If it is a sequence, then look inside.  */
  if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
    insn = XVECEXP (PATTERN (insn), 0, 0);

  /* If insn is a CALL_INSN, then it must be a call to a volatile
     function (otherwise there would be epilogue insns).  */
  if (insn && GET_CODE (insn) == CALL_INSN)
    {
      fputs ("\tnop\n", file);
      last_address += 4;
    }

  fputs ("\t.EXIT\n\t.PROCEND\n", file);

  if (TARGET_SOM && TARGET_GAS)
    {
      /* We done with this subspace except possibly for some additional
	 debug information.  Forget that we are in this subspace to ensure
	 that the next function is output in its own subspace.  */
      in_section = NULL;
      cfun->machine->in_nsubspa = 2;
    }

  /* Estimate the size of the function body from the recorded insn
     addresses, rounded up to the function alignment boundary.  */
  if (INSN_ADDRESSES_SET_P ())
    {
      insn = get_last_nonnote_insn ();
      last_address += INSN_ADDRESSES (INSN_UID (insn));
      if (INSN_P (insn))
	last_address += insn_default_length (insn);
      last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
		      & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
    }

  /* Finally, update the total number of code bytes output so far.  */
  update_total_code_bytes (last_address);
}
3983 
/* Expand the function epilogue to RTL: restore the return pointer,
   the callee-saved general and floating point registers, and finally
   deallocate the frame.  This mirrors, in reverse, the layout created
   by hppa_expand_prologue.  */
void
hppa_expand_epilogue (void)
{
  rtx tmpreg;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT ret_off = 0;
  int i;
  int merge_sp_adjust_with_load = 0;

  /* We will use this often.  %r1 is the scratch register.  */
  tmpreg = gen_rtx_REG (word_mode, 1);

  /* Try to restore RP early to avoid load/use interlocks when
     RP gets used in the return (bv) instruction.  This appears to still
     be necessary even when we schedule the prologue and epilogue.  */
  if (regs_ever_live [2] || current_function_calls_eh_return)
    {
      ret_off = TARGET_64BIT ? -16 : -20;
      if (frame_pointer_needed)
	{
	  load_reg (2, ret_off, FRAME_POINTER_REGNUM);
	  /* ret_off == 0 records that RP has already been restored.  */
	  ret_off = 0;
	}
      else
	{
	  /* No frame pointer, and stack is smaller than 8k.  */
	  if (VAL_14_BITS_P (ret_off - actual_fsize))
	    {
	      load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
	      ret_off = 0;
	    }
	}
    }

  /* General register restores.  */
  if (frame_pointer_needed)
    {
      offset = local_fsize;

      /* If the current function calls __builtin_eh_return, then we need
         to restore the saved EH data registers.  */
      if (DO_FRAME_NOTES && current_function_calls_eh_return)
	{
	  unsigned int i, regno;

	  for (i = 0; ; ++i)
	    {
	      regno = EH_RETURN_DATA_REGNO (i);
	      if (regno == INVALID_REGNUM)
		break;

	      load_reg (regno, offset, FRAME_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}

      /* Restore callee-saved GRs %r4..%r18 from the frame pointer.  */
      for (i = 18; i >= 4; i--)
	if (regs_ever_live[i] && ! call_used_regs[i])
	  {
	    load_reg (i, offset, FRAME_POINTER_REGNUM);
	    offset += UNITS_PER_WORD;
	  }
    }
  else
    {
      offset = local_fsize - actual_fsize;

      /* If the current function calls __builtin_eh_return, then we need
         to restore the saved EH data registers.  */
      if (DO_FRAME_NOTES && current_function_calls_eh_return)
	{
	  unsigned int i, regno;

	  for (i = 0; ; ++i)
	    {
	      regno = EH_RETURN_DATA_REGNO (i);
	      if (regno == INVALID_REGNUM)
		break;

	      /* Only for the first load.
	         merge_sp_adjust_with_load holds the register load
	         with which we will merge the sp adjustment.  */
	      if (merge_sp_adjust_with_load == 0
		  && local_fsize == 0
		  && VAL_14_BITS_P (-actual_fsize))
	        merge_sp_adjust_with_load = regno;
	      else
		load_reg (regno, offset, STACK_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}

      /* Restore callee-saved GRs %r3..%r18 from the stack pointer.  */
      for (i = 18; i >= 3; i--)
	{
	  if (regs_ever_live[i] && ! call_used_regs[i])
	    {
	      /* Only for the first load.
	         merge_sp_adjust_with_load holds the register load
	         with which we will merge the sp adjustment.  */
	      if (merge_sp_adjust_with_load == 0
		  && local_fsize == 0
		  && VAL_14_BITS_P (-actual_fsize))
	        merge_sp_adjust_with_load = i;
	      else
		load_reg (i, offset, STACK_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}
    }

  /* Align pointer properly (doubleword boundary).  */
  offset = (offset + 7) & ~7;

  /* FP register restores.  */
  if (save_fregs)
    {
      /* Adjust the register to index off of.  */
      if (frame_pointer_needed)
	set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
      else
	set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);

      /* Actually do the restores now.  Each load post-increments %r1
	 so the restores walk up the FP save area.  */
      for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
	if (regs_ever_live[i]
	    || (! TARGET_64BIT && regs_ever_live[i + 1]))
	  {
	    rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
	    rtx dest = gen_rtx_REG (DFmode, i);
	    emit_move_insn (dest, src);
	  }
    }

  /* Emit a blockage insn here to keep these insns from being moved to
     an earlier spot in the epilogue, or into the main instruction stream.

     This is necessary as we must not cut the stack back before all the
     restores are finished.  */
  emit_insn (gen_blockage ());

  /* Reset stack pointer (and possibly frame pointer).  The stack
     pointer is initially set to fp + 64 to avoid a race condition.  */
  if (frame_pointer_needed)
    {
      rtx delta = GEN_INT (-64);

      set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
      emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
    }
  /* If we were deferring a callee register restore, do it now.  */
  else if (merge_sp_adjust_with_load)
    {
      rtx delta = GEN_INT (-actual_fsize);
      rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);

      emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
    }
  else if (actual_fsize != 0)
    set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
		    - actual_fsize, 0);

  /* If we haven't restored %r2 yet (no frame pointer, and a stack
     frame greater than 8k), do so now.  */
  if (ret_off != 0)
    load_reg (2, ret_off, STACK_POINTER_REGNUM);

  /* For __builtin_eh_return, apply the stack adjustment requested by
     the EH machinery after everything else has been restored.  */
  if (DO_FRAME_NOTES && current_function_calls_eh_return)
    {
      rtx sa = EH_RETURN_STACKADJ_RTX;

      emit_insn (gen_blockage ());
      emit_insn (TARGET_64BIT
		 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
		 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
    }
}
4160 
/* Return an rtx holding the value the PIC offset table register had
   on entry to the current function.  */
rtx
hppa_pic_save_rtx (void)
{
  return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
}
4166 
/* When NO_DEFERRED_PROFILE_COUNTERS is zero (the default), the "LP"
   profile counters referenced by hppa_profile_hook are accumulated and
   emitted later by output_deferred_profile_counters.  */
#ifndef NO_DEFERRED_PROFILE_COUNTERS
#define NO_DEFERRED_PROFILE_COUNTERS 0
#endif

/* Define heap vector type for funcdef numbers.  */
DEF_VEC_I(int);
DEF_VEC_ALLOC_I(int,heap);

/* Vector of funcdef numbers whose profile counters still need to be
   output (filled by hppa_profile_hook).  */
static VEC(int,heap) *funcdef_nos;
4177 
/* Output deferred profile counters: one zero-initialized, long-sized
   "LP" labeled slot in the data section for each funcdef number that
   hppa_profile_hook recorded.  Frees the vector when done.  */
static void
output_deferred_profile_counters (void)
{
  unsigned int i;
  int align, n;

  if (VEC_empty (int, funcdef_nos))
   return;

  switch_to_section (data_section);
  align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
  ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));

  for (i = 0; VEC_iterate (int, funcdef_nos, i, n); i++)
    {
      /* Emit the "LP<n>" label followed by a zeroed counter word.  */
      targetm.asm_out.internal_label (asm_out_file, "LP", n);
      assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
    }

  VEC_free (int, heap, funcdef_nos);
}
4200 
/* Emit the RTL for a call to the _mcount profiling routine at the
   start of the function whose prologue label number is LABEL_NO.
   The caller's return address is passed in %r26, the function's own
   address in %r25, and (unless deferred counters are disabled) the
   address of the per-function counter in %r24.  */
void
hppa_profile_hook (int label_no)
{
  /* We use SImode for the address of the function in both 32 and
     64-bit code to avoid having to provide DImode versions of the
     lcla2 and load_offset_label_address insn patterns.  */
  rtx reg = gen_reg_rtx (SImode);
  rtx label_rtx = gen_label_rtx ();
  rtx begin_label_rtx, call_insn;
  char begin_label_name[16];

  ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
			       label_no);
  begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));

  /* In 64-bit mode, point the argument pointer past the 64-byte
     register-save area of the outgoing args.  */
  if (TARGET_64BIT)
    emit_move_insn (arg_pointer_rtx,
		    gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
				  GEN_INT (64)));

  /* Pass the caller's return address (%r2) in %r26.  */
  emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));

  /* The address of the function is loaded into %r25 with a instruction-
     relative sequence that avoids the use of relocations.  The sequence
     is split so that the load_offset_label_address instruction can
     occupy the delay slot of the call to _mcount.  */
  if (TARGET_PA_20)
    emit_insn (gen_lcla2 (reg, label_rtx));
  else
    emit_insn (gen_lcla1 (reg, label_rtx));

  emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
					    reg, begin_label_rtx, label_rtx));

#if !NO_DEFERRED_PROFILE_COUNTERS
  {
    rtx count_label_rtx, addr, r24;
    char count_label_name[16];

    /* Record this funcdef number so output_deferred_profile_counters
       emits the matching "LP" counter, and pass its address in %r24.  */
    VEC_safe_push (int, heap, funcdef_nos, label_no);
    ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
    count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));

    addr = force_reg (Pmode, count_label_rtx);
    r24 = gen_rtx_REG (Pmode, 24);
    emit_move_insn (r24, addr);

    call_insn =
      emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
					     gen_rtx_SYMBOL_REF (Pmode,
								 "_mcount")),
				GEN_INT (TARGET_64BIT ? 24 : 12)));

    use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
  }
#else

  call_insn =
    emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
					   gen_rtx_SYMBOL_REF (Pmode,
							       "_mcount")),
			      GEN_INT (TARGET_64BIT ? 16 : 8)));

#endif

  use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
  use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));

  /* Indicate the _mcount call cannot throw, nor will it execute a
     non-local goto.  */
  REG_NOTES (call_insn)
    = gen_rtx_EXPR_LIST (REG_EH_REGION, constm1_rtx, REG_NOTES (call_insn));
}
4274 
4275 /* Fetch the return address for the frame COUNT steps up from
4276    the current frame, after the prologue.  FRAMEADDR is the
4277    frame pointer of the COUNT frame.
4278 
4279    We want to ignore any export stub remnants here.  To handle this,
4280    we examine the code at the return address, and if it is an export
4281    stub, we return a memory rtx for the stub return address stored
4282    at frame-24.
4283 
4284    The value returned is used in two different ways:
4285 
4286 	1. To find a function's caller.
4287 
4288 	2. To change the return address for a function.
4289 
4290    This function handles most instances of case 1; however, it will
4291    fail if there are two levels of stubs to execute on the return
4292    path.  The only way I believe that can happen is if the return value
4293    needs a parameter relocation, which never happens for C code.
4294 
4295    This function handles most instances of case 2; however, it will
4296    fail if we did not originally have stub code on the return path
4297    but will need stub code on the new return path.  This can happen if
4298    the caller & callee are both in the main program, but the new
4299    return location is in a shared library.  */
4300 
/* See the long comment above: return an rtx for the return address of
   the frame COUNT steps up, peeling off an export stub if the code at
   the return address matches the four known stub instructions.  */
rtx
return_addr_rtx (int count, rtx frameaddr)
{
  rtx label;
  rtx rp;
  rtx saved_rp;
  rtx ins;

  /* Only the innermost frame is supported.  */
  if (count != 0)
    return NULL_RTX;

  rp = get_hard_reg_initial_val (Pmode, 2);

  /* No export stubs in these configurations; use %r2's entry value
     directly.  */
  if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
    return rp;

  saved_rp = gen_reg_rtx (Pmode);
  emit_move_insn (saved_rp, rp);

  /* Get pointer to the instruction stream.  We have to mask out the
     privilege level from the two low order bits of the return address
     pointer here so that ins will point to the start of the first
     instruction that would have been executed if we returned.  */
  ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
  label = gen_label_rtx ();

  /* Check the instruction stream at the normal return address for the
     export stub:

	0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
	0x004010a1 | stub+12:  ldsid (sr0,rp),r1
	0x00011820 | stub+16:  mtsp r1,sr0
	0xe0400002 | stub+20:  be,n 0(sr0,rp)

     If it is an export stub, than our return address is really in
     -24[frameaddr].  Any mismatch branches to LABEL, keeping the
     plain %r2 value in saved_rp.  */

  emit_cmp_insn (gen_rtx_MEM (SImode, ins), GEN_INT (0x4bc23fd1), NE,
		 NULL_RTX, SImode, 1);
  emit_jump_insn (gen_bne (label));

  emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)),
		 GEN_INT (0x004010a1), NE, NULL_RTX, SImode, 1);
  emit_jump_insn (gen_bne (label));

  emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)),
		 GEN_INT (0x00011820), NE, NULL_RTX, SImode, 1);
  emit_jump_insn (gen_bne (label));

  /* 0xe0400002 must be specified as -532676606 so that it won't be
     rejected as an invalid immediate operand on 64-bit hosts.  */
  emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)),
		 GEN_INT (-532676606), NE, NULL_RTX, SImode, 1);

  /* If there is no export stub then just use the value saved from
     the return pointer register.  */

  emit_jump_insn (gen_bne (label));

  /* Here we know that our return address points to an export
     stub.  We don't want to return the address of the export stub,
     but rather the return address of the export stub.  That return
     address is stored at -24[frameaddr].  */

  emit_move_insn (saved_rp,
		  gen_rtx_MEM (Pmode,
			       memory_address (Pmode,
					       plus_constant (frameaddr,
							      -24))));

  emit_label (label);
  return saved_rp;
}
4374 
4375 /* This is only valid once reload has completed because it depends on
4376    knowing exactly how much (if any) frame there is and...
4377 
4378    It's only valid if there is no frame marker to de-allocate and...
4379 
4380    It's only valid if %r2 hasn't been saved into the caller's frame
4381    (we're not profiling and %r2 isn't live anywhere).  */
4382 int
hppa_can_use_return_insn_p(void)4383 hppa_can_use_return_insn_p (void)
4384 {
4385   return (reload_completed
4386 	  && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
4387 	  && ! regs_ever_live[2]
4388 	  && ! frame_pointer_needed);
4389 }
4390 
4391 void
emit_bcond_fp(enum rtx_code code,rtx operand0)4392 emit_bcond_fp (enum rtx_code code, rtx operand0)
4393 {
4394   emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4395 			       gen_rtx_IF_THEN_ELSE (VOIDmode,
4396 						     gen_rtx_fmt_ee (code,
4397 							      VOIDmode,
4398 							      gen_rtx_REG (CCFPmode, 0),
4399 							      const0_rtx),
4400 						     gen_rtx_LABEL_REF (VOIDmode, operand0),
4401 						     pc_rtx)));
4402 
4403 }
4404 
/* Build (but do not emit) a SET of the floating point condition
   register (CCFP reg 0) to the comparison CODE of OPERAND0 and
   OPERAND1; the result is consumed by a later emit_bcond_fp.  */
rtx
gen_cmp_fp (enum rtx_code code, rtx operand0, rtx operand1)
{
  return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
		      gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1));
}
4411 
4412 /* Adjust the cost of a scheduling dependency.  Return the new cost of
4413    a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
4414 
4415 static int
pa_adjust_cost(rtx insn,rtx link,rtx dep_insn,int cost)4416 pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4417 {
4418   enum attr_type attr_type;
4419 
4420   /* Don't adjust costs for a pa8000 chip, also do not adjust any
4421      true dependencies as they are described with bypasses now.  */
4422   if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4423     return cost;
4424 
4425   if (! recog_memoized (insn))
4426     return 0;
4427 
4428   attr_type = get_attr_type (insn);
4429 
4430   switch (REG_NOTE_KIND (link))
4431     {
4432     case REG_DEP_ANTI:
4433       /* Anti dependency; DEP_INSN reads a register that INSN writes some
4434 	 cycles later.  */
4435 
4436       if (attr_type == TYPE_FPLOAD)
4437 	{
4438 	  rtx pat = PATTERN (insn);
4439 	  rtx dep_pat = PATTERN (dep_insn);
4440 	  if (GET_CODE (pat) == PARALLEL)
4441 	    {
4442 	      /* This happens for the fldXs,mb patterns.  */
4443 	      pat = XVECEXP (pat, 0, 0);
4444 	    }
4445 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4446 	    /* If this happens, we have to extend this to schedule
4447 	       optimally.  Return 0 for now.  */
4448 	  return 0;
4449 
4450 	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4451 	    {
4452 	      if (! recog_memoized (dep_insn))
4453 		return 0;
4454 	      switch (get_attr_type (dep_insn))
4455 		{
4456 		case TYPE_FPALU:
4457 		case TYPE_FPMULSGL:
4458 		case TYPE_FPMULDBL:
4459 		case TYPE_FPDIVSGL:
4460 		case TYPE_FPDIVDBL:
4461 		case TYPE_FPSQRTSGL:
4462 		case TYPE_FPSQRTDBL:
4463 		  /* A fpload can't be issued until one cycle before a
4464 		     preceding arithmetic operation has finished if
4465 		     the target of the fpload is any of the sources
4466 		     (or destination) of the arithmetic operation.  */
4467 		  return insn_default_latency (dep_insn) - 1;
4468 
4469 		default:
4470 		  return 0;
4471 		}
4472 	    }
4473 	}
4474       else if (attr_type == TYPE_FPALU)
4475 	{
4476 	  rtx pat = PATTERN (insn);
4477 	  rtx dep_pat = PATTERN (dep_insn);
4478 	  if (GET_CODE (pat) == PARALLEL)
4479 	    {
4480 	      /* This happens for the fldXs,mb patterns.  */
4481 	      pat = XVECEXP (pat, 0, 0);
4482 	    }
4483 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4484 	    /* If this happens, we have to extend this to schedule
4485 	       optimally.  Return 0 for now.  */
4486 	  return 0;
4487 
4488 	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4489 	    {
4490 	      if (! recog_memoized (dep_insn))
4491 		return 0;
4492 	      switch (get_attr_type (dep_insn))
4493 		{
4494 		case TYPE_FPDIVSGL:
4495 		case TYPE_FPDIVDBL:
4496 		case TYPE_FPSQRTSGL:
4497 		case TYPE_FPSQRTDBL:
4498 		  /* An ALU flop can't be issued until two cycles before a
4499 		     preceding divide or sqrt operation has finished if
4500 		     the target of the ALU flop is any of the sources
4501 		     (or destination) of the divide or sqrt operation.  */
4502 		  return insn_default_latency (dep_insn) - 2;
4503 
4504 		default:
4505 		  return 0;
4506 		}
4507 	    }
4508 	}
4509 
4510       /* For other anti dependencies, the cost is 0.  */
4511       return 0;
4512 
4513     case REG_DEP_OUTPUT:
4514       /* Output dependency; DEP_INSN writes a register that INSN writes some
4515 	 cycles later.  */
4516       if (attr_type == TYPE_FPLOAD)
4517 	{
4518 	  rtx pat = PATTERN (insn);
4519 	  rtx dep_pat = PATTERN (dep_insn);
4520 	  if (GET_CODE (pat) == PARALLEL)
4521 	    {
4522 	      /* This happens for the fldXs,mb patterns.  */
4523 	      pat = XVECEXP (pat, 0, 0);
4524 	    }
4525 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4526 	    /* If this happens, we have to extend this to schedule
4527 	       optimally.  Return 0 for now.  */
4528 	  return 0;
4529 
4530 	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4531 	    {
4532 	      if (! recog_memoized (dep_insn))
4533 		return 0;
4534 	      switch (get_attr_type (dep_insn))
4535 		{
4536 		case TYPE_FPALU:
4537 		case TYPE_FPMULSGL:
4538 		case TYPE_FPMULDBL:
4539 		case TYPE_FPDIVSGL:
4540 		case TYPE_FPDIVDBL:
4541 		case TYPE_FPSQRTSGL:
4542 		case TYPE_FPSQRTDBL:
4543 		  /* A fpload can't be issued until one cycle before a
4544 		     preceding arithmetic operation has finished if
4545 		     the target of the fpload is the destination of the
4546 		     arithmetic operation.
4547 
4548 		     Exception: For PA7100LC, PA7200 and PA7300, the cost
4549 		     is 3 cycles, unless they bundle together.   We also
4550 		     pay the penalty if the second insn is a fpload.  */
4551 		  return insn_default_latency (dep_insn) - 1;
4552 
4553 		default:
4554 		  return 0;
4555 		}
4556 	    }
4557 	}
4558       else if (attr_type == TYPE_FPALU)
4559 	{
4560 	  rtx pat = PATTERN (insn);
4561 	  rtx dep_pat = PATTERN (dep_insn);
4562 	  if (GET_CODE (pat) == PARALLEL)
4563 	    {
4564 	      /* This happens for the fldXs,mb patterns.  */
4565 	      pat = XVECEXP (pat, 0, 0);
4566 	    }
4567 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4568 	    /* If this happens, we have to extend this to schedule
4569 	       optimally.  Return 0 for now.  */
4570 	  return 0;
4571 
4572 	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4573 	    {
4574 	      if (! recog_memoized (dep_insn))
4575 		return 0;
4576 	      switch (get_attr_type (dep_insn))
4577 		{
4578 		case TYPE_FPDIVSGL:
4579 		case TYPE_FPDIVDBL:
4580 		case TYPE_FPSQRTSGL:
4581 		case TYPE_FPSQRTDBL:
4582 		  /* An ALU flop can't be issued until two cycles before a
4583 		     preceding divide or sqrt operation has finished if
4584 		     the target of the ALU flop is also the target of
4585 		     the divide or sqrt operation.  */
4586 		  return insn_default_latency (dep_insn) - 2;
4587 
4588 		default:
4589 		  return 0;
4590 		}
4591 	    }
4592 	}
4593 
4594       /* For other output dependencies, the cost is 0.  */
4595       return 0;
4596 
4597     default:
4598       gcc_unreachable ();
4599     }
4600 }
4601 
4602 /* Adjust scheduling priorities.  We use this to try and keep addil
4603    and the next use of %r1 close together.  */
4604 static int
pa_adjust_priority(rtx insn,int priority)4605 pa_adjust_priority (rtx insn, int priority)
4606 {
4607   rtx set = single_set (insn);
4608   rtx src, dest;
4609   if (set)
4610     {
4611       src = SET_SRC (set);
4612       dest = SET_DEST (set);
4613       if (GET_CODE (src) == LO_SUM
4614 	  && symbolic_operand (XEXP (src, 1), VOIDmode)
4615 	  && ! read_only_operand (XEXP (src, 1), VOIDmode))
4616 	priority >>= 3;
4617 
4618       else if (GET_CODE (src) == MEM
4619 	       && GET_CODE (XEXP (src, 0)) == LO_SUM
4620 	       && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4621 	       && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4622 	priority >>= 1;
4623 
4624       else if (GET_CODE (dest) == MEM
4625 	       && GET_CODE (XEXP (dest, 0)) == LO_SUM
4626 	       && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4627 	       && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4628 	priority >>= 3;
4629     }
4630   return priority;
4631 }
4632 
4633 /* The 700 can only issue a single insn at a time.
4634    The 7XXX processors can issue two insns at a time.
4635    The 8000 can issue 4 insns at a time.  */
4636 static int
pa_issue_rate(void)4637 pa_issue_rate (void)
4638 {
4639   switch (pa_cpu)
4640     {
4641     case PROCESSOR_700:		return 1;
4642     case PROCESSOR_7100:	return 2;
4643     case PROCESSOR_7100LC:	return 2;
4644     case PROCESSOR_7200:	return 2;
4645     case PROCESSOR_7300:	return 2;
4646     case PROCESSOR_8000:	return 4;
4647 
4648     default:
4649       gcc_unreachable ();
4650     }
4651 }
4652 
4653 
4654 
4655 /* Return any length adjustment needed by INSN which already has its length
4656    computed as LENGTH.   Return zero if no adjustment is necessary.
4657 
4658    For the PA: function calls, millicode calls, and backwards short
4659    conditional branches with unfilled delay slots need an adjustment by +1
4660    (to account for the NOP which will be inserted into the instruction stream).
4661 
4662    Also compute the length of an inline block move here as it is too
4663    complicated to express as a length attribute in pa.md.  */
4664 int
pa_adjust_insn_length(rtx insn,int length)4665 pa_adjust_insn_length (rtx insn, int length)
4666 {
4667   rtx pat = PATTERN (insn);
4668 
4669   /* Jumps inside switch tables which have unfilled delay slots need
4670      adjustment.  */
4671   if (GET_CODE (insn) == JUMP_INSN
4672       && GET_CODE (pat) == PARALLEL
4673       && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
4674     return 4;
4675   /* Millicode insn with an unfilled delay slot.  */
4676   else if (GET_CODE (insn) == INSN
4677 	   && GET_CODE (pat) != SEQUENCE
4678 	   && GET_CODE (pat) != USE
4679 	   && GET_CODE (pat) != CLOBBER
4680 	   && get_attr_type (insn) == TYPE_MILLI)
4681     return 4;
4682   /* Block move pattern.  */
4683   else if (GET_CODE (insn) == INSN
4684 	   && GET_CODE (pat) == PARALLEL
4685 	   && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4686 	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4687 	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4688 	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4689 	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4690     return compute_movmem_length (insn) - 4;
4691   /* Block clear pattern.  */
4692   else if (GET_CODE (insn) == INSN
4693 	   && GET_CODE (pat) == PARALLEL
4694 	   && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4695 	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4696 	   && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4697 	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
4698     return compute_clrmem_length (insn) - 4;
4699   /* Conditional branch with an unfilled delay slot.  */
4700   else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4701     {
4702       /* Adjust a short backwards conditional with an unfilled delay slot.  */
4703       if (GET_CODE (pat) == SET
4704 	  && length == 4
4705 	  && ! forward_branch_p (insn))
4706 	return 4;
4707       else if (GET_CODE (pat) == PARALLEL
4708 	       && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4709 	       && length == 4)
4710 	return 4;
4711       /* Adjust dbra insn with short backwards conditional branch with
4712 	 unfilled delay slot -- only for case where counter is in a
4713 	 general register register.  */
4714       else if (GET_CODE (pat) == PARALLEL
4715 	       && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4716 	       && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4717  	       && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4718 	       && length == 4
4719 	       && ! forward_branch_p (insn))
4720 	return 4;
4721       else
4722 	return 0;
4723     }
4724   return 0;
4725 }
4726 
/* Print operand X (an rtx) in assembler syntax to file FILE.
   CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
   For `%' followed by punctuation, CODE is the punctuation and X is null.  */

void
print_operand (FILE *file, rtx x, int code)
{
  switch (code)
    {
    case '#':
      /* Output a 'nop' if there's nothing for the delay slot.  */
      if (dbr_sequence_length () == 0)
	fputs ("\n\tnop", file);
      return;
    case '*':
      /* Output a nullification completer if there's nothing for the */
      /* delay slot or nullification is requested.  */
      if (dbr_sequence_length () == 0 ||
	  (final_sequence &&
	   INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
        fputs (",n", file);
      return;
    case 'R':
      /* Print out the second register name of a register pair.
	 I.e., R (6) => 7.  */
      fputs (reg_names[REGNO (x) + 1], file);
      return;
    case 'r':
      /* A register or zero.  */
      if (x == const0_rtx
	  || (x == CONST0_RTX (DFmode))
	  || (x == CONST0_RTX (SFmode)))
	{
	  fputs ("%r0", file);
	  return;
	}
      else
	break;	/* Non-zero: handled by the generic REG code below.  */
    case 'f':
      /* A register or zero (floating point).  */
      if (x == const0_rtx
	  || (x == CONST0_RTX (DFmode))
	  || (x == CONST0_RTX (SFmode)))
	{
	  fputs ("%fr0", file);
	  return;
	}
      else
	break;	/* Non-zero: handled by the generic REG code below.  */
    case 'A':
      {
	/* Print a global address followed by its base register in
	   parentheses, i.e. "symbol(reg)".  */
	rtx xoperands[2];

	xoperands[0] = XEXP (XEXP (x, 0), 0);
	xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
	output_global_address (file, xoperands[1], 0);
        fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
	return;
      }

    case 'C':			/* Plain (C)ondition */
    case 'X':
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("=", file);  break;
	case NE:
	  fputs ("<>", file);  break;
	case GT:
	  fputs (">", file);  break;
	case GE:
	  fputs (">=", file);  break;
	case GEU:
	  fputs (">>=", file);  break;
	case GTU:
	  fputs (">>", file);  break;
	case LT:
	  fputs ("<", file);  break;
	case LE:
	  fputs ("<=", file);  break;
	case LEU:
	  fputs ("<<=", file);  break;
	case LTU:
	  fputs ("<<", file);  break;
	default:
	  gcc_unreachable ();
	}
      return;
    case 'N':			/* Condition, (N)egated */
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("<>", file);  break;
	case NE:
	  fputs ("=", file);  break;
	case GT:
	  fputs ("<=", file);  break;
	case GE:
	  fputs ("<", file);  break;
	case GEU:
	  fputs ("<<", file);  break;
	case GTU:
	  fputs ("<<=", file);  break;
	case LT:
	  fputs (">=", file);  break;
	case LE:
	  fputs (">", file);  break;
	case LEU:
	  fputs (">>", file);  break;
	case LTU:
	  fputs (">>=", file);  break;
	default:
	  gcc_unreachable ();
	}
      return;
    /* For floating point comparisons.  Note that the output
       predicates are the complement of the desired mode.  The
       conditions for GT, GE, LT, LE and LTGT cause an invalid
       operation exception if the result is unordered and this
       exception is enabled in the floating-point status register.  */
    case 'Y':
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("!=", file);  break;
	case NE:
	  fputs ("=", file);  break;
	case GT:
	  fputs ("!>", file);  break;
	case GE:
	  fputs ("!>=", file);  break;
	case LT:
	  fputs ("!<", file);  break;
	case LE:
	  fputs ("!<=", file);  break;
	case LTGT:
	  fputs ("!<>", file);  break;
	case UNLE:
	  fputs ("!?<=", file);  break;
	case UNLT:
	  fputs ("!?<", file);  break;
	case UNGE:
	  fputs ("!?>=", file);  break;
	case UNGT:
	  fputs ("!?>", file);  break;
	case UNEQ:
	  fputs ("!?=", file);  break;
	case UNORDERED:
	  fputs ("!?", file);  break;
	case ORDERED:
	  fputs ("?", file);  break;
	default:
	  gcc_unreachable ();
	}
      return;
    case 'S':			/* Condition, operands are (S)wapped.  */
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("=", file);  break;
	case NE:
	  fputs ("<>", file);  break;
	case GT:
	  fputs ("<", file);  break;
	case GE:
	  fputs ("<=", file);  break;
	case GEU:
	  fputs ("<<=", file);  break;
	case GTU:
	  fputs ("<<", file);  break;
	case LT:
	  fputs (">", file);  break;
	case LE:
	  fputs (">=", file);  break;
	case LEU:
	  fputs (">>=", file);  break;
	case LTU:
	  fputs (">>", file);  break;
	default:
	  gcc_unreachable ();
	}
      return;
    case 'B':			/* Condition, (B)oth swapped and negate.  */
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("<>", file);  break;
	case NE:
	  fputs ("=", file);  break;
	case GT:
	  fputs (">=", file);  break;
	case GE:
	  fputs (">", file);  break;
	case GEU:
	  fputs (">>", file);  break;
	case GTU:
	  fputs (">>=", file);  break;
	case LT:
	  fputs ("<=", file);  break;
	case LE:
	  fputs ("<", file);  break;
	case LEU:
	  fputs ("<<", file);  break;
	case LTU:
	  fputs ("<<=", file);  break;
	default:
	  gcc_unreachable ();
	}
      return;
    case 'k':
      /* Print the ones' complement of a constant.  */
      gcc_assert (GET_CODE (x) == CONST_INT);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
      return;
    case 'Q':
      /* Print 64 minus the low 6 bits of a constant.  */
      gcc_assert (GET_CODE (x) == CONST_INT);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
      return;
    case 'L':
      /* Print 32 minus the low 5 bits of a constant.  */
      gcc_assert (GET_CODE (x) == CONST_INT);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
      return;
    case 'O':
      /* Print the base-2 logarithm of a constant, which must be an
	 exact power of two.  */
      gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
      fprintf (file, "%d", exact_log2 (INTVAL (x)));
      return;
    case 'p':
      /* Print 63 minus the low 6 bits of a constant.  */
      gcc_assert (GET_CODE (x) == CONST_INT);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
      return;
    case 'P':
      /* Print 31 minus the low 5 bits of a constant.  */
      gcc_assert (GET_CODE (x) == CONST_INT);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
      return;
    case 'I':
      /* Print "i" when the operand is an immediate, nothing otherwise.  */
      if (GET_CODE (x) == CONST_INT)
	fputs ("i", file);
      return;
    case 'M':
    case 'F':
      /* Print the update/index completer implied by the address form
	 of a memory operand; 'F' additionally gets the "s" dialect-0
	 suffix for plain addresses.  */
      switch (GET_CODE (XEXP (x, 0)))
	{
	case PRE_DEC:
	case PRE_INC:
	  if (ASSEMBLER_DIALECT == 0)
	    fputs ("s,mb", file);
	  else
	    fputs (",mb", file);
	  break;
	case POST_DEC:
	case POST_INC:
	  if (ASSEMBLER_DIALECT == 0)
	    fputs ("s,ma", file);
	  else
	    fputs (",ma", file);
	  break;
	case PLUS:
	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
	    {
	      /* reg + reg: indexed form.  */
	      if (ASSEMBLER_DIALECT == 0)
		fputs ("x", file);
	    }
	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
		   || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
	    {
	      /* Scaled index.  */
	      if (ASSEMBLER_DIALECT == 0)
		fputs ("x,s", file);
	      else
		fputs (",s", file);
	    }
	  else if (code == 'F' && ASSEMBLER_DIALECT == 0)
	    fputs ("s", file);
	  break;
	default:
	  if (code == 'F' && ASSEMBLER_DIALECT == 0)
	    fputs ("s", file);
	  break;
	}
      return;
    case 'G':
      /* Global address, unrounded.  */
      output_global_address (file, x, 0);
      return;
    case 'H':
      /* Global address with the offset rounded for an LR field selector.  */
      output_global_address (file, x, 1);
      return;
    case 0:			/* Don't do anything special */
      break;
    case 'Z':
      {
	/* Operand triple for a 32-bit zdepwi instruction.  */
	unsigned op[3];
	compute_zdepwi_operands (INTVAL (x), op);
	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
	return;
      }
    case 'z':
      {
	/* Operand triple for a 64-bit zdepdi instruction.  */
	unsigned op[3];
	compute_zdepdi_operands (INTVAL (x), op);
	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
	return;
      }
    case 'c':
      /* We can get here from a .vtable_inherit due to our
	 CONSTANT_ADDRESS_P rejecting perfectly good constant
	 addresses.  */
      break;
    default:
      gcc_unreachable ();
    }
  /* Generic handling for REG, MEM and constant operands.  */
  if (GET_CODE (x) == REG)
    {
      fputs (reg_names [REGNO (x)], file);
      if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
	{
	  /* Single-precision value in a 64-bit FP register: append the
	     right-half "R" suffix.  */
	  fputs ("R", file);
	  return;
	}
      if (FP_REG_P (x)
	  && GET_MODE_SIZE (GET_MODE (x)) <= 4
	  && (REGNO (x) & 1) == 0)
	/* Single-precision value in an even FP register: left half.  */
	fputs ("L", file);
    }
  else if (GET_CODE (x) == MEM)
    {
      int size = GET_MODE_SIZE (GET_MODE (x));
      rtx base = NULL_RTX;
      switch (GET_CODE (XEXP (x, 0)))
	{
	case PRE_DEC:
	case POST_DEC:
          base = XEXP (XEXP (x, 0), 0);
	  fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
	  break;
	case PRE_INC:
	case POST_INC:
          base = XEXP (XEXP (x, 0), 0);
	  fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
	  break;
	case PLUS:
	  /* Scaled-index forms print "index(base)".  */
	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
	    fprintf (file, "%s(%s)",
		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
		     reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
	  else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
	    fprintf (file, "%s(%s)",
		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
		     reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
	    {
	      /* Because the REG_POINTER flag can get lost during reload,
		 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
		 index and base registers in the combined move patterns.  */
	      rtx base = XEXP (XEXP (x, 0), 1);	/* shadows outer 'base' */
	      rtx index = XEXP (XEXP (x, 0), 0);

	      fprintf (file, "%s(%s)",
		       reg_names [REGNO (index)], reg_names [REGNO (base)]);
	    }
	  else
	    output_address (XEXP (x, 0));
	  break;
	default:
	  output_address (XEXP (x, 0));
	  break;
	}
    }
  else
    output_addr_const (file, x);
}
5097 
/* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF.

   When ROUND_CONSTANT is nonzero, round any integer offset for use with
   an LR field selector (see the comment below).  */

void
output_global_address (FILE *file, rtx x, int round_constant)
{

  /* Imagine  (high (const (plus ...))).  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
    output_addr_const (file, x);
  else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
    {
      /* Writable symbol in non-PIC code: emit it relative to $global$.  */
      output_addr_const (file, x);
      fputs ("-$global$", file);
    }
  else if (GET_CODE (x) == CONST)
    {
      const char *sep = "";
      int offset = 0;		/* assembler wants -$global$ at end */
      rtx base = NULL_RTX;

      /* X is (const (PLUS/MINUS op0 op1)); find the SYMBOL_REF and the
	 integer offset, each of which may be in either position.  */
      switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
	{
	case SYMBOL_REF:
	  base = XEXP (XEXP (x, 0), 0);
	  output_addr_const (file, base);
	  break;
	case CONST_INT:
	  offset = INTVAL (XEXP (XEXP (x, 0), 0));
	  break;
	default:
	  gcc_unreachable ();
	}

      switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
	{
	case SYMBOL_REF:
	  base = XEXP (XEXP (x, 0), 1);
	  output_addr_const (file, base);
	  break;
	case CONST_INT:
	  offset = INTVAL (XEXP (XEXP (x, 0), 1));
	  break;
	default:
	  gcc_unreachable ();
	}

      /* How bogus.  The compiler is apparently responsible for
	 rounding the constant if it uses an LR field selector.

	 The linker and/or assembler seem a better place since
	 they have to do this kind of thing already.

	 If we fail to do this, HP's optimizing linker may eliminate
	 an addil, but not update the ldw/stw/ldo instruction that
	 uses the result of the addil.  */
      if (round_constant)
	offset = ((offset + 0x1000) & ~0x1fff);

      /* Pick the separator to print between the symbol and offset.  */
      switch (GET_CODE (XEXP (x, 0)))
	{
	case PLUS:
	  if (offset < 0)
	    {
	      offset = -offset;
	      sep = "-";
	    }
	  else
	    sep = "+";
	  break;

	case MINUS:
	  gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
	  sep = "-";
	  break;

	default:
	  gcc_unreachable ();
	}

      if (!read_only_operand (base, VOIDmode) && !flag_pic)
	fputs ("-$global$", file);
      if (offset)
	fprintf (file, "%s%d", sep, offset);
    }
  else
    output_addr_const (file, x);
}
5188 
5189 /* Output boilerplate text to appear at the beginning of the file.
5190    There are several possible versions.  */
5191 #define aputs(x) fputs(x, asm_out_file)
5192 static inline void
pa_file_start_level(void)5193 pa_file_start_level (void)
5194 {
5195   if (TARGET_64BIT)
5196     aputs ("\t.LEVEL 2.0w\n");
5197   else if (TARGET_PA_20)
5198     aputs ("\t.LEVEL 2.0\n");
5199   else if (TARGET_PA_11)
5200     aputs ("\t.LEVEL 1.1\n");
5201   else
5202     aputs ("\t.LEVEL 1.0\n");
5203 }
5204 
5205 static inline void
pa_file_start_space(int sortspace)5206 pa_file_start_space (int sortspace)
5207 {
5208   aputs ("\t.SPACE $PRIVATE$");
5209   if (sortspace)
5210     aputs (",SORT=16");
5211   aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
5212          "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5213          "\n\t.SPACE $TEXT$");
5214   if (sortspace)
5215     aputs (",SORT=8");
5216   aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5217          "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5218 }
5219 
5220 static inline void
pa_file_start_file(int want_version)5221 pa_file_start_file (int want_version)
5222 {
5223   if (write_symbols != NO_DEBUG)
5224     {
5225       output_file_directive (asm_out_file, main_input_filename);
5226       if (want_version)
5227 	aputs ("\t.version\t\"01.01\"\n");
5228     }
5229 }
5230 
5231 static inline void
pa_file_start_mcount(const char * aswhat)5232 pa_file_start_mcount (const char *aswhat)
5233 {
5234   if (profile_flag)
5235     fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5236 }
5237 
/* File-start boilerplate for ELF targets.  */
static void
pa_elf_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_mcount ("ENTRY");
  pa_file_start_file (0);
}
5245 
/* File-start boilerplate for SOM targets: space directives plus the
   $global$ and $$dyncall imports.  */
static void
pa_som_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_space (0);
  aputs ("\t.IMPORT $global$,DATA\n"
         "\t.IMPORT $$dyncall,MILLICODE\n");
  pa_file_start_mcount ("CODE");
  pa_file_start_file (0);
}
5256 
/* File-start boilerplate for Linux targets.  Note the .file directive
   is emitted before the .LEVEL here, unlike the other variants.  */
static void
pa_linux_file_start (void)
{
  pa_file_start_file (1);
  pa_file_start_level ();
  pa_file_start_mcount ("CODE");
}
5264 
/* File-start boilerplate for 64-bit HP-UX with GAS.  */
static void
pa_hpux64_gas_file_start (void)
{
  pa_file_start_level ();
#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
  /* Declare _mcount's type so the linker resolves the profiling stub.  */
  if (profile_flag)
    ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
#endif
  pa_file_start_file (1);
}
5275 
/* File-start boilerplate for 64-bit HP-UX with the HP assembler.  */
static void
pa_hpux64_hpas_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_space (1);
  pa_file_start_mcount ("CODE");
  pa_file_start_file (0);
}
5284 #undef aputs
5285 
/* Search the deferred plabel list for SYMBOL and return its internal
   label.  If an entry for SYMBOL is not found, a new entry is created.  */

rtx
get_deferred_plabel (rtx symbol)
{
  const char *fname = XSTR (symbol, 0);
  size_t i;

  /* See if we have already put this function on the list of deferred
     plabels.  This list is generally small, so a linear search is not
     too ugly.  If it proves too slow replace it with something faster.  */
  for (i = 0; i < n_deferred_plabels; i++)
    if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
      break;

  /* If the deferred plabel list is empty, or this entry was not found
     on the list, create a new entry on the list.  */
  if (deferred_plabels == NULL || i == n_deferred_plabels)
    {
      tree id;

      /* Grow the GC-managed array by one entry.  */
      if (deferred_plabels == 0)
	deferred_plabels = (struct deferred_plabel *)
	  ggc_alloc (sizeof (struct deferred_plabel));
      else
	deferred_plabels = (struct deferred_plabel *)
	  ggc_realloc (deferred_plabels,
		       ((n_deferred_plabels + 1)
			* sizeof (struct deferred_plabel)));

      i = n_deferred_plabels++;
      deferred_plabels[i].internal_label = gen_label_rtx ();
      deferred_plabels[i].symbol = symbol;

      /* Gross.  We have just implicitly taken the address of this
	 function.  Mark it in the same manner as assemble_name.  */
      id = maybe_get_identifier (targetm.strip_name_encoding (fname));
      if (id)
	mark_referenced (id);
    }

  return deferred_plabels[i].internal_label;
}
5330 
5331 static void
output_deferred_plabels(void)5332 output_deferred_plabels (void)
5333 {
5334   size_t i;
5335 
5336   /* If we have some deferred plabels, then we need to switch into the
5337      data or readonly data section, and align it to a 4 byte boundary
5338      before outputting the deferred plabels.  */
5339   if (n_deferred_plabels)
5340     {
5341       switch_to_section (flag_pic ? data_section : readonly_data_section);
5342       ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5343     }
5344 
5345   /* Now output the deferred plabels.  */
5346   for (i = 0; i < n_deferred_plabels; i++)
5347     {
5348       (*targetm.asm_out.internal_label) (asm_out_file, "L",
5349 		 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5350       assemble_integer (deferred_plabels[i].symbol,
5351 			TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5352     }
5353 }
5354 
5355 #ifdef HPUX_LONG_DOUBLE_LIBRARY
/* Initialize optabs to point to HPUX long double emulation routines.  */
static void
pa_hpux_init_libfuncs (void)
{
  /* TFmode arithmetic.  */
  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
  /* NOTE(review): "_U_Qmin" (unlike "_U_Qfmax") has no 'f' -- assumed
     to match the HP library's actual entry point; verify before
     changing.  */
  set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
  set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");

  /* TFmode comparisons.  */
  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
  set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");

  /* Float <-> float conversions.  */
  set_conv_libfunc (sext_optab,   TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
  set_conv_libfunc (sext_optab,   TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
  set_conv_libfunc (trunc_optab,  SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
  set_conv_libfunc (trunc_optab,  DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");

  /* Float -> integer conversions.  Note the extra leading underscore
     in the 64-bit quad-to-sgl name.  */
  set_conv_libfunc (sfix_optab,   SImode, TFmode, TARGET_64BIT
						  ? "__U_Qfcnvfxt_quad_to_sgl"
						  : "_U_Qfcnvfxt_quad_to_sgl");
  set_conv_libfunc (sfix_optab,   DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
  set_conv_libfunc (ufix_optab,   SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
  set_conv_libfunc (ufix_optab,   DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");

  /* Integer -> float conversions.  */
  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
  set_conv_libfunc (ufloat_optab, TFmode, SImode, "_U_Qfcnvxf_usgl_to_quad");
  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxf_udbl_to_quad");
}
5395 #endif
5396 
/* HP's millicode routines mean something special to the assembler.
   Keep track of which ones we have used.  */

enum millicodes { remI, remU, divI, divU, mulI, end1000 };
static void import_milli (enum millicodes);
/* One flag per routine; nonzero once its .IMPORT has been emitted.  */
static char imported[(int) end1000];
/* Routine names, indexed by enum millicodes; all exactly 4 chars.  */
static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
/* Directive template; the 4-char routine name is patched in over the
   "...." placeholder at offset MILLI_START by import_milli.  */
static const char import_string[] = ".IMPORT $$....,MILLICODE";
#define MILLI_START 10
5406 
5407 static void
import_milli(enum millicodes code)5408 import_milli (enum millicodes code)
5409 {
5410   char str[sizeof (import_string)];
5411 
5412   if (!imported[(int) code])
5413     {
5414       imported[(int) code] = 1;
5415       strcpy (str, import_string);
5416       strncpy (str + MILLI_START, milli_names[(int) code], 4);
5417       output_asm_insn (str, 0);
5418     }
5419 }
5420 
/* The register constraints have put the operands and return value in
   the proper registers.  Emit the $$mulI import (once) and return the
   assembler template for the millicode call.  */

const char *
output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
{
  import_milli (mulI);
  return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
}
5430 
/* Emit the rtl for doing a division by a constant.  */

/* Do magic division millicodes exist for this value?  Indexed by the
   divisor, 0 through 15.  */
const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};

/* We'll use an array to keep track of the magic millicodes and
   whether or not we've used them already. [n][0] is signed, [n][1] is
   unsigned.  */

static int div_milli[16][2];
5441 
/* Try to emit a division by the constant in operands[2] as a magic
   millicode call.  Returns 1 after emitting the sequence when the
   divisor is a CONST_INT in (0, 16) with a magic millicode; returns 0
   otherwise so the caller can fall back to the generic path.  */
int
emit_hpdiv_const (rtx *operands, int unsignedp)
{
  if (GET_CODE (operands[2]) == CONST_INT
      && INTVAL (operands[2]) > 0
      && INTVAL (operands[2]) < 16
      && magic_milli[INTVAL (operands[2])])
    {
      /* %r2 (64-bit) or %r31 -- clobbered by the millicode call
	 sequence (presumably the millicode return pointer).  */
      rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);

      /* Dividend goes in %r26; the quotient comes back in %r29.  */
      emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
      emit
	(gen_rtx_PARALLEL
	 (VOIDmode,
	  gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
				     gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
						     SImode,
						     gen_rtx_REG (SImode, 26),
						     operands[2])),
		     gen_rtx_CLOBBER (VOIDmode, operands[4]),
		     gen_rtx_CLOBBER (VOIDmode, operands[3]),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
		     gen_rtx_CLOBBER (VOIDmode, ret))));
      emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
      return 1;
    }
  return 0;
}
5471 
/* Output the millicode call for a division.  OPERANDS[0] is the divisor
   when it is a CONST_INT; UNSIGNEDP selects $$divU* versus $$divI*.
   Returns the output template for the call emitted at INSN.  */
const char *
output_div_insn (rtx *operands, int unsignedp, rtx insn)
{
  int divisor;

  /* If the divisor is a constant, try to use one of the special
     opcodes.  */
  if (GET_CODE (operands[0]) == CONST_INT)
    {
      /* Static so the template remains valid after we return.  */
      static char buf[100];
      divisor = INTVAL (operands[0]);
      /* NOTE(review): divisor indexes div_milli[16][2] unchecked here;
	 presumably the insn pattern's predicate only matches divisors
	 with magic millicodes (1..15) — confirm against pa.md.  */
      if (!div_milli[divisor][unsignedp])
	{
	  /* First use of this divisor/signedness: emit the .IMPORT.  */
	  div_milli[divisor][unsignedp] = 1;
	  if (unsignedp)
	    output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
	  else
	    output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
	}
      if (unsignedp)
	{
	  sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
		   INTVAL (operands[0]));
	  return output_millicode_call (insn,
					gen_rtx_SYMBOL_REF (SImode, buf));
	}
      else
	{
	  sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
		   INTVAL (operands[0]));
	  return output_millicode_call (insn,
					gen_rtx_SYMBOL_REF (SImode, buf));
	}
    }
  /* Divisor isn't a special constant.  */
  else
    {
      if (unsignedp)
	{
	  import_milli (divU);
	  return output_millicode_call (insn,
					gen_rtx_SYMBOL_REF (SImode, "$$divU"));
	}
      else
	{
	  import_milli (divI);
	  return output_millicode_call (insn,
					gen_rtx_SYMBOL_REF (SImode, "$$divI"));
	}
    }
}
5523 
5524 /* Output a $$rem millicode to do mod.  */
5525 
5526 const char *
output_mod_insn(int unsignedp,rtx insn)5527 output_mod_insn (int unsignedp, rtx insn)
5528 {
5529   if (unsignedp)
5530     {
5531       import_milli (remU);
5532       return output_millicode_call (insn,
5533 				    gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5534     }
5535   else
5536     {
5537       import_milli (remI);
5538       return output_millicode_call (insn,
5539 				    gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5540     }
5541 }
5542 
/* Write the .CALL argument-relocation descriptor for CALL_INSN to the
   assembler output, describing which argument words are passed in
   general ("GR") versus floating ("FR"/"FU") registers.  */
void
output_arg_descriptor (rtx call_insn)
{
  /* One slot per argument word ARGW0..ARGW3; NULL means unused.  */
  const char *arg_regs[4];
  enum machine_mode arg_mode;
  rtx link;
  int i, output_flag = 0;
  int regno;

  /* We neither need nor want argument location descriptors for the
     64bit runtime environment or the ELF32 environment.  */
  if (TARGET_64BIT || TARGET_ELF32)
    return;

  for (i = 0; i < 4; i++)
    arg_regs[i] = 0;

  /* Specify explicitly that no argument relocations should take place
     if using the portable runtime calling conventions.  */
  if (TARGET_PORTABLE_RUNTIME)
    {
      fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
	     asm_out_file);
      return;
    }

  gcc_assert (GET_CODE (call_insn) == CALL_INSN);
  for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
       link; link = XEXP (link, 1))
    {
      rtx use = XEXP (link, 0);

      /* Only USEs of hard argument registers are of interest.  */
      if (! (GET_CODE (use) == USE
	     && GET_CODE (XEXP (use, 0)) == REG
	     && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
	continue;

      arg_mode = GET_MODE (XEXP (use, 0));
      regno = REGNO (XEXP (use, 0));
      /* General registers %r26..%r23 map to ARGW0..ARGW3; a DImode
	 argument occupies the following argument word as well.  */
      if (regno >= 23 && regno <= 26)
	{
	  arg_regs[26 - regno] = "GR";
	  if (arg_mode == DImode)
	    arg_regs[25 - regno] = "GR";
	}
      /* Floating-point argument registers: an SFmode value fills one
	 word; anything else takes a pair of words ("FR"/"FU").  */
      else if (regno >= 32 && regno <= 39)
	{
	  if (arg_mode == SFmode)
	    arg_regs[(regno - 32) / 2] = "FR";
	  else
	    {
#ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
	      arg_regs[(regno - 34) / 2] = "FR";
	      arg_regs[(regno - 34) / 2 + 1] = "FU";
#else
	      arg_regs[(regno - 34) / 2] = "FU";
	      arg_regs[(regno - 34) / 2 + 1] = "FR";
#endif
	    }
	}
    }
  /* Emit the .CALL pseudo-op with one ARGWn=... entry per argument
     word that was located above.  */
  fputs ("\t.CALL ", asm_out_file);
  for (i = 0; i < 4; i++)
    {
      if (arg_regs[i])
	{
	  if (output_flag++)
	    fputc (',', asm_out_file);
	  fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
	}
    }
  fputc ('\n', asm_out_file);
}
5616 
/* Implement the TARGET_SECONDARY_RELOAD hook: decide whether reloading
   X into (IN_P true) or out of a register of CLASS in MODE needs a
   scratch register or a special reload pattern.  Requirements are
   communicated through SRI->icode; this port never demands an
   intermediate register class, so NO_REGS is always returned.  */
static enum reg_class
pa_secondary_reload (bool in_p, rtx x, enum reg_class class,
		     enum machine_mode mode, secondary_reload_info *sri)
{
  int is_symbolic, regno;

  /* Handle the easy stuff first.  */
  if (class == R1_REGS)
    return NO_REGS;

  if (REG_P (x))
    {
      regno = REGNO (x);
      if (class == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
	return NO_REGS;
    }
  else
    regno = -1;

  /* If we have something like (mem (mem (...)), we can safely assume the
     inner MEM will end up in a general register after reloading, so there's
     no need for a secondary reload.  */
  if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
    return NO_REGS;

  /* Trying to load a constant into a FP register during PIC code
     generation requires %r1 as a scratch register.  */
  if (flag_pic
      && (mode == SImode || mode == DImode)
      && FP_REG_CLASS_P (class)
      && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
    {
      sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
		    : CODE_FOR_reload_indi_r1);
      return NO_REGS;
    }

  /* Profiling showed the PA port spends about 1.3% of its compilation
     time in true_regnum from calls inside pa_secondary_reload_class.  */
  if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
    regno = true_regnum (x);

  /* Handle out of range displacement for integer mode loads/stores of
     FP registers.  */
  if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
       && GET_MODE_CLASS (mode) == MODE_INT
       && FP_REG_CLASS_P (class))
      || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
    {
      /* Defer to the mode's generic reload_in/reload_out pattern.  */
      sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
      return NO_REGS;
    }

  /* A SAR<->FP register copy requires a secondary register (GPR) as
     well as secondary memory.  */
  if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
      && ((REGNO_REG_CLASS (regno) == SHIFT_REGS && FP_REG_CLASS_P (class))
	  || (class == SHIFT_REGS
	      && FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))))
    {
      sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
      return NO_REGS;
    }

  /* Secondary reloads of symbolic operands require %r1 as a scratch
     register when we're generating PIC code and the operand isn't
     readonly.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  /* Profiling has showed GCC spends about 2.6% of its compilation
     time in symbolic_operand from calls inside pa_secondary_reload_class.
     So, we use an inline copy to avoid useless work.  */
  switch (GET_CODE (x))
    {
      rtx op;

      case SYMBOL_REF:
        is_symbolic = !SYMBOL_REF_TLS_MODEL (x);
        break;
      case LABEL_REF:
        is_symbolic = 1;
        break;
      case CONST:
	op = XEXP (x, 0);
	is_symbolic = (((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
			 && !SYMBOL_REF_TLS_MODEL (XEXP (op, 0)))
			|| GET_CODE (XEXP (op, 0)) == LABEL_REF)
		       && GET_CODE (XEXP (op, 1)) == CONST_INT);
        break;
      default:
        is_symbolic = 0;
        break;
    }

  if (is_symbolic && (flag_pic || !read_only_operand (x, VOIDmode)))
    {
      gcc_assert (mode == SImode || mode == DImode);
      sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
		    : CODE_FOR_reload_indi_r1);
    }

  return NO_REGS;
}
5721 
5722 /* In the 32-bit runtime, arguments larger than eight bytes are passed
5723    by invisible reference.  As a GCC extension, we also pass anything
5724    with a zero or variable size by reference.
5725 
5726    The 64-bit runtime does not describe passing any types by invisible
5727    reference.  The internals of GCC can't currently handle passing
5728    empty structures, and zero or variable length arrays when they are
5729    not passed entirely on the stack or by reference.  Thus, as a GCC
5730    extension, we pass these types by reference.  The HP compiler doesn't
5731    support these types, so hopefully there shouldn't be any compatibility
5732    issues.  This may have to be revisited when HP releases a C99 compiler
5733    or updates the ABI.  */
5734 
5735 static bool
pa_pass_by_reference(CUMULATIVE_ARGS * ca ATTRIBUTE_UNUSED,enum machine_mode mode,tree type,bool named ATTRIBUTE_UNUSED)5736 pa_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
5737 		      enum machine_mode mode, tree type,
5738 		      bool named ATTRIBUTE_UNUSED)
5739 {
5740   HOST_WIDE_INT size;
5741 
5742   if (type)
5743     size = int_size_in_bytes (type);
5744   else
5745     size = GET_MODE_SIZE (mode);
5746 
5747   if (TARGET_64BIT)
5748     return size <= 0;
5749   else
5750     return size <= 0 || size > 8;
5751 }
5752 
5753 enum direction
function_arg_padding(enum machine_mode mode,tree type)5754 function_arg_padding (enum machine_mode mode, tree type)
5755 {
5756   if (mode == BLKmode
5757       || (TARGET_64BIT && type && AGGREGATE_TYPE_P (type)))
5758     {
5759       /* Return none if justification is not required.  */
5760       if (type
5761 	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
5762 	  && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
5763 	return none;
5764 
5765       /* The directions set here are ignored when a BLKmode argument larger
5766 	 than a word is placed in a register.  Different code is used for
5767 	 the stack and registers.  This makes it difficult to have a
5768 	 consistent data representation for both the stack and registers.
5769 	 For both runtimes, the justification and padding for arguments on
5770 	 the stack and in registers should be identical.  */
5771       if (TARGET_64BIT)
5772 	/* The 64-bit runtime specifies left justification for aggregates.  */
5773         return upward;
5774       else
5775 	/* The 32-bit runtime architecture specifies right justification.
5776 	   When the argument is passed on the stack, the argument is padded
5777 	   with garbage on the left.  The HP compiler pads with zeros.  */
5778 	return downward;
5779     }
5780 
5781   if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
5782     return downward;
5783   else
5784     return none;
5785 }
5786 
5787 
5788 /* Do what is necessary for `va_start'.  We look at the current function
5789    to determine if stdargs or varargs is used and fill in an initial
5790    va_list.  A pointer to this constructor is returned.  */
5791 
/* Implement __builtin_saveregs: store the anonymous argument registers
   to the stack and return a pointer to the first anonymous argument.
   Called via expand_builtin_saveregs from hppa_va_start.  */
static rtx
hppa_builtin_saveregs (void)
{
  rtx offset, dest;
  tree fntype = TREE_TYPE (current_function_decl);
  /* ARGADJ is UNITS_PER_WORD when the function's argument list is
     empty or does not end in a named parameter before the ellipsis —
     i.e. the varargs (as opposed to stdarg) case.  */
  int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
		   && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
		       != void_type_node)))
		? UNITS_PER_WORD : 0);

  if (argadj)
    offset = plus_constant (current_function_arg_offset_rtx, argadj);
  else
    offset = current_function_arg_offset_rtx;

  if (TARGET_64BIT)
    {
      int i, off;

      /* Adjust for varargs/stdarg differences.  Note the adjustment is
	 applied in the opposite direction from the 32-bit case above.  */
      if (argadj)
	offset = plus_constant (current_function_arg_offset_rtx, -argadj);
      else
	offset = current_function_arg_offset_rtx;

      /* We need to save %r26 .. %r19 inclusive starting at offset -64
	 from the incoming arg pointer and growing to larger addresses.  */
      for (i = 26, off = -64; i >= 19; i--, off += 8)
	emit_move_insn (gen_rtx_MEM (word_mode,
				     plus_constant (arg_pointer_rtx, off)),
			gen_rtx_REG (word_mode, i));

      /* The incoming args pointer points just beyond the flushback area;
	 normally this is not a serious concern.  However, when we are doing
	 varargs/stdargs we want to make the arg pointer point to the start
	 of the incoming argument area.  */
      emit_move_insn (virtual_incoming_args_rtx,
		      plus_constant (arg_pointer_rtx, -64));

      /* Now return a pointer to the first anonymous argument.  */
      return copy_to_reg (expand_binop (Pmode, add_optab,
					virtual_incoming_args_rtx,
					offset, 0, 0, OPTAB_LIB_WIDEN));
    }

  /* Store general registers on the stack.  The 32-bit runtime saves
     the four argument registers (%r23..%r26) below the arg pointer.  */
  dest = gen_rtx_MEM (BLKmode,
		      plus_constant (current_function_internal_arg_pointer,
				     -16));
  set_mem_alias_set (dest, get_varargs_alias_set ());
  set_mem_align (dest, BITS_PER_WORD);
  move_block_from_reg (23, dest, 4);

  /* move_block_from_reg will emit code to store the argument registers
     individually as scalar stores.

     However, other insns may later load from the same addresses for
     a structure load (passing a struct to a varargs routine).

     The alias code assumes that such aliasing can never happen, so we
     have to keep memory referencing insns from moving up beyond the
     last argument register store.  So we emit a blockage insn here.  */
  emit_insn (gen_blockage ());

  return copy_to_reg (expand_binop (Pmode, add_optab,
				    current_function_internal_arg_pointer,
				    offset, 0, 0, OPTAB_LIB_WIDEN));
}
5860 
/* Implement va_start.  The incoming NEXTARG is ignored: the save-area
   layout done by hppa_builtin_saveregs (reached through
   expand_builtin_saveregs) determines where the va_list points.  */
void
hppa_va_start (tree valist, rtx nextarg)
{
  nextarg = expand_builtin_saveregs ();
  std_expand_builtin_va_start (valist, nextarg);
}
5867 
/* Implement the gimplify_va_arg_expr hook: lower va_arg (VALIST, TYPE),
   appending any generated statements to PRE_P/POST_P, and return the
   tree expression for the fetched argument.  */
static tree
hppa_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p, tree *post_p)
{
  if (TARGET_64BIT)
    {
      /* Args grow upward.  We can use the generic routines.  */
      return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
    }
  else /* !TARGET_64BIT */
    {
      tree ptr = build_pointer_type (type);
      tree valist_type;
      tree t, u;
      unsigned int size, ofs;
      bool indirect;

      /* Arguments passed by invisible reference: fetch the pointer,
	 then dereference it below.  */
      indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
      if (indirect)
	{
	  type = ptr;
	  ptr = build_pointer_type (type);
	}
      size = int_size_in_bytes (type);
      valist_type = TREE_TYPE (valist);

      /* Args grow down.  Not handled by generic routines.  */

      u = fold_convert (valist_type, size_in_bytes (type));
      t = build2 (MINUS_EXPR, valist_type, valist, u);

      /* Copied from va-pa.h, but we probably don't need to align to
	 word size, since we generate and preserve that invariant.  */
      u = build_int_cst (valist_type, (size > 4 ? -8 : -4));
      t = build2 (BIT_AND_EXPR, valist_type, t, u);

      t = build2 (MODIFY_EXPR, valist_type, valist, t);

      /* Small arguments are right justified within their slot.  */
      ofs = (8 - size) % 4;
      if (ofs != 0)
	{
	  u = fold_convert (valist_type, size_int (ofs));
	  t = build2 (PLUS_EXPR, valist_type, t, u);
	}

      t = fold_convert (ptr, t);
      t = build_va_arg_indirect_ref (t);

      if (indirect)
	t = build_va_arg_indirect_ref (t);

      return t;
    }
}
5921 
5922 /* True if MODE is valid for the target.  By "valid", we mean able to
5923    be manipulated in non-trivial ways.  In particular, this means all
5924    the arithmetic is supported.
5925 
5926    Currently, TImode is not valid as the HP 64-bit runtime documentation
5927    doesn't document the alignment and calling conventions for this type.
5928    Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
5929    2 * BITS_PER_WORD isn't equal LONG_LONG_TYPE_SIZE.  */
5930 
5931 static bool
pa_scalar_mode_supported_p(enum machine_mode mode)5932 pa_scalar_mode_supported_p (enum machine_mode mode)
5933 {
5934   int precision = GET_MODE_PRECISION (mode);
5935 
5936   switch (GET_MODE_CLASS (mode))
5937     {
5938     case MODE_PARTIAL_INT:
5939     case MODE_INT:
5940       if (precision == CHAR_TYPE_SIZE)
5941 	return true;
5942       if (precision == SHORT_TYPE_SIZE)
5943 	return true;
5944       if (precision == INT_TYPE_SIZE)
5945 	return true;
5946       if (precision == LONG_TYPE_SIZE)
5947 	return true;
5948       if (precision == LONG_LONG_TYPE_SIZE)
5949 	return true;
5950       return false;
5951 
5952     case MODE_FLOAT:
5953       if (precision == FLOAT_TYPE_SIZE)
5954 	return true;
5955       if (precision == DOUBLE_TYPE_SIZE)
5956 	return true;
5957       if (precision == LONG_DOUBLE_TYPE_SIZE)
5958 	return true;
5959       return false;
5960 
5961     case MODE_DECIMAL_FLOAT:
5962       return false;
5963 
5964     default:
5965       gcc_unreachable ();
5966     }
5967 }
5968 
5969 /* This routine handles all the normal conditional branch sequences we
5970    might need to generate.  It handles compare immediate vs compare
5971    register, nullification of delay slots, varying length branches,
5972    negated branches, and all combinations of the above.  It returns the
5973    output appropriate to emit the branch corresponding to all given
5974    parameters.  */
5975 
/* operands[0] is the branch target, operands[1] and operands[2] the
   values being compared, and operands[3] the comparison (emitted via
   the %S3/%B3 output modifiers); operands[4] is set below for the long
   reversed-branch forms.  NEGATED requests the inverse condition.  */
const char *
output_cbranch (rtx *operands, int negated, rtx insn)
{
  /* Static so the assembled template remains valid after we return.  */
  static char buf[100];
  int useskip = 0;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot)
     is asking for a disaster.  This can happen when not optimizing and
     when jump optimization fails.

     While it is usually safe to emit nothing, this can fail if the
     preceding instruction is a nullified branch with an empty delay
     slot and the same branch target as this branch.  We could check
     for this but jump optimization should eliminate nop jumps.  It
     is always safe to emit a nop.  */
  if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
    return "nop";

  /* The doubleword form of the cmpib instruction doesn't have the LEU
     and GTU conditions while the cmpb instruction does.  Since we accept
     zero for cmpb, we must ensure that we use cmpb for the comparison.  */
  if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
    operands[2] = gen_rtx_REG (DImode, 0);
  if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
    operands[1] = gen_rtx_REG (DImode, 0);

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with a
     comclr instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */
  if (length == 4
      && next_real_insn (insn) != 0
      && get_attr_length (next_real_insn (insn)) == 4
      && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
      && nullify)
    useskip = 1;

  switch (length)
    {
      /* All short conditional branches except backwards with an unfilled
	 delay slot.  */
      case 4:
	if (useskip)
	  strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
	else
	  strcpy (buf, "{com%I2b,|cmp%I2b,}");
	/* "*" selects the doubleword condition forms.  */
	if (GET_MODE (operands[1]) == DImode)
	  strcat (buf, "*");
	if (negated)
	  strcat (buf, "%B3");
	else
	  strcat (buf, "%S3");
	if (useskip)
	  strcat (buf, " %2,%r1,%%r0");
	else if (nullify)
	  strcat (buf, ",n %2,%r1,%0");
	else
	  strcat (buf, " %2,%r1,%0");
	break;

     /* All long conditionals.  Note a short backward branch with an
	unfilled delay slot is treated just like a long backward branch
	with an unfilled delay slot.  */
      case 8:
	/* Handle weird backwards branch with a filled delay slot
	   which is nullified.  */
	if (dbr_sequence_length () != 0
	    && ! forward_branch_p (insn)
	    && nullify)
	  {
	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
	    if (GET_MODE (operands[1]) == DImode)
	      strcat (buf, "*");
	    if (negated)
	      strcat (buf, "%S3");
	    else
	      strcat (buf, "%B3");
	    strcat (buf, ",n %2,%r1,.+12\n\tb %0");
	  }
	/* Handle short backwards branch with an unfilled delay slot.
	   Using a comb;nop rather than comiclr;bl saves 1 cycle for both
	   taken and untaken branches.  */
	else if (dbr_sequence_length () == 0
		 && ! forward_branch_p (insn)
		 && INSN_ADDRESSES_SET_P ()
		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	  {
	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
	    if (GET_MODE (operands[1]) == DImode)
	      strcat (buf, "*");
	    if (negated)
	      strcat (buf, "%B3 %2,%r1,%0%#");
	    else
	      strcat (buf, "%S3 %2,%r1,%0%#");
	  }
	else
	  {
	    strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
	    if (GET_MODE (operands[1]) == DImode)
	      strcat (buf, "*");
	    if (negated)
	      strcat (buf, "%S3");
	    else
	      strcat (buf, "%B3");
	    if (nullify)
	      strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
	    else
	      strcat (buf, " %2,%r1,%%r0\n\tb %0");
	  }
	break;

      default:
	/* The reversed conditional branch must branch over one additional
	   instruction if the delay slot is filled and needs to be extracted
	   by output_lbranch.  If the delay slot is empty or this is a
	   nullified forward branch, the instruction after the reversed
	   condition branch must be nullified.  */
	if (dbr_sequence_length () == 0
	    || (nullify && forward_branch_p (insn)))
	  {
	    nullify = 1;
	    xdelay = 0;
	    operands[4] = GEN_INT (length);
	  }
	else
	  {
	    xdelay = 1;
	    operands[4] = GEN_INT (length + 4);
	  }

	/* Create a reversed conditional branch which branches around
	   the following insns.  */
	if (GET_MODE (operands[1]) != DImode)
	  {
	    if (nullify)
	      {
		if (negated)
		  strcpy (buf,
		    "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
		else
		  strcpy (buf,
		    "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
	      }
	    else
	      {
		if (negated)
		  strcpy (buf,
		    "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
		else
		  strcpy (buf,
		    "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
	      }
	  }
	else
	  {
	    if (nullify)
	      {
		if (negated)
		  strcpy (buf,
		    "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
		else
		  strcpy (buf,
		    "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
	      }
	    else
	      {
		if (negated)
		  strcpy (buf,
		    "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
		else
		  strcpy (buf,
		    "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
	      }
	  }

	output_asm_insn (buf, operands);
	return output_lbranch (operands[0], insn, xdelay);
    }
  return buf;
}
6169 
6170 /* This routine handles output of long unconditional branches that
6171    exceed the maximum range of a simple branch instruction.  Since
6172    we don't have a register available for the branch, we save register
6173    %r1 in the frame marker, load the branch destination DEST into %r1,
6174    execute the branch, and restore %r1 in the delay slot of the branch.
6175 
6176    Since long branches may have an insn in the delay slot and the
6177    delay slot is used to restore %r1, we in general need to extract
6178    this insn and execute it before the branch.  However, to facilitate
6179    use of this function by conditional branches, we also provide an
6180    option to not extract the delay insn so that it will be emitted
6181    after the long branch.  So, if there is an insn in the delay slot,
6182    it is extracted if XDELAY is nonzero.
6183 
6184    The lengths of the various long-branch sequences are 20, 16 and 24
6185    bytes for the portable runtime, non-PIC and PIC cases, respectively.  */
6186 
/* See the block comment above: output a long unconditional branch to
   DEST from INSN, saving/restoring %r1 around the branch; if XDELAY is
   nonzero, any insn in the delay slot is emitted first.  */
const char *
output_lbranch (rtx dest, rtx insn, int xdelay)
{
  rtx xoperands[2];

  xoperands[0] = dest;

  /* First, free up the delay slot.  */
  if (xdelay && dbr_sequence_length () != 0)
    {
      /* We can't handle a jump in the delay slot.  */
      gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN);

      /* Emit the delay-slot insn ahead of the branch.  */
      final_scan_insn (NEXT_INSN (insn), asm_out_file,
		       optimize, 0, NULL);

      /* Now delete the delay insn.  */
      PUT_CODE (NEXT_INSN (insn), NOTE);
      NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
      NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
    }

  /* Output an insn to save %r1.  The runtime documentation doesn't
     specify whether the "Clean Up" slot in the callers frame can
     be clobbered by the callee.  It isn't copied by HP's builtin
     alloca, so this suggests that it can be clobbered if necessary.
     The "Static Link" location is copied by HP builtin alloca, so
     we avoid using it.  Using the cleanup slot might be a problem
     if we have to interoperate with languages that pass cleanup
     information.  However, it should be possible to handle these
     situations with GCC's asm feature.

     The "Current RP" slot is reserved for the called procedure, so
     we try to use it when we don't have a frame of our own.  It's
     rather unlikely that we won't have a frame when we need to emit
     a very long branch.

     Really the way to go long term is a register scavenger; goto
     the target of the jump and find a register which we can use
     as a scratch to hold the value in %r1.  Then, we wouldn't have
     to free up the delay slot or clobber a slot that may be needed
     for other purposes.  */
  if (TARGET_64BIT)
    {
      if (actual_fsize == 0 && !regs_ever_live[2])
	/* Use the return pointer slot in the frame marker.  */
	output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
      else
	/* Use the slot at -40 in the frame marker since HP builtin
	   alloca doesn't copy it.  */
	output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
    }
  else
    {
      if (actual_fsize == 0 && !regs_ever_live[2])
	/* Use the return pointer slot in the frame marker.  */
	output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
      else
	/* Use the "Clean Up" slot in the frame marker.  In GCC,
	   the only other use of this location is for copying a
	   floating point double argument from a floating-point
	   register to two general registers.  The copy is done
	   as an "atomic" operation when outputting a call, so it
	   won't interfere with our using the location here.  */
	output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
    }

  if (TARGET_PORTABLE_RUNTIME)
    {
      /* Load the absolute address of the target and branch.  */
      output_asm_insn ("ldil L'%0,%%r1", xoperands);
      output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
      output_asm_insn ("bv %%r0(%%r1)", xoperands);
    }
  else if (flag_pic)
    {
      /* PIC: compute the target address pc-relative in %r1.  */
      output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
      if (TARGET_SOM || !TARGET_GAS)
	{
	  xoperands[1] = gen_label_rtx ();
	  output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
	  (*targetm.asm_out.internal_label) (asm_out_file, "L",
					     CODE_LABEL_NUMBER (xoperands[1]));
	  output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
	}
      else
	{
	  output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
	  output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
	}
      output_asm_insn ("bv %%r0(%%r1)", xoperands);
    }
  else
    /* Now output a very long branch to the original target.  */
    output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);

  /* Now restore the value of %r1 in the delay slot.  The slot chosen
     here must match the one used to save %r1 above.  */
  if (TARGET_64BIT)
    {
      if (actual_fsize == 0 && !regs_ever_live[2])
	return "ldd -16(%%r30),%%r1";
      else
	return "ldd -40(%%r30),%%r1";
    }
  else
    {
      if (actual_fsize == 0 && !regs_ever_live[2])
	return "ldw -20(%%r30),%%r1";
      else
	return "ldw -12(%%r30),%%r1";
    }
}
6298 
6299 /* This routine handles all the branch-on-bit conditional branch sequences we
6300    might need to generate.  It handles nullification of delay slots,
6301    varying length branches, negated branches and all combinations of the
6302    above.  it returns the appropriate output template to emit the branch.  */
6303 
6304 const char *
output_bb(rtx * operands ATTRIBUTE_UNUSED,int negated,rtx insn,int which)6305 output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6306 {
6307   static char buf[100];
6308   int useskip = 0;
6309   int nullify = INSN_ANNULLED_BRANCH_P (insn);
6310   int length = get_attr_length (insn);
6311   int xdelay;
6312 
6313   /* A conditional branch to the following instruction (e.g. the delay slot) is
6314      asking for a disaster.  I do not think this can happen as this pattern
6315      is only used when optimizing; jump optimization should eliminate the
6316      jump.  But be prepared just in case.  */
6317 
6318   if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6319     return "nop";
6320 
6321   /* If this is a long branch with its delay slot unfilled, set `nullify'
6322      as it can nullify the delay slot and save a nop.  */
6323   if (length == 8 && dbr_sequence_length () == 0)
6324     nullify = 1;
6325 
6326   /* If this is a short forward conditional branch which did not get
6327      its delay slot filled, the delay slot can still be nullified.  */
6328   if (! nullify && length == 4 && dbr_sequence_length () == 0)
6329     nullify = forward_branch_p (insn);
6330 
6331   /* A forward branch over a single nullified insn can be done with a
6332      extrs instruction.  This avoids a single cycle penalty due to
6333      mis-predicted branch if we fall through (branch not taken).  */
6334 
6335   if (length == 4
6336       && next_real_insn (insn) != 0
6337       && get_attr_length (next_real_insn (insn)) == 4
6338       && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6339       && nullify)
6340     useskip = 1;
6341 
6342   switch (length)
6343     {
6344 
6345       /* All short conditional branches except backwards with an unfilled
6346 	 delay slot.  */
6347       case 4:
6348 	if (useskip)
6349 	  strcpy (buf, "{extrs,|extrw,s,}");
6350 	else
6351 	  strcpy (buf, "bb,");
6352 	if (useskip && GET_MODE (operands[0]) == DImode)
6353 	  strcpy (buf, "extrd,s,*");
6354 	else if (GET_MODE (operands[0]) == DImode)
6355 	  strcpy (buf, "bb,*");
6356 	if ((which == 0 && negated)
6357 	     || (which == 1 && ! negated))
6358 	  strcat (buf, ">=");
6359 	else
6360 	  strcat (buf, "<");
6361 	if (useskip)
6362 	  strcat (buf, " %0,%1,1,%%r0");
6363 	else if (nullify && negated)
6364 	  strcat (buf, ",n %0,%1,%3");
6365 	else if (nullify && ! negated)
6366 	  strcat (buf, ",n %0,%1,%2");
6367 	else if (! nullify && negated)
6368 	  strcat (buf, "%0,%1,%3");
6369 	else if (! nullify && ! negated)
6370 	  strcat (buf, " %0,%1,%2");
6371 	break;
6372 
6373      /* All long conditionals.  Note a short backward branch with an
6374 	unfilled delay slot is treated just like a long backward branch
6375 	with an unfilled delay slot.  */
6376       case 8:
6377 	/* Handle weird backwards branch with a filled delay slot
6378 	   which is nullified.  */
6379 	if (dbr_sequence_length () != 0
6380 	    && ! forward_branch_p (insn)
6381 	    && nullify)
6382 	  {
6383 	    strcpy (buf, "bb,");
6384 	    if (GET_MODE (operands[0]) == DImode)
6385 	      strcat (buf, "*");
6386 	    if ((which == 0 && negated)
6387 		|| (which == 1 && ! negated))
6388 	      strcat (buf, "<");
6389 	    else
6390 	      strcat (buf, ">=");
6391 	    if (negated)
6392 	      strcat (buf, ",n %0,%1,.+12\n\tb %3");
6393 	    else
6394 	      strcat (buf, ",n %0,%1,.+12\n\tb %2");
6395 	  }
6396 	/* Handle short backwards branch with an unfilled delay slot.
6397 	   Using a bb;nop rather than extrs;bl saves 1 cycle for both
6398 	   taken and untaken branches.  */
6399 	else if (dbr_sequence_length () == 0
6400 		 && ! forward_branch_p (insn)
6401 		 && INSN_ADDRESSES_SET_P ()
6402 		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6403 				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6404 	  {
6405 	    strcpy (buf, "bb,");
6406 	    if (GET_MODE (operands[0]) == DImode)
6407 	      strcat (buf, "*");
6408 	    if ((which == 0 && negated)
6409 		|| (which == 1 && ! negated))
6410 	      strcat (buf, ">=");
6411 	    else
6412 	      strcat (buf, "<");
6413 	    if (negated)
6414 	      strcat (buf, " %0,%1,%3%#");
6415 	    else
6416 	      strcat (buf, " %0,%1,%2%#");
6417 	  }
6418 	else
6419 	  {
6420 	    if (GET_MODE (operands[0]) == DImode)
6421 	      strcpy (buf, "extrd,s,*");
6422 	    else
6423 	      strcpy (buf, "{extrs,|extrw,s,}");
6424 	    if ((which == 0 && negated)
6425 		|| (which == 1 && ! negated))
6426 	      strcat (buf, "<");
6427 	    else
6428 	      strcat (buf, ">=");
6429 	    if (nullify && negated)
6430 	      strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6431 	    else if (nullify && ! negated)
6432 	      strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6433 	    else if (negated)
6434 	      strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6435 	    else
6436 	      strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6437 	  }
6438 	break;
6439 
6440       default:
6441 	/* The reversed conditional branch must branch over one additional
6442 	   instruction if the delay slot is filled and needs to be extracted
6443 	   by output_lbranch.  If the delay slot is empty or this is a
6444 	   nullified forward branch, the instruction after the reversed
6445 	   condition branch must be nullified.  */
6446 	if (dbr_sequence_length () == 0
6447 	    || (nullify && forward_branch_p (insn)))
6448 	  {
6449 	    nullify = 1;
6450 	    xdelay = 0;
6451 	    operands[4] = GEN_INT (length);
6452 	  }
6453 	else
6454 	  {
6455 	    xdelay = 1;
6456 	    operands[4] = GEN_INT (length + 4);
6457 	  }
6458 
6459 	if (GET_MODE (operands[0]) == DImode)
6460 	  strcpy (buf, "bb,*");
6461 	else
6462 	  strcpy (buf, "bb,");
6463 	if ((which == 0 && negated)
6464 	    || (which == 1 && !negated))
6465 	  strcat (buf, "<");
6466 	else
6467 	  strcat (buf, ">=");
6468 	if (nullify)
6469 	  strcat (buf, ",n %0,%1,.+%4");
6470 	else
6471 	  strcat (buf, " %0,%1,.+%4");
6472 	output_asm_insn (buf, operands);
6473 	return output_lbranch (negated ? operands[3] : operands[2],
6474 			       insn, xdelay);
6475     }
6476   return buf;
6477 }
6478 
6479 /* This routine handles all the branch-on-variable-bit conditional branch
6480    sequences we might need to generate.  It handles nullification of delay
6481    slots, varying length branches, negated branches and all combinations
6482    of the above.  it returns the appropriate output template to emit the
6483    branch.  */
6484 
6485 const char *
output_bvb(rtx * operands ATTRIBUTE_UNUSED,int negated,rtx insn,int which)6486 output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6487 {
6488   static char buf[100];
6489   int useskip = 0;
6490   int nullify = INSN_ANNULLED_BRANCH_P (insn);
6491   int length = get_attr_length (insn);
6492   int xdelay;
6493 
6494   /* A conditional branch to the following instruction (e.g. the delay slot) is
6495      asking for a disaster.  I do not think this can happen as this pattern
6496      is only used when optimizing; jump optimization should eliminate the
6497      jump.  But be prepared just in case.  */
6498 
6499   if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6500     return "nop";
6501 
6502   /* If this is a long branch with its delay slot unfilled, set `nullify'
6503      as it can nullify the delay slot and save a nop.  */
6504   if (length == 8 && dbr_sequence_length () == 0)
6505     nullify = 1;
6506 
6507   /* If this is a short forward conditional branch which did not get
6508      its delay slot filled, the delay slot can still be nullified.  */
6509   if (! nullify && length == 4 && dbr_sequence_length () == 0)
6510     nullify = forward_branch_p (insn);
6511 
6512   /* A forward branch over a single nullified insn can be done with a
6513      extrs instruction.  This avoids a single cycle penalty due to
6514      mis-predicted branch if we fall through (branch not taken).  */
6515 
6516   if (length == 4
6517       && next_real_insn (insn) != 0
6518       && get_attr_length (next_real_insn (insn)) == 4
6519       && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6520       && nullify)
6521     useskip = 1;
6522 
6523   switch (length)
6524     {
6525 
6526       /* All short conditional branches except backwards with an unfilled
6527 	 delay slot.  */
6528       case 4:
6529 	if (useskip)
6530 	  strcpy (buf, "{vextrs,|extrw,s,}");
6531 	else
6532 	  strcpy (buf, "{bvb,|bb,}");
6533 	if (useskip && GET_MODE (operands[0]) == DImode)
6534 	  strcpy (buf, "extrd,s,*");
6535 	else if (GET_MODE (operands[0]) == DImode)
6536 	  strcpy (buf, "bb,*");
6537 	if ((which == 0 && negated)
6538 	     || (which == 1 && ! negated))
6539 	  strcat (buf, ">=");
6540 	else
6541 	  strcat (buf, "<");
6542 	if (useskip)
6543 	  strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6544 	else if (nullify && negated)
6545 	  strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6546 	else if (nullify && ! negated)
6547 	  strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6548 	else if (! nullify && negated)
6549 	  strcat (buf, "{%0,%3|%0,%%sar,%3}");
6550 	else if (! nullify && ! negated)
6551 	  strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6552 	break;
6553 
6554      /* All long conditionals.  Note a short backward branch with an
6555 	unfilled delay slot is treated just like a long backward branch
6556 	with an unfilled delay slot.  */
6557       case 8:
6558 	/* Handle weird backwards branch with a filled delay slot
6559 	   which is nullified.  */
6560 	if (dbr_sequence_length () != 0
6561 	    && ! forward_branch_p (insn)
6562 	    && nullify)
6563 	  {
6564 	    strcpy (buf, "{bvb,|bb,}");
6565 	    if (GET_MODE (operands[0]) == DImode)
6566 	      strcat (buf, "*");
6567 	    if ((which == 0 && negated)
6568 		|| (which == 1 && ! negated))
6569 	      strcat (buf, "<");
6570 	    else
6571 	      strcat (buf, ">=");
6572 	    if (negated)
6573 	      strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6574 	    else
6575 	      strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6576 	  }
6577 	/* Handle short backwards branch with an unfilled delay slot.
6578 	   Using a bb;nop rather than extrs;bl saves 1 cycle for both
6579 	   taken and untaken branches.  */
6580 	else if (dbr_sequence_length () == 0
6581 		 && ! forward_branch_p (insn)
6582 		 && INSN_ADDRESSES_SET_P ()
6583 		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6584 				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6585 	  {
6586 	    strcpy (buf, "{bvb,|bb,}");
6587 	    if (GET_MODE (operands[0]) == DImode)
6588 	      strcat (buf, "*");
6589 	    if ((which == 0 && negated)
6590 		|| (which == 1 && ! negated))
6591 	      strcat (buf, ">=");
6592 	    else
6593 	      strcat (buf, "<");
6594 	    if (negated)
6595 	      strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6596 	    else
6597 	      strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6598 	  }
6599 	else
6600 	  {
6601 	    strcpy (buf, "{vextrs,|extrw,s,}");
6602 	    if (GET_MODE (operands[0]) == DImode)
6603 	      strcpy (buf, "extrd,s,*");
6604 	    if ((which == 0 && negated)
6605 		|| (which == 1 && ! negated))
6606 	      strcat (buf, "<");
6607 	    else
6608 	      strcat (buf, ">=");
6609 	    if (nullify && negated)
6610 	      strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6611 	    else if (nullify && ! negated)
6612 	      strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6613 	    else if (negated)
6614 	      strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6615 	    else
6616 	      strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6617 	  }
6618 	break;
6619 
6620       default:
6621 	/* The reversed conditional branch must branch over one additional
6622 	   instruction if the delay slot is filled and needs to be extracted
6623 	   by output_lbranch.  If the delay slot is empty or this is a
6624 	   nullified forward branch, the instruction after the reversed
6625 	   condition branch must be nullified.  */
6626 	if (dbr_sequence_length () == 0
6627 	    || (nullify && forward_branch_p (insn)))
6628 	  {
6629 	    nullify = 1;
6630 	    xdelay = 0;
6631 	    operands[4] = GEN_INT (length);
6632 	  }
6633 	else
6634 	  {
6635 	    xdelay = 1;
6636 	    operands[4] = GEN_INT (length + 4);
6637 	  }
6638 
6639 	if (GET_MODE (operands[0]) == DImode)
6640 	  strcpy (buf, "bb,*");
6641 	else
6642 	  strcpy (buf, "{bvb,|bb,}");
6643 	if ((which == 0 && negated)
6644 	    || (which == 1 && !negated))
6645 	  strcat (buf, "<");
6646 	else
6647 	  strcat (buf, ">=");
6648 	if (nullify)
6649 	  strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
6650 	else
6651 	  strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
6652 	output_asm_insn (buf, operands);
6653 	return output_lbranch (negated ? operands[3] : operands[2],
6654 			       insn, xdelay);
6655     }
6656   return buf;
6657 }
6658 
/* Return the output template for emitting a dbra type insn.

   Operand usage: %0 is the loop counter, %1 the increment, %2 the
   comparison condition (emitted via the %C/%N/%S/%B modifiers), and
   %3 the branch target; operands[4]/operands[5] are used as scratch
   below.  WHICH_ALTERNATIVE selects where the counter lives:
   0 = general register, 1 = FP register, 2 = memory.

   Note it may perform some output operations on its own before
   returning the final output string.  */
const char *
output_dbra (rtx *operands, rtx insn, int which_alternative)
{
  int length = get_attr_length (insn);

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  Be prepared!  */

  if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
    {
      /* No branch needed; just perform the counter update.  */
      if (which_alternative == 0)
	return "ldo %1(%0),%0";
      else if (which_alternative == 1)
	{
	  /* FP counter: bounce the value through memory and a general
	     register to do the add, then reload the FP register.  */
	  output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
	  output_asm_insn ("ldw -16(%%r30),%4", operands);
	  output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
	  return "{fldws|fldw} -16(%%r30),%0";
	}
      else
	{
	  /* Memory counter: load, add, store back.  */
	  output_asm_insn ("ldw %0,%4", operands);
	  return "ldo %1(%4),%4\n\tstw %4,%0";
	}
    }

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int xdelay;

      /* If this is a long branch with its delay slot unfilled, set `nullify'
	 as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
	nullify = 1;

      /* If this is a short forward conditional branch which did not get
	 its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
	nullify = forward_branch_p (insn);

      switch (length)
	{
	case 4:
	  if (nullify)
	    return "addib,%C2,n %1,%0,%3";
	  else
	    return "addib,%C2 %1,%0,%3";

	case 8:
	  /* Handle weird backwards branch with a filled delay slot
	     which is nullified.  */
	  if (dbr_sequence_length () != 0
	      && ! forward_branch_p (insn)
	      && nullify)
	    return "addib,%N2,n %1,%0,.+12\n\tb %3";
	  /* Handle short backwards branch with an unfilled delay slot.
	     Using a addb;nop rather than addi;bl saves 1 cycle for both
	     taken and untaken branches.  */
	  else if (dbr_sequence_length () == 0
		   && ! forward_branch_p (insn)
		   && INSN_ADDRESSES_SET_P ()
		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	      return "addib,%C2 %1,%0,%3%#";

	  /* Handle normal cases.  */
	  if (nullify)
	    return "addi,%N2 %1,%0,%0\n\tb,n %3";
	  else
	    return "addi,%N2 %1,%0,%0\n\tb %3";

	default:
	  /* The reversed conditional branch must branch over one additional
	     instruction if the delay slot is filled and needs to be extracted
	     by output_lbranch.  If the delay slot is empty or this is a
	     nullified forward branch, the instruction after the reversed
	     condition branch must be nullified.  */
	  if (dbr_sequence_length () == 0
	      || (nullify && forward_branch_p (insn)))
	    {
	      nullify = 1;
	      xdelay = 0;
	      operands[4] = GEN_INT (length);
	    }
	  else
	    {
	      xdelay = 1;
	      operands[4] = GEN_INT (length + 4);
	    }

	  if (nullify)
	    output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
	  else
	    output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);

	  return output_lbranch (operands[3], insn, xdelay);
	}

    }
  /* Deal with gross reload from FP register case.  */
  else if (which_alternative == 1)
    {
      /* Move loop counter from FP register to MEM then into a GR,
	 increment the GR, store the GR into MEM, and finally reload
	 the FP register from MEM from within the branch's delay slot.  */
      output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
		       operands);
      output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
      if (length == 24)
	return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
      else if (length == 28)
	return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
      else
	{
	  /* Out-of-reach target: branch over the reload, then emit a
	     long branch to the real target.  */
	  operands[5] = GEN_INT (length - 16);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
	  return output_lbranch (operands[3], insn, 0);
	}
    }
  /* Deal with gross reload from memory case.  */
  else
    {
      /* Reload loop counter from memory, the store back to memory
	 happens in the branch's delay slot.  */
      output_asm_insn ("ldw %0,%4", operands);
      if (length == 12)
	return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
      else if (length == 16)
	return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
      else
	{
	  /* Out-of-reach target: skip the store, then long branch.  */
	  operands[5] = GEN_INT (length - 4);
	  output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
	  return output_lbranch (operands[3], insn, 0);
	}
    }
}
6802 
/* Return the output template for emitting a movb type insn.

   Operand usage: %1 is the source register, %0 the destination, %2 the
   branch condition (emitted via the %C/%N/%S/%B modifiers), and %3 the
   branch target; operands[4] is used as scratch below.
   WHICH_ALTERNATIVE selects the destination kind: 0 = general register,
   1 = FP register, 2 = memory, 3 = the SAR register.  When
   REVERSE_COMPARISON is set, the condition in operands[2] is inverted
   in place before use.

   Note it may perform some output operations on its own before
   returning the final output string.  */
const char *
output_movb (rtx *operands, rtx insn, int which_alternative,
	     int reverse_comparison)
{
  int length = get_attr_length (insn);

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  Be prepared!  */

  if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
    {
      /* No branch needed; just perform the move.  */
      if (which_alternative == 0)
	return "copy %1,%0";
      else if (which_alternative == 1)
	{
	  /* FP destination: bounce the value through memory.  */
	  output_asm_insn ("stw %1,-16(%%r30)", operands);
	  return "{fldws|fldw} -16(%%r30),%0";
	}
      else if (which_alternative == 2)
	return "stw %1,%0";
      else
	return "mtsar %r1";
    }

  /* Support the second variant.  */
  if (reverse_comparison)
    PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int xdelay;

      /* If this is a long branch with its delay slot unfilled, set `nullify'
	 as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
	nullify = 1;

      /* If this is a short forward conditional branch which did not get
	 its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
	nullify = forward_branch_p (insn);

      switch (length)
	{
	case 4:
	  if (nullify)
	    return "movb,%C2,n %1,%0,%3";
	  else
	    return "movb,%C2 %1,%0,%3";

	case 8:
	  /* Handle weird backwards branch with a filled delay slot
	     which is nullified.  */
	  if (dbr_sequence_length () != 0
	      && ! forward_branch_p (insn)
	      && nullify)
	    return "movb,%N2,n %1,%0,.+12\n\tb %3";

	  /* Handle short backwards branch with an unfilled delay slot.
	     Using a movb;nop rather than or;bl saves 1 cycle for both
	     taken and untaken branches.  */
	  else if (dbr_sequence_length () == 0
		   && ! forward_branch_p (insn)
		   && INSN_ADDRESSES_SET_P ()
		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	    return "movb,%C2 %1,%0,%3%#";
	  /* Handle normal cases.  */
	  if (nullify)
	    return "or,%N2 %1,%%r0,%0\n\tb,n %3";
	  else
	    return "or,%N2 %1,%%r0,%0\n\tb %3";

	default:
	  /* The reversed conditional branch must branch over one additional
	     instruction if the delay slot is filled and needs to be extracted
	     by output_lbranch.  If the delay slot is empty or this is a
	     nullified forward branch, the instruction after the reversed
	     condition branch must be nullified.  */
	  if (dbr_sequence_length () == 0
	      || (nullify && forward_branch_p (insn)))
	    {
	      nullify = 1;
	      xdelay = 0;
	      operands[4] = GEN_INT (length);
	    }
	  else
	    {
	      xdelay = 1;
	      operands[4] = GEN_INT (length + 4);
	    }

	  if (nullify)
	    output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
	  else
	    output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);

	  return output_lbranch (operands[3], insn, xdelay);
	}
    }
  /* Deal with gross reload for FP destination register case.  */
  else if (which_alternative == 1)
    {
      /* Move source register to MEM, perform the branch test, then
	 finally load the FP register from MEM from within the branch's
	 delay slot.  */
      output_asm_insn ("stw %1,-16(%%r30)", operands);
      if (length == 12)
	return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
      else if (length == 16)
	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
      else
	{
	  /* Out-of-reach target: skip the reload, then long branch.  */
	  operands[4] = GEN_INT (length - 4);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
	  return output_lbranch (operands[3], insn, 0);
	}
    }
  /* Deal with gross reload from memory case.  */
  else if (which_alternative == 2)
    {
      /* Reload loop counter from memory, the store back to memory
	 happens in the branch's delay slot.  */
      if (length == 8)
	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
      else if (length == 12)
	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
      else
	{
	  /* Out-of-reach target: skip the store, then long branch.  */
	  operands[4] = GEN_INT (length);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
			   operands);
	  return output_lbranch (operands[3], insn, 0);
	}
    }
  /* Handle SAR as a destination.  */
  else
    {
      if (length == 8)
	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
      else if (length == 12)
	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
      else
	{
	  /* Out-of-reach target: skip the mtsar, then long branch.  */
	  operands[4] = GEN_INT (length);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
			   operands);
	  return output_lbranch (operands[3], insn, 0);
	}
    }
}
6960 
6961 /* Copy any FP arguments in INSN into integer registers.  */
6962 static void
copy_fp_args(rtx insn)6963 copy_fp_args (rtx insn)
6964 {
6965   rtx link;
6966   rtx xoperands[2];
6967 
6968   for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6969     {
6970       int arg_mode, regno;
6971       rtx use = XEXP (link, 0);
6972 
6973       if (! (GET_CODE (use) == USE
6974 	  && GET_CODE (XEXP (use, 0)) == REG
6975 	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6976 	continue;
6977 
6978       arg_mode = GET_MODE (XEXP (use, 0));
6979       regno = REGNO (XEXP (use, 0));
6980 
6981       /* Is it a floating point register?  */
6982       if (regno >= 32 && regno <= 39)
6983 	{
6984 	  /* Copy the FP register into an integer register via memory.  */
6985 	  if (arg_mode == SFmode)
6986 	    {
6987 	      xoperands[0] = XEXP (use, 0);
6988 	      xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
6989 	      output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
6990 	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6991 	    }
6992 	  else
6993 	    {
6994 	      xoperands[0] = XEXP (use, 0);
6995 	      xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
6996 	      output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
6997 	      output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
6998 	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6999 	    }
7000 	}
7001     }
7002 }
7003 
7004 /* Compute length of the FP argument copy sequence for INSN.  */
7005 static int
length_fp_args(rtx insn)7006 length_fp_args (rtx insn)
7007 {
7008   int length = 0;
7009   rtx link;
7010 
7011   for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7012     {
7013       int arg_mode, regno;
7014       rtx use = XEXP (link, 0);
7015 
7016       if (! (GET_CODE (use) == USE
7017 	  && GET_CODE (XEXP (use, 0)) == REG
7018 	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7019 	continue;
7020 
7021       arg_mode = GET_MODE (XEXP (use, 0));
7022       regno = REGNO (XEXP (use, 0));
7023 
7024       /* Is it a floating point register?  */
7025       if (regno >= 32 && regno <= 39)
7026 	{
7027 	  if (arg_mode == SFmode)
7028 	    length += 8;
7029 	  else
7030 	    length += 12;
7031 	}
7032     }
7033 
7034   return length;
7035 }
7036 
7037 /* Return the attribute length for the millicode call instruction INSN.
7038    The length must match the code generated by output_millicode_call.
7039    We include the delay slot in the returned length as it is better to
7040    over estimate the length than to under estimate it.  */
7041 
7042 int
attr_length_millicode_call(rtx insn)7043 attr_length_millicode_call (rtx insn)
7044 {
7045   unsigned long distance = -1;
7046   unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7047 
7048   if (INSN_ADDRESSES_SET_P ())
7049     {
7050       distance = (total + insn_current_reference_address (insn));
7051       if (distance < total)
7052 	distance = -1;
7053     }
7054 
7055   if (TARGET_64BIT)
7056     {
7057       if (!TARGET_LONG_CALLS && distance < 7600000)
7058 	return 8;
7059 
7060       return 20;
7061     }
7062   else if (TARGET_PORTABLE_RUNTIME)
7063     return 24;
7064   else
7065     {
7066       if (!TARGET_LONG_CALLS && distance < 240000)
7067 	return 8;
7068 
7069       if (TARGET_LONG_ABS_CALL && !flag_pic)
7070 	return 12;
7071 
7072       return 24;
7073     }
7074 }
7075 
/* INSN is a function call.  It may have an unconditional jump
   in its delay slot.

   CALL_DEST is the routine we are calling.

   Emits the millicode call sequence appropriate for the target
   configuration (64-bit, portable runtime, absolute, or PIC) and
   returns "".  If the delay slot holds an unconditional jump, the
   jump is folded into the call's return address adjustment (when in
   reach) and then deleted.  */

const char *
output_millicode_call (rtx insn, rtx call_dest)
{
  int attr_length = get_attr_length (insn);
  int seq_length = dbr_sequence_length ();
  int distance;
  rtx seq_insn;
  rtx xoperands[3];

  xoperands[0] = call_dest;
  /* Return pointer: %r2 on 64-bit, %r31 otherwise.  */
  xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);

  /* Handle the common case where we are sure that the branch will
     reach the beginning of the $CODE$ subspace.  The within reach
     form of the $$sh_func_adrs call has a length of 28.  Because
     it has an attribute type of multi, it never has a nonzero
     sequence length.  The length of the $$sh_func_adrs is the same
     as certain out of reach PIC calls to other routines.  */
  if (!TARGET_LONG_CALLS
      && ((seq_length == 0
	   && (attr_length == 12
	       || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
	  || (seq_length != 0 && attr_length == 8)))
    {
      output_asm_insn ("{bl|b,l} %0,%2", xoperands);
    }
  else
    {
      if (TARGET_64BIT)
	{
	  /* It might seem that one insn could be saved by accessing
	     the millicode function using the linkage table.  However,
	     this doesn't work in shared libraries and other dynamically
	     loaded objects.  Using a pc-relative sequence also avoids
	     problems related to the implicit use of the gp register.  */
	  output_asm_insn ("b,l .+8,%%r1", xoperands);

	  if (TARGET_GAS)
	    {
	      output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
	      output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
	    }
	  else
	    {
	      /* Without GAS, compute the offset relative to a local
		 label emitted right here.  */
	      xoperands[1] = gen_label_rtx ();
	      output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
	      (*targetm.asm_out.internal_label) (asm_out_file, "L",
					 CODE_LABEL_NUMBER (xoperands[1]));
	      output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
	    }

	  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
	}
      else if (TARGET_PORTABLE_RUNTIME)
	{
	  /* Pure portable runtime doesn't allow be/ble; we also don't
	     have PIC support in the assembler/linker, so this sequence
	     is needed.  */

	  /* Get the address of our target into %r1.  */
	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
	  output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);

	  /* Get our return address into %r31.  */
	  output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
	  output_asm_insn ("addi 8,%%r31,%%r31", xoperands);

	  /* Jump to our target address in %r1.  */
	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
	}
      else if (!flag_pic)
	{
	  /* Absolute addressing: load the target address directly.  */
	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
	  if (TARGET_PA_20)
	    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
	  else
	    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
	}
      else
	{
	  /* PIC: materialize the pc, set up the return address in %r31,
	     then compute the target address pc-relatively.  */
	  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
	  output_asm_insn ("addi 16,%%r1,%%r31", xoperands);

	  if (TARGET_SOM || !TARGET_GAS)
	    {
	      /* The HP assembler can generate relocations for the
		 difference of two symbols.  GAS can do this for a
		 millicode symbol but not an arbitrary external
		 symbol when generating SOM output.  */
	      xoperands[1] = gen_label_rtx ();
	      (*targetm.asm_out.internal_label) (asm_out_file, "L",
					 CODE_LABEL_NUMBER (xoperands[1]));
	      output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
	      output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
	    }
	  else
	    {
	      output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
	      output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
			       xoperands);
	    }

	  /* Jump to our target address in %r1.  */
	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
	}
    }

  if (seq_length == 0)
    output_asm_insn ("nop", xoperands);

  /* We are done if there isn't a jump in the delay slot.  */
  if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
    return "";

  /* This call has an unconditional jump in its delay slot.  */
  xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);

  /* See if the return address can be adjusted.  Use the containing
     sequence insn's address.  */
  if (INSN_ADDRESSES_SET_P ())
    {
      seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
      distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
		  - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);

      if (VAL_14_BITS_P (distance))
	{
	  /* Adjust the return pointer so the call returns directly at
	     the jump's target instead of executing the jump.  */
	  xoperands[1] = gen_label_rtx ();
	  output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
	  (*targetm.asm_out.internal_label) (asm_out_file, "L",
					     CODE_LABEL_NUMBER (xoperands[1]));
	}
      else
	/* ??? This branch may not reach its target.  */
	output_asm_insn ("nop\n\tb,n %0", xoperands);
    }
  else
    /* ??? This branch may not reach its target.  */
    output_asm_insn ("nop\n\tb,n %0", xoperands);

  /* Delete the jump.  */
  PUT_CODE (NEXT_INSN (insn), NOTE);
  NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
  NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;

  return "";
}
7228 
7229 /* Return the attribute length of the call instruction INSN.  The SIBCALL
7230    flag indicates whether INSN is a regular call or a sibling call.  The
7231    length returned must be longer than the code actually generated by
7232    output_call.  Since branch shortening is done before delay branch
7233    sequencing, there is no way to determine whether or not the delay
7234    slot will be filled during branch shortening.  Even when the delay
7235    slot is filled, we may have to add a nop if the delay slot contains
7236    a branch that can't reach its target.  Thus, we always have to include
7237    the delay slot in the length estimate.  This used to be done in
7238    pa_adjust_insn_length but we do it here now as some sequences always
7239    fill the delay slot and we can save four bytes in the estimate for
7240    these sequences.  */
7241 
int
attr_length_call (rtx insn, int sibcall)
{
  int local_call;
  rtx call_dest;
  tree call_decl;
  int length = 0;
  rtx pat = PATTERN (insn);
  /* Worst-case distance to the branch target.  -1 (i.e., ULONG_MAX on
     this unsigned type) means "unknown", which forces the long call
     sequences below.  */
  unsigned long distance = -1;

  if (INSN_ADDRESSES_SET_P ())
    {
      unsigned long total;

      /* In a named section, distances are measured from the start of
	 the current function rather than from all code output so far.  */
      total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
      distance = (total + insn_current_reference_address (insn));
      /* Unsigned wraparound means the address is out of range; fall
	 back to "unknown".  */
      if (distance < total)
	distance = -1;
    }

  /* Determine if this is a local call.  The call destination is found
     either directly in the CALL or inside the SET wrapping it.  */
  if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL)
    call_dest = XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0);
  else
    call_dest = XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0);

  call_decl = SYMBOL_REF_DECL (call_dest);
  local_call = call_decl && (*targetm.binds_local_p) (call_decl);

  /* pc-relative branch.  NOTE(review): the 7600000/240000 limits
     presumably leave slack below the reach of the 22-bit and 17-bit
     pc-relative branches -- confirm against the branch relocations.  */
  if (!TARGET_LONG_CALLS
      && ((TARGET_PA_20 && !sibcall && distance < 7600000)
	  || distance < 240000))
    length += 8;

  /* 64-bit plabel sequence.  */
  else if (TARGET_64BIT && !local_call)
    length += sibcall ? 28 : 24;

  /* non-pic long absolute branch sequence.  */
  else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
    length += 12;

  /* long pc-relative branch sequence.  */
  else if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
	   || (TARGET_64BIT && !TARGET_GAS)
	   || (TARGET_GAS && !TARGET_SOM
	       && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
    {
      length += 20;

      /* Room for the ldsid/mtsp space-register setup emitted by
	 output_call on pre-2.0 targets with space registers.  */
      if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS)
	length += 8;
    }

  /* 32-bit plabel sequence.  */
  else
    {
      length += 32;

      /* FP arguments are copied to general registers for the
	 indirect $$dyncall-style sequence on SOM.  */
      if (TARGET_SOM)
	length += length_fp_args (insn);

      if (flag_pic)
	length += 4;

      if (!TARGET_PA_20)
	{
	  if (!sibcall)
	    length += 8;

	  if (!TARGET_NO_SPACE_REGS)
	    length += 8;
	}
    }

  return length;
}
7320 
7321 /* INSN is a function call.  It may have an unconditional jump
7322    in its delay slot.
7323 
7324    CALL_DEST is the routine we are calling.  */
7325 
const char *
output_call (rtx insn, rtx call_dest, int sibcall)
{
  int delay_insn_deleted = 0;
  int delay_slot_filled = 0;
  /* Number of insns in the delay-slot SEQUENCE; 0 if the slot is
     unfilled.  */
  int seq_length = dbr_sequence_length ();
  tree call_decl = SYMBOL_REF_DECL (call_dest);
  int local_call = call_decl && (*targetm.binds_local_p) (call_decl);
  rtx xoperands[2];

  xoperands[0] = call_dest;

  /* Handle the common case where we're sure that the branch will reach
     the beginning of the "$CODE$" subspace.  This is the beginning of
     the current function if we are in a named section.  */
  if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
    {
      /* A sibcall doesn't save a return pointer, so link through %r0;
	 a normal call links through %r2.  */
      xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
      output_asm_insn ("{bl|b,l} %0,%1", xoperands);
    }
  else
    {
      if (TARGET_64BIT && !local_call)
	{
	  /* ??? As far as I can tell, the HP linker doesn't support the
	     long pc-relative sequence described in the 64-bit runtime
	     architecture.  So, we use a slightly longer indirect call.  */
	  xoperands[0] = get_deferred_plabel (call_dest);
	  xoperands[1] = gen_label_rtx ();

	  /* If this isn't a sibcall, we put the load of %r27 into the
	     delay slot.  We can't do this in a sibcall as we don't
	     have a second call-clobbered scratch register available.  */
	  if (seq_length != 0
	      && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
	      && !sibcall)
	    {
	      /* Emit the delay insn before the call itself.  */
	      final_scan_insn (NEXT_INSN (insn), asm_out_file,
			       optimize, 0, NULL);

	      /* Now delete the delay insn by turning it into a note.  */
	      PUT_CODE (NEXT_INSN (insn), NOTE);
	      NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
	      NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
	      delay_insn_deleted = 1;
	    }

	  /* Load the target's descriptor address from the plabel entry
	     via the DLT (%r27).  */
	  output_asm_insn ("addil LT'%0,%%r27", xoperands);
	  output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
	  output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);

	  if (sibcall)
	    {
	      /* Load the new %r27 and target, then branch without
		 linking.  */
	      output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
	      output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
	      output_asm_insn ("bve (%%r1)", xoperands);
	    }
	  else
	    {
	      /* Linked branch; the %r27 load rides in the delay slot.  */
	      output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
	      output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
	      output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
	      delay_slot_filled = 1;
	    }
	}
      else
	{
	  int indirect_call = 0;

	  /* Emit a long call.  There are several different sequences
	     of increasing length and complexity.  In most cases,
             they don't allow an instruction in the delay slot.  */
	  if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
	      && !(TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
	      && !(TARGET_GAS && !TARGET_SOM
		   && (TARGET_LONG_PIC_PCREL_CALL || local_call))
	      && !TARGET_64BIT)
	    indirect_call = 1;

	  if (seq_length != 0
	      && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
	      && !sibcall
	      && (!TARGET_PA_20 || indirect_call))
	    {
	      /* A non-jump insn in the delay slot.  By definition we can
		 emit this insn before the call (and in fact before argument
		 relocating.  */
	      final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
			       NULL);

	      /* Now delete the delay insn.  */
	      PUT_CODE (NEXT_INSN (insn), NOTE);
	      NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
	      NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
	      delay_insn_deleted = 1;
	    }

	  if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
	    {
	      /* This is the best sequence for making long calls in
		 non-pic code.  Unfortunately, GNU ld doesn't provide
		 the stub needed for external calls, and GAS's support
		 for this with the SOM linker is buggy.  It is safe
		 to use this for local calls.  */
	      output_asm_insn ("ldil L'%0,%%r1", xoperands);
	      if (sibcall)
		output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
	      else
		{
		  if (TARGET_PA_20)
		    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
				     xoperands);
		  else
		    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);

		  /* Move the return pointer from %r31 (where be,l/ble
		     put it) to the standard %r2 in the delay slot.  */
		  output_asm_insn ("copy %%r31,%%r2", xoperands);
		  delay_slot_filled = 1;
		}
	    }
	  else
	    {
	      if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
		  || (TARGET_64BIT && !TARGET_GAS))
		{
		  /* The HP assembler and linker can handle relocations
		     for the difference of two symbols.  GAS and the HP
		     linker can't do this when one of the symbols is
		     external.  */
		  xoperands[1] = gen_label_rtx ();
		  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
		  output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
		  (*targetm.asm_out.internal_label) (asm_out_file, "L",
					     CODE_LABEL_NUMBER (xoperands[1]));
		  output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
		}
	      else if (TARGET_GAS && !TARGET_SOM
		       && (TARGET_LONG_PIC_PCREL_CALL || local_call))
		{
		  /*  GAS currently can't generate the relocations that
		      are needed for the SOM linker under HP-UX using this
		      sequence.  The GNU linker doesn't generate the stubs
		      that are needed for external calls on TARGET_ELF32
		      with this sequence.  For now, we have to use a
		      longer plabel sequence when using GAS.  */
		  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
		  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
				   xoperands);
		  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
				   xoperands);
		}
	      else
		{
		  /* Emit a long plabel-based call sequence.  This is
		     essentially an inline implementation of $$dyncall.
		     We don't actually try to call $$dyncall as this is
		     as difficult as calling the function itself.  */
		  xoperands[0] = get_deferred_plabel (call_dest);
		  xoperands[1] = gen_label_rtx ();

		  /* Since the call is indirect, FP arguments in registers
		     need to be copied to the general registers.  Then, the
		     argument relocation stub will copy them back.  */
		  if (TARGET_SOM)
		    copy_fp_args (insn);

		  if (flag_pic)
		    {
		      /* PIC: load the plabel address through the DLT
			 pointer in %r19.  */
		      output_asm_insn ("addil LT'%0,%%r19", xoperands);
		      output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
		      output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
		    }
		  else
		    {
		      /* Non-PIC: address the plabel relative to
			 $global$ via %r27.  */
		      output_asm_insn ("addil LR'%0-$global$,%%r27",
				       xoperands);
		      output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
				       xoperands);
		    }

		  /* If bit 30 of %r1 is set, this is a plabel with an
		     LTP; clear the low bits and load the target address
		     and new %r19 from the descriptor, otherwise skip
		     these three insns (.+16).  */
		  output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
		  output_asm_insn ("depi 0,31,2,%%r1", xoperands);
		  output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
		  output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);

		  if (!sibcall && !TARGET_PA_20)
		    {
		      /* Pre-compute the return address in %r2; the
			 offset depends on whether the space-register
			 setup below is emitted.  */
		      output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
		      if (TARGET_NO_SPACE_REGS)
			output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
		      else
			output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
		    }
		}

	      if (TARGET_PA_20)
		{
		  if (sibcall)
		    output_asm_insn ("bve (%%r1)", xoperands);
		  else
		    {
		      if (indirect_call)
			{
			  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
			  /* Save the return pointer where $$dyncall-style
			     callees expect it.  */
			  output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
			  delay_slot_filled = 1;
			}
		      else
			output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
		    }
		}
	      else
		{
		  /* Pre-2.0: set up %sr0 for the external branch unless
		     space registers are disabled.  */
		  if (!TARGET_NO_SPACE_REGS)
		    output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
				     xoperands);

		  if (sibcall)
		    {
		      if (TARGET_NO_SPACE_REGS)
			output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
		      else
			output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
		    }
		  else
		    {
		      if (TARGET_NO_SPACE_REGS)
			output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
		      else
			output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);

		      if (indirect_call)
			output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
		      else
			output_asm_insn ("copy %%r31,%%r2", xoperands);
		      delay_slot_filled = 1;
		    }
		}
	    }
	}
    }

  /* Pad with a nop when the delay slot exists but wasn't used by the
     sequence above.  */
  if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
    output_asm_insn ("nop", xoperands);

  /* We are done if there isn't a jump in the delay slot.  */
  if (seq_length == 0
      || delay_insn_deleted
      || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
    return "";

  /* A sibcall should never have a branch in the delay slot.  */
  gcc_assert (!sibcall);

  /* This call has an unconditional jump in its delay slot.  */
  xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);

  if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
    {
      /* See if the return address can be adjusted.  Use the containing
         sequence insn's address.  */
      rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
      int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
		      - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);

      if (VAL_14_BITS_P (distance))
	{
	  /* Adjust %r2 so the callee returns directly to the jump's
	     target instead of coming back here.  */
	  xoperands[1] = gen_label_rtx ();
	  output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
	  (*targetm.asm_out.internal_label) (asm_out_file, "L",
					     CODE_LABEL_NUMBER (xoperands[1]));
	}
      else
	output_asm_insn ("nop\n\tb,n %0", xoperands);
    }
  else
    output_asm_insn ("b,n %0", xoperands);

  /* Delete the jump.  */
  PUT_CODE (NEXT_INSN (insn), NOTE);
  NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
  NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;

  return "";
}
7610 
7611 /* Return the attribute length of the indirect call instruction INSN.
7612    The length must match the code generated by output_indirect call.
7613    The returned length includes the delay slot.  Currently, the delay
7614    slot of an indirect call sequence is not exposed and it is used by
7615    the sequence itself.  */
7616 
7617 int
attr_length_indirect_call(rtx insn)7618 attr_length_indirect_call (rtx insn)
7619 {
7620   unsigned long distance = -1;
7621   unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7622 
7623   if (INSN_ADDRESSES_SET_P ())
7624     {
7625       distance = (total + insn_current_reference_address (insn));
7626       if (distance < total)
7627 	distance = -1;
7628     }
7629 
7630   if (TARGET_64BIT)
7631     return 12;
7632 
7633   if (TARGET_FAST_INDIRECT_CALLS
7634       || (!TARGET_PORTABLE_RUNTIME
7635 	  && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
7636 	      || distance < 240000)))
7637     return 8;
7638 
7639   if (flag_pic)
7640     return 24;
7641 
7642   if (TARGET_PORTABLE_RUNTIME)
7643     return 20;
7644 
7645   /* Out of reach, can use ble.  */
7646   return 12;
7647 }
7648 
/* Output the sequence for an indirect call INSN to CALL_DEST.  Either
   returns a complete asm template, or emits the sequence directly with
   output_asm_insn and returns "".  */

const char *
output_indirect_call (rtx insn, rtx call_dest)
{
  rtx xoperands[1];

  if (TARGET_64BIT)
    {
      /* Load the branch target from 16(%0) and the new %r27 from
	 24(%0); the %r27 load rides in the bve,l delay slot.  */
      xoperands[0] = call_dest;
      output_asm_insn ("ldd 16(%0),%%r2", xoperands);
      output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
      return "";
    }

  /* First the special case for kernels, level 0 systems, etc.  */
  if (TARGET_FAST_INDIRECT_CALLS)
    return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";

  /* Now the normal case -- we can reach $$dyncall directly or
     we're sure that we can get there via a long-branch stub.

     No need to check target flags as the length uniquely identifies
     the remaining cases.  */
  if (attr_length_indirect_call (insn) == 8)
    {
      /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
	 $$dyncall.  Since BLE uses %r31 as the link register, the 22-bit
	 variant of the B,L instruction can't be used on the SOM target.  */
      if (TARGET_PA_20 && !TARGET_SOM)
	return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
      else
	return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
    }

  /* Long millicode call, but we are not generating PIC or portable runtime
     code.  */
  if (attr_length_indirect_call (insn) == 12)
    return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";

  /* Long millicode call for portable runtime.  */
  if (attr_length_indirect_call (insn) == 20)
    return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";

  /* We need a long PIC call to $$dyncall.  */
  xoperands[0] = NULL_RTX;
  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
  if (TARGET_SOM || !TARGET_GAS)
    {
      /* HP assembler: express $$dyncall as a difference from a local
	 label, which it can relocate.  */
      xoperands[0] = gen_label_rtx ();
      output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
      (*targetm.asm_out.internal_label) (asm_out_file, "L",
					 CODE_LABEL_NUMBER (xoperands[0]));
      output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
    }
  else
    {
      /* GAS: use a pc-relative relocation against $PIC_pcrel$0.  */
      output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
      output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
		       xoperands);
    }
  /* blr sets the return pointer in %r2; branch to $$dyncall via %r1.  */
  output_asm_insn ("blr %%r0,%%r2", xoperands);
  output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
  return "";
}
7712 
7713 /* Return the total length of the save and restore instructions needed for
7714    the data linkage table pointer (i.e., the PIC register) across the call
7715    instruction INSN.  No-return calls do not require a save and restore.
7716    In addition, we may be able to avoid the save and restore for calls
7717    within the same translation unit.  */
7718 
7719 int
attr_length_save_restore_dltp(rtx insn)7720 attr_length_save_restore_dltp (rtx insn)
7721 {
7722   if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
7723     return 0;
7724 
7725   return 8;
7726 }
7727 
7728 /* In HPUX 8.0's shared library scheme, special relocations are needed
7729    for function labels if they might be passed to a function
7730    in a shared library (because shared libraries don't live in code
7731    space), and special magic is needed to construct their address.  */
7732 
7733 void
hppa_encode_label(rtx sym)7734 hppa_encode_label (rtx sym)
7735 {
7736   const char *str = XSTR (sym, 0);
7737   int len = strlen (str) + 1;
7738   char *newstr, *p;
7739 
7740   p = newstr = alloca (len + 1);
7741   *p++ = '@';
7742   strcpy (p, str);
7743 
7744   XSTR (sym, 0) = ggc_alloc_string (newstr, len);
7745 }
7746 
7747 static void
pa_encode_section_info(tree decl,rtx rtl,int first)7748 pa_encode_section_info (tree decl, rtx rtl, int first)
7749 {
7750   default_encode_section_info (decl, rtl, first);
7751 
7752   if (first && TEXT_SPACE_P (decl))
7753     {
7754       SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
7755       if (TREE_CODE (decl) == FUNCTION_DECL)
7756 	hppa_encode_label (XEXP (rtl, 0));
7757     }
7758 }
7759 
7760 /* This is sort of inverse to pa_encode_section_info.  */
7761 
static const char *
pa_strip_name_encoding (const char *str)
{
  /* Drop at most one leading '@' (function label marker), then at
     most one leading '*' (user label prefix), in that order.  */
  if (*str == '@')
    str++;
  if (*str == '*')
    str++;
  return str;
}
7769 
7770 int
function_label_operand(rtx op,enum machine_mode mode ATTRIBUTE_UNUSED)7771 function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7772 {
7773   return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
7774 }
7775 
7776 /* Returns 1 if OP is a function label involved in a simple addition
7777    with a constant.  Used to keep certain patterns from matching
7778    during instruction combination.  */
7779 int
is_function_label_plus_const(rtx op)7780 is_function_label_plus_const (rtx op)
7781 {
7782   /* Strip off any CONST.  */
7783   if (GET_CODE (op) == CONST)
7784     op = XEXP (op, 0);
7785 
7786   return (GET_CODE (op) == PLUS
7787 	  && function_label_operand (XEXP (op, 0), Pmode)
7788 	  && GET_CODE (XEXP (op, 1)) == CONST_INT);
7789 }
7790 
7791 /* Output assembly code for a thunk to FUNCTION.  */
7792 
static void
pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
			HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
			tree function)
{
  static unsigned int current_thunk_number;
  /* Nonzero when DELTA fits a 14-bit immediate, so it can be added
     with a single ldo in the branch delay slot.  */
  int val_14 = VAL_14_BITS_P (delta);
  /* Bytes of code emitted; used to update the running code size.  */
  int nbytes = 0;
  char label[16];
  rtx xoperands[4];

  /* %0 = target function, %1 = thunk symbol, %2 = this-adjustment.  */
  xoperands[0] = XEXP (DECL_RTL (function), 0);
  xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
  xoperands[2] = GEN_INT (delta);

  ASM_OUTPUT_LABEL (file, XSTR (xoperands[1], 0));
  fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");

  /* Output the thunk.  We know that the function is in the same
     translation unit (i.e., the same space) as the thunk, and that
     thunks are output after their method.  Thus, we don't need an
     external branch to reach the function.  With SOM and GAS,
     functions and thunks are effectively in different sections.
     Thus, we can always use a IA-relative branch and the linker
     will add a long branch stub if necessary.

     However, we have to be careful when generating PIC code on the
     SOM port to ensure that the sequence does not transfer to an
     import stub for the target function as this could clobber the
     return value saved at SP-24.  This would also apply to the
     32-bit linux port if the multi-space model is implemented.  */
  if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
       && !(flag_pic && TREE_PUBLIC (function))
       && (TARGET_GAS || last_address < 262132))
      || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
	  && ((targetm.have_named_sections
	       && DECL_SECTION_NAME (thunk_fndecl) != NULL
	       /* The GNU 64-bit linker has rather poor stub management.
		  So, we use a long branch from thunks that aren't in
		  the same section as the target function.  */
	       && ((!TARGET_64BIT
		    && (DECL_SECTION_NAME (thunk_fndecl)
			!= DECL_SECTION_NAME (function)))
		   || ((DECL_SECTION_NAME (thunk_fndecl)
			== DECL_SECTION_NAME (function))
		       && last_address < 262132)))
	      || (!targetm.have_named_sections && last_address < 262132))))
    {
      /* Case 1: short IA-relative branch.  Adjust %r26 (this) and
	 branch; the ldo rides in the delay slot.  */
      if (!val_14)
	output_asm_insn ("addil L'%2,%%r26", xoperands);

      output_asm_insn ("b %0", xoperands);

      if (val_14)
	{
	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	  nbytes += 8;
	}
      else
	{
	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
	  nbytes += 12;
	}
    }
  else if (TARGET_64BIT)
    {
      /* Case 2: 64-bit long pc-relative branch via %r1.  */
      /* We only have one call-clobbered scratch register, so we can't
         make use of the delay slot if delta doesn't fit in 14 bits.  */
      if (!val_14)
	{
	  output_asm_insn ("addil L'%2,%%r26", xoperands);
	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
	}

      output_asm_insn ("b,l .+8,%%r1", xoperands);

      if (TARGET_GAS)
	{
	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
	}
      else
	{
	  /* HP assembler: use the thunk-to-function symbol difference,
	     corrected for the pc offset of the b,l (%3 bytes in).  */
	  xoperands[3] = GEN_INT (val_14 ? 8 : 16);
	  output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
	}

      if (val_14)
	{
	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	  nbytes += 20;
	}
      else
	{
	  output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
	  nbytes += 24;
	}
    }
  else if (TARGET_PORTABLE_RUNTIME)
    {
      /* Case 3: portable runtime -- absolute address in %r22.  */
      output_asm_insn ("ldil L'%0,%%r1", xoperands);
      output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);

      if (!val_14)
	output_asm_insn ("addil L'%2,%%r26", xoperands);

      output_asm_insn ("bv %%r0(%%r22)", xoperands);

      if (val_14)
	{
	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	  nbytes += 16;
	}
      else
	{
	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
	  nbytes += 20;
	}
    }
  else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
    {
      /* Case 4: SOM PIC, public target.  The function is accessible
	 from outside this module.  The only
	 way to avoid an import stub between the thunk and function is to
	 call the function directly with an indirect sequence similar to
	 that used by $$dyncall.  This is possible because $$dyncall acts
	 as the import stub in an indirect call.  */
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
      xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
      /* Load the plabel for the target via the DLT (%r19).  */
      output_asm_insn ("addil LT'%3,%%r19", xoperands);
      output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
      output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
      /* If bit 30 is set, mask the low bits and load the descriptor's
	 target and new %r19; otherwise skip those insns (.+16).  */
      output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
      output_asm_insn ("depi 0,31,2,%%r22", xoperands);
      output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
      output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);

      if (!val_14)
	{
	  output_asm_insn ("addil L'%2,%%r26", xoperands);
	  nbytes += 4;
	}

      if (TARGET_PA_20)
	{
	  output_asm_insn ("bve (%%r22)", xoperands);
	  nbytes += 36;
	}
      else if (TARGET_NO_SPACE_REGS)
	{
	  output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
	  nbytes += 36;
	}
      else
	{
	  /* Set up %sr0 from the target's space before the external
	     branch.  */
	  output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
	  output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
	  output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
	  nbytes += 44;
	}

      if (val_14)
	output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
      else
	output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
    }
  else if (flag_pic)
    {
      /* Case 5: 32-bit PIC, non-public or non-SOM target -- long
	 pc-relative branch through %r22.  */
      output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);

      if (TARGET_SOM || !TARGET_GAS)
	{
	  /* HP assembler: symbol-difference relocation, adjusted for
	     the 8-byte pc bias of the bl above.  */
	  output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
	  output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
	}
      else
	{
	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
	}

      if (!val_14)
	output_asm_insn ("addil L'%2,%%r26", xoperands);

      output_asm_insn ("bv %%r0(%%r22)", xoperands);

      if (val_14)
	{
	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	  nbytes += 20;
	}
      else
	{
	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
	  nbytes += 24;
	}
    }
  else
    {
      /* Case 6: non-PIC long absolute branch.  */
      if (!val_14)
	output_asm_insn ("addil L'%2,%%r26", xoperands);

      output_asm_insn ("ldil L'%0,%%r22", xoperands);
      output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);

      if (val_14)
	{
	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	  nbytes += 12;
	}
      else
	{
	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
	  nbytes += 16;
	}
    }

  fprintf (file, "\t.EXIT\n\t.PROCEND\n");

  if (TARGET_SOM && TARGET_GAS)
    {
      /* We done with this subspace except possibly for some additional
	 debug information.  Forget that we are in this subspace to ensure
	 that the next function is output in its own subspace.  */
      in_section = NULL;
      cfun->machine->in_nsubspa = 2;
    }

  if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
    {
      /* Emit the plabel word referenced by case 4 above.  */
      switch_to_section (data_section);
      output_asm_insn (".align 4", xoperands);
      ASM_OUTPUT_LABEL (file, label);
      output_asm_insn (".word P'%0", xoperands);
    }

  current_thunk_number++;
  /* Round up to the function alignment and account for the emitted
     bytes in the running code-size totals.  */
  nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
	    & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
  last_address += nbytes;
  update_total_code_bytes (nbytes);
}
8035 
8036 /* Only direct calls to static functions are allowed to be sibling (tail)
8037    call optimized.
8038 
8039    This restriction is necessary because some linker generated stubs will
8040    store return pointers into rp' in some cases which might clobber a
8041    live value already in rp'.
8042 
8043    In a sibcall the current function and the target function share stack
8044    space.  Thus if the path to the current function and the path to the
8045    target function save a value in rp', they save the value into the
8046    same stack slot, which has undesirable consequences.
8047 
8048    Because of the deferred binding nature of shared libraries any function
8049    with external scope could be in a different load module and thus require
8050    rp' to be saved when calling that function.  So sibcall optimizations
8051    can only be safe for static function.
8052 
8053    Note that GCC never needs return value relocations, so we don't have to
8054    worry about static calls with return value relocations (which require
8055    saving rp').
8056 
8057    It is safe to perform a sibcall optimization when the target function
8058    will never return.  */
8059 static bool
pa_function_ok_for_sibcall(tree decl,tree exp ATTRIBUTE_UNUSED)8060 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8061 {
8062   if (TARGET_PORTABLE_RUNTIME)
8063     return false;
8064 
8065   /* Sibcalls are ok for TARGET_ELF32 as along as the linker is used in
8066      single subspace mode and the call is not indirect.  As far as I know,
8067      there is no operating system support for the multiple subspace mode.
8068      It might be possible to support indirect calls if we didn't use
8069      $$dyncall (see the indirect sequence generated in output_call).  */
8070   if (TARGET_ELF32)
8071     return (decl != NULL_TREE);
8072 
8073   /* Sibcalls are not ok because the arg pointer register is not a fixed
8074      register.  This prevents the sibcall optimization from occurring.  In
8075      addition, there are problems with stub placement using GNU ld.  This
8076      is because a normal sibcall branch uses a 17-bit relocation while
8077      a regular call branch uses a 22-bit relocation.  As a result, more
8078      care needs to be taken in the placement of long-branch stubs.  */
8079   if (TARGET_64BIT)
8080     return false;
8081 
8082   /* Sibcalls are only ok within a translation unit.  */
8083   return (decl && !TREE_PUBLIC (decl));
8084 }
8085 
8086 /* ??? Addition is not commutative on the PA due to the weird implicit
8087    space register selection rules for memory addresses.  Therefore, we
8088    don't consider a + b == b + a, as this might be inside a MEM.  */
8089 static bool
pa_commutative_p(rtx x,int outer_code)8090 pa_commutative_p (rtx x, int outer_code)
8091 {
8092   return (COMMUTATIVE_P (x)
8093 	  && (TARGET_NO_SPACE_REGS
8094 	      || (outer_code != UNKNOWN && outer_code != MEM)
8095 	      || GET_CODE (x) != PLUS));
8096 }
8097 
8098 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8099    use in fmpyadd instructions.  */
8100 int
fmpyaddoperands(rtx * operands)8101 fmpyaddoperands (rtx *operands)
8102 {
8103   enum machine_mode mode = GET_MODE (operands[0]);
8104 
8105   /* Must be a floating point mode.  */
8106   if (mode != SFmode && mode != DFmode)
8107     return 0;
8108 
8109   /* All modes must be the same.  */
8110   if (! (mode == GET_MODE (operands[1])
8111 	 && mode == GET_MODE (operands[2])
8112 	 && mode == GET_MODE (operands[3])
8113 	 && mode == GET_MODE (operands[4])
8114 	 && mode == GET_MODE (operands[5])))
8115     return 0;
8116 
8117   /* All operands must be registers.  */
8118   if (! (GET_CODE (operands[1]) == REG
8119 	 && GET_CODE (operands[2]) == REG
8120 	 && GET_CODE (operands[3]) == REG
8121 	 && GET_CODE (operands[4]) == REG
8122 	 && GET_CODE (operands[5]) == REG))
8123     return 0;
8124 
8125   /* Only 2 real operands to the addition.  One of the input operands must
8126      be the same as the output operand.  */
8127   if (! rtx_equal_p (operands[3], operands[4])
8128       && ! rtx_equal_p (operands[3], operands[5]))
8129     return 0;
8130 
8131   /* Inout operand of add cannot conflict with any operands from multiply.  */
8132   if (rtx_equal_p (operands[3], operands[0])
8133      || rtx_equal_p (operands[3], operands[1])
8134      || rtx_equal_p (operands[3], operands[2]))
8135     return 0;
8136 
8137   /* multiply cannot feed into addition operands.  */
8138   if (rtx_equal_p (operands[4], operands[0])
8139       || rtx_equal_p (operands[5], operands[0]))
8140     return 0;
8141 
8142   /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
8143   if (mode == SFmode
8144       && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8145 	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8146 	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8147 	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8148 	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8149 	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8150     return 0;
8151 
8152   /* Passed.  Operands are suitable for fmpyadd.  */
8153   return 1;
8154 }
8155 
8156 #if !defined(USE_COLLECT2)
/* Output SYMBOL with PRIORITY as a global constructor.  The symbol is
   first encoded as a function label so the PA "P%" plabel decoration is
   applied, then handed to whichever generic constructor-emission scheme
   the target configuration supports (a dedicated .ctors section, a
   named section, or stabs records).  Only built when collect2 is not
   used (see the enclosing #if !defined(USE_COLLECT2)).  */
static void
pa_asm_out_constructor (rtx symbol, int priority)
{
  /* Mark SYMBOL as a function so it is output using a plabel.  */
  if (!function_label_operand (symbol, VOIDmode))
    hppa_encode_label (symbol);

#ifdef CTORS_SECTION_ASM_OP
  default_ctor_section_asm_out_constructor (symbol, priority);
#else
# ifdef TARGET_ASM_NAMED_SECTION
  default_named_section_asm_out_constructor (symbol, priority);
# else
  default_stabs_asm_out_constructor (symbol, priority);
# endif
#endif
}
8173 
/* Output SYMBOL with PRIORITY as a global destructor.  Mirror image of
   pa_asm_out_constructor: encode the symbol as a function label, then
   dispatch to the .dtors-section, named-section, or stabs fallback
   depending on what the configuration provides.  */
static void
pa_asm_out_destructor (rtx symbol, int priority)
{
  /* Mark SYMBOL as a function so it is output using a plabel.  */
  if (!function_label_operand (symbol, VOIDmode))
    hppa_encode_label (symbol);

#ifdef DTORS_SECTION_ASM_OP
  default_dtor_section_asm_out_destructor (symbol, priority);
#else
# ifdef TARGET_ASM_NAMED_SECTION
  default_named_section_asm_out_destructor (symbol, priority);
# else
  default_stabs_asm_out_destructor (symbol, priority);
# endif
#endif
}
8190 #endif
8191 
/* This function places uninitialized global data in the bss section.
   The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
   function on the SOM port to prevent uninitialized global data from
   being placed in the data section.

   STREAM is the assembly output file, NAME the symbol, SIZE its size
   in bytes, and ALIGN the required alignment in bits.  */

void
pa_asm_output_aligned_bss (FILE *stream,
			   const char *name,
			   unsigned HOST_WIDE_INT size,
			   unsigned int align)
{
  switch_to_section (bss_section);
  /* NOTE(review): .align is emitted both here and again just before the
     label below; that matches the upstream source, and the second one is
     what actually aligns the label.  */
  fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);

#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
  ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
#endif

#ifdef ASM_OUTPUT_SIZE_DIRECTIVE
  ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
#endif

  fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
  ASM_OUTPUT_LABEL (stream, name);
  /* .block reserves SIZE bytes of uninitialized storage.  */
  fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
}
8218 
/* Both the HP and GNU assemblers under HP-UX provide a .comm directive
   that doesn't allow the alignment of global common storage to be directly
   specified.  The SOM linker aligns common storage based on the rounded
   value of the NUM_BYTES parameter in the .comm directive.  It's not
   possible to use the .align directive as it doesn't affect the alignment
   of the label associated with a .comm directive.  */

void
pa_asm_output_aligned_common (FILE *stream,
			      const char *name,
			      unsigned HOST_WIDE_INT size,
			      unsigned int align)
{
  /* Largest alignment (in bits) the SOM linker can be made to honor
     via the size-rounding trick described above.  */
  unsigned int max_common_align;

  max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
  if (align > max_common_align)
    {
      warning (0, "alignment (%u) for %s exceeds maximum alignment "
	       "for global common data.  Using %u",
	       align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
      align = max_common_align;
    }

  switch_to_section (bss_section);

  assemble_name (stream, name);
  /* Request at least ALIGN/BITS_PER_UNIT bytes so the linker rounds the
     allocation (and hence the alignment) up accordingly.  */
  fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n",
           MAX (size, align / BITS_PER_UNIT));
}
8249 
/* We can't use .comm for local common storage as the SOM linker effectively
   treats the symbol as universal and uses the same storage for local symbols
   with the same name in different object files.  The .block directive
   reserves an uninitialized block of storage.  However, it's not common
   storage.  Fortunately, GCC never requests common storage with the same
   name in any given translation unit.  */

void
pa_asm_output_aligned_local (FILE *stream,
			     const char *name,
			     unsigned HOST_WIDE_INT size,
			     unsigned int align)
{
  switch_to_section (bss_section);
  fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);

#ifdef LOCAL_ASM_OP
  /* Explicitly mark the symbol as local where the assembler supports it.  */
  fprintf (stream, "%s", LOCAL_ASM_OP);
  assemble_name (stream, name);
  fprintf (stream, "\n");
#endif

  ASM_OUTPUT_LABEL (stream, name);
  /* Reserve SIZE bytes of (non-common) uninitialized storage.  */
  fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
}
8275 
8276 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8277    use in fmpysub instructions.  */
8278 int
fmpysuboperands(rtx * operands)8279 fmpysuboperands (rtx *operands)
8280 {
8281   enum machine_mode mode = GET_MODE (operands[0]);
8282 
8283   /* Must be a floating point mode.  */
8284   if (mode != SFmode && mode != DFmode)
8285     return 0;
8286 
8287   /* All modes must be the same.  */
8288   if (! (mode == GET_MODE (operands[1])
8289 	 && mode == GET_MODE (operands[2])
8290 	 && mode == GET_MODE (operands[3])
8291 	 && mode == GET_MODE (operands[4])
8292 	 && mode == GET_MODE (operands[5])))
8293     return 0;
8294 
8295   /* All operands must be registers.  */
8296   if (! (GET_CODE (operands[1]) == REG
8297 	 && GET_CODE (operands[2]) == REG
8298 	 && GET_CODE (operands[3]) == REG
8299 	 && GET_CODE (operands[4]) == REG
8300 	 && GET_CODE (operands[5]) == REG))
8301     return 0;
8302 
8303   /* Only 2 real operands to the subtraction.  Subtraction is not a commutative
8304      operation, so operands[4] must be the same as operand[3].  */
8305   if (! rtx_equal_p (operands[3], operands[4]))
8306     return 0;
8307 
8308   /* multiply cannot feed into subtraction.  */
8309   if (rtx_equal_p (operands[5], operands[0]))
8310     return 0;
8311 
8312   /* Inout operand of sub cannot conflict with any operands from multiply.  */
8313   if (rtx_equal_p (operands[3], operands[0])
8314      || rtx_equal_p (operands[3], operands[1])
8315      || rtx_equal_p (operands[3], operands[2]))
8316     return 0;
8317 
8318   /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
8319   if (mode == SFmode
8320       && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8321 	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8322 	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8323 	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8324 	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8325 	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8326     return 0;
8327 
8328   /* Passed.  Operands are suitable for fmpysub.  */
8329   return 1;
8330 }
8331 
/* Return 1 if the given constant is 2, 4, or 8.  These are the valid
   shift amounts (as scale factors) for shadd instructions.  */
int
shadd_constant_p (int val)
{
  return val == 2 || val == 4 || val == 8;
}
8342 
8343 /* Return 1 if OP is valid as a base or index register in a
8344    REG+REG address.  */
8345 
8346 int
borx_reg_operand(rtx op,enum machine_mode mode)8347 borx_reg_operand (rtx op, enum machine_mode mode)
8348 {
8349   if (GET_CODE (op) != REG)
8350     return 0;
8351 
8352   /* We must reject virtual registers as the only expressions that
8353      can be instantiated are REG and REG+CONST.  */
8354   if (op == virtual_incoming_args_rtx
8355       || op == virtual_stack_vars_rtx
8356       || op == virtual_stack_dynamic_rtx
8357       || op == virtual_outgoing_args_rtx
8358       || op == virtual_cfa_rtx)
8359     return 0;
8360 
8361   /* While it's always safe to index off the frame pointer, it's not
8362      profitable to do so when the frame pointer is being eliminated.  */
8363   if (!reload_completed
8364       && flag_omit_frame_pointer
8365       && !current_function_calls_alloca
8366       && op == frame_pointer_rtx)
8367     return 0;
8368 
8369   return register_operand (op, mode);
8370 }
8371 
8372 /* Return 1 if this operand is anything other than a hard register.  */
8373 
8374 int
non_hard_reg_operand(rtx op,enum machine_mode mode ATTRIBUTE_UNUSED)8375 non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8376 {
8377   return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
8378 }
8379 
8380 /* Return 1 if INSN branches forward.  Should be using insn_addresses
8381    to avoid walking through all the insns...  */
8382 static int
forward_branch_p(rtx insn)8383 forward_branch_p (rtx insn)
8384 {
8385   rtx label = JUMP_LABEL (insn);
8386 
8387   while (insn)
8388     {
8389       if (insn == label)
8390 	break;
8391       else
8392 	insn = NEXT_INSN (insn);
8393     }
8394 
8395   return (insn == label);
8396 }
8397 
8398 /* Return 1 if OP is an equality comparison, else return 0.  */
8399 int
eq_neq_comparison_operator(rtx op,enum machine_mode mode ATTRIBUTE_UNUSED)8400 eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8401 {
8402   return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
8403 }
8404 
8405 /* Return 1 if INSN is in the delay slot of a call instruction.  */
8406 int
jump_in_call_delay(rtx insn)8407 jump_in_call_delay (rtx insn)
8408 {
8409 
8410   if (GET_CODE (insn) != JUMP_INSN)
8411     return 0;
8412 
8413   if (PREV_INSN (insn)
8414       && PREV_INSN (PREV_INSN (insn))
8415       && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
8416     {
8417       rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
8418 
8419       return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8420 	      && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
8421 
8422     }
8423   else
8424     return 0;
8425 }
8426 
8427 /* Output an unconditional move and branch insn.  */
8428 
8429 const char *
output_parallel_movb(rtx * operands,rtx insn)8430 output_parallel_movb (rtx *operands, rtx insn)
8431 {
8432   int length = get_attr_length (insn);
8433 
8434   /* These are the cases in which we win.  */
8435   if (length == 4)
8436     return "mov%I1b,tr %1,%0,%2";
8437 
8438   /* None of the following cases win, but they don't lose either.  */
8439   if (length == 8)
8440     {
8441       if (dbr_sequence_length () == 0)
8442 	{
8443 	  /* Nothing in the delay slot, fake it by putting the combined
8444 	     insn (the copy or add) in the delay slot of a bl.  */
8445 	  if (GET_CODE (operands[1]) == CONST_INT)
8446 	    return "b %2\n\tldi %1,%0";
8447 	  else
8448 	    return "b %2\n\tcopy %1,%0";
8449 	}
8450       else
8451 	{
8452 	  /* Something in the delay slot, but we've got a long branch.  */
8453 	  if (GET_CODE (operands[1]) == CONST_INT)
8454 	    return "ldi %1,%0\n\tb %2";
8455 	  else
8456 	    return "copy %1,%0\n\tb %2";
8457 	}
8458     }
8459 
8460   if (GET_CODE (operands[1]) == CONST_INT)
8461     output_asm_insn ("ldi %1,%0", operands);
8462   else
8463     output_asm_insn ("copy %1,%0", operands);
8464   return output_lbranch (operands[2], insn, 1);
8465 }
8466 
8467 /* Output an unconditional add and branch insn.  */
8468 
8469 const char *
output_parallel_addb(rtx * operands,rtx insn)8470 output_parallel_addb (rtx *operands, rtx insn)
8471 {
8472   int length = get_attr_length (insn);
8473 
8474   /* To make life easy we want operand0 to be the shared input/output
8475      operand and operand1 to be the readonly operand.  */
8476   if (operands[0] == operands[1])
8477     operands[1] = operands[2];
8478 
8479   /* These are the cases in which we win.  */
8480   if (length == 4)
8481     return "add%I1b,tr %1,%0,%3";
8482 
8483   /* None of the following cases win, but they don't lose either.  */
8484   if (length == 8)
8485     {
8486       if (dbr_sequence_length () == 0)
8487 	/* Nothing in the delay slot, fake it by putting the combined
8488 	   insn (the copy or add) in the delay slot of a bl.  */
8489 	return "b %3\n\tadd%I1 %1,%0,%0";
8490       else
8491 	/* Something in the delay slot, but we've got a long branch.  */
8492 	return "add%I1 %1,%0,%0\n\tb %3";
8493     }
8494 
8495   output_asm_insn ("add%I1 %1,%0,%0", operands);
8496   return output_lbranch (operands[3], insn, 1);
8497 }
8498 
8499 /* Return nonzero if INSN (a jump insn) immediately follows a call
8500    to a named function.  This is used to avoid filling the delay slot
8501    of the jump since it can usually be eliminated by modifying RP in
8502    the delay slot of the call.  */
8503 
8504 int
following_call(rtx insn)8505 following_call (rtx insn)
8506 {
8507   if (! TARGET_JUMP_IN_DELAY)
8508     return 0;
8509 
8510   /* Find the previous real insn, skipping NOTEs.  */
8511   insn = PREV_INSN (insn);
8512   while (insn && GET_CODE (insn) == NOTE)
8513     insn = PREV_INSN (insn);
8514 
8515   /* Check for CALL_INSNs and millicode calls.  */
8516   if (insn
8517       && ((GET_CODE (insn) == CALL_INSN
8518 	   && get_attr_type (insn) != TYPE_DYNCALL)
8519 	  || (GET_CODE (insn) == INSN
8520 	      && GET_CODE (PATTERN (insn)) != SEQUENCE
8521 	      && GET_CODE (PATTERN (insn)) != USE
8522 	      && GET_CODE (PATTERN (insn)) != CLOBBER
8523 	      && get_attr_type (insn) == TYPE_MILLI)))
8524     return 1;
8525 
8526   return 0;
8527 }
8528 
8529 /* We use this hook to perform a PA specific optimization which is difficult
8530    to do in earlier passes.
8531 
8532    We want the delay slots of branches within jump tables to be filled.
8533    None of the compiler passes at the moment even has the notion that a
8534    PA jump table doesn't contain addresses, but instead contains actual
8535    instructions!
8536 
8537    Because we actually jump into the table, the addresses of each entry
8538    must stay constant in relation to the beginning of the table (which
8539    itself must stay constant relative to the instruction to jump into
8540    it).  I don't believe we can guarantee earlier passes of the compiler
8541    will adhere to those rules.
8542 
8543    So, late in the compilation process we find all the jump tables, and
8544    expand them into real code -- e.g. each entry in the jump table vector
8545    will get an appropriate label followed by a jump to the final target.
8546 
8547    Reorg and the final jump pass can then optimize these branches and
8548    fill their delay slots.  We end up with smaller, more efficient code.
8549 
8550    The jump instructions within the table are special; we must be able
8551    to identify them during assembly output (if the jumps don't get filled
8552    we need to emit a nop rather than nullifying the delay slot)).  We
8553    identify jumps in switch tables by using insns with the attribute
8554    type TYPE_BTABLE_BRANCH.
8555 
8556    We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
8557    insns.  This serves two purposes, first it prevents jump.c from
8558    noticing that the last N entries in the table jump to the instruction
8559    immediately after the table and deleting the jumps.  Second, those
8560    insns mark where we should emit .begin_brtab and .end_brtab directives
8561    when using GAS (allows for better link time optimizations).  */
8562 
static void
pa_reorg (void)
{
  rtx insn;

  remove_useless_addtr_insns (1);

  /* The two-output combining pass only pays off on pre-PA8000 cores
     (see the comment above pa_combine_instructions).  */
  if (pa_cpu < PROCESSOR_8000)
    pa_combine_instructions ();


  /* This is fairly cheap, so always run it if optimizing.  */
  if (optimize > 0 && !TARGET_BIG_SWITCH)
    {
      /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns.  */
      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
	{
	  rtx pattern, tmp, location, label;
	  unsigned int length, i;

	  /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode.  */
	  if (GET_CODE (insn) != JUMP_INSN
	      || (GET_CODE (PATTERN (insn)) != ADDR_VEC
		  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
	    continue;

	  /* Emit marker for the beginning of the branch table.  */
	  emit_insn_before (gen_begin_brtab (), insn);

	  pattern = PATTERN (insn);
	  location = PREV_INSN (insn);
	  /* The label vector is operand 0 of an ADDR_VEC and operand 1
	     of an ADDR_DIFF_VEC (operand 0 there is the base label).  */
          length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);

	  for (i = 0; i < length; i++)
	    {
	      /* Emit a label before each jump to keep jump.c from
		 removing this code.  */
	      tmp = gen_label_rtx ();
	      LABEL_NUSES (tmp) = 1;
	      emit_label_after (tmp, location);
	      location = NEXT_INSN (location);

	      if (GET_CODE (pattern) == ADDR_VEC)
		label = XEXP (XVECEXP (pattern, 0, i), 0);
	      else
		label = XEXP (XVECEXP (pattern, 1, i), 0);

	      tmp = gen_short_jump (label);

	      /* Emit the jump itself.  */
	      tmp = emit_jump_insn_after (tmp, location);
	      JUMP_LABEL (tmp) = label;
	      LABEL_NUSES (label)++;
	      location = NEXT_INSN (location);

	      /* Emit a BARRIER after the jump.  */
	      emit_barrier_after (location);
	      location = NEXT_INSN (location);
	    }

	  /* Emit marker for the end of the branch table.  */
	  emit_insn_before (gen_end_brtab (), location);
	  location = NEXT_INSN (location);
	  emit_barrier_after (location);

	  /* Delete the ADDR_VEC or ADDR_DIFF_VEC.  */
	  delete_insn (insn);
	}
    }
  else
    {
      /* Still need brtab marker insns.  FIXME: the presence of these
	 markers disables output of the branch table to readonly memory,
	 and any alignment directives that might be needed.  Possibly,
	 the begin_brtab insn should be output before the label for the
	 table.  This doesn't matter at the moment since the tables are
	 always output in the text section.  */
      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
	{
	  /* Find an ADDR_VEC insn.  */
	  if (GET_CODE (insn) != JUMP_INSN
	      || (GET_CODE (PATTERN (insn)) != ADDR_VEC
		  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
	    continue;

	  /* Now generate markers for the beginning and end of the
	     branch table.  */
	  emit_insn_before (gen_begin_brtab (), insn);
	  emit_insn_after (gen_end_brtab (), insn);
	}
    }
}
8655 
8656 /* The PA has a number of odd instructions which can perform multiple
8657    tasks at once.  On first generation PA machines (PA1.0 and PA1.1)
8658    it may be profitable to combine two instructions into one instruction
8659    with two outputs.  It's not profitable PA2.0 machines because the
8660    two outputs would take two slots in the reorder buffers.
8661 
8662    This routine finds instructions which can be combined and combines
8663    them.  We only support some of the potential combinations, and we
8664    only try common ways to find suitable instructions.
8665 
8666       * addb can add two registers or a register and a small integer
8667       and jump to a nearby (+-8k) location.  Normally the jump to the
8668       nearby location is conditional on the result of the add, but by
8669       using the "true" condition we can make the jump unconditional.
8670       Thus addb can perform two independent operations in one insn.
8671 
8672       * movb is similar to addb in that it can perform a reg->reg
8673       or small immediate->reg copy and jump to a nearby (+-8k location).
8674 
8675       * fmpyadd and fmpysub can perform a FP multiply and either an
8676       FP add or FP sub if the operands of the multiply and add/sub are
8677       independent (there are other minor restrictions).  Note both
8678       the fmpy and fadd/fsub can in theory move to better spots according
8679       to data dependencies, but for now we require the fmpy stay at a
8680       fixed location.
8681 
8682       * Many of the memory operations can perform pre & post updates
8683       of index registers.  GCC's pre/post increment/decrement addressing
8684       is far too simple to take advantage of all the possibilities.  This
8685       pass may not be suitable since those insns may not be independent.
8686 
8687       * comclr can compare two ints or an int and a register, nullify
8688       the following instruction and zero some other register.  This
8689       is more difficult to use as it's harder to find an insn which
8690       will generate a comclr than finding something like an unconditional
8691       branch.  (conditional moves & long branches create comclr insns).
8692 
8693       * Most arithmetic operations can conditionally skip the next
8694       instruction.  They can be viewed as "perform this operation
8695       and conditionally jump to this nearby location" (where nearby
8696       is an insns away).  These are difficult to use due to the
8697       branch length restrictions.  */
8698 
static void
pa_combine_instructions (void)
{
  /* NEW is a scratch insn holding a two-element PARALLEL; it is reused
     for every candidate pair by pa_can_combine_p so we don't allocate
     an rtx per attempt.  */
  rtx anchor, new;

  /* This can get expensive since the basic algorithm is on the
     order of O(n^2) (or worse).  Only do it for -O2 or higher
     levels of optimization.  */
  if (optimize < 2)
    return;

  /* Walk down the list of insns looking for "anchor" insns which
     may be combined with "floating" insns.  As the name implies,
     "anchor" instructions don't move, while "floating" insns may
     move around.  */
  new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
  new = make_insn_raw (new);

  for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
    {
      enum attr_pa_combine_type anchor_attr;
      enum attr_pa_combine_type floater_attr;

      /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
	 Also ignore any special USE insns.  */
      if ((GET_CODE (anchor) != INSN
	  && GET_CODE (anchor) != JUMP_INSN
	  && GET_CODE (anchor) != CALL_INSN)
	  || GET_CODE (PATTERN (anchor)) == USE
	  || GET_CODE (PATTERN (anchor)) == CLOBBER
	  || GET_CODE (PATTERN (anchor)) == ADDR_VEC
	  || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
	continue;

      anchor_attr = get_attr_pa_combine_type (anchor);
      /* See if anchor is an insn suitable for combination.  */
      if (anchor_attr == PA_COMBINE_TYPE_FMPY
	  || anchor_attr == PA_COMBINE_TYPE_FADDSUB
	  || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
	      && ! forward_branch_p (anchor)))
	{
	  rtx floater;

	  /* First scan backwards from the anchor for a combinable
	     floating insn.  */
	  for (floater = PREV_INSN (anchor);
	       floater;
	       floater = PREV_INSN (floater))
	    {
	      /* Notes and special USE/CLOBBER insns are transparent
		 to the scan.  */
	      if (GET_CODE (floater) == NOTE
		  || (GET_CODE (floater) == INSN
		      && (GET_CODE (PATTERN (floater)) == USE
			  || GET_CODE (PATTERN (floater)) == CLOBBER)))
		continue;

	      /* Anything except a regular INSN will stop our search.  */
	      if (GET_CODE (floater) != INSN
		  || GET_CODE (PATTERN (floater)) == ADDR_VEC
		  || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
		{
		  floater = NULL_RTX;
		  break;
		}

	      /* See if FLOATER is suitable for combination with the
		 anchor.  */
	      floater_attr = get_attr_pa_combine_type (floater);
	      if ((anchor_attr == PA_COMBINE_TYPE_FMPY
		   && floater_attr == PA_COMBINE_TYPE_FADDSUB)
		  || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
		      && floater_attr == PA_COMBINE_TYPE_FMPY))
		{
		  /* If ANCHOR and FLOATER can be combined, then we're
		     done with this pass.  */
		  if (pa_can_combine_p (new, anchor, floater, 0,
					SET_DEST (PATTERN (floater)),
					XEXP (SET_SRC (PATTERN (floater)), 0),
					XEXP (SET_SRC (PATTERN (floater)), 1)))
		    break;
		}

	      else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
		       && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
		{
		  /* An ADDMOVE floater is either an add (PLUS source,
		     two inputs) or a copy (single input used twice in
		     the dependence check below).  */
		  if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
		    {
		      if (pa_can_combine_p (new, anchor, floater, 0,
					    SET_DEST (PATTERN (floater)),
					XEXP (SET_SRC (PATTERN (floater)), 0),
					XEXP (SET_SRC (PATTERN (floater)), 1)))
			break;
		    }
		  else
		    {
		      if (pa_can_combine_p (new, anchor, floater, 0,
					    SET_DEST (PATTERN (floater)),
					    SET_SRC (PATTERN (floater)),
					    SET_SRC (PATTERN (floater))))
			break;
		    }
		}
	    }

	  /* If we didn't find anything on the backwards scan try forwards.  */
	  if (!floater
	      && (anchor_attr == PA_COMBINE_TYPE_FMPY
		  || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
	    {
	      for (floater = anchor; floater; floater = NEXT_INSN (floater))
		{
		  if (GET_CODE (floater) == NOTE
		      || (GET_CODE (floater) == INSN
			  && (GET_CODE (PATTERN (floater)) == USE
			      || GET_CODE (PATTERN (floater)) == CLOBBER)))

		    continue;

		  /* Anything except a regular INSN will stop our search.  */
		  if (GET_CODE (floater) != INSN
		      || GET_CODE (PATTERN (floater)) == ADDR_VEC
		      || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
		    {
		      floater = NULL_RTX;
		      break;
		    }

		  /* See if FLOATER is suitable for combination with the
		     anchor.  */
		  floater_attr = get_attr_pa_combine_type (floater);
		  if ((anchor_attr == PA_COMBINE_TYPE_FMPY
		       && floater_attr == PA_COMBINE_TYPE_FADDSUB)
		      || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
			  && floater_attr == PA_COMBINE_TYPE_FMPY))
		    {
		      /* If ANCHOR and FLOATER can be combined, then we're
			 done with this pass.  Note REVERSED is 1 here as
			 the floater follows the anchor.  */
		      if (pa_can_combine_p (new, anchor, floater, 1,
					    SET_DEST (PATTERN (floater)),
					    XEXP (SET_SRC (PATTERN (floater)),
						  0),
					    XEXP (SET_SRC (PATTERN (floater)),
						  1)))
			break;
		    }
		}
	    }

	  /* FLOATER will be nonzero if we found a suitable floating
	     insn for combination with ANCHOR.  */
	  if (floater
	      && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
		  || anchor_attr == PA_COMBINE_TYPE_FMPY))
	    {
	      /* Emit the new instruction and delete the old anchor.  */
	      emit_insn_before (gen_rtx_PARALLEL
				(VOIDmode,
				 gen_rtvec (2, PATTERN (anchor),
					    PATTERN (floater))),
				anchor);

	      /* Turn the anchor into a deleted note in place rather than
		 unlinking it from the insn chain.  */
	      PUT_CODE (anchor, NOTE);
	      NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
	      NOTE_SOURCE_FILE (anchor) = 0;

	      /* Emit a special USE insn for FLOATER, then delete
		 the floating insn.  */
	      emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
	      delete_insn (floater);

	      continue;
	    }
	  else if (floater
		   && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
	    {
	      rtx temp;
	      /* Emit the new_jump instruction and delete the old anchor.  */
	      temp
		= emit_jump_insn_before (gen_rtx_PARALLEL
					 (VOIDmode,
					  gen_rtvec (2, PATTERN (anchor),
						     PATTERN (floater))),
					 anchor);

	      JUMP_LABEL (temp) = JUMP_LABEL (anchor);
	      PUT_CODE (anchor, NOTE);
	      NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
	      NOTE_SOURCE_FILE (anchor) = 0;

	      /* Emit a special USE insn for FLOATER, then delete
		 the floating insn.  */
	      emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
	      delete_insn (floater);
	      continue;
	    }
	}
    }
}
8894 
8895 static int
pa_can_combine_p(rtx new,rtx anchor,rtx floater,int reversed,rtx dest,rtx src1,rtx src2)8896 pa_can_combine_p (rtx new, rtx anchor, rtx floater, int reversed, rtx dest,
8897 		  rtx src1, rtx src2)
8898 {
8899   int insn_code_number;
8900   rtx start, end;
8901 
8902   /* Create a PARALLEL with the patterns of ANCHOR and
8903      FLOATER, try to recognize it, then test constraints
8904      for the resulting pattern.
8905 
8906      If the pattern doesn't match or the constraints
8907      aren't met keep searching for a suitable floater
8908      insn.  */
8909   XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
8910   XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
8911   INSN_CODE (new) = -1;
8912   insn_code_number = recog_memoized (new);
8913   if (insn_code_number < 0
8914       || (extract_insn (new), ! constrain_operands (1)))
8915     return 0;
8916 
8917   if (reversed)
8918     {
8919       start = anchor;
8920       end = floater;
8921     }
8922   else
8923     {
8924       start = floater;
8925       end = anchor;
8926     }
8927 
8928   /* There's up to three operands to consider.  One
8929      output and two inputs.
8930 
8931      The output must not be used between FLOATER & ANCHOR
8932      exclusive.  The inputs must not be set between
8933      FLOATER and ANCHOR exclusive.  */
8934 
8935   if (reg_used_between_p (dest, start, end))
8936     return 0;
8937 
8938   if (reg_set_between_p (src1, start, end))
8939     return 0;
8940 
8941   if (reg_set_between_p (src2, start, end))
8942     return 0;
8943 
8944   /* If we get here, then everything is good.  */
8945   return 1;
8946 }
8947 
8948 /* Return nonzero if references for INSN are delayed.
8949 
8950    Millicode insns are actually function calls with some special
8951    constraints on arguments and register usage.
8952 
8953    Millicode calls always expect their arguments in the integer argument
8954    registers, and always return their result in %r29 (ret1).  They
8955    are expected to clobber their arguments, %r1, %r29, and the return
8956    pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
8957 
8958    This function tells reorg that the references to arguments and
8959    millicode calls do not appear to happen until after the millicode call.
8960    This allows reorg to put insns which set the argument registers into the
8961    delay slot of the millicode call -- thus they act more like traditional
8962    CALL_INSNs.
8963 
8964    Note we cannot consider side effects of the insn to be delayed because
8965    the branch and link insn will clobber the return pointer.  If we happened
8966    to use the return pointer in the delay slot of the call, then we lose.
8967 
8968    get_attr_type will try to recognize the given insn, so make sure to
8969    filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
8970    in particular.  */
8971 int
insn_refs_are_delayed(rtx insn)8972 insn_refs_are_delayed (rtx insn)
8973 {
8974   return ((GET_CODE (insn) == INSN
8975 	   && GET_CODE (PATTERN (insn)) != SEQUENCE
8976 	   && GET_CODE (PATTERN (insn)) != USE
8977 	   && GET_CODE (PATTERN (insn)) != CLOBBER
8978 	   && get_attr_type (insn) == TYPE_MILLI));
8979 }
8980 
8981 /* On the HP-PA the value is found in register(s) 28(-29), unless
8982    the mode is SF or DF. Then the value is returned in fr4 (32).
8983 
8984    This must perform the same promotions as PROMOTE_MODE, else
8985    TARGET_PROMOTE_FUNCTION_RETURN will not work correctly.
8986 
8987    Small structures must be returned in a PARALLEL on PA64 in order
8988    to match the HP Compiler ABI.  */
8989 
8990 rtx
function_value(tree valtype,tree func ATTRIBUTE_UNUSED)8991 function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
8992 {
8993   enum machine_mode valmode;
8994 
8995   if (AGGREGATE_TYPE_P (valtype)
8996       || TREE_CODE (valtype) == COMPLEX_TYPE
8997       || TREE_CODE (valtype) == VECTOR_TYPE)
8998     {
8999       if (TARGET_64BIT)
9000 	{
9001           /* Aggregates with a size less than or equal to 128 bits are
9002 	     returned in GR 28(-29).  They are left justified.  The pad
9003 	     bits are undefined.  Larger aggregates are returned in
9004 	     memory.  */
9005 	  rtx loc[2];
9006 	  int i, offset = 0;
9007 	  int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
9008 
9009 	  for (i = 0; i < ub; i++)
9010 	    {
9011 	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9012 					  gen_rtx_REG (DImode, 28 + i),
9013 					  GEN_INT (offset));
9014 	      offset += 8;
9015 	    }
9016 
9017 	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9018 	}
9019       else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
9020 	{
9021 	  /* Aggregates 5 to 8 bytes in size are returned in general
9022 	     registers r28-r29 in the same manner as other non
9023 	     floating-point objects.  The data is right-justified and
9024 	     zero-extended to 64 bits.  This is opposite to the normal
9025 	     justification used on big endian targets and requires
9026 	     special treatment.  */
9027 	  rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9028 				       gen_rtx_REG (DImode, 28), const0_rtx);
9029 	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9030 	}
9031     }
9032 
9033   if ((INTEGRAL_TYPE_P (valtype)
9034        && TYPE_PRECISION (valtype) < BITS_PER_WORD)
9035       || POINTER_TYPE_P (valtype))
9036     valmode = word_mode;
9037   else
9038     valmode = TYPE_MODE (valtype);
9039 
9040   if (TREE_CODE (valtype) == REAL_TYPE
9041       && !AGGREGATE_TYPE_P (valtype)
9042       && TYPE_MODE (valtype) != TFmode
9043       && !TARGET_SOFT_FLOAT)
9044     return gen_rtx_REG (valmode, 32);
9045 
9046   return gen_rtx_REG (valmode, 28);
9047 }
9048 
/* Return the location of a parameter that is passed in a register or NULL
   if the parameter has any component that is passed in memory.

   The result is either a single REG, or a PARALLEL describing one or
   more register pieces (and, for unprototyped FP arguments, a pair of
   FP and general registers holding the same value).

   This is new code and will be pushed to into the net sources after
   further testing.

   ??? We might want to restructure this so that it looks more like other
   ports.  */
rtx
function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
	      int named ATTRIBUTE_UNUSED)
{
  int max_arg_words = (TARGET_64BIT ? 8 : 4);
  int alignment = 0;
  int arg_size;
  int fpr_reg_base;
  int gpr_reg_base;
  rtx retval;

  /* NOTE(review): presumably the VOIDmode marker that terminates a
     libcall argument list -- nothing is passed in registers for it.  */
  if (mode == VOIDmode)
    return NULL_RTX;

  arg_size = FUNCTION_ARG_SIZE (mode, type);

  /* If this arg would be passed partially or totally on the stack, then
     this routine should return zero.  pa_arg_partial_bytes will
     handle arguments which are split between regs and stack slots if
     the ABI mandates split arguments.  */
  if (!TARGET_64BIT)
    {
      /* The 32-bit ABI does not split arguments.  */
      if (cum->words + arg_size > max_arg_words)
	return NULL_RTX;
    }
  else
    {
      /* Multiword arguments start on an even (16-byte aligned) slot.  */
      if (arg_size > 1)
	alignment = cum->words & 1;
      if (cum->words + alignment >= max_arg_words)
	return NULL_RTX;
    }

  /* The 32bit ABIs and the 64bit ABIs are rather different,
     particularly in their handling of FP registers.  We might
     be able to cleverly share code between them, but I'm not
     going to bother in the hope that splitting them up results
     in code that is more easily understood.  */

  if (TARGET_64BIT)
    {
      /* Advance the base registers to their current locations.

         Remember, gprs grow towards smaller register numbers while
	 fprs grow to higher register numbers.  Also remember that
	 although FP regs are 32-bit addressable, we pretend that
	 the registers are 64-bits wide.  */
      gpr_reg_base = 26 - cum->words;
      fpr_reg_base = 32 + cum->words;

      /* Arguments wider than one word and small aggregates need special
	 treatment.  */
      if (arg_size > 1
	  || mode == BLKmode
	  || (type && (AGGREGATE_TYPE_P (type)
		       || TREE_CODE (type) == COMPLEX_TYPE
		       || TREE_CODE (type) == VECTOR_TYPE)))
	{
	  /* Double-extended precision (80-bit), quad-precision (128-bit)
	     and aggregates including complex numbers are aligned on
	     128-bit boundaries.  The first eight 64-bit argument slots
	     are associated one-to-one, with general registers r26
	     through r19, and also with floating-point registers fr4
	     through fr11.  Arguments larger than one word are always
	     passed in general registers.

	     Using a PARALLEL with a word mode register results in left
	     justified data on a big-endian target.  */

	  rtx loc[8];
	  int i, offset = 0, ub = arg_size;

	  /* Align the base register.  */
	  gpr_reg_base -= alignment;

	  /* Clamp to the slots that actually remain; pa_arg_partial_bytes
	     accounts for the portion that spills to the stack.  */
	  ub = MIN (ub, max_arg_words - cum->words - alignment);
	  for (i = 0; i < ub; i++)
	    {
	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
					  gen_rtx_REG (DImode, gpr_reg_base),
					  GEN_INT (offset));
	      gpr_reg_base -= 1;
	      offset += 8;
	    }

	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
	}
     }
  else
    {
      /* If the argument is larger than a word, then we know precisely
	 which registers we must use.  */
      if (arg_size > 1)
	{
	  if (cum->words)
	    {
	      /* Second slot pair: r23-r24 / fr7 (left+right halves).  */
	      gpr_reg_base = 23;
	      fpr_reg_base = 38;
	    }
	  else
	    {
	      /* First slot pair: r25-r26 / fr5.  */
	      gpr_reg_base = 25;
	      fpr_reg_base = 34;
	    }

	  /* Structures 5 to 8 bytes in size are passed in the general
	     registers in the same manner as other non floating-point
	     objects.  The data is right-justified and zero-extended
	     to 64 bits.  This is opposite to the normal justification
	     used on big endian targets and requires special treatment.
	     We now define BLOCK_REG_PADDING to pad these objects.
	     Aggregates, complex and vector types are passed in the same
	     manner as structures.  */
	  if (mode == BLKmode
	      || (type && (AGGREGATE_TYPE_P (type)
			   || TREE_CODE (type) == COMPLEX_TYPE
			   || TREE_CODE (type) == VECTOR_TYPE)))
	    {
	      rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (DImode, gpr_reg_base),
					   const0_rtx);
	      return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
	    }
	}
      else
        {
	   /* We have a single word (32 bits).  A simple computation
	      will get us the register #s we need.  */
	   gpr_reg_base = 26 - cum->words;
	   fpr_reg_base = 32 + 2 * cum->words;
	}
    }

  /* Determine if the argument needs to be passed in both general and
     floating point registers.  */
  if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
       /* If we are doing soft-float with portable runtime, then there
	  is no need to worry about FP regs.  */
       && !TARGET_SOFT_FLOAT
       /* The parameter must be some kind of scalar float, else we just
	  pass it in integer registers.  */
       && GET_MODE_CLASS (mode) == MODE_FLOAT
       /* The target function must not have a prototype.  */
       && cum->nargs_prototype <= 0
       /* libcalls do not need to pass items in both FP and general
	  registers.  */
       && type != NULL_TREE
       /* All this hair applies to "outgoing" args only.  This includes
	  sibcall arguments setup with FUNCTION_INCOMING_ARG.  */
       && !cum->incoming)
      /* Also pass outgoing floating arguments in both registers in indirect
	 calls with the 32 bit ABI and the HP assembler since there is no
	 way to the specify argument locations in static functions.  */
      || (!TARGET_64BIT
	  && !TARGET_GAS
	  && !cum->incoming
	  && cum->indirect
	  && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      /* The same value lives in both an FP and a general register; the
	 callee reads whichever copy matches its (unknown) expectation.  */
      retval
	= gen_rtx_PARALLEL
	    (mode,
	     gen_rtvec (2,
			gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (mode, fpr_reg_base),
					   const0_rtx),
			gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (mode, gpr_reg_base),
					   const0_rtx)));
    }
  else
    {
      /* See if we should pass this parameter in a general register.  */
      if (TARGET_SOFT_FLOAT
	  /* Indirect calls in the normal 32bit ABI require all arguments
	     to be passed in general registers.  */
	  || (!TARGET_PORTABLE_RUNTIME
	      && !TARGET_64BIT
	      && !TARGET_ELF32
	      && cum->indirect)
	  /* If the parameter is not a scalar floating-point parameter,
	     then it belongs in GPRs.  */
	  || GET_MODE_CLASS (mode) != MODE_FLOAT
	  /* Structure with single SFmode field belongs in GPR.  */
	  || (type && AGGREGATE_TYPE_P (type)))
	retval = gen_rtx_REG (mode, gpr_reg_base);
      else
	retval = gen_rtx_REG (mode, fpr_reg_base);
    }
  return retval;
}
9249 
9250 
9251 /* If this arg would be passed totally in registers or totally on the stack,
9252    then this routine should return zero.  */
9253 
9254 static int
pa_arg_partial_bytes(CUMULATIVE_ARGS * cum,enum machine_mode mode,tree type,bool named ATTRIBUTE_UNUSED)9255 pa_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
9256 		      tree type, bool named ATTRIBUTE_UNUSED)
9257 {
9258   unsigned int max_arg_words = 8;
9259   unsigned int offset = 0;
9260 
9261   if (!TARGET_64BIT)
9262     return 0;
9263 
9264   if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9265     offset = 1;
9266 
9267   if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9268     /* Arg fits fully into registers.  */
9269     return 0;
9270   else if (cum->words + offset >= max_arg_words)
9271     /* Arg fully on the stack.  */
9272     return 0;
9273   else
9274     /* Arg is split.  */
9275     return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
9276 }
9277 
9278 
9279 /* A get_unnamed_section callback for switching to the text section.
9280 
9281    This function is only used with SOM.  Because we don't support
9282    named subspaces, we can only create a new subspace or switch back
9283    to the default text subspace.  */
9284 
9285 static void
som_output_text_section_asm_op(const void * data ATTRIBUTE_UNUSED)9286 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9287 {
9288   gcc_assert (TARGET_SOM);
9289   if (TARGET_GAS)
9290     {
9291       if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9292 	{
9293 	  /* We only want to emit a .nsubspa directive once at the
9294 	     start of the function.  */
9295 	  cfun->machine->in_nsubspa = 1;
9296 
9297 	  /* Create a new subspace for the text.  This provides
9298 	     better stub placement and one-only functions.  */
9299 	  if (cfun->decl
9300 	      && DECL_ONE_ONLY (cfun->decl)
9301 	      && !DECL_WEAK (cfun->decl))
9302 	    {
9303 	      output_section_asm_op ("\t.SPACE $TEXT$\n"
9304 				     "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9305 				     "ACCESS=44,SORT=24,COMDAT");
9306 	      return;
9307 	    }
9308 	}
9309       else
9310 	{
9311 	  /* There isn't a current function or the body of the current
9312 	     function has been completed.  So, we are changing to the
9313 	     text section to output debugging information.  Thus, we
9314 	     need to forget that we are in the text section so that
9315 	     varasm.c will call us when text_section is selected again.  */
9316 	  gcc_assert (!cfun || !cfun->machine
9317 		      || cfun->machine->in_nsubspa == 2);
9318 	  in_section = NULL;
9319 	}
9320       output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9321       return;
9322     }
9323   output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9324 }
9325 
/* A get_unnamed_section callback for switching to comdat data
   sections.  This function is only used with SOM.

   DATA holds the assembler directives that create and select the
   comdat subspace (see pa_som_asm_init_sections).  */

static void
som_output_comdat_data_section_asm_op (const void *data)
{
  /* Forget the current section so that varasm.c calls us again the
     next time this section is selected.  */
  in_section = NULL;
  output_section_asm_op (data);
}
9335 
/* Implement TARGET_ASM_INITIALIZE_SECTIONS.

   Create the unnamed SOM-specific sections (text, readonly data and
   the one-only/comdat variants) and pick the section used for
   read-only data and exception information.  */

static void
pa_som_asm_init_sections (void)
{
  /* Text switching needs per-function state; use the dedicated
     callback rather than a fixed directive string.  */
  text_section
    = get_unnamed_section (0, som_output_text_section_asm_op, NULL);

  /* SOM puts readonly data in the default $LIT$ subspace when PIC code
     is not being generated.  */
  som_readonly_data_section
    = get_unnamed_section (0, output_section_asm_op,
			   "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");

  /* When secondary definitions are not supported, SOM makes readonly
     data one-only by creating a new $LIT$ subspace in $TEXT$ with
     the comdat flag.  */
  som_one_only_readonly_data_section
    = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
			   "\t.SPACE $TEXT$\n"
			   "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
			   "ACCESS=0x2c,SORT=16,COMDAT");


  /* When secondary definitions are not supported, SOM makes data one-only
     by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag.  */
  som_one_only_data_section
    = get_unnamed_section (SECTION_WRITE,
			   som_output_comdat_data_section_asm_op,
			   "\t.SPACE $PRIVATE$\n"
			   "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
			   "ACCESS=31,SORT=24,COMDAT");

  /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
     which reference data within the $TEXT$ space (for example constant
     strings in the $LIT$ subspace).

     The assemblers (GAS and HP as) both have problems with handling
     the difference of two symbols which is the other correct way to
     reference constant data during PIC code generation.

     So, there's no way to reference constant data which is in the
     $TEXT$ space during PIC generation.  Instead place all constant
     data into the $PRIVATE$ subspace (this reduces sharing, but it
     works correctly).  */
  readonly_data_section = flag_pic ? data_section : som_readonly_data_section;

  /* We must not have a reference to an external symbol defined in a
     shared library in a readonly section, else the SOM linker will
     complain.

     So, we force exception information into the data section.  */
  exception_section = data_section;
}
9390 
9391 /* On hpux10, the linker will give an error if we have a reference
9392    in the read-only data section to a symbol defined in a shared
9393    library.  Therefore, expressions that might require a reloc can
9394    not be placed in the read-only data section.  */
9395 
9396 static section *
pa_select_section(tree exp,int reloc,unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)9397 pa_select_section (tree exp, int reloc,
9398 		   unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9399 {
9400   if (TREE_CODE (exp) == VAR_DECL
9401       && TREE_READONLY (exp)
9402       && !TREE_THIS_VOLATILE (exp)
9403       && DECL_INITIAL (exp)
9404       && (DECL_INITIAL (exp) == error_mark_node
9405           || TREE_CONSTANT (DECL_INITIAL (exp)))
9406       && !reloc)
9407     {
9408       if (TARGET_SOM
9409 	  && DECL_ONE_ONLY (exp)
9410 	  && !DECL_WEAK (exp))
9411 	return som_one_only_readonly_data_section;
9412       else
9413 	return readonly_data_section;
9414     }
9415   else if (CONSTANT_CLASS_P (exp) && !reloc)
9416     return readonly_data_section;
9417   else if (TARGET_SOM
9418 	   && TREE_CODE (exp) == VAR_DECL
9419 	   && DECL_ONE_ONLY (exp)
9420 	   && !DECL_WEAK (exp))
9421     return som_one_only_data_section;
9422   else
9423     return data_section;
9424 }
9425 
/* Globalize the label NAME on STREAM.  We only handle DATA objects
   here; functions are globalized in ASM_DECLARE_FUNCTION_NAME.  */
static void
pa_globalize_label (FILE *stream, const char *name)
{
  if (FUNCTION_NAME_P (name))
    return;

  fputs ("\t.EXPORT ", stream);
  assemble_name (stream, name);
  fputs (",DATA\n", stream);
}
9438 
/* Worker function for TARGET_STRUCT_VALUE_RTX.

   The hidden structure-return pointer is always passed in the same
   fixed register (PA_STRUCT_VALUE_REGNUM), for both incoming and
   outgoing calls and regardless of function type.  */

static rtx
pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
		     int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
}
9447 
9448 /* Worker function for TARGET_RETURN_IN_MEMORY.  */
9449 
9450 bool
pa_return_in_memory(tree type,tree fntype ATTRIBUTE_UNUSED)9451 pa_return_in_memory (tree type, tree fntype ATTRIBUTE_UNUSED)
9452 {
9453   /* SOM ABI says that objects larger than 64 bits are returned in memory.
9454      PA64 ABI says that objects larger than 128 bits are returned in memory.
9455      Note, int_size_in_bytes can return -1 if the size of the object is
9456      variable or larger than the maximum value that can be expressed as
9457      a HOST_WIDE_INT.   It can also return zero for an empty type.  The
9458      simplest way to handle variable and empty types is to pass them in
9459      memory.  This avoids problems in defining the boundaries of argument
9460      slots, allocating registers, etc.  */
9461   return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9462 	  || int_size_in_bytes (type) <= 0);
9463 }
9464 
/* Structure to hold declaration and name of external symbols that are
   emitted by GCC.  We generate a vector of these symbols and output them
   at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
   This avoids putting out names that are never really used.  */

typedef struct extern_symbol GTY(())
{
  tree decl;		/* The external declaration.  */
  const char *name;	/* Its assembler name.  */
} extern_symbol;

/* Define gc'd vector type for extern_symbol.  */
DEF_VEC_O(extern_symbol);
DEF_VEC_ALLOC_O(extern_symbol,gc);

/* Vector of extern_symbol pointers.  Filled by
   pa_hpux_asm_output_external, drained by pa_hpux_file_end.  */
static GTY(()) VEC(extern_symbol,gc) *extern_symbols;
9482 
9483 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9484 /* Mark DECL (name NAME) as an external reference (assembler output
9485    file FILE).  This saves the names to output at the end of the file
9486    if actually referenced.  */
9487 
9488 void
pa_hpux_asm_output_external(FILE * file,tree decl,const char * name)9489 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9490 {
9491   extern_symbol * p = VEC_safe_push (extern_symbol, gc, extern_symbols, NULL);
9492 
9493   gcc_assert (file == asm_out_file);
9494   p->decl = decl;
9495   p->name = name;
9496 }
9497 
9498 /* Output text required at the end of an assembler file.
9499    This includes deferred plabels and .import directives for
9500    all external symbols that were actually referenced.  */
9501 
9502 static void
pa_hpux_file_end(void)9503 pa_hpux_file_end (void)
9504 {
9505   unsigned int i;
9506   extern_symbol *p;
9507 
9508   if (!NO_DEFERRED_PROFILE_COUNTERS)
9509     output_deferred_profile_counters ();
9510 
9511   output_deferred_plabels ();
9512 
9513   for (i = 0; VEC_iterate (extern_symbol, extern_symbols, i, p); i++)
9514     {
9515       tree decl = p->decl;
9516 
9517       if (!TREE_ASM_WRITTEN (decl)
9518 	  && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9519 	ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9520     }
9521 
9522   VEC_free (extern_symbol, gc, extern_symbols);
9523 }
9524 #endif
9525 
9526 #include "gt-pa.h"
9527