xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/config/arm/arm.c (revision 413d532bcc3f62d122e56d92e13ac64825a40baf)
1 /* Output routines for GCC for ARM.
2    Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3    2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4    Free Software Foundation, Inc.
5    Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6    and Martin Simmons (@harleqn.co.uk).
7    More major hacks by Richard Earnshaw (rearnsha@arm.com).
8 
9    This file is part of GCC.
10 
11    GCC is free software; you can redistribute it and/or modify it
12    under the terms of the GNU General Public License as published
13    by the Free Software Foundation; either version 3, or (at your
14    option) any later version.
15 
16    GCC is distributed in the hope that it will be useful, but WITHOUT
17    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
19    License for more details.
20 
21    You should have received a copy of the GNU General Public License
22    along with GCC; see the file COPYING3.  If not see
23    <http://www.gnu.org/licenses/>.  */
24 
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "obstack.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "real.h"
35 #include "insn-config.h"
36 #include "conditions.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "reload.h"
41 #include "function.h"
42 #include "expr.h"
43 #include "optabs.h"
44 #include "toplev.h"
45 #include "recog.h"
46 #include "cgraph.h"
47 #include "ggc.h"
48 #include "except.h"
49 #include "c-pragma.h"
50 #include "integrate.h"
51 #include "tm_p.h"
52 #include "target.h"
53 #include "target-def.h"
54 #include "debug.h"
55 #include "langhooks.h"
56 #include "df.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59 
60 /* Forward definitions of types.  */
61 typedef struct minipool_node    Mnode;
62 typedef struct minipool_fixup   Mfix;
63 
64 void (*arm_lang_output_object_attributes_hook)(void);
65 
66 /* Forward function declarations.  */
67 static int arm_compute_static_chain_stack_bytes (void);
68 static arm_stack_offsets *arm_get_frame_offsets (void);
69 static void arm_add_gc_roots (void);
70 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
71 			     HOST_WIDE_INT, rtx, rtx, int, int);
72 static unsigned bit_count (unsigned long);
73 static int arm_address_register_rtx_p (rtx, int);
74 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
75 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
76 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
77 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
78 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
79 inline static int thumb1_index_register_rtx_p (rtx, int);
80 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
81 static int thumb_far_jump_used_p (void);
82 static bool thumb_force_lr_save (void);
83 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
84 static rtx emit_sfm (int, int);
85 static unsigned arm_size_return_regs (void);
86 static bool arm_assemble_integer (rtx, unsigned int, int);
87 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
88 static arm_cc get_arm_condition_code (rtx);
89 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
90 static rtx is_jump_table (rtx);
91 static const char *output_multi_immediate (rtx *, const char *, const char *,
92 					   int, HOST_WIDE_INT);
93 static const char *shift_op (rtx, HOST_WIDE_INT *);
94 static struct machine_function *arm_init_machine_status (void);
95 static void thumb_exit (FILE *, int);
96 static rtx is_jump_table (rtx);
97 static HOST_WIDE_INT get_jump_table_size (rtx);
98 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
99 static Mnode *add_minipool_forward_ref (Mfix *);
100 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
101 static Mnode *add_minipool_backward_ref (Mfix *);
102 static void assign_minipool_offsets (Mfix *);
103 static void arm_print_value (FILE *, rtx);
104 static void dump_minipool (rtx);
105 static int arm_barrier_cost (rtx);
106 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
107 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
108 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
109 			       rtx);
110 static void arm_reorg (void);
111 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
112 static unsigned long arm_compute_save_reg0_reg12_mask (void);
113 static unsigned long arm_compute_save_reg_mask (void);
114 static unsigned long arm_isr_value (tree);
115 static unsigned long arm_compute_func_type (void);
116 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
117 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
118 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
119 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
120 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
121 #endif
122 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
123 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
124 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
125 static int arm_comp_type_attributes (const_tree, const_tree);
126 static void arm_set_default_type_attributes (tree);
127 static int arm_adjust_cost (rtx, rtx, rtx, int);
128 static int count_insns_for_constant (HOST_WIDE_INT, int);
129 static int arm_get_strip_length (int);
130 static bool arm_function_ok_for_sibcall (tree, tree);
131 static enum machine_mode arm_promote_function_mode (const_tree,
132 						    enum machine_mode, int *,
133 						    const_tree, int);
134 static bool arm_return_in_memory (const_tree, const_tree);
135 static rtx arm_function_value (const_tree, const_tree, bool);
136 static rtx arm_libcall_value (enum machine_mode, const_rtx);
137 
138 static void arm_internal_label (FILE *, const char *, unsigned long);
139 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
140 				 tree);
141 static bool arm_have_conditional_execution (void);
142 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
143 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
144 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
145 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
146 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
147 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
148 static bool arm_rtx_costs (rtx, int, int, int *, bool);
149 static int arm_address_cost (rtx, bool);
150 static bool arm_memory_load_p (rtx);
151 static bool arm_cirrus_insn_p (rtx);
152 static void cirrus_reorg (rtx);
153 static void arm_init_builtins (void);
154 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
155 static void arm_init_iwmmxt_builtins (void);
156 static rtx safe_vector_operand (rtx, enum machine_mode);
157 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
158 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
159 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
160 static void emit_constant_insn (rtx cond, rtx pattern);
161 static rtx emit_set_insn (rtx, rtx);
162 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
163 				  tree, bool);
164 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
165 				      const_tree);
166 static int aapcs_select_return_coproc (const_tree, const_tree);
167 
168 #ifdef OBJECT_FORMAT_ELF
169 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
170 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
171 #endif
172 #ifndef ARM_PE
173 static void arm_encode_section_info (tree, rtx, int);
174 #endif
175 
176 static void arm_file_end (void);
177 static void arm_file_start (void);
178 
179 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
180 					tree, int *, int);
181 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
182 				   enum machine_mode, const_tree, bool);
183 static bool arm_promote_prototypes (const_tree);
184 static bool arm_default_short_enums (void);
185 static bool arm_align_anon_bitfield (void);
186 static bool arm_return_in_msb (const_tree);
187 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
188 static bool arm_return_in_memory (const_tree, const_tree);
189 #ifdef TARGET_UNWIND_INFO
190 static void arm_unwind_emit (FILE *, rtx);
191 static bool arm_output_ttype (rtx);
192 #endif
193 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
194 static rtx arm_dwarf_register_span (rtx);
195 
196 static tree arm_cxx_guard_type (void);
197 static bool arm_cxx_guard_mask_bit (void);
198 static tree arm_get_cookie_size (tree);
199 static bool arm_cookie_has_size (void);
200 static bool arm_cxx_cdtor_returns_this (void);
201 static bool arm_cxx_key_method_may_be_inline (void);
202 static void arm_cxx_determine_class_data_visibility (tree);
203 static bool arm_cxx_class_data_always_comdat (void);
204 static bool arm_cxx_use_aeabi_atexit (void);
205 static void arm_init_libfuncs (void);
206 static tree arm_build_builtin_va_list (void);
207 static void arm_expand_builtin_va_start (tree, rtx);
208 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
209 static bool arm_handle_option (size_t, const char *, int);
210 static void arm_target_help (void);
211 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
212 static bool arm_cannot_copy_insn_p (rtx);
213 static bool arm_tls_symbol_p (rtx x);
214 static int arm_issue_rate (void);
215 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
216 static bool arm_allocate_stack_slots_for_args (void);
217 static const char *arm_invalid_parameter_type (const_tree t);
218 static const char *arm_invalid_return_type (const_tree t);
219 static tree arm_promoted_type (const_tree t);
220 static tree arm_convert_to_type (tree type, tree expr);
221 static bool arm_scalar_mode_supported_p (enum machine_mode);
222 static bool arm_frame_pointer_required (void);
223 static bool arm_can_eliminate (const int, const int);
224 static void arm_asm_trampoline_template (FILE *);
225 static void arm_trampoline_init (rtx, tree, rtx);
226 static rtx arm_trampoline_adjust_address (rtx);
227 
228 
229 /* Table of machine attributes.  */
230 static const struct attribute_spec arm_attribute_table[] =
231 {
232   /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
233   /* Function calls made to this symbol must be done indirectly, because
234      it may lie outside of the 26 bit addressing range of a normal function
235      call.  */
236   { "long_call",    0, 0, false, true,  true,  NULL },
237   /* Whereas these functions are always known to reside within the 26 bit
238      addressing range.  */
239   { "short_call",   0, 0, false, true,  true,  NULL },
240   /* Specify the procedure call conventions for a function.  */
241   { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute },
242   /* Interrupt Service Routines have special prologue and epilogue requirements.  */
243   { "isr",          0, 1, false, false, false, arm_handle_isr_attribute },
244   { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute },
245   { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute },
246 #ifdef ARM_PE
247   /* ARM/PE has three new attributes:
248      interfacearm - ?
249      dllexport - for exporting a function/variable that will live in a dll
250      dllimport - for importing a function/variable from a dll
251 
252      Microsoft allows multiple declspecs in one __declspec, separating
253      them with spaces.  We do NOT support this.  Instead, use __declspec
254      multiple times.
255   */
256   { "dllimport",    0, 0, true,  false, false, NULL },
257   { "dllexport",    0, 0, true,  false, false, NULL },
258   { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute },
259 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
260   { "dllimport",    0, 0, false, false, false, handle_dll_attribute },
261   { "dllexport",    0, 0, false, false, false, handle_dll_attribute },
262   { "notshared",    0, 0, false, true, false, arm_handle_notshared_attribute },
263 #endif
264   { NULL,           0, 0, false, false, false, NULL }
265 };
266 
267 /* Initialize the GCC target structure.  */
268 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
269 #undef  TARGET_MERGE_DECL_ATTRIBUTES
270 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
271 #endif
272 
273 #undef TARGET_LEGITIMIZE_ADDRESS
274 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
275 
276 #undef  TARGET_ATTRIBUTE_TABLE
277 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
278 
279 #undef TARGET_ASM_FILE_START
280 #define TARGET_ASM_FILE_START arm_file_start
281 #undef TARGET_ASM_FILE_END
282 #define TARGET_ASM_FILE_END arm_file_end
283 
284 #undef  TARGET_ASM_ALIGNED_SI_OP
285 #define TARGET_ASM_ALIGNED_SI_OP NULL
286 #undef  TARGET_ASM_INTEGER
287 #define TARGET_ASM_INTEGER arm_assemble_integer
288 
289 #undef  TARGET_ASM_FUNCTION_PROLOGUE
290 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
291 
292 #undef  TARGET_ASM_FUNCTION_EPILOGUE
293 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
294 
295 #undef  TARGET_DEFAULT_TARGET_FLAGS
296 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
297 #undef  TARGET_HANDLE_OPTION
298 #define TARGET_HANDLE_OPTION arm_handle_option
299 #undef  TARGET_HELP
300 #define TARGET_HELP arm_target_help
301 
302 #undef  TARGET_COMP_TYPE_ATTRIBUTES
303 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
304 
305 #undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
306 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
307 
308 #undef  TARGET_SCHED_ADJUST_COST
309 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
310 
311 #undef TARGET_ENCODE_SECTION_INFO
312 #ifdef ARM_PE
313 #define TARGET_ENCODE_SECTION_INFO  arm_pe_encode_section_info
314 #else
315 #define TARGET_ENCODE_SECTION_INFO  arm_encode_section_info
316 #endif
317 
318 #undef  TARGET_STRIP_NAME_ENCODING
319 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
320 
321 #undef  TARGET_ASM_INTERNAL_LABEL
322 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
323 
324 #undef  TARGET_FUNCTION_OK_FOR_SIBCALL
325 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
326 
327 #undef  TARGET_FUNCTION_VALUE
328 #define TARGET_FUNCTION_VALUE arm_function_value
329 
330 #undef  TARGET_LIBCALL_VALUE
331 #define TARGET_LIBCALL_VALUE arm_libcall_value
332 
333 #undef  TARGET_ASM_OUTPUT_MI_THUNK
334 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
335 #undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
336 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
337 
338 #undef  TARGET_RTX_COSTS
339 #define TARGET_RTX_COSTS arm_rtx_costs
340 #undef  TARGET_ADDRESS_COST
341 #define TARGET_ADDRESS_COST arm_address_cost
342 
343 #undef TARGET_SHIFT_TRUNCATION_MASK
344 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
345 #undef TARGET_VECTOR_MODE_SUPPORTED_P
346 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
347 
348 #undef  TARGET_MACHINE_DEPENDENT_REORG
349 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
350 
351 #undef  TARGET_INIT_BUILTINS
352 #define TARGET_INIT_BUILTINS  arm_init_builtins
353 #undef  TARGET_EXPAND_BUILTIN
354 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
355 
356 #undef TARGET_INIT_LIBFUNCS
357 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
358 
359 #undef TARGET_PROMOTE_FUNCTION_MODE
360 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
361 #undef TARGET_PROMOTE_PROTOTYPES
362 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
363 #undef TARGET_PASS_BY_REFERENCE
364 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
365 #undef TARGET_ARG_PARTIAL_BYTES
366 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
367 
368 #undef  TARGET_SETUP_INCOMING_VARARGS
369 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
370 
371 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
372 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
373 
374 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
375 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
376 #undef TARGET_TRAMPOLINE_INIT
377 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
378 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
379 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
380 
381 #undef TARGET_DEFAULT_SHORT_ENUMS
382 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
383 
384 #undef TARGET_ALIGN_ANON_BITFIELD
385 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
386 
387 #undef TARGET_NARROW_VOLATILE_BITFIELD
388 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
389 
390 #undef TARGET_CXX_GUARD_TYPE
391 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
392 
393 #undef TARGET_CXX_GUARD_MASK_BIT
394 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
395 
396 #undef TARGET_CXX_GET_COOKIE_SIZE
397 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
398 
399 #undef TARGET_CXX_COOKIE_HAS_SIZE
400 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
401 
402 #undef TARGET_CXX_CDTOR_RETURNS_THIS
403 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
404 
405 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
406 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
407 
408 #undef TARGET_CXX_USE_AEABI_ATEXIT
409 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
410 
411 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
412 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
413   arm_cxx_determine_class_data_visibility
414 
415 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
416 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
417 
418 #undef TARGET_RETURN_IN_MSB
419 #define TARGET_RETURN_IN_MSB arm_return_in_msb
420 
421 #undef TARGET_RETURN_IN_MEMORY
422 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
423 
424 #undef TARGET_MUST_PASS_IN_STACK
425 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
426 
427 #ifdef TARGET_UNWIND_INFO
428 #undef TARGET_UNWIND_EMIT
429 #define TARGET_UNWIND_EMIT arm_unwind_emit
430 
431 /* EABI unwinding tables use a different format for the typeinfo tables.  */
432 #undef TARGET_ASM_TTYPE
433 #define TARGET_ASM_TTYPE arm_output_ttype
434 
435 #undef TARGET_ARM_EABI_UNWINDER
436 #define TARGET_ARM_EABI_UNWINDER true
437 #endif /* TARGET_UNWIND_INFO */
438 
439 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
440 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
441 
442 #undef TARGET_DWARF_REGISTER_SPAN
443 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
444 
445 #undef  TARGET_CANNOT_COPY_INSN_P
446 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
447 
448 #ifdef HAVE_AS_TLS
449 #undef TARGET_HAVE_TLS
450 #define TARGET_HAVE_TLS true
451 #endif
452 
453 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
454 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
455 
456 #undef TARGET_CANNOT_FORCE_CONST_MEM
457 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
458 
459 #undef TARGET_MAX_ANCHOR_OFFSET
460 #define TARGET_MAX_ANCHOR_OFFSET 4095
461 
462 /* The minimum is set such that the total size of the block
463    for a particular anchor is -4088 + 1 + 4095 bytes, which is
464    divisible by eight, ensuring natural spacing of anchors.  */
465 #undef TARGET_MIN_ANCHOR_OFFSET
466 #define TARGET_MIN_ANCHOR_OFFSET -4088
467 
468 #undef TARGET_SCHED_ISSUE_RATE
469 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
470 
471 #undef TARGET_MANGLE_TYPE
472 #define TARGET_MANGLE_TYPE arm_mangle_type
473 
474 #undef TARGET_BUILD_BUILTIN_VA_LIST
475 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
476 #undef TARGET_EXPAND_BUILTIN_VA_START
477 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
478 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
479 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
480 
481 #ifdef HAVE_AS_TLS
482 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
483 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
484 #endif
485 
486 #undef TARGET_LEGITIMATE_ADDRESS_P
487 #define TARGET_LEGITIMATE_ADDRESS_P	arm_legitimate_address_p
488 
489 #undef TARGET_INVALID_PARAMETER_TYPE
490 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
491 
492 #undef TARGET_INVALID_RETURN_TYPE
493 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
494 
495 #undef TARGET_PROMOTED_TYPE
496 #define TARGET_PROMOTED_TYPE arm_promoted_type
497 
498 #undef TARGET_CONVERT_TO_TYPE
499 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
500 
501 #undef TARGET_SCALAR_MODE_SUPPORTED_P
502 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
503 
504 #undef TARGET_FRAME_POINTER_REQUIRED
505 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
506 
507 #undef TARGET_CAN_ELIMINATE
508 #define TARGET_CAN_ELIMINATE arm_can_eliminate
509 
510 struct gcc_target targetm = TARGET_INITIALIZER;
511 
512 /* Obstack for minipool constant handling.  */
513 static struct obstack minipool_obstack;
514 static char *         minipool_startobj;
515 
516 /* The maximum number of insns skipped which
517    will be conditionalised if possible.  */
518 static int max_insns_skipped = 5;
519 
520 extern FILE * asm_out_file;
521 
522 /* True if we are currently building a constant table.  */
523 int making_const_table;
524 
525 /* The processor for which instructions should be scheduled.  */
526 enum processor_type arm_tune = arm_none;
527 
528 /* The default processor used if not overridden by commandline.  */
529 static enum processor_type arm_default_cpu = arm_none;
530 
531 /* Which floating point hardware to schedule for.  */
532 int arm_fpu_attr;
533 
534 /* Which floating popint hardware to use.  */
535 const struct arm_fpu_desc *arm_fpu_desc;
536 
537 /* Whether to use floating point hardware.  */
538 enum float_abi_type arm_float_abi;
539 
540 /* Which __fp16 format to use.  */
541 enum arm_fp16_format_type arm_fp16_format;
542 
543 /* Which ABI to use.  */
544 enum arm_abi_type arm_abi;
545 
546 /* Which thread pointer model to use.  */
547 enum arm_tp_type target_thread_pointer = TP_AUTO;
548 
549 /* Used to parse -mstructure_size_boundary command line option.  */
550 int    arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
551 
552 /* Used for Thumb call_via trampolines.  */
553 rtx thumb_call_via_label[14];
554 static int thumb_call_reg_needed;
555 
556 /* Bit values used to identify processor capabilities.  */
557 #define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
558 #define FL_ARCH3M     (1 << 1)        /* Extended multiply */
559 #define FL_MODE26     (1 << 2)        /* 26-bit mode support */
560 #define FL_MODE32     (1 << 3)        /* 32-bit mode support */
561 #define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
562 #define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
563 #define FL_THUMB      (1 << 6)        /* Thumb aware */
564 #define FL_LDSCHED    (1 << 7)	      /* Load scheduling necessary */
565 #define FL_STRONG     (1 << 8)	      /* StrongARM */
566 #define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
567 #define FL_XSCALE     (1 << 10)	      /* XScale */
568 #define FL_CIRRUS     (1 << 11)	      /* Cirrus/DSP.  */
569 #define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
570 					 media instructions.  */
571 #define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
572 #define FL_WBUF	      (1 << 14)	      /* Schedule for write buffer ops.
573 					 Note: ARM6 & 7 derivatives only.  */
574 #define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
575 #define FL_THUMB2     (1 << 16)	      /* Thumb-2.  */
576 #define FL_NOTM	      (1 << 17)	      /* Instructions not present in the 'M'
577 					 profile.  */
578 #define FL_DIV	      (1 << 18)	      /* Hardware divide.  */
579 #define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
580 #define FL_NEON       (1 << 20)       /* Neon instructions.  */
581 #define FL_ARCH7EM    (1 << 21)	      /* Instructions present in the ARMv7E-M
582 					 architecture.  */
583 
584 #define FL_IWMMXT     (1 << 29)	      /* XScale v2 or "Intel Wireless MMX technology".  */
585 
586 #define FL_FOR_ARCH2	FL_NOTM
587 #define FL_FOR_ARCH3	(FL_FOR_ARCH2 | FL_MODE32)
588 #define FL_FOR_ARCH3M	(FL_FOR_ARCH3 | FL_ARCH3M)
589 #define FL_FOR_ARCH4	(FL_FOR_ARCH3M | FL_ARCH4)
590 #define FL_FOR_ARCH4T	(FL_FOR_ARCH4 | FL_THUMB)
591 #define FL_FOR_ARCH5	(FL_FOR_ARCH4 | FL_ARCH5)
592 #define FL_FOR_ARCH5T	(FL_FOR_ARCH5 | FL_THUMB)
593 #define FL_FOR_ARCH5E	(FL_FOR_ARCH5 | FL_ARCH5E)
594 #define FL_FOR_ARCH5TE	(FL_FOR_ARCH5E | FL_THUMB)
595 #define FL_FOR_ARCH5TEJ	FL_FOR_ARCH5TE
596 #define FL_FOR_ARCH6	(FL_FOR_ARCH5TE | FL_ARCH6)
597 #define FL_FOR_ARCH6J	FL_FOR_ARCH6
598 #define FL_FOR_ARCH6K	(FL_FOR_ARCH6 | FL_ARCH6K)
599 #define FL_FOR_ARCH6Z	FL_FOR_ARCH6
600 #define FL_FOR_ARCH6ZK	FL_FOR_ARCH6K
601 #define FL_FOR_ARCH6T2	(FL_FOR_ARCH6 | FL_THUMB2)
602 #define FL_FOR_ARCH6M	(FL_FOR_ARCH6 & ~FL_NOTM)
603 #define FL_FOR_ARCH7	(FL_FOR_ARCH6T2 &~ FL_NOTM)
604 #define FL_FOR_ARCH7A	(FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
605 #define FL_FOR_ARCH7R	(FL_FOR_ARCH7A | FL_DIV)
606 #define FL_FOR_ARCH7M	(FL_FOR_ARCH7 | FL_DIV)
607 #define FL_FOR_ARCH7EM  (FL_FOR_ARCH7M | FL_ARCH7EM)
608 
609 /* The bits in this mask specify which
610    instructions we are allowed to generate.  */
611 static unsigned long insn_flags = 0;
612 
613 /* The bits in this mask specify which instruction scheduling options should
614    be used.  */
615 static unsigned long tune_flags = 0;
616 
617 /* The following are used in the arm.md file as equivalents to bits
618    in the above two flag variables.  */
619 
620 /* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
621 int arm_arch3m = 0;
622 
623 /* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
624 int arm_arch4 = 0;
625 
626 /* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
627 int arm_arch4t = 0;
628 
629 /* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
630 int arm_arch5 = 0;
631 
632 /* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
633 int arm_arch5e = 0;
634 
635 /* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
636 int arm_arch6 = 0;
637 
638 /* Nonzero if this chip supports the ARM 6K extensions.  */
639 int arm_arch6k = 0;
640 
641 /* Nonzero if instructions not present in the 'M' profile can be used.  */
642 int arm_arch_notm = 0;
643 
644 /* Nonzero if instructions present in ARMv7E-M can be used.  */
645 int arm_arch7em = 0;
646 
647 /* Nonzero if this chip can benefit from load scheduling.  */
648 int arm_ld_sched = 0;
649 
650 /* Nonzero if this chip is a StrongARM.  */
651 int arm_tune_strongarm = 0;
652 
653 /* Nonzero if this chip is a Cirrus variant.  */
654 int arm_arch_cirrus = 0;
655 
656 /* Nonzero if this chip supports Intel Wireless MMX technology.  */
657 int arm_arch_iwmmxt = 0;
658 
659 /* Nonzero if this chip is an XScale.  */
660 int arm_arch_xscale = 0;
661 
662 /* Nonzero if tuning for XScale  */
663 int arm_tune_xscale = 0;
664 
665 /* Nonzero if we want to tune for stores that access the write-buffer.
666    This typically means an ARM6 or ARM7 with MMU or MPU.  */
667 int arm_tune_wbuf = 0;
668 
669 /* Nonzero if tuning for Cortex-A9.  */
670 int arm_tune_cortex_a9 = 0;
671 
672 /* Nonzero if generating Thumb instructions.  */
673 int thumb_code = 0;
674 
675 /* Nonzero if we should define __THUMB_INTERWORK__ in the
676    preprocessor.
677    XXX This is a bit of a hack, it's intended to help work around
678    problems in GLD which doesn't understand that armv5t code is
679    interworking clean.  */
680 int arm_cpp_interwork = 0;
681 
682 /* Nonzero if chip supports Thumb 2.  */
683 int arm_arch_thumb2;
684 
685 /* Nonzero if chip supports integer division instruction.  */
686 int arm_arch_hwdiv;
687 
688 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
689    must report the mode of the memory reference from PRINT_OPERAND to
690    PRINT_OPERAND_ADDRESS.  */
691 enum machine_mode output_memory_reference_mode;
692 
693 /* The register number to be used for the PIC offset register.  */
694 unsigned arm_pic_register = INVALID_REGNUM;
695 
696 /* Set to 1 after arm_reorg has started.  Reset to start at the start of
697    the next function.  */
698 static int after_arm_reorg = 0;
699 
700 /* The maximum number of insns to be used when loading a constant.  */
701 static int arm_constant_limit = 3;
702 
703 static enum arm_pcs arm_pcs_default;
704 
705 /* For an explanation of these variables, see final_prescan_insn below.  */
706 int arm_ccfsm_state;
707 /* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
708 enum arm_cond_code arm_current_cc;
709 rtx arm_target_insn;
710 int arm_target_label;
711 /* The number of conditionally executed insns, including the current insn.  */
712 int arm_condexec_count = 0;
713 /* A bitmask specifying the patterns for the IT block.
714    Zero means do not output an IT block before this insn. */
715 int arm_condexec_mask = 0;
716 /* The number of bits used in arm_condexec_mask.  */
717 int arm_condexec_masklen = 0;
718 
719 /* The condition codes of the ARM, and the inverse function.  */
720 static const char * const arm_condition_codes[] =
721 {
722   "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
723   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
724 };
725 
726 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
727 #define streq(string1, string2) (strcmp (string1, string2) == 0)
728 
729 #define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
730 				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
731 				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
732 
733 /* Initialization code.  */
734 
735 struct processors
736 {
737   const char *const name;
738   enum processor_type core;
739   const char *arch;
740   const unsigned long flags;
741   bool (* rtx_costs) (rtx, enum rtx_code, enum rtx_code, int *, bool);
742 };
743 
744 /* Not all of these give usefully different compilation alternatives,
745    but there is no simple way of generalizing them.  */
746 static const struct processors all_cores[] =
747 {
748   /* ARM Cores */
749 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
750   {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
751 #include "arm-cores.def"
752 #undef ARM_CORE
753   {NULL, arm_none, NULL, 0, NULL}
754 };
755 
756 static const struct processors all_architectures[] =
757 {
758   /* ARM Architectures */
759   /* We don't specify rtx_costs here as it will be figured out
760      from the core.  */
761 
762   {"armv2",   arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
763   {"armv2a",  arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
764   {"armv3",   arm6,       "3",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
765   {"armv3m",  arm7m,      "3M",  FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
766   {"armv4",   arm7tdmi,   "4",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
767   /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
768      implementations that support it, so we will leave it out for now.  */
769   {"armv4t",  arm7tdmi,   "4T",  FL_CO_PROC |             FL_FOR_ARCH4T, NULL},
770   {"armv5",   arm10tdmi,  "5",   FL_CO_PROC |             FL_FOR_ARCH5, NULL},
771   {"armv5t",  arm10tdmi,  "5T",  FL_CO_PROC |             FL_FOR_ARCH5T, NULL},
772   {"armv5e",  arm1026ejs, "5E",  FL_CO_PROC |             FL_FOR_ARCH5E, NULL},
773   {"armv5te", arm1026ejs, "5TE", FL_CO_PROC |             FL_FOR_ARCH5TE, NULL},
774   {"armv6",   arm1136js,  "6",   FL_CO_PROC |             FL_FOR_ARCH6, NULL},
775   {"armv6j",  arm1136js,  "6J",  FL_CO_PROC |             FL_FOR_ARCH6J, NULL},
776   {"armv6k",  mpcore,	  "6K",  FL_CO_PROC |             FL_FOR_ARCH6K, NULL},
777   {"armv6z",  arm1176jzs, "6Z",  FL_CO_PROC |             FL_FOR_ARCH6Z, NULL},
778   {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC |             FL_FOR_ARCH6ZK, NULL},
779   {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC |             FL_FOR_ARCH6T2, NULL},
780   {"armv6-m", cortexm1,	  "6M",				  FL_FOR_ARCH6M, NULL},
781   {"armv7",   cortexa8,	  "7",	 FL_CO_PROC |		  FL_FOR_ARCH7, NULL},
782   {"armv7-a", cortexa8,	  "7A",	 FL_CO_PROC |		  FL_FOR_ARCH7A, NULL},
783   {"armv7-r", cortexr4,	  "7R",	 FL_CO_PROC |		  FL_FOR_ARCH7R, NULL},
784   {"armv7-m", cortexm3,	  "7M",	 FL_CO_PROC |		  FL_FOR_ARCH7M, NULL},
785   {"armv7e-m",   cortexm3, "7EM", FL_CO_PROC |		  FL_FOR_ARCH7EM, NULL},
786   {"ep9312",  ep9312,     "4T",  FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
787   {"iwmmxt",  iwmmxt,     "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
788   {"iwmmxt2", iwmmxt2,     "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
789   {NULL, arm_none, NULL, 0 , NULL}
790 };
791 
792 struct arm_cpu_select
793 {
794   const char *              string;
795   const char *              name;
796   const struct processors * processors;
797 };
798 
799 /* This is a magic structure.  The 'string' field is magically filled in
800    with a pointer to the value specified by the user on the command line
801    assuming that the user has specified such a value.  */
802 
803 static struct arm_cpu_select arm_select[] =
804 {
805   /* string	  name            processors  */
806   { NULL,	"-mcpu=",	all_cores  },
807   { NULL,	"-march=",	all_architectures },
808   { NULL,	"-mtune=",	all_cores }
809 };
810 
811 /* Defines representing the indexes into the above table.  */
812 #define ARM_OPT_SET_CPU 0
813 #define ARM_OPT_SET_ARCH 1
814 #define ARM_OPT_SET_TUNE 2
815 
816 /* The name of the preprocessor macro to define for this architecture.  */
817 
818 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
819 
820 /* Available values for -mfpu=.  */
821 
822 static const struct arm_fpu_desc all_fpus[] =
823 {
824   {"fpa",		ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false},
825   {"fpe2",		ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false},
826   {"fpe3",		ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false},
827   {"maverick",		ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false},
828   {"vfp",		ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
829   {"vfpv3",		ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
830   {"vfpv3-fp16",	ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true},
831   {"vfpv3-d16",		ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
832   {"vfpv3-d16-fp16",	ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true},
833   {"vfpv3xd",		ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false},
834   {"vfpv3xd-fp16",	ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true},
835   {"neon",		ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false},
836   {"neon-fp16",		ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true },
837   {"vfpv4",		ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true},
838   {"vfpv4-d16",		ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true},
839   {"fpv4-sp-d16",	ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true},
840   {"neon-vfpv4",	ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true},
841   /* Compatibility aliases.  */
842   {"vfp3",		ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
843 };
844 
845 
846 struct float_abi
847 {
848   const char * name;
849   enum float_abi_type abi_type;
850 };
851 
852 
853 /* Available values for -mfloat-abi=.  */
854 
855 static const struct float_abi all_float_abis[] =
856 {
857   {"soft",	ARM_FLOAT_ABI_SOFT},
858   {"softfp",	ARM_FLOAT_ABI_SOFTFP},
859   {"hard",	ARM_FLOAT_ABI_HARD}
860 };
861 
862 
863 struct fp16_format
864 {
865   const char *name;
866   enum arm_fp16_format_type fp16_format_type;
867 };
868 
869 
870 /* Available values for -mfp16-format=.  */
871 
872 static const struct fp16_format all_fp16_formats[] =
873 {
874   {"none",		ARM_FP16_FORMAT_NONE},
875   {"ieee",		ARM_FP16_FORMAT_IEEE},
876   {"alternative",	ARM_FP16_FORMAT_ALTERNATIVE}
877 };
878 
879 
880 struct abi_name
881 {
882   const char *name;
883   enum arm_abi_type abi_type;
884 };
885 
886 
887 /* Available values for -mabi=.  */
888 
889 static const struct abi_name arm_all_abis[] =
890 {
891   {"apcs-gnu",    ARM_ABI_APCS},
892   {"atpcs",   ARM_ABI_ATPCS},
893   {"aapcs",   ARM_ABI_AAPCS},
894   {"iwmmxt",  ARM_ABI_IWMMXT},
895   {"aapcs-linux",   ARM_ABI_AAPCS_LINUX}
896 };
897 
898 /* Supported TLS relocations.  */
899 
900 enum tls_reloc {
901   TLS_GD32,
902   TLS_LDM32,
903   TLS_LDO32,
904   TLS_IE32,
905   TLS_LE32
906 };
907 
908 /* Emit an insn that's a simple single-set.  Both the operands must be known
909    to be valid.  */
910 inline static rtx
911 emit_set_insn (rtx x, rtx y)
912 {
913   return emit_insn (gen_rtx_SET (VOIDmode, x, y));
914 }
915 
916 /* Return the number of bits set in VALUE.  */
917 static unsigned
918 bit_count (unsigned long value)
919 {
920   unsigned long count = 0;
921 
922   while (value)
923     {
924       count++;
925       value &= value - 1;  /* Clear the least-significant set bit.  */
926     }
927 
928   return count;
929 }
930 
931 /* Set up library functions unique to ARM.  */
932 
933 static void
934 arm_init_libfuncs (void)
935 {
936   /* There are no special library functions unless we are using the
937      ARM BPABI.  */
938   if (!TARGET_BPABI)
939     return;
940 
941   /* The functions below are described in Section 4 of the "Run-Time
942      ABI for the ARM architecture", Version 1.0.  */
943 
944   /* Double-precision floating-point arithmetic.  Table 2.  */
945   set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
946   set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
947   set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
948   set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
949   set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
950 
951   /* Double-precision comparisons.  Table 3.  */
952   set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
953   set_optab_libfunc (ne_optab, DFmode, NULL);
954   set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
955   set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
956   set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
957   set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
958   set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
959 
960   /* Single-precision floating-point arithmetic.  Table 4.  */
961   set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
962   set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
963   set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
964   set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
965   set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
966 
967   /* Single-precision comparisons.  Table 5.  */
968   set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
969   set_optab_libfunc (ne_optab, SFmode, NULL);
970   set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
971   set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
972   set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
973   set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
974   set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
975 
976   /* Floating-point to integer conversions.  Table 6.  */
977   set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
978   set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
979   set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
980   set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
981   set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
982   set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
983   set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
984   set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
985 
986   /* Conversions between floating types.  Table 7.  */
987   set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
988   set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
989 
990   /* Integer to floating-point conversions.  Table 8.  */
991   set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
992   set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
993   set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
994   set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
995   set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
996   set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
997   set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
998   set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
999 
1000   /* Long long.  Table 9.  */
1001   set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1002   set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1003   set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1004   set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1005   set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1006   set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1007   set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1008   set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1009 
1010   /* Integer (32/32->32) division.  \S 4.3.1.  */
1011   set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1012   set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1013 
1014   /* The divmod functions are designed so that they can be used for
1015      plain division, even though they return both the quotient and the
1016      remainder.  The quotient is returned in the usual location (i.e.,
1017      r0 for SImode, {r0, r1} for DImode), just as would be expected
1018      for an ordinary division routine.  Because the AAPCS calling
1019      conventions specify that all of { r0, r1, r2, r3 } are
1020      callee-saved registers, there is no need to tell the compiler
1021      explicitly that those registers are clobbered by these
1022      routines.  */
1023   set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1024   set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
1025 
1026   /* For SImode division the ABI provides div-without-mod routines,
1027      which are faster.  */
1028   set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1029   set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1030 
1031   /* We don't have mod libcalls.  Fortunately gcc knows how to use the
1032      divmod libcalls instead.  */
1033   set_optab_libfunc (smod_optab, DImode, NULL);
1034   set_optab_libfunc (umod_optab, DImode, NULL);
1035   set_optab_libfunc (smod_optab, SImode, NULL);
1036   set_optab_libfunc (umod_optab, SImode, NULL);
1037 
1038   /* Half-precision float operations.  The compiler handles all operations
1039      with NULL libfuncs by converting the SFmode.  */
1040   switch (arm_fp16_format)
1041     {
1042     case ARM_FP16_FORMAT_IEEE:
1043     case ARM_FP16_FORMAT_ALTERNATIVE:
1044 
1045       /* Conversions.  */
1046       set_conv_libfunc (trunc_optab, HFmode, SFmode,
1047 			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
1048 			 ? "__gnu_f2h_ieee"
1049 			 : "__gnu_f2h_alternative"));
1050       set_conv_libfunc (sext_optab, SFmode, HFmode,
1051 			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
1052 			 ? "__gnu_h2f_ieee"
1053 			 : "__gnu_h2f_alternative"));
1054 
1055       /* Arithmetic.  */
1056       set_optab_libfunc (add_optab, HFmode, NULL);
1057       set_optab_libfunc (sdiv_optab, HFmode, NULL);
1058       set_optab_libfunc (smul_optab, HFmode, NULL);
1059       set_optab_libfunc (neg_optab, HFmode, NULL);
1060       set_optab_libfunc (sub_optab, HFmode, NULL);
1061 
1062       /* Comparisons.  */
1063       set_optab_libfunc (eq_optab, HFmode, NULL);
1064       set_optab_libfunc (ne_optab, HFmode, NULL);
1065       set_optab_libfunc (lt_optab, HFmode, NULL);
1066       set_optab_libfunc (le_optab, HFmode, NULL);
1067       set_optab_libfunc (ge_optab, HFmode, NULL);
1068       set_optab_libfunc (gt_optab, HFmode, NULL);
1069       set_optab_libfunc (unord_optab, HFmode, NULL);
1070       break;
1071 
1072     default:
1073       break;
1074     }
1075 
1076   if (TARGET_AAPCS_BASED)
1077     synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1078 }
1079 
1080 /* On AAPCS systems, this is the "struct __va_list".  */
1081 static GTY(()) tree va_list_type;
1082 
1083 /* Return the type to use as __builtin_va_list.  */
1084 static tree
1085 arm_build_builtin_va_list (void)
1086 {
1087   tree va_list_name;
1088   tree ap_field;
1089 
1090   if (!TARGET_AAPCS_BASED)
1091     return std_build_builtin_va_list ();
1092 
1093   /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1094      defined as:
1095 
1096        struct __va_list
1097        {
1098 	 void *__ap;
1099        };
1100 
1101      The C Library ABI further reinforces this definition in \S
1102      4.1.
1103 
1104      We must follow this definition exactly.  The structure tag
1105      name is visible in C++ mangled names, and thus forms a part
1106      of the ABI.  The field name may be used by people who
1107      #include <stdarg.h>.  */
1108   /* Create the type.  */
1109   va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1110   /* Give it the required name.  */
1111   va_list_name = build_decl (BUILTINS_LOCATION,
1112 			     TYPE_DECL,
1113 			     get_identifier ("__va_list"),
1114 			     va_list_type);
1115   DECL_ARTIFICIAL (va_list_name) = 1;
1116   TYPE_NAME (va_list_type) = va_list_name;
1117   TYPE_STUB_DECL (va_list_type) = va_list_name;
1118   /* Create the __ap field.  */
1119   ap_field = build_decl (BUILTINS_LOCATION,
1120 			 FIELD_DECL,
1121 			 get_identifier ("__ap"),
1122 			 ptr_type_node);
1123   DECL_ARTIFICIAL (ap_field) = 1;
1124   DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1125   TYPE_FIELDS (va_list_type) = ap_field;
1126   /* Compute its layout.  */
1127   layout_type (va_list_type);
1128 
1129   return va_list_type;
1130 }
1131 
1132 /* Return an expression of type "void *" pointing to the next
1133    available argument in a variable-argument list.  VALIST is the
1134    user-level va_list object, of type __builtin_va_list.  */
1135 static tree
1136 arm_extract_valist_ptr (tree valist)
1137 {
1138   if (TREE_TYPE (valist) == error_mark_node)
1139     return error_mark_node;
1140 
1141   /* On an AAPCS target, the pointer is stored within "struct
1142      va_list".  */
1143   if (TARGET_AAPCS_BASED)
1144     {
1145       tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1146       valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1147 		       valist, ap_field, NULL_TREE);
1148     }
1149 
1150   return valist;
1151 }
1152 
1153 /* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
1154 static void
1155 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1156 {
1157   valist = arm_extract_valist_ptr (valist);
1158   std_expand_builtin_va_start (valist, nextarg);
1159 }
1160 
1161 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
1162 static tree
1163 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1164 			  gimple_seq *post_p)
1165 {
1166   valist = arm_extract_valist_ptr (valist);
1167   return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1168 }
1169 
1170 /* Implement TARGET_HANDLE_OPTION.  */
1171 
1172 static bool
1173 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
1174 {
1175   switch (code)
1176     {
1177     case OPT_march_:
1178       arm_select[1].string = arg;
1179       return true;
1180 
1181     case OPT_mcpu_:
1182       arm_select[0].string = arg;
1183       return true;
1184 
1185     case OPT_mhard_float:
1186       target_float_abi_name = "hard";
1187       return true;
1188 
1189     case OPT_msoft_float:
1190       target_float_abi_name = "soft";
1191       return true;
1192 
1193     case OPT_mtune_:
1194       arm_select[2].string = arg;
1195       return true;
1196 
1197     default:
1198       return true;
1199     }
1200 }
1201 
1202 static void
1203 arm_target_help (void)
1204 {
1205   int i;
1206   static int columns = 0;
1207   int remaining;
1208 
1209   /* If we have not done so already, obtain the desired maximum width of
1210      the output.  Note - this is a duplication of the code at the start of
1211      gcc/opts.c:print_specific_help() - the two copies should probably be
1212      replaced by a single function.  */
1213   if (columns == 0)
1214     {
1215       const char *p;
1216 
1217       GET_ENVIRONMENT (p, "COLUMNS");
1218       if (p != NULL)
1219 	{
1220 	  int value = atoi (p);
1221 
1222 	  if (value > 0)
1223 	    columns = value;
1224 	}
1225 
1226       if (columns == 0)
1227 	/* Use a reasonable default.  */
1228 	columns = 80;
1229     }
1230 
1231   printf ("  Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
1232 
1233   /* The - 2 is because we know that the last entry in the array is NULL.  */
1234   i = ARRAY_SIZE (all_cores) - 2;
1235   gcc_assert (i > 0);
1236   printf ("    %s", all_cores[i].name);
1237   remaining = columns - (strlen (all_cores[i].name) + 4);
1238   gcc_assert (remaining >= 0);
1239 
1240   while (i--)
1241     {
1242       int len = strlen (all_cores[i].name);
1243 
1244       if (remaining > len + 2)
1245 	{
1246 	  printf (", %s", all_cores[i].name);
1247 	  remaining -= len + 2;
1248 	}
1249       else
1250 	{
1251 	  if (remaining > 0)
1252 	    printf (",");
1253 	  printf ("\n    %s", all_cores[i].name);
1254 	  remaining = columns - (len + 4);
1255 	}
1256     }
1257 
1258   printf ("\n\n  Known ARM architectures (for use with the -march= option):\n");
1259 
1260   i = ARRAY_SIZE (all_architectures) - 2;
1261   gcc_assert (i > 0);
1262 
1263   printf ("    %s", all_architectures[i].name);
1264   remaining = columns - (strlen (all_architectures[i].name) + 4);
1265   gcc_assert (remaining >= 0);
1266 
1267   while (i--)
1268     {
1269       int len = strlen (all_architectures[i].name);
1270 
1271       if (remaining > len + 2)
1272 	{
1273 	  printf (", %s", all_architectures[i].name);
1274 	  remaining -= len + 2;
1275 	}
1276       else
1277 	{
1278 	  if (remaining > 0)
1279 	    printf (",");
1280 	  printf ("\n    %s", all_architectures[i].name);
1281 	  remaining = columns - (len + 4);
1282 	}
1283     }
1284   printf ("\n");
1285 
1286 }
1287 
1288 /* Fix up any incompatible options that the user has specified.
1289    This has now turned into a maze.  */
1290 void
1291 arm_override_options (void)
1292 {
1293   unsigned i;
1294   enum processor_type target_arch_cpu = arm_none;
1295   enum processor_type selected_cpu = arm_none;
1296 
1297   /* Set up the flags based on the cpu/architecture selected by the user.  */
1298   for (i = ARRAY_SIZE (arm_select); i--;)
1299     {
1300       struct arm_cpu_select * ptr = arm_select + i;
1301 
1302       if (ptr->string != NULL && ptr->string[0] != '\0')
1303         {
1304 	  const struct processors * sel;
1305 
1306           for (sel = ptr->processors; sel->name != NULL; sel++)
1307             if (streq (ptr->string, sel->name))
1308               {
1309 		/* Set the architecture define.  */
1310 		if (i != ARM_OPT_SET_TUNE)
1311 		  sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1312 
1313 		/* Determine the processor core for which we should
1314 		   tune code-generation.  */
1315 		if (/* -mcpu= is a sensible default.  */
1316 		    i == ARM_OPT_SET_CPU
1317 		    /* -mtune= overrides -mcpu= and -march=.  */
1318 		    || i == ARM_OPT_SET_TUNE)
1319 		  arm_tune = (enum processor_type) (sel - ptr->processors);
1320 
1321 		/* Remember the CPU associated with this architecture.
1322 		   If no other option is used to set the CPU type,
1323 		   we'll use this to guess the most suitable tuning
1324 		   options.  */
1325 		if (i == ARM_OPT_SET_ARCH)
1326 		  target_arch_cpu = sel->core;
1327 
1328 		if (i == ARM_OPT_SET_CPU)
1329 		  selected_cpu = (enum processor_type) (sel - ptr->processors);
1330 
1331 		if (i != ARM_OPT_SET_TUNE)
1332 		  {
1333 		    /* If we have been given an architecture and a processor
1334 		       make sure that they are compatible.  We only generate
1335 		       a warning though, and we prefer the CPU over the
1336 		       architecture.  */
1337 		    if (insn_flags != 0 && (insn_flags ^ sel->flags))
1338 		      warning (0, "switch -mcpu=%s conflicts with -march= switch",
1339 			       ptr->string);
1340 
1341 		    insn_flags = sel->flags;
1342 		  }
1343 
1344                 break;
1345               }
1346 
1347           if (sel->name == NULL)
1348             error ("bad value (%s) for %s switch", ptr->string, ptr->name);
1349         }
1350     }
1351 
1352   /* Guess the tuning options from the architecture if necessary.  */
1353   if (arm_tune == arm_none)
1354     arm_tune = target_arch_cpu;
1355 
1356   /* If the user did not specify a processor, choose one for them.  */
1357   if (insn_flags == 0)
1358     {
1359       const struct processors * sel;
1360       unsigned int        sought;
1361 
1362       selected_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
1363       if (selected_cpu == arm_none)
1364 	{
1365 #ifdef SUBTARGET_CPU_DEFAULT
1366 	  /* Use the subtarget default CPU if none was specified by
1367 	     configure.  */
1368 	  selected_cpu = (enum processor_type) SUBTARGET_CPU_DEFAULT;
1369 #endif
1370 	  /* Default to ARM6.  */
1371 	  if (selected_cpu == arm_none)
1372 	    selected_cpu = arm6;
1373 	}
1374       sel = &all_cores[selected_cpu];
1375 
1376       insn_flags = sel->flags;
1377 
1378       /* Now check to see if the user has specified some command line
1379 	 switch that require certain abilities from the cpu.  */
1380       sought = 0;
1381 
1382       if (TARGET_INTERWORK || TARGET_THUMB)
1383 	{
1384 	  sought |= (FL_THUMB | FL_MODE32);
1385 
1386 	  /* There are no ARM processors that support both APCS-26 and
1387 	     interworking.  Therefore we force FL_MODE26 to be removed
1388 	     from insn_flags here (if it was set), so that the search
1389 	     below will always be able to find a compatible processor.  */
1390 	  insn_flags &= ~FL_MODE26;
1391 	}
1392 
1393       if (sought != 0 && ((sought & insn_flags) != sought))
1394 	{
1395 	  /* Try to locate a CPU type that supports all of the abilities
1396 	     of the default CPU, plus the extra abilities requested by
1397 	     the user.  */
1398 	  for (sel = all_cores; sel->name != NULL; sel++)
1399 	    if ((sel->flags & sought) == (sought | insn_flags))
1400 	      break;
1401 
1402 	  if (sel->name == NULL)
1403 	    {
1404 	      unsigned current_bit_count = 0;
1405 	      const struct processors * best_fit = NULL;
1406 
1407 	      /* Ideally we would like to issue an error message here
1408 		 saying that it was not possible to find a CPU compatible
1409 		 with the default CPU, but which also supports the command
1410 		 line options specified by the programmer, and so they
1411 		 ought to use the -mcpu=<name> command line option to
1412 		 override the default CPU type.
1413 
1414 		 If we cannot find a cpu that has both the
1415 		 characteristics of the default cpu and the given
1416 		 command line options we scan the array again looking
1417 		 for a best match.  */
1418 	      for (sel = all_cores; sel->name != NULL; sel++)
1419 		if ((sel->flags & sought) == sought)
1420 		  {
1421 		    unsigned count;
1422 
1423 		    count = bit_count (sel->flags & insn_flags);
1424 
1425 		    if (count >= current_bit_count)
1426 		      {
1427 			best_fit = sel;
1428 			current_bit_count = count;
1429 		      }
1430 		  }
1431 
1432 	      gcc_assert (best_fit);
1433 	      sel = best_fit;
1434 	    }
1435 
1436 	  insn_flags = sel->flags;
1437 	}
1438       sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1439       arm_default_cpu = (enum processor_type) (sel - all_cores);
1440       if (arm_tune == arm_none)
1441 	arm_tune = arm_default_cpu;
1442     }
1443 
1444   /* The processor for which we should tune should now have been
1445      chosen.  */
1446   gcc_assert (arm_tune != arm_none);
1447 
1448   tune_flags = all_cores[(int)arm_tune].flags;
1449 
1450   if (target_fp16_format_name)
1451     {
1452       for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
1453 	{
1454 	  if (streq (all_fp16_formats[i].name, target_fp16_format_name))
1455 	    {
1456 	      arm_fp16_format = all_fp16_formats[i].fp16_format_type;
1457 	      break;
1458 	    }
1459 	}
1460       if (i == ARRAY_SIZE (all_fp16_formats))
1461 	error ("invalid __fp16 format option: -mfp16-format=%s",
1462 	       target_fp16_format_name);
1463     }
1464   else
1465     arm_fp16_format = ARM_FP16_FORMAT_NONE;
1466 
1467   if (target_abi_name)
1468     {
1469       for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1470 	{
1471 	  if (streq (arm_all_abis[i].name, target_abi_name))
1472 	    {
1473 	      arm_abi = arm_all_abis[i].abi_type;
1474 	      break;
1475 	    }
1476 	}
1477       if (i == ARRAY_SIZE (arm_all_abis))
1478 	error ("invalid ABI option: -mabi=%s", target_abi_name);
1479     }
1480   else
1481     arm_abi = ARM_DEFAULT_ABI;
1482 
1483   /* Make sure that the processor choice does not conflict with any of the
1484      other command line choices.  */
1485   if (TARGET_ARM && !(insn_flags & FL_NOTM))
1486     error ("target CPU does not support ARM mode");
1487 
1488   /* BPABI targets use linker tricks to allow interworking on cores
1489      without thumb support.  */
1490   if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1491     {
1492       warning (0, "target CPU does not support interworking" );
1493       target_flags &= ~MASK_INTERWORK;
1494     }
1495 
1496   if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1497     {
1498       warning (0, "target CPU does not support THUMB instructions");
1499       target_flags &= ~MASK_THUMB;
1500     }
1501 
1502   if (TARGET_APCS_FRAME && TARGET_THUMB)
1503     {
1504       /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1505       target_flags &= ~MASK_APCS_FRAME;
1506     }
1507 
1508   /* Callee super interworking implies thumb interworking.  Adding
1509      this to the flags here simplifies the logic elsewhere.  */
1510   if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1511       target_flags |= MASK_INTERWORK;
1512 
1513   /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1514      from here where no function is being compiled currently.  */
1515   if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1516     warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1517 
1518   if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1519     warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1520 
1521   if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
1522     warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");
1523 
1524   if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1525     {
1526       warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1527       target_flags |= MASK_APCS_FRAME;
1528     }
1529 
1530   if (TARGET_POKE_FUNCTION_NAME)
1531     target_flags |= MASK_APCS_FRAME;
1532 
1533   if (TARGET_APCS_REENT && flag_pic)
1534     error ("-fpic and -mapcs-reent are incompatible");
1535 
1536   if (TARGET_APCS_REENT)
1537     warning (0, "APCS reentrant code not supported.  Ignored");
1538 
1539   /* If this target is normally configured to use APCS frames, warn if they
1540      are turned off and debugging is turned on.  */
1541   if (TARGET_ARM
1542       && write_symbols != NO_DEBUG
1543       && !TARGET_APCS_FRAME
1544       && (TARGET_DEFAULT & MASK_APCS_FRAME))
1545     warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1546 
1547   if (TARGET_APCS_FLOAT)
1548     warning (0, "passing floating point arguments in fp regs not yet supported");
1549 
1550   /* Initialize boolean versions of the flags, for use in the arm.md file.  */
1551   arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1552   arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1553   arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1554   arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1555   arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1556   arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1557   arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1558   arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1559   arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1560   arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1561   arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1562   arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1563 
1564   arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1565   arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1566   thumb_code = (TARGET_ARM == 0);
1567   arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1568   arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1569   arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1570   arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1571   arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1572 
1573   /* If we are not using the default (ARM mode) section anchor offset
1574      ranges, then set the correct ranges now.  */
1575   if (TARGET_THUMB1)
1576     {
1577       /* Thumb-1 LDR instructions cannot have negative offsets.
1578          Permissible positive offset ranges are 5-bit (for byte loads),
1579          6-bit (for halfword loads), or 7-bit (for word loads).
1580          Empirical results suggest a 7-bit anchor range gives the best
1581          overall code size.  */
1582       targetm.min_anchor_offset = 0;
1583       targetm.max_anchor_offset = 127;
1584     }
1585   else if (TARGET_THUMB2)
1586     {
1587       /* The minimum is set such that the total size of the block
1588          for a particular anchor is 248 + 1 + 4095 bytes, which is
1589          divisible by eight, ensuring natural spacing of anchors.  */
1590       targetm.min_anchor_offset = -248;
1591       targetm.max_anchor_offset = 4095;
1592     }
1593 
1594   /* V5 code we generate is completely interworking capable, so we turn off
1595      TARGET_INTERWORK here to avoid many tests later on.  */
1596 
1597   /* XXX However, we must pass the right pre-processor defines to CPP
1598      or GLD can get confused.  This is a hack.  */
1599   if (TARGET_INTERWORK)
1600     arm_cpp_interwork = 1;
1601 
1602   if (arm_arch5)
1603     target_flags &= ~MASK_INTERWORK;
1604 
1605   if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1606     error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1607 
1608   if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1609     error ("iwmmxt abi requires an iwmmxt capable cpu");
1610 
1611   if (target_fpu_name == NULL && target_fpe_name != NULL)
1612     {
1613       if (streq (target_fpe_name, "2"))
1614 	target_fpu_name = "fpe2";
1615       else if (streq (target_fpe_name, "3"))
1616 	target_fpu_name = "fpe3";
1617       else
1618 	error ("invalid floating point emulation option: -mfpe=%s",
1619 	       target_fpe_name);
1620     }
1621 
1622   if (target_fpu_name == NULL)
1623     {
1624 #ifdef FPUTYPE_DEFAULT
1625       target_fpu_name = FPUTYPE_DEFAULT;
1626 #else
1627       if (arm_arch_cirrus)
1628 	target_fpu_name = "maverick";
1629       else
1630 	target_fpu_name = "fpe2";
1631 #endif
1632     }
1633 
1634   arm_fpu_desc = NULL;
1635   for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1636     {
1637       if (streq (all_fpus[i].name, target_fpu_name))
1638 	{
1639 	  arm_fpu_desc = &all_fpus[i];
1640 	  break;
1641 	}
1642     }
1643 
1644   if (!arm_fpu_desc)
1645     {
1646       error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1647       return;
1648     }
1649 
1650   switch (arm_fpu_desc->model)
1651     {
1652     case ARM_FP_MODEL_FPA:
1653       if (arm_fpu_desc->rev == 2)
1654 	arm_fpu_attr = FPU_FPE2;
1655       else if (arm_fpu_desc->rev == 3)
1656 	arm_fpu_attr = FPU_FPE3;
1657       else
1658 	arm_fpu_attr = FPU_FPA;
1659       break;
1660 
1661     case ARM_FP_MODEL_MAVERICK:
1662       arm_fpu_attr = FPU_MAVERICK;
1663       break;
1664 
1665     case ARM_FP_MODEL_VFP:
1666       arm_fpu_attr = FPU_VFP;
1667       break;
1668 
1669     default:
1670       gcc_unreachable();
1671     }
1672 
1673   if (target_float_abi_name != NULL)
1674     {
1675       /* The user specified a FP ABI.  */
1676       for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1677 	{
1678 	  if (streq (all_float_abis[i].name, target_float_abi_name))
1679 	    {
1680 	      arm_float_abi = all_float_abis[i].abi_type;
1681 	      break;
1682 	    }
1683 	}
1684       if (i == ARRAY_SIZE (all_float_abis))
1685 	error ("invalid floating point abi: -mfloat-abi=%s",
1686 	       target_float_abi_name);
1687     }
1688   else
1689     arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1690 
1691   if (TARGET_AAPCS_BASED
1692       && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1693     error ("FPA is unsupported in the AAPCS");
1694 
1695   if (TARGET_AAPCS_BASED)
1696     {
1697       if (TARGET_CALLER_INTERWORKING)
1698 	error ("AAPCS does not support -mcaller-super-interworking");
1699       else
1700 	if (TARGET_CALLEE_INTERWORKING)
1701 	  error ("AAPCS does not support -mcallee-super-interworking");
1702     }
1703 
1704   /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1705      VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1706      will ever exist.  GCC makes no attempt to support this combination.  */
1707   if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1708     sorry ("iWMMXt and hardware floating point");
1709 
1710   /* ??? iWMMXt insn patterns need auditing for Thumb-2.  */
1711   if (TARGET_THUMB2 && TARGET_IWMMXT)
1712     sorry ("Thumb-2 iWMMXt");
1713 
1714   /* __fp16 support currently assumes the core has ldrh.  */
1715   if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1716     sorry ("__fp16 and no ldrh");
1717 
1718   /* If soft-float is specified then don't use FPU.  */
1719   if (TARGET_SOFT_FLOAT)
1720     arm_fpu_attr = FPU_NONE;
1721 
1722   if (TARGET_AAPCS_BASED)
1723     {
1724       if (arm_abi == ARM_ABI_IWMMXT)
1725 	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1726       else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1727 	       && TARGET_HARD_FLOAT
1728 	       && TARGET_VFP)
1729 	arm_pcs_default = ARM_PCS_AAPCS_VFP;
1730       else
1731 	arm_pcs_default = ARM_PCS_AAPCS;
1732     }
1733   else
1734     {
1735       if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1736 	sorry ("-mfloat-abi=hard and VFP");
1737 
1738       if (arm_abi == ARM_ABI_APCS)
1739 	arm_pcs_default = ARM_PCS_APCS;
1740       else
1741 	arm_pcs_default = ARM_PCS_ATPCS;
1742     }
1743 
1744   /* For arm2/3 there is no need to do any scheduling if there is only
1745      a floating point emulator, or we are doing software floating-point.  */
1746   if ((TARGET_SOFT_FLOAT
1747        || (TARGET_FPA && arm_fpu_desc->rev))
1748       && (tune_flags & FL_MODE32) == 0)
1749     flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1750 
1751   if (target_thread_switch)
1752     {
1753       if (strcmp (target_thread_switch, "soft") == 0)
1754 	target_thread_pointer = TP_SOFT;
1755       else if (strcmp (target_thread_switch, "auto") == 0)
1756 	target_thread_pointer = TP_AUTO;
1757       else if (strcmp (target_thread_switch, "cp15") == 0)
1758 	target_thread_pointer = TP_CP15;
1759       else
1760 	error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1761     }
1762 
1763   /* Use the cp15 method if it is available.  */
1764   if (target_thread_pointer == TP_AUTO)
1765     {
1766       if (arm_arch6k && !TARGET_THUMB1)
1767 	target_thread_pointer = TP_CP15;
1768       else
1769 	target_thread_pointer = TP_SOFT;
1770     }
1771 
1772   if (TARGET_HARD_TP && TARGET_THUMB1)
1773     error ("can not use -mtp=cp15 with 16-bit Thumb");
1774 
1775   /* Override the default structure alignment for AAPCS ABI.  */
1776   if (TARGET_AAPCS_BASED)
1777     arm_structure_size_boundary = 8;
1778 
1779   if (structure_size_string != NULL)
1780     {
1781       int size = strtol (structure_size_string, NULL, 0);
1782 
1783       if (size == 8 || size == 32
1784 	  || (ARM_DOUBLEWORD_ALIGN && size == 64))
1785 	arm_structure_size_boundary = size;
1786       else
1787 	warning (0, "structure size boundary can only be set to %s",
1788 		 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1789     }
1790 
1791   if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1792     {
1793       error ("RTP PIC is incompatible with Thumb");
1794       flag_pic = 0;
1795     }
1796 
1797   /* If stack checking is disabled, we can use r10 as the PIC register,
1798      which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
1799   if (flag_pic && TARGET_SINGLE_PIC_BASE)
1800     {
1801       if (TARGET_VXWORKS_RTP)
1802 	warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1803       arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1804     }
1805 
1806   if (flag_pic && TARGET_VXWORKS_RTP)
1807     arm_pic_register = 9;
1808 
1809   if (arm_pic_register_string != NULL)
1810     {
1811       int pic_register = decode_reg_name (arm_pic_register_string);
1812 
1813       if (!flag_pic)
1814 	warning (0, "-mpic-register= is useless without -fpic");
1815 
1816       /* Prevent the user from choosing an obviously stupid PIC register.  */
1817       else if (pic_register < 0 || call_used_regs[pic_register]
1818 	       || pic_register == HARD_FRAME_POINTER_REGNUM
1819 	       || pic_register == STACK_POINTER_REGNUM
1820 	       || pic_register >= PC_REGNUM
1821 	       || (TARGET_VXWORKS_RTP
1822 		   && (unsigned int) pic_register != arm_pic_register))
1823 	error ("unable to use '%s' for PIC register", arm_pic_register_string);
1824       else
1825 	arm_pic_register = pic_register;
1826     }
1827 
1828   /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
1829   if (fix_cm3_ldrd == 2)
1830     {
1831       if (selected_cpu == cortexm3)
1832 	fix_cm3_ldrd = 1;
1833       else
1834 	fix_cm3_ldrd = 0;
1835     }
1836 
1837   if (TARGET_THUMB1 && flag_schedule_insns)
1838     {
1839       /* Don't warn since it's on by default in -O2.  */
1840       flag_schedule_insns = 0;
1841     }
1842 
1843   if (optimize_size)
1844     {
1845       arm_constant_limit = 1;
1846 
1847       /* If optimizing for size, bump the number of instructions that we
1848          are prepared to conditionally execute (even on a StrongARM).  */
1849       max_insns_skipped = 6;
1850     }
1851   else
1852     {
1853       /* For processors with load scheduling, it never costs more than
1854          2 cycles to load a constant, and the load scheduler may well
1855 	 reduce that to 1.  */
1856       if (arm_ld_sched)
1857         arm_constant_limit = 1;
1858 
1859       /* On XScale the longer latency of a load makes it more difficult
1860          to achieve a good schedule, so it's faster to synthesize
1861 	 constants that can be done in two insns.  */
1862       if (arm_tune_xscale)
1863         arm_constant_limit = 2;
1864 
1865       /* StrongARM has early execution of branches, so a sequence
1866          that is worth skipping is shorter.  */
1867       if (arm_tune_strongarm)
1868         max_insns_skipped = 3;
1869     }
1870 
1871   /* Hot/Cold partitioning is not currently supported, since we can't
1872      handle literal pool placement in that case.  */
1873   if (flag_reorder_blocks_and_partition)
1874     {
1875       inform (input_location,
1876 	      "-freorder-blocks-and-partition not supported on this architecture");
1877       flag_reorder_blocks_and_partition = 0;
1878       flag_reorder_blocks = 1;
1879     }
1880 
1881   /* Register global variables with the garbage collector.  */
1882   arm_add_gc_roots ();
1883 }
1884 
1885 static void
1886 arm_add_gc_roots (void)
1887 {
1888   gcc_obstack_init(&minipool_obstack);
1889   minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1890 }
1891 
1892 /* A table of known ARM exception types.
1893    For use with the interrupt function attribute.  */
1894 
1895 typedef struct
1896 {
1897   const char *const arg;
1898   const unsigned long return_value;
1899 }
1900 isr_attribute_arg;
1901 
1902 static const isr_attribute_arg isr_attribute_args [] =
1903 {
1904   { "IRQ",   ARM_FT_ISR },
1905   { "irq",   ARM_FT_ISR },
1906   { "FIQ",   ARM_FT_FIQ },
1907   { "fiq",   ARM_FT_FIQ },
1908   { "ABORT", ARM_FT_ISR },
1909   { "abort", ARM_FT_ISR },
1910   { "ABORT", ARM_FT_ISR },
1911   { "abort", ARM_FT_ISR },
1912   { "UNDEF", ARM_FT_EXCEPTION },
1913   { "undef", ARM_FT_EXCEPTION },
1914   { "SWI",   ARM_FT_EXCEPTION },
1915   { "swi",   ARM_FT_EXCEPTION },
1916   { NULL,    ARM_FT_NORMAL }
1917 };
1918 
1919 /* Returns the (interrupt) function type of the current
1920    function, or ARM_FT_UNKNOWN if the type cannot be determined.  */
1921 
1922 static unsigned long
1923 arm_isr_value (tree argument)
1924 {
1925   const isr_attribute_arg * ptr;
1926   const char *              arg;
1927 
1928   if (!arm_arch_notm)
1929     return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1930 
1931   /* No argument - default to IRQ.  */
1932   if (argument == NULL_TREE)
1933     return ARM_FT_ISR;
1934 
1935   /* Get the value of the argument.  */
1936   if (TREE_VALUE (argument) == NULL_TREE
1937       || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1938     return ARM_FT_UNKNOWN;
1939 
1940   arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1941 
1942   /* Check it against the list of known arguments.  */
1943   for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1944     if (streq (arg, ptr->arg))
1945       return ptr->return_value;
1946 
1947   /* An unrecognized interrupt type.  */
1948   return ARM_FT_UNKNOWN;
1949 }
1950 
1951 /* Computes the type of the current function.  */
1952 
1953 static unsigned long
1954 arm_compute_func_type (void)
1955 {
1956   unsigned long type = ARM_FT_UNKNOWN;
1957   tree a;
1958   tree attr;
1959 
1960   gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1961 
1962   /* Decide if the current function is volatile.  Such functions
1963      never return, and many memory cycles can be saved by not storing
1964      register values that will never be needed again.  This optimization
1965      was added to speed up context switching in a kernel application.  */
1966   if (optimize > 0
1967       && (TREE_NOTHROW (current_function_decl)
1968           || !(flag_unwind_tables
1969                || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
1970       && TREE_THIS_VOLATILE (current_function_decl))
1971     type |= ARM_FT_VOLATILE;
1972 
1973   if (cfun->static_chain_decl != NULL)
1974     type |= ARM_FT_NESTED;
1975 
1976   attr = DECL_ATTRIBUTES (current_function_decl);
1977 
1978   a = lookup_attribute ("naked", attr);
1979   if (a != NULL_TREE)
1980     type |= ARM_FT_NAKED;
1981 
1982   a = lookup_attribute ("isr", attr);
1983   if (a == NULL_TREE)
1984     a = lookup_attribute ("interrupt", attr);
1985 
1986   if (a == NULL_TREE)
1987     type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1988   else
1989     type |= arm_isr_value (TREE_VALUE (a));
1990 
1991   return type;
1992 }
1993 
1994 /* Returns the type of the current function.  */
1995 
1996 unsigned long
1997 arm_current_func_type (void)
1998 {
1999   if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2000     cfun->machine->func_type = arm_compute_func_type ();
2001 
2002   return cfun->machine->func_type;
2003 }
2004 
2005 bool
2006 arm_allocate_stack_slots_for_args (void)
2007 {
2008   /* Naked functions should not allocate stack slots for arguments.  */
2009   return !IS_NAKED (arm_current_func_type ());
2010 }
2011 
2012 
2013 /* Output assembler code for a block containing the constant parts
2014    of a trampoline, leaving space for the variable parts.
2015 
2016    On the ARM, (if r8 is the static chain regnum, and remembering that
2017    referencing pc adds an offset of 8) the trampoline looks like:
2018 	   ldr 		r8, [pc, #0]
2019 	   ldr		pc, [pc]
2020 	   .word	static chain value
2021 	   .word	function's address
2022    XXX FIXME: When the trampoline returns, r8 will be clobbered.  */
2023 
2024 static void
2025 arm_asm_trampoline_template (FILE *f)
2026 {
2027   if (TARGET_ARM)
2028     {
2029       asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2030       asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2031     }
2032   else if (TARGET_THUMB2)
2033     {
2034       /* The Thumb-2 trampoline is similar to the arm implementation.
2035 	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
2036       asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2037 		   STATIC_CHAIN_REGNUM, PC_REGNUM);
2038       asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2039     }
2040   else
2041     {
2042       ASM_OUTPUT_ALIGN (f, 2);
2043       fprintf (f, "\t.code\t16\n");
2044       fprintf (f, ".Ltrampoline_start:\n");
2045       asm_fprintf (f, "\tpush\t{r0, r1}\n");
2046       asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2047       asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2048       asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2049       asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2050       asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2051     }
2052   assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2053   assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2054 }
2055 
2056 /* Emit RTL insns to initialize the variable parts of a trampoline.  */
2057 
2058 static void
2059 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2060 {
2061   rtx fnaddr, mem, a_tramp;
2062 
2063   emit_block_move (m_tramp, assemble_trampoline_template (),
2064 		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2065 
2066   mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2067   emit_move_insn (mem, chain_value);
2068 
2069   mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2070   fnaddr = XEXP (DECL_RTL (fndecl), 0);
2071   emit_move_insn (mem, fnaddr);
2072 
2073   a_tramp = XEXP (m_tramp, 0);
2074   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2075 		     LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2076 		     plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2077 }
2078 
2079 /* Thumb trampolines should be entered in thumb mode, so set
2080    the bottom bit of the address.  */
2081 
2082 static rtx
2083 arm_trampoline_adjust_address (rtx addr)
2084 {
2085   if (TARGET_THUMB)
2086     addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2087 				NULL, 0, OPTAB_LIB_WIDEN);
2088   return addr;
2089 }
2090 
2091 /* Return 1 if it is possible to return using a single instruction.
2092    If SIBLING is non-null, this is a test for a return before a sibling
2093    call.  SIBLING is the call insn, so we can examine its register usage.  */
2094 
2095 int
2096 use_return_insn (int iscond, rtx sibling)
2097 {
2098   int regno;
2099   unsigned int func_type;
2100   unsigned long saved_int_regs;
2101   unsigned HOST_WIDE_INT stack_adjust;
2102   arm_stack_offsets *offsets;
2103 
2104   /* Never use a return instruction before reload has run.  */
2105   if (!reload_completed)
2106     return 0;
2107 
2108   func_type = arm_current_func_type ();
2109 
2110   /* Naked, volatile and stack alignment functions need special
2111      consideration.  */
2112   if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2113     return 0;
2114 
2115   /* So do interrupt functions that use the frame pointer and Thumb
2116      interrupt functions.  */
2117   if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2118     return 0;
2119 
2120   offsets = arm_get_frame_offsets ();
2121   stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2122 
2123   /* As do variadic functions.  */
2124   if (crtl->args.pretend_args_size
2125       || cfun->machine->uses_anonymous_args
2126       /* Or if the function calls __builtin_eh_return () */
2127       || crtl->calls_eh_return
2128       /* Or if the function calls alloca */
2129       || cfun->calls_alloca
2130       /* Or if there is a stack adjustment.  However, if the stack pointer
2131 	 is saved on the stack, we can use a pre-incrementing stack load.  */
2132       || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2133 				 && stack_adjust == 4)))
2134     return 0;
2135 
2136   saved_int_regs = offsets->saved_regs_mask;
2137 
2138   /* Unfortunately, the insn
2139 
2140        ldmib sp, {..., sp, ...}
2141 
2142      triggers a bug on most SA-110 based devices, such that the stack
2143      pointer won't be correctly restored if the instruction takes a
2144      page fault.  We work around this problem by popping r3 along with
2145      the other registers, since that is never slower than executing
2146      another instruction.
2147 
2148      We test for !arm_arch5 here, because code for any architecture
2149      less than this could potentially be run on one of the buggy
2150      chips.  */
2151   if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2152     {
2153       /* Validate that r3 is a call-clobbered register (always true in
2154 	 the default abi) ...  */
2155       if (!call_used_regs[3])
2156 	return 0;
2157 
2158       /* ... that it isn't being used for a return value ... */
2159       if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2160 	return 0;
2161 
2162       /* ... or for a tail-call argument ...  */
2163       if (sibling)
2164 	{
2165 	  gcc_assert (GET_CODE (sibling) == CALL_INSN);
2166 
2167 	  if (find_regno_fusage (sibling, USE, 3))
2168 	    return 0;
2169 	}
2170 
2171       /* ... and that there are no call-saved registers in r0-r2
2172 	 (always true in the default ABI).  */
2173       if (saved_int_regs & 0x7)
2174 	return 0;
2175     }
2176 
2177   /* Can't be done if interworking with Thumb, and any registers have been
2178      stacked.  */
2179   if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2180     return 0;
2181 
2182   /* On StrongARM, conditional returns are expensive if they aren't
2183      taken and multiple registers have been stacked.  */
2184   if (iscond && arm_tune_strongarm)
2185     {
2186       /* Conditional return when just the LR is stored is a simple
2187 	 conditional-load instruction, that's not expensive.  */
2188       if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2189 	return 0;
2190 
2191       if (flag_pic
2192 	  && arm_pic_register != INVALID_REGNUM
2193 	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2194 	return 0;
2195     }
2196 
2197   /* If there are saved registers but the LR isn't saved, then we need
2198      two instructions for the return.  */
2199   if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2200     return 0;
2201 
2202   /* Can't be done if any of the FPA regs are pushed,
2203      since this also requires an insn.  */
2204   if (TARGET_HARD_FLOAT && TARGET_FPA)
2205     for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2206       if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2207 	return 0;
2208 
2209   /* Likewise VFP regs.  */
2210   if (TARGET_HARD_FLOAT && TARGET_VFP)
2211     for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2212       if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2213 	return 0;
2214 
2215   if (TARGET_REALLY_IWMMXT)
2216     for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2217       if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2218 	return 0;
2219 
2220   return 1;
2221 }
2222 
2223 /* Return TRUE if int I is a valid immediate ARM constant.  */
2224 
2225 int
2226 const_ok_for_arm (HOST_WIDE_INT i)
2227 {
2228   int lowbit;
2229 
2230   /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2231      be all zero, or all one.  */
2232   if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2233       && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2234 	  != ((~(unsigned HOST_WIDE_INT) 0)
2235 	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2236     return FALSE;
2237 
2238   i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2239 
2240   /* Fast return for 0 and small values.  We must do this for zero, since
2241      the code below can't handle that one case.  */
2242   if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2243     return TRUE;
2244 
2245   /* Get the number of trailing zeros.  */
2246   lowbit = ffs((int) i) - 1;
2247 
2248   /* Only even shifts are allowed in ARM mode so round down to the
2249      nearest even number.  */
2250   if (TARGET_ARM)
2251     lowbit &= ~1;
2252 
2253   if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2254     return TRUE;
2255 
2256   if (TARGET_ARM)
2257     {
2258       /* Allow rotated constants in ARM mode.  */
2259       if (lowbit <= 4
2260 	   && ((i & ~0xc000003f) == 0
2261 	       || (i & ~0xf000000f) == 0
2262 	       || (i & ~0xfc000003) == 0))
2263 	return TRUE;
2264     }
2265   else
2266     {
2267       HOST_WIDE_INT v;
2268 
2269       /* Allow repeated pattern.  */
2270       v = i & 0xff;
2271       v |= v << 16;
2272       if (i == v || i == (v | (v << 8)))
2273 	return TRUE;
2274     }
2275 
2276   return FALSE;
2277 }
2278 
2279 /* Return true if I is a valid constant for the operation CODE.  */
2280 static int
2281 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2282 {
2283   if (const_ok_for_arm (i))
2284     return 1;
2285 
2286   switch (code)
2287     {
2288     case PLUS:
2289     case COMPARE:
2290     case EQ:
2291     case NE:
2292     case GT:
2293     case LE:
2294     case LT:
2295     case GE:
2296     case GEU:
2297     case LTU:
2298     case GTU:
2299     case LEU:
2300     case UNORDERED:
2301     case ORDERED:
2302     case UNEQ:
2303     case UNGE:
2304     case UNLT:
2305     case UNGT:
2306     case UNLE:
2307       return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2308 
2309     case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
2310     case XOR:
2311       return 0;
2312 
2313     case IOR:
2314       if (TARGET_THUMB2)
2315 	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2316       return 0;
2317 
2318     case AND:
2319       return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2320 
2321     default:
2322       gcc_unreachable ();
2323     }
2324 }
2325 
2326 /* Emit a sequence of insns to handle a large constant.
2327    CODE is the code of the operation required, it can be any of SET, PLUS,
2328    IOR, AND, XOR, MINUS;
2329    MODE is the mode in which the operation is being performed;
2330    VAL is the integer to operate on;
2331    SOURCE is the other operand (a register, or a null-pointer for SET);
2332    SUBTARGETS means it is safe to create scratch registers if that will
2333    either produce a simpler sequence, or we will want to cse the values.
2334    Return value is the number of insns emitted.  */
2335 
2336 /* ??? Tweak this for thumb2.  */
2337 int
2338 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2339 		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2340 {
2341   rtx cond;
2342 
2343   if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2344     cond = COND_EXEC_TEST (PATTERN (insn));
2345   else
2346     cond = NULL_RTX;
2347 
2348   if (subtargets || code == SET
2349       || (GET_CODE (target) == REG && GET_CODE (source) == REG
2350 	  && REGNO (target) != REGNO (source)))
2351     {
2352       /* After arm_reorg has been called, we can't fix up expensive
2353 	 constants by pushing them into memory so we must synthesize
2354 	 them in-line, regardless of the cost.  This is only likely to
2355 	 be more costly on chips that have load delay slots and we are
2356 	 compiling without running the scheduler (so no splitting
2357 	 occurred before the final instruction emission).
2358 
2359 	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2360       */
2361       if (!after_arm_reorg
2362 	  && !cond
2363 	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2364 				1, 0)
2365 	      > arm_constant_limit + (code != SET)))
2366 	{
2367 	  if (code == SET)
2368 	    {
2369 	      /* Currently SET is the only monadic value for CODE, all
2370 		 the rest are diadic.  */
2371 	      if (TARGET_USE_MOVT)
2372 		arm_emit_movpair (target, GEN_INT (val));
2373 	      else
2374 		emit_set_insn (target, GEN_INT (val));
2375 
2376 	      return 1;
2377 	    }
2378 	  else
2379 	    {
2380 	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2381 
2382 	      if (TARGET_USE_MOVT)
2383 		arm_emit_movpair (temp, GEN_INT (val));
2384 	      else
2385 		emit_set_insn (temp, GEN_INT (val));
2386 
2387 	      /* For MINUS, the value is subtracted from, since we never
2388 		 have subtraction of a constant.  */
2389 	      if (code == MINUS)
2390 		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2391 	      else
2392 		emit_set_insn (target,
2393 			       gen_rtx_fmt_ee (code, mode, source, temp));
2394 	      return 2;
2395 	    }
2396 	}
2397     }
2398 
2399   return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2400 			   1);
2401 }
2402 
2403 /* Return the number of instructions required to synthesize the given
2404    constant, if we start emitting them from bit-position I.  */
2405 static int
2406 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2407 {
2408   HOST_WIDE_INT temp1;
2409   int step_size = TARGET_ARM ? 2 : 1;
2410   int num_insns = 0;
2411 
2412   gcc_assert (TARGET_ARM || i == 0);
2413 
2414   do
2415     {
2416       int end;
2417 
2418       if (i <= 0)
2419 	i += 32;
2420       if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2421 	{
2422 	  end = i - 8;
2423 	  if (end < 0)
2424 	    end += 32;
2425 	  temp1 = remainder & ((0x0ff << end)
2426 				    | ((i < end) ? (0xff >> (32 - end)) : 0));
2427 	  remainder &= ~temp1;
2428 	  num_insns++;
2429 	  i -= 8 - step_size;
2430 	}
2431       i -= step_size;
2432     } while (remainder);
2433   return num_insns;
2434 }
2435 
2436 static int
2437 find_best_start (unsigned HOST_WIDE_INT remainder)
2438 {
2439   int best_consecutive_zeros = 0;
2440   int i;
2441   int best_start = 0;
2442 
2443   /* If we aren't targetting ARM, the best place to start is always at
2444      the bottom.  */
2445   if (! TARGET_ARM)
2446     return 0;
2447 
2448   for (i = 0; i < 32; i += 2)
2449     {
2450       int consecutive_zeros = 0;
2451 
2452       if (!(remainder & (3 << i)))
2453 	{
2454 	  while ((i < 32) && !(remainder & (3 << i)))
2455 	    {
2456 	      consecutive_zeros += 2;
2457 	      i += 2;
2458 	    }
2459 	  if (consecutive_zeros > best_consecutive_zeros)
2460 	    {
2461 	      best_consecutive_zeros = consecutive_zeros;
2462 	      best_start = i - consecutive_zeros;
2463 	    }
2464 	  i -= 2;
2465 	}
2466     }
2467 
2468   /* So long as it won't require any more insns to do so, it's
2469      desirable to emit a small constant (in bits 0...9) in the last
2470      insn.  This way there is more chance that it can be combined with
2471      a later addressing insn to form a pre-indexed load or store
2472      operation.  Consider:
2473 
2474 	   *((volatile int *)0xe0000100) = 1;
2475 	   *((volatile int *)0xe0000110) = 2;
2476 
2477      We want this to wind up as:
2478 
2479 	    mov rA, #0xe0000000
2480 	    mov rB, #1
2481 	    str rB, [rA, #0x100]
2482 	    mov rB, #2
2483 	    str rB, [rA, #0x110]
2484 
2485      rather than having to synthesize both large constants from scratch.
2486 
2487      Therefore, we calculate how many insns would be required to emit
2488      the constant starting from `best_start', and also starting from
2489      zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
2490      yield a shorter sequence, we may as well use zero.  */
2491   if (best_start != 0
2492       && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2493       && (count_insns_for_constant (remainder, 0) <=
2494 	  count_insns_for_constant (remainder, best_start)))
2495     best_start = 0;
2496 
2497   return best_start;
2498 }
2499 
2500 /* Emit an instruction with the indicated PATTERN.  If COND is
2501    non-NULL, conditionalize the execution of the instruction on COND
2502    being true.  */
2503 
2504 static void
2505 emit_constant_insn (rtx cond, rtx pattern)
2506 {
2507   if (cond)
2508     pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2509   emit_insn (pattern);
2510 }
2511 
2512 /* As above, but extra parameter GENERATE which, if clear, suppresses
2513    RTL generation.  */
2514 /* ??? This needs more work for thumb2.  */
2515 
2516 static int
2517 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2518 		  HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2519 		  int generate)
2520 {
2521   int can_invert = 0;
2522   int can_negate = 0;
2523   int final_invert = 0;
2524   int can_negate_initial = 0;
2525   int can_shift = 0;
2526   int i;
2527   int num_bits_set = 0;
2528   int set_sign_bit_copies = 0;
2529   int clear_sign_bit_copies = 0;
2530   int clear_zero_bit_copies = 0;
2531   int set_zero_bit_copies = 0;
2532   int insns = 0;
2533   unsigned HOST_WIDE_INT temp1, temp2;
2534   unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2535   int step_size = TARGET_ARM ? 2 : 1;
2536 
2537   /* Find out which operations are safe for a given CODE.  Also do a quick
2538      check for degenerate cases; these can occur when DImode operations
2539      are split.  */
2540   switch (code)
2541     {
2542     case SET:
2543       can_invert = 1;
2544       can_shift = 1;
2545       can_negate = 1;
2546       break;
2547 
2548     case PLUS:
2549       can_negate = 1;
2550       can_negate_initial = 1;
2551       break;
2552 
2553     case IOR:
2554       if (remainder == 0xffffffff)
2555 	{
2556 	  if (generate)
2557 	    emit_constant_insn (cond,
2558 				gen_rtx_SET (VOIDmode, target,
2559 					     GEN_INT (ARM_SIGN_EXTEND (val))));
2560 	  return 1;
2561 	}
2562 
2563       if (remainder == 0)
2564 	{
2565 	  if (reload_completed && rtx_equal_p (target, source))
2566 	    return 0;
2567 
2568 	  if (generate)
2569 	    emit_constant_insn (cond,
2570 				gen_rtx_SET (VOIDmode, target, source));
2571 	  return 1;
2572 	}
2573 
2574       if (TARGET_THUMB2)
2575 	can_invert = 1;
2576       break;
2577 
2578     case AND:
2579       if (remainder == 0)
2580 	{
2581 	  if (generate)
2582 	    emit_constant_insn (cond,
2583 				gen_rtx_SET (VOIDmode, target, const0_rtx));
2584 	  return 1;
2585 	}
2586       if (remainder == 0xffffffff)
2587 	{
2588 	  if (reload_completed && rtx_equal_p (target, source))
2589 	    return 0;
2590 	  if (generate)
2591 	    emit_constant_insn (cond,
2592 				gen_rtx_SET (VOIDmode, target, source));
2593 	  return 1;
2594 	}
2595       can_invert = 1;
2596       break;
2597 
2598     case XOR:
2599       if (remainder == 0)
2600 	{
2601 	  if (reload_completed && rtx_equal_p (target, source))
2602 	    return 0;
2603 	  if (generate)
2604 	    emit_constant_insn (cond,
2605 				gen_rtx_SET (VOIDmode, target, source));
2606 	  return 1;
2607 	}
2608 
2609       if (remainder == 0xffffffff)
2610 	{
2611 	  if (generate)
2612 	    emit_constant_insn (cond,
2613 				gen_rtx_SET (VOIDmode, target,
2614 					     gen_rtx_NOT (mode, source)));
2615 	  return 1;
2616 	}
2617       break;
2618 
2619     case MINUS:
2620       /* We treat MINUS as (val - source), since (source - val) is always
2621 	 passed as (source + (-val)).  */
2622       if (remainder == 0)
2623 	{
2624 	  if (generate)
2625 	    emit_constant_insn (cond,
2626 				gen_rtx_SET (VOIDmode, target,
2627 					     gen_rtx_NEG (mode, source)));
2628 	  return 1;
2629 	}
2630       if (const_ok_for_arm (val))
2631 	{
2632 	  if (generate)
2633 	    emit_constant_insn (cond,
2634 				gen_rtx_SET (VOIDmode, target,
2635 					     gen_rtx_MINUS (mode, GEN_INT (val),
2636 							    source)));
2637 	  return 1;
2638 	}
2639       can_negate = 1;
2640 
2641       break;
2642 
2643     default:
2644       gcc_unreachable ();
2645     }
2646 
2647   /* If we can do it in one insn get out quickly.  */
2648   if (const_ok_for_arm (val)
2649       || (can_negate_initial && const_ok_for_arm (-val))
2650       || (can_invert && const_ok_for_arm (~val)))
2651     {
2652       if (generate)
2653 	emit_constant_insn (cond,
2654 			    gen_rtx_SET (VOIDmode, target,
2655 					 (source
2656 					  ? gen_rtx_fmt_ee (code, mode, source,
2657 							    GEN_INT (val))
2658 					  : GEN_INT (val))));
2659       return 1;
2660     }
2661 
2662   /* Calculate a few attributes that may be useful for specific
2663      optimizations.  */
2664   /* Count number of leading zeros.  */
2665   for (i = 31; i >= 0; i--)
2666     {
2667       if ((remainder & (1 << i)) == 0)
2668 	clear_sign_bit_copies++;
2669       else
2670 	break;
2671     }
2672 
2673   /* Count number of leading 1's.  */
2674   for (i = 31; i >= 0; i--)
2675     {
2676       if ((remainder & (1 << i)) != 0)
2677 	set_sign_bit_copies++;
2678       else
2679 	break;
2680     }
2681 
2682   /* Count number of trailing zero's.  */
2683   for (i = 0; i <= 31; i++)
2684     {
2685       if ((remainder & (1 << i)) == 0)
2686 	clear_zero_bit_copies++;
2687       else
2688 	break;
2689     }
2690 
2691   /* Count number of trailing 1's.  */
2692   for (i = 0; i <= 31; i++)
2693     {
2694       if ((remainder & (1 << i)) != 0)
2695 	set_zero_bit_copies++;
2696       else
2697 	break;
2698     }
2699 
2700   switch (code)
2701     {
2702     case SET:
2703       /* See if we can use movw.  */
2704       if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2705 	{
2706 	  if (generate)
2707 	    emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2708 						   GEN_INT (val)));
2709 	  return 1;
2710 	}
2711 
2712       /* See if we can do this by sign_extending a constant that is known
2713 	 to be negative.  This is a good, way of doing it, since the shift
2714 	 may well merge into a subsequent insn.  */
2715       if (set_sign_bit_copies > 1)
2716 	{
2717 	  if (const_ok_for_arm
2718 	      (temp1 = ARM_SIGN_EXTEND (remainder
2719 					<< (set_sign_bit_copies - 1))))
2720 	    {
2721 	      if (generate)
2722 		{
2723 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2724 		  emit_constant_insn (cond,
2725 				      gen_rtx_SET (VOIDmode, new_src,
2726 						   GEN_INT (temp1)));
2727 		  emit_constant_insn (cond,
2728 				      gen_ashrsi3 (target, new_src,
2729 						   GEN_INT (set_sign_bit_copies - 1)));
2730 		}
2731 	      return 2;
2732 	    }
2733 	  /* For an inverted constant, we will need to set the low bits,
2734 	     these will be shifted out of harm's way.  */
2735 	  temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2736 	  if (const_ok_for_arm (~temp1))
2737 	    {
2738 	      if (generate)
2739 		{
2740 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2741 		  emit_constant_insn (cond,
2742 				      gen_rtx_SET (VOIDmode, new_src,
2743 						   GEN_INT (temp1)));
2744 		  emit_constant_insn (cond,
2745 				      gen_ashrsi3 (target, new_src,
2746 						   GEN_INT (set_sign_bit_copies - 1)));
2747 		}
2748 	      return 2;
2749 	    }
2750 	}
2751 
2752       /* See if we can calculate the value as the difference between two
2753 	 valid immediates.  */
2754       if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2755 	{
2756 	  int topshift = clear_sign_bit_copies & ~1;
2757 
2758 	  temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2759 				   & (0xff000000 >> topshift));
2760 
2761 	  /* If temp1 is zero, then that means the 9 most significant
2762 	     bits of remainder were 1 and we've caused it to overflow.
2763 	     When topshift is 0 we don't need to do anything since we
2764 	     can borrow from 'bit 32'.  */
2765 	  if (temp1 == 0 && topshift != 0)
2766 	    temp1 = 0x80000000 >> (topshift - 1);
2767 
2768 	  temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2769 
2770 	  if (const_ok_for_arm (temp2))
2771 	    {
2772 	      if (generate)
2773 		{
2774 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2775 		  emit_constant_insn (cond,
2776 				      gen_rtx_SET (VOIDmode, new_src,
2777 						   GEN_INT (temp1)));
2778 		  emit_constant_insn (cond,
2779 				      gen_addsi3 (target, new_src,
2780 						  GEN_INT (-temp2)));
2781 		}
2782 
2783 	      return 2;
2784 	    }
2785 	}
2786 
2787       /* See if we can generate this by setting the bottom (or the top)
2788 	 16 bits, and then shifting these into the other half of the
2789 	 word.  We only look for the simplest cases, to do more would cost
2790 	 too much.  Be careful, however, not to generate this when the
2791 	 alternative would take fewer insns.  */
2792       if (val & 0xffff0000)
2793 	{
2794 	  temp1 = remainder & 0xffff0000;
2795 	  temp2 = remainder & 0x0000ffff;
2796 
2797 	  /* Overlaps outside this range are best done using other methods.  */
2798 	  for (i = 9; i < 24; i++)
2799 	    {
2800 	      if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2801 		  && !const_ok_for_arm (temp2))
2802 		{
2803 		  rtx new_src = (subtargets
2804 				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2805 				 : target);
2806 		  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2807 					    source, subtargets, generate);
2808 		  source = new_src;
2809 		  if (generate)
2810 		    emit_constant_insn
2811 		      (cond,
2812 		       gen_rtx_SET
2813 		       (VOIDmode, target,
2814 			gen_rtx_IOR (mode,
2815 				     gen_rtx_ASHIFT (mode, source,
2816 						     GEN_INT (i)),
2817 				     source)));
2818 		  return insns + 1;
2819 		}
2820 	    }
2821 
2822 	  /* Don't duplicate cases already considered.  */
2823 	  for (i = 17; i < 24; i++)
2824 	    {
2825 	      if (((temp1 | (temp1 >> i)) == remainder)
2826 		  && !const_ok_for_arm (temp1))
2827 		{
2828 		  rtx new_src = (subtargets
2829 				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2830 				 : target);
2831 		  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2832 					    source, subtargets, generate);
2833 		  source = new_src;
2834 		  if (generate)
2835 		    emit_constant_insn
2836 		      (cond,
2837 		       gen_rtx_SET (VOIDmode, target,
2838 				    gen_rtx_IOR
2839 				    (mode,
2840 				     gen_rtx_LSHIFTRT (mode, source,
2841 						       GEN_INT (i)),
2842 				     source)));
2843 		  return insns + 1;
2844 		}
2845 	    }
2846 	}
2847       break;
2848 
2849     case IOR:
2850     case XOR:
2851       /* If we have IOR or XOR, and the constant can be loaded in a
2852 	 single instruction, and we can find a temporary to put it in,
2853 	 then this can be done in two instructions instead of 3-4.  */
2854       if (subtargets
2855 	  /* TARGET can't be NULL if SUBTARGETS is 0 */
2856 	  || (reload_completed && !reg_mentioned_p (target, source)))
2857 	{
2858 	  if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2859 	    {
2860 	      if (generate)
2861 		{
2862 		  rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2863 
2864 		  emit_constant_insn (cond,
2865 				      gen_rtx_SET (VOIDmode, sub,
2866 						   GEN_INT (val)));
2867 		  emit_constant_insn (cond,
2868 				      gen_rtx_SET (VOIDmode, target,
2869 						   gen_rtx_fmt_ee (code, mode,
2870 								   source, sub)));
2871 		}
2872 	      return 2;
2873 	    }
2874 	}
2875 
2876       if (code == XOR)
2877 	break;
2878 
2879       /*  Convert.
2880 	  x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
2881 	                     and the remainder 0s for e.g. 0xfff00000)
2882 	  x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2883 
2884 	  This can be done in 2 instructions by using shifts with mov or mvn.
2885 	  e.g. for
2886 	  x = x | 0xfff00000;
2887 	  we generate.
2888 	  mvn	r0, r0, asl #12
2889 	  mvn	r0, r0, lsr #12  */
2890       if (set_sign_bit_copies > 8
2891 	  && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2892 	{
2893 	  if (generate)
2894 	    {
2895 	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2896 	      rtx shift = GEN_INT (set_sign_bit_copies);
2897 
2898 	      emit_constant_insn
2899 		(cond,
2900 		 gen_rtx_SET (VOIDmode, sub,
2901 			      gen_rtx_NOT (mode,
2902 					   gen_rtx_ASHIFT (mode,
2903 							   source,
2904 							   shift))));
2905 	      emit_constant_insn
2906 		(cond,
2907 		 gen_rtx_SET (VOIDmode, target,
2908 			      gen_rtx_NOT (mode,
2909 					   gen_rtx_LSHIFTRT (mode, sub,
2910 							     shift))));
2911 	    }
2912 	  return 2;
2913 	}
2914 
2915       /* Convert
2916 	  x = y | constant (which has set_zero_bit_copies number of trailing ones).
2917 	   to
2918 	  x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2919 
2920 	  For eg. r0 = r0 | 0xfff
2921 	       mvn	r0, r0, lsr #12
2922 	       mvn	r0, r0, asl #12
2923 
2924       */
2925       if (set_zero_bit_copies > 8
2926 	  && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2927 	{
2928 	  if (generate)
2929 	    {
2930 	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2931 	      rtx shift = GEN_INT (set_zero_bit_copies);
2932 
2933 	      emit_constant_insn
2934 		(cond,
2935 		 gen_rtx_SET (VOIDmode, sub,
2936 			      gen_rtx_NOT (mode,
2937 					   gen_rtx_LSHIFTRT (mode,
2938 							     source,
2939 							     shift))));
2940 	      emit_constant_insn
2941 		(cond,
2942 		 gen_rtx_SET (VOIDmode, target,
2943 			      gen_rtx_NOT (mode,
2944 					   gen_rtx_ASHIFT (mode, sub,
2945 							   shift))));
2946 	    }
2947 	  return 2;
2948 	}
2949 
2950       /* This will never be reached for Thumb2 because orn is a valid
2951 	 instruction. This is for Thumb1 and the ARM 32 bit cases.
2952 
2953 	 x = y | constant (such that ~constant is a valid constant)
2954 	 Transform this to
2955 	 x = ~(~y & ~constant).
2956       */
2957       if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2958 	{
2959 	  if (generate)
2960 	    {
2961 	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2962 	      emit_constant_insn (cond,
2963 				  gen_rtx_SET (VOIDmode, sub,
2964 					       gen_rtx_NOT (mode, source)));
2965 	      source = sub;
2966 	      if (subtargets)
2967 		sub = gen_reg_rtx (mode);
2968 	      emit_constant_insn (cond,
2969 				  gen_rtx_SET (VOIDmode, sub,
2970 					       gen_rtx_AND (mode, source,
2971 							    GEN_INT (temp1))));
2972 	      emit_constant_insn (cond,
2973 				  gen_rtx_SET (VOIDmode, target,
2974 					       gen_rtx_NOT (mode, sub)));
2975 	    }
2976 	  return 3;
2977 	}
2978       break;
2979 
2980     case AND:
2981       /* See if two shifts will do 2 or more insn's worth of work.  */
2982       if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2983 	{
2984 	  HOST_WIDE_INT shift_mask = ((0xffffffff
2985 				       << (32 - clear_sign_bit_copies))
2986 				      & 0xffffffff);
2987 
2988 	  if ((remainder | shift_mask) != 0xffffffff)
2989 	    {
2990 	      if (generate)
2991 		{
2992 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2993 		  insns = arm_gen_constant (AND, mode, cond,
2994 					    remainder | shift_mask,
2995 					    new_src, source, subtargets, 1);
2996 		  source = new_src;
2997 		}
2998 	      else
2999 		{
3000 		  rtx targ = subtargets ? NULL_RTX : target;
3001 		  insns = arm_gen_constant (AND, mode, cond,
3002 					    remainder | shift_mask,
3003 					    targ, source, subtargets, 0);
3004 		}
3005 	    }
3006 
3007 	  if (generate)
3008 	    {
3009 	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3010 	      rtx shift = GEN_INT (clear_sign_bit_copies);
3011 
3012 	      emit_insn (gen_ashlsi3 (new_src, source, shift));
3013 	      emit_insn (gen_lshrsi3 (target, new_src, shift));
3014 	    }
3015 
3016 	  return insns + 2;
3017 	}
3018 
3019       if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3020 	{
3021 	  HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3022 
3023 	  if ((remainder | shift_mask) != 0xffffffff)
3024 	    {
3025 	      if (generate)
3026 		{
3027 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3028 
3029 		  insns = arm_gen_constant (AND, mode, cond,
3030 					    remainder | shift_mask,
3031 					    new_src, source, subtargets, 1);
3032 		  source = new_src;
3033 		}
3034 	      else
3035 		{
3036 		  rtx targ = subtargets ? NULL_RTX : target;
3037 
3038 		  insns = arm_gen_constant (AND, mode, cond,
3039 					    remainder | shift_mask,
3040 					    targ, source, subtargets, 0);
3041 		}
3042 	    }
3043 
3044 	  if (generate)
3045 	    {
3046 	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3047 	      rtx shift = GEN_INT (clear_zero_bit_copies);
3048 
3049 	      emit_insn (gen_lshrsi3 (new_src, source, shift));
3050 	      emit_insn (gen_ashlsi3 (target, new_src, shift));
3051 	    }
3052 
3053 	  return insns + 2;
3054 	}
3055 
3056       break;
3057 
3058     default:
3059       break;
3060     }
3061 
3062   for (i = 0; i < 32; i++)
3063     if (remainder & (1 << i))
3064       num_bits_set++;
3065 
3066   if ((code == AND)
3067       || (code != IOR && can_invert && num_bits_set > 16))
3068     remainder ^= 0xffffffff;
3069   else if (code == PLUS && num_bits_set > 16)
3070     remainder = (-remainder) & 0xffffffff;
3071 
3072   /* For XOR, if more than half the bits are set and there's a sequence
3073      of more than 8 consecutive ones in the pattern then we can XOR by the
3074      inverted constant and then invert the final result; this may save an
3075      instruction and might also lead to the final mvn being merged with
3076      some other operation.  */
3077   else if (code == XOR && num_bits_set > 16
3078 	   && (count_insns_for_constant (remainder ^ 0xffffffff,
3079 					 find_best_start
3080 					 (remainder ^ 0xffffffff))
3081 	       < count_insns_for_constant (remainder,
3082 					   find_best_start (remainder))))
3083     {
3084       remainder ^= 0xffffffff;
3085       final_invert = 1;
3086     }
3087   else
3088     {
3089       can_invert = 0;
3090       can_negate = 0;
3091     }
3092 
3093   /* Now try and find a way of doing the job in either two or three
3094      instructions.
3095      We start by looking for the largest block of zeros that are aligned on
3096      a 2-bit boundary, we then fill up the temps, wrapping around to the
3097      top of the word when we drop off the bottom.
3098      In the worst case this code should produce no more than four insns.
3099      Thumb-2 constants are shifted, not rotated, so the MSB is always the
3100      best place to start.  */
3101 
3102   /* ??? Use thumb2 replicated constants when the high and low halfwords are
3103      the same.  */
3104   {
3105     /* Now start emitting the insns.  */
3106     i = find_best_start (remainder);
3107     do
3108       {
3109 	int end;
3110 
3111 	if (i <= 0)
3112 	  i += 32;
3113 	if (remainder & (3 << (i - 2)))
3114 	  {
3115 	    end = i - 8;
3116 	    if (end < 0)
3117 	      end += 32;
3118 	    temp1 = remainder & ((0x0ff << end)
3119 				 | ((i < end) ? (0xff >> (32 - end)) : 0));
3120 	    remainder &= ~temp1;
3121 
3122 	    if (generate)
3123 	      {
3124 		rtx new_src, temp1_rtx;
3125 
3126 		if (code == SET || code == MINUS)
3127 		  {
3128 		    new_src = (subtargets ? gen_reg_rtx (mode) : target);
3129 		    if (can_invert && code != MINUS)
3130 		      temp1 = ~temp1;
3131 		  }
3132 		else
3133 		  {
3134 		    if ((final_invert || remainder) && subtargets)
3135 		      new_src = gen_reg_rtx (mode);
3136 		    else
3137 		      new_src = target;
3138 		    if (can_invert)
3139 		      temp1 = ~temp1;
3140 		    else if (can_negate)
3141 		      temp1 = -temp1;
3142 		  }
3143 
3144 		temp1 = trunc_int_for_mode (temp1, mode);
3145 		temp1_rtx = GEN_INT (temp1);
3146 
3147 		if (code == SET)
3148 		  ;
3149 		else if (code == MINUS)
3150 		  temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3151 		else
3152 		  temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3153 
3154 		emit_constant_insn (cond,
3155 				    gen_rtx_SET (VOIDmode, new_src,
3156 						 temp1_rtx));
3157 		source = new_src;
3158 	      }
3159 
3160 	    if (code == SET)
3161 	      {
3162 		can_invert = 0;
3163 		code = PLUS;
3164 	      }
3165 	    else if (code == MINUS)
3166 	      code = PLUS;
3167 
3168 	    insns++;
3169 	    i -= 8 - step_size;
3170 	  }
3171 	/* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
3172 	   shifts.  */
3173 	i -= step_size;
3174       }
3175     while (remainder);
3176   }
3177 
3178   if (final_invert)
3179     {
3180       if (generate)
3181 	emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3182 					       gen_rtx_NOT (mode, source)));
3183       insns++;
3184     }
3185 
3186   return insns;
3187 }
3188 
3189 /* Canonicalize a comparison so that we are more likely to recognize it.
3190    This can be done for a few constant compares, where we can make the
3191    immediate value easier to load.  */
3192 
3193 enum rtx_code
3194 arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
3195 			     rtx * op1)
3196 {
3197   unsigned HOST_WIDE_INT i = INTVAL (*op1);
3198   unsigned HOST_WIDE_INT maxval;
3199   maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3200 
3201   switch (code)
3202     {
3203     case EQ:
3204     case NE:
3205       return code;
3206 
3207     case GT:
3208     case LE:
3209       if (i != maxval
3210 	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3211 	{
3212 	  *op1 = GEN_INT (i + 1);
3213 	  return code == GT ? GE : LT;
3214 	}
3215       break;
3216 
3217     case GE:
3218     case LT:
3219       if (i != ~maxval
3220 	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3221 	{
3222 	  *op1 = GEN_INT (i - 1);
3223 	  return code == GE ? GT : LE;
3224 	}
3225       break;
3226 
3227     case GTU:
3228     case LEU:
3229       if (i != ~((unsigned HOST_WIDE_INT) 0)
3230 	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3231 	{
3232 	  *op1 = GEN_INT (i + 1);
3233 	  return code == GTU ? GEU : LTU;
3234 	}
3235       break;
3236 
3237     case GEU:
3238     case LTU:
3239       if (i != 0
3240 	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3241 	{
3242 	  *op1 = GEN_INT (i - 1);
3243 	  return code == GEU ? GTU : LEU;
3244 	}
3245       break;
3246 
3247     default:
3248       gcc_unreachable ();
3249     }
3250 
3251   return code;
3252 }
3253 
3254 
3255 /* Define how to find the value returned by a function.  */
3256 
3257 static rtx
3258 arm_function_value(const_tree type, const_tree func,
3259 		   bool outgoing ATTRIBUTE_UNUSED)
3260 {
3261   enum machine_mode mode;
3262   int unsignedp ATTRIBUTE_UNUSED;
3263   rtx r ATTRIBUTE_UNUSED;
3264 
3265   mode = TYPE_MODE (type);
3266 
3267   if (TARGET_AAPCS_BASED)
3268     return aapcs_allocate_return_reg (mode, type, func);
3269 
3270   /* Promote integer types.  */
3271   if (INTEGRAL_TYPE_P (type))
3272     mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3273 
3274   /* Promotes small structs returned in a register to full-word size
3275      for big-endian AAPCS.  */
3276   if (arm_return_in_msb (type))
3277     {
3278       HOST_WIDE_INT size = int_size_in_bytes (type);
3279       if (size % UNITS_PER_WORD != 0)
3280 	{
3281 	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3282 	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3283 	}
3284     }
3285 
3286   return LIBCALL_VALUE (mode);
3287 }
3288 
3289 static int
3290 libcall_eq (const void *p1, const void *p2)
3291 {
3292   return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3293 }
3294 
3295 static hashval_t
3296 libcall_hash (const void *p1)
3297 {
3298   return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3299 }
3300 
3301 static void
3302 add_libcall (htab_t htab, rtx libcall)
3303 {
3304   *htab_find_slot (htab, libcall, INSERT) = libcall;
3305 }
3306 
3307 static bool
3308 arm_libcall_uses_aapcs_base (const_rtx libcall)
3309 {
3310   static bool init_done = false;
3311   static htab_t libcall_htab;
3312 
3313   if (!init_done)
3314     {
3315       init_done = true;
3316 
3317       libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3318 				  NULL);
3319       add_libcall (libcall_htab,
3320 		   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3321       add_libcall (libcall_htab,
3322 		   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3323       add_libcall (libcall_htab,
3324 		   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3325       add_libcall (libcall_htab,
3326 		   convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3327 
3328       add_libcall (libcall_htab,
3329 		   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3330       add_libcall (libcall_htab,
3331 		   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3332       add_libcall (libcall_htab,
3333 		   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3334       add_libcall (libcall_htab,
3335 		   convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3336 
3337       add_libcall (libcall_htab,
3338 		   convert_optab_libfunc (sext_optab, SFmode, HFmode));
3339       add_libcall (libcall_htab,
3340 		   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3341       add_libcall (libcall_htab,
3342 		   convert_optab_libfunc (sfix_optab, SImode, DFmode));
3343       add_libcall (libcall_htab,
3344 		   convert_optab_libfunc (ufix_optab, SImode, DFmode));
3345       add_libcall (libcall_htab,
3346 		   convert_optab_libfunc (sfix_optab, DImode, DFmode));
3347       add_libcall (libcall_htab,
3348 		   convert_optab_libfunc (ufix_optab, DImode, DFmode));
3349       add_libcall (libcall_htab,
3350 		   convert_optab_libfunc (sfix_optab, DImode, SFmode));
3351       add_libcall (libcall_htab,
3352 		   convert_optab_libfunc (ufix_optab, DImode, SFmode));
3353 
3354       /* Values from double-precision helper functions are returned in core
3355 	 registers if the selected core only supports single-precision
3356 	 arithmetic, even if we are using the hard-float ABI.  The same is
3357 	 true for single-precision helpers, but we will never be using the
3358 	 hard-float ABI on a CPU which doesn't support single-precision
3359 	 operations in hardware.  */
3360       add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
3361       add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
3362       add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
3363       add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
3364       add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
3365       add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
3366       add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
3367       add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
3368       add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
3369       add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
3370       add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
3371       add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
3372 							SFmode));
3373       add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
3374 							DFmode));
3375     }
3376 
3377   return libcall && htab_find (libcall_htab, libcall) != NULL;
3378 }
3379 
3380 rtx
3381 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3382 {
3383   if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3384       && GET_MODE_CLASS (mode) == MODE_FLOAT)
3385     {
3386       /* The following libcalls return their result in integer registers,
3387 	 even though they return a floating point value.  */
3388       if (arm_libcall_uses_aapcs_base (libcall))
3389 	return gen_rtx_REG (mode, ARG_REGISTER(1));
3390 
3391     }
3392 
3393   return LIBCALL_VALUE (mode);
3394 }
3395 
3396 /* Determine the amount of memory needed to store the possible return
3397    registers of an untyped call.  */
3398 int
3399 arm_apply_result_size (void)
3400 {
3401   int size = 16;
3402 
3403   if (TARGET_32BIT)
3404     {
3405       if (TARGET_HARD_FLOAT_ABI)
3406 	{
3407 	  if (TARGET_VFP)
3408 	    size += 32;
3409 	  if (TARGET_FPA)
3410 	    size += 12;
3411 	  if (TARGET_MAVERICK)
3412 	    size += 8;
3413 	}
3414       if (TARGET_IWMMXT_ABI)
3415 	size += 8;
3416     }
3417 
3418   return size;
3419 }
3420 
3421 /* Decide whether TYPE should be returned in memory (true)
3422    or in a register (false).  FNTYPE is the type of the function making
3423    the call.  */
3424 static bool
3425 arm_return_in_memory (const_tree type, const_tree fntype)
3426 {
3427   HOST_WIDE_INT size;
3428 
3429   size = int_size_in_bytes (type);  /* Negative if not fixed size.  */
3430 
3431   if (TARGET_AAPCS_BASED)
3432     {
3433       /* Simple, non-aggregate types (ie not including vectors and
3434 	 complex) are always returned in a register (or registers).
3435 	 We don't care about which register here, so we can short-cut
3436 	 some of the detail.  */
3437       if (!AGGREGATE_TYPE_P (type)
3438 	  && TREE_CODE (type) != VECTOR_TYPE
3439 	  && TREE_CODE (type) != COMPLEX_TYPE)
3440 	return false;
3441 
3442       /* Any return value that is no larger than one word can be
3443 	 returned in r0.  */
3444       if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3445 	return false;
3446 
3447       /* Check any available co-processors to see if they accept the
3448 	 type as a register candidate (VFP, for example, can return
3449 	 some aggregates in consecutive registers).  These aren't
3450 	 available if the call is variadic.  */
3451       if (aapcs_select_return_coproc (type, fntype) >= 0)
3452 	return false;
3453 
3454       /* Vector values should be returned using ARM registers, not
3455 	 memory (unless they're over 16 bytes, which will break since
3456 	 we only have four call-clobbered registers to play with).  */
3457       if (TREE_CODE (type) == VECTOR_TYPE)
3458 	return (size < 0 || size > (4 * UNITS_PER_WORD));
3459 
3460       /* The rest go in memory.  */
3461       return true;
3462     }
3463 
3464   if (TREE_CODE (type) == VECTOR_TYPE)
3465     return (size < 0 || size > (4 * UNITS_PER_WORD));
3466 
3467   if (!AGGREGATE_TYPE_P (type) &&
3468       (TREE_CODE (type) != VECTOR_TYPE))
3469     /* All simple types are returned in registers.  */
3470     return false;
3471 
3472   if (arm_abi != ARM_ABI_APCS)
3473     {
3474       /* ATPCS and later return aggregate types in memory only if they are
3475 	 larger than a word (or are variable size).  */
3476       return (size < 0 || size > UNITS_PER_WORD);
3477     }
3478 
3479   /* For the arm-wince targets we choose to be compatible with Microsoft's
3480      ARM and Thumb compilers, which always return aggregates in memory.  */
3481 #ifndef ARM_WINCE
3482   /* All structures/unions bigger than one word are returned in memory.
3483      Also catch the case where int_size_in_bytes returns -1.  In this case
3484      the aggregate is either huge or of variable size, and in either case
3485      we will want to return it via memory and not in a register.  */
3486   if (size < 0 || size > UNITS_PER_WORD)
3487     return true;
3488 
3489   if (TREE_CODE (type) == RECORD_TYPE)
3490     {
3491       tree field;
3492 
3493       /* For a struct the APCS says that we only return in a register
3494 	 if the type is 'integer like' and every addressable element
3495 	 has an offset of zero.  For practical purposes this means
3496 	 that the structure can have at most one non bit-field element
3497 	 and that this element must be the first one in the structure.  */
3498 
3499       /* Find the first field, ignoring non FIELD_DECL things which will
3500 	 have been created by C++.  */
3501       for (field = TYPE_FIELDS (type);
3502 	   field && TREE_CODE (field) != FIELD_DECL;
3503 	   field = TREE_CHAIN (field))
3504 	continue;
3505 
3506       if (field == NULL)
3507 	return false; /* An empty structure.  Allowed by an extension to ANSI C.  */
3508 
3509       /* Check that the first field is valid for returning in a register.  */
3510 
3511       /* ... Floats are not allowed */
3512       if (FLOAT_TYPE_P (TREE_TYPE (field)))
3513 	return true;
3514 
3515       /* ... Aggregates that are not themselves valid for returning in
3516 	 a register are not allowed.  */
3517       if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3518 	return true;
3519 
3520       /* Now check the remaining fields, if any.  Only bitfields are allowed,
3521 	 since they are not addressable.  */
3522       for (field = TREE_CHAIN (field);
3523 	   field;
3524 	   field = TREE_CHAIN (field))
3525 	{
3526 	  if (TREE_CODE (field) != FIELD_DECL)
3527 	    continue;
3528 
3529 	  if (!DECL_BIT_FIELD_TYPE (field))
3530 	    return true;
3531 	}
3532 
3533       return false;
3534     }
3535 
3536   if (TREE_CODE (type) == UNION_TYPE)
3537     {
3538       tree field;
3539 
3540       /* Unions can be returned in registers if every element is
3541 	 integral, or can be returned in an integer register.  */
3542       for (field = TYPE_FIELDS (type);
3543 	   field;
3544 	   field = TREE_CHAIN (field))
3545 	{
3546 	  if (TREE_CODE (field) != FIELD_DECL)
3547 	    continue;
3548 
3549 	  if (FLOAT_TYPE_P (TREE_TYPE (field)))
3550 	    return true;
3551 
3552 	  if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3553 	    return true;
3554 	}
3555 
3556       return false;
3557     }
3558 #endif /* not ARM_WINCE */
3559 
3560   /* Return all other types in memory.  */
3561   return true;
3562 }
3563 
3564 /* Indicate whether or not words of a double are in big-endian order.  */
3565 
3566 int
3567 arm_float_words_big_endian (void)
3568 {
3569   if (TARGET_MAVERICK)
3570     return 0;
3571 
3572   /* For FPA, float words are always big-endian.  For VFP, floats words
3573      follow the memory system mode.  */
3574 
3575   if (TARGET_FPA)
3576     {
3577       return 1;
3578     }
3579 
3580   if (TARGET_VFP)
3581     return (TARGET_BIG_END ? 1 : 0);
3582 
3583   return 1;
3584 }
3585 
3586 const struct pcs_attribute_arg
3587 {
3588   const char *arg;
3589   enum arm_pcs value;
3590 } pcs_attribute_args[] =
3591   {
3592     {"aapcs", ARM_PCS_AAPCS},
3593     {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3594 #if 0
3595     /* We could recognize these, but changes would be needed elsewhere
3596      * to implement them.  */
3597     {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3598     {"atpcs", ARM_PCS_ATPCS},
3599     {"apcs", ARM_PCS_APCS},
3600 #endif
3601     {NULL, ARM_PCS_UNKNOWN}
3602   };
3603 
3604 static enum arm_pcs
3605 arm_pcs_from_attribute (tree attr)
3606 {
3607   const struct pcs_attribute_arg *ptr;
3608   const char *arg;
3609 
3610   /* Get the value of the argument.  */
3611   if (TREE_VALUE (attr) == NULL_TREE
3612       || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3613     return ARM_PCS_UNKNOWN;
3614 
3615   arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3616 
3617   /* Check it against the list of known arguments.  */
3618   for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3619     if (streq (arg, ptr->arg))
3620       return ptr->value;
3621 
3622   /* An unrecognized interrupt type.  */
3623   return ARM_PCS_UNKNOWN;
3624 }
3625 
3626 /* Get the PCS variant to use for this call.  TYPE is the function's type
3627    specification, DECL is the specific declartion.  DECL may be null if
3628    the call could be indirect or if this is a library call.  */
3629 static enum arm_pcs
3630 arm_get_pcs_model (const_tree type, const_tree decl)
3631 {
3632   bool user_convention = false;
3633   enum arm_pcs user_pcs = arm_pcs_default;
3634   tree attr;
3635 
3636   gcc_assert (type);
3637 
3638   attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3639   if (attr)
3640     {
3641       user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3642       user_convention = true;
3643     }
3644 
3645   if (TARGET_AAPCS_BASED)
3646     {
3647       /* Detect varargs functions.  These always use the base rules
3648 	 (no argument is ever a candidate for a co-processor
3649 	 register).  */
3650       bool base_rules = (TYPE_ARG_TYPES (type) != 0
3651 			 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (type)))
3652 			     != void_type_node));
3653 
3654       if (user_convention)
3655 	{
3656 	  if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3657 	    sorry ("Non-AAPCS derived PCS variant");
3658 	  else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3659 	    error ("Variadic functions must use the base AAPCS variant");
3660 	}
3661 
3662       if (base_rules)
3663 	return ARM_PCS_AAPCS;
3664       else if (user_convention)
3665 	return user_pcs;
3666       else if (decl && flag_unit_at_a_time)
3667 	{
3668 	  /* Local functions never leak outside this compilation unit,
3669 	     so we are free to use whatever conventions are
3670 	     appropriate.  */
3671 	  /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
3672 	  struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3673 	  if (i && i->local)
3674 	    return ARM_PCS_AAPCS_LOCAL;
3675 	}
3676     }
3677   else if (user_convention && user_pcs != arm_pcs_default)
3678     sorry ("PCS variant");
3679 
3680   /* For everything else we use the target's default.  */
3681   return arm_pcs_default;
3682 }
3683 
3684 
3685 static void
3686 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
3687 		    const_tree fntype ATTRIBUTE_UNUSED,
3688 		    rtx libcall ATTRIBUTE_UNUSED,
3689 		    const_tree fndecl ATTRIBUTE_UNUSED)
3690 {
3691   /* Record the unallocated VFP registers.  */
3692   pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3693   pcum->aapcs_vfp_reg_alloc = 0;
3694 }
3695 
3696 /* Walk down the type tree of TYPE counting consecutive base elements.
3697    If *MODEP is VOIDmode, then set it to the first valid floating point
3698    type.  If a non-floating point type is found, or if a floating point
3699    type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3700    otherwise return the count in the sub-tree.  */
3701 static int
3702 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3703 {
3704   enum machine_mode mode;
3705   HOST_WIDE_INT size;
3706 
3707   switch (TREE_CODE (type))
3708     {
3709     case REAL_TYPE:
3710       mode = TYPE_MODE (type);
3711       if (mode != DFmode && mode != SFmode)
3712 	return -1;
3713 
3714       if (*modep == VOIDmode)
3715 	*modep = mode;
3716 
3717       if (*modep == mode)
3718 	return 1;
3719 
3720       break;
3721 
3722     case COMPLEX_TYPE:
3723       mode = TYPE_MODE (TREE_TYPE (type));
3724       if (mode != DFmode && mode != SFmode)
3725 	return -1;
3726 
3727       if (*modep == VOIDmode)
3728 	*modep = mode;
3729 
3730       if (*modep == mode)
3731 	return 2;
3732 
3733       break;
3734 
3735     case VECTOR_TYPE:
3736       /* Use V2SImode and V4SImode as representatives of all 64-bit
3737 	 and 128-bit vector types, whether or not those modes are
3738 	 supported with the present options.  */
3739       size = int_size_in_bytes (type);
3740       switch (size)
3741 	{
3742 	case 8:
3743 	  mode = V2SImode;
3744 	  break;
3745 	case 16:
3746 	  mode = V4SImode;
3747 	  break;
3748 	default:
3749 	  return -1;
3750 	}
3751 
3752       if (*modep == VOIDmode)
3753 	*modep = mode;
3754 
3755       /* Vector modes are considered to be opaque: two vectors are
3756 	 equivalent for the purposes of being homogeneous aggregates
3757 	 if they are the same size.  */
3758       if (*modep == mode)
3759 	return 1;
3760 
3761       break;
3762 
3763     case ARRAY_TYPE:
3764       {
3765 	int count;
3766 	tree index = TYPE_DOMAIN (type);
3767 
3768 	/* Can't handle incomplete types.  */
3769 	if (!COMPLETE_TYPE_P(type))
3770 	  return -1;
3771 
3772 	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3773 	if (count == -1
3774 	    || !index
3775 	    || !TYPE_MAX_VALUE (index)
3776 	    || !host_integerp (TYPE_MAX_VALUE (index), 1)
3777 	    || !TYPE_MIN_VALUE (index)
3778 	    || !host_integerp (TYPE_MIN_VALUE (index), 1)
3779 	    || count < 0)
3780 	  return -1;
3781 
3782 	count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3783 		      - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3784 
3785 	/* There must be no padding.  */
3786 	if (!host_integerp (TYPE_SIZE (type), 1)
3787 	    || (tree_low_cst (TYPE_SIZE (type), 1)
3788 		!= count * GET_MODE_BITSIZE (*modep)))
3789 	  return -1;
3790 
3791 	return count;
3792       }
3793 
3794     case RECORD_TYPE:
3795       {
3796 	int count = 0;
3797 	int sub_count;
3798 	tree field;
3799 
3800 	/* Can't handle incomplete types.  */
3801 	if (!COMPLETE_TYPE_P(type))
3802 	  return -1;
3803 
3804 	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3805 	  {
3806 	    if (TREE_CODE (field) != FIELD_DECL)
3807 	      continue;
3808 
3809 	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3810 	    if (sub_count < 0)
3811 	      return -1;
3812 	    count += sub_count;
3813 	  }
3814 
3815 	/* There must be no padding.  */
3816 	if (!host_integerp (TYPE_SIZE (type), 1)
3817 	    || (tree_low_cst (TYPE_SIZE (type), 1)
3818 		!= count * GET_MODE_BITSIZE (*modep)))
3819 	  return -1;
3820 
3821 	return count;
3822       }
3823 
3824     case UNION_TYPE:
3825     case QUAL_UNION_TYPE:
3826       {
3827 	/* These aren't very interesting except in a degenerate case.  */
3828 	int count = 0;
3829 	int sub_count;
3830 	tree field;
3831 
3832 	/* Can't handle incomplete types.  */
3833 	if (!COMPLETE_TYPE_P(type))
3834 	  return -1;
3835 
3836 	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3837 	  {
3838 	    if (TREE_CODE (field) != FIELD_DECL)
3839 	      continue;
3840 
3841 	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3842 	    if (sub_count < 0)
3843 	      return -1;
3844 	    count = count > sub_count ? count : sub_count;
3845 	  }
3846 
3847 	/* There must be no padding.  */
3848 	if (!host_integerp (TYPE_SIZE (type), 1)
3849 	    || (tree_low_cst (TYPE_SIZE (type), 1)
3850 		!= count * GET_MODE_BITSIZE (*modep)))
3851 	  return -1;
3852 
3853 	return count;
3854       }
3855 
3856     default:
3857       break;
3858     }
3859 
3860   return -1;
3861 }
3862 
3863 /* Return true if PCS_VARIANT should use VFP registers.  */
3864 static bool
3865 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
3866 {
3867   if (pcs_variant == ARM_PCS_AAPCS_VFP)
3868     return true;
3869 
3870   if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
3871     return false;
3872 
3873   return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
3874 	  (TARGET_VFP_DOUBLE || !is_double));
3875 }
3876 
3877 static bool
3878 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
3879 				       enum machine_mode mode, const_tree type,
3880 				       enum machine_mode *base_mode, int *count)
3881 {
3882   enum machine_mode new_mode = VOIDmode;
3883 
3884   if (GET_MODE_CLASS (mode) == MODE_FLOAT
3885       || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3886       || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3887     {
3888       *count = 1;
3889       new_mode = mode;
3890     }
3891   else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3892     {
3893       *count = 2;
3894       new_mode = (mode == DCmode ? DFmode : SFmode);
3895     }
3896   else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
3897     {
3898       int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
3899 
3900       if (ag_count > 0 && ag_count <= 4)
3901 	*count = ag_count;
3902       else
3903 	return false;
3904     }
3905   else
3906     return false;
3907 
3908 
3909   if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
3910     return false;
3911 
3912   *base_mode = new_mode;
3913   return true;
3914 }
3915 
3916 static bool
3917 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
3918 			       enum machine_mode mode, const_tree type)
3919 {
3920   int count ATTRIBUTE_UNUSED;
3921   enum machine_mode ag_mode ATTRIBUTE_UNUSED;
3922 
3923   if (!use_vfp_abi (pcs_variant, false))
3924     return false;
3925   return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
3926 						&ag_mode, &count);
3927 }
3928 
3929 static bool
3930 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3931 			     const_tree type)
3932 {
3933   if (!use_vfp_abi (pcum->pcs_variant, false))
3934     return false;
3935 
3936   return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
3937 						&pcum->aapcs_vfp_rmode,
3938 						&pcum->aapcs_vfp_rcount);
3939 }
3940 
3941 static bool
3942 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3943 		    const_tree type  ATTRIBUTE_UNUSED)
3944 {
3945   int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
3946   unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
3947   int regno;
3948 
3949   for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
3950     if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
3951       {
3952 	pcum->aapcs_vfp_reg_alloc = mask << regno;
3953 	if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3954 	  {
3955 	    int i;
3956 	    int rcount = pcum->aapcs_vfp_rcount;
3957 	    int rshift = shift;
3958 	    enum machine_mode rmode = pcum->aapcs_vfp_rmode;
3959 	    rtx par;
3960 	    if (!TARGET_NEON)
3961 	      {
3962 		/* Avoid using unsupported vector modes.  */
3963 		if (rmode == V2SImode)
3964 		  rmode = DImode;
3965 		else if (rmode == V4SImode)
3966 		  {
3967 		    rmode = DImode;
3968 		    rcount *= 2;
3969 		    rshift /= 2;
3970 		  }
3971 	      }
3972 	    par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
3973 	    for (i = 0; i < rcount; i++)
3974 	      {
3975 		rtx tmp = gen_rtx_REG (rmode,
3976 				       FIRST_VFP_REGNUM + regno + i * rshift);
3977 		tmp = gen_rtx_EXPR_LIST
3978 		  (VOIDmode, tmp,
3979 		   GEN_INT (i * GET_MODE_SIZE (rmode)));
3980 		XVECEXP (par, 0, i) = tmp;
3981 	      }
3982 
3983 	    pcum->aapcs_reg = par;
3984 	  }
3985 	else
3986 	  pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
3987 	return true;
3988       }
3989   return false;
3990 }
3991 
3992 static rtx
3993 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
3994 			       enum machine_mode mode,
3995 			       const_tree type ATTRIBUTE_UNUSED)
3996 {
3997   if (!use_vfp_abi (pcs_variant, false))
3998     return false;
3999 
4000   if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4001     {
4002       int count;
4003       enum machine_mode ag_mode;
4004       int i;
4005       rtx par;
4006       int shift;
4007 
4008       aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4009 					     &ag_mode, &count);
4010 
4011       if (!TARGET_NEON)
4012 	{
4013 	  if (ag_mode == V2SImode)
4014 	    ag_mode = DImode;
4015 	  else if (ag_mode == V4SImode)
4016 	    {
4017 	      ag_mode = DImode;
4018 	      count *= 2;
4019 	    }
4020 	}
4021       shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4022       par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4023       for (i = 0; i < count; i++)
4024 	{
4025 	  rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4026 	  tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4027 				   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4028 	  XVECEXP (par, 0, i) = tmp;
4029 	}
4030 
4031       return par;
4032     }
4033 
4034   return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4035 }
4036 
4037 static void
4038 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
4039 		   enum machine_mode mode  ATTRIBUTE_UNUSED,
4040 		   const_tree type  ATTRIBUTE_UNUSED)
4041 {
4042   pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4043   pcum->aapcs_vfp_reg_alloc = 0;
4044   return;
4045 }
4046 
4047 #define AAPCS_CP(X)				\
4048   {						\
4049     aapcs_ ## X ## _cum_init,			\
4050     aapcs_ ## X ## _is_call_candidate,		\
4051     aapcs_ ## X ## _allocate,			\
4052     aapcs_ ## X ## _is_return_candidate,	\
4053     aapcs_ ## X ## _allocate_return_reg,	\
4054     aapcs_ ## X ## _advance			\
4055   }
4056 
4057 /* Table of co-processors that can be used to pass arguments in
4058    registers.  Idealy no arugment should be a candidate for more than
4059    one co-processor table entry, but the table is processed in order
4060    and stops after the first match.  If that entry then fails to put
4061    the argument into a co-processor register, the argument will go on
4062    the stack.  */
4063 static struct
4064 {
4065   /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
4066   void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4067 
4068   /* Return true if an argument of mode MODE (or type TYPE if MODE is
4069      BLKmode) is a candidate for this co-processor's registers; this
4070      function should ignore any position-dependent state in
4071      CUMULATIVE_ARGS and only use call-type dependent information.  */
4072   bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4073 
4074   /* Return true if the argument does get a co-processor register; it
4075      should set aapcs_reg to an RTX of the register allocated as is
4076      required for a return from FUNCTION_ARG.  */
4077   bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4078 
4079   /* Return true if a result of mode MODE (or type TYPE if MODE is
4080      BLKmode) is can be returned in this co-processor's registers.  */
4081   bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4082 
4083   /* Allocate and return an RTX element to hold the return type of a
4084      call, this routine must not fail and will only be called if
4085      is_return_candidate returned true with the same parameters.  */
4086   rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4087 
4088   /* Finish processing this argument and prepare to start processing
4089      the next one.  */
4090   void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4091 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4092   {
4093     AAPCS_CP(vfp)
4094   };
4095 
4096 #undef AAPCS_CP
4097 
4098 static int
4099 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4100 			  tree type)
4101 {
4102   int i;
4103 
4104   for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4105     if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4106       return i;
4107 
4108   return -1;
4109 }
4110 
4111 static int
4112 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4113 {
4114   /* We aren't passed a decl, so we can't check that a call is local.
4115      However, it isn't clear that that would be a win anyway, since it
4116      might limit some tail-calling opportunities.  */
4117   enum arm_pcs pcs_variant;
4118 
4119   if (fntype)
4120     {
4121       const_tree fndecl = NULL_TREE;
4122 
4123       if (TREE_CODE (fntype) == FUNCTION_DECL)
4124 	{
4125 	  fndecl = fntype;
4126 	  fntype = TREE_TYPE (fntype);
4127 	}
4128 
4129       pcs_variant = arm_get_pcs_model (fntype, fndecl);
4130     }
4131   else
4132     pcs_variant = arm_pcs_default;
4133 
4134   if (pcs_variant != ARM_PCS_AAPCS)
4135     {
4136       int i;
4137 
4138       for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4139 	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4140 							TYPE_MODE (type),
4141 							type))
4142 	  return i;
4143     }
4144   return -1;
4145 }
4146 
4147 static rtx
4148 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4149 			   const_tree fntype)
4150 {
4151   /* We aren't passed a decl, so we can't check that a call is local.
4152      However, it isn't clear that that would be a win anyway, since it
4153      might limit some tail-calling opportunities.  */
4154   enum arm_pcs pcs_variant;
4155   int unsignedp ATTRIBUTE_UNUSED;
4156 
4157   if (fntype)
4158     {
4159       const_tree fndecl = NULL_TREE;
4160 
4161       if (TREE_CODE (fntype) == FUNCTION_DECL)
4162 	{
4163 	  fndecl = fntype;
4164 	  fntype = TREE_TYPE (fntype);
4165 	}
4166 
4167       pcs_variant = arm_get_pcs_model (fntype, fndecl);
4168     }
4169   else
4170     pcs_variant = arm_pcs_default;
4171 
4172   /* Promote integer types.  */
4173   if (type && INTEGRAL_TYPE_P (type))
4174     mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4175 
4176   if (pcs_variant != ARM_PCS_AAPCS)
4177     {
4178       int i;
4179 
4180       for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4181 	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4182 							type))
4183 	  return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4184 							     mode, type);
4185     }
4186 
4187   /* Promotes small structs returned in a register to full-word size
4188      for big-endian AAPCS.  */
4189   if (type && arm_return_in_msb (type))
4190     {
4191       HOST_WIDE_INT size = int_size_in_bytes (type);
4192       if (size % UNITS_PER_WORD != 0)
4193 	{
4194 	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4195 	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4196 	}
4197     }
4198 
4199   return gen_rtx_REG (mode, R0_REGNUM);
4200 }
4201 
4202 rtx
4203 aapcs_libcall_value (enum machine_mode mode)
4204 {
4205   return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4206 }
4207 
4208 /* Lay out a function argument using the AAPCS rules.  The rule
4209    numbers referred to here are those in the AAPCS.  */
4210 static void
4211 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4212 		  tree type, int named)
4213 {
4214   int nregs, nregs2;
4215   int ncrn;
4216 
4217   /* We only need to do this once per argument.  */
4218   if (pcum->aapcs_arg_processed)
4219     return;
4220 
4221   pcum->aapcs_arg_processed = true;
4222 
4223   /* Special case: if named is false then we are handling an incoming
4224      anonymous argument which is on the stack.  */
4225   if (!named)
4226     return;
4227 
4228   /* Is this a potential co-processor register candidate?  */
4229   if (pcum->pcs_variant != ARM_PCS_AAPCS)
4230     {
4231       int slot = aapcs_select_call_coproc (pcum, mode, type);
4232       pcum->aapcs_cprc_slot = slot;
4233 
4234       /* We don't have to apply any of the rules from part B of the
4235 	 preparation phase, these are handled elsewhere in the
4236 	 compiler.  */
4237 
4238       if (slot >= 0)
4239 	{
4240 	  /* A Co-processor register candidate goes either in its own
4241 	     class of registers or on the stack.  */
4242 	  if (!pcum->aapcs_cprc_failed[slot])
4243 	    {
4244 	      /* C1.cp - Try to allocate the argument to co-processor
4245 		 registers.  */
4246 	      if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4247 		return;
4248 
4249 	      /* C2.cp - Put the argument on the stack and note that we
4250 		 can't assign any more candidates in this slot.  We also
4251 		 need to note that we have allocated stack space, so that
4252 		 we won't later try to split a non-cprc candidate between
4253 		 core registers and the stack.  */
4254 	      pcum->aapcs_cprc_failed[slot] = true;
4255 	      pcum->can_split = false;
4256 	    }
4257 
4258 	  /* We didn't get a register, so this argument goes on the
4259 	     stack.  */
4260 	  gcc_assert (pcum->can_split == false);
4261 	  return;
4262 	}
4263     }
4264 
4265   /* C3 - For double-word aligned arguments, round the NCRN up to the
4266      next even number.  */
4267   ncrn = pcum->aapcs_ncrn;
4268   if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4269     ncrn++;
4270 
4271   nregs = ARM_NUM_REGS2(mode, type);
4272 
4273   /* Sigh, this test should really assert that nregs > 0, but a GCC
4274      extension allows empty structs and then gives them empty size; it
4275      then allows such a structure to be passed by value.  For some of
4276      the code below we have to pretend that such an argument has
4277      non-zero size so that we 'locate' it correctly either in
4278      registers or on the stack.  */
4279   gcc_assert (nregs >= 0);
4280 
4281   nregs2 = nregs ? nregs : 1;
4282 
4283   /* C4 - Argument fits entirely in core registers.  */
4284   if (ncrn + nregs2 <= NUM_ARG_REGS)
4285     {
4286       pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4287       pcum->aapcs_next_ncrn = ncrn + nregs;
4288       return;
4289     }
4290 
4291   /* C5 - Some core registers left and there are no arguments already
4292      on the stack: split this argument between the remaining core
4293      registers and the stack.  */
4294   if (ncrn < NUM_ARG_REGS && pcum->can_split)
4295     {
4296       pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4297       pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4298       pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4299       return;
4300     }
4301 
4302   /* C6 - NCRN is set to 4.  */
4303   pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4304 
4305   /* C7,C8 - arugment goes on the stack.  We have nothing to do here.  */
4306   return;
4307 }
4308 
4309 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4310    for a call to a function whose data type is FNTYPE.
4311    For a library call, FNTYPE is NULL.  */
4312 void
4313 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4314 			  rtx libname,
4315 			  tree fndecl ATTRIBUTE_UNUSED)
4316 {
4317   /* Long call handling.  */
4318   if (fntype)
4319     pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4320   else
4321     pcum->pcs_variant = arm_pcs_default;
4322 
4323   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4324     {
4325       if (arm_libcall_uses_aapcs_base (libname))
4326 	pcum->pcs_variant = ARM_PCS_AAPCS;
4327 
4328       pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4329       pcum->aapcs_reg = NULL_RTX;
4330       pcum->aapcs_partial = 0;
4331       pcum->aapcs_arg_processed = false;
4332       pcum->aapcs_cprc_slot = -1;
4333       pcum->can_split = true;
4334 
4335       if (pcum->pcs_variant != ARM_PCS_AAPCS)
4336 	{
4337 	  int i;
4338 
4339 	  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4340 	    {
4341 	      pcum->aapcs_cprc_failed[i] = false;
4342 	      aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4343 	    }
4344 	}
4345       return;
4346     }
4347 
4348   /* Legacy ABIs */
4349 
4350   /* On the ARM, the offset starts at 0.  */
4351   pcum->nregs = 0;
4352   pcum->iwmmxt_nregs = 0;
4353   pcum->can_split = true;
4354 
4355   /* Varargs vectors are treated the same as long long.
4356      named_count avoids having to change the way arm handles 'named' */
4357   pcum->named_count = 0;
4358   pcum->nargs = 0;
4359 
4360   if (TARGET_REALLY_IWMMXT && fntype)
4361     {
4362       tree fn_arg;
4363 
4364       for (fn_arg = TYPE_ARG_TYPES (fntype);
4365 	   fn_arg;
4366 	   fn_arg = TREE_CHAIN (fn_arg))
4367 	pcum->named_count += 1;
4368 
4369       if (! pcum->named_count)
4370 	pcum->named_count = INT_MAX;
4371     }
4372 }
4373 
4374 
4375 /* Return true if mode/type need doubleword alignment.  */
4376 bool
4377 arm_needs_doubleword_align (enum machine_mode mode, tree type)
4378 {
4379   return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4380 	  || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4381 }
4382 
4383 
4384 /* Determine where to put an argument to a function.
4385    Value is zero to push the argument on the stack,
4386    or a hard register in which to store the argument.
4387 
4388    MODE is the argument's machine mode.
4389    TYPE is the data type of the argument (as a tree).
4390     This is null for libcalls where that information may
4391     not be available.
4392    CUM is a variable of type CUMULATIVE_ARGS which gives info about
4393     the preceding args and about the function being called.
4394    NAMED is nonzero if this argument is a named parameter
4395     (otherwise it is an extra parameter matching an ellipsis).  */
4396 
4397 rtx
4398 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4399 		  tree type, int named)
4400 {
4401   int nregs;
4402 
4403   /* Handle the special case quickly.  Pick an arbitrary value for op2 of
4404      a call insn (op3 of a call_value insn).  */
4405   if (mode == VOIDmode)
4406     return const0_rtx;
4407 
4408   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4409     {
4410       aapcs_layout_arg (pcum, mode, type, named);
4411       return pcum->aapcs_reg;
4412     }
4413 
4414   /* Varargs vectors are treated the same as long long.
4415      named_count avoids having to change the way arm handles 'named' */
4416   if (TARGET_IWMMXT_ABI
4417       && arm_vector_mode_supported_p (mode)
4418       && pcum->named_count > pcum->nargs + 1)
4419     {
4420       if (pcum->iwmmxt_nregs <= 9)
4421 	return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4422       else
4423 	{
4424 	  pcum->can_split = false;
4425 	  return NULL_RTX;
4426 	}
4427     }
4428 
4429   /* Put doubleword aligned quantities in even register pairs.  */
4430   if (pcum->nregs & 1
4431       && ARM_DOUBLEWORD_ALIGN
4432       && arm_needs_doubleword_align (mode, type))
4433     pcum->nregs++;
4434 
4435   if (mode == VOIDmode)
4436     /* Pick an arbitrary value for operand 2 of the call insn.  */
4437     return const0_rtx;
4438 
4439   /* Only allow splitting an arg between regs and memory if all preceding
4440      args were allocated to regs.  For args passed by reference we only count
4441      the reference pointer.  */
4442   if (pcum->can_split)
4443     nregs = 1;
4444   else
4445     nregs = ARM_NUM_REGS2 (mode, type);
4446 
4447   if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4448     return NULL_RTX;
4449 
4450   return gen_rtx_REG (mode, pcum->nregs);
4451 }
4452 
4453 static int
4454 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4455 		       tree type, bool named)
4456 {
4457   int nregs = pcum->nregs;
4458 
4459   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4460     {
4461       aapcs_layout_arg (pcum, mode, type, named);
4462       return pcum->aapcs_partial;
4463     }
4464 
4465   if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4466     return 0;
4467 
4468   if (NUM_ARG_REGS > nregs
4469       && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4470       && pcum->can_split)
4471     return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4472 
4473   return 0;
4474 }
4475 
4476 void
4477 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4478 			  tree type, bool named)
4479 {
4480   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4481     {
4482       aapcs_layout_arg (pcum, mode, type, named);
4483 
4484       if (pcum->aapcs_cprc_slot >= 0)
4485 	{
4486 	  aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4487 							      type);
4488 	  pcum->aapcs_cprc_slot = -1;
4489 	}
4490 
4491       /* Generic stuff.  */
4492       pcum->aapcs_arg_processed = false;
4493       pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4494       pcum->aapcs_reg = NULL_RTX;
4495       pcum->aapcs_partial = 0;
4496     }
4497   else
4498     {
4499       pcum->nargs += 1;
4500       if (arm_vector_mode_supported_p (mode)
4501 	  && pcum->named_count > pcum->nargs
4502 	  && TARGET_IWMMXT_ABI)
4503 	pcum->iwmmxt_nregs += 1;
4504       else
4505 	pcum->nregs += ARM_NUM_REGS2 (mode, type);
4506     }
4507 }
4508 
4509 /* Variable sized types are passed by reference.  This is a GCC
4510    extension to the ARM ABI.  */
4511 
4512 static bool
4513 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4514 		       enum machine_mode mode ATTRIBUTE_UNUSED,
4515 		       const_tree type, bool named ATTRIBUTE_UNUSED)
4516 {
4517   return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4518 }
4519 
4520 /* Encode the current state of the #pragma [no_]long_calls.  */
4521 typedef enum
4522 {
4523   OFF,		/* No #pragma [no_]long_calls is in effect.  */
4524   LONG,		/* #pragma long_calls is in effect.  */
4525   SHORT		/* #pragma no_long_calls is in effect.  */
4526 } arm_pragma_enum;
4527 
4528 static arm_pragma_enum arm_pragma_long_calls = OFF;
4529 
4530 void
4531 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4532 {
4533   arm_pragma_long_calls = LONG;
4534 }
4535 
4536 void
4537 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4538 {
4539   arm_pragma_long_calls = SHORT;
4540 }
4541 
4542 void
4543 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4544 {
4545   arm_pragma_long_calls = OFF;
4546 }
4547 
4548 /* Handle an attribute requiring a FUNCTION_DECL;
4549    arguments as in struct attribute_spec.handler.  */
4550 static tree
4551 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4552 			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4553 {
4554   if (TREE_CODE (*node) != FUNCTION_DECL)
4555     {
4556       warning (OPT_Wattributes, "%qE attribute only applies to functions",
4557 	       name);
4558       *no_add_attrs = true;
4559     }
4560 
4561   return NULL_TREE;
4562 }
4563 
4564 /* Handle an "interrupt" or "isr" attribute;
4565    arguments as in struct attribute_spec.handler.  */
4566 static tree
4567 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4568 			  bool *no_add_attrs)
4569 {
4570   if (DECL_P (*node))
4571     {
4572       if (TREE_CODE (*node) != FUNCTION_DECL)
4573 	{
4574 	  warning (OPT_Wattributes, "%qE attribute only applies to functions",
4575 		   name);
4576 	  *no_add_attrs = true;
4577 	}
4578       /* FIXME: the argument if any is checked for type attributes;
4579 	 should it be checked for decl ones?  */
4580     }
4581   else
4582     {
4583       if (TREE_CODE (*node) == FUNCTION_TYPE
4584 	  || TREE_CODE (*node) == METHOD_TYPE)
4585 	{
4586 	  if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4587 	    {
4588 	      warning (OPT_Wattributes, "%qE attribute ignored",
4589 		       name);
4590 	      *no_add_attrs = true;
4591 	    }
4592 	}
4593       else if (TREE_CODE (*node) == POINTER_TYPE
4594 	       && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4595 		   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4596 	       && arm_isr_value (args) != ARM_FT_UNKNOWN)
4597 	{
4598 	  *node = build_variant_type_copy (*node);
4599 	  TREE_TYPE (*node) = build_type_attribute_variant
4600 	    (TREE_TYPE (*node),
4601 	     tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4602 	  *no_add_attrs = true;
4603 	}
4604       else
4605 	{
4606 	  /* Possibly pass this attribute on from the type to a decl.  */
4607 	  if (flags & ((int) ATTR_FLAG_DECL_NEXT
4608 		       | (int) ATTR_FLAG_FUNCTION_NEXT
4609 		       | (int) ATTR_FLAG_ARRAY_NEXT))
4610 	    {
4611 	      *no_add_attrs = true;
4612 	      return tree_cons (name, args, NULL_TREE);
4613 	    }
4614 	  else
4615 	    {
4616 	      warning (OPT_Wattributes, "%qE attribute ignored",
4617 		       name);
4618 	    }
4619 	}
4620     }
4621 
4622   return NULL_TREE;
4623 }
4624 
4625 /* Handle a "pcs" attribute; arguments as in struct
4626    attribute_spec.handler.  */
4627 static tree
4628 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4629 			  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4630 {
4631   if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4632     {
4633       warning (OPT_Wattributes, "%qE attribute ignored", name);
4634       *no_add_attrs = true;
4635     }
4636   return NULL_TREE;
4637 }
4638 
4639 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4640 /* Handle the "notshared" attribute.  This attribute is another way of
4641    requesting hidden visibility.  ARM's compiler supports
4642    "__declspec(notshared)"; we support the same thing via an
4643    attribute.  */
4644 
4645 static tree
4646 arm_handle_notshared_attribute (tree *node,
4647 				tree name ATTRIBUTE_UNUSED,
4648 				tree args ATTRIBUTE_UNUSED,
4649 				int flags ATTRIBUTE_UNUSED,
4650 				bool *no_add_attrs)
4651 {
4652   tree decl = TYPE_NAME (*node);
4653 
4654   if (decl)
4655     {
4656       DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4657       DECL_VISIBILITY_SPECIFIED (decl) = 1;
4658       *no_add_attrs = false;
4659     }
4660   return NULL_TREE;
4661 }
4662 #endif
4663 
4664 /* Return 0 if the attributes for two types are incompatible, 1 if they
4665    are compatible, and 2 if they are nearly compatible (which causes a
4666    warning to be generated).  */
4667 static int
4668 arm_comp_type_attributes (const_tree type1, const_tree type2)
4669 {
4670   int l1, l2, s1, s2;
4671 
4672   /* Check for mismatch of non-default calling convention.  */
4673   if (TREE_CODE (type1) != FUNCTION_TYPE)
4674     return 1;
4675 
4676   /* Check for mismatched call attributes.  */
4677   l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4678   l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4679   s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4680   s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4681 
4682   /* Only bother to check if an attribute is defined.  */
4683   if (l1 | l2 | s1 | s2)
4684     {
4685       /* If one type has an attribute, the other must have the same attribute.  */
4686       if ((l1 != l2) || (s1 != s2))
4687 	return 0;
4688 
4689       /* Disallow mixed attributes.  */
4690       if ((l1 & s2) || (l2 & s1))
4691 	return 0;
4692     }
4693 
4694   /* Check for mismatched ISR attribute.  */
4695   l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4696   if (! l1)
4697     l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4698   l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4699   if (! l2)
4700     l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4701   if (l1 != l2)
4702     return 0;
4703 
4704   return 1;
4705 }
4706 
4707 /*  Assigns default attributes to newly defined type.  This is used to
4708     set short_call/long_call attributes for function types of
4709     functions defined inside corresponding #pragma scopes.  */
4710 static void
4711 arm_set_default_type_attributes (tree type)
4712 {
4713   /* Add __attribute__ ((long_call)) to all functions, when
4714      inside #pragma long_calls or __attribute__ ((short_call)),
4715      when inside #pragma no_long_calls.  */
4716   if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4717     {
4718       tree type_attr_list, attr_name;
4719       type_attr_list = TYPE_ATTRIBUTES (type);
4720 
4721       if (arm_pragma_long_calls == LONG)
4722  	attr_name = get_identifier ("long_call");
4723       else if (arm_pragma_long_calls == SHORT)
4724  	attr_name = get_identifier ("short_call");
4725       else
4726  	return;
4727 
4728       type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4729       TYPE_ATTRIBUTES (type) = type_attr_list;
4730     }
4731 }
4732 
4733 /* Return true if DECL is known to be linked into section SECTION.  */
4734 
4735 static bool
4736 arm_function_in_section_p (tree decl, section *section)
4737 {
4738   /* We can only be certain about functions defined in the same
4739      compilation unit.  */
4740   if (!TREE_STATIC (decl))
4741     return false;
4742 
4743   /* Make sure that SYMBOL always binds to the definition in this
4744      compilation unit.  */
4745   if (!targetm.binds_local_p (decl))
4746     return false;
4747 
4748   /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
4749   if (!DECL_SECTION_NAME (decl))
4750     {
4751       /* Make sure that we will not create a unique section for DECL.  */
4752       if (flag_function_sections || DECL_ONE_ONLY (decl))
4753 	return false;
4754     }
4755 
4756   return function_section (decl) == section;
4757 }
4758 
4759 /* Return nonzero if a 32-bit "long_call" should be generated for
4760    a call from the current function to DECL.  We generate a long_call
4761    if the function:
4762 
4763         a.  has an __attribute__((long call))
4764      or b.  is within the scope of a #pragma long_calls
4765      or c.  the -mlong-calls command line switch has been specified
4766 
4767    However we do not generate a long call if the function:
4768 
4769         d.  has an __attribute__ ((short_call))
4770      or e.  is inside the scope of a #pragma no_long_calls
4771      or f.  is defined in the same section as the current function.  */
4772 
4773 bool
4774 arm_is_long_call_p (tree decl)
4775 {
4776   tree attrs;
4777 
4778   if (!decl)
4779     return TARGET_LONG_CALLS;
4780 
4781   attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4782   if (lookup_attribute ("short_call", attrs))
4783     return false;
4784 
4785   /* For "f", be conservative, and only cater for cases in which the
4786      whole of the current function is placed in the same section.  */
4787   if (!flag_reorder_blocks_and_partition
4788       && TREE_CODE (decl) == FUNCTION_DECL
4789       && arm_function_in_section_p (decl, current_function_section ()))
4790     return false;
4791 
4792   if (lookup_attribute ("long_call", attrs))
4793     return true;
4794 
4795   return TARGET_LONG_CALLS;
4796 }
4797 
4798 /* Return nonzero if it is ok to make a tail-call to DECL.  */
4799 static bool
4800 arm_function_ok_for_sibcall (tree decl, tree exp)
4801 {
4802   unsigned long func_type;
4803 
4804   if (cfun->machine->sibcall_blocked)
4805     return false;
4806 
4807   /* Never tailcall something for which we have no decl, or if we
4808      are in Thumb mode.  */
4809   if (decl == NULL || TARGET_THUMB)
4810     return false;
4811 
4812   /* The PIC register is live on entry to VxWorks PLT entries, so we
4813      must make the call before restoring the PIC register.  */
4814   if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
4815     return false;
4816 
4817   /* Cannot tail-call to long calls, since these are out of range of
4818      a branch instruction.  */
4819   if (arm_is_long_call_p (decl))
4820     return false;
4821 
4822   /* If we are interworking and the function is not declared static
4823      then we can't tail-call it unless we know that it exists in this
4824      compilation unit (since it might be a Thumb routine).  */
4825   if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
4826     return false;
4827 
4828   func_type = arm_current_func_type ();
4829   /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
4830   if (IS_INTERRUPT (func_type))
4831     return false;
4832 
4833   if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4834     {
4835       /* Check that the return value locations are the same.  For
4836 	 example that we aren't returning a value from the sibling in
4837 	 a VFP register but then need to transfer it to a core
4838 	 register.  */
4839       rtx a, b;
4840 
4841       a = arm_function_value (TREE_TYPE (exp), decl, false);
4842       b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4843 			      cfun->decl, false);
4844       if (!rtx_equal_p (a, b))
4845 	return false;
4846     }
4847 
4848   /* Never tailcall if function may be called with a misaligned SP.  */
4849   if (IS_STACKALIGN (func_type))
4850     return false;
4851 
4852   /* Everything else is ok.  */
4853   return true;
4854 }
4855 
4856 
4857 /* Addressing mode support functions.  */
4858 
4859 /* Return nonzero if X is a legitimate immediate operand when compiling
4860    for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
4861 int
4862 legitimate_pic_operand_p (rtx x)
4863 {
4864   if (GET_CODE (x) == SYMBOL_REF
4865       || (GET_CODE (x) == CONST
4866 	  && GET_CODE (XEXP (x, 0)) == PLUS
4867 	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4868     return 0;
4869 
4870   return 1;
4871 }
4872 
4873 /* Record that the current function needs a PIC register.  Initialize
4874    cfun->machine->pic_reg if we have not already done so.  */
4875 
4876 static void
4877 require_pic_register (void)
4878 {
4879   /* A lot of the logic here is made obscure by the fact that this
4880      routine gets called as part of the rtx cost estimation process.
4881      We don't want those calls to affect any assumptions about the real
4882      function; and further, we can't call entry_of_function() until we
4883      start the real expansion process.  */
4884   if (!crtl->uses_pic_offset_table)
4885     {
4886       gcc_assert (can_create_pseudo_p ());
4887       if (arm_pic_register != INVALID_REGNUM)
4888 	{
4889 	  if (!cfun->machine->pic_reg)
4890 	    cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
4891 
4892 	  /* Play games to avoid marking the function as needing pic
4893 	     if we are being called as part of the cost-estimation
4894 	     process.  */
4895 	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4896 	    crtl->uses_pic_offset_table = 1;
4897 	}
4898       else
4899 	{
4900 	  rtx seq;
4901 
4902 	  if (!cfun->machine->pic_reg)
4903 	    cfun->machine->pic_reg = gen_reg_rtx (Pmode);
4904 
4905 	  /* Play games to avoid marking the function as needing pic
4906 	     if we are being called as part of the cost-estimation
4907 	     process.  */
4908 	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4909 	    {
4910 	      crtl->uses_pic_offset_table = 1;
4911 	      start_sequence ();
4912 
4913 	      arm_load_pic_register (0UL);
4914 
4915 	      seq = get_insns ();
4916 	      end_sequence ();
4917 	      /* We can be called during expansion of PHI nodes, where
4918 	         we can't yet emit instructions directly in the final
4919 		 insn stream.  Queue the insns on the entry edge, they will
4920 		 be committed after everything else is expanded.  */
4921 	      insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
4922 	    }
4923 	}
4924     }
4925 }
4926 
4927 rtx
4928 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
4929 {
4930   if (GET_CODE (orig) == SYMBOL_REF
4931       || GET_CODE (orig) == LABEL_REF)
4932     {
4933       rtx pic_ref, address;
4934       rtx insn;
4935       int subregs = 0;
4936 
4937       /* If this function doesn't have a pic register, create one now.  */
4938       require_pic_register ();
4939 
4940       if (reg == 0)
4941 	{
4942 	  gcc_assert (can_create_pseudo_p ());
4943 	  reg = gen_reg_rtx (Pmode);
4944 
4945 	  subregs = 1;
4946 	}
4947 
4948       if (subregs)
4949 	address = gen_reg_rtx (Pmode);
4950       else
4951 	address = reg;
4952 
4953       if (TARGET_32BIT)
4954 	emit_insn (gen_pic_load_addr_32bit (address, orig));
4955       else /* TARGET_THUMB1 */
4956 	emit_insn (gen_pic_load_addr_thumb1 (address, orig));
4957 
4958       /* VxWorks does not impose a fixed gap between segments; the run-time
4959 	 gap can be different from the object-file gap.  We therefore can't
4960 	 use GOTOFF unless we are absolutely sure that the symbol is in the
4961 	 same segment as the GOT.  Unfortunately, the flexibility of linker
4962 	 scripts means that we can't be sure of that in general, so assume
4963 	 that GOTOFF is never valid on VxWorks.  */
4964       if ((GET_CODE (orig) == LABEL_REF
4965 	   || (GET_CODE (orig) == SYMBOL_REF &&
4966 	       SYMBOL_REF_LOCAL_P (orig)))
4967 	  && NEED_GOT_RELOC
4968 	  && !TARGET_VXWORKS_RTP)
4969 	pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address);
4970       else
4971 	{
4972 	  pic_ref = gen_const_mem (Pmode,
4973 				   gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
4974 					         address));
4975 	}
4976 
4977       insn = emit_move_insn (reg, pic_ref);
4978 
4979       /* Put a REG_EQUAL note on this insn, so that it can be optimized
4980 	 by loop.  */
4981       set_unique_reg_note (insn, REG_EQUAL, orig);
4982 
4983       return reg;
4984     }
4985   else if (GET_CODE (orig) == CONST)
4986     {
4987       rtx base, offset;
4988 
4989       if (GET_CODE (XEXP (orig, 0)) == PLUS
4990 	  && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
4991 	return orig;
4992 
4993       /* Handle the case where we have: const (UNSPEC_TLS).  */
4994       if (GET_CODE (XEXP (orig, 0)) == UNSPEC
4995 	  && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
4996 	return orig;
4997 
4998       /* Handle the case where we have:
4999          const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
5000          CONST_INT.  */
5001       if (GET_CODE (XEXP (orig, 0)) == PLUS
5002           && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5003           && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5004         {
5005 	  gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5006 	  return orig;
5007 	}
5008 
5009       if (reg == 0)
5010 	{
5011 	  gcc_assert (can_create_pseudo_p ());
5012 	  reg = gen_reg_rtx (Pmode);
5013 	}
5014 
5015       gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5016 
5017       base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5018       offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5019 				       base == reg ? 0 : reg);
5020 
5021       if (GET_CODE (offset) == CONST_INT)
5022 	{
5023 	  /* The base register doesn't really matter, we only want to
5024 	     test the index for the appropriate mode.  */
5025 	  if (!arm_legitimate_index_p (mode, offset, SET, 0))
5026 	    {
5027 	      gcc_assert (can_create_pseudo_p ());
5028 	      offset = force_reg (Pmode, offset);
5029 	    }
5030 
5031 	  if (GET_CODE (offset) == CONST_INT)
5032 	    return plus_constant (base, INTVAL (offset));
5033 	}
5034 
5035       if (GET_MODE_SIZE (mode) > 4
5036 	  && (GET_MODE_CLASS (mode) == MODE_INT
5037 	      || TARGET_SOFT_FLOAT))
5038 	{
5039 	  emit_insn (gen_addsi3 (reg, base, offset));
5040 	  return reg;
5041 	}
5042 
5043       return gen_rtx_PLUS (Pmode, base, offset);
5044     }
5045 
5046   return orig;
5047 }
5048 
5049 
5050 /* Find a spare register to use during the prolog of a function.  */
5051 
5052 static int
5053 thumb_find_work_register (unsigned long pushed_regs_mask)
5054 {
5055   int reg;
5056 
5057   /* Check the argument registers first as these are call-used.  The
5058      register allocation order means that sometimes r3 might be used
5059      but earlier argument registers might not, so check them all.  */
5060   for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5061     if (!df_regs_ever_live_p (reg))
5062       return reg;
5063 
5064   /* Before going on to check the call-saved registers we can try a couple
5065      more ways of deducing that r3 is available.  The first is when we are
5066      pushing anonymous arguments onto the stack and we have less than 4
5067      registers worth of fixed arguments(*).  In this case r3 will be part of
5068      the variable argument list and so we can be sure that it will be
5069      pushed right at the start of the function.  Hence it will be available
5070      for the rest of the prologue.
5071      (*): ie crtl->args.pretend_args_size is greater than 0.  */
5072   if (cfun->machine->uses_anonymous_args
5073       && crtl->args.pretend_args_size > 0)
5074     return LAST_ARG_REGNUM;
5075 
5076   /* The other case is when we have fixed arguments but less than 4 registers
5077      worth.  In this case r3 might be used in the body of the function, but
5078      it is not being used to convey an argument into the function.  In theory
5079      we could just check crtl->args.size to see how many bytes are
5080      being passed in argument registers, but it seems that it is unreliable.
5081      Sometimes it will have the value 0 when in fact arguments are being
5082      passed.  (See testcase execute/20021111-1.c for an example).  So we also
5083      check the args_info.nregs field as well.  The problem with this field is
5084      that it makes no allowances for arguments that are passed to the
5085      function but which are not used.  Hence we could miss an opportunity
5086      when a function has an unused argument in r3.  But it is better to be
5087      safe than to be sorry.  */
5088   if (! cfun->machine->uses_anonymous_args
5089       && crtl->args.size >= 0
5090       && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5091       && crtl->args.info.nregs < 4)
5092     return LAST_ARG_REGNUM;
5093 
5094   /* Otherwise look for a call-saved register that is going to be pushed.  */
5095   for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5096     if (pushed_regs_mask & (1 << reg))
5097       return reg;
5098 
5099   if (TARGET_THUMB2)
5100     {
5101       /* Thumb-2 can use high regs.  */
5102       for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5103 	if (pushed_regs_mask & (1 << reg))
5104 	  return reg;
5105     }
5106   /* Something went wrong - thumb_compute_save_reg_mask()
5107      should have arranged for a suitable register to be pushed.  */
5108   gcc_unreachable ();
5109 }
5110 
5111 static GTY(()) int pic_labelno;
5112 
5113 /* Generate code to load the PIC register.  In thumb mode SCRATCH is a
5114    low register.  */
5115 
5116 void
5117 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5118 {
5119   rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5120 
5121   if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5122     return;
5123 
5124   gcc_assert (flag_pic);
5125 
5126   pic_reg = cfun->machine->pic_reg;
5127   if (TARGET_VXWORKS_RTP)
5128     {
5129       pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5130       pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5131       emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5132 
5133       emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5134 
5135       pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5136       emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5137     }
5138   else
5139     {
5140       /* We use an UNSPEC rather than a LABEL_REF because this label
5141 	 never appears in the code stream.  */
5142 
5143       labelno = GEN_INT (pic_labelno++);
5144       l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5145       l1 = gen_rtx_CONST (VOIDmode, l1);
5146 
5147       /* On the ARM the PC register contains 'dot + 8' at the time of the
5148 	 addition, on the Thumb it is 'dot + 4'.  */
5149       pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5150       pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5151 				UNSPEC_GOTSYM_OFF);
5152       pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5153 
5154       if (TARGET_32BIT)
5155 	{
5156 	  emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5157 	  if (TARGET_ARM)
5158 	    emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5159 	  else
5160 	    emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5161 	}
5162       else /* TARGET_THUMB1 */
5163 	{
5164 	  if (arm_pic_register != INVALID_REGNUM
5165 	      && REGNO (pic_reg) > LAST_LO_REGNUM)
5166 	    {
5167 	      /* We will have pushed the pic register, so we should always be
5168 		 able to find a work register.  */
5169 	      pic_tmp = gen_rtx_REG (SImode,
5170 				     thumb_find_work_register (saved_regs));
5171 	      emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5172 	      emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5173 	    }
5174 	  else
5175 	    emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5176 	  emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5177 	}
5178     }
5179 
5180   /* Need to emit this whether or not we obey regdecls,
5181      since setjmp/longjmp can cause life info to screw up.  */
5182   emit_use (pic_reg);
5183 }
5184 
5185 
5186 /* Return nonzero if X is valid as an ARM state addressing register.  */
5187 static int
5188 arm_address_register_rtx_p (rtx x, int strict_p)
5189 {
5190   int regno;
5191 
5192   if (GET_CODE (x) != REG)
5193     return 0;
5194 
5195   regno = REGNO (x);
5196 
5197   if (strict_p)
5198     return ARM_REGNO_OK_FOR_BASE_P (regno);
5199 
5200   return (regno <= LAST_ARM_REGNUM
5201 	  || regno >= FIRST_PSEUDO_REGISTER
5202 	  || regno == FRAME_POINTER_REGNUM
5203 	  || regno == ARG_POINTER_REGNUM);
5204 }
5205 
5206 /* Return TRUE if this rtx is the difference of a symbol and a label,
5207    and will reduce to a PC-relative relocation in the object file.
5208    Expressions like this can be left alone when generating PIC, rather
5209    than forced through the GOT.  */
5210 static int
5211 pcrel_constant_p (rtx x)
5212 {
5213   if (GET_CODE (x) == MINUS)
5214     return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5215 
5216   return FALSE;
5217 }
5218 
5219 /* Return nonzero if X is a valid ARM state address operand.  */
5220 int
5221 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5222 			        int strict_p)
5223 {
5224   bool use_ldrd;
5225   enum rtx_code code = GET_CODE (x);
5226 
5227   if (arm_address_register_rtx_p (x, strict_p))
5228     return 1;
5229 
5230   use_ldrd = (TARGET_LDRD
5231 	      && (mode == DImode
5232 		  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5233 
5234   if (code == POST_INC || code == PRE_DEC
5235       || ((code == PRE_INC || code == POST_DEC)
5236 	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5237     return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5238 
5239   else if ((code == POST_MODIFY || code == PRE_MODIFY)
5240 	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5241 	   && GET_CODE (XEXP (x, 1)) == PLUS
5242 	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5243     {
5244       rtx addend = XEXP (XEXP (x, 1), 1);
5245 
5246       /* Don't allow ldrd post increment by register because it's hard
5247 	 to fixup invalid register choices.  */
5248       if (use_ldrd
5249 	  && GET_CODE (x) == POST_MODIFY
5250 	  && GET_CODE (addend) == REG)
5251 	return 0;
5252 
5253       return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5254 	      && arm_legitimate_index_p (mode, addend, outer, strict_p));
5255     }
5256 
5257   /* After reload constants split into minipools will have addresses
5258      from a LABEL_REF.  */
5259   else if (reload_completed
5260 	   && (code == LABEL_REF
5261 	       || (code == CONST
5262 		   && GET_CODE (XEXP (x, 0)) == PLUS
5263 		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5264 		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5265     return 1;
5266 
5267   else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5268     return 0;
5269 
5270   else if (code == PLUS)
5271     {
5272       rtx xop0 = XEXP (x, 0);
5273       rtx xop1 = XEXP (x, 1);
5274 
5275       return ((arm_address_register_rtx_p (xop0, strict_p)
5276 	       && GET_CODE(xop1) == CONST_INT
5277 	       && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5278 	      || (arm_address_register_rtx_p (xop1, strict_p)
5279 		  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5280     }
5281 
5282 #if 0
5283   /* Reload currently can't handle MINUS, so disable this for now */
5284   else if (GET_CODE (x) == MINUS)
5285     {
5286       rtx xop0 = XEXP (x, 0);
5287       rtx xop1 = XEXP (x, 1);
5288 
5289       return (arm_address_register_rtx_p (xop0, strict_p)
5290 	      && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5291     }
5292 #endif
5293 
5294   else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5295 	   && code == SYMBOL_REF
5296 	   && CONSTANT_POOL_ADDRESS_P (x)
5297 	   && ! (flag_pic
5298 		 && symbol_mentioned_p (get_pool_constant (x))
5299 		 && ! pcrel_constant_p (get_pool_constant (x))))
5300     return 1;
5301 
5302   return 0;
5303 }
5304 
5305 /* Return nonzero if X is a valid Thumb-2 address operand.  */
5306 static int
5307 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5308 {
5309   bool use_ldrd;
5310   enum rtx_code code = GET_CODE (x);
5311 
5312   if (arm_address_register_rtx_p (x, strict_p))
5313     return 1;
5314 
5315   use_ldrd = (TARGET_LDRD
5316 	      && (mode == DImode
5317 		  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5318 
5319   if (code == POST_INC || code == PRE_DEC
5320       || ((code == PRE_INC || code == POST_DEC)
5321 	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5322     return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5323 
5324   else if ((code == POST_MODIFY || code == PRE_MODIFY)
5325 	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5326 	   && GET_CODE (XEXP (x, 1)) == PLUS
5327 	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5328     {
5329       /* Thumb-2 only has autoincrement by constant.  */
5330       rtx addend = XEXP (XEXP (x, 1), 1);
5331       HOST_WIDE_INT offset;
5332 
5333       if (GET_CODE (addend) != CONST_INT)
5334 	return 0;
5335 
5336       offset = INTVAL(addend);
5337       if (GET_MODE_SIZE (mode) <= 4)
5338 	return (offset > -256 && offset < 256);
5339 
5340       return (use_ldrd && offset > -1024 && offset < 1024
5341 	      && (offset & 3) == 0);
5342     }
5343 
5344   /* After reload constants split into minipools will have addresses
5345      from a LABEL_REF.  */
5346   else if (reload_completed
5347 	   && (code == LABEL_REF
5348 	       || (code == CONST
5349 		   && GET_CODE (XEXP (x, 0)) == PLUS
5350 		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5351 		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5352     return 1;
5353 
5354   else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5355     return 0;
5356 
5357   else if (code == PLUS)
5358     {
5359       rtx xop0 = XEXP (x, 0);
5360       rtx xop1 = XEXP (x, 1);
5361 
5362       return ((arm_address_register_rtx_p (xop0, strict_p)
5363 	       && thumb2_legitimate_index_p (mode, xop1, strict_p))
5364 	      || (arm_address_register_rtx_p (xop1, strict_p)
5365 		  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5366     }
5367 
5368   else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5369 	   && code == SYMBOL_REF
5370 	   && CONSTANT_POOL_ADDRESS_P (x)
5371 	   && ! (flag_pic
5372 		 && symbol_mentioned_p (get_pool_constant (x))
5373 		 && ! pcrel_constant_p (get_pool_constant (x))))
5374     return 1;
5375 
5376   return 0;
5377 }
5378 
5379 /* Return nonzero if INDEX is valid for an address index operand in
5380    ARM state.  */
5381 static int
5382 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5383 			int strict_p)
5384 {
5385   HOST_WIDE_INT range;
5386   enum rtx_code code = GET_CODE (index);
5387 
5388   /* Standard coprocessor addressing modes.  */
5389   if (TARGET_HARD_FLOAT
5390       && (TARGET_FPA || TARGET_MAVERICK)
5391       && (GET_MODE_CLASS (mode) == MODE_FLOAT
5392 	  || (TARGET_MAVERICK && mode == DImode)))
5393     return (code == CONST_INT && INTVAL (index) < 1024
5394 	    && INTVAL (index) > -1024
5395 	    && (INTVAL (index) & 3) == 0);
5396 
5397   if (TARGET_NEON
5398       && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5399     return (code == CONST_INT
5400 	    && INTVAL (index) < 1016
5401 	    && INTVAL (index) > -1024
5402 	    && (INTVAL (index) & 3) == 0);
5403 
5404   if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5405     return (code == CONST_INT
5406 	    && INTVAL (index) < 1024
5407 	    && INTVAL (index) > -1024
5408 	    && (INTVAL (index) & 3) == 0);
5409 
5410   if (arm_address_register_rtx_p (index, strict_p)
5411       && (GET_MODE_SIZE (mode) <= 4))
5412     return 1;
5413 
5414   if (mode == DImode || mode == DFmode)
5415     {
5416       if (code == CONST_INT)
5417 	{
5418 	  HOST_WIDE_INT val = INTVAL (index);
5419 
5420 	  if (TARGET_LDRD)
5421 	    return val > -256 && val < 256;
5422 	  else
5423 	    return val > -4096 && val < 4092;
5424 	}
5425 
5426       return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5427     }
5428 
5429   if (GET_MODE_SIZE (mode) <= 4
5430       && ! (arm_arch4
5431 	    && (mode == HImode
5432 		|| mode == HFmode
5433 		|| (mode == QImode && outer == SIGN_EXTEND))))
5434     {
5435       if (code == MULT)
5436 	{
5437 	  rtx xiop0 = XEXP (index, 0);
5438 	  rtx xiop1 = XEXP (index, 1);
5439 
5440 	  return ((arm_address_register_rtx_p (xiop0, strict_p)
5441 		   && power_of_two_operand (xiop1, SImode))
5442 		  || (arm_address_register_rtx_p (xiop1, strict_p)
5443 		      && power_of_two_operand (xiop0, SImode)));
5444 	}
5445       else if (code == LSHIFTRT || code == ASHIFTRT
5446 	       || code == ASHIFT || code == ROTATERT)
5447 	{
5448 	  rtx op = XEXP (index, 1);
5449 
5450 	  return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5451 		  && GET_CODE (op) == CONST_INT
5452 		  && INTVAL (op) > 0
5453 		  && INTVAL (op) <= 31);
5454 	}
5455     }
5456 
5457   /* For ARM v4 we may be doing a sign-extend operation during the
5458      load.  */
5459   if (arm_arch4)
5460     {
5461       if (mode == HImode
5462 	  || mode == HFmode
5463 	  || (outer == SIGN_EXTEND && mode == QImode))
5464 	range = 256;
5465       else
5466 	range = 4096;
5467     }
5468   else
5469     range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5470 
5471   return (code == CONST_INT
5472 	  && INTVAL (index) < range
5473 	  && INTVAL (index) > -range);
5474 }
5475 
5476 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5477    index operand.  i.e. 1, 2, 4 or 8.  */
5478 static bool
5479 thumb2_index_mul_operand (rtx op)
5480 {
5481   HOST_WIDE_INT val;
5482 
5483   if (GET_CODE(op) != CONST_INT)
5484     return false;
5485 
5486   val = INTVAL(op);
5487   return (val == 1 || val == 2 || val == 4 || val == 8);
5488 }
5489 
5490 /* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
5491 static int
5492 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5493 {
5494   enum rtx_code code = GET_CODE (index);
5495 
5496   /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
5497   /* Standard coprocessor addressing modes.  */
5498   if (TARGET_HARD_FLOAT
5499       && (TARGET_FPA || TARGET_MAVERICK)
5500       && (GET_MODE_CLASS (mode) == MODE_FLOAT
5501 	  || (TARGET_MAVERICK && mode == DImode)))
5502     return (code == CONST_INT && INTVAL (index) < 1024
5503 	    && INTVAL (index) > -1024
5504 	    && (INTVAL (index) & 3) == 0);
5505 
5506   if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5507     {
5508       /* For DImode assume values will usually live in core regs
5509 	 and only allow LDRD addressing modes.  */
5510       if (!TARGET_LDRD || mode != DImode)
5511 	return (code == CONST_INT
5512 		&& INTVAL (index) < 1024
5513 		&& INTVAL (index) > -1024
5514 		&& (INTVAL (index) & 3) == 0);
5515     }
5516 
5517   if (TARGET_NEON
5518       && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5519     return (code == CONST_INT
5520 	    && INTVAL (index) < 1016
5521 	    && INTVAL (index) > -1024
5522 	    && (INTVAL (index) & 3) == 0);
5523 
5524   if (arm_address_register_rtx_p (index, strict_p)
5525       && (GET_MODE_SIZE (mode) <= 4))
5526     return 1;
5527 
5528   if (mode == DImode || mode == DFmode)
5529     {
5530       if (code == CONST_INT)
5531 	{
5532 	  HOST_WIDE_INT val = INTVAL (index);
5533 	  /* ??? Can we assume ldrd for thumb2?  */
5534 	  /* Thumb-2 ldrd only has reg+const addressing modes.  */
5535 	  /* ldrd supports offsets of +-1020.
5536 	     However the ldr fallback does not.  */
5537 	  return val > -256 && val < 256 && (val & 3) == 0;
5538 	}
5539       else
5540 	return 0;
5541     }
5542 
5543   if (code == MULT)
5544     {
5545       rtx xiop0 = XEXP (index, 0);
5546       rtx xiop1 = XEXP (index, 1);
5547 
5548       return ((arm_address_register_rtx_p (xiop0, strict_p)
5549 	       && thumb2_index_mul_operand (xiop1))
5550 	      || (arm_address_register_rtx_p (xiop1, strict_p)
5551 		  && thumb2_index_mul_operand (xiop0)));
5552     }
5553   else if (code == ASHIFT)
5554     {
5555       rtx op = XEXP (index, 1);
5556 
5557       return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5558 	      && GET_CODE (op) == CONST_INT
5559 	      && INTVAL (op) > 0
5560 	      && INTVAL (op) <= 3);
5561     }
5562 
5563   return (code == CONST_INT
5564 	  && INTVAL (index) < 4096
5565 	  && INTVAL (index) > -256);
5566 }
5567 
5568 /* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
5569 static int
5570 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5571 {
5572   int regno;
5573 
5574   if (GET_CODE (x) != REG)
5575     return 0;
5576 
5577   regno = REGNO (x);
5578 
5579   if (strict_p)
5580     return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5581 
5582   return (regno <= LAST_LO_REGNUM
5583 	  || regno > LAST_VIRTUAL_REGISTER
5584 	  || regno == FRAME_POINTER_REGNUM
5585 	  || (GET_MODE_SIZE (mode) >= 4
5586 	      && (regno == STACK_POINTER_REGNUM
5587 		  || regno >= FIRST_PSEUDO_REGISTER
5588 		  || x == hard_frame_pointer_rtx
5589 		  || x == arg_pointer_rtx)));
5590 }
5591 
5592 /* Return nonzero if x is a legitimate index register.  This is the case
5593    for any base register that can access a QImode object.  */
5594 inline static int
5595 thumb1_index_register_rtx_p (rtx x, int strict_p)
5596 {
5597   return thumb1_base_register_rtx_p (x, QImode, strict_p);
5598 }
5599 
5600 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5601 
5602    The AP may be eliminated to either the SP or the FP, so we use the
5603    least common denominator, e.g. SImode, and offsets from 0 to 64.
5604 
5605    ??? Verify whether the above is the right approach.
5606 
5607    ??? Also, the FP may be eliminated to the SP, so perhaps that
5608    needs special handling also.
5609 
5610    ??? Look at how the mips16 port solves this problem.  It probably uses
5611    better ways to solve some of these problems.
5612 
5613    Although it is not incorrect, we don't accept QImode and HImode
5614    addresses based on the frame pointer or arg pointer until the
5615    reload pass starts.  This is so that eliminating such addresses
5616    into stack based ones won't produce impossible code.  */
5617 static int
5618 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5619 {
5620   /* ??? Not clear if this is right.  Experiment.  */
5621   if (GET_MODE_SIZE (mode) < 4
5622       && !(reload_in_progress || reload_completed)
5623       && (reg_mentioned_p (frame_pointer_rtx, x)
5624 	  || reg_mentioned_p (arg_pointer_rtx, x)
5625 	  || reg_mentioned_p (virtual_incoming_args_rtx, x)
5626 	  || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5627 	  || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5628 	  || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5629     return 0;
5630 
5631   /* Accept any base register.  SP only in SImode or larger.  */
5632   else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5633     return 1;
5634 
5635   /* This is PC relative data before arm_reorg runs.  */
5636   else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5637 	   && GET_CODE (x) == SYMBOL_REF
5638            && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5639     return 1;
5640 
5641   /* This is PC relative data after arm_reorg runs.  */
5642   else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5643 	   && reload_completed
5644 	   && (GET_CODE (x) == LABEL_REF
5645 	       || (GET_CODE (x) == CONST
5646 		   && GET_CODE (XEXP (x, 0)) == PLUS
5647 		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5648 		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5649     return 1;
5650 
5651   /* Post-inc indexing only supported for SImode and larger.  */
5652   else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5653 	   && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5654     return 1;
5655 
5656   else if (GET_CODE (x) == PLUS)
5657     {
5658       /* REG+REG address can be any two index registers.  */
5659       /* We disallow FRAME+REG addressing since we know that FRAME
5660 	 will be replaced with STACK, and SP relative addressing only
5661 	 permits SP+OFFSET.  */
5662       if (GET_MODE_SIZE (mode) <= 4
5663 	  && XEXP (x, 0) != frame_pointer_rtx
5664 	  && XEXP (x, 1) != frame_pointer_rtx
5665 	  && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5666 	  && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
5667 	return 1;
5668 
5669       /* REG+const has 5-7 bit offset for non-SP registers.  */
5670       else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5671 		|| XEXP (x, 0) == arg_pointer_rtx)
5672 	       && GET_CODE (XEXP (x, 1)) == CONST_INT
5673 	       && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5674 	return 1;
5675 
5676       /* REG+const has 10-bit offset for SP, but only SImode and
5677 	 larger is supported.  */
5678       /* ??? Should probably check for DI/DFmode overflow here
5679 	 just like GO_IF_LEGITIMATE_OFFSET does.  */
5680       else if (GET_CODE (XEXP (x, 0)) == REG
5681 	       && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5682 	       && GET_MODE_SIZE (mode) >= 4
5683 	       && GET_CODE (XEXP (x, 1)) == CONST_INT
5684 	       && INTVAL (XEXP (x, 1)) >= 0
5685 	       && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5686 	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
5687 	return 1;
5688 
5689       else if (GET_CODE (XEXP (x, 0)) == REG
5690 	       && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5691 		   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5692 		   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5693 		       && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
5694 	       && GET_MODE_SIZE (mode) >= 4
5695 	       && GET_CODE (XEXP (x, 1)) == CONST_INT
5696 	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
5697 	return 1;
5698     }
5699 
5700   else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5701 	   && GET_MODE_SIZE (mode) == 4
5702 	   && GET_CODE (x) == SYMBOL_REF
5703 	   && CONSTANT_POOL_ADDRESS_P (x)
5704 	   && ! (flag_pic
5705 		 && symbol_mentioned_p (get_pool_constant (x))
5706 		 && ! pcrel_constant_p (get_pool_constant (x))))
5707     return 1;
5708 
5709   return 0;
5710 }
5711 
5712 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5713    instruction of mode MODE.  */
5714 int
5715 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
5716 {
5717   switch (GET_MODE_SIZE (mode))
5718     {
5719     case 1:
5720       return val >= 0 && val < 32;
5721 
5722     case 2:
5723       return val >= 0 && val < 64 && (val & 1) == 0;
5724 
5725     default:
5726       return (val >= 0
5727 	      && (val + GET_MODE_SIZE (mode)) <= 128
5728 	      && (val & 3) == 0);
5729     }
5730 }
5731 
5732 bool
5733 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
5734 {
5735   if (TARGET_ARM)
5736     return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
5737   else if (TARGET_THUMB2)
5738     return thumb2_legitimate_address_p (mode, x, strict_p);
5739   else /* if (TARGET_THUMB1) */
5740     return thumb1_legitimate_address_p (mode, x, strict_p);
5741 }
5742 
5743 /* Build the SYMBOL_REF for __tls_get_addr.  */
5744 
5745 static GTY(()) rtx tls_get_addr_libfunc;
5746 
5747 static rtx
5748 get_tls_get_addr (void)
5749 {
5750   if (!tls_get_addr_libfunc)
5751     tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
5752   return tls_get_addr_libfunc;
5753 }
5754 
5755 static rtx
5756 arm_load_tp (rtx target)
5757 {
5758   if (!target)
5759     target = gen_reg_rtx (SImode);
5760 
5761   if (TARGET_HARD_TP)
5762     {
5763       /* Can return in any reg.  */
5764       emit_insn (gen_load_tp_hard (target));
5765     }
5766   else
5767     {
5768       /* Always returned in r0.  Immediately copy the result into a pseudo,
5769 	 otherwise other uses of r0 (e.g. setting up function arguments) may
5770 	 clobber the value.  */
5771 
5772       rtx tmp;
5773 
5774       emit_insn (gen_load_tp_soft ());
5775 
5776       tmp = gen_rtx_REG (SImode, 0);
5777       emit_move_insn (target, tmp);
5778     }
5779   return target;
5780 }
5781 
5782 static rtx
5783 load_tls_operand (rtx x, rtx reg)
5784 {
5785   rtx tmp;
5786 
5787   if (reg == NULL_RTX)
5788     reg = gen_reg_rtx (SImode);
5789 
5790   tmp = gen_rtx_CONST (SImode, x);
5791 
5792   emit_move_insn (reg, tmp);
5793 
5794   return reg;
5795 }
5796 
5797 static rtx
5798 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
5799 {
5800   rtx insns, label, labelno, sum;
5801 
5802   start_sequence ();
5803 
5804   labelno = GEN_INT (pic_labelno++);
5805   label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5806   label = gen_rtx_CONST (VOIDmode, label);
5807 
5808   sum = gen_rtx_UNSPEC (Pmode,
5809 			gen_rtvec (4, x, GEN_INT (reloc), label,
5810 				   GEN_INT (TARGET_ARM ? 8 : 4)),
5811 			UNSPEC_TLS);
5812   reg = load_tls_operand (sum, reg);
5813 
5814   if (TARGET_ARM)
5815     emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5816   else if (TARGET_THUMB2)
5817     emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5818   else /* TARGET_THUMB1 */
5819     emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5820 
5821   *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST?  */
5822 				     Pmode, 1, reg, Pmode);
5823 
5824   insns = get_insns ();
5825   end_sequence ();
5826 
5827   return insns;
5828 }
5829 
5830 rtx
5831 legitimize_tls_address (rtx x, rtx reg)
5832 {
5833   rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
5834   unsigned int model = SYMBOL_REF_TLS_MODEL (x);
5835 
5836   switch (model)
5837     {
5838     case TLS_MODEL_GLOBAL_DYNAMIC:
5839       insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
5840       dest = gen_reg_rtx (Pmode);
5841       emit_libcall_block (insns, dest, ret, x);
5842       return dest;
5843 
5844     case TLS_MODEL_LOCAL_DYNAMIC:
5845       insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
5846 
5847       /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
5848 	 share the LDM result with other LD model accesses.  */
5849       eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
5850 			    UNSPEC_TLS);
5851       dest = gen_reg_rtx (Pmode);
5852       emit_libcall_block (insns, dest, ret, eqv);
5853 
5854       /* Load the addend.  */
5855       addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
5856 			       UNSPEC_TLS);
5857       addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
5858       return gen_rtx_PLUS (Pmode, dest, addend);
5859 
5860     case TLS_MODEL_INITIAL_EXEC:
5861       labelno = GEN_INT (pic_labelno++);
5862       label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5863       label = gen_rtx_CONST (VOIDmode, label);
5864       sum = gen_rtx_UNSPEC (Pmode,
5865 			    gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
5866 				       GEN_INT (TARGET_ARM ? 8 : 4)),
5867 			    UNSPEC_TLS);
5868       reg = load_tls_operand (sum, reg);
5869 
5870       if (TARGET_ARM)
5871 	emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
5872       else if (TARGET_THUMB2)
5873 	emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
5874       else
5875 	{
5876 	  emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5877 	  emit_move_insn (reg, gen_const_mem (SImode, reg));
5878 	}
5879 
5880       tp = arm_load_tp (NULL_RTX);
5881 
5882       return gen_rtx_PLUS (Pmode, tp, reg);
5883 
5884     case TLS_MODEL_LOCAL_EXEC:
5885       tp = arm_load_tp (NULL_RTX);
5886 
5887       reg = gen_rtx_UNSPEC (Pmode,
5888 			    gen_rtvec (2, x, GEN_INT (TLS_LE32)),
5889 			    UNSPEC_TLS);
5890       reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
5891 
5892       return gen_rtx_PLUS (Pmode, tp, reg);
5893 
5894     default:
5895       abort ();
5896     }
5897 }
5898 
5899 /* Try machine-dependent ways of modifying an illegitimate address
5900    to be legitimate.  If we find one, return the new, valid address.  */
5901 rtx
5902 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
5903 {
5904   if (!TARGET_ARM)
5905     {
5906       /* TODO: legitimize_address for Thumb2.  */
5907       if (TARGET_THUMB2)
5908         return x;
5909       return thumb_legitimize_address (x, orig_x, mode);
5910     }
5911 
5912   if (arm_tls_symbol_p (x))
5913     return legitimize_tls_address (x, NULL_RTX);
5914 
5915   if (GET_CODE (x) == PLUS)
5916     {
5917       rtx xop0 = XEXP (x, 0);
5918       rtx xop1 = XEXP (x, 1);
5919 
5920       if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
5921 	xop0 = force_reg (SImode, xop0);
5922 
5923       if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
5924 	xop1 = force_reg (SImode, xop1);
5925 
5926       if (ARM_BASE_REGISTER_RTX_P (xop0)
5927 	  && GET_CODE (xop1) == CONST_INT)
5928 	{
5929 	  HOST_WIDE_INT n, low_n;
5930 	  rtx base_reg, val;
5931 	  n = INTVAL (xop1);
5932 
5933 	  /* VFP addressing modes actually allow greater offsets, but for
5934 	     now we just stick with the lowest common denominator.  */
5935 	  if (mode == DImode
5936 	      || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
5937 	    {
5938 	      low_n = n & 0x0f;
5939 	      n &= ~0x0f;
5940 	      if (low_n > 4)
5941 		{
5942 		  n += 16;
5943 		  low_n -= 16;
5944 		}
5945 	    }
5946 	  else
5947 	    {
5948 	      low_n = ((mode) == TImode ? 0
5949 		       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
5950 	      n -= low_n;
5951 	    }
5952 
5953 	  base_reg = gen_reg_rtx (SImode);
5954 	  val = force_operand (plus_constant (xop0, n), NULL_RTX);
5955 	  emit_move_insn (base_reg, val);
5956 	  x = plus_constant (base_reg, low_n);
5957 	}
5958       else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
5959 	x = gen_rtx_PLUS (SImode, xop0, xop1);
5960     }
5961 
5962   /* XXX We don't allow MINUS any more -- see comment in
5963      arm_legitimate_address_outer_p ().  */
5964   else if (GET_CODE (x) == MINUS)
5965     {
5966       rtx xop0 = XEXP (x, 0);
5967       rtx xop1 = XEXP (x, 1);
5968 
5969       if (CONSTANT_P (xop0))
5970 	xop0 = force_reg (SImode, xop0);
5971 
5972       if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
5973 	xop1 = force_reg (SImode, xop1);
5974 
5975       if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
5976 	x = gen_rtx_MINUS (SImode, xop0, xop1);
5977     }
5978 
5979   /* Make sure to take full advantage of the pre-indexed addressing mode
5980      with absolute addresses which often allows for the base register to
5981      be factorized for multiple adjacent memory references, and it might
5982      even allows for the mini pool to be avoided entirely. */
5983   else if (GET_CODE (x) == CONST_INT && optimize > 0)
5984     {
5985       unsigned int bits;
5986       HOST_WIDE_INT mask, base, index;
5987       rtx base_reg;
5988 
5989       /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
5990          use a 8-bit index. So let's use a 12-bit index for SImode only and
5991          hope that arm_gen_constant will enable ldrb to use more bits. */
5992       bits = (mode == SImode) ? 12 : 8;
5993       mask = (1 << bits) - 1;
5994       base = INTVAL (x) & ~mask;
5995       index = INTVAL (x) & mask;
5996       if (bit_count (base & 0xffffffff) > (32 - bits)/2)
5997         {
5998 	  /* It'll most probably be more efficient to generate the base
5999 	     with more bits set and use a negative index instead. */
6000 	  base |= mask;
6001 	  index -= mask;
6002 	}
6003       base_reg = force_reg (SImode, GEN_INT (base));
6004       x = plus_constant (base_reg, index);
6005     }
6006 
6007   if (flag_pic)
6008     {
6009       /* We need to find and carefully transform any SYMBOL and LABEL
6010 	 references; so go back to the original address expression.  */
6011       rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6012 
6013       if (new_x != orig_x)
6014 	x = new_x;
6015     }
6016 
6017   return x;
6018 }
6019 
6020 
6021 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6022    to be legitimate.  If we find one, return the new, valid address.  */
6023 rtx
6024 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6025 {
6026   if (arm_tls_symbol_p (x))
6027     return legitimize_tls_address (x, NULL_RTX);
6028 
6029   if (GET_CODE (x) == PLUS
6030       && GET_CODE (XEXP (x, 1)) == CONST_INT
6031       && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6032 	  || INTVAL (XEXP (x, 1)) < 0))
6033     {
6034       rtx xop0 = XEXP (x, 0);
6035       rtx xop1 = XEXP (x, 1);
6036       HOST_WIDE_INT offset = INTVAL (xop1);
6037 
6038       /* Try and fold the offset into a biasing of the base register and
6039 	 then offsetting that.  Don't do this when optimizing for space
6040 	 since it can cause too many CSEs.  */
6041       if (optimize_size && offset >= 0
6042 	  && offset < 256 + 31 * GET_MODE_SIZE (mode))
6043 	{
6044 	  HOST_WIDE_INT delta;
6045 
6046 	  if (offset >= 256)
6047 	    delta = offset - (256 - GET_MODE_SIZE (mode));
6048 	  else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6049 	    delta = 31 * GET_MODE_SIZE (mode);
6050 	  else
6051 	    delta = offset & (~31 * GET_MODE_SIZE (mode));
6052 
6053 	  xop0 = force_operand (plus_constant (xop0, offset - delta),
6054 				NULL_RTX);
6055 	  x = plus_constant (xop0, delta);
6056 	}
6057       else if (offset < 0 && offset > -256)
6058 	/* Small negative offsets are best done with a subtract before the
6059 	   dereference, forcing these into a register normally takes two
6060 	   instructions.  */
6061 	x = force_operand (x, NULL_RTX);
6062       else
6063 	{
6064 	  /* For the remaining cases, force the constant into a register.  */
6065 	  xop1 = force_reg (SImode, xop1);
6066 	  x = gen_rtx_PLUS (SImode, xop0, xop1);
6067 	}
6068     }
6069   else if (GET_CODE (x) == PLUS
6070 	   && s_register_operand (XEXP (x, 1), SImode)
6071 	   && !s_register_operand (XEXP (x, 0), SImode))
6072     {
6073       rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6074 
6075       x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6076     }
6077 
6078   if (flag_pic)
6079     {
6080       /* We need to find and carefully transform any SYMBOL and LABEL
6081 	 references; so go back to the original address expression.  */
6082       rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6083 
6084       if (new_x != orig_x)
6085 	x = new_x;
6086     }
6087 
6088   return x;
6089 }
6090 
6091 rtx
6092 thumb_legitimize_reload_address (rtx *x_p,
6093 				 enum machine_mode mode,
6094 				 int opnum, int type,
6095 				 int ind_levels ATTRIBUTE_UNUSED)
6096 {
6097   rtx x = *x_p;
6098 
6099   if (GET_CODE (x) == PLUS
6100       && GET_MODE_SIZE (mode) < 4
6101       && REG_P (XEXP (x, 0))
6102       && XEXP (x, 0) == stack_pointer_rtx
6103       && GET_CODE (XEXP (x, 1)) == CONST_INT
6104       && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6105     {
6106       rtx orig_x = x;
6107 
6108       x = copy_rtx (x);
6109       push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6110 		   Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6111       return x;
6112     }
6113 
6114   /* If both registers are hi-regs, then it's better to reload the
6115      entire expression rather than each register individually.  That
6116      only requires one reload register rather than two.  */
6117   if (GET_CODE (x) == PLUS
6118       && REG_P (XEXP (x, 0))
6119       && REG_P (XEXP (x, 1))
6120       && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6121       && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6122     {
6123       rtx orig_x = x;
6124 
6125       x = copy_rtx (x);
6126       push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6127 		   Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6128       return x;
6129     }
6130 
6131   return NULL;
6132 }
6133 
6134 /* Test for various thread-local symbols.  */
6135 
6136 /* Return TRUE if X is a thread-local symbol.  */
6137 
6138 static bool
6139 arm_tls_symbol_p (rtx x)
6140 {
6141   if (! TARGET_HAVE_TLS)
6142     return false;
6143 
6144   if (GET_CODE (x) != SYMBOL_REF)
6145     return false;
6146 
6147   return SYMBOL_REF_TLS_MODEL (x) != 0;
6148 }
6149 
6150 /* Helper for arm_tls_referenced_p.  */
6151 
6152 static int
6153 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6154 {
6155   if (GET_CODE (*x) == SYMBOL_REF)
6156     return SYMBOL_REF_TLS_MODEL (*x) != 0;
6157 
6158   /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6159      TLS offsets, not real symbol references.  */
6160   if (GET_CODE (*x) == UNSPEC
6161       && XINT (*x, 1) == UNSPEC_TLS)
6162     return -1;
6163 
6164   return 0;
6165 }
6166 
6167 /* Return TRUE if X contains any TLS symbol references.  */
6168 
6169 bool
6170 arm_tls_referenced_p (rtx x)
6171 {
6172   if (! TARGET_HAVE_TLS)
6173     return false;
6174 
6175   return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6176 }
6177 
6178 /* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
6179 
6180 bool
6181 arm_cannot_force_const_mem (rtx x)
6182 {
6183   rtx base, offset;
6184 
6185   if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6186     {
6187       split_const (x, &base, &offset);
6188       if (GET_CODE (base) == SYMBOL_REF
6189 	  && !offset_within_block_p (base, INTVAL (offset)))
6190 	return true;
6191     }
6192   return arm_tls_referenced_p (x);
6193 }
6194 
6195 #define REG_OR_SUBREG_REG(X)						\
6196   (GET_CODE (X) == REG							\
6197    || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6198 
6199 #define REG_OR_SUBREG_RTX(X)			\
6200    (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6201 
6202 #ifndef COSTS_N_INSNS
6203 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
6204 #endif
6205 static inline int
6206 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6207 {
6208   enum machine_mode mode = GET_MODE (x);
6209 
6210   switch (code)
6211     {
6212     case ASHIFT:
6213     case ASHIFTRT:
6214     case LSHIFTRT:
6215     case ROTATERT:
6216     case PLUS:
6217     case MINUS:
6218     case COMPARE:
6219     case NEG:
6220     case NOT:
6221       return COSTS_N_INSNS (1);
6222 
6223     case MULT:
6224       if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6225 	{
6226 	  int cycles = 0;
6227 	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6228 
6229 	  while (i)
6230 	    {
6231 	      i >>= 2;
6232 	      cycles++;
6233 	    }
6234 	  return COSTS_N_INSNS (2) + cycles;
6235 	}
6236       return COSTS_N_INSNS (1) + 16;
6237 
6238     case SET:
6239       return (COSTS_N_INSNS (1)
6240 	      + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6241 		     + GET_CODE (SET_DEST (x)) == MEM));
6242 
6243     case CONST_INT:
6244       if (outer == SET)
6245 	{
6246 	  if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6247 	    return 0;
6248 	  if (thumb_shiftable_const (INTVAL (x)))
6249 	    return COSTS_N_INSNS (2);
6250 	  return COSTS_N_INSNS (3);
6251 	}
6252       else if ((outer == PLUS || outer == COMPARE)
6253 	       && INTVAL (x) < 256 && INTVAL (x) > -256)
6254 	return 0;
6255       else if ((outer == IOR || outer == XOR || outer == AND)
6256 	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
6257 	return COSTS_N_INSNS (1);
6258       else if (outer == AND)
6259 	{
6260 	  int i;
6261 	  /* This duplicates the tests in the andsi3 expander.  */
6262 	  for (i = 9; i <= 31; i++)
6263 	    if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6264 		|| (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6265 	      return COSTS_N_INSNS (2);
6266 	}
6267       else if (outer == ASHIFT || outer == ASHIFTRT
6268 	       || outer == LSHIFTRT)
6269 	return 0;
6270       return COSTS_N_INSNS (2);
6271 
6272     case CONST:
6273     case CONST_DOUBLE:
6274     case LABEL_REF:
6275     case SYMBOL_REF:
6276       return COSTS_N_INSNS (3);
6277 
6278     case UDIV:
6279     case UMOD:
6280     case DIV:
6281     case MOD:
6282       return 100;
6283 
6284     case TRUNCATE:
6285       return 99;
6286 
6287     case AND:
6288     case XOR:
6289     case IOR:
6290       /* XXX guess.  */
6291       return 8;
6292 
6293     case MEM:
6294       /* XXX another guess.  */
6295       /* Memory costs quite a lot for the first word, but subsequent words
6296 	 load at the equivalent of a single insn each.  */
6297       return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6298 	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6299 		 ? 4 : 0));
6300 
6301     case IF_THEN_ELSE:
6302       /* XXX a guess.  */
6303       if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6304 	return 14;
6305       return 2;
6306 
6307     case ZERO_EXTEND:
6308       /* XXX still guessing.  */
6309       switch (GET_MODE (XEXP (x, 0)))
6310 	{
6311 	case QImode:
6312 	  return (1 + (mode == DImode ? 4 : 0)
6313 		  + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6314 
6315 	case HImode:
6316 	  return (4 + (mode == DImode ? 4 : 0)
6317 		  + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6318 
6319 	case SImode:
6320 	  return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6321 
6322 	default:
6323 	  return 99;
6324 	}
6325 
6326     default:
6327       return 99;
6328     }
6329 }
6330 
6331 static inline bool
6332 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6333 {
6334   enum machine_mode mode = GET_MODE (x);
6335   enum rtx_code subcode;
6336   rtx operand;
6337   enum rtx_code code = GET_CODE (x);
6338   int extra_cost;
6339   *total = 0;
6340 
6341   switch (code)
6342     {
6343     case MEM:
6344       /* Memory costs quite a lot for the first word, but subsequent words
6345 	 load at the equivalent of a single insn each.  */
6346       *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6347       return true;
6348 
6349     case DIV:
6350     case MOD:
6351     case UDIV:
6352     case UMOD:
6353       if (TARGET_HARD_FLOAT && mode == SFmode)
6354 	*total = COSTS_N_INSNS (2);
6355       else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6356 	*total = COSTS_N_INSNS (4);
6357       else
6358 	*total = COSTS_N_INSNS (20);
6359       return false;
6360 
6361     case ROTATE:
6362       if (GET_CODE (XEXP (x, 1)) == REG)
6363 	*total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6364       else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6365 	*total = rtx_cost (XEXP (x, 1), code, speed);
6366 
6367       /* Fall through */
6368     case ROTATERT:
6369       if (mode != SImode)
6370 	{
6371 	  *total += COSTS_N_INSNS (4);
6372 	  return true;
6373 	}
6374 
6375       /* Fall through */
6376     case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6377       *total += rtx_cost (XEXP (x, 0), code, speed);
6378       if (mode == DImode)
6379 	{
6380 	  *total += COSTS_N_INSNS (3);
6381 	  return true;
6382 	}
6383 
6384       *total += COSTS_N_INSNS (1);
6385       /* Increase the cost of complex shifts because they aren't any faster,
6386          and reduce dual issue opportunities.  */
6387       if (arm_tune_cortex_a9
6388 	  && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6389 	++*total;
6390 
6391       return true;
6392 
6393     case MINUS:
6394       if (TARGET_THUMB2)
6395 	{
6396 	  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6397 	    {
6398 	      if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6399 		*total = COSTS_N_INSNS (1);
6400 	      else
6401 		*total = COSTS_N_INSNS (20);
6402 	    }
6403 	  else
6404 	    *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6405 	  /* Thumb2 does not have RSB, so all arguments must be
6406 	     registers (subtracting a constant is canonicalized as
6407 	     addition of the negated constant).  */
6408 	  return false;
6409 	}
6410 
6411       if (mode == DImode)
6412 	{
6413 	  *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6414 	  if (GET_CODE (XEXP (x, 0)) == CONST_INT
6415 	      && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6416 	    {
6417 	      *total += rtx_cost (XEXP (x, 1), code, speed);
6418 	      return true;
6419 	    }
6420 
6421 	  if (GET_CODE (XEXP (x, 1)) == CONST_INT
6422 	      && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6423 	    {
6424 	      *total += rtx_cost (XEXP (x, 0), code, speed);
6425 	      return true;
6426 	    }
6427 
6428 	  return false;
6429 	}
6430 
6431       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6432 	{
6433 	  if (TARGET_HARD_FLOAT
6434 	      && (mode == SFmode
6435 		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
6436 	    {
6437 	      *total = COSTS_N_INSNS (1);
6438 	      if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6439 		  && arm_const_double_rtx (XEXP (x, 0)))
6440 		{
6441 		  *total += rtx_cost (XEXP (x, 1), code, speed);
6442 		  return true;
6443 		}
6444 
6445 	      if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6446 		  && arm_const_double_rtx (XEXP (x, 1)))
6447 		{
6448 		  *total += rtx_cost (XEXP (x, 0), code, speed);
6449 		  return true;
6450 		}
6451 
6452 	      return false;
6453 	    }
6454 	  *total = COSTS_N_INSNS (20);
6455 	  return false;
6456 	}
6457 
6458       *total = COSTS_N_INSNS (1);
6459       if (GET_CODE (XEXP (x, 0)) == CONST_INT
6460 	  && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6461 	{
6462 	  *total += rtx_cost (XEXP (x, 1), code, speed);
6463 	  return true;
6464 	}
6465 
6466       subcode = GET_CODE (XEXP (x, 1));
6467       if (subcode == ASHIFT || subcode == ASHIFTRT
6468 	  || subcode == LSHIFTRT
6469 	  || subcode == ROTATE || subcode == ROTATERT)
6470 	{
6471 	  *total += rtx_cost (XEXP (x, 0), code, speed);
6472 	  *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6473 	  return true;
6474 	}
6475 
6476       /* A shift as a part of RSB costs no more than RSB itself.  */
6477       if (GET_CODE (XEXP (x, 0)) == MULT
6478 	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6479 	{
6480 	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6481 	  *total += rtx_cost (XEXP (x, 1), code, speed);
6482 	  return true;
6483 	}
6484 
6485       if (subcode == MULT
6486 	  && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6487 	{
6488 	  *total += rtx_cost (XEXP (x, 0), code, speed);
6489 	  *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6490 	  return true;
6491 	}
6492 
6493       if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6494 	  || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6495 	{
6496 	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6497 	  if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6498 	      && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6499 	    *total += COSTS_N_INSNS (1);
6500 
6501 	  return true;
6502 	}
6503 
6504       /* Fall through */
6505 
6506     case PLUS:
6507       if (code == PLUS && arm_arch6 && mode == SImode
6508 	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6509 	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6510 	{
6511 	  *total = COSTS_N_INSNS (1);
6512 	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6513 			      speed);
6514 	  *total += rtx_cost (XEXP (x, 1), code, speed);
6515 	  return true;
6516 	}
6517 
6518       /* MLA: All arguments must be registers.  We filter out
6519 	 multiplication by a power of two, so that we fall down into
6520 	 the code below.  */
6521       if (GET_CODE (XEXP (x, 0)) == MULT
6522 	  && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6523 	{
6524 	  /* The cost comes from the cost of the multiply.  */
6525 	  return false;
6526 	}
6527 
6528       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6529 	{
6530 	  if (TARGET_HARD_FLOAT
6531 	      && (mode == SFmode
6532 		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
6533 	    {
6534 	      *total = COSTS_N_INSNS (1);
6535 	      if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6536 		  && arm_const_double_rtx (XEXP (x, 1)))
6537 		{
6538 		  *total += rtx_cost (XEXP (x, 0), code, speed);
6539 		  return true;
6540 		}
6541 
6542 	      return false;
6543 	    }
6544 
6545 	  *total = COSTS_N_INSNS (20);
6546 	  return false;
6547 	}
6548 
6549       if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6550 	  || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6551 	{
6552 	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6553 	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6554 	      && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6555 	    *total += COSTS_N_INSNS (1);
6556 	  return true;
6557 	}
6558 
6559       /* Fall through */
6560 
6561     case AND: case XOR: case IOR:
6562       extra_cost = 0;
6563 
6564       /* Normally the frame registers will be spilt into reg+const during
6565 	 reload, so it is a bad idea to combine them with other instructions,
6566 	 since then they might not be moved outside of loops.  As a compromise
6567 	 we allow integration with ops that have a constant as their second
6568 	 operand.  */
6569       if ((REG_OR_SUBREG_REG (XEXP (x, 0))
6570 	   && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6571 	   && GET_CODE (XEXP (x, 1)) != CONST_INT)
6572 	  || (REG_OR_SUBREG_REG (XEXP (x, 0))
6573 	      && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
6574 	*total = 4;
6575 
6576       if (mode == DImode)
6577 	{
6578 	  *total += COSTS_N_INSNS (2);
6579 	  if (GET_CODE (XEXP (x, 1)) == CONST_INT
6580 	      && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6581 	    {
6582 	      *total += rtx_cost (XEXP (x, 0), code, speed);
6583 	      return true;
6584 	    }
6585 
6586 	  return false;
6587 	}
6588 
6589       *total += COSTS_N_INSNS (1);
6590       if (GET_CODE (XEXP (x, 1)) == CONST_INT
6591 	  && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6592 	{
6593 	  *total += rtx_cost (XEXP (x, 0), code, speed);
6594 	  return true;
6595 	}
6596       subcode = GET_CODE (XEXP (x, 0));
6597       if (subcode == ASHIFT || subcode == ASHIFTRT
6598 	  || subcode == LSHIFTRT
6599 	  || subcode == ROTATE || subcode == ROTATERT)
6600 	{
6601 	  *total += rtx_cost (XEXP (x, 1), code, speed);
6602 	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6603 	  return true;
6604 	}
6605 
6606       if (subcode == MULT
6607 	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6608 	{
6609 	  *total += rtx_cost (XEXP (x, 1), code, speed);
6610 	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6611 	  return true;
6612 	}
6613 
6614       if (subcode == UMIN || subcode == UMAX
6615 	  || subcode == SMIN || subcode == SMAX)
6616 	{
6617 	  *total = COSTS_N_INSNS (3);
6618 	  return true;
6619 	}
6620 
6621       return false;
6622 
6623     case MULT:
6624       /* This should have been handled by the CPU specific routines.  */
6625       gcc_unreachable ();
6626 
6627     case TRUNCATE:
6628       if (arm_arch3m && mode == SImode
6629 	  && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6630 	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6631 	  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6632 	      == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6633 	  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6634 	      || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6635 	{
6636 	  *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6637 	  return true;
6638 	}
6639       *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6640       return false;
6641 
6642     case NEG:
6643       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6644 	{
6645 	  if (TARGET_HARD_FLOAT
6646 	      && (mode == SFmode
6647 		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
6648 	    {
6649 	      *total = COSTS_N_INSNS (1);
6650 	      return false;
6651 	    }
6652 	  *total = COSTS_N_INSNS (2);
6653 	  return false;
6654 	}
6655 
6656       /* Fall through */
6657     case NOT:
6658       *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
6659       if (mode == SImode && code == NOT)
6660 	{
6661 	  subcode = GET_CODE (XEXP (x, 0));
6662 	  if (subcode == ASHIFT || subcode == ASHIFTRT
6663 	      || subcode == LSHIFTRT
6664 	      || subcode == ROTATE || subcode == ROTATERT
6665 	      || (subcode == MULT
6666 		  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6667 	    {
6668 	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6669 	      /* Register shifts cost an extra cycle.  */
6670 	      if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6671 		*total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
6672 							subcode, speed);
6673 	      return true;
6674 	    }
6675 	}
6676 
6677       return false;
6678 
6679     case IF_THEN_ELSE:
6680       if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6681 	{
6682 	  *total = COSTS_N_INSNS (4);
6683 	  return true;
6684 	}
6685 
6686       operand = XEXP (x, 0);
6687 
6688       if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6689 	     || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6690 	    && GET_CODE (XEXP (operand, 0)) == REG
6691 	    && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6692 	*total += COSTS_N_INSNS (1);
6693       *total += (rtx_cost (XEXP (x, 1), code, speed)
6694 		 + rtx_cost (XEXP (x, 2), code, speed));
6695       return true;
6696 
6697     case NE:
6698       if (mode == SImode && XEXP (x, 1) == const0_rtx)
6699 	{
6700 	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6701 	  return true;
6702 	}
6703       goto scc_insn;
6704 
6705     case GE:
6706       if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6707 	  && mode == SImode && XEXP (x, 1) == const0_rtx)
6708 	{
6709 	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6710 	  return true;
6711 	}
6712       goto scc_insn;
6713 
6714     case LT:
6715       if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6716 	  && mode == SImode && XEXP (x, 1) == const0_rtx)
6717 	{
6718 	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6719 	  return true;
6720 	}
6721       goto scc_insn;
6722 
6723     case EQ:
6724     case GT:
6725     case LE:
6726     case GEU:
6727     case LTU:
6728     case GTU:
6729     case LEU:
6730     case UNORDERED:
6731     case ORDERED:
6732     case UNEQ:
6733     case UNGE:
6734     case UNLT:
6735     case UNGT:
6736     case UNLE:
6737     scc_insn:
6738       /* SCC insns.  In the case where the comparison has already been
6739 	 performed, then they cost 2 instructions.  Otherwise they need
6740 	 an additional comparison before them.  */
6741       *total = COSTS_N_INSNS (2);
6742       if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6743 	{
6744 	  return true;
6745 	}
6746 
6747       /* Fall through */
6748     case COMPARE:
6749       if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6750 	{
6751 	  *total = 0;
6752 	  return true;
6753 	}
6754 
6755       *total += COSTS_N_INSNS (1);
6756       if (GET_CODE (XEXP (x, 1)) == CONST_INT
6757 	  && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6758 	{
6759 	  *total += rtx_cost (XEXP (x, 0), code, speed);
6760 	  return true;
6761 	}
6762 
6763       subcode = GET_CODE (XEXP (x, 0));
6764       if (subcode == ASHIFT || subcode == ASHIFTRT
6765 	  || subcode == LSHIFTRT
6766 	  || subcode == ROTATE || subcode == ROTATERT)
6767 	{
6768 	  *total += rtx_cost (XEXP (x, 1), code, speed);
6769 	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6770 	  return true;
6771 	}
6772 
6773       if (subcode == MULT
6774 	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6775 	{
6776 	  *total += rtx_cost (XEXP (x, 1), code, speed);
6777 	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6778 	  return true;
6779 	}
6780 
6781       return false;
6782 
6783     case UMIN:
6784     case UMAX:
6785     case SMIN:
6786     case SMAX:
6787       *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6788       if (GET_CODE (XEXP (x, 1)) != CONST_INT
6789 	  || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
6790 	*total += rtx_cost (XEXP (x, 1), code, speed);
6791       return true;
6792 
6793     case ABS:
6794       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6795 	{
6796 	  if (TARGET_HARD_FLOAT
6797 	      && (mode == SFmode
6798 		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
6799 	    {
6800 	      *total = COSTS_N_INSNS (1);
6801 	      return false;
6802 	    }
6803 	  *total = COSTS_N_INSNS (20);
6804 	  return false;
6805 	}
6806       *total = COSTS_N_INSNS (1);
6807       if (mode == DImode)
6808 	*total += COSTS_N_INSNS (3);
6809       return false;
6810 
6811     case SIGN_EXTEND:
6812       if (GET_MODE_CLASS (mode) == MODE_INT)
6813 	{
6814 	  *total = 0;
6815 	  if (mode == DImode)
6816 	    *total += COSTS_N_INSNS (1);
6817 
6818 	  if (GET_MODE (XEXP (x, 0)) != SImode)
6819 	    {
6820 	      if (arm_arch6)
6821 		{
6822 		  if (GET_CODE (XEXP (x, 0)) != MEM)
6823 		    *total += COSTS_N_INSNS (1);
6824 		}
6825 	      else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
6826 		*total += COSTS_N_INSNS (2);
6827 	    }
6828 
6829 	  return false;
6830 	}
6831 
6832       /* Fall through */
6833     case ZERO_EXTEND:
6834       *total = 0;
6835       if (GET_MODE_CLASS (mode) == MODE_INT)
6836 	{
6837 	  if (mode == DImode)
6838 	    *total += COSTS_N_INSNS (1);
6839 
6840 	  if (GET_MODE (XEXP (x, 0)) != SImode)
6841 	    {
6842 	      if (arm_arch6)
6843 		{
6844 		  if (GET_CODE (XEXP (x, 0)) != MEM)
6845 		    *total += COSTS_N_INSNS (1);
6846 		}
6847 	      else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
6848 		*total += COSTS_N_INSNS (GET_MODE (XEXP (x, 0)) == QImode ?
6849 					 1 : 2);
6850 	    }
6851 
6852 	  return false;
6853 	}
6854 
6855       switch (GET_MODE (XEXP (x, 0)))
6856 	{
6857 	case V8QImode:
6858 	case V4HImode:
6859 	case V2SImode:
6860 	case V4QImode:
6861 	case V2HImode:
6862 	  *total = COSTS_N_INSNS (1);
6863 	  return false;
6864 
6865 	default:
6866 	  gcc_unreachable ();
6867 	}
6868       gcc_unreachable ();
6869 
6870     case ZERO_EXTRACT:
6871     case SIGN_EXTRACT:
6872       *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6873       return true;
6874 
6875     case CONST_INT:
6876       if (const_ok_for_arm (INTVAL (x))
6877 	  || const_ok_for_arm (~INTVAL (x)))
6878 	*total = COSTS_N_INSNS (1);
6879       else
6880 	*total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
6881 						  INTVAL (x), NULL_RTX,
6882 						  NULL_RTX, 0, 0));
6883       return true;
6884 
6885     case CONST:
6886     case LABEL_REF:
6887     case SYMBOL_REF:
6888       *total = COSTS_N_INSNS (3);
6889       return true;
6890 
6891     case HIGH:
6892       *total = COSTS_N_INSNS (1);
6893       return true;
6894 
6895     case LO_SUM:
6896       *total = COSTS_N_INSNS (1);
6897       *total += rtx_cost (XEXP (x, 0), code, speed);
6898       return true;
6899 
6900     case CONST_DOUBLE:
6901       if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
6902 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
6903 	*total = COSTS_N_INSNS (1);
6904       else
6905 	*total = COSTS_N_INSNS (4);
6906       return true;
6907 
6908     default:
6909       *total = COSTS_N_INSNS (4);
6910       return false;
6911     }
6912 }
6913 
6914 /* RTX costs when optimizing for size.  */
6915 static bool
6916 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
6917 		    int *total)
6918 {
6919   enum machine_mode mode = GET_MODE (x);
6920   if (TARGET_THUMB1)
6921     {
6922       /* XXX TBD.  For now, use the standard costs.  */
6923       *total = thumb1_rtx_costs (x, code, outer_code);
6924       return true;
6925     }
6926 
6927   /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions.  */
6928   switch (code)
6929     {
6930     case MEM:
6931       /* A memory access costs 1 insn if the mode is small, or the address is
6932 	 a single register, otherwise it costs one insn per word.  */
6933       if (REG_P (XEXP (x, 0)))
6934 	*total = COSTS_N_INSNS (1);
6935       else
6936 	*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6937       return true;
6938 
6939     case DIV:
6940     case MOD:
6941     case UDIV:
6942     case UMOD:
6943       /* Needs a libcall, so it costs about this.  */
6944       *total = COSTS_N_INSNS (2);
6945       return false;
6946 
6947     case ROTATE:
6948       if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
6949 	{
6950 	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
6951 	  return true;
6952 	}
6953       /* Fall through */
6954     case ROTATERT:
6955     case ASHIFT:
6956     case LSHIFTRT:
6957     case ASHIFTRT:
6958       if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
6959 	{
6960 	  *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
6961 	  return true;
6962 	}
6963       else if (mode == SImode)
6964 	{
6965 	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
6966 	  /* Slightly disparage register shifts, but not by much.  */
6967 	  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6968 	    *total += 1 + rtx_cost (XEXP (x, 1), code, false);
6969 	  return true;
6970 	}
6971 
6972       /* Needs a libcall.  */
6973       *total = COSTS_N_INSNS (2);
6974       return false;
6975 
6976     case MINUS:
6977       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
6978 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
6979 	{
6980 	  *total = COSTS_N_INSNS (1);
6981 	  return false;
6982 	}
6983 
6984       if (mode == SImode)
6985 	{
6986 	  enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
6987 	  enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
6988 
6989 	  if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
6990 	      || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
6991 	      || subcode1 == ROTATE || subcode1 == ROTATERT
6992 	      || subcode1 == ASHIFT || subcode1 == LSHIFTRT
6993 	      || subcode1 == ASHIFTRT)
6994 	    {
6995 	      /* It's just the cost of the two operands.  */
6996 	      *total = 0;
6997 	      return false;
6998 	    }
6999 
7000 	  *total = COSTS_N_INSNS (1);
7001 	  return false;
7002 	}
7003 
7004       *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7005       return false;
7006 
7007     case PLUS:
7008       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7009 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
7010 	{
7011 	  *total = COSTS_N_INSNS (1);
7012 	  return false;
7013 	}
7014 
7015       /* A shift as a part of ADD costs nothing.  */
7016       if (GET_CODE (XEXP (x, 0)) == MULT
7017 	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7018 	{
7019 	  *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7020 	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
7021 	  *total += rtx_cost (XEXP (x, 1), code, false);
7022 	  return true;
7023 	}
7024 
7025       /* Fall through */
7026     case AND: case XOR: case IOR:
7027       if (mode == SImode)
7028 	{
7029 	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7030 
7031 	  if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7032 	      || subcode == LSHIFTRT || subcode == ASHIFTRT
7033 	      || (code == AND && subcode == NOT))
7034 	    {
7035 	      /* It's just the cost of the two operands.  */
7036 	      *total = 0;
7037 	      return false;
7038 	    }
7039 	}
7040 
7041       *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7042       return false;
7043 
7044     case MULT:
7045       *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7046       return false;
7047 
7048     case NEG:
7049       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7050 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
7051 	{
7052 	  *total = COSTS_N_INSNS (1);
7053 	  return false;
7054 	}
7055 
7056       /* Fall through */
7057     case NOT:
7058       *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7059 
7060       return false;
7061 
7062     case IF_THEN_ELSE:
7063       *total = 0;
7064       return false;
7065 
7066     case COMPARE:
7067       if (cc_register (XEXP (x, 0), VOIDmode))
7068 	* total = 0;
7069       else
7070 	*total = COSTS_N_INSNS (1);
7071       return false;
7072 
7073     case ABS:
7074       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7075 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
7076 	*total = COSTS_N_INSNS (1);
7077       else
7078 	*total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7079       return false;
7080 
7081     case SIGN_EXTEND:
7082       *total = 0;
7083       if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
7084 	{
7085 	  if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
7086 	    *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
7087 	}
7088       if (mode == DImode)
7089 	*total += COSTS_N_INSNS (1);
7090       return false;
7091 
7092     case ZERO_EXTEND:
7093       *total = 0;
7094       if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
7095 	{
7096 	  switch (GET_MODE (XEXP (x, 0)))
7097 	    {
7098 	    case QImode:
7099 	      *total += COSTS_N_INSNS (1);
7100 	      break;
7101 
7102 	    case HImode:
7103 	      *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
7104 
7105 	    case SImode:
7106 	      break;
7107 
7108 	    default:
7109 	      *total += COSTS_N_INSNS (2);
7110 	    }
7111 	}
7112 
7113       if (mode == DImode)
7114 	*total += COSTS_N_INSNS (1);
7115 
7116       return false;
7117 
7118     case CONST_INT:
7119       if (const_ok_for_arm (INTVAL (x)))
7120 	/* A multiplication by a constant requires another instruction
7121 	   to load the constant to a register.  */
7122 	*total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7123 				? 1 : 0);
7124       else if (const_ok_for_arm (~INTVAL (x)))
7125 	*total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7126       else if (const_ok_for_arm (-INTVAL (x)))
7127 	{
7128 	  if (outer_code == COMPARE || outer_code == PLUS
7129 	      || outer_code == MINUS)
7130 	    *total = 0;
7131 	  else
7132 	    *total = COSTS_N_INSNS (1);
7133 	}
7134       else
7135 	*total = COSTS_N_INSNS (2);
7136       return true;
7137 
7138     case CONST:
7139     case LABEL_REF:
7140     case SYMBOL_REF:
7141       *total = COSTS_N_INSNS (2);
7142       return true;
7143 
7144     case CONST_DOUBLE:
7145       *total = COSTS_N_INSNS (4);
7146       return true;
7147 
7148     case HIGH:
7149     case LO_SUM:
7150       /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7151 	 cost of these slightly.  */
7152       *total = COSTS_N_INSNS (1) + 1;
7153       return true;
7154 
7155     default:
7156       if (mode != VOIDmode)
7157 	*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7158       else
7159 	*total = COSTS_N_INSNS (4); /* How knows?  */
7160       return false;
7161     }
7162 }
7163 
7164 /* RTX costs when optimizing for size.  */
7165 static bool
7166 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7167 	       bool speed)
7168 {
7169   if (!speed)
7170     return arm_size_rtx_costs (x, (enum rtx_code) code,
7171 			       (enum rtx_code) outer_code, total);
7172   else
7173     return all_cores[(int)arm_tune].rtx_costs (x, (enum rtx_code) code,
7174 					       (enum rtx_code) outer_code,
7175 					       total, speed);
7176 }
7177 
7178 /* RTX costs for cores with a slow MUL implementation.  Thumb-2 is not
7179    supported on any "slowmul" cores, so it can be ignored.  */
7180 
7181 static bool
7182 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7183 		       int *total, bool speed)
7184 {
7185   enum machine_mode mode = GET_MODE (x);
7186 
7187   if (TARGET_THUMB)
7188     {
7189       *total = thumb1_rtx_costs (x, code, outer_code);
7190       return true;
7191     }
7192 
7193   switch (code)
7194     {
7195     case MULT:
7196       if (GET_MODE_CLASS (mode) == MODE_FLOAT
7197 	  || mode == DImode)
7198 	{
7199 	  *total = COSTS_N_INSNS (20);
7200 	  return false;
7201 	}
7202 
7203       if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7204 	{
7205 	  unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7206 				      & (unsigned HOST_WIDE_INT) 0xffffffff);
7207 	  int cost, const_ok = const_ok_for_arm (i);
7208 	  int j, booth_unit_size;
7209 
7210 	  /* Tune as appropriate.  */
7211 	  cost = const_ok ? 4 : 8;
7212 	  booth_unit_size = 2;
7213 	  for (j = 0; i && j < 32; j += booth_unit_size)
7214 	    {
7215 	      i >>= booth_unit_size;
7216 	      cost++;
7217 	    }
7218 
7219 	  *total = COSTS_N_INSNS (cost);
7220 	  *total += rtx_cost (XEXP (x, 0), code, speed);
7221 	  return true;
7222 	}
7223 
7224       *total = COSTS_N_INSNS (20);
7225       return false;
7226 
7227     default:
7228       return arm_rtx_costs_1 (x, outer_code, total, speed);;
7229     }
7230 }
7231 
7232 
7233 /* RTX cost for cores with a fast multiply unit (M variants).  */
7234 
7235 static bool
7236 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7237 		       int *total, bool speed)
7238 {
7239   enum machine_mode mode = GET_MODE (x);
7240 
7241   if (TARGET_THUMB1)
7242     {
7243       *total = thumb1_rtx_costs (x, code, outer_code);
7244       return true;
7245     }
7246 
7247   /* ??? should thumb2 use different costs?  */
7248   switch (code)
7249     {
7250     case MULT:
7251       /* There is no point basing this on the tuning, since it is always the
7252 	 fast variant if it exists at all.  */
7253       if (mode == DImode
7254 	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7255 	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7256 	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7257 	{
7258 	  *total = COSTS_N_INSNS(2);
7259 	  return false;
7260 	}
7261 
7262 
7263       if (mode == DImode)
7264 	{
7265 	  *total = COSTS_N_INSNS (5);
7266 	  return false;
7267 	}
7268 
7269       if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7270 	{
7271 	  unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7272 				      & (unsigned HOST_WIDE_INT) 0xffffffff);
7273 	  int cost, const_ok = const_ok_for_arm (i);
7274 	  int j, booth_unit_size;
7275 
7276 	  /* Tune as appropriate.  */
7277 	  cost = const_ok ? 4 : 8;
7278 	  booth_unit_size = 8;
7279 	  for (j = 0; i && j < 32; j += booth_unit_size)
7280 	    {
7281 	      i >>= booth_unit_size;
7282 	      cost++;
7283 	    }
7284 
7285 	  *total = COSTS_N_INSNS(cost);
7286 	  return false;
7287 	}
7288 
7289       if (mode == SImode)
7290 	{
7291 	  *total = COSTS_N_INSNS (4);
7292 	  return false;
7293 	}
7294 
7295       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7296 	{
7297 	  if (TARGET_HARD_FLOAT
7298 	      && (mode == SFmode
7299 		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
7300 	    {
7301 	      *total = COSTS_N_INSNS (1);
7302 	      return false;
7303 	    }
7304 	}
7305 
7306       /* Requires a lib call */
7307       *total = COSTS_N_INSNS (20);
7308       return false;
7309 
7310     default:
7311       return arm_rtx_costs_1 (x, outer_code, total, speed);
7312     }
7313 }
7314 
7315 
7316 /* RTX cost for XScale CPUs.  Thumb-2 is not supported on any xscale cores,
7317    so it can be ignored.  */
7318 
7319 static bool
7320 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, int *total, bool speed)
7321 {
7322   enum machine_mode mode = GET_MODE (x);
7323 
7324   if (TARGET_THUMB)
7325     {
7326       *total = thumb1_rtx_costs (x, code, outer_code);
7327       return true;
7328     }
7329 
7330   switch (code)
7331     {
7332     case COMPARE:
7333       if (GET_CODE (XEXP (x, 0)) != MULT)
7334 	return arm_rtx_costs_1 (x, outer_code, total, speed);
7335 
7336       /* A COMPARE of a MULT is slow on XScale; the muls instruction
7337 	 will stall until the multiplication is complete.  */
7338       *total = COSTS_N_INSNS (3);
7339       return false;
7340 
7341     case MULT:
7342       /* There is no point basing this on the tuning, since it is always the
7343 	 fast variant if it exists at all.  */
7344       if (mode == DImode
7345 	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7346 	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7347 	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7348 	{
7349 	  *total = COSTS_N_INSNS (2);
7350 	  return false;
7351 	}
7352 
7353 
7354       if (mode == DImode)
7355 	{
7356 	  *total = COSTS_N_INSNS (5);
7357 	  return false;
7358 	}
7359 
7360       if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7361 	{
7362 	  /* If operand 1 is a constant we can more accurately
7363 	     calculate the cost of the multiply.  The multiplier can
7364 	     retire 15 bits on the first cycle and a further 12 on the
7365 	     second.  We do, of course, have to load the constant into
7366 	     a register first.  */
7367 	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7368 	  /* There's a general overhead of one cycle.  */
7369 	  int cost = 1;
7370 	  unsigned HOST_WIDE_INT masked_const;
7371 
7372 	  if (i & 0x80000000)
7373 	    i = ~i;
7374 
7375 	  i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7376 
7377 	  masked_const = i & 0xffff8000;
7378 	  if (masked_const != 0)
7379 	    {
7380 	      cost++;
7381 	      masked_const = i & 0xf8000000;
7382 	      if (masked_const != 0)
7383 		cost++;
7384 	    }
7385 	  *total = COSTS_N_INSNS (cost);
7386 	  return false;
7387 	}
7388 
7389       if (mode == SImode)
7390 	{
7391 	  *total = COSTS_N_INSNS (3);
7392 	  return false;
7393 	}
7394 
7395       /* Requires a lib call */
7396       *total = COSTS_N_INSNS (20);
7397       return false;
7398 
7399     default:
7400       return arm_rtx_costs_1 (x, outer_code, total, speed);
7401     }
7402 }
7403 
7404 
7405 /* RTX costs for 9e (and later) cores.  */
7406 
7407 static bool
7408 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7409 		  int *total, bool speed)
7410 {
7411   enum machine_mode mode = GET_MODE (x);
7412 
7413   if (TARGET_THUMB1)
7414     {
7415       switch (code)
7416 	{
7417 	case MULT:
7418 	  *total = COSTS_N_INSNS (3);
7419 	  return true;
7420 
7421 	default:
7422 	  *total = thumb1_rtx_costs (x, code, outer_code);
7423 	  return true;
7424 	}
7425     }
7426 
7427   switch (code)
7428     {
7429     case MULT:
7430       /* There is no point basing this on the tuning, since it is always the
7431 	 fast variant if it exists at all.  */
7432       if (mode == DImode
7433 	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7434 	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7435 	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7436 	{
7437 	  *total = COSTS_N_INSNS (2);
7438 	  return false;
7439 	}
7440 
7441 
7442       if (mode == DImode)
7443 	{
7444 	  *total = COSTS_N_INSNS (5);
7445 	  return false;
7446 	}
7447 
7448       if (mode == SImode)
7449 	{
7450 	  *total = COSTS_N_INSNS (2);
7451 	  return false;
7452 	}
7453 
7454       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7455 	{
7456 	  if (TARGET_HARD_FLOAT
7457 	      && (mode == SFmode
7458 		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
7459 	    {
7460 	      *total = COSTS_N_INSNS (1);
7461 	      return false;
7462 	    }
7463 	}
7464 
7465       *total = COSTS_N_INSNS (20);
7466       return false;
7467 
7468     default:
7469       return arm_rtx_costs_1 (x, outer_code, total, speed);
7470     }
7471 }
7472 /* All address computations that can be done are free, but rtx cost returns
7473    the same for practically all of them.  So we weight the different types
7474    of address here in the order (most pref first):
7475    PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
7476 static inline int
7477 arm_arm_address_cost (rtx x)
7478 {
7479   enum rtx_code c  = GET_CODE (x);
7480 
7481   if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7482     return 0;
7483   if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7484     return 10;
7485 
7486   if (c == PLUS)
7487     {
7488       if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7489 	return 2;
7490 
7491       if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7492 	return 3;
7493 
7494       return 4;
7495     }
7496 
7497   return 6;
7498 }
7499 
7500 static inline int
7501 arm_thumb_address_cost (rtx x)
7502 {
7503   enum rtx_code c  = GET_CODE (x);
7504 
7505   if (c == REG)
7506     return 1;
7507   if (c == PLUS
7508       && GET_CODE (XEXP (x, 0)) == REG
7509       && GET_CODE (XEXP (x, 1)) == CONST_INT)
7510     return 1;
7511 
7512   return 2;
7513 }
7514 
7515 static int
7516 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7517 {
7518   return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7519 }
7520 
7521 static int
7522 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
7523 {
7524   rtx i_pat, d_pat;
7525 
7526   /* Some true dependencies can have a higher cost depending
7527      on precisely how certain input operands are used.  */
7528   if (arm_tune_xscale
7529       && REG_NOTE_KIND (link) == 0
7530       && recog_memoized (insn) >= 0
7531       && recog_memoized (dep) >= 0)
7532     {
7533       int shift_opnum = get_attr_shift (insn);
7534       enum attr_type attr_type = get_attr_type (dep);
7535 
7536       /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7537 	 operand for INSN.  If we have a shifted input operand and the
7538 	 instruction we depend on is another ALU instruction, then we may
7539 	 have to account for an additional stall.  */
7540       if (shift_opnum != 0
7541 	  && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7542 	{
7543 	  rtx shifted_operand;
7544 	  int opno;
7545 
7546 	  /* Get the shifted operand.  */
7547 	  extract_insn (insn);
7548 	  shifted_operand = recog_data.operand[shift_opnum];
7549 
7550 	  /* Iterate over all the operands in DEP.  If we write an operand
7551 	     that overlaps with SHIFTED_OPERAND, then we have increase the
7552 	     cost of this dependency.  */
7553 	  extract_insn (dep);
7554 	  preprocess_constraints ();
7555 	  for (opno = 0; opno < recog_data.n_operands; opno++)
7556 	    {
7557 	      /* We can ignore strict inputs.  */
7558 	      if (recog_data.operand_type[opno] == OP_IN)
7559 		continue;
7560 
7561 	      if (reg_overlap_mentioned_p (recog_data.operand[opno],
7562 					   shifted_operand))
7563 		return 2;
7564 	    }
7565 	}
7566     }
7567 
7568   /* XXX This is not strictly true for the FPA.  */
7569   if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7570       || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7571     return 0;
7572 
7573   /* Call insns don't incur a stall, even if they follow a load.  */
7574   if (REG_NOTE_KIND (link) == 0
7575       && GET_CODE (insn) == CALL_INSN)
7576     return 1;
7577 
7578   if ((i_pat = single_set (insn)) != NULL
7579       && GET_CODE (SET_SRC (i_pat)) == MEM
7580       && (d_pat = single_set (dep)) != NULL
7581       && GET_CODE (SET_DEST (d_pat)) == MEM)
7582     {
7583       rtx src_mem = XEXP (SET_SRC (i_pat), 0);
7584       /* This is a load after a store, there is no conflict if the load reads
7585 	 from a cached area.  Assume that loads from the stack, and from the
7586 	 constant pool are cached, and that others will miss.  This is a
7587 	 hack.  */
7588 
7589       if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
7590 	  || reg_mentioned_p (stack_pointer_rtx, src_mem)
7591 	  || reg_mentioned_p (frame_pointer_rtx, src_mem)
7592 	  || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
7593 	return 1;
7594     }
7595 
7596   return cost;
7597 }
7598 
7599 static int fp_consts_inited = 0;
7600 
7601 /* Only zero is valid for VFP.  Other values are also valid for FPA.  */
7602 static const char * const strings_fp[8] =
7603 {
7604   "0",   "1",   "2",   "3",
7605   "4",   "5",   "0.5", "10"
7606 };
7607 
7608 static REAL_VALUE_TYPE values_fp[8];
7609 
7610 static void
7611 init_fp_table (void)
7612 {
7613   int i;
7614   REAL_VALUE_TYPE r;
7615 
7616   if (TARGET_VFP)
7617     fp_consts_inited = 1;
7618   else
7619     fp_consts_inited = 8;
7620 
7621   for (i = 0; i < fp_consts_inited; i++)
7622     {
7623       r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
7624       values_fp[i] = r;
7625     }
7626 }
7627 
7628 /* Return TRUE if rtx X is a valid immediate FP constant.  */
7629 int
7630 arm_const_double_rtx (rtx x)
7631 {
7632   REAL_VALUE_TYPE r;
7633   int i;
7634 
7635   if (!fp_consts_inited)
7636     init_fp_table ();
7637 
7638   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7639   if (REAL_VALUE_MINUS_ZERO (r))
7640     return 0;
7641 
7642   for (i = 0; i < fp_consts_inited; i++)
7643     if (REAL_VALUES_EQUAL (r, values_fp[i]))
7644       return 1;
7645 
7646   return 0;
7647 }
7648 
7649 /* Return TRUE if rtx X is a valid immediate FPA constant.  */
7650 int
7651 neg_const_double_rtx_ok_for_fpa (rtx x)
7652 {
7653   REAL_VALUE_TYPE r;
7654   int i;
7655 
7656   if (!fp_consts_inited)
7657     init_fp_table ();
7658 
7659   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7660   r = REAL_VALUE_NEGATE (r);
7661   if (REAL_VALUE_MINUS_ZERO (r))
7662     return 0;
7663 
7664   for (i = 0; i < 8; i++)
7665     if (REAL_VALUES_EQUAL (r, values_fp[i]))
7666       return 1;
7667 
7668   return 0;
7669 }
7670 
7671 
7672 /* VFPv3 has a fairly wide range of representable immediates, formed from
7673    "quarter-precision" floating-point values. These can be evaluated using this
7674    formula (with ^ for exponentiation):
7675 
7676      -1^s * n * 2^-r
7677 
7678    Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
7679    16 <= n <= 31 and 0 <= r <= 7.
7680 
7681    These values are mapped onto an 8-bit integer ABCDEFGH s.t.
7682 
7683      - A (most-significant) is the sign bit.
7684      - BCD are the exponent (encoded as r XOR 3).
7685      - EFGH are the mantissa (encoded as n - 16).
7686 */
7687 
7688 /* Return an integer index for a VFPv3 immediate operand X suitable for the
7689    fconst[sd] instruction, or -1 if X isn't suitable.  */
7690 static int
7691 vfp3_const_double_index (rtx x)
7692 {
7693   REAL_VALUE_TYPE r, m;
7694   int sign, exponent;
7695   unsigned HOST_WIDE_INT mantissa, mant_hi;
7696   unsigned HOST_WIDE_INT mask;
7697   HOST_WIDE_INT m1, m2;
7698   int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7699 
7700   if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
7701     return -1;
7702 
7703   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7704 
7705   /* We can't represent these things, so detect them first.  */
7706   if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
7707     return -1;
7708 
7709   /* Extract sign, exponent and mantissa.  */
7710   sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
7711   r = REAL_VALUE_ABS (r);
7712   exponent = REAL_EXP (&r);
7713   /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7714      highest (sign) bit, with a fixed binary point at bit point_pos.
7715      WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
7716      bits for the mantissa, this may fail (low bits would be lost).  */
7717   real_ldexp (&m, &r, point_pos - exponent);
7718   REAL_VALUE_TO_INT (&m1, &m2, m);
7719   mantissa = m1;
7720   mant_hi = m2;
7721 
7722   /* If there are bits set in the low part of the mantissa, we can't
7723      represent this value.  */
7724   if (mantissa != 0)
7725     return -1;
7726 
7727   /* Now make it so that mantissa contains the most-significant bits, and move
7728      the point_pos to indicate that the least-significant bits have been
7729      discarded.  */
7730   point_pos -= HOST_BITS_PER_WIDE_INT;
7731   mantissa = mant_hi;
7732 
7733   /* We can permit four significant bits of mantissa only, plus a high bit
7734      which is always 1.  */
7735   mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7736   if ((mantissa & mask) != 0)
7737     return -1;
7738 
7739   /* Now we know the mantissa is in range, chop off the unneeded bits.  */
7740   mantissa >>= point_pos - 5;
7741 
7742   /* The mantissa may be zero. Disallow that case. (It's possible to load the
7743      floating-point immediate zero with Neon using an integer-zero load, but
7744      that case is handled elsewhere.)  */
7745   if (mantissa == 0)
7746     return -1;
7747 
7748   gcc_assert (mantissa >= 16 && mantissa <= 31);
7749 
7750   /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
7751      normalized significands are in the range [1, 2). (Our mantissa is shifted
7752      left 4 places at this point relative to normalized IEEE754 values).  GCC
7753      internally uses [0.5, 1) (see real.c), so the exponent returned from
7754      REAL_EXP must be altered.  */
7755   exponent = 5 - exponent;
7756 
7757   if (exponent < 0 || exponent > 7)
7758     return -1;
7759 
7760   /* Sign, mantissa and exponent are now in the correct form to plug into the
7761      formula described in the comment above.  */
7762   return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
7763 }
7764 
7765 /* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
7766 int
7767 vfp3_const_double_rtx (rtx x)
7768 {
7769   if (!TARGET_VFP3)
7770     return 0;
7771 
7772   return vfp3_const_double_index (x) != -1;
7773 }
7774 
7775 /* Recognize immediates which can be used in various Neon instructions. Legal
7776    immediates are described by the following table (for VMVN variants, the
7777    bitwise inverse of the constant shown is recognized. In either case, VMOV
7778    is output and the correct instruction to use for a given constant is chosen
7779    by the assembler). The constant shown is replicated across all elements of
7780    the destination vector.
7781 
7782    insn elems variant constant (binary)
7783    ---- ----- ------- -----------------
7784    vmov  i32     0    00000000 00000000 00000000 abcdefgh
7785    vmov  i32     1    00000000 00000000 abcdefgh 00000000
7786    vmov  i32     2    00000000 abcdefgh 00000000 00000000
7787    vmov  i32     3    abcdefgh 00000000 00000000 00000000
7788    vmov  i16     4    00000000 abcdefgh
7789    vmov  i16     5    abcdefgh 00000000
7790    vmvn  i32     6    00000000 00000000 00000000 abcdefgh
7791    vmvn  i32     7    00000000 00000000 abcdefgh 00000000
7792    vmvn  i32     8    00000000 abcdefgh 00000000 00000000
7793    vmvn  i32     9    abcdefgh 00000000 00000000 00000000
7794    vmvn  i16    10    00000000 abcdefgh
7795    vmvn  i16    11    abcdefgh 00000000
7796    vmov  i32    12    00000000 00000000 abcdefgh 11111111
7797    vmvn  i32    13    00000000 00000000 abcdefgh 11111111
7798    vmov  i32    14    00000000 abcdefgh 11111111 11111111
7799    vmvn  i32    15    00000000 abcdefgh 11111111 11111111
7800    vmov   i8    16    abcdefgh
7801    vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
7802                       eeeeeeee ffffffff gggggggg hhhhhhhh
7803    vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
7804 
7805    For case 18, B = !b. Representable values are exactly those accepted by
7806    vfp3_const_double_index, but are output as floating-point numbers rather
7807    than indices.
7808 
7809    Variants 0-5 (inclusive) may also be used as immediates for the second
7810    operand of VORR/VBIC instructions.
7811 
7812    The INVERSE argument causes the bitwise inverse of the given operand to be
7813    recognized instead (used for recognizing legal immediates for the VAND/VORN
7814    pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
7815    *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
7816    output, rather than the real insns vbic/vorr).
7817 
7818    INVERSE makes no difference to the recognition of float vectors.
7819 
7820    The return value is the variant of immediate as shown in the above table, or
7821    -1 if the given value doesn't match any of the listed patterns.
7822 */
7823 static int
7824 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
7825 		      rtx *modconst, int *elementwidth)
7826 {
7827 #define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
7828   matches = 1;					\
7829   for (i = 0; i < idx; i += (STRIDE))		\
7830     if (!(TEST))				\
7831       matches = 0;				\
7832   if (matches)					\
7833     {						\
7834       immtype = (CLASS);			\
7835       elsize = (ELSIZE);			\
7836       break;					\
7837     }
7838 
7839   unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7840   unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7841   unsigned char bytes[16];
7842   int immtype = -1, matches;
7843   unsigned int invmask = inverse ? 0xff : 0;
7844 
7845   /* Vectors of float constants.  */
7846   if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7847     {
7848       rtx el0 = CONST_VECTOR_ELT (op, 0);
7849       REAL_VALUE_TYPE r0;
7850 
7851       if (!vfp3_const_double_rtx (el0))
7852         return -1;
7853 
7854       REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
7855 
7856       for (i = 1; i < n_elts; i++)
7857         {
7858           rtx elt = CONST_VECTOR_ELT (op, i);
7859           REAL_VALUE_TYPE re;
7860 
7861           REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
7862 
7863           if (!REAL_VALUES_EQUAL (r0, re))
7864             return -1;
7865         }
7866 
7867       if (modconst)
7868         *modconst = CONST_VECTOR_ELT (op, 0);
7869 
7870       if (elementwidth)
7871         *elementwidth = 0;
7872 
7873       return 18;
7874     }
7875 
7876   /* Splat vector constant out into a byte vector.  */
7877   for (i = 0; i < n_elts; i++)
7878     {
7879       rtx el = CONST_VECTOR_ELT (op, i);
7880       unsigned HOST_WIDE_INT elpart;
7881       unsigned int part, parts;
7882 
7883       if (GET_CODE (el) == CONST_INT)
7884         {
7885           elpart = INTVAL (el);
7886           parts = 1;
7887         }
7888       else if (GET_CODE (el) == CONST_DOUBLE)
7889         {
7890           elpart = CONST_DOUBLE_LOW (el);
7891           parts = 2;
7892         }
7893       else
7894         gcc_unreachable ();
7895 
7896       for (part = 0; part < parts; part++)
7897         {
7898           unsigned int byte;
7899           for (byte = 0; byte < innersize; byte++)
7900             {
7901               bytes[idx++] = (elpart & 0xff) ^ invmask;
7902               elpart >>= BITS_PER_UNIT;
7903             }
7904           if (GET_CODE (el) == CONST_DOUBLE)
7905             elpart = CONST_DOUBLE_HIGH (el);
7906         }
7907     }
7908 
7909   /* Sanity check.  */
7910   gcc_assert (idx == GET_MODE_SIZE (mode));
7911 
7912   do
7913     {
7914       CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7915 		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);
7916 
7917       CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7918 		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);
7919 
7920       CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7921 		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
7922 
7923       CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7924 		       && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
7925 
7926       CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
7927 
7928       CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
7929 
7930       CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7931 		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
7932 
7933       CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7934 		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
7935 
7936       CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7937 		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
7938 
7939       CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7940 		       && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
7941 
7942       CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
7943 
7944       CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
7945 
7946       CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7947 			&& bytes[i + 2] == 0 && bytes[i + 3] == 0);
7948 
7949       CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7950 			&& bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
7951 
7952       CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
7953 			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
7954 
7955       CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
7956 			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
7957 
7958       CHECK (1, 8, 16, bytes[i] == bytes[0]);
7959 
7960       CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7961 			&& bytes[i] == bytes[(i + 8) % idx]);
7962     }
7963   while (0);
7964 
7965   if (immtype == -1)
7966     return -1;
7967 
7968   if (elementwidth)
7969     *elementwidth = elsize;
7970 
7971   if (modconst)
7972     {
7973       unsigned HOST_WIDE_INT imm = 0;
7974 
7975       /* Un-invert bytes of recognized vector, if necessary.  */
7976       if (invmask != 0)
7977         for (i = 0; i < idx; i++)
7978           bytes[i] ^= invmask;
7979 
7980       if (immtype == 17)
7981         {
7982           /* FIXME: Broken on 32-bit H_W_I hosts.  */
7983           gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7984 
7985           for (i = 0; i < 8; i++)
7986             imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
7987                    << (i * BITS_PER_UNIT);
7988 
7989           *modconst = GEN_INT (imm);
7990         }
7991       else
7992         {
7993           unsigned HOST_WIDE_INT imm = 0;
7994 
7995           for (i = 0; i < elsize / BITS_PER_UNIT; i++)
7996             imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
7997 
7998           *modconst = GEN_INT (imm);
7999         }
8000     }
8001 
8002   return immtype;
8003 #undef CHECK
8004 }
8005 
8006 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8007    VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8008    float elements), and a modified constant (whatever should be output for a
8009    VMOV) in *MODCONST.  */
8010 
8011 int
8012 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8013 			       rtx *modconst, int *elementwidth)
8014 {
8015   rtx tmpconst;
8016   int tmpwidth;
8017   int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8018 
8019   if (retval == -1)
8020     return 0;
8021 
8022   if (modconst)
8023     *modconst = tmpconst;
8024 
8025   if (elementwidth)
8026     *elementwidth = tmpwidth;
8027 
8028   return 1;
8029 }
8030 
8031 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
8032    the immediate is valid, write a constant suitable for using as an operand
8033    to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8034    *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE.  */
8035 
8036 int
8037 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8038 				rtx *modconst, int *elementwidth)
8039 {
8040   rtx tmpconst;
8041   int tmpwidth;
8042   int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8043 
8044   if (retval < 0 || retval > 5)
8045     return 0;
8046 
8047   if (modconst)
8048     *modconst = tmpconst;
8049 
8050   if (elementwidth)
8051     *elementwidth = tmpwidth;
8052 
8053   return 1;
8054 }
8055 
8056 /* Return a string suitable for output of Neon immediate logic operation
8057    MNEM.  */
8058 
8059 char *
8060 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8061 			     int inverse, int quad)
8062 {
8063   int width, is_valid;
8064   static char templ[40];
8065 
8066   is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8067 
8068   gcc_assert (is_valid != 0);
8069 
8070   if (quad)
8071     sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8072   else
8073     sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8074 
8075   return templ;
8076 }
8077 
8078 /* Output a sequence of pairwise operations to implement a reduction.
8079    NOTE: We do "too much work" here, because pairwise operations work on two
8080    registers-worth of operands in one go. Unfortunately we can't exploit those
8081    extra calculations to do the full operation in fewer steps, I don't think.
8082    Although all vector elements of the result but the first are ignored, we
8083    actually calculate the same result in each of the elements. An alternative
8084    such as initially loading a vector with zero to use as each of the second
8085    operands would use up an additional register and take an extra instruction,
8086    for no particular gain.  */
8087 
8088 void
8089 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8090 		      rtx (*reduc) (rtx, rtx, rtx))
8091 {
8092   enum machine_mode inner = GET_MODE_INNER (mode);
8093   unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8094   rtx tmpsum = op1;
8095 
8096   for (i = parts / 2; i >= 1; i /= 2)
8097     {
8098       rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8099       emit_insn (reduc (dest, tmpsum, tmpsum));
8100       tmpsum = dest;
8101     }
8102 }
8103 
8104 /* If VALS is a vector constant that can be loaded into a register
8105    using VDUP, generate instructions to do so and return an RTX to
8106    assign to the register.  Otherwise return NULL_RTX.  */
8107 
8108 static rtx
8109 neon_vdup_constant (rtx vals)
8110 {
8111   enum machine_mode mode = GET_MODE (vals);
8112   enum machine_mode inner_mode = GET_MODE_INNER (mode);
8113   int n_elts = GET_MODE_NUNITS (mode);
8114   bool all_same = true;
8115   rtx x;
8116   int i;
8117 
8118   if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8119     return NULL_RTX;
8120 
8121   for (i = 0; i < n_elts; ++i)
8122     {
8123       x = XVECEXP (vals, 0, i);
8124       if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8125 	all_same = false;
8126     }
8127 
8128   if (!all_same)
8129     /* The elements are not all the same.  We could handle repeating
8130        patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8131        {0, C, 0, C, 0, C, 0, C} which can be loaded using
8132        vdup.i16).  */
8133     return NULL_RTX;
8134 
8135   /* We can load this constant by using VDUP and a constant in a
8136      single ARM register.  This will be cheaper than a vector
8137      load.  */
8138 
8139   x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8140   return gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
8141 			 UNSPEC_VDUP_N);
8142 }
8143 
8144 /* Generate code to load VALS, which is a PARALLEL containing only
8145    constants (for vec_init) or CONST_VECTOR, efficiently into a
8146    register.  Returns an RTX to copy into the register, or NULL_RTX
8147    for a PARALLEL that can not be converted into a CONST_VECTOR.  */
8148 
8149 rtx
8150 neon_make_constant (rtx vals)
8151 {
8152   enum machine_mode mode = GET_MODE (vals);
8153   rtx target;
8154   rtx const_vec = NULL_RTX;
8155   int n_elts = GET_MODE_NUNITS (mode);
8156   int n_const = 0;
8157   int i;
8158 
8159   if (GET_CODE (vals) == CONST_VECTOR)
8160     const_vec = vals;
8161   else if (GET_CODE (vals) == PARALLEL)
8162     {
8163       /* A CONST_VECTOR must contain only CONST_INTs and
8164 	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8165 	 Only store valid constants in a CONST_VECTOR.  */
8166       for (i = 0; i < n_elts; ++i)
8167 	{
8168 	  rtx x = XVECEXP (vals, 0, i);
8169 	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8170 	    n_const++;
8171 	}
8172       if (n_const == n_elts)
8173 	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8174     }
8175   else
8176     gcc_unreachable ();
8177 
8178   if (const_vec != NULL
8179       && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8180     /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
8181     return const_vec;
8182   else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8183     /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
8184        pipeline cycle; creating the constant takes one or two ARM
8185        pipeline cycles.  */
8186     return target;
8187   else if (const_vec != NULL_RTX)
8188     /* Load from constant pool.  On Cortex-A8 this takes two cycles
8189        (for either double or quad vectors).  We can not take advantage
8190        of single-cycle VLD1 because we need a PC-relative addressing
8191        mode.  */
8192     return const_vec;
8193   else
8194     /* A PARALLEL containing something not valid inside CONST_VECTOR.
8195        We can not construct an initializer.  */
8196     return NULL_RTX;
8197 }
8198 
8199 /* Initialize vector TARGET to VALS.  */
8200 
8201 void
8202 neon_expand_vector_init (rtx target, rtx vals)
8203 {
8204   enum machine_mode mode = GET_MODE (target);
8205   enum machine_mode inner_mode = GET_MODE_INNER (mode);
8206   int n_elts = GET_MODE_NUNITS (mode);
8207   int n_var = 0, one_var = -1;
8208   bool all_same = true;
8209   rtx x, mem;
8210   int i;
8211 
8212   for (i = 0; i < n_elts; ++i)
8213     {
8214       x = XVECEXP (vals, 0, i);
8215       if (!CONSTANT_P (x))
8216 	++n_var, one_var = i;
8217 
8218       if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8219 	all_same = false;
8220     }
8221 
8222   if (n_var == 0)
8223     {
8224       rtx constant = neon_make_constant (vals);
8225       if (constant != NULL_RTX)
8226 	{
8227 	  emit_move_insn (target, constant);
8228 	  return;
8229 	}
8230     }
8231 
8232   /* Splat a single non-constant element if we can.  */
8233   if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8234     {
8235       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8236       emit_insn (gen_rtx_SET (VOIDmode, target,
8237 			      gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
8238 					      UNSPEC_VDUP_N)));
8239       return;
8240     }
8241 
8242   /* One field is non-constant.  Load constant then overwrite varying
8243      field.  This is more efficient than using the stack.  */
8244   if (n_var == 1)
8245     {
8246       rtx copy = copy_rtx (vals);
8247       rtvec ops;
8248 
8249       /* Load constant part of vector, substitute neighboring value for
8250 	 varying element.  */
8251       XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8252       neon_expand_vector_init (target, copy);
8253 
8254       /* Insert variable.  */
8255       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8256       ops = gen_rtvec (3, x, target, GEN_INT (one_var));
8257       emit_insn (gen_rtx_SET (VOIDmode, target,
8258 			      gen_rtx_UNSPEC (mode, ops, UNSPEC_VSET_LANE)));
8259       return;
8260     }
8261 
8262   /* Construct the vector in memory one field at a time
8263      and load the whole vector.  */
8264   mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8265   for (i = 0; i < n_elts; i++)
8266     emit_move_insn (adjust_address_nv (mem, inner_mode,
8267 				    i * GET_MODE_SIZE (inner_mode)),
8268 		    XVECEXP (vals, 0, i));
8269   emit_move_insn (target, mem);
8270 }
8271 
8272 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
8273    ERR if it doesn't.  FIXME: NEON bounds checks occur late in compilation, so
8274    reported source locations are bogus.  */
8275 
8276 static void
8277 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8278 	      const char *err)
8279 {
8280   HOST_WIDE_INT lane;
8281 
8282   gcc_assert (GET_CODE (operand) == CONST_INT);
8283 
8284   lane = INTVAL (operand);
8285 
8286   if (lane < low || lane >= high)
8287     error (err);
8288 }
8289 
8290 /* Bounds-check lanes.  */
8291 
8292 void
8293 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8294 {
8295   bounds_check (operand, low, high, "lane out of range");
8296 }
8297 
8298 /* Bounds-check constants.  */
8299 
8300 void
8301 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8302 {
8303   bounds_check (operand, low, high, "constant out of range");
8304 }
8305 
8306 HOST_WIDE_INT
8307 neon_element_bits (enum machine_mode mode)
8308 {
8309   if (mode == DImode)
8310     return GET_MODE_BITSIZE (mode);
8311   else
8312     return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8313 }
8314 
8315 
8316 /* Predicates for `match_operand' and `match_operator'.  */
8317 
8318 /* Return nonzero if OP is a valid Cirrus memory address pattern.  */
8319 int
8320 cirrus_memory_offset (rtx op)
8321 {
8322   /* Reject eliminable registers.  */
8323   if (! (reload_in_progress || reload_completed)
8324       && (   reg_mentioned_p (frame_pointer_rtx, op)
8325 	  || reg_mentioned_p (arg_pointer_rtx, op)
8326 	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
8327 	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8328 	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8329 	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8330     return 0;
8331 
8332   if (GET_CODE (op) == MEM)
8333     {
8334       rtx ind;
8335 
8336       ind = XEXP (op, 0);
8337 
8338       /* Match: (mem (reg)).  */
8339       if (GET_CODE (ind) == REG)
8340 	return 1;
8341 
8342       /* Match:
8343 	 (mem (plus (reg)
8344 	            (const))).  */
8345       if (GET_CODE (ind) == PLUS
8346 	  && GET_CODE (XEXP (ind, 0)) == REG
8347 	  && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8348 	  && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8349 	return 1;
8350     }
8351 
8352   return 0;
8353 }
8354 
8355 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8356    WB is true if full writeback address modes are allowed and is false
8357    if limited writeback address modes (POST_INC and PRE_DEC) are
8358    allowed.  */
8359 
8360 int
8361 arm_coproc_mem_operand (rtx op, bool wb)
8362 {
8363   rtx ind;
8364 
8365   /* Reject eliminable registers.  */
8366   if (! (reload_in_progress || reload_completed)
8367       && (   reg_mentioned_p (frame_pointer_rtx, op)
8368 	  || reg_mentioned_p (arg_pointer_rtx, op)
8369 	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
8370 	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8371 	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8372 	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8373     return FALSE;
8374 
8375   /* Constants are converted into offsets from labels.  */
8376   if (GET_CODE (op) != MEM)
8377     return FALSE;
8378 
8379   ind = XEXP (op, 0);
8380 
8381   if (reload_completed
8382       && (GET_CODE (ind) == LABEL_REF
8383 	  || (GET_CODE (ind) == CONST
8384 	      && GET_CODE (XEXP (ind, 0)) == PLUS
8385 	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8386 	      && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8387     return TRUE;
8388 
8389   /* Match: (mem (reg)).  */
8390   if (GET_CODE (ind) == REG)
8391     return arm_address_register_rtx_p (ind, 0);
8392 
8393   /* Autoincremment addressing modes.  POST_INC and PRE_DEC are
8394      acceptable in any case (subject to verification by
8395      arm_address_register_rtx_p).  We need WB to be true to accept
8396      PRE_INC and POST_DEC.  */
8397   if (GET_CODE (ind) == POST_INC
8398       || GET_CODE (ind) == PRE_DEC
8399       || (wb
8400 	  && (GET_CODE (ind) == PRE_INC
8401 	      || GET_CODE (ind) == POST_DEC)))
8402     return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8403 
8404   if (wb
8405       && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8406       && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8407       && GET_CODE (XEXP (ind, 1)) == PLUS
8408       && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8409     ind = XEXP (ind, 1);
8410 
8411   /* Match:
8412      (plus (reg)
8413 	   (const)).  */
8414   if (GET_CODE (ind) == PLUS
8415       && GET_CODE (XEXP (ind, 0)) == REG
8416       && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8417       && GET_CODE (XEXP (ind, 1)) == CONST_INT
8418       && INTVAL (XEXP (ind, 1)) > -1024
8419       && INTVAL (XEXP (ind, 1)) <  1024
8420       && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8421     return TRUE;
8422 
8423   return FALSE;
8424 }
8425 
8426 /* Return TRUE if OP is a memory operand which we can load or store a vector
8427    to/from. TYPE is one of the following values:
8428     0 - Vector load/stor (vldr)
8429     1 - Core registers (ldm)
8430     2 - Element/structure loads (vld1)
8431  */
8432 int
8433 neon_vector_mem_operand (rtx op, int type)
8434 {
8435   rtx ind;
8436 
8437   /* Reject eliminable registers.  */
8438   if (! (reload_in_progress || reload_completed)
8439       && (   reg_mentioned_p (frame_pointer_rtx, op)
8440 	  || reg_mentioned_p (arg_pointer_rtx, op)
8441 	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
8442 	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8443 	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8444 	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8445     return FALSE;
8446 
8447   /* Constants are converted into offsets from labels.  */
8448   if (GET_CODE (op) != MEM)
8449     return FALSE;
8450 
8451   ind = XEXP (op, 0);
8452 
8453   if (reload_completed
8454       && (GET_CODE (ind) == LABEL_REF
8455 	  || (GET_CODE (ind) == CONST
8456 	      && GET_CODE (XEXP (ind, 0)) == PLUS
8457 	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8458 	      && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8459     return TRUE;
8460 
8461   /* Match: (mem (reg)).  */
8462   if (GET_CODE (ind) == REG)
8463     return arm_address_register_rtx_p (ind, 0);
8464 
8465   /* Allow post-increment with Neon registers.  */
8466   if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC))
8467     return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8468 
8469   /* FIXME: vld1 allows register post-modify.  */
8470 
8471   /* Match:
8472      (plus (reg)
8473           (const)).  */
8474   if (type == 0
8475       && GET_CODE (ind) == PLUS
8476       && GET_CODE (XEXP (ind, 0)) == REG
8477       && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8478       && GET_CODE (XEXP (ind, 1)) == CONST_INT
8479       && INTVAL (XEXP (ind, 1)) > -1024
8480       && INTVAL (XEXP (ind, 1)) < 1016
8481       && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8482     return TRUE;
8483 
8484   return FALSE;
8485 }
8486 
8487 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
8488    type.  */
8489 int
8490 neon_struct_mem_operand (rtx op)
8491 {
8492   rtx ind;
8493 
8494   /* Reject eliminable registers.  */
8495   if (! (reload_in_progress || reload_completed)
8496       && (   reg_mentioned_p (frame_pointer_rtx, op)
8497 	  || reg_mentioned_p (arg_pointer_rtx, op)
8498 	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
8499 	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8500 	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8501 	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8502     return FALSE;
8503 
8504   /* Constants are converted into offsets from labels.  */
8505   if (GET_CODE (op) != MEM)
8506     return FALSE;
8507 
8508   ind = XEXP (op, 0);
8509 
8510   if (reload_completed
8511       && (GET_CODE (ind) == LABEL_REF
8512 	  || (GET_CODE (ind) == CONST
8513 	      && GET_CODE (XEXP (ind, 0)) == PLUS
8514 	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8515 	      && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8516     return TRUE;
8517 
8518   /* Match: (mem (reg)).  */
8519   if (GET_CODE (ind) == REG)
8520     return arm_address_register_rtx_p (ind, 0);
8521 
8522   return FALSE;
8523 }
8524 
8525 /* Return true if X is a register that will be eliminated later on.  */
8526 int
8527 arm_eliminable_register (rtx x)
8528 {
8529   return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
8530 		       || REGNO (x) == ARG_POINTER_REGNUM
8531 		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
8532 			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
8533 }
8534 
8535 /* Return GENERAL_REGS if a scratch register required to reload x to/from
8536    coprocessor registers.  Otherwise return NO_REGS.  */
8537 
8538 enum reg_class
8539 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
8540 {
8541   if (mode == HFmode)
8542     {
8543       if (!TARGET_NEON_FP16)
8544 	return GENERAL_REGS;
8545       if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
8546 	return NO_REGS;
8547       return GENERAL_REGS;
8548     }
8549 
8550   if (TARGET_NEON
8551       && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
8552           || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8553       && neon_vector_mem_operand (x, 0))
8554      return NO_REGS;
8555 
8556   if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
8557     return NO_REGS;
8558 
8559   return GENERAL_REGS;
8560 }
8561 
8562 /* Values which must be returned in the most-significant end of the return
8563    register.  */
8564 
8565 static bool
8566 arm_return_in_msb (const_tree valtype)
8567 {
8568   return (TARGET_AAPCS_BASED
8569           && BYTES_BIG_ENDIAN
8570           && (AGGREGATE_TYPE_P (valtype)
8571               || TREE_CODE (valtype) == COMPLEX_TYPE));
8572 }
8573 
8574 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
8575    Use by the Cirrus Maverick code which has to workaround
8576    a hardware bug triggered by such instructions.  */
8577 static bool
8578 arm_memory_load_p (rtx insn)
8579 {
8580   rtx body, lhs, rhs;;
8581 
8582   if (insn == NULL_RTX || GET_CODE (insn) != INSN)
8583     return false;
8584 
8585   body = PATTERN (insn);
8586 
8587   if (GET_CODE (body) != SET)
8588     return false;
8589 
8590   lhs = XEXP (body, 0);
8591   rhs = XEXP (body, 1);
8592 
8593   lhs = REG_OR_SUBREG_RTX (lhs);
8594 
8595   /* If the destination is not a general purpose
8596      register we do not have to worry.  */
8597   if (GET_CODE (lhs) != REG
8598       || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
8599     return false;
8600 
8601   /* As well as loads from memory we also have to react
8602      to loads of invalid constants which will be turned
8603      into loads from the minipool.  */
8604   return (GET_CODE (rhs) == MEM
8605 	  || GET_CODE (rhs) == SYMBOL_REF
8606 	  || note_invalid_constants (insn, -1, false));
8607 }
8608 
8609 /* Return TRUE if INSN is a Cirrus instruction.  */
8610 static bool
8611 arm_cirrus_insn_p (rtx insn)
8612 {
8613   enum attr_cirrus attr;
8614 
8615   /* get_attr cannot accept USE or CLOBBER.  */
8616   if (!insn
8617       || GET_CODE (insn) != INSN
8618       || GET_CODE (PATTERN (insn)) == USE
8619       || GET_CODE (PATTERN (insn)) == CLOBBER)
8620     return 0;
8621 
8622   attr = get_attr_cirrus (insn);
8623 
8624   return attr != CIRRUS_NOT;
8625 }
8626 
8627 /* Cirrus reorg for invalid instruction combinations.  */
8628 static void
8629 cirrus_reorg (rtx first)
8630 {
8631   enum attr_cirrus attr;
8632   rtx body = PATTERN (first);
8633   rtx t;
8634   int nops;
8635 
8636   /* Any branch must be followed by 2 non Cirrus instructions.  */
8637   if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
8638     {
8639       nops = 0;
8640       t = next_nonnote_insn (first);
8641 
8642       if (arm_cirrus_insn_p (t))
8643 	++ nops;
8644 
8645       if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8646 	++ nops;
8647 
8648       while (nops --)
8649 	emit_insn_after (gen_nop (), first);
8650 
8651       return;
8652     }
8653 
8654   /* (float (blah)) is in parallel with a clobber.  */
8655   if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
8656     body = XVECEXP (body, 0, 0);
8657 
8658   if (GET_CODE (body) == SET)
8659     {
8660       rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
8661 
8662       /* cfldrd, cfldr64, cfstrd, cfstr64 must
8663 	 be followed by a non Cirrus insn.  */
8664       if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
8665 	{
8666 	  if (arm_cirrus_insn_p (next_nonnote_insn (first)))
8667 	    emit_insn_after (gen_nop (), first);
8668 
8669 	  return;
8670 	}
8671       else if (arm_memory_load_p (first))
8672 	{
8673 	  unsigned int arm_regno;
8674 
8675 	  /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
8676 	     ldr/cfmv64hr combination where the Rd field is the same
8677 	     in both instructions must be split with a non Cirrus
8678 	     insn.  Example:
8679 
8680 	     ldr r0, blah
8681 	     nop
8682 	     cfmvsr mvf0, r0.  */
8683 
8684 	  /* Get Arm register number for ldr insn.  */
8685 	  if (GET_CODE (lhs) == REG)
8686 	    arm_regno = REGNO (lhs);
8687 	  else
8688 	    {
8689 	      gcc_assert (GET_CODE (rhs) == REG);
8690 	      arm_regno = REGNO (rhs);
8691 	    }
8692 
8693 	  /* Next insn.  */
8694 	  first = next_nonnote_insn (first);
8695 
8696 	  if (! arm_cirrus_insn_p (first))
8697 	    return;
8698 
8699 	  body = PATTERN (first);
8700 
8701           /* (float (blah)) is in parallel with a clobber.  */
8702           if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
8703 	    body = XVECEXP (body, 0, 0);
8704 
8705 	  if (GET_CODE (body) == FLOAT)
8706 	    body = XEXP (body, 0);
8707 
8708 	  if (get_attr_cirrus (first) == CIRRUS_MOVE
8709 	      && GET_CODE (XEXP (body, 1)) == REG
8710 	      && arm_regno == REGNO (XEXP (body, 1)))
8711 	    emit_insn_after (gen_nop (), first);
8712 
8713 	  return;
8714 	}
8715     }
8716 
8717   /* get_attr cannot accept USE or CLOBBER.  */
8718   if (!first
8719       || GET_CODE (first) != INSN
8720       || GET_CODE (PATTERN (first)) == USE
8721       || GET_CODE (PATTERN (first)) == CLOBBER)
8722     return;
8723 
8724   attr = get_attr_cirrus (first);
8725 
8726   /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
8727      must be followed by a non-coprocessor instruction.  */
8728   if (attr == CIRRUS_COMPARE)
8729     {
8730       nops = 0;
8731 
8732       t = next_nonnote_insn (first);
8733 
8734       if (arm_cirrus_insn_p (t))
8735 	++ nops;
8736 
8737       if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8738 	++ nops;
8739 
8740       while (nops --)
8741 	emit_insn_after (gen_nop (), first);
8742 
8743       return;
8744     }
8745 }
8746 
8747 /* Return TRUE if X references a SYMBOL_REF.  */
8748 int
8749 symbol_mentioned_p (rtx x)
8750 {
8751   const char * fmt;
8752   int i;
8753 
8754   if (GET_CODE (x) == SYMBOL_REF)
8755     return 1;
8756 
8757   /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
8758      are constant offsets, not symbols.  */
8759   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8760     return 0;
8761 
8762   fmt = GET_RTX_FORMAT (GET_CODE (x));
8763 
8764   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8765     {
8766       if (fmt[i] == 'E')
8767 	{
8768 	  int j;
8769 
8770 	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8771 	    if (symbol_mentioned_p (XVECEXP (x, i, j)))
8772 	      return 1;
8773 	}
8774       else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
8775 	return 1;
8776     }
8777 
8778   return 0;
8779 }
8780 
8781 /* Return TRUE if X references a LABEL_REF.  */
8782 int
8783 label_mentioned_p (rtx x)
8784 {
8785   const char * fmt;
8786   int i;
8787 
8788   if (GET_CODE (x) == LABEL_REF)
8789     return 1;
8790 
8791   /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
8792      instruction, but they are constant offsets, not symbols.  */
8793   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8794     return 0;
8795 
8796   fmt = GET_RTX_FORMAT (GET_CODE (x));
8797   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8798     {
8799       if (fmt[i] == 'E')
8800 	{
8801 	  int j;
8802 
8803 	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8804 	    if (label_mentioned_p (XVECEXP (x, i, j)))
8805 	      return 1;
8806 	}
8807       else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
8808 	return 1;
8809     }
8810 
8811   return 0;
8812 }
8813 
8814 int
8815 tls_mentioned_p (rtx x)
8816 {
8817   switch (GET_CODE (x))
8818     {
8819     case CONST:
8820       return tls_mentioned_p (XEXP (x, 0));
8821 
8822     case UNSPEC:
8823       if (XINT (x, 1) == UNSPEC_TLS)
8824 	return 1;
8825 
8826     default:
8827       return 0;
8828     }
8829 }
8830 
8831 /* Must not copy any rtx that uses a pc-relative address.  */
8832 
8833 static int
8834 arm_note_pic_base (rtx *x, void *date ATTRIBUTE_UNUSED)
8835 {
8836   if (GET_CODE (*x) == UNSPEC
8837       && XINT (*x, 1) == UNSPEC_PIC_BASE)
8838     return 1;
8839   return 0;
8840 }
8841 
8842 static bool
8843 arm_cannot_copy_insn_p (rtx insn)
8844 {
8845   return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
8846 }
8847 
8848 enum rtx_code
8849 minmax_code (rtx x)
8850 {
8851   enum rtx_code code = GET_CODE (x);
8852 
8853   switch (code)
8854     {
8855     case SMAX:
8856       return GE;
8857     case SMIN:
8858       return LE;
8859     case UMIN:
8860       return LEU;
8861     case UMAX:
8862       return GEU;
8863     default:
8864       gcc_unreachable ();
8865     }
8866 }
8867 
8868 /* Return 1 if memory locations are adjacent.  */
8869 int
8870 adjacent_mem_locations (rtx a, rtx b)
8871 {
8872   /* We don't guarantee to preserve the order of these memory refs.  */
8873   if (volatile_refs_p (a) || volatile_refs_p (b))
8874     return 0;
8875 
8876   if ((GET_CODE (XEXP (a, 0)) == REG
8877        || (GET_CODE (XEXP (a, 0)) == PLUS
8878 	   && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
8879       && (GET_CODE (XEXP (b, 0)) == REG
8880 	  || (GET_CODE (XEXP (b, 0)) == PLUS
8881 	      && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
8882     {
8883       HOST_WIDE_INT val0 = 0, val1 = 0;
8884       rtx reg0, reg1;
8885       int val_diff;
8886 
8887       if (GET_CODE (XEXP (a, 0)) == PLUS)
8888         {
8889 	  reg0 = XEXP (XEXP (a, 0), 0);
8890 	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
8891         }
8892       else
8893 	reg0 = XEXP (a, 0);
8894 
8895       if (GET_CODE (XEXP (b, 0)) == PLUS)
8896         {
8897 	  reg1 = XEXP (XEXP (b, 0), 0);
8898 	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
8899         }
8900       else
8901 	reg1 = XEXP (b, 0);
8902 
8903       /* Don't accept any offset that will require multiple
8904 	 instructions to handle, since this would cause the
8905 	 arith_adjacentmem pattern to output an overlong sequence.  */
8906       if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
8907 	return 0;
8908 
8909       /* Don't allow an eliminable register: register elimination can make
8910 	 the offset too large.  */
8911       if (arm_eliminable_register (reg0))
8912 	return 0;
8913 
8914       val_diff = val1 - val0;
8915 
8916       if (arm_ld_sched)
8917 	{
8918 	  /* If the target has load delay slots, then there's no benefit
8919 	     to using an ldm instruction unless the offset is zero and
8920 	     we are optimizing for size.  */
8921 	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
8922 		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
8923 		  && (val_diff == 4 || val_diff == -4));
8924 	}
8925 
8926       return ((REGNO (reg0) == REGNO (reg1))
8927 	      && (val_diff == 4 || val_diff == -4));
8928     }
8929 
8930   return 0;
8931 }
8932 
8933 int
8934 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
8935 			HOST_WIDE_INT *load_offset)
8936 {
8937   int unsorted_regs[4];
8938   HOST_WIDE_INT unsorted_offsets[4];
8939   int order[4];
8940   int base_reg = -1;
8941   int i;
8942 
8943   /* Can only handle 2, 3, or 4 insns at present,
8944      though could be easily extended if required.  */
8945   gcc_assert (nops >= 2 && nops <= 4);
8946 
8947   memset (order, 0, 4 * sizeof (int));
8948 
8949   /* Loop over the operands and check that the memory references are
8950      suitable (i.e. immediate offsets from the same base register).  At
8951      the same time, extract the target register, and the memory
8952      offsets.  */
8953   for (i = 0; i < nops; i++)
8954     {
8955       rtx reg;
8956       rtx offset;
8957 
8958       /* Convert a subreg of a mem into the mem itself.  */
8959       if (GET_CODE (operands[nops + i]) == SUBREG)
8960 	operands[nops + i] = alter_subreg (operands + (nops + i));
8961 
8962       gcc_assert (GET_CODE (operands[nops + i]) == MEM);
8963 
8964       /* Don't reorder volatile memory references; it doesn't seem worth
8965 	 looking for the case where the order is ok anyway.  */
8966       if (MEM_VOLATILE_P (operands[nops + i]))
8967 	return 0;
8968 
8969       offset = const0_rtx;
8970 
8971       if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
8972 	   || (GET_CODE (reg) == SUBREG
8973 	       && GET_CODE (reg = SUBREG_REG (reg)) == REG))
8974 	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
8975 	      && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
8976 		   == REG)
8977 		  || (GET_CODE (reg) == SUBREG
8978 		      && GET_CODE (reg = SUBREG_REG (reg)) == REG))
8979 	      && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
8980 		  == CONST_INT)))
8981 	{
8982 	  if (i == 0)
8983 	    {
8984 	      base_reg = REGNO (reg);
8985 	      unsorted_regs[0] = (GET_CODE (operands[i]) == REG
8986 				  ? REGNO (operands[i])
8987 				  : REGNO (SUBREG_REG (operands[i])));
8988 	      order[0] = 0;
8989 	    }
8990 	  else
8991 	    {
8992 	      if (base_reg != (int) REGNO (reg))
8993 		/* Not addressed from the same base register.  */
8994 		return 0;
8995 
8996 	      unsorted_regs[i] = (GET_CODE (operands[i]) == REG
8997 				  ? REGNO (operands[i])
8998 				  : REGNO (SUBREG_REG (operands[i])));
8999 	      if (unsorted_regs[i] < unsorted_regs[order[0]])
9000 		order[0] = i;
9001 	    }
9002 
9003 	  /* If it isn't an integer register, or if it overwrites the
9004 	     base register but isn't the last insn in the list, then
9005 	     we can't do this.  */
9006 	  if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
9007 	      || (i != nops - 1 && unsorted_regs[i] == base_reg))
9008 	    return 0;
9009 
9010 	  unsorted_offsets[i] = INTVAL (offset);
9011 	}
9012       else
9013 	/* Not a suitable memory address.  */
9014 	return 0;
9015     }
9016 
9017   /* All the useful information has now been extracted from the
9018      operands into unsorted_regs and unsorted_offsets; additionally,
9019      order[0] has been set to the lowest numbered register in the
9020      list.  Sort the registers into order, and check that the memory
9021      offsets are ascending and adjacent.  */
9022 
9023   for (i = 1; i < nops; i++)
9024     {
9025       int j;
9026 
9027       order[i] = order[i - 1];
9028       for (j = 0; j < nops; j++)
9029 	if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
9030 	    && (order[i] == order[i - 1]
9031 		|| unsorted_regs[j] < unsorted_regs[order[i]]))
9032 	  order[i] = j;
9033 
9034       /* Have we found a suitable register? if not, one must be used more
9035 	 than once.  */
9036       if (order[i] == order[i - 1])
9037 	return 0;
9038 
9039       /* Is the memory address adjacent and ascending? */
9040       if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
9041 	return 0;
9042     }
9043 
9044   if (base)
9045     {
9046       *base = base_reg;
9047 
9048       for (i = 0; i < nops; i++)
9049 	regs[i] = unsorted_regs[order[i]];
9050 
9051       *load_offset = unsorted_offsets[order[0]];
9052     }
9053 
9054   if (unsorted_offsets[order[0]] == 0)
9055     return 1; /* ldmia */
9056 
9057   if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9058     return 2; /* ldmib */
9059 
9060   if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9061     return 3; /* ldmda */
9062 
9063   if (unsorted_offsets[order[nops - 1]] == -4)
9064     return 4; /* ldmdb */
9065 
9066   /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9067      if the offset isn't small enough.  The reason 2 ldrs are faster
9068      is because these ARMs are able to do more than one cache access
9069      in a single cycle.  The ARM9 and StrongARM have Harvard caches,
9070      whilst the ARM8 has a double bandwidth cache.  This means that
9071      these cores can do both an instruction fetch and a data fetch in
9072      a single cycle, so the trick of calculating the address into a
9073      scratch register (one of the result regs) and then doing a load
9074      multiple actually becomes slower (and no smaller in code size).
9075      That is the transformation
9076 
9077  	ldr	rd1, [rbase + offset]
9078  	ldr	rd2, [rbase + offset + 4]
9079 
9080      to
9081 
9082  	add	rd1, rbase, offset
9083  	ldmia	rd1, {rd1, rd2}
9084 
9085      produces worse code -- '3 cycles + any stalls on rd2' instead of
9086      '2 cycles + any stalls on rd2'.  On ARMs with only one cache
9087      access per cycle, the first sequence could never complete in less
9088      than 6 cycles, whereas the ldm sequence would only take 5 and
9089      would make better use of sequential accesses if not hitting the
9090      cache.
9091 
9092      We cheat here and test 'arm_ld_sched' which we currently know to
9093      only be true for the ARM8, ARM9 and StrongARM.  If this ever
9094      changes, then the test below needs to be reworked.  */
9095   if (nops == 2 && arm_ld_sched)
9096     return 0;
9097 
9098   /* Can't do it without setting up the offset, only do this if it takes
9099      no more than one insn.  */
9100   return (const_ok_for_arm (unsorted_offsets[order[0]])
9101 	  || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
9102 }
9103 
9104 const char *
9105 emit_ldm_seq (rtx *operands, int nops)
9106 {
9107   int regs[4];
9108   int base_reg;
9109   HOST_WIDE_INT offset;
9110   char buf[100];
9111   int i;
9112 
9113   switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
9114     {
9115     case 1:
9116       strcpy (buf, "ldm%(ia%)\t");
9117       break;
9118 
9119     case 2:
9120       strcpy (buf, "ldm%(ib%)\t");
9121       break;
9122 
9123     case 3:
9124       strcpy (buf, "ldm%(da%)\t");
9125       break;
9126 
9127     case 4:
9128       strcpy (buf, "ldm%(db%)\t");
9129       break;
9130 
9131     case 5:
9132       if (offset >= 0)
9133 	sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
9134 		 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
9135 		 (long) offset);
9136       else
9137 	sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
9138 		 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
9139 		 (long) -offset);
9140       output_asm_insn (buf, operands);
9141       base_reg = regs[0];
9142       strcpy (buf, "ldm%(ia%)\t");
9143       break;
9144 
9145     default:
9146       gcc_unreachable ();
9147     }
9148 
9149   sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
9150 	   reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
9151 
9152   for (i = 1; i < nops; i++)
9153     sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
9154 	     reg_names[regs[i]]);
9155 
9156   strcat (buf, "}\t%@ phole ldm");
9157 
9158   output_asm_insn (buf, operands);
9159   return "";
9160 }
9161 
9162 int
9163 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
9164 			 HOST_WIDE_INT * load_offset)
9165 {
9166   int unsorted_regs[4];
9167   HOST_WIDE_INT unsorted_offsets[4];
9168   int order[4];
9169   int base_reg = -1;
9170   int i;
9171 
9172   /* Can only handle 2, 3, or 4 insns at present, though could be easily
9173      extended if required.  */
9174   gcc_assert (nops >= 2 && nops <= 4);
9175 
9176   memset (order, 0, 4 * sizeof (int));
9177 
9178   /* Loop over the operands and check that the memory references are
9179      suitable (i.e. immediate offsets from the same base register).  At
9180      the same time, extract the target register, and the memory
9181      offsets.  */
9182   for (i = 0; i < nops; i++)
9183     {
9184       rtx reg;
9185       rtx offset;
9186 
9187       /* Convert a subreg of a mem into the mem itself.  */
9188       if (GET_CODE (operands[nops + i]) == SUBREG)
9189 	operands[nops + i] = alter_subreg (operands + (nops + i));
9190 
9191       gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9192 
9193       /* Don't reorder volatile memory references; it doesn't seem worth
9194 	 looking for the case where the order is ok anyway.  */
9195       if (MEM_VOLATILE_P (operands[nops + i]))
9196 	return 0;
9197 
9198       offset = const0_rtx;
9199 
9200       if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9201 	   || (GET_CODE (reg) == SUBREG
9202 	       && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9203 	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9204 	      && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9205 		   == REG)
9206 		  || (GET_CODE (reg) == SUBREG
9207 		      && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9208 	      && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9209 		  == CONST_INT)))
9210 	{
9211 	  if (i == 0)
9212 	    {
9213 	      base_reg = REGNO (reg);
9214 	      unsorted_regs[0] = (GET_CODE (operands[i]) == REG
9215 				  ? REGNO (operands[i])
9216 				  : REGNO (SUBREG_REG (operands[i])));
9217 	      order[0] = 0;
9218 	    }
9219 	  else
9220 	    {
9221 	      if (base_reg != (int) REGNO (reg))
9222 		/* Not addressed from the same base register.  */
9223 		return 0;
9224 
9225 	      unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9226 				  ? REGNO (operands[i])
9227 				  : REGNO (SUBREG_REG (operands[i])));
9228 	      if (unsorted_regs[i] < unsorted_regs[order[0]])
9229 		order[0] = i;
9230 	    }
9231 
9232 	  /* If it isn't an integer register, then we can't do this.  */
9233 	  if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
9234 	    return 0;
9235 
9236 	  unsorted_offsets[i] = INTVAL (offset);
9237 	}
9238       else
9239 	/* Not a suitable memory address.  */
9240 	return 0;
9241     }
9242 
9243   /* All the useful information has now been extracted from the
9244      operands into unsorted_regs and unsorted_offsets; additionally,
9245      order[0] has been set to the lowest numbered register in the
9246      list.  Sort the registers into order, and check that the memory
9247      offsets are ascending and adjacent.  */
9248 
9249   for (i = 1; i < nops; i++)
9250     {
9251       int j;
9252 
9253       order[i] = order[i - 1];
9254       for (j = 0; j < nops; j++)
9255 	if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
9256 	    && (order[i] == order[i - 1]
9257 		|| unsorted_regs[j] < unsorted_regs[order[i]]))
9258 	  order[i] = j;
9259 
9260       /* Have we found a suitable register? if not, one must be used more
9261 	 than once.  */
9262       if (order[i] == order[i - 1])
9263 	return 0;
9264 
9265       /* Is the memory address adjacent and ascending? */
9266       if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
9267 	return 0;
9268     }
9269 
9270   if (base)
9271     {
9272       *base = base_reg;
9273 
9274       for (i = 0; i < nops; i++)
9275 	regs[i] = unsorted_regs[order[i]];
9276 
9277       *load_offset = unsorted_offsets[order[0]];
9278     }
9279 
9280   if (unsorted_offsets[order[0]] == 0)
9281     return 1; /* stmia */
9282 
9283   if (unsorted_offsets[order[0]] == 4)
9284     return 2; /* stmib */
9285 
9286   if (unsorted_offsets[order[nops - 1]] == 0)
9287     return 3; /* stmda */
9288 
9289   if (unsorted_offsets[order[nops - 1]] == -4)
9290     return 4; /* stmdb */
9291 
9292   return 0;
9293 }
9294 
9295 const char *
9296 emit_stm_seq (rtx *operands, int nops)
9297 {
9298   int regs[4];
9299   int base_reg;
9300   HOST_WIDE_INT offset;
9301   char buf[100];
9302   int i;
9303 
9304   switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
9305     {
9306     case 1:
9307       strcpy (buf, "stm%(ia%)\t");
9308       break;
9309 
9310     case 2:
9311       strcpy (buf, "stm%(ib%)\t");
9312       break;
9313 
9314     case 3:
9315       strcpy (buf, "stm%(da%)\t");
9316       break;
9317 
9318     case 4:
9319       strcpy (buf, "stm%(db%)\t");
9320       break;
9321 
9322     default:
9323       gcc_unreachable ();
9324     }
9325 
9326   sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
9327 	   reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
9328 
9329   for (i = 1; i < nops; i++)
9330     sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
9331 	     reg_names[regs[i]]);
9332 
9333   strcat (buf, "}\t%@ phole stm");
9334 
9335   output_asm_insn (buf, operands);
9336   return "";
9337 }
9338 
9339 /* Routines for use in generating RTL.  */
9340 
9341 rtx
9342 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
9343 		       int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9344 {
9345   HOST_WIDE_INT offset = *offsetp;
9346   int i = 0, j;
9347   rtx result;
9348   int sign = up ? 1 : -1;
9349   rtx mem, addr;
9350 
9351   /* XScale has load-store double instructions, but they have stricter
9352      alignment requirements than load-store multiple, so we cannot
9353      use them.
9354 
9355      For XScale ldm requires 2 + NREGS cycles to complete and blocks
9356      the pipeline until completion.
9357 
9358 	NREGS		CYCLES
9359 	  1		  3
9360 	  2		  4
9361 	  3		  5
9362 	  4		  6
9363 
9364      An ldr instruction takes 1-3 cycles, but does not block the
9365      pipeline.
9366 
9367 	NREGS		CYCLES
9368 	  1		 1-3
9369 	  2		 2-6
9370 	  3		 3-9
9371 	  4		 4-12
9372 
9373      Best case ldr will always win.  However, the more ldr instructions
9374      we issue, the less likely we are to be able to schedule them well.
9375      Using ldr instructions also increases code size.
9376 
9377      As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9378      for counts of 3 or 4 regs.  */
9379   if (arm_tune_xscale && count <= 2 && ! optimize_size)
9380     {
9381       rtx seq;
9382 
9383       start_sequence ();
9384 
9385       for (i = 0; i < count; i++)
9386 	{
9387 	  addr = plus_constant (from, i * 4 * sign);
9388 	  mem = adjust_automodify_address (basemem, SImode, addr, offset);
9389 	  emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
9390 	  offset += 4 * sign;
9391 	}
9392 
9393       if (write_back)
9394 	{
9395 	  emit_move_insn (from, plus_constant (from, count * 4 * sign));
9396 	  *offsetp = offset;
9397 	}
9398 
9399       seq = get_insns ();
9400       end_sequence ();
9401 
9402       return seq;
9403     }
9404 
9405   result = gen_rtx_PARALLEL (VOIDmode,
9406 			     rtvec_alloc (count + (write_back ? 1 : 0)));
9407   if (write_back)
9408     {
9409       XVECEXP (result, 0, 0)
9410 	= gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
9411       i = 1;
9412       count++;
9413     }
9414 
9415   for (j = 0; i < count; i++, j++)
9416     {
9417       addr = plus_constant (from, j * 4 * sign);
9418       mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9419       XVECEXP (result, 0, i)
9420 	= gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
9421       offset += 4 * sign;
9422     }
9423 
9424   if (write_back)
9425     *offsetp = offset;
9426 
9427   return result;
9428 }
9429 
9430 rtx
9431 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
9432 			int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9433 {
9434   HOST_WIDE_INT offset = *offsetp;
9435   int i = 0, j;
9436   rtx result;
9437   int sign = up ? 1 : -1;
9438   rtx mem, addr;
9439 
9440   /* See arm_gen_load_multiple for discussion of
9441      the pros/cons of ldm/stm usage for XScale.  */
9442   if (arm_tune_xscale && count <= 2 && ! optimize_size)
9443     {
9444       rtx seq;
9445 
9446       start_sequence ();
9447 
9448       for (i = 0; i < count; i++)
9449 	{
9450 	  addr = plus_constant (to, i * 4 * sign);
9451 	  mem = adjust_automodify_address (basemem, SImode, addr, offset);
9452 	  emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
9453 	  offset += 4 * sign;
9454 	}
9455 
9456       if (write_back)
9457 	{
9458 	  emit_move_insn (to, plus_constant (to, count * 4 * sign));
9459 	  *offsetp = offset;
9460 	}
9461 
9462       seq = get_insns ();
9463       end_sequence ();
9464 
9465       return seq;
9466     }
9467 
9468   result = gen_rtx_PARALLEL (VOIDmode,
9469 			     rtvec_alloc (count + (write_back ? 1 : 0)));
9470   if (write_back)
9471     {
9472       XVECEXP (result, 0, 0)
9473 	= gen_rtx_SET (VOIDmode, to,
9474 		       plus_constant (to, count * 4 * sign));
9475       i = 1;
9476       count++;
9477     }
9478 
9479   for (j = 0; i < count; i++, j++)
9480     {
9481       addr = plus_constant (to, j * 4 * sign);
9482       mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9483       XVECEXP (result, 0, i)
9484 	= gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
9485       offset += 4 * sign;
9486     }
9487 
9488   if (write_back)
9489     *offsetp = offset;
9490 
9491   return result;
9492 }
9493 
9494 int
9495 arm_gen_movmemqi (rtx *operands)
9496 {
9497   HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
9498   HOST_WIDE_INT srcoffset, dstoffset;
9499   int i;
9500   rtx src, dst, srcbase, dstbase;
9501   rtx part_bytes_reg = NULL;
9502   rtx mem;
9503 
9504   if (GET_CODE (operands[2]) != CONST_INT
9505       || GET_CODE (operands[3]) != CONST_INT
9506       || INTVAL (operands[2]) > 64
9507       || INTVAL (operands[3]) & 3)
9508     return 0;
9509 
9510   dstbase = operands[0];
9511   srcbase = operands[1];
9512 
9513   dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
9514   src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
9515 
9516   in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
9517   out_words_to_go = INTVAL (operands[2]) / 4;
9518   last_bytes = INTVAL (operands[2]) & 3;
9519   dstoffset = srcoffset = 0;
9520 
9521   if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
9522     part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
9523 
9524   for (i = 0; in_words_to_go >= 2; i+=4)
9525     {
9526       if (in_words_to_go > 4)
9527 	emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
9528 					  srcbase, &srcoffset));
9529       else
9530 	emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
9531 					  FALSE, srcbase, &srcoffset));
9532 
9533       if (out_words_to_go)
9534 	{
9535 	  if (out_words_to_go > 4)
9536 	    emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
9537 					       dstbase, &dstoffset));
9538 	  else if (out_words_to_go != 1)
9539 	    emit_insn (arm_gen_store_multiple (0, out_words_to_go,
9540 					       dst, TRUE,
9541 					       (last_bytes == 0
9542 						? FALSE : TRUE),
9543 					       dstbase, &dstoffset));
9544 	  else
9545 	    {
9546 	      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9547 	      emit_move_insn (mem, gen_rtx_REG (SImode, 0));
9548 	      if (last_bytes != 0)
9549 		{
9550 		  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
9551 		  dstoffset += 4;
9552 		}
9553 	    }
9554 	}
9555 
9556       in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
9557       out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
9558     }
9559 
9560   /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
9561   if (out_words_to_go)
9562     {
9563       rtx sreg;
9564 
9565       mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9566       sreg = copy_to_reg (mem);
9567 
9568       mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9569       emit_move_insn (mem, sreg);
9570       in_words_to_go--;
9571 
9572       gcc_assert (!in_words_to_go);	/* Sanity check */
9573     }
9574 
9575   if (in_words_to_go)
9576     {
9577       gcc_assert (in_words_to_go > 0);
9578 
9579       mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9580       part_bytes_reg = copy_to_mode_reg (SImode, mem);
9581     }
9582 
9583   gcc_assert (!last_bytes || part_bytes_reg);
9584 
9585   if (BYTES_BIG_ENDIAN && last_bytes)
9586     {
9587       rtx tmp = gen_reg_rtx (SImode);
9588 
9589       /* The bytes we want are in the top end of the word.  */
9590       emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
9591 			      GEN_INT (8 * (4 - last_bytes))));
9592       part_bytes_reg = tmp;
9593 
9594       while (last_bytes)
9595 	{
9596 	  mem = adjust_automodify_address (dstbase, QImode,
9597 					   plus_constant (dst, last_bytes - 1),
9598 					   dstoffset + last_bytes - 1);
9599 	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
9600 
9601 	  if (--last_bytes)
9602 	    {
9603 	      tmp = gen_reg_rtx (SImode);
9604 	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
9605 	      part_bytes_reg = tmp;
9606 	    }
9607 	}
9608 
9609     }
9610   else
9611     {
9612       if (last_bytes > 1)
9613 	{
9614 	  mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
9615 	  emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
9616 	  last_bytes -= 2;
9617 	  if (last_bytes)
9618 	    {
9619 	      rtx tmp = gen_reg_rtx (SImode);
9620 	      emit_insn (gen_addsi3 (dst, dst, const2_rtx));
9621 	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
9622 	      part_bytes_reg = tmp;
9623 	      dstoffset += 2;
9624 	    }
9625 	}
9626 
9627       if (last_bytes)
9628 	{
9629 	  mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
9630 	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
9631 	}
9632     }
9633 
9634   return 1;
9635 }
9636 
9637 /* Select a dominance comparison mode if possible for a test of the general
9638    form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
9639    COND_OR == DOM_CC_X_AND_Y => (X && Y)
9640    COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
9641    COND_OR == DOM_CC_X_OR_Y => (X || Y)
9642    In all cases OP will be either EQ or NE, but we don't need to know which
9643    here.  If we are unable to support a dominance comparison we return
9644    CC mode.  This will then fail to match for the RTL expressions that
9645    generate this call.  */
9646 enum machine_mode
9647 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
9648 {
9649   enum rtx_code cond1, cond2;
9650   int swapped = 0;
9651 
9652   /* Currently we will probably get the wrong result if the individual
9653      comparisons are not simple.  This also ensures that it is safe to
9654      reverse a comparison if necessary.  */
9655   if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
9656        != CCmode)
9657       || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
9658 	  != CCmode))
9659     return CCmode;
9660 
9661   /* The if_then_else variant of this tests the second condition if the
9662      first passes, but is true if the first fails.  Reverse the first
9663      condition to get a true "inclusive-or" expression.  */
9664   if (cond_or == DOM_CC_NX_OR_Y)
9665     cond1 = reverse_condition (cond1);
9666 
9667   /* If the comparisons are not equal, and one doesn't dominate the other,
9668      then we can't do this.  */
9669   if (cond1 != cond2
9670       && !comparison_dominates_p (cond1, cond2)
9671       && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
9672     return CCmode;
9673 
9674   if (swapped)
9675     {
9676       enum rtx_code temp = cond1;
9677       cond1 = cond2;
9678       cond2 = temp;
9679     }
9680 
9681   switch (cond1)
9682     {
9683     case EQ:
9684       if (cond_or == DOM_CC_X_AND_Y)
9685 	return CC_DEQmode;
9686 
9687       switch (cond2)
9688 	{
9689 	case EQ: return CC_DEQmode;
9690 	case LE: return CC_DLEmode;
9691 	case LEU: return CC_DLEUmode;
9692 	case GE: return CC_DGEmode;
9693 	case GEU: return CC_DGEUmode;
9694 	default: gcc_unreachable ();
9695 	}
9696 
9697     case LT:
9698       if (cond_or == DOM_CC_X_AND_Y)
9699 	return CC_DLTmode;
9700 
9701       switch (cond2)
9702 	{
9703 	case  LT:
9704 	    return CC_DLTmode;
9705 	case LE:
9706 	  return CC_DLEmode;
9707 	case NE:
9708 	  return CC_DNEmode;
9709 	default:
9710 	  gcc_unreachable ();
9711 	}
9712 
9713     case GT:
9714       if (cond_or == DOM_CC_X_AND_Y)
9715 	return CC_DGTmode;
9716 
9717       switch (cond2)
9718 	{
9719 	case GT:
9720 	  return CC_DGTmode;
9721 	case GE:
9722 	  return CC_DGEmode;
9723 	case NE:
9724 	  return CC_DNEmode;
9725 	default:
9726 	  gcc_unreachable ();
9727 	}
9728 
9729     case LTU:
9730       if (cond_or == DOM_CC_X_AND_Y)
9731 	return CC_DLTUmode;
9732 
9733       switch (cond2)
9734 	{
9735 	case LTU:
9736 	  return CC_DLTUmode;
9737 	case LEU:
9738 	  return CC_DLEUmode;
9739 	case NE:
9740 	  return CC_DNEmode;
9741 	default:
9742 	  gcc_unreachable ();
9743 	}
9744 
9745     case GTU:
9746       if (cond_or == DOM_CC_X_AND_Y)
9747 	return CC_DGTUmode;
9748 
9749       switch (cond2)
9750 	{
9751 	case GTU:
9752 	  return CC_DGTUmode;
9753 	case GEU:
9754 	  return CC_DGEUmode;
9755 	case NE:
9756 	  return CC_DNEmode;
9757 	default:
9758 	  gcc_unreachable ();
9759 	}
9760 
9761     /* The remaining cases only occur when both comparisons are the
9762        same.  */
9763     case NE:
9764       gcc_assert (cond1 == cond2);
9765       return CC_DNEmode;
9766 
9767     case LE:
9768       gcc_assert (cond1 == cond2);
9769       return CC_DLEmode;
9770 
9771     case GE:
9772       gcc_assert (cond1 == cond2);
9773       return CC_DGEmode;
9774 
9775     case LEU:
9776       gcc_assert (cond1 == cond2);
9777       return CC_DLEUmode;
9778 
9779     case GEU:
9780       gcc_assert (cond1 == cond2);
9781       return CC_DGEUmode;
9782 
9783     default:
9784       gcc_unreachable ();
9785     }
9786 }
9787 
9788 enum machine_mode
9789 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
9790 {
9791   /* All floating point compares return CCFP if it is an equality
9792      comparison, and CCFPE otherwise.  */
9793   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
9794     {
9795       switch (op)
9796 	{
9797 	case EQ:
9798 	case NE:
9799 	case UNORDERED:
9800 	case ORDERED:
9801 	case UNLT:
9802 	case UNLE:
9803 	case UNGT:
9804 	case UNGE:
9805 	case UNEQ:
9806 	case LTGT:
9807 	  return CCFPmode;
9808 
9809 	case LT:
9810 	case LE:
9811 	case GT:
9812 	case GE:
9813 	  if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
9814 	    return CCFPmode;
9815 	  return CCFPEmode;
9816 
9817 	default:
9818 	  gcc_unreachable ();
9819 	}
9820     }
9821 
9822   /* A compare with a shifted operand.  Because of canonicalization, the
9823      comparison will have to be swapped when we emit the assembler.  */
9824   if (GET_MODE (y) == SImode
9825       && (REG_P (y) || (GET_CODE (y) == SUBREG))
9826       && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
9827 	  || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
9828 	  || GET_CODE (x) == ROTATERT))
9829     return CC_SWPmode;
9830 
9831   /* This operation is performed swapped, but since we only rely on the Z
9832      flag we don't need an additional mode.  */
9833   if (GET_MODE (y) == SImode
9834       && (REG_P (y) || (GET_CODE (y) == SUBREG))
9835       && GET_CODE (x) == NEG
9836       && (op ==	EQ || op == NE))
9837     return CC_Zmode;
9838 
9839   /* This is a special case that is used by combine to allow a
9840      comparison of a shifted byte load to be split into a zero-extend
9841      followed by a comparison of the shifted integer (only valid for
9842      equalities and unsigned inequalities).  */
9843   if (GET_MODE (x) == SImode
9844       && GET_CODE (x) == ASHIFT
9845       && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
9846       && GET_CODE (XEXP (x, 0)) == SUBREG
9847       && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
9848       && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
9849       && (op == EQ || op == NE
9850 	  || op == GEU || op == GTU || op == LTU || op == LEU)
9851       && GET_CODE (y) == CONST_INT)
9852     return CC_Zmode;
9853 
9854   /* A construct for a conditional compare, if the false arm contains
9855      0, then both conditions must be true, otherwise either condition
9856      must be true.  Not all conditions are possible, so CCmode is
9857      returned if it can't be done.  */
9858   if (GET_CODE (x) == IF_THEN_ELSE
9859       && (XEXP (x, 2) == const0_rtx
9860 	  || XEXP (x, 2) == const1_rtx)
9861       && COMPARISON_P (XEXP (x, 0))
9862       && COMPARISON_P (XEXP (x, 1)))
9863     return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
9864 					 INTVAL (XEXP (x, 2)));
9865 
9866   /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
9867   if (GET_CODE (x) == AND
9868       && COMPARISON_P (XEXP (x, 0))
9869       && COMPARISON_P (XEXP (x, 1)))
9870     return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
9871 					 DOM_CC_X_AND_Y);
9872 
9873   if (GET_CODE (x) == IOR
9874       && COMPARISON_P (XEXP (x, 0))
9875       && COMPARISON_P (XEXP (x, 1)))
9876     return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
9877 					 DOM_CC_X_OR_Y);
9878 
9879   /* An operation (on Thumb) where we want to test for a single bit.
9880      This is done by shifting that bit up into the top bit of a
9881      scratch register; we can then branch on the sign bit.  */
9882   if (TARGET_THUMB1
9883       && GET_MODE (x) == SImode
9884       && (op == EQ || op == NE)
9885       && GET_CODE (x) == ZERO_EXTRACT
9886       && XEXP (x, 1) == const1_rtx)
9887     return CC_Nmode;
9888 
9889   /* An operation that sets the condition codes as a side-effect, the
9890      V flag is not set correctly, so we can only use comparisons where
9891      this doesn't matter.  (For LT and GE we can use "mi" and "pl"
9892      instead.)  */
9893   /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
9894   if (GET_MODE (x) == SImode
9895       && y == const0_rtx
9896       && (op == EQ || op == NE || op == LT || op == GE)
9897       && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
9898 	  || GET_CODE (x) == AND || GET_CODE (x) == IOR
9899 	  || GET_CODE (x) == XOR || GET_CODE (x) == MULT
9900 	  || GET_CODE (x) == NOT || GET_CODE (x) == NEG
9901 	  || GET_CODE (x) == LSHIFTRT
9902 	  || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
9903 	  || GET_CODE (x) == ROTATERT
9904 	  || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
9905     return CC_NOOVmode;
9906 
9907   if (GET_MODE (x) == QImode && (op == EQ || op == NE))
9908     return CC_Zmode;
9909 
9910   if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
9911       && GET_CODE (x) == PLUS
9912       && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
9913     return CC_Cmode;
9914 
9915   return CCmode;
9916 }
9917 
9918 /* X and Y are two things to compare using CODE.  Emit the compare insn and
9919    return the rtx for register 0 in the proper mode.  FP means this is a
9920    floating point compare: I don't think that it is needed on the arm.  */
9921 rtx
9922 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
9923 {
9924   enum machine_mode mode = SELECT_CC_MODE (code, x, y);
9925   rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
9926 
9927   emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
9928 
9929   return cc_reg;
9930 }
9931 
9932 /* Generate a sequence of insns that will generate the correct return
9933    address mask depending on the physical architecture that the program
9934    is running on.  */
9935 rtx
9936 arm_gen_return_addr_mask (void)
9937 {
9938   rtx reg = gen_reg_rtx (Pmode);
9939 
9940   emit_insn (gen_return_addr_mask (reg));
9941   return reg;
9942 }
9943 
9944 void
9945 arm_reload_in_hi (rtx *operands)
9946 {
9947   rtx ref = operands[1];
9948   rtx base, scratch;
9949   HOST_WIDE_INT offset = 0;
9950 
9951   if (GET_CODE (ref) == SUBREG)
9952     {
9953       offset = SUBREG_BYTE (ref);
9954       ref = SUBREG_REG (ref);
9955     }
9956 
9957   if (GET_CODE (ref) == REG)
9958     {
9959       /* We have a pseudo which has been spilt onto the stack; there
9960 	 are two cases here: the first where there is a simple
9961 	 stack-slot replacement and a second where the stack-slot is
9962 	 out of range, or is used as a subreg.  */
9963       if (reg_equiv_mem[REGNO (ref)])
9964 	{
9965 	  ref = reg_equiv_mem[REGNO (ref)];
9966 	  base = find_replacement (&XEXP (ref, 0));
9967 	}
9968       else
9969 	/* The slot is out of range, or was dressed up in a SUBREG.  */
9970 	base = reg_equiv_address[REGNO (ref)];
9971     }
9972   else
9973     base = find_replacement (&XEXP (ref, 0));
9974 
9975   /* Handle the case where the address is too complex to be offset by 1.  */
9976   if (GET_CODE (base) == MINUS
9977       || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
9978     {
9979       rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
9980 
9981       emit_set_insn (base_plus, base);
9982       base = base_plus;
9983     }
9984   else if (GET_CODE (base) == PLUS)
9985     {
9986       /* The addend must be CONST_INT, or we would have dealt with it above.  */
9987       HOST_WIDE_INT hi, lo;
9988 
9989       offset += INTVAL (XEXP (base, 1));
9990       base = XEXP (base, 0);
9991 
9992       /* Rework the address into a legal sequence of insns.  */
9993       /* Valid range for lo is -4095 -> 4095 */
9994       lo = (offset >= 0
9995 	    ? (offset & 0xfff)
9996 	    : -((-offset) & 0xfff));
9997 
9998       /* Corner case, if lo is the max offset then we would be out of range
9999 	 once we have added the additional 1 below, so bump the msb into the
10000 	 pre-loading insn(s).  */
10001       if (lo == 4095)
10002 	lo &= 0x7ff;
10003 
10004       hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10005 	     ^ (HOST_WIDE_INT) 0x80000000)
10006 	    - (HOST_WIDE_INT) 0x80000000);
10007 
10008       gcc_assert (hi + lo == offset);
10009 
10010       if (hi != 0)
10011 	{
10012 	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10013 
10014 	  /* Get the base address; addsi3 knows how to handle constants
10015 	     that require more than one insn.  */
10016 	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10017 	  base = base_plus;
10018 	  offset = lo;
10019 	}
10020     }
10021 
10022   /* Operands[2] may overlap operands[0] (though it won't overlap
10023      operands[1]), that's why we asked for a DImode reg -- so we can
10024      use the bit that does not overlap.  */
10025   if (REGNO (operands[2]) == REGNO (operands[0]))
10026     scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10027   else
10028     scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10029 
10030   emit_insn (gen_zero_extendqisi2 (scratch,
10031 				   gen_rtx_MEM (QImode,
10032 						plus_constant (base,
10033 							       offset))));
10034   emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10035 				   gen_rtx_MEM (QImode,
10036 						plus_constant (base,
10037 							       offset + 1))));
10038   if (!BYTES_BIG_ENDIAN)
10039     emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10040 		   gen_rtx_IOR (SImode,
10041 				gen_rtx_ASHIFT
10042 				(SImode,
10043 				 gen_rtx_SUBREG (SImode, operands[0], 0),
10044 				 GEN_INT (8)),
10045 				scratch));
10046   else
10047     emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10048 		   gen_rtx_IOR (SImode,
10049 				gen_rtx_ASHIFT (SImode, scratch,
10050 						GEN_INT (8)),
10051 				gen_rtx_SUBREG (SImode, operands[0], 0)));
10052 }
10053 
10054 /* Handle storing a half-word to memory during reload by synthesizing as two
10055    byte stores.  Take care not to clobber the input values until after we
10056    have moved them somewhere safe.  This code assumes that if the DImode
10057    scratch in operands[2] overlaps either the input value or output address
10058    in some way, then that value must die in this insn (we absolutely need
10059    two scratch registers for some corner cases).  */
10060 void
10061 arm_reload_out_hi (rtx *operands)
10062 {
10063   rtx ref = operands[0];
10064   rtx outval = operands[1];
10065   rtx base, scratch;
10066   HOST_WIDE_INT offset = 0;
10067 
10068   if (GET_CODE (ref) == SUBREG)
10069     {
10070       offset = SUBREG_BYTE (ref);
10071       ref = SUBREG_REG (ref);
10072     }
10073 
10074   if (GET_CODE (ref) == REG)
10075     {
10076       /* We have a pseudo which has been spilt onto the stack; there
10077 	 are two cases here: the first where there is a simple
10078 	 stack-slot replacement and a second where the stack-slot is
10079 	 out of range, or is used as a subreg.  */
10080       if (reg_equiv_mem[REGNO (ref)])
10081 	{
10082 	  ref = reg_equiv_mem[REGNO (ref)];
10083 	  base = find_replacement (&XEXP (ref, 0));
10084 	}
10085       else
10086 	/* The slot is out of range, or was dressed up in a SUBREG.  */
10087 	base = reg_equiv_address[REGNO (ref)];
10088     }
10089   else
10090     base = find_replacement (&XEXP (ref, 0));
10091 
10092   scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10093 
10094   /* Handle the case where the address is too complex to be offset by 1.  */
10095   if (GET_CODE (base) == MINUS
10096       || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10097     {
10098       rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10099 
10100       /* Be careful not to destroy OUTVAL.  */
10101       if (reg_overlap_mentioned_p (base_plus, outval))
10102 	{
10103 	  /* Updating base_plus might destroy outval, see if we can
10104 	     swap the scratch and base_plus.  */
10105 	  if (!reg_overlap_mentioned_p (scratch, outval))
10106 	    {
10107 	      rtx tmp = scratch;
10108 	      scratch = base_plus;
10109 	      base_plus = tmp;
10110 	    }
10111 	  else
10112 	    {
10113 	      rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10114 
10115 	      /* Be conservative and copy OUTVAL into the scratch now,
10116 		 this should only be necessary if outval is a subreg
10117 		 of something larger than a word.  */
10118 	      /* XXX Might this clobber base?  I can't see how it can,
10119 		 since scratch is known to overlap with OUTVAL, and
10120 		 must be wider than a word.  */
10121 	      emit_insn (gen_movhi (scratch_hi, outval));
10122 	      outval = scratch_hi;
10123 	    }
10124 	}
10125 
10126       emit_set_insn (base_plus, base);
10127       base = base_plus;
10128     }
10129   else if (GET_CODE (base) == PLUS)
10130     {
10131       /* The addend must be CONST_INT, or we would have dealt with it above.  */
10132       HOST_WIDE_INT hi, lo;
10133 
10134       offset += INTVAL (XEXP (base, 1));
10135       base = XEXP (base, 0);
10136 
10137       /* Rework the address into a legal sequence of insns.  */
10138       /* Valid range for lo is -4095 -> 4095 */
10139       lo = (offset >= 0
10140 	    ? (offset & 0xfff)
10141 	    : -((-offset) & 0xfff));
10142 
10143       /* Corner case, if lo is the max offset then we would be out of range
10144 	 once we have added the additional 1 below, so bump the msb into the
10145 	 pre-loading insn(s).  */
10146       if (lo == 4095)
10147 	lo &= 0x7ff;
10148 
10149       hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10150 	     ^ (HOST_WIDE_INT) 0x80000000)
10151 	    - (HOST_WIDE_INT) 0x80000000);
10152 
10153       gcc_assert (hi + lo == offset);
10154 
10155       if (hi != 0)
10156 	{
10157 	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10158 
10159 	  /* Be careful not to destroy OUTVAL.  */
10160 	  if (reg_overlap_mentioned_p (base_plus, outval))
10161 	    {
10162 	      /* Updating base_plus might destroy outval, see if we
10163 		 can swap the scratch and base_plus.  */
10164 	      if (!reg_overlap_mentioned_p (scratch, outval))
10165 		{
10166 		  rtx tmp = scratch;
10167 		  scratch = base_plus;
10168 		  base_plus = tmp;
10169 		}
10170 	      else
10171 		{
10172 		  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10173 
10174 		  /* Be conservative and copy outval into scratch now,
10175 		     this should only be necessary if outval is a
10176 		     subreg of something larger than a word.  */
10177 		  /* XXX Might this clobber base?  I can't see how it
10178 		     can, since scratch is known to overlap with
10179 		     outval.  */
10180 		  emit_insn (gen_movhi (scratch_hi, outval));
10181 		  outval = scratch_hi;
10182 		}
10183 	    }
10184 
10185 	  /* Get the base address; addsi3 knows how to handle constants
10186 	     that require more than one insn.  */
10187 	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10188 	  base = base_plus;
10189 	  offset = lo;
10190 	}
10191     }
10192 
10193   if (BYTES_BIG_ENDIAN)
10194     {
10195       emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10196 					 plus_constant (base, offset + 1)),
10197 			    gen_lowpart (QImode, outval)));
10198       emit_insn (gen_lshrsi3 (scratch,
10199 			      gen_rtx_SUBREG (SImode, outval, 0),
10200 			      GEN_INT (8)));
10201       emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10202 			    gen_lowpart (QImode, scratch)));
10203     }
10204   else
10205     {
10206       emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10207 			    gen_lowpart (QImode, outval)));
10208       emit_insn (gen_lshrsi3 (scratch,
10209 			      gen_rtx_SUBREG (SImode, outval, 0),
10210 			      GEN_INT (8)));
10211       emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10212 					 plus_constant (base, offset + 1)),
10213 			    gen_lowpart (QImode, scratch)));
10214     }
10215 }
10216 
10217 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
10218    (padded to the size of a word) should be passed in a register.  */
10219 
10220 static bool
10221 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
10222 {
10223   if (TARGET_AAPCS_BASED)
10224     return must_pass_in_stack_var_size (mode, type);
10225   else
10226     return must_pass_in_stack_var_size_or_pad (mode, type);
10227 }
10228 
10229 
10230 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
10231    Return true if an argument passed on the stack should be padded upwards,
10232    i.e. if the least-significant byte has useful data.
10233    For legacy APCS ABIs we use the default.  For AAPCS based ABIs small
10234    aggregate types are placed in the lowest memory address.  */
10235 
10236 bool
10237 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
10238 {
10239   if (!TARGET_AAPCS_BASED)
10240     return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
10241 
10242   if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
10243     return false;
10244 
10245   return true;
10246 }
10247 
10248 
10249 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
10250    For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
10251    byte of the register has useful data, and return the opposite if the
10252    most significant byte does.
10253    For AAPCS, small aggregates and small complex types are always padded
10254    upwards.  */
10255 
10256 bool
10257 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
10258                     tree type, int first ATTRIBUTE_UNUSED)
10259 {
10260   if (TARGET_AAPCS_BASED
10261       && BYTES_BIG_ENDIAN
10262       && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
10263       && int_size_in_bytes (type) <= 4)
10264     return true;
10265 
10266   /* Otherwise, use default padding.  */
10267   return !BYTES_BIG_ENDIAN;
10268 }
10269 
10270 
10271 /* Print a symbolic form of X to the debug file, F.  */
10272 static void
10273 arm_print_value (FILE *f, rtx x)
10274 {
10275   switch (GET_CODE (x))
10276     {
10277     case CONST_INT:
10278       fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
10279       return;
10280 
10281     case CONST_DOUBLE:
10282       fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
10283       return;
10284 
10285     case CONST_VECTOR:
10286       {
10287 	int i;
10288 
10289 	fprintf (f, "<");
10290 	for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
10291 	  {
10292 	    fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
10293 	    if (i < (CONST_VECTOR_NUNITS (x) - 1))
10294 	      fputc (',', f);
10295 	  }
10296 	fprintf (f, ">");
10297       }
10298       return;
10299 
10300     case CONST_STRING:
10301       fprintf (f, "\"%s\"", XSTR (x, 0));
10302       return;
10303 
10304     case SYMBOL_REF:
10305       fprintf (f, "`%s'", XSTR (x, 0));
10306       return;
10307 
10308     case LABEL_REF:
10309       fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
10310       return;
10311 
10312     case CONST:
10313       arm_print_value (f, XEXP (x, 0));
10314       return;
10315 
10316     case PLUS:
10317       arm_print_value (f, XEXP (x, 0));
10318       fprintf (f, "+");
10319       arm_print_value (f, XEXP (x, 1));
10320       return;
10321 
10322     case PC:
10323       fprintf (f, "pc");
10324       return;
10325 
10326     default:
10327       fprintf (f, "????");
10328       return;
10329     }
10330 }
10331 
10332 /* Routines for manipulation of the constant pool.  */
10333 
10334 /* Arm instructions cannot load a large constant directly into a
10335    register; they have to come from a pc relative load.  The constant
10336    must therefore be placed in the addressable range of the pc
10337    relative load.  Depending on the precise pc relative load
10338    instruction the range is somewhere between 256 bytes and 4k.  This
10339    means that we often have to dump a constant inside a function, and
10340    generate code to branch around it.
10341 
10342    It is important to minimize this, since the branches will slow
10343    things down and make the code larger.
10344 
10345    Normally we can hide the table after an existing unconditional
10346    branch so that there is no interruption of the flow, but in the
10347    worst case the code looks like this:
10348 
10349 	ldr	rn, L1
10350 	...
10351 	b	L2
10352 	align
10353 	L1:	.long value
10354 	L2:
10355 	...
10356 
10357 	ldr	rn, L3
10358 	...
10359 	b	L4
10360 	align
10361 	L3:	.long value
10362 	L4:
10363 	...
10364 
10365    We fix this by performing a scan after scheduling, which notices
10366    which instructions need to have their operands fetched from the
10367    constant table and builds the table.
10368 
10369    The algorithm starts by building a table of all the constants that
10370    need fixing up and all the natural barriers in the function (places
10371    where a constant table can be dropped without breaking the flow).
10372    For each fixup we note how far the pc-relative replacement will be
10373    able to reach and the offset of the instruction into the function.
10374 
10375    Having built the table we then group the fixes together to form
10376    tables that are as large as possible (subject to addressing
10377    constraints) and emit each table of constants after the last
10378    barrier that is within range of all the instructions in the group.
10379    If a group does not contain a barrier, then we forcibly create one
10380    by inserting a jump instruction into the flow.  Once the table has
10381    been inserted, the insns are then modified to reference the
10382    relevant entry in the pool.
10383 
10384    Possible enhancements to the algorithm (not implemented) are:
10385 
10386    1) For some processors and object formats, there may be benefit in
10387    aligning the pools to the start of cache lines; this alignment
10388    would need to be taken into account when calculating addressability
10389    of a pool.  */
10390 
10391 /* These typedefs are located at the start of this file, so that
10392    they can be used in the prototypes there.  This comment is to
10393    remind readers of that fact so that the following structures
10394    can be understood more easily.
10395 
10396      typedef struct minipool_node    Mnode;
10397      typedef struct minipool_fixup   Mfix;  */
10398 
10399 struct minipool_node
10400 {
10401   /* Doubly linked chain of entries.  */
10402   Mnode * next;
10403   Mnode * prev;
10404   /* The maximum offset into the code that this entry can be placed.  While
10405      pushing fixes for forward references, all entries are sorted in order
10406      of increasing max_address.  */
10407   HOST_WIDE_INT max_address;
10408   /* Similarly for an entry inserted for a backwards ref.  */
10409   HOST_WIDE_INT min_address;
10410   /* The number of fixes referencing this entry.  This can become zero
10411      if we "unpush" an entry.  In this case we ignore the entry when we
10412      come to emit the code.  */
10413   int refcount;
10414   /* The offset from the start of the minipool.  */
10415   HOST_WIDE_INT offset;
10416   /* The value in table.  */
10417   rtx value;
10418   /* The mode of value.  */
10419   enum machine_mode mode;
10420   /* The size of the value.  With iWMMXt enabled
10421      sizes > 4 also imply an alignment of 8-bytes.  */
10422   int fix_size;
10423 };
10424 
10425 struct minipool_fixup
10426 {
10427   Mfix *            next;
10428   rtx               insn;
10429   HOST_WIDE_INT     address;
10430   rtx *             loc;
10431   enum machine_mode mode;
10432   int               fix_size;
10433   rtx               value;
10434   Mnode *           minipool;
10435   HOST_WIDE_INT     forwards;
10436   HOST_WIDE_INT     backwards;
10437 };
10438 
10439 /* Fixes less than a word need padding out to a word boundary.  */
10440 #define MINIPOOL_FIX_SIZE(mode) \
10441   (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
10442 
10443 static Mnode *	minipool_vector_head;
10444 static Mnode *	minipool_vector_tail;
10445 static rtx	minipool_vector_label;
10446 static int	minipool_pad;
10447 
10448 /* The linked list of all minipool fixes required for this function.  */
10449 Mfix * 		minipool_fix_head;
10450 Mfix * 		minipool_fix_tail;
10451 /* The fix entry for the current minipool, once it has been placed.  */
10452 Mfix *		minipool_barrier;
10453 
10454 /* Determines if INSN is the start of a jump table.  Returns the end
10455    of the TABLE or NULL_RTX.  */
10456 static rtx
10457 is_jump_table (rtx insn)
10458 {
10459   rtx table;
10460 
10461   if (GET_CODE (insn) == JUMP_INSN
10462       && JUMP_LABEL (insn) != NULL
10463       && ((table = next_real_insn (JUMP_LABEL (insn)))
10464 	  == next_real_insn (insn))
10465       && table != NULL
10466       && GET_CODE (table) == JUMP_INSN
10467       && (GET_CODE (PATTERN (table)) == ADDR_VEC
10468 	  || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
10469     return table;
10470 
10471   return NULL_RTX;
10472 }
10473 
10474 #ifndef JUMP_TABLES_IN_TEXT_SECTION
10475 #define JUMP_TABLES_IN_TEXT_SECTION 0
10476 #endif
10477 
10478 static HOST_WIDE_INT
10479 get_jump_table_size (rtx insn)
10480 {
10481   /* ADDR_VECs only take room if read-only data does into the text
10482      section.  */
10483   if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
10484     {
10485       rtx body = PATTERN (insn);
10486       int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
10487       HOST_WIDE_INT size;
10488       HOST_WIDE_INT modesize;
10489 
10490       modesize = GET_MODE_SIZE (GET_MODE (body));
10491       size = modesize * XVECLEN (body, elt);
10492       switch (modesize)
10493 	{
10494 	case 1:
10495 	  /* Round up size  of TBB table to a halfword boundary.  */
10496 	  size = (size + 1) & ~(HOST_WIDE_INT)1;
10497 	  break;
10498 	case 2:
10499 	  /* No padding necessary for TBH.  */
10500 	  break;
10501 	case 4:
10502 	  /* Add two bytes for alignment on Thumb.  */
10503 	  if (TARGET_THUMB)
10504 	    size += 2;
10505 	  break;
10506 	default:
10507 	  gcc_unreachable ();
10508 	}
10509       return size;
10510     }
10511 
10512   return 0;
10513 }
10514 
10515 /* Move a minipool fix MP from its current location to before MAX_MP.
10516    If MAX_MP is NULL, then MP doesn't need moving, but the addressing
10517    constraints may need updating.  */
10518 static Mnode *
10519 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
10520 			       HOST_WIDE_INT max_address)
10521 {
10522   /* The code below assumes these are different.  */
10523   gcc_assert (mp != max_mp);
10524 
10525   if (max_mp == NULL)
10526     {
10527       if (max_address < mp->max_address)
10528 	mp->max_address = max_address;
10529     }
10530   else
10531     {
10532       if (max_address > max_mp->max_address - mp->fix_size)
10533 	mp->max_address = max_mp->max_address - mp->fix_size;
10534       else
10535 	mp->max_address = max_address;
10536 
10537       /* Unlink MP from its current position.  Since max_mp is non-null,
10538        mp->prev must be non-null.  */
10539       mp->prev->next = mp->next;
10540       if (mp->next != NULL)
10541 	mp->next->prev = mp->prev;
10542       else
10543 	minipool_vector_tail = mp->prev;
10544 
10545       /* Re-insert it before MAX_MP.  */
10546       mp->next = max_mp;
10547       mp->prev = max_mp->prev;
10548       max_mp->prev = mp;
10549 
10550       if (mp->prev != NULL)
10551 	mp->prev->next = mp;
10552       else
10553 	minipool_vector_head = mp;
10554     }
10555 
10556   /* Save the new entry.  */
10557   max_mp = mp;
10558 
10559   /* Scan over the preceding entries and adjust their addresses as
10560      required.  */
10561   while (mp->prev != NULL
10562 	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
10563     {
10564       mp->prev->max_address = mp->max_address - mp->prev->fix_size;
10565       mp = mp->prev;
10566     }
10567 
10568   return max_mp;
10569 }
10570 
10571 /* Add a constant to the minipool for a forward reference.  Returns the
10572    node added or NULL if the constant will not fit in this pool.  */
10573 static Mnode *
10574 add_minipool_forward_ref (Mfix *fix)
10575 {
10576   /* If set, max_mp is the first pool_entry that has a lower
10577      constraint than the one we are trying to add.  */
10578   Mnode *       max_mp = NULL;
10579   HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
10580   Mnode *       mp;
10581 
10582   /* If the minipool starts before the end of FIX->INSN then this FIX
10583      can not be placed into the current pool.  Furthermore, adding the
10584      new constant pool entry may cause the pool to start FIX_SIZE bytes
10585      earlier.  */
10586   if (minipool_vector_head &&
10587       (fix->address + get_attr_length (fix->insn)
10588        >= minipool_vector_head->max_address - fix->fix_size))
10589     return NULL;
10590 
10591   /* Scan the pool to see if a constant with the same value has
10592      already been added.  While we are doing this, also note the
10593      location where we must insert the constant if it doesn't already
10594      exist.  */
10595   for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10596     {
10597       if (GET_CODE (fix->value) == GET_CODE (mp->value)
10598 	  && fix->mode == mp->mode
10599 	  && (GET_CODE (fix->value) != CODE_LABEL
10600 	      || (CODE_LABEL_NUMBER (fix->value)
10601 		  == CODE_LABEL_NUMBER (mp->value)))
10602 	  && rtx_equal_p (fix->value, mp->value))
10603 	{
10604 	  /* More than one fix references this entry.  */
10605 	  mp->refcount++;
10606 	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);
10607 	}
10608 
10609       /* Note the insertion point if necessary.  */
10610       if (max_mp == NULL
10611 	  && mp->max_address > max_address)
10612 	max_mp = mp;
10613 
10614       /* If we are inserting an 8-bytes aligned quantity and
10615 	 we have not already found an insertion point, then
10616 	 make sure that all such 8-byte aligned quantities are
10617 	 placed at the start of the pool.  */
10618       if (ARM_DOUBLEWORD_ALIGN
10619 	  && max_mp == NULL
10620 	  && fix->fix_size >= 8
10621 	  && mp->fix_size < 8)
10622 	{
10623 	  max_mp = mp;
10624 	  max_address = mp->max_address;
10625 	}
10626     }
10627 
10628   /* The value is not currently in the minipool, so we need to create
10629      a new entry for it.  If MAX_MP is NULL, the entry will be put on
10630      the end of the list since the placement is less constrained than
10631      any existing entry.  Otherwise, we insert the new fix before
10632      MAX_MP and, if necessary, adjust the constraints on the other
10633      entries.  */
10634   mp = XNEW (Mnode);
10635   mp->fix_size = fix->fix_size;
10636   mp->mode = fix->mode;
10637   mp->value = fix->value;
10638   mp->refcount = 1;
10639   /* Not yet required for a backwards ref.  */
10640   mp->min_address = -65536;
10641 
10642   if (max_mp == NULL)
10643     {
10644       mp->max_address = max_address;
10645       mp->next = NULL;
10646       mp->prev = minipool_vector_tail;
10647 
10648       if (mp->prev == NULL)
10649 	{
10650 	  minipool_vector_head = mp;
10651 	  minipool_vector_label = gen_label_rtx ();
10652 	}
10653       else
10654 	mp->prev->next = mp;
10655 
10656       minipool_vector_tail = mp;
10657     }
10658   else
10659     {
10660       if (max_address > max_mp->max_address - mp->fix_size)
10661 	mp->max_address = max_mp->max_address - mp->fix_size;
10662       else
10663 	mp->max_address = max_address;
10664 
10665       mp->next = max_mp;
10666       mp->prev = max_mp->prev;
10667       max_mp->prev = mp;
10668       if (mp->prev != NULL)
10669 	mp->prev->next = mp;
10670       else
10671 	minipool_vector_head = mp;
10672     }
10673 
10674   /* Save the new entry.  */
10675   max_mp = mp;
10676 
10677   /* Scan over the preceding entries and adjust their addresses as
10678      required.  */
10679   while (mp->prev != NULL
10680 	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
10681     {
10682       mp->prev->max_address = mp->max_address - mp->prev->fix_size;
10683       mp = mp->prev;
10684     }
10685 
10686   return max_mp;
10687 }
10688 
10689 static Mnode *
10690 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
10691 				HOST_WIDE_INT  min_address)
10692 {
10693   HOST_WIDE_INT offset;
10694 
10695   /* The code below assumes these are different.  */
10696   gcc_assert (mp != min_mp);
10697 
10698   if (min_mp == NULL)
10699     {
10700       if (min_address > mp->min_address)
10701 	mp->min_address = min_address;
10702     }
10703   else
10704     {
10705       /* We will adjust this below if it is too loose.  */
10706       mp->min_address = min_address;
10707 
10708       /* Unlink MP from its current position.  Since min_mp is non-null,
10709 	 mp->next must be non-null.  */
10710       mp->next->prev = mp->prev;
10711       if (mp->prev != NULL)
10712 	mp->prev->next = mp->next;
10713       else
10714 	minipool_vector_head = mp->next;
10715 
10716       /* Reinsert it after MIN_MP.  */
10717       mp->prev = min_mp;
10718       mp->next = min_mp->next;
10719       min_mp->next = mp;
10720       if (mp->next != NULL)
10721 	mp->next->prev = mp;
10722       else
10723 	minipool_vector_tail = mp;
10724     }
10725 
10726   min_mp = mp;
10727 
10728   offset = 0;
10729   for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10730     {
10731       mp->offset = offset;
10732       if (mp->refcount > 0)
10733 	offset += mp->fix_size;
10734 
10735       if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
10736 	mp->next->min_address = mp->min_address + mp->fix_size;
10737     }
10738 
10739   return min_mp;
10740 }
10741 
10742 /* Add a constant to the minipool for a backward reference.  Returns the
10743    node added or NULL if the constant will not fit in this pool.
10744 
10745    Note that the code for insertion for a backwards reference can be
10746    somewhat confusing because the calculated offsets for each fix do
10747    not take into account the size of the pool (which is still under
10748    construction.  */
10749 static Mnode *
10750 add_minipool_backward_ref (Mfix *fix)
10751 {
10752   /* If set, min_mp is the last pool_entry that has a lower constraint
10753      than the one we are trying to add.  */
10754   Mnode *min_mp = NULL;
10755   /* This can be negative, since it is only a constraint.  */
10756   HOST_WIDE_INT  min_address = fix->address - fix->backwards;
10757   Mnode *mp;
10758 
10759   /* If we can't reach the current pool from this insn, or if we can't
10760      insert this entry at the end of the pool without pushing other
10761      fixes out of range, then we don't try.  This ensures that we
10762      can't fail later on.  */
10763   if (min_address >= minipool_barrier->address
10764       || (minipool_vector_tail->min_address + fix->fix_size
10765 	  >= minipool_barrier->address))
10766     return NULL;
10767 
10768   /* Scan the pool to see if a constant with the same value has
10769      already been added.  While we are doing this, also note the
10770      location where we must insert the constant if it doesn't already
10771      exist.  */
10772   for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
10773     {
10774       if (GET_CODE (fix->value) == GET_CODE (mp->value)
10775 	  && fix->mode == mp->mode
10776 	  && (GET_CODE (fix->value) != CODE_LABEL
10777 	      || (CODE_LABEL_NUMBER (fix->value)
10778 		  == CODE_LABEL_NUMBER (mp->value)))
10779 	  && rtx_equal_p (fix->value, mp->value)
10780 	  /* Check that there is enough slack to move this entry to the
10781 	     end of the table (this is conservative).  */
10782 	  && (mp->max_address
10783 	      > (minipool_barrier->address
10784 		 + minipool_vector_tail->offset
10785 		 + minipool_vector_tail->fix_size)))
10786 	{
10787 	  mp->refcount++;
10788 	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);
10789 	}
10790 
10791       if (min_mp != NULL)
10792 	mp->min_address += fix->fix_size;
10793       else
10794 	{
10795 	  /* Note the insertion point if necessary.  */
10796 	  if (mp->min_address < min_address)
10797 	    {
10798 	      /* For now, we do not allow the insertion of 8-byte alignment
10799 		 requiring nodes anywhere but at the start of the pool.  */
10800 	      if (ARM_DOUBLEWORD_ALIGN
10801 		  && fix->fix_size >= 8 && mp->fix_size < 8)
10802 		return NULL;
10803 	      else
10804 		min_mp = mp;
10805 	    }
10806 	  else if (mp->max_address
10807 		   < minipool_barrier->address + mp->offset + fix->fix_size)
10808 	    {
10809 	      /* Inserting before this entry would push the fix beyond
10810 		 its maximum address (which can happen if we have
10811 		 re-located a forwards fix); force the new fix to come
10812 		 after it.  */
10813 	      if (ARM_DOUBLEWORD_ALIGN
10814 		  && fix->fix_size >= 8 && mp->fix_size < 8)
10815 		return NULL;
10816 	      else
10817 		{
10818 		  min_mp = mp;
10819 		  min_address = mp->min_address + fix->fix_size;
10820 		}
10821 	    }
10822 	  /* Do not insert a non-8-byte aligned quantity before 8-byte
10823 	     aligned quantities.  */
10824 	  else if (ARM_DOUBLEWORD_ALIGN
10825 		   && fix->fix_size < 8
10826 		   && mp->fix_size >= 8)
10827 	    {
10828 	      min_mp = mp;
10829 	      min_address = mp->min_address + fix->fix_size;
10830 	    }
10831 	}
10832     }
10833 
10834   /* We need to create a new entry.  */
10835   mp = XNEW (Mnode);
10836   mp->fix_size = fix->fix_size;
10837   mp->mode = fix->mode;
10838   mp->value = fix->value;
10839   mp->refcount = 1;
10840   mp->max_address = minipool_barrier->address + 65536;
10841 
10842   mp->min_address = min_address;
10843 
10844   if (min_mp == NULL)
10845     {
10846       mp->prev = NULL;
10847       mp->next = minipool_vector_head;
10848 
10849       if (mp->next == NULL)
10850 	{
10851 	  minipool_vector_tail = mp;
10852 	  minipool_vector_label = gen_label_rtx ();
10853 	}
10854       else
10855 	mp->next->prev = mp;
10856 
10857       minipool_vector_head = mp;
10858     }
10859   else
10860     {
10861       mp->next = min_mp->next;
10862       mp->prev = min_mp;
10863       min_mp->next = mp;
10864 
10865       if (mp->next != NULL)
10866 	mp->next->prev = mp;
10867       else
10868 	minipool_vector_tail = mp;
10869     }
10870 
10871   /* Save the new entry.  */
10872   min_mp = mp;
10873 
10874   if (mp->prev)
10875     mp = mp->prev;
10876   else
10877     mp->offset = 0;
10878 
10879   /* Scan over the following entries and adjust their offsets.  */
10880   while (mp->next != NULL)
10881     {
10882       if (mp->next->min_address < mp->min_address + mp->fix_size)
10883 	mp->next->min_address = mp->min_address + mp->fix_size;
10884 
10885       if (mp->refcount)
10886 	mp->next->offset = mp->offset + mp->fix_size;
10887       else
10888 	mp->next->offset = mp->offset;
10889 
10890       mp = mp->next;
10891     }
10892 
10893   return min_mp;
10894 }
10895 
10896 static void
10897 assign_minipool_offsets (Mfix *barrier)
10898 {
10899   HOST_WIDE_INT offset = 0;
10900   Mnode *mp;
10901 
10902   minipool_barrier = barrier;
10903 
10904   for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10905     {
10906       mp->offset = offset;
10907 
10908       if (mp->refcount > 0)
10909 	offset += mp->fix_size;
10910     }
10911 }
10912 
10913 /* Output the literal table */
10914 static void
10915 dump_minipool (rtx scan)
10916 {
10917   Mnode * mp;
10918   Mnode * nmp;
10919   int align64 = 0;
10920 
10921   if (ARM_DOUBLEWORD_ALIGN)
10922     for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10923       if (mp->refcount > 0 && mp->fix_size >= 8)
10924 	{
10925 	  align64 = 1;
10926 	  break;
10927 	}
10928 
10929   if (dump_file)
10930     fprintf (dump_file,
10931 	     ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
10932 	     INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
10933 
10934   scan = emit_label_after (gen_label_rtx (), scan);
10935   scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
10936   scan = emit_label_after (minipool_vector_label, scan);
10937 
10938   for (mp = minipool_vector_head; mp != NULL; mp = nmp)
10939     {
10940       if (mp->refcount > 0)
10941 	{
10942 	  if (dump_file)
10943 	    {
10944 	      fprintf (dump_file,
10945 		       ";;  Offset %u, min %ld, max %ld ",
10946 		       (unsigned) mp->offset, (unsigned long) mp->min_address,
10947 		       (unsigned long) mp->max_address);
10948 	      arm_print_value (dump_file, mp->value);
10949 	      fputc ('\n', dump_file);
10950 	    }
10951 
10952 	  switch (mp->fix_size)
10953 	    {
10954 #ifdef HAVE_consttable_1
10955 	    case 1:
10956 	      scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
10957 	      break;
10958 
10959 #endif
10960 #ifdef HAVE_consttable_2
10961 	    case 2:
10962 	      scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
10963 	      break;
10964 
10965 #endif
10966 #ifdef HAVE_consttable_4
10967 	    case 4:
10968 	      scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
10969 	      break;
10970 
10971 #endif
10972 #ifdef HAVE_consttable_8
10973 	    case 8:
10974 	      scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
10975 	      break;
10976 
10977 #endif
10978 #ifdef HAVE_consttable_16
10979 	    case 16:
10980               scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
10981               break;
10982 
10983 #endif
10984 	    default:
10985 	      gcc_unreachable ();
10986 	    }
10987 	}
10988 
10989       nmp = mp->next;
10990       free (mp);
10991     }
10992 
10993   minipool_vector_head = minipool_vector_tail = NULL;
10994   scan = emit_insn_after (gen_consttable_end (), scan);
10995   scan = emit_barrier_after (scan);
10996 }
10997 
10998 /* Return the cost of forcibly inserting a barrier after INSN.  */
10999 static int
11000 arm_barrier_cost (rtx insn)
11001 {
11002   /* Basing the location of the pool on the loop depth is preferable,
11003      but at the moment, the basic block information seems to be
11004      corrupt by this stage of the compilation.  */
11005   int base_cost = 50;
11006   rtx next = next_nonnote_insn (insn);
11007 
11008   if (next != NULL && GET_CODE (next) == CODE_LABEL)
11009     base_cost -= 20;
11010 
11011   switch (GET_CODE (insn))
11012     {
11013     case CODE_LABEL:
11014       /* It will always be better to place the table before the label, rather
11015 	 than after it.  */
11016       return 50;
11017 
11018     case INSN:
11019     case CALL_INSN:
11020       return base_cost;
11021 
11022     case JUMP_INSN:
11023       return base_cost - 10;
11024 
11025     default:
11026       return base_cost + 10;
11027     }
11028 }
11029 
11030 /* Find the best place in the insn stream in the range
11031    (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
11032    Create the barrier by inserting a jump and add a new fix entry for
11033    it.  */
11034 static Mfix *
11035 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
11036 {
11037   HOST_WIDE_INT count = 0;
11038   rtx barrier;
11039   rtx from = fix->insn;
11040   /* The instruction after which we will insert the jump.  */
11041   rtx selected = NULL;
11042   int selected_cost;
11043   /* The address at which the jump instruction will be placed.  */
11044   HOST_WIDE_INT selected_address;
11045   Mfix * new_fix;
11046   HOST_WIDE_INT max_count = max_address - fix->address;
11047   rtx label = gen_label_rtx ();
11048 
11049   selected_cost = arm_barrier_cost (from);
11050   selected_address = fix->address;
11051 
11052   while (from && count < max_count)
11053     {
11054       rtx tmp;
11055       int new_cost;
11056 
11057       /* This code shouldn't have been called if there was a natural barrier
11058 	 within range.  */
11059       gcc_assert (GET_CODE (from) != BARRIER);
11060 
11061       /* Count the length of this insn.  */
11062       count += get_attr_length (from);
11063 
11064       /* If there is a jump table, add its length.  */
11065       tmp = is_jump_table (from);
11066       if (tmp != NULL)
11067 	{
11068 	  count += get_jump_table_size (tmp);
11069 
11070 	  /* Jump tables aren't in a basic block, so base the cost on
11071 	     the dispatch insn.  If we select this location, we will
11072 	     still put the pool after the table.  */
11073 	  new_cost = arm_barrier_cost (from);
11074 
11075 	  if (count < max_count
11076 	      && (!selected || new_cost <= selected_cost))
11077 	    {
11078 	      selected = tmp;
11079 	      selected_cost = new_cost;
11080 	      selected_address = fix->address + count;
11081 	    }
11082 
11083 	  /* Continue after the dispatch table.  */
11084 	  from = NEXT_INSN (tmp);
11085 	  continue;
11086 	}
11087 
11088       new_cost = arm_barrier_cost (from);
11089 
11090       if (count < max_count
11091 	  && (!selected || new_cost <= selected_cost))
11092 	{
11093 	  selected = from;
11094 	  selected_cost = new_cost;
11095 	  selected_address = fix->address + count;
11096 	}
11097 
11098       from = NEXT_INSN (from);
11099     }
11100 
11101   /* Make sure that we found a place to insert the jump.  */
11102   gcc_assert (selected);
11103 
11104   /* Create a new JUMP_INSN that branches around a barrier.  */
11105   from = emit_jump_insn_after (gen_jump (label), selected);
11106   JUMP_LABEL (from) = label;
11107   barrier = emit_barrier_after (from);
11108   emit_label_after (label, barrier);
11109 
11110   /* Create a minipool barrier entry for the new barrier.  */
11111   new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
11112   new_fix->insn = barrier;
11113   new_fix->address = selected_address;
11114   new_fix->next = fix->next;
11115   fix->next = new_fix;
11116 
11117   return new_fix;
11118 }
11119 
11120 /* Record that there is a natural barrier in the insn stream at
11121    ADDRESS.  */
11122 static void
11123 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
11124 {
11125   Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11126 
11127   fix->insn = insn;
11128   fix->address = address;
11129 
11130   fix->next = NULL;
11131   if (minipool_fix_head != NULL)
11132     minipool_fix_tail->next = fix;
11133   else
11134     minipool_fix_head = fix;
11135 
11136   minipool_fix_tail = fix;
11137 }
11138 
11139 /* Record INSN, which will need fixing up to load a value from the
11140    minipool.  ADDRESS is the offset of the insn since the start of the
11141    function; LOC is a pointer to the part of the insn which requires
11142    fixing; VALUE is the constant that must be loaded, which is of type
11143    MODE.  */
11144 static void
11145 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
11146 		   enum machine_mode mode, rtx value)
11147 {
11148   Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11149 
11150   fix->insn = insn;
11151   fix->address = address;
11152   fix->loc = loc;
11153   fix->mode = mode;
11154   fix->fix_size = MINIPOOL_FIX_SIZE (mode);
11155   fix->value = value;
11156   fix->forwards = get_attr_pool_range (insn);
11157   fix->backwards = get_attr_neg_pool_range (insn);
11158   fix->minipool = NULL;
11159 
11160   /* If an insn doesn't have a range defined for it, then it isn't
11161      expecting to be reworked by this code.  Better to stop now than
11162      to generate duff assembly code.  */
11163   gcc_assert (fix->forwards || fix->backwards);
11164 
11165   /* If an entry requires 8-byte alignment then assume all constant pools
11166      require 4 bytes of padding.  Trying to do this later on a per-pool
11167      basis is awkward because existing pool entries have to be modified.  */
11168   if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
11169     minipool_pad = 4;
11170 
11171   if (dump_file)
11172     {
11173       fprintf (dump_file,
11174 	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
11175 	       GET_MODE_NAME (mode),
11176 	       INSN_UID (insn), (unsigned long) address,
11177 	       -1 * (long)fix->backwards, (long)fix->forwards);
11178       arm_print_value (dump_file, fix->value);
11179       fprintf (dump_file, "\n");
11180     }
11181 
11182   /* Add it to the chain of fixes.  */
11183   fix->next = NULL;
11184 
11185   if (minipool_fix_head != NULL)
11186     minipool_fix_tail->next = fix;
11187   else
11188     minipool_fix_head = fix;
11189 
11190   minipool_fix_tail = fix;
11191 }
11192 
11193 /* Return the cost of synthesizing a 64-bit constant VAL inline.
11194    Returns the number of insns needed, or 99 if we don't know how to
11195    do it.  */
11196 int
11197 arm_const_double_inline_cost (rtx val)
11198 {
11199   rtx lowpart, highpart;
11200   enum machine_mode mode;
11201 
11202   mode = GET_MODE (val);
11203 
11204   if (mode == VOIDmode)
11205     mode = DImode;
11206 
11207   gcc_assert (GET_MODE_SIZE (mode) == 8);
11208 
11209   lowpart = gen_lowpart (SImode, val);
11210   highpart = gen_highpart_mode (SImode, mode, val);
11211 
11212   gcc_assert (GET_CODE (lowpart) == CONST_INT);
11213   gcc_assert (GET_CODE (highpart) == CONST_INT);
11214 
11215   return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
11216 			    NULL_RTX, NULL_RTX, 0, 0)
11217 	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
11218 			      NULL_RTX, NULL_RTX, 0, 0));
11219 }
11220 
11221 /* Return true if it is worthwhile to split a 64-bit constant into two
11222    32-bit operations.  This is the case if optimizing for size, or
11223    if we have load delay slots, or if one 32-bit part can be done with
11224    a single data operation.  */
11225 bool
11226 arm_const_double_by_parts (rtx val)
11227 {
11228   enum machine_mode mode = GET_MODE (val);
11229   rtx part;
11230 
11231   if (optimize_size || arm_ld_sched)
11232     return true;
11233 
11234   if (mode == VOIDmode)
11235     mode = DImode;
11236 
11237   part = gen_highpart_mode (SImode, mode, val);
11238 
11239   gcc_assert (GET_CODE (part) == CONST_INT);
11240 
11241   if (const_ok_for_arm (INTVAL (part))
11242       || const_ok_for_arm (~INTVAL (part)))
11243     return true;
11244 
11245   part = gen_lowpart (SImode, val);
11246 
11247   gcc_assert (GET_CODE (part) == CONST_INT);
11248 
11249   if (const_ok_for_arm (INTVAL (part))
11250       || const_ok_for_arm (~INTVAL (part)))
11251     return true;
11252 
11253   return false;
11254 }
11255 
11256 /* Scan INSN and note any of its operands that need fixing.
11257    If DO_PUSHES is false we do not actually push any of the fixups
11258    needed.  The function returns TRUE if any fixups were needed/pushed.
11259    This is used by arm_memory_load_p() which needs to know about loads
11260    of constants that will be converted into minipool loads.  */
11261 static bool
11262 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
11263 {
11264   bool result = false;
11265   int opno;
11266 
11267   extract_insn (insn);
11268 
11269   if (!constrain_operands (1))
11270     fatal_insn_not_found (insn);
11271 
11272   if (recog_data.n_alternatives == 0)
11273     return false;
11274 
11275   /* Fill in recog_op_alt with information about the constraints of
11276      this insn.  */
11277   preprocess_constraints ();
11278 
11279   for (opno = 0; opno < recog_data.n_operands; opno++)
11280     {
11281       /* Things we need to fix can only occur in inputs.  */
11282       if (recog_data.operand_type[opno] != OP_IN)
11283 	continue;
11284 
11285       /* If this alternative is a memory reference, then any mention
11286 	 of constants in this alternative is really to fool reload
11287 	 into allowing us to accept one there.  We need to fix them up
11288 	 now so that we output the right code.  */
11289       if (recog_op_alt[opno][which_alternative].memory_ok)
11290 	{
11291 	  rtx op = recog_data.operand[opno];
11292 
11293 	  if (CONSTANT_P (op))
11294 	    {
11295 	      if (do_pushes)
11296 		push_minipool_fix (insn, address, recog_data.operand_loc[opno],
11297 				   recog_data.operand_mode[opno], op);
11298 	      result = true;
11299 	    }
11300 	  else if (GET_CODE (op) == MEM
11301 		   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
11302 		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
11303 	    {
11304 	      if (do_pushes)
11305 		{
11306 		  rtx cop = avoid_constant_pool_reference (op);
11307 
11308 		  /* Casting the address of something to a mode narrower
11309 		     than a word can cause avoid_constant_pool_reference()
11310 		     to return the pool reference itself.  That's no good to
11311 		     us here.  Lets just hope that we can use the
11312 		     constant pool value directly.  */
11313 		  if (op == cop)
11314 		    cop = get_pool_constant (XEXP (op, 0));
11315 
11316 		  push_minipool_fix (insn, address,
11317 				     recog_data.operand_loc[opno],
11318 				     recog_data.operand_mode[opno], cop);
11319 		}
11320 
11321 	      result = true;
11322 	    }
11323 	}
11324     }
11325 
11326   return result;
11327 }
11328 
11329 /* Gcc puts the pool in the wrong place for ARM, since we can only
11330    load addresses a limited distance around the pc.  We do some
11331    special munging to move the constant pool values to the correct
11332    point in the code.  */
11333 static void
11334 arm_reorg (void)
11335 {
11336   rtx insn;
11337   HOST_WIDE_INT address = 0;
11338   Mfix * fix;
11339 
11340   minipool_fix_head = minipool_fix_tail = NULL;
11341 
11342   /* The first insn must always be a note, or the code below won't
11343      scan it properly.  */
11344   insn = get_insns ();
11345   gcc_assert (GET_CODE (insn) == NOTE);
11346   minipool_pad = 0;
11347 
11348   /* Scan all the insns and record the operands that will need fixing.  */
11349   for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
11350     {
11351       if (TARGET_CIRRUS_FIX_INVALID_INSNS
11352           && (arm_cirrus_insn_p (insn)
11353 	      || GET_CODE (insn) == JUMP_INSN
11354 	      || arm_memory_load_p (insn)))
11355 	cirrus_reorg (insn);
11356 
11357       if (GET_CODE (insn) == BARRIER)
11358 	push_minipool_barrier (insn, address);
11359       else if (INSN_P (insn))
11360 	{
11361 	  rtx table;
11362 
11363 	  note_invalid_constants (insn, address, true);
11364 	  address += get_attr_length (insn);
11365 
11366 	  /* If the insn is a vector jump, add the size of the table
11367 	     and skip the table.  */
11368 	  if ((table = is_jump_table (insn)) != NULL)
11369 	    {
11370 	      address += get_jump_table_size (table);
11371 	      insn = table;
11372 	    }
11373 	}
11374     }
11375 
11376   fix = minipool_fix_head;
11377 
11378   /* Now scan the fixups and perform the required changes.  */
11379   while (fix)
11380     {
11381       Mfix * ftmp;
11382       Mfix * fdel;
11383       Mfix *  last_added_fix;
11384       Mfix * last_barrier = NULL;
11385       Mfix * this_fix;
11386 
11387       /* Skip any further barriers before the next fix.  */
11388       while (fix && GET_CODE (fix->insn) == BARRIER)
11389 	fix = fix->next;
11390 
11391       /* No more fixes.  */
11392       if (fix == NULL)
11393 	break;
11394 
11395       last_added_fix = NULL;
11396 
11397       for (ftmp = fix; ftmp; ftmp = ftmp->next)
11398 	{
11399 	  if (GET_CODE (ftmp->insn) == BARRIER)
11400 	    {
11401 	      if (ftmp->address >= minipool_vector_head->max_address)
11402 		break;
11403 
11404 	      last_barrier = ftmp;
11405 	    }
11406 	  else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
11407 	    break;
11408 
11409 	  last_added_fix = ftmp;  /* Keep track of the last fix added.  */
11410 	}
11411 
11412       /* If we found a barrier, drop back to that; any fixes that we
11413 	 could have reached but come after the barrier will now go in
11414 	 the next mini-pool.  */
11415       if (last_barrier != NULL)
11416 	{
11417 	  /* Reduce the refcount for those fixes that won't go into this
11418 	     pool after all.  */
11419 	  for (fdel = last_barrier->next;
11420 	       fdel && fdel != ftmp;
11421 	       fdel = fdel->next)
11422 	    {
11423 	      fdel->minipool->refcount--;
11424 	      fdel->minipool = NULL;
11425 	    }
11426 
11427 	  ftmp = last_barrier;
11428 	}
11429       else
11430         {
11431 	  /* ftmp is first fix that we can't fit into this pool and
11432 	     there no natural barriers that we could use.  Insert a
11433 	     new barrier in the code somewhere between the previous
11434 	     fix and this one, and arrange to jump around it.  */
11435 	  HOST_WIDE_INT max_address;
11436 
11437 	  /* The last item on the list of fixes must be a barrier, so
11438 	     we can never run off the end of the list of fixes without
11439 	     last_barrier being set.  */
11440 	  gcc_assert (ftmp);
11441 
11442 	  max_address = minipool_vector_head->max_address;
11443 	  /* Check that there isn't another fix that is in range that
11444 	     we couldn't fit into this pool because the pool was
11445 	     already too large: we need to put the pool before such an
11446 	     instruction.  The pool itself may come just after the
11447 	     fix because create_fix_barrier also allows space for a
11448 	     jump instruction.  */
11449 	  if (ftmp->address < max_address)
11450 	    max_address = ftmp->address + 1;
11451 
11452 	  last_barrier = create_fix_barrier (last_added_fix, max_address);
11453 	}
11454 
11455       assign_minipool_offsets (last_barrier);
11456 
11457       while (ftmp)
11458 	{
11459 	  if (GET_CODE (ftmp->insn) != BARRIER
11460 	      && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
11461 		  == NULL))
11462 	    break;
11463 
11464 	  ftmp = ftmp->next;
11465 	}
11466 
11467       /* Scan over the fixes we have identified for this pool, fixing them
11468 	 up and adding the constants to the pool itself.  */
11469       for (this_fix = fix; this_fix && ftmp != this_fix;
11470 	   this_fix = this_fix->next)
11471 	if (GET_CODE (this_fix->insn) != BARRIER)
11472 	  {
11473 	    rtx addr
11474 	      = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
11475 						  minipool_vector_label),
11476 			       this_fix->minipool->offset);
11477 	    *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
11478 	  }
11479 
11480       dump_minipool (last_barrier->insn);
11481       fix = ftmp;
11482     }
11483 
11484   /* From now on we must synthesize any constants that we can't handle
11485      directly.  This can happen if the RTL gets split during final
11486      instruction generation.  */
11487   after_arm_reorg = 1;
11488 
11489   /* Free the minipool memory.  */
11490   obstack_free (&minipool_obstack, minipool_startobj);
11491 }
11492 
11493 /* Routines to output assembly language.  */
11494 
11495 /* If the rtx is the correct value then return the string of the number.
11496    In this way we can ensure that valid double constants are generated even
11497    when cross compiling.  */
11498 const char *
11499 fp_immediate_constant (rtx x)
11500 {
11501   REAL_VALUE_TYPE r;
11502   int i;
11503 
11504   if (!fp_consts_inited)
11505     init_fp_table ();
11506 
11507   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11508   for (i = 0; i < 8; i++)
11509     if (REAL_VALUES_EQUAL (r, values_fp[i]))
11510       return strings_fp[i];
11511 
11512   gcc_unreachable ();
11513 }
11514 
11515 /* As for fp_immediate_constant, but value is passed directly, not in rtx.  */
11516 static const char *
11517 fp_const_from_val (REAL_VALUE_TYPE *r)
11518 {
11519   int i;
11520 
11521   if (!fp_consts_inited)
11522     init_fp_table ();
11523 
11524   for (i = 0; i < 8; i++)
11525     if (REAL_VALUES_EQUAL (*r, values_fp[i]))
11526       return strings_fp[i];
11527 
11528   gcc_unreachable ();
11529 }
11530 
11531 /* Output the operands of a LDM/STM instruction to STREAM.
11532    MASK is the ARM register set mask of which only bits 0-15 are important.
11533    REG is the base register, either the frame pointer or the stack pointer,
11534    INSTR is the possibly suffixed load or store instruction.
11535    RFE is nonzero if the instruction should also copy spsr to cpsr.  */
11536 
11537 static void
11538 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
11539 		 unsigned long mask, int rfe)
11540 {
11541   unsigned i;
11542   bool not_first = FALSE;
11543 
11544   gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
11545   fputc ('\t', stream);
11546   asm_fprintf (stream, instr, reg);
11547   fputc ('{', stream);
11548 
11549   for (i = 0; i <= LAST_ARM_REGNUM; i++)
11550     if (mask & (1 << i))
11551       {
11552 	if (not_first)
11553 	  fprintf (stream, ", ");
11554 
11555 	asm_fprintf (stream, "%r", i);
11556 	not_first = TRUE;
11557       }
11558 
11559   if (rfe)
11560     fprintf (stream, "}^\n");
11561   else
11562     fprintf (stream, "}\n");
11563 }
11564 
11565 
11566 /* Output a FLDMD instruction to STREAM.
11567    BASE if the register containing the address.
11568    REG and COUNT specify the register range.
11569    Extra registers may be added to avoid hardware bugs.
11570 
11571    We output FLDMD even for ARMv5 VFP implementations.  Although
11572    FLDMD is technically not supported until ARMv6, it is believed
11573    that all VFP implementations support its use in this context.  */
11574 
11575 static void
11576 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
11577 {
11578   int i;
11579 
11580   /* Workaround ARM10 VFPr1 bug.  */
11581   if (count == 2 && !arm_arch6)
11582     {
11583       if (reg == 15)
11584 	reg--;
11585       count++;
11586     }
11587 
11588   /* FLDMD may not load more than 16 doubleword registers at a time. Split the
11589      load into multiple parts if we have to handle more than 16 registers.  */
11590   if (count > 16)
11591     {
11592       vfp_output_fldmd (stream, base, reg, 16);
11593       vfp_output_fldmd (stream, base, reg + 16, count - 16);
11594       return;
11595     }
11596 
11597   fputc ('\t', stream);
11598   asm_fprintf (stream, "fldmfdd\t%r!, {", base);
11599 
11600   for (i = reg; i < reg + count; i++)
11601     {
11602       if (i > reg)
11603 	fputs (", ", stream);
11604       asm_fprintf (stream, "d%d", i);
11605     }
11606   fputs ("}\n", stream);
11607 
11608 }
11609 
11610 
11611 /* Output the assembly for a store multiple.  */
11612 
11613 const char *
11614 vfp_output_fstmd (rtx * operands)
11615 {
11616   char pattern[100];
11617   int p;
11618   int base;
11619   int i;
11620 
11621   strcpy (pattern, "fstmfdd\t%m0!, {%P1");
11622   p = strlen (pattern);
11623 
11624   gcc_assert (GET_CODE (operands[1]) == REG);
11625 
11626   base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
11627   for (i = 1; i < XVECLEN (operands[2], 0); i++)
11628     {
11629       p += sprintf (&pattern[p], ", d%d", base + i);
11630     }
11631   strcpy (&pattern[p], "}");
11632 
11633   output_asm_insn (pattern, operands);
11634   return "";
11635 }
11636 
11637 
11638 /* Emit RTL to save block of VFP register pairs to the stack.  Returns the
11639    number of bytes pushed.  */
11640 
11641 static int
11642 vfp_emit_fstmd (int base_reg, int count)
11643 {
11644   rtx par;
11645   rtx dwarf;
11646   rtx tmp, reg;
11647   int i;
11648 
11649   /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
11650      register pairs are stored by a store multiple insn.  We avoid this
11651      by pushing an extra pair.  */
11652   if (count == 2 && !arm_arch6)
11653     {
11654       if (base_reg == LAST_VFP_REGNUM - 3)
11655 	base_reg -= 2;
11656       count++;
11657     }
11658 
11659   /* FSTMD may not store more than 16 doubleword registers at once.  Split
11660      larger stores into multiple parts (up to a maximum of two, in
11661      practice).  */
11662   if (count > 16)
11663     {
11664       int saved;
11665       /* NOTE: base_reg is an internal register number, so each D register
11666          counts as 2.  */
11667       saved = vfp_emit_fstmd (base_reg + 32, count - 16);
11668       saved += vfp_emit_fstmd (base_reg, 16);
11669       return saved;
11670     }
11671 
11672   par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
11673   dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
11674 
11675   reg = gen_rtx_REG (DFmode, base_reg);
11676   base_reg += 2;
11677 
11678   XVECEXP (par, 0, 0)
11679     = gen_rtx_SET (VOIDmode,
11680 		   gen_frame_mem
11681 		   (BLKmode,
11682 		    gen_rtx_PRE_MODIFY (Pmode,
11683 					stack_pointer_rtx,
11684 					plus_constant
11685 					(stack_pointer_rtx,
11686 					 - (count * 8)))
11687 		    ),
11688 		   gen_rtx_UNSPEC (BLKmode,
11689 				   gen_rtvec (1, reg),
11690 				   UNSPEC_PUSH_MULT));
11691 
11692   tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11693 		     plus_constant (stack_pointer_rtx, -(count * 8)));
11694   RTX_FRAME_RELATED_P (tmp) = 1;
11695   XVECEXP (dwarf, 0, 0) = tmp;
11696 
11697   tmp = gen_rtx_SET (VOIDmode,
11698 		     gen_frame_mem (DFmode, stack_pointer_rtx),
11699 		     reg);
11700   RTX_FRAME_RELATED_P (tmp) = 1;
11701   XVECEXP (dwarf, 0, 1) = tmp;
11702 
11703   for (i = 1; i < count; i++)
11704     {
11705       reg = gen_rtx_REG (DFmode, base_reg);
11706       base_reg += 2;
11707       XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
11708 
11709       tmp = gen_rtx_SET (VOIDmode,
11710 			 gen_frame_mem (DFmode,
11711 					plus_constant (stack_pointer_rtx,
11712 						       i * 8)),
11713 			 reg);
11714       RTX_FRAME_RELATED_P (tmp) = 1;
11715       XVECEXP (dwarf, 0, i + 1) = tmp;
11716     }
11717 
11718   par = emit_insn (par);
11719   add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
11720   RTX_FRAME_RELATED_P (par) = 1;
11721 
11722   return count * 8;
11723 }
11724 
11725 /* Emit a call instruction with pattern PAT.  ADDR is the address of
11726    the call target.  */
11727 
11728 void
11729 arm_emit_call_insn (rtx pat, rtx addr)
11730 {
11731   rtx insn;
11732 
11733   insn = emit_call_insn (pat);
11734 
11735   /* The PIC register is live on entry to VxWorks PIC PLT entries.
11736      If the call might use such an entry, add a use of the PIC register
11737      to the instruction's CALL_INSN_FUNCTION_USAGE.  */
11738   if (TARGET_VXWORKS_RTP
11739       && flag_pic
11740       && GET_CODE (addr) == SYMBOL_REF
11741       && (SYMBOL_REF_DECL (addr)
11742 	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
11743 	  : !SYMBOL_REF_LOCAL_P (addr)))
11744     {
11745       require_pic_register ();
11746       use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
11747     }
11748 }
11749 
11750 /* Output a 'call' insn.  */
11751 const char *
11752 output_call (rtx *operands)
11753 {
11754   gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly.  */
11755 
11756   /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
11757   if (REGNO (operands[0]) == LR_REGNUM)
11758     {
11759       operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
11760       output_asm_insn ("mov%?\t%0, %|lr", operands);
11761     }
11762 
11763   output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11764 
11765   if (TARGET_INTERWORK || arm_arch4t)
11766     output_asm_insn ("bx%?\t%0", operands);
11767   else
11768     output_asm_insn ("mov%?\t%|pc, %0", operands);
11769 
11770   return "";
11771 }
11772 
11773 /* Output a 'call' insn that is a reference in memory. This is
11774    disabled for ARMv5 and we prefer a blx instead because otherwise
11775    there's a significant performance overhead.  */
11776 const char *
11777 output_call_mem (rtx *operands)
11778 {
11779   gcc_assert (!arm_arch5);
11780   if (TARGET_INTERWORK)
11781     {
11782       output_asm_insn ("ldr%?\t%|ip, %0", operands);
11783       output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11784       output_asm_insn ("bx%?\t%|ip", operands);
11785     }
11786   else if (regno_use_in (LR_REGNUM, operands[0]))
11787     {
11788       /* LR is used in the memory address.  We load the address in the
11789 	 first instruction.  It's safe to use IP as the target of the
11790 	 load since the call will kill it anyway.  */
11791       output_asm_insn ("ldr%?\t%|ip, %0", operands);
11792       output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11793       if (arm_arch4t)
11794 	output_asm_insn ("bx%?\t%|ip", operands);
11795       else
11796 	output_asm_insn ("mov%?\t%|pc, %|ip", operands);
11797     }
11798   else
11799     {
11800       output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11801       output_asm_insn ("ldr%?\t%|pc, %0", operands);
11802     }
11803 
11804   return "";
11805 }
11806 
11807 
11808 /* Output a move from arm registers to an fpa registers.
11809    OPERANDS[0] is an fpa register.
11810    OPERANDS[1] is the first registers of an arm register pair.  */
11811 const char *
11812 output_mov_long_double_fpa_from_arm (rtx *operands)
11813 {
11814   int arm_reg0 = REGNO (operands[1]);
11815   rtx ops[3];
11816 
11817   gcc_assert (arm_reg0 != IP_REGNUM);
11818 
11819   ops[0] = gen_rtx_REG (SImode, arm_reg0);
11820   ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11821   ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
11822 
11823   output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
11824   output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
11825 
11826   return "";
11827 }
11828 
11829 /* Output a move from an fpa register to arm registers.
11830    OPERANDS[0] is the first registers of an arm register pair.
11831    OPERANDS[1] is an fpa register.  */
11832 const char *
11833 output_mov_long_double_arm_from_fpa (rtx *operands)
11834 {
11835   int arm_reg0 = REGNO (operands[0]);
11836   rtx ops[3];
11837 
11838   gcc_assert (arm_reg0 != IP_REGNUM);
11839 
11840   ops[0] = gen_rtx_REG (SImode, arm_reg0);
11841   ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11842   ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
11843 
11844   output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
11845   output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
11846   return "";
11847 }
11848 
11849 /* Output a move from arm registers to arm registers of a long double
11850    OPERANDS[0] is the destination.
11851    OPERANDS[1] is the source.  */
11852 const char *
11853 output_mov_long_double_arm_from_arm (rtx *operands)
11854 {
11855   /* We have to be careful here because the two might overlap.  */
11856   int dest_start = REGNO (operands[0]);
11857   int src_start = REGNO (operands[1]);
11858   rtx ops[2];
11859   int i;
11860 
11861   if (dest_start < src_start)
11862     {
11863       for (i = 0; i < 3; i++)
11864 	{
11865 	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
11866 	  ops[1] = gen_rtx_REG (SImode, src_start + i);
11867 	  output_asm_insn ("mov%?\t%0, %1", ops);
11868 	}
11869     }
11870   else
11871     {
11872       for (i = 2; i >= 0; i--)
11873 	{
11874 	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
11875 	  ops[1] = gen_rtx_REG (SImode, src_start + i);
11876 	  output_asm_insn ("mov%?\t%0, %1", ops);
11877 	}
11878     }
11879 
11880   return "";
11881 }
11882 
11883 void
11884 arm_emit_movpair (rtx dest, rtx src)
11885  {
11886   /* If the src is an immediate, simplify it.  */
11887   if (CONST_INT_P (src))
11888     {
11889       HOST_WIDE_INT val = INTVAL (src);
11890       emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
11891       if ((val >> 16) & 0x0000ffff)
11892         emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
11893                                              GEN_INT (16)),
11894                        GEN_INT ((val >> 16) & 0x0000ffff));
11895       return;
11896     }
11897    emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
11898    emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
11899  }
11900 
11901 /* Output a move from arm registers to an fpa registers.
11902    OPERANDS[0] is an fpa register.
11903    OPERANDS[1] is the first registers of an arm register pair.  */
11904 const char *
11905 output_mov_double_fpa_from_arm (rtx *operands)
11906 {
11907   int arm_reg0 = REGNO (operands[1]);
11908   rtx ops[2];
11909 
11910   gcc_assert (arm_reg0 != IP_REGNUM);
11911 
11912   ops[0] = gen_rtx_REG (SImode, arm_reg0);
11913   ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11914   output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
11915   output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
11916   return "";
11917 }
11918 
11919 /* Output a move from an fpa register to arm registers.
11920    OPERANDS[0] is the first registers of an arm register pair.
11921    OPERANDS[1] is an fpa register.  */
11922 const char *
11923 output_mov_double_arm_from_fpa (rtx *operands)
11924 {
11925   int arm_reg0 = REGNO (operands[0]);
11926   rtx ops[2];
11927 
11928   gcc_assert (arm_reg0 != IP_REGNUM);
11929 
11930   ops[0] = gen_rtx_REG (SImode, arm_reg0);
11931   ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11932   output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
11933   output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
11934   return "";
11935 }
11936 
11937 /* Output a move between double words.
11938    It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
11939    or MEM<-REG and all MEMs must be offsettable addresses.  */
11940 const char *
11941 output_move_double (rtx *operands)
11942 {
11943   enum rtx_code code0 = GET_CODE (operands[0]);
11944   enum rtx_code code1 = GET_CODE (operands[1]);
11945   rtx otherops[3];
11946 
11947   if (code0 == REG)
11948     {
11949       unsigned int reg0 = REGNO (operands[0]);
11950 
11951       otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
11952 
11953       gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */
11954 
11955       switch (GET_CODE (XEXP (operands[1], 0)))
11956 	{
11957 	case REG:
11958 	  if (TARGET_LDRD
11959 	      && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
11960 	    output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
11961 	  else
11962 	    output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
11963 	  break;
11964 
11965 	case PRE_INC:
11966 	  gcc_assert (TARGET_LDRD);
11967 	  output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
11968 	  break;
11969 
11970 	case PRE_DEC:
11971 	  if (TARGET_LDRD)
11972 	    output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
11973 	  else
11974 	    output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
11975 	  break;
11976 
11977 	case POST_INC:
11978 	  if (TARGET_LDRD)
11979 	    output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
11980 	  else
11981 	    output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
11982 	  break;
11983 
11984 	case POST_DEC:
11985 	  gcc_assert (TARGET_LDRD);
11986 	  output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
11987 	  break;
11988 
11989 	case PRE_MODIFY:
11990 	case POST_MODIFY:
11991 	  /* Autoicrement addressing modes should never have overlapping
11992 	     base and destination registers, and overlapping index registers
11993 	     are already prohibited, so this doesn't need to worry about
11994 	     fix_cm3_ldrd.  */
11995 	  otherops[0] = operands[0];
11996 	  otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
11997 	  otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
11998 
11999 	  if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
12000 	    {
12001 	      if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
12002 		{
12003 		  /* Registers overlap so split out the increment.  */
12004 		  output_asm_insn ("add%?\t%1, %1, %2", otherops);
12005 		  output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
12006 		}
12007 	      else
12008 		{
12009 		  /* Use a single insn if we can.
12010 		     FIXME: IWMMXT allows offsets larger than ldrd can
12011 		     handle, fix these up with a pair of ldr.  */
12012 		  if (TARGET_THUMB2
12013 		      || GET_CODE (otherops[2]) != CONST_INT
12014 		      || (INTVAL (otherops[2]) > -256
12015 			  && INTVAL (otherops[2]) < 256))
12016 		    output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
12017 		  else
12018 		    {
12019 		      output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12020 		      output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12021 		    }
12022 		}
12023 	    }
12024 	  else
12025 	    {
12026 	      /* Use a single insn if we can.
12027 		 FIXME: IWMMXT allows offsets larger than ldrd can handle,
12028 		 fix these up with a pair of ldr.  */
12029 	      if (TARGET_THUMB2
12030 		  || GET_CODE (otherops[2]) != CONST_INT
12031 		  || (INTVAL (otherops[2]) > -256
12032 		      && INTVAL (otherops[2]) < 256))
12033 		output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
12034 	      else
12035 		{
12036 		  output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12037 		  output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12038 		}
12039 	    }
12040 	  break;
12041 
12042 	case LABEL_REF:
12043 	case CONST:
12044 	  /* We might be able to use ldrd %0, %1 here.  However the range is
12045 	     different to ldr/adr, and it is broken on some ARMv7-M
12046 	     implementations.  */
12047 	  /* Use the second register of the pair to avoid problematic
12048 	     overlap.  */
12049 	  otherops[1] = operands[1];
12050 	  output_asm_insn ("adr%?\t%0, %1", otherops);
12051 	  operands[1] = otherops[0];
12052 	  if (TARGET_LDRD)
12053 	    output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12054 	  else
12055 	    output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
12056 	  break;
12057 
12058 	  /* ??? This needs checking for thumb2.  */
12059 	default:
12060 	  if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
12061 			       GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
12062 	    {
12063 	      otherops[0] = operands[0];
12064 	      otherops[1] = XEXP (XEXP (operands[1], 0), 0);
12065 	      otherops[2] = XEXP (XEXP (operands[1], 0), 1);
12066 
12067 	      if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
12068 		{
12069 		  if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12070 		    {
12071 		      switch ((int) INTVAL (otherops[2]))
12072 			{
12073 			case -8:
12074 			  output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
12075 			  return "";
12076 			case -4:
12077 			  if (TARGET_THUMB2)
12078 			    break;
12079 			  output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
12080 			  return "";
12081 			case 4:
12082 			  if (TARGET_THUMB2)
12083 			    break;
12084 			  output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
12085 			  return "";
12086 			}
12087 		    }
12088 		  otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
12089 		  operands[1] = otherops[0];
12090 		  if (TARGET_LDRD
12091 		      && (GET_CODE (otherops[2]) == REG
12092 			  || TARGET_THUMB2
12093 			  || (GET_CODE (otherops[2]) == CONST_INT
12094 			      && INTVAL (otherops[2]) > -256
12095 			      && INTVAL (otherops[2]) < 256)))
12096 		    {
12097 		      if (reg_overlap_mentioned_p (operands[0],
12098 						   otherops[2]))
12099 			{
12100 			  rtx tmp;
12101 			  /* Swap base and index registers over to
12102 			     avoid a conflict.  */
12103 			  tmp = otherops[1];
12104 			  otherops[1] = otherops[2];
12105 			  otherops[2] = tmp;
12106 			}
12107 		      /* If both registers conflict, it will usually
12108 			 have been fixed by a splitter.  */
12109 		      if (reg_overlap_mentioned_p (operands[0], otherops[2])
12110 			  || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
12111 			{
12112 			  output_asm_insn ("add%?\t%0, %1, %2", otherops);
12113 			  output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12114 			}
12115 		      else
12116 			{
12117 			  otherops[0] = operands[0];
12118 			  output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
12119 			}
12120 		      return "";
12121 		    }
12122 
12123 		  if (GET_CODE (otherops[2]) == CONST_INT)
12124 		    {
12125 		      if (!(const_ok_for_arm (INTVAL (otherops[2]))))
12126 			output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
12127 		      else
12128 			output_asm_insn ("add%?\t%0, %1, %2", otherops);
12129 		    }
12130 		  else
12131 		    output_asm_insn ("add%?\t%0, %1, %2", otherops);
12132 		}
12133 	      else
12134 		output_asm_insn ("sub%?\t%0, %1, %2", otherops);
12135 
12136 	      if (TARGET_LDRD)
12137 		return "ldr%(d%)\t%0, [%1]";
12138 
12139 	      return "ldm%(ia%)\t%1, %M0";
12140 	    }
12141 	  else
12142 	    {
12143 	      otherops[1] = adjust_address (operands[1], SImode, 4);
12144 	      /* Take care of overlapping base/data reg.  */
12145 	      if (reg_mentioned_p (operands[0], operands[1]))
12146 		{
12147 		  output_asm_insn ("ldr%?\t%0, %1", otherops);
12148 		  output_asm_insn ("ldr%?\t%0, %1", operands);
12149 		}
12150 	      else
12151 		{
12152 		  output_asm_insn ("ldr%?\t%0, %1", operands);
12153 		  output_asm_insn ("ldr%?\t%0, %1", otherops);
12154 		}
12155 	    }
12156 	}
12157     }
12158   else
12159     {
12160       /* Constraints should ensure this.  */
12161       gcc_assert (code0 == MEM && code1 == REG);
12162       gcc_assert (REGNO (operands[1]) != IP_REGNUM);
12163 
12164       switch (GET_CODE (XEXP (operands[0], 0)))
12165         {
12166 	case REG:
12167 	  if (TARGET_LDRD)
12168 	    output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
12169 	  else
12170 	    output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12171 	  break;
12172 
12173         case PRE_INC:
12174 	  gcc_assert (TARGET_LDRD);
12175 	  output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
12176 	  break;
12177 
12178         case PRE_DEC:
12179 	  if (TARGET_LDRD)
12180 	    output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
12181 	  else
12182 	    output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
12183 	  break;
12184 
12185         case POST_INC:
12186 	  if (TARGET_LDRD)
12187 	    output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
12188 	  else
12189 	    output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
12190 	  break;
12191 
12192         case POST_DEC:
12193 	  gcc_assert (TARGET_LDRD);
12194 	  output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
12195 	  break;
12196 
12197 	case PRE_MODIFY:
12198 	case POST_MODIFY:
12199 	  otherops[0] = operands[1];
12200 	  otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
12201 	  otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
12202 
12203 	  /* IWMMXT allows offsets larger than ldrd can handle,
12204 	     fix these up with a pair of ldr.  */
12205 	  if (!TARGET_THUMB2
12206 	      && GET_CODE (otherops[2]) == CONST_INT
12207 	      && (INTVAL(otherops[2]) <= -256
12208 		  || INTVAL(otherops[2]) >= 256))
12209 	    {
12210 	      if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12211 		{
12212 		  output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12213 		  output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12214 		}
12215 	      else
12216 		{
12217 		  output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12218 		  output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12219 		}
12220 	    }
12221 	  else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12222 	    output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
12223 	  else
12224 	    output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
12225 	  break;
12226 
12227 	case PLUS:
12228 	  otherops[2] = XEXP (XEXP (operands[0], 0), 1);
12229 	  if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12230 	    {
12231 	      switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
12232 		{
12233 		case -8:
12234 		  output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
12235 		  return "";
12236 
12237 		case -4:
12238 		  if (TARGET_THUMB2)
12239 		    break;
12240 		  output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
12241 		  return "";
12242 
12243 		case 4:
12244 		  if (TARGET_THUMB2)
12245 		    break;
12246 		  output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
12247 		  return "";
12248 		}
12249 	    }
12250 	  if (TARGET_LDRD
12251 	      && (GET_CODE (otherops[2]) == REG
12252 		  || TARGET_THUMB2
12253 		  || (GET_CODE (otherops[2]) == CONST_INT
12254 		      && INTVAL (otherops[2]) > -256
12255 		      && INTVAL (otherops[2]) < 256)))
12256 	    {
12257 	      otherops[0] = operands[1];
12258 	      otherops[1] = XEXP (XEXP (operands[0], 0), 0);
12259 	      output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
12260 	      return "";
12261 	    }
12262 	  /* Fall through */
12263 
12264         default:
12265 	  otherops[0] = adjust_address (operands[0], SImode, 4);
12266 	  otherops[1] = operands[1];
12267 	  output_asm_insn ("str%?\t%1, %0", operands);
12268 	  output_asm_insn ("str%?\t%H1, %0", otherops);
12269 	}
12270     }
12271 
12272   return "";
12273 }
12274 
12275 /* Output a move, load or store for quad-word vectors in ARM registers.  Only
12276    handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
12277 
12278 const char *
12279 output_move_quad (rtx *operands)
12280 {
12281   if (REG_P (operands[0]))
12282     {
12283       /* Load, or reg->reg move.  */
12284 
12285       if (MEM_P (operands[1]))
12286         {
12287           switch (GET_CODE (XEXP (operands[1], 0)))
12288             {
12289             case REG:
12290               output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12291               break;
12292 
12293             case LABEL_REF:
12294             case CONST:
12295               output_asm_insn ("adr%?\t%0, %1", operands);
12296               output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
12297               break;
12298 
12299             default:
12300               gcc_unreachable ();
12301             }
12302         }
12303       else
12304         {
12305           rtx ops[2];
12306           int dest, src, i;
12307 
12308           gcc_assert (REG_P (operands[1]));
12309 
12310           dest = REGNO (operands[0]);
12311           src = REGNO (operands[1]);
12312 
12313           /* This seems pretty dumb, but hopefully GCC won't try to do it
12314              very often.  */
12315           if (dest < src)
12316             for (i = 0; i < 4; i++)
12317               {
12318                 ops[0] = gen_rtx_REG (SImode, dest + i);
12319                 ops[1] = gen_rtx_REG (SImode, src + i);
12320                 output_asm_insn ("mov%?\t%0, %1", ops);
12321               }
12322           else
12323             for (i = 3; i >= 0; i--)
12324               {
12325                 ops[0] = gen_rtx_REG (SImode, dest + i);
12326                 ops[1] = gen_rtx_REG (SImode, src + i);
12327                 output_asm_insn ("mov%?\t%0, %1", ops);
12328               }
12329         }
12330     }
12331   else
12332     {
12333       gcc_assert (MEM_P (operands[0]));
12334       gcc_assert (REG_P (operands[1]));
12335       gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
12336 
12337       switch (GET_CODE (XEXP (operands[0], 0)))
12338         {
12339         case REG:
12340           output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12341           break;
12342 
12343         default:
12344           gcc_unreachable ();
12345         }
12346     }
12347 
12348   return "";
12349 }
12350 
12351 /* Output a VFP load or store instruction.  */
12352 
12353 const char *
12354 output_move_vfp (rtx *operands)
12355 {
12356   rtx reg, mem, addr, ops[2];
12357   int load = REG_P (operands[0]);
12358   int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
12359   int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
12360   const char *templ;
12361   char buff[50];
12362   enum machine_mode mode;
12363 
12364   reg = operands[!load];
12365   mem = operands[load];
12366 
12367   mode = GET_MODE (reg);
12368 
12369   gcc_assert (REG_P (reg));
12370   gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
12371   gcc_assert (mode == SFmode
12372 	      || mode == DFmode
12373 	      || mode == SImode
12374 	      || mode == DImode
12375               || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
12376   gcc_assert (MEM_P (mem));
12377 
12378   addr = XEXP (mem, 0);
12379 
12380   switch (GET_CODE (addr))
12381     {
12382     case PRE_DEC:
12383       templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
12384       ops[0] = XEXP (addr, 0);
12385       ops[1] = reg;
12386       break;
12387 
12388     case POST_INC:
12389       templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
12390       ops[0] = XEXP (addr, 0);
12391       ops[1] = reg;
12392       break;
12393 
12394     default:
12395       templ = "f%s%c%%?\t%%%s0, %%1%s";
12396       ops[0] = reg;
12397       ops[1] = mem;
12398       break;
12399     }
12400 
12401   sprintf (buff, templ,
12402 	   load ? "ld" : "st",
12403 	   dp ? 'd' : 's',
12404 	   dp ? "P" : "",
12405 	   integer_p ? "\t%@ int" : "");
12406   output_asm_insn (buff, ops);
12407 
12408   return "";
12409 }
12410 
12411 /* Output a Neon quad-word load or store, or a load or store for
12412    larger structure modes.
12413 
12414    WARNING: The ordering of elements is weird in big-endian mode,
12415    because we use VSTM, as required by the EABI.  GCC RTL defines
12416    element ordering based on in-memory order.  This can be differ
12417    from the architectural ordering of elements within a NEON register.
12418    The intrinsics defined in arm_neon.h use the NEON register element
12419    ordering, not the GCC RTL element ordering.
12420 
12421    For example, the in-memory ordering of a big-endian a quadword
12422    vector with 16-bit elements when stored from register pair {d0,d1}
12423    will be (lowest address first, d0[N] is NEON register element N):
12424 
12425      [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
12426 
12427    When necessary, quadword registers (dN, dN+1) are moved to ARM
12428    registers from rN in the order:
12429 
12430      dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
12431 
12432    So that STM/LDM can be used on vectors in ARM registers, and the
12433    same memory layout will result as if VSTM/VLDM were used.  */
12434 
12435 const char *
12436 output_move_neon (rtx *operands)
12437 {
12438   rtx reg, mem, addr, ops[2];
12439   int regno, load = REG_P (operands[0]);
12440   const char *templ;
12441   char buff[50];
12442   enum machine_mode mode;
12443 
12444   reg = operands[!load];
12445   mem = operands[load];
12446 
12447   mode = GET_MODE (reg);
12448 
12449   gcc_assert (REG_P (reg));
12450   regno = REGNO (reg);
12451   gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
12452 	      || NEON_REGNO_OK_FOR_QUAD (regno));
12453   gcc_assert (VALID_NEON_DREG_MODE (mode)
12454 	      || VALID_NEON_QREG_MODE (mode)
12455 	      || VALID_NEON_STRUCT_MODE (mode));
12456   gcc_assert (MEM_P (mem));
12457 
12458   addr = XEXP (mem, 0);
12459 
12460   /* Strip off const from addresses like (const (plus (...))).  */
12461   if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
12462     addr = XEXP (addr, 0);
12463 
12464   switch (GET_CODE (addr))
12465     {
12466     case POST_INC:
12467       templ = "v%smia%%?\t%%0!, %%h1";
12468       ops[0] = XEXP (addr, 0);
12469       ops[1] = reg;
12470       break;
12471 
12472     case PRE_DEC:
12473       /* FIXME: We should be using vld1/vst1 here in BE mode?  */
12474       templ = "v%smdb%%?\t%%0!, %%h1";
12475       ops[0] = XEXP (addr, 0);
12476       ops[1] = reg;
12477       break;
12478 
12479     case POST_MODIFY:
12480       /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
12481       gcc_unreachable ();
12482 
12483     case LABEL_REF:
12484     case PLUS:
12485       {
12486 	int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
12487 	int i;
12488 	int overlap = -1;
12489 	for (i = 0; i < nregs; i++)
12490 	  {
12491 	    /* We're only using DImode here because it's a convenient size.  */
12492 	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
12493 	    ops[1] = adjust_address (mem, DImode, 8 * i);
12494 	    if (reg_overlap_mentioned_p (ops[0], mem))
12495 	      {
12496 		gcc_assert (overlap == -1);
12497 		overlap = i;
12498 	      }
12499 	    else
12500 	      {
12501 		sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12502 		output_asm_insn (buff, ops);
12503 	      }
12504 	  }
12505 	if (overlap != -1)
12506 	  {
12507 	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
12508 	    ops[1] = adjust_address (mem, SImode, 8 * overlap);
12509 	    sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12510 	    output_asm_insn (buff, ops);
12511 	  }
12512 
12513         return "";
12514       }
12515 
12516     default:
12517       templ = "v%smia%%?\t%%m0, %%h1";
12518       ops[0] = mem;
12519       ops[1] = reg;
12520     }
12521 
12522   sprintf (buff, templ, load ? "ld" : "st");
12523   output_asm_insn (buff, ops);
12524 
12525   return "";
12526 }
12527 
12528 /* Compute and return the length of neon_mov<mode>, where <mode> is
12529    one of VSTRUCT modes: EI, OI, CI or XI.  */
12530 int
12531 arm_attr_length_move_neon (rtx insn)
12532 {
12533   rtx reg, mem, addr;
12534   int load;
12535   enum machine_mode mode;
12536 
12537   extract_insn_cached (insn);
12538 
12539   if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
12540     {
12541       mode = GET_MODE (recog_data.operand[0]);
12542       switch (mode)
12543 	{
12544 	case EImode:
12545 	case OImode:
12546 	  return 8;
12547 	case CImode:
12548 	  return 12;
12549 	case XImode:
12550 	  return 16;
12551 	default:
12552 	  gcc_unreachable ();
12553 	}
12554     }
12555 
12556   load = REG_P (recog_data.operand[0]);
12557   reg = recog_data.operand[!load];
12558   mem = recog_data.operand[load];
12559 
12560   gcc_assert (MEM_P (mem));
12561 
12562   mode = GET_MODE (reg);
12563   addr = XEXP (mem, 0);
12564 
12565   /* Strip off const from addresses like (const (plus (...))).  */
12566   if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
12567     addr = XEXP (addr, 0);
12568 
12569   if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
12570     {
12571       int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
12572       return insns * 4;
12573     }
12574   else
12575     return 4;
12576 }
12577 
12578 /* Output an ADD r, s, #n where n may be too big for one instruction.
12579    If adding zero to one register, output nothing.  */
12580 const char *
12581 output_add_immediate (rtx *operands)
12582 {
12583   HOST_WIDE_INT n = INTVAL (operands[2]);
12584 
12585   if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
12586     {
12587       if (n < 0)
12588 	output_multi_immediate (operands,
12589 				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
12590 				-n);
12591       else
12592 	output_multi_immediate (operands,
12593 				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
12594 				n);
12595     }
12596 
12597   return "";
12598 }
12599 
12600 /* Output a multiple immediate operation.
12601    OPERANDS is the vector of operands referred to in the output patterns.
12602    INSTR1 is the output pattern to use for the first constant.
12603    INSTR2 is the output pattern to use for subsequent constants.
12604    IMMED_OP is the index of the constant slot in OPERANDS.
12605    N is the constant value.  */
12606 static const char *
12607 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
12608 			int immed_op, HOST_WIDE_INT n)
12609 {
12610 #if HOST_BITS_PER_WIDE_INT > 32
12611   n &= 0xffffffff;
12612 #endif
12613 
12614   if (n == 0)
12615     {
12616       /* Quick and easy output.  */
12617       operands[immed_op] = const0_rtx;
12618       output_asm_insn (instr1, operands);
12619     }
12620   else
12621     {
12622       int i;
12623       const char * instr = instr1;
12624 
12625       /* Note that n is never zero here (which would give no output).  */
12626       for (i = 0; i < 32; i += 2)
12627 	{
12628 	  if (n & (3 << i))
12629 	    {
12630 	      operands[immed_op] = GEN_INT (n & (255 << i));
12631 	      output_asm_insn (instr, operands);
12632 	      instr = instr2;
12633 	      i += 6;
12634 	    }
12635 	}
12636     }
12637 
12638   return "";
12639 }
12640 
12641 /* Return the name of a shifter operation.  */
12642 static const char *
12643 arm_shift_nmem(enum rtx_code code)
12644 {
12645   switch (code)
12646     {
12647     case ASHIFT:
12648       return ARM_LSL_NAME;
12649 
12650     case ASHIFTRT:
12651       return "asr";
12652 
12653     case LSHIFTRT:
12654       return "lsr";
12655 
12656     case ROTATERT:
12657       return "ror";
12658 
12659     default:
12660       abort();
12661     }
12662 }
12663 
12664 /* Return the appropriate ARM instruction for the operation code.
12665    The returned result should not be overwritten.  OP is the rtx of the
12666    operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
12667    was shifted.  */
12668 const char *
12669 arithmetic_instr (rtx op, int shift_first_arg)
12670 {
12671   switch (GET_CODE (op))
12672     {
12673     case PLUS:
12674       return "add";
12675 
12676     case MINUS:
12677       return shift_first_arg ? "rsb" : "sub";
12678 
12679     case IOR:
12680       return "orr";
12681 
12682     case XOR:
12683       return "eor";
12684 
12685     case AND:
12686       return "and";
12687 
12688     case ASHIFT:
12689     case ASHIFTRT:
12690     case LSHIFTRT:
12691     case ROTATERT:
12692       return arm_shift_nmem(GET_CODE(op));
12693 
12694     default:
12695       gcc_unreachable ();
12696     }
12697 }
12698 
12699 /* Ensure valid constant shifts and return the appropriate shift mnemonic
12700    for the operation code.  The returned result should not be overwritten.
12701    OP is the rtx code of the shift.
12702    On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
12703    shift.  */
12704 static const char *
12705 shift_op (rtx op, HOST_WIDE_INT *amountp)
12706 {
12707   const char * mnem;
12708   enum rtx_code code = GET_CODE (op);
12709 
12710   switch (GET_CODE (XEXP (op, 1)))
12711     {
12712     case REG:
12713     case SUBREG:
12714       *amountp = -1;
12715       break;
12716 
12717     case CONST_INT:
12718       *amountp = INTVAL (XEXP (op, 1));
12719       break;
12720 
12721     default:
12722       gcc_unreachable ();
12723     }
12724 
12725   switch (code)
12726     {
12727     case ROTATE:
12728       gcc_assert (*amountp != -1);
12729       *amountp = 32 - *amountp;
12730       code = ROTATERT;
12731 
12732       /* Fall through.  */
12733 
12734     case ASHIFT:
12735     case ASHIFTRT:
12736     case LSHIFTRT:
12737     case ROTATERT:
12738       mnem = arm_shift_nmem(code);
12739       break;
12740 
12741     case MULT:
12742       /* We never have to worry about the amount being other than a
12743 	 power of 2, since this case can never be reloaded from a reg.  */
12744       gcc_assert (*amountp != -1);
12745       *amountp = int_log2 (*amountp);
12746       return ARM_LSL_NAME;
12747 
12748     default:
12749       gcc_unreachable ();
12750     }
12751 
12752   if (*amountp != -1)
12753     {
12754       /* This is not 100% correct, but follows from the desire to merge
12755 	 multiplication by a power of 2 with the recognizer for a
12756 	 shift.  >=32 is not a valid shift for "lsl", so we must try and
12757 	 output a shift that produces the correct arithmetical result.
12758 	 Using lsr #32 is identical except for the fact that the carry bit
12759 	 is not set correctly if we set the flags; but we never use the
12760 	 carry bit from such an operation, so we can ignore that.  */
12761       if (code == ROTATERT)
12762 	/* Rotate is just modulo 32.  */
12763 	*amountp &= 31;
12764       else if (*amountp != (*amountp & 31))
12765 	{
12766 	  if (code == ASHIFT)
12767 	    mnem = "lsr";
12768 	  *amountp = 32;
12769 	}
12770 
12771       /* Shifts of 0 are no-ops.  */
12772       if (*amountp == 0)
12773 	return NULL;
12774     }
12775 
12776   return mnem;
12777 }
12778 
12779 /* Obtain the shift from the POWER of two.  */
12780 
12781 static HOST_WIDE_INT
12782 int_log2 (HOST_WIDE_INT power)
12783 {
12784   HOST_WIDE_INT shift = 0;
12785 
12786   while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
12787     {
12788       gcc_assert (shift <= 31);
12789       shift++;
12790     }
12791 
12792   return shift;
12793 }
12794 
12795 /* Output a .ascii pseudo-op, keeping track of lengths.  This is
12796    because /bin/as is horribly restrictive.  The judgement about
12797    whether or not each character is 'printable' (and can be output as
12798    is) or not (and must be printed with an octal escape) must be made
12799    with reference to the *host* character set -- the situation is
12800    similar to that discussed in the comments above pp_c_char in
12801    c-pretty-print.c.  */
12802 
12803 #define MAX_ASCII_LEN 51
12804 
12805 void
12806 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
12807 {
12808   int i;
12809   int len_so_far = 0;
12810 
12811   fputs ("\t.ascii\t\"", stream);
12812 
12813   for (i = 0; i < len; i++)
12814     {
12815       int c = p[i];
12816 
12817       if (len_so_far >= MAX_ASCII_LEN)
12818 	{
12819 	  fputs ("\"\n\t.ascii\t\"", stream);
12820 	  len_so_far = 0;
12821 	}
12822 
12823       if (ISPRINT (c))
12824 	{
12825 	  if (c == '\\' || c == '\"')
12826 	    {
12827 	      putc ('\\', stream);
12828 	      len_so_far++;
12829 	    }
12830 	  putc (c, stream);
12831 	  len_so_far++;
12832 	}
12833       else
12834 	{
12835 	  fprintf (stream, "\\%03o", c);
12836 	  len_so_far += 4;
12837 	}
12838     }
12839 
12840   fputs ("\"\n", stream);
12841 }
12842 
12843 /* Compute the register save mask for registers 0 through 12
12844    inclusive.  This code is used by arm_compute_save_reg_mask.  */
12845 
12846 static unsigned long
12847 arm_compute_save_reg0_reg12_mask (void)
12848 {
12849   unsigned long func_type = arm_current_func_type ();
12850   unsigned long save_reg_mask = 0;
12851   unsigned int reg;
12852 
12853   if (IS_INTERRUPT (func_type))
12854     {
12855       unsigned int max_reg;
12856       /* Interrupt functions must not corrupt any registers,
12857 	 even call clobbered ones.  If this is a leaf function
12858 	 we can just examine the registers used by the RTL, but
12859 	 otherwise we have to assume that whatever function is
12860 	 called might clobber anything, and so we have to save
12861 	 all the call-clobbered registers as well.  */
12862       if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
12863 	/* FIQ handlers have registers r8 - r12 banked, so
12864 	   we only need to check r0 - r7, Normal ISRs only
12865 	   bank r14 and r15, so we must check up to r12.
12866 	   r13 is the stack pointer which is always preserved,
12867 	   so we do not need to consider it here.  */
12868 	max_reg = 7;
12869       else
12870 	max_reg = 12;
12871 
12872       for (reg = 0; reg <= max_reg; reg++)
12873 	if (df_regs_ever_live_p (reg)
12874 	    || (! current_function_is_leaf && call_used_regs[reg]))
12875 	  save_reg_mask |= (1 << reg);
12876 
12877       /* Also save the pic base register if necessary.  */
12878       if (flag_pic
12879 	  && !TARGET_SINGLE_PIC_BASE
12880 	  && arm_pic_register != INVALID_REGNUM
12881 	  && crtl->uses_pic_offset_table)
12882 	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
12883     }
12884   else if (IS_VOLATILE(func_type))
12885     {
12886       /* For noreturn functions we historically omitted register saves
12887 	 altogether.  However this really messes up debugging.  As a
12888 	 compromise save just the frame pointers.  Combined with the link
12889 	 register saved elsewhere this should be sufficient to get
12890 	 a backtrace.  */
12891       if (frame_pointer_needed)
12892 	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
12893       if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
12894 	save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
12895       if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
12896 	save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
12897     }
12898   else
12899     {
12900       /* In the normal case we only need to save those registers
12901 	 which are call saved and which are used by this function.  */
12902       for (reg = 0; reg <= 11; reg++)
12903 	if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
12904 	  save_reg_mask |= (1 << reg);
12905 
12906       /* Handle the frame pointer as a special case.  */
12907       if (frame_pointer_needed)
12908 	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
12909 
12910       /* If we aren't loading the PIC register,
12911 	 don't stack it even though it may be live.  */
12912       if (flag_pic
12913 	  && !TARGET_SINGLE_PIC_BASE
12914 	  && arm_pic_register != INVALID_REGNUM
12915 	  && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
12916 	      || crtl->uses_pic_offset_table))
12917 	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
12918 
12919       /* The prologue will copy SP into R0, so save it.  */
12920       if (IS_STACKALIGN (func_type))
12921 	save_reg_mask |= 1;
12922     }
12923 
12924   /* Save registers so the exception handler can modify them.  */
12925   if (crtl->calls_eh_return)
12926     {
12927       unsigned int i;
12928 
12929       for (i = 0; ; i++)
12930 	{
12931 	  reg = EH_RETURN_DATA_REGNO (i);
12932 	  if (reg == INVALID_REGNUM)
12933 	    break;
12934 	  save_reg_mask |= 1 << reg;
12935 	}
12936     }
12937 
12938   return save_reg_mask;
12939 }
12940 
12941 
12942 /* Compute the number of bytes used to store the static chain register on the
12943    stack, above the stack frame. We need to know this accurately to get the
12944    alignment of the rest of the stack frame correct. */
12945 
12946 static int arm_compute_static_chain_stack_bytes (void)
12947 {
12948   unsigned long func_type = arm_current_func_type ();
12949   int static_chain_stack_bytes = 0;
12950 
12951   if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
12952       IS_NESTED (func_type) &&
12953       df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
12954     static_chain_stack_bytes = 4;
12955 
12956   return static_chain_stack_bytes;
12957 }
12958 
12959 
12960 /* Compute a bit mask of which registers need to be
12961    saved on the stack for the current function.
12962    This is used by arm_get_frame_offsets, which may add extra registers.  */
12963 
12964 static unsigned long
12965 arm_compute_save_reg_mask (void)
12966 {
12967   unsigned int save_reg_mask = 0;
12968   unsigned long func_type = arm_current_func_type ();
12969   unsigned int reg;
12970 
12971   if (IS_NAKED (func_type))
12972     /* This should never really happen.  */
12973     return 0;
12974 
12975   /* If we are creating a stack frame, then we must save the frame pointer,
12976      IP (which will hold the old stack pointer), LR and the PC.  */
12977   if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
12978     save_reg_mask |=
12979       (1 << ARM_HARD_FRAME_POINTER_REGNUM)
12980       | (1 << IP_REGNUM)
12981       | (1 << LR_REGNUM)
12982       | (1 << PC_REGNUM);
12983 
12984   save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
12985 
12986   /* Decide if we need to save the link register.
12987      Interrupt routines have their own banked link register,
12988      so they never need to save it.
12989      Otherwise if we do not use the link register we do not need to save
12990      it.  If we are pushing other registers onto the stack however, we
12991      can save an instruction in the epilogue by pushing the link register
12992      now and then popping it back into the PC.  This incurs extra memory
12993      accesses though, so we only do it when optimizing for size, and only
12994      if we know that we will not need a fancy return sequence.  */
12995   if (df_regs_ever_live_p (LR_REGNUM)
12996       || (save_reg_mask
12997 	  && optimize_size
12998 	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
12999 	  && !crtl->calls_eh_return))
13000     save_reg_mask |= 1 << LR_REGNUM;
13001 
13002   if (cfun->machine->lr_save_eliminated)
13003     save_reg_mask &= ~ (1 << LR_REGNUM);
13004 
13005   if (TARGET_REALLY_IWMMXT
13006       && ((bit_count (save_reg_mask)
13007 	   + ARM_NUM_INTS (crtl->args.pretend_args_size +
13008 			   arm_compute_static_chain_stack_bytes())
13009 	   ) % 2) != 0)
13010     {
13011       /* The total number of registers that are going to be pushed
13012 	 onto the stack is odd.  We need to ensure that the stack
13013 	 is 64-bit aligned before we start to save iWMMXt registers,
13014 	 and also before we start to create locals.  (A local variable
13015 	 might be a double or long long which we will load/store using
13016 	 an iWMMXt instruction).  Therefore we need to push another
13017 	 ARM register, so that the stack will be 64-bit aligned.  We
13018 	 try to avoid using the arg registers (r0 -r3) as they might be
13019 	 used to pass values in a tail call.  */
13020       for (reg = 4; reg <= 12; reg++)
13021 	if ((save_reg_mask & (1 << reg)) == 0)
13022 	  break;
13023 
13024       if (reg <= 12)
13025 	save_reg_mask |= (1 << reg);
13026       else
13027 	{
13028 	  cfun->machine->sibcall_blocked = 1;
13029 	  save_reg_mask |= (1 << 3);
13030 	}
13031     }
13032 
13033   /* We may need to push an additional register for use initializing the
13034      PIC base register.  */
13035   if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
13036       && (save_reg_mask & THUMB2_WORK_REGS) == 0)
13037     {
13038       reg = thumb_find_work_register (1 << 4);
13039       if (!call_used_regs[reg])
13040 	save_reg_mask |= (1 << reg);
13041     }
13042 
13043   return save_reg_mask;
13044 }
13045 
13046 
13047 /* Compute a bit mask of which registers need to be
13048    saved on the stack for the current function.  */
13049 static unsigned long
13050 thumb1_compute_save_reg_mask (void)
13051 {
13052   unsigned long mask;
13053   unsigned reg;
13054 
13055   mask = 0;
13056   for (reg = 0; reg < 12; reg ++)
13057     if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13058       mask |= 1 << reg;
13059 
13060   if (flag_pic
13061       && !TARGET_SINGLE_PIC_BASE
13062       && arm_pic_register != INVALID_REGNUM
13063       && crtl->uses_pic_offset_table)
13064     mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13065 
13066   /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
13067   if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
13068     mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13069 
13070   /* LR will also be pushed if any lo regs are pushed.  */
13071   if (mask & 0xff || thumb_force_lr_save ())
13072     mask |= (1 << LR_REGNUM);
13073 
13074   /* Make sure we have a low work register if we need one.
13075      We will need one if we are going to push a high register,
13076      but we are not currently intending to push a low register.  */
13077   if ((mask & 0xff) == 0
13078       && ((mask & 0x0f00) || TARGET_BACKTRACE))
13079     {
13080       /* Use thumb_find_work_register to choose which register
13081 	 we will use.  If the register is live then we will
13082 	 have to push it.  Use LAST_LO_REGNUM as our fallback
13083 	 choice for the register to select.  */
13084       reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
13085       /* Make sure the register returned by thumb_find_work_register is
13086 	 not part of the return value.  */
13087       if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
13088 	reg = LAST_LO_REGNUM;
13089 
13090       if (! call_used_regs[reg])
13091 	mask |= 1 << reg;
13092     }
13093 
13094   /* The 504 below is 8 bytes less than 512 because there are two possible
13095      alignment words.  We can't tell here if they will be present or not so we
13096      have to play it safe and assume that they are. */
13097   if ((CALLER_INTERWORKING_SLOT_SIZE +
13098        ROUND_UP_WORD (get_frame_size ()) +
13099        crtl->outgoing_args_size) >= 504)
13100     {
13101       /* This is the same as the code in thumb1_expand_prologue() which
13102 	 determines which register to use for stack decrement. */
13103       for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
13104 	if (mask & (1 << reg))
13105 	  break;
13106 
13107       if (reg > LAST_LO_REGNUM)
13108 	{
13109 	  /* Make sure we have a register available for stack decrement. */
13110 	  mask |= 1 << LAST_LO_REGNUM;
13111 	}
13112     }
13113 
13114   return mask;
13115 }
13116 
13117 
13118 /* Return the number of bytes required to save VFP registers.  */
13119 static int
13120 arm_get_vfp_saved_size (void)
13121 {
13122   unsigned int regno;
13123   int count;
13124   int saved;
13125 
13126   saved = 0;
13127   /* Space for saved VFP registers.  */
13128   if (TARGET_HARD_FLOAT && TARGET_VFP)
13129     {
13130       count = 0;
13131       for (regno = FIRST_VFP_REGNUM;
13132 	   regno < LAST_VFP_REGNUM;
13133 	   regno += 2)
13134 	{
13135 	  if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
13136 	      && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
13137 	    {
13138 	      if (count > 0)
13139 		{
13140 		  /* Workaround ARM10 VFPr1 bug.  */
13141 		  if (count == 2 && !arm_arch6)
13142 		    count++;
13143 		  saved += count * 8;
13144 		}
13145 	      count = 0;
13146 	    }
13147 	  else
13148 	    count++;
13149 	}
13150       if (count > 0)
13151 	{
13152 	  if (count == 2 && !arm_arch6)
13153 	    count++;
13154 	  saved += count * 8;
13155 	}
13156     }
13157   return saved;
13158 }
13159 
13160 
13161 /* Generate a function exit sequence.  If REALLY_RETURN is false, then do
13162    everything bar the final return instruction.  */
13163 const char *
13164 output_return_instruction (rtx operand, int really_return, int reverse)
13165 {
13166   char conditional[10];
13167   char instr[100];
13168   unsigned reg;
13169   unsigned long live_regs_mask;
13170   unsigned long func_type;
13171   arm_stack_offsets *offsets;
13172 
13173   func_type = arm_current_func_type ();
13174 
13175   if (IS_NAKED (func_type))
13176     return "";
13177 
13178   if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
13179     {
13180       /* If this function was declared non-returning, and we have
13181 	 found a tail call, then we have to trust that the called
13182 	 function won't return.  */
13183       if (really_return)
13184 	{
13185 	  rtx ops[2];
13186 
13187 	  /* Otherwise, trap an attempted return by aborting.  */
13188 	  ops[0] = operand;
13189 	  ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
13190 				       : "abort");
13191 	  assemble_external_libcall (ops[1]);
13192 	  output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
13193 	}
13194 
13195       return "";
13196     }
13197 
13198   gcc_assert (!cfun->calls_alloca || really_return);
13199 
13200   sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
13201 
13202   cfun->machine->return_used_this_function = 1;
13203 
13204   offsets = arm_get_frame_offsets ();
13205   live_regs_mask = offsets->saved_regs_mask;
13206 
13207   if (live_regs_mask)
13208     {
13209       const char * return_reg;
13210 
13211       /* If we do not have any special requirements for function exit
13212 	 (e.g. interworking) then we can load the return address
13213 	 directly into the PC.  Otherwise we must load it into LR.  */
13214       if (really_return
13215 	  && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
13216 	return_reg = reg_names[PC_REGNUM];
13217       else
13218 	return_reg = reg_names[LR_REGNUM];
13219 
13220       if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
13221 	{
13222 	  /* There are three possible reasons for the IP register
13223 	     being saved.  1) a stack frame was created, in which case
13224 	     IP contains the old stack pointer, or 2) an ISR routine
13225 	     corrupted it, or 3) it was saved to align the stack on
13226 	     iWMMXt.  In case 1, restore IP into SP, otherwise just
13227 	     restore IP.  */
13228 	  if (frame_pointer_needed)
13229 	    {
13230 	      live_regs_mask &= ~ (1 << IP_REGNUM);
13231 	      live_regs_mask |=   (1 << SP_REGNUM);
13232 	    }
13233 	  else
13234 	    gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
13235 	}
13236 
13237       /* On some ARM architectures it is faster to use LDR rather than
13238 	 LDM to load a single register.  On other architectures, the
13239 	 cost is the same.  In 26 bit mode, or for exception handlers,
13240 	 we have to use LDM to load the PC so that the CPSR is also
13241 	 restored.  */
13242       for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
13243 	if (live_regs_mask == (1U << reg))
13244 	  break;
13245 
13246       if (reg <= LAST_ARM_REGNUM
13247 	  && (reg != LR_REGNUM
13248 	      || ! really_return
13249 	      || ! IS_INTERRUPT (func_type)))
13250 	{
13251 	  sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
13252 		   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
13253 	}
13254       else
13255 	{
13256 	  char *p;
13257 	  int first = 1;
13258 
13259 	  /* Generate the load multiple instruction to restore the
13260 	     registers.  Note we can get here, even if
13261 	     frame_pointer_needed is true, but only if sp already
13262 	     points to the base of the saved core registers.  */
13263 	  if (live_regs_mask & (1 << SP_REGNUM))
13264 	    {
13265 	      unsigned HOST_WIDE_INT stack_adjust;
13266 
13267 	      stack_adjust = offsets->outgoing_args - offsets->saved_regs;
13268 	      gcc_assert (stack_adjust == 0 || stack_adjust == 4);
13269 
13270 	      if (stack_adjust && arm_arch5 && TARGET_ARM)
13271 		if (TARGET_UNIFIED_ASM)
13272 		  sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
13273 		else
13274 		  sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
13275 	      else
13276 		{
13277 		  /* If we can't use ldmib (SA110 bug),
13278 		     then try to pop r3 instead.  */
13279 		  if (stack_adjust)
13280 		    live_regs_mask |= 1 << 3;
13281 
13282 		  if (TARGET_UNIFIED_ASM)
13283 		    sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
13284 		  else
13285 		    sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
13286 		}
13287 	    }
13288 	  else
13289 	    if (TARGET_UNIFIED_ASM)
13290 	      sprintf (instr, "pop%s\t{", conditional);
13291 	    else
13292 	      sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
13293 
13294 	  p = instr + strlen (instr);
13295 
13296 	  for (reg = 0; reg <= SP_REGNUM; reg++)
13297 	    if (live_regs_mask & (1 << reg))
13298 	      {
13299 		int l = strlen (reg_names[reg]);
13300 
13301 		if (first)
13302 		  first = 0;
13303 		else
13304 		  {
13305 		    memcpy (p, ", ", 2);
13306 		    p += 2;
13307 		  }
13308 
13309 		memcpy (p, "%|", 2);
13310 		memcpy (p + 2, reg_names[reg], l);
13311 		p += l + 2;
13312 	      }
13313 
13314 	  if (live_regs_mask & (1 << LR_REGNUM))
13315 	    {
13316 	      sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
13317 	      /* If returning from an interrupt, restore the CPSR.  */
13318 	      if (IS_INTERRUPT (func_type))
13319 		strcat (p, "^");
13320 	    }
13321 	  else
13322 	    strcpy (p, "}");
13323 	}
13324 
13325       output_asm_insn (instr, & operand);
13326 
13327       /* See if we need to generate an extra instruction to
13328 	 perform the actual function return.  */
13329       if (really_return
13330 	  && func_type != ARM_FT_INTERWORKED
13331 	  && (live_regs_mask & (1 << LR_REGNUM)) != 0)
13332 	{
13333 	  /* The return has already been handled
13334 	     by loading the LR into the PC.  */
13335 	  really_return = 0;
13336 	}
13337     }
13338 
13339   if (really_return)
13340     {
13341       switch ((int) ARM_FUNC_TYPE (func_type))
13342 	{
13343 	case ARM_FT_ISR:
13344 	case ARM_FT_FIQ:
13345 	  /* ??? This is wrong for unified assembly syntax.  */
13346 	  sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
13347 	  break;
13348 
13349 	case ARM_FT_INTERWORKED:
13350 	  sprintf (instr, "bx%s\t%%|lr", conditional);
13351 	  break;
13352 
13353 	case ARM_FT_EXCEPTION:
13354 	  /* ??? This is wrong for unified assembly syntax.  */
13355 	  sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
13356 	  break;
13357 
13358 	default:
13359 	  /* Use bx if it's available.  */
13360 	  if (arm_arch5 || arm_arch4t)
13361 	    sprintf (instr, "bx%s\t%%|lr", conditional);
13362 	  else
13363 	    sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
13364 	  break;
13365 	}
13366 
13367       output_asm_insn (instr, & operand);
13368     }
13369 
13370   return "";
13371 }
13372 
13373 /* Write the function name into the code section, directly preceding
13374    the function prologue.
13375 
13376    Code will be output similar to this:
13377      t0
13378 	 .ascii "arm_poke_function_name", 0
13379 	 .align
13380      t1
13381 	 .word 0xff000000 + (t1 - t0)
13382      arm_poke_function_name
13383 	 mov     ip, sp
13384 	 stmfd   sp!, {fp, ip, lr, pc}
13385 	 sub     fp, ip, #4
13386 
13387    When performing a stack backtrace, code can inspect the value
13388    of 'pc' stored at 'fp' + 0.  If the trace function then looks
13389    at location pc - 12 and the top 8 bits are set, then we know
13390    that there is a function name embedded immediately preceding this
13391    location and has length ((pc[-3]) & 0xff000000).
13392 
13393    We assume that pc is declared as a pointer to an unsigned long.
13394 
13395    It is of no benefit to output the function name if we are assembling
13396    a leaf function.  These function types will not contain a stack
13397    backtrace structure, therefore it is not possible to determine the
13398    function name.  */
13399 void
13400 arm_poke_function_name (FILE *stream, const char *name)
13401 {
13402   unsigned long alignlength;
13403   unsigned long length;
13404   rtx           x;
13405 
13406   length      = strlen (name) + 1;
13407   alignlength = ROUND_UP_WORD (length);
13408 
13409   ASM_OUTPUT_ASCII (stream, name, length);
13410   ASM_OUTPUT_ALIGN (stream, 2);
13411   x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
13412   assemble_aligned_integer (UNITS_PER_WORD, x);
13413 }
13414 
13415 /* Place some comments into the assembler stream
13416    describing the current function.  */
13417 static void
13418 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
13419 {
13420   unsigned long func_type;
13421 
13422   if (TARGET_THUMB1)
13423     {
13424       thumb1_output_function_prologue (f, frame_size);
13425       return;
13426     }
13427 
13428   /* Sanity check.  */
13429   gcc_assert (!arm_ccfsm_state && !arm_target_insn);
13430 
13431   func_type = arm_current_func_type ();
13432 
13433   switch ((int) ARM_FUNC_TYPE (func_type))
13434     {
13435     default:
13436     case ARM_FT_NORMAL:
13437       break;
13438     case ARM_FT_INTERWORKED:
13439       asm_fprintf (f, "\t%@ Function supports interworking.\n");
13440       break;
13441     case ARM_FT_ISR:
13442       asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
13443       break;
13444     case ARM_FT_FIQ:
13445       asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
13446       break;
13447     case ARM_FT_EXCEPTION:
13448       asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
13449       break;
13450     }
13451 
13452   if (IS_NAKED (func_type))
13453     asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
13454 
13455   if (IS_VOLATILE (func_type))
13456     asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
13457 
13458   if (IS_NESTED (func_type))
13459     asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
13460   if (IS_STACKALIGN (func_type))
13461     asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
13462 
13463   asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
13464 	       crtl->args.size,
13465 	       crtl->args.pretend_args_size, frame_size);
13466 
13467   asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
13468 	       frame_pointer_needed,
13469 	       cfun->machine->uses_anonymous_args);
13470 
13471   if (cfun->machine->lr_save_eliminated)
13472     asm_fprintf (f, "\t%@ link register save eliminated.\n");
13473 
13474   if (crtl->calls_eh_return)
13475     asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
13476 
13477 }
13478 
13479 const char *
13480 arm_output_epilogue (rtx sibling)
13481 {
13482   int reg;
13483   unsigned long saved_regs_mask;
13484   unsigned long func_type;
13485   /* Floats_offset is the offset from the "virtual" frame.  In an APCS
13486      frame that is $fp + 4 for a non-variadic function.  */
13487   int floats_offset = 0;
13488   rtx operands[3];
13489   FILE * f = asm_out_file;
13490   unsigned int lrm_count = 0;
13491   int really_return = (sibling == NULL);
13492   int start_reg;
13493   arm_stack_offsets *offsets;
13494 
13495   /* If we have already generated the return instruction
13496      then it is futile to generate anything else.  */
13497   if (use_return_insn (FALSE, sibling) &&
13498       (cfun->machine->return_used_this_function != 0))
13499     return "";
13500 
13501   func_type = arm_current_func_type ();
13502 
13503   if (IS_NAKED (func_type))
13504     /* Naked functions don't have epilogues.  */
13505     return "";
13506 
13507   if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
13508     {
13509       rtx op;
13510 
13511       /* A volatile function should never return.  Call abort.  */
13512       op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
13513       assemble_external_libcall (op);
13514       output_asm_insn ("bl\t%a0", &op);
13515 
13516       return "";
13517     }
13518 
13519   /* If we are throwing an exception, then we really must be doing a
13520      return, so we can't tail-call.  */
13521   gcc_assert (!crtl->calls_eh_return || really_return);
13522 
13523   offsets = arm_get_frame_offsets ();
13524   saved_regs_mask = offsets->saved_regs_mask;
13525 
13526   if (TARGET_IWMMXT)
13527     lrm_count = bit_count (saved_regs_mask);
13528 
13529   floats_offset = offsets->saved_args;
13530   /* Compute how far away the floats will be.  */
13531   for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
13532     if (saved_regs_mask & (1 << reg))
13533       floats_offset += 4;
13534 
13535   if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13536     {
13537       /* This variable is for the Virtual Frame Pointer, not VFP regs.  */
13538       int vfp_offset = offsets->frame;
13539 
13540       if (TARGET_FPA_EMU2)
13541 	{
13542 	  for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13543 	    if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13544 	      {
13545 		floats_offset += 12;
13546 		asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
13547 			     reg, FP_REGNUM, floats_offset - vfp_offset);
13548 	      }
13549 	}
13550       else
13551 	{
13552 	  start_reg = LAST_FPA_REGNUM;
13553 
13554 	  for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13555 	    {
13556 	      if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13557 		{
13558 		  floats_offset += 12;
13559 
13560 		  /* We can't unstack more than four registers at once.  */
13561 		  if (start_reg - reg == 3)
13562 		    {
13563 		      asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
13564 			           reg, FP_REGNUM, floats_offset - vfp_offset);
13565 		      start_reg = reg - 1;
13566 		    }
13567 		}
13568 	      else
13569 		{
13570 		  if (reg != start_reg)
13571 		    asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13572 				 reg + 1, start_reg - reg,
13573 				 FP_REGNUM, floats_offset - vfp_offset);
13574 		  start_reg = reg - 1;
13575 		}
13576 	    }
13577 
13578 	  /* Just in case the last register checked also needs unstacking.  */
13579 	  if (reg != start_reg)
13580 	    asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13581 			 reg + 1, start_reg - reg,
13582 			 FP_REGNUM, floats_offset - vfp_offset);
13583 	}
13584 
13585       if (TARGET_HARD_FLOAT && TARGET_VFP)
13586 	{
13587 	  int saved_size;
13588 
13589 	  /* The fldmd insns do not have base+offset addressing
13590              modes, so we use IP to hold the address.  */
13591 	  saved_size = arm_get_vfp_saved_size ();
13592 
13593 	  if (saved_size > 0)
13594 	    {
13595 	      floats_offset += saved_size;
13596 	      asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
13597 			   FP_REGNUM, floats_offset - vfp_offset);
13598 	    }
13599 	  start_reg = FIRST_VFP_REGNUM;
13600 	  for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
13601 	    {
13602 	      if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13603 		  && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
13604 		{
13605 		  if (start_reg != reg)
13606 		    vfp_output_fldmd (f, IP_REGNUM,
13607 				      (start_reg - FIRST_VFP_REGNUM) / 2,
13608 				      (reg - start_reg) / 2);
13609 		  start_reg = reg + 2;
13610 		}
13611 	    }
13612 	  if (start_reg != reg)
13613 	    vfp_output_fldmd (f, IP_REGNUM,
13614 			      (start_reg - FIRST_VFP_REGNUM) / 2,
13615 			      (reg - start_reg) / 2);
13616 	}
13617 
13618       if (TARGET_IWMMXT)
13619 	{
13620 	  /* The frame pointer is guaranteed to be non-double-word aligned.
13621 	     This is because it is set to (old_stack_pointer - 4) and the
13622 	     old_stack_pointer was double word aligned.  Thus the offset to
13623 	     the iWMMXt registers to be loaded must also be non-double-word
13624 	     sized, so that the resultant address *is* double-word aligned.
13625 	     We can ignore floats_offset since that was already included in
13626 	     the live_regs_mask.  */
13627 	  lrm_count += (lrm_count % 2 ? 2 : 1);
13628 
13629 	  for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
13630 	    if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13631 	      {
13632 		asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
13633 			     reg, FP_REGNUM, lrm_count * 4);
13634 		lrm_count += 2;
13635 	      }
13636 	}
13637 
13638       /* saved_regs_mask should contain the IP, which at the time of stack
13639 	 frame generation actually contains the old stack pointer.  So a
13640 	 quick way to unwind the stack is just pop the IP register directly
13641 	 into the stack pointer.  */
13642       gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
13643       saved_regs_mask &= ~ (1 << IP_REGNUM);
13644       saved_regs_mask |=   (1 << SP_REGNUM);
13645 
13646       /* There are two registers left in saved_regs_mask - LR and PC.  We
13647 	 only need to restore the LR register (the return address), but to
13648 	 save time we can load it directly into the PC, unless we need a
13649 	 special function exit sequence, or we are not really returning.  */
13650       if (really_return
13651 	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13652 	  && !crtl->calls_eh_return)
13653 	/* Delete the LR from the register mask, so that the LR on
13654 	   the stack is loaded into the PC in the register mask.  */
13655 	saved_regs_mask &= ~ (1 << LR_REGNUM);
13656       else
13657 	saved_regs_mask &= ~ (1 << PC_REGNUM);
13658 
13659       /* We must use SP as the base register, because SP is one of the
13660          registers being restored.  If an interrupt or page fault
13661          happens in the ldm instruction, the SP might or might not
13662          have been restored.  That would be bad, as then SP will no
13663          longer indicate the safe area of stack, and we can get stack
13664          corruption.  Using SP as the base register means that it will
13665          be reset correctly to the original value, should an interrupt
13666          occur.  If the stack pointer already points at the right
13667          place, then omit the subtraction.  */
13668       if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
13669 	  || cfun->calls_alloca)
13670 	asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
13671 		     4 * bit_count (saved_regs_mask));
13672       print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
13673 
13674       if (IS_INTERRUPT (func_type))
13675 	/* Interrupt handlers will have pushed the
13676 	   IP onto the stack, so restore it now.  */
13677 	print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
13678     }
13679   else
13680     {
13681       /* This branch is executed for ARM mode (non-apcs frames) and
13682 	 Thumb-2 mode. Frame layout is essentially the same for those
13683 	 cases, except that in ARM mode frame pointer points to the
13684 	 first saved register, while in Thumb-2 mode the frame pointer points
13685 	 to the last saved register.
13686 
13687 	 It is possible to make frame pointer point to last saved
13688 	 register in both cases, and remove some conditionals below.
13689 	 That means that fp setup in prologue would be just "mov fp, sp"
13690 	 and sp restore in epilogue would be just "mov sp, fp", whereas
13691 	 now we have to use add/sub in those cases. However, the value
13692 	 of that would be marginal, as both mov and add/sub are 32-bit
13693 	 in ARM mode, and it would require extra conditionals
13694 	 in arm_expand_prologue to distingish ARM-apcs-frame case
13695 	 (where frame pointer is required to point at first register)
13696 	 and ARM-non-apcs-frame. Therefore, such change is postponed
13697 	 until real need arise.  */
13698       unsigned HOST_WIDE_INT amount;
13699       int rfe;
13700       /* Restore stack pointer if necessary.  */
13701       if (TARGET_ARM && frame_pointer_needed)
13702 	{
13703 	  operands[0] = stack_pointer_rtx;
13704 	  operands[1] = hard_frame_pointer_rtx;
13705 
13706 	  operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
13707 	  output_add_immediate (operands);
13708 	}
13709       else
13710 	{
13711 	  if (frame_pointer_needed)
13712 	    {
13713 	      /* For Thumb-2 restore sp from the frame pointer.
13714 		 Operand restrictions mean we have to incrememnt FP, then copy
13715 		 to SP.  */
13716 	      amount = offsets->locals_base - offsets->saved_regs;
13717 	      operands[0] = hard_frame_pointer_rtx;
13718 	    }
13719 	  else
13720 	    {
13721 	      unsigned long count;
13722 	      operands[0] = stack_pointer_rtx;
13723 	      amount = offsets->outgoing_args - offsets->saved_regs;
13724 	      /* pop call clobbered registers if it avoids a
13725 	         separate stack adjustment.  */
13726 	      count = offsets->saved_regs - offsets->saved_args;
13727 	      if (optimize_size
13728 		  && count != 0
13729 		  && !crtl->calls_eh_return
13730 		  && bit_count(saved_regs_mask) * 4 == count
13731 		  && !IS_INTERRUPT (func_type)
13732 		  && !crtl->tail_call_emit)
13733 		{
13734 		  unsigned long mask;
13735                   /* Preserve return values, of any size.  */
13736 		  mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
13737 		  mask ^= 0xf;
13738 		  mask &= ~saved_regs_mask;
13739 		  reg = 0;
13740 		  while (bit_count (mask) * 4 > amount)
13741 		    {
13742 		      while ((mask & (1 << reg)) == 0)
13743 			reg++;
13744 		      mask &= ~(1 << reg);
13745 		    }
13746 		  if (bit_count (mask) * 4 == amount) {
13747 		      amount = 0;
13748 		      saved_regs_mask |= mask;
13749 		  }
13750 		}
13751 	    }
13752 
13753 	  if (amount)
13754 	    {
13755 	      operands[1] = operands[0];
13756 	      operands[2] = GEN_INT (amount);
13757 	      output_add_immediate (operands);
13758 	    }
13759 	  if (frame_pointer_needed)
13760 	    asm_fprintf (f, "\tmov\t%r, %r\n",
13761 			 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
13762 	}
13763 
13764       if (TARGET_FPA_EMU2)
13765 	{
13766 	  for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
13767 	    if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13768 	      asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
13769 			   reg, SP_REGNUM);
13770 	}
13771       else
13772 	{
13773 	  start_reg = FIRST_FPA_REGNUM;
13774 
13775 	  for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
13776 	    {
13777 	      if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13778 		{
13779 		  if (reg - start_reg == 3)
13780 		    {
13781 		      asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
13782 				   start_reg, SP_REGNUM);
13783 		      start_reg = reg + 1;
13784 		    }
13785 		}
13786 	      else
13787 		{
13788 		  if (reg != start_reg)
13789 		    asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
13790 				 start_reg, reg - start_reg,
13791 				 SP_REGNUM);
13792 
13793 		  start_reg = reg + 1;
13794 		}
13795 	    }
13796 
13797 	  /* Just in case the last register checked also needs unstacking.  */
13798 	  if (reg != start_reg)
13799 	    asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
13800 			 start_reg, reg - start_reg, SP_REGNUM);
13801 	}
13802 
13803       if (TARGET_HARD_FLOAT && TARGET_VFP)
13804 	{
13805 	  int end_reg = LAST_VFP_REGNUM + 1;
13806 
13807 	  /* Scan the registers in reverse order.  We need to match
13808 	     any groupings made in the prologue and generate matching
13809 	     pop operations.  */
13810 	  for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
13811 	    {
13812 	      if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13813 		  && (!df_regs_ever_live_p (reg + 1)
13814 		      || call_used_regs[reg + 1]))
13815 		{
13816 		  if (end_reg > reg + 2)
13817 		    vfp_output_fldmd (f, SP_REGNUM,
13818 				      (reg + 2 - FIRST_VFP_REGNUM) / 2,
13819 				      (end_reg - (reg + 2)) / 2);
13820 		  end_reg = reg;
13821 		}
13822 	    }
13823 	  if (end_reg > reg + 2)
13824 	    vfp_output_fldmd (f, SP_REGNUM, 0,
13825 			      (end_reg - (reg + 2)) / 2);
13826 	}
13827 
13828       if (TARGET_IWMMXT)
13829 	for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
13830 	  if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13831 	    asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
13832 
13833       /* If we can, restore the LR into the PC.  */
13834       if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
13835 	  && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
13836 	  && !IS_STACKALIGN (func_type)
13837 	  && really_return
13838 	  && crtl->args.pretend_args_size == 0
13839 	  && saved_regs_mask & (1 << LR_REGNUM)
13840 	  && !crtl->calls_eh_return)
13841 	{
13842 	  saved_regs_mask &= ~ (1 << LR_REGNUM);
13843 	  saved_regs_mask |=   (1 << PC_REGNUM);
13844 	  rfe = IS_INTERRUPT (func_type);
13845 	}
13846       else
13847 	rfe = 0;
13848 
13849       /* Load the registers off the stack.  If we only have one register
13850 	 to load use the LDR instruction - it is faster.  For Thumb-2
13851 	 always use pop and the assembler will pick the best instruction.*/
13852       if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
13853 	  && !IS_INTERRUPT(func_type))
13854 	{
13855 	  asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
13856 	}
13857       else if (saved_regs_mask)
13858 	{
13859 	  if (saved_regs_mask & (1 << SP_REGNUM))
13860 	    /* Note - write back to the stack register is not enabled
13861 	       (i.e. "ldmfd sp!...").  We know that the stack pointer is
13862 	       in the list of registers and if we add writeback the
13863 	       instruction becomes UNPREDICTABLE.  */
13864 	    print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
13865 			     rfe);
13866 	  else if (TARGET_ARM)
13867 	    print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
13868 			     rfe);
13869 	  else
13870 	    print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
13871 	}
13872 
13873       if (crtl->args.pretend_args_size)
13874 	{
13875 	  /* Unwind the pre-pushed regs.  */
13876 	  operands[0] = operands[1] = stack_pointer_rtx;
13877 	  operands[2] = GEN_INT (crtl->args.pretend_args_size);
13878 	  output_add_immediate (operands);
13879 	}
13880     }
13881 
13882   /* We may have already restored PC directly from the stack.  */
13883   if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
13884     return "";
13885 
13886   /* Stack adjustment for exception handler.  */
13887   if (crtl->calls_eh_return)
13888     asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
13889 		 ARM_EH_STACKADJ_REGNUM);
13890 
13891   /* Generate the return instruction.  */
13892   switch ((int) ARM_FUNC_TYPE (func_type))
13893     {
13894     case ARM_FT_ISR:
13895     case ARM_FT_FIQ:
13896       asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
13897       break;
13898 
13899     case ARM_FT_EXCEPTION:
13900       asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
13901       break;
13902 
13903     case ARM_FT_INTERWORKED:
13904       asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
13905       break;
13906 
13907     default:
13908       if (IS_STACKALIGN (func_type))
13909 	{
13910 	  /* See comment in arm_expand_prologue.  */
13911 	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
13912 	}
13913       if (arm_arch5 || arm_arch4t)
13914 	asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
13915       else
13916 	asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
13917       break;
13918     }
13919 
13920   return "";
13921 }
13922 
13923 static void
13924 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
13925 			      HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
13926 {
13927   arm_stack_offsets *offsets;
13928 
13929   if (TARGET_THUMB1)
13930     {
13931       int regno;
13932 
13933       /* Emit any call-via-reg trampolines that are needed for v4t support
13934 	 of call_reg and call_value_reg type insns.  */
13935       for (regno = 0; regno < LR_REGNUM; regno++)
13936 	{
13937 	  rtx label = cfun->machine->call_via[regno];
13938 
13939 	  if (label != NULL)
13940 	    {
13941 	      switch_to_section (function_section (current_function_decl));
13942 	      targetm.asm_out.internal_label (asm_out_file, "L",
13943 					      CODE_LABEL_NUMBER (label));
13944 	      asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
13945 	    }
13946 	}
13947 
13948       /* ??? Probably not safe to set this here, since it assumes that a
13949 	 function will be emitted as assembly immediately after we generate
13950 	 RTL for it.  This does not happen for inline functions.  */
13951       cfun->machine->return_used_this_function = 0;
13952     }
13953   else /* TARGET_32BIT */
13954     {
13955       /* We need to take into account any stack-frame rounding.  */
13956       offsets = arm_get_frame_offsets ();
13957 
13958       gcc_assert (!use_return_insn (FALSE, NULL)
13959 		  || (cfun->machine->return_used_this_function != 0)
13960 		  || offsets->saved_regs == offsets->outgoing_args
13961 		  || frame_pointer_needed);
13962 
13963       /* Reset the ARM-specific per-function variables.  */
13964       after_arm_reorg = 0;
13965     }
13966 }
13967 
13968 /* Generate and emit an insn that we will recognize as a push_multi.
13969    Unfortunately, since this insn does not reflect very well the actual
13970    semantics of the operation, we need to annotate the insn for the benefit
13971    of DWARF2 frame unwind information.  */
13972 static rtx
13973 emit_multi_reg_push (unsigned long mask)
13974 {
13975   int num_regs = 0;
13976   int num_dwarf_regs;
13977   int i, j;
13978   rtx par;
13979   rtx dwarf;
13980   int dwarf_par_index;
13981   rtx tmp, reg;
13982 
13983   for (i = 0; i <= LAST_ARM_REGNUM; i++)
13984     if (mask & (1 << i))
13985       num_regs++;
13986 
13987   gcc_assert (num_regs && num_regs <= 16);
13988 
13989   /* We don't record the PC in the dwarf frame information.  */
13990   num_dwarf_regs = num_regs;
13991   if (mask & (1 << PC_REGNUM))
13992     num_dwarf_regs--;
13993 
13994   /* For the body of the insn we are going to generate an UNSPEC in
13995      parallel with several USEs.  This allows the insn to be recognized
13996      by the push_multi pattern in the arm.md file.
13997 
13998      The body of the insn looks something like this:
13999 
14000        (parallel [
14001            (set (mem:BLK (pre_modify:SI (reg:SI sp)
14002 	                                (const_int:SI <num>)))
14003 	        (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
14004            (use (reg:SI XX))
14005            (use (reg:SI YY))
14006 	   ...
14007         ])
14008 
14009      For the frame note however, we try to be more explicit and actually
14010      show each register being stored into the stack frame, plus a (single)
14011      decrement of the stack pointer.  We do it this way in order to be
14012      friendly to the stack unwinding code, which only wants to see a single
14013      stack decrement per instruction.  The RTL we generate for the note looks
14014      something like this:
14015 
14016       (sequence [
14017            (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
14018            (set (mem:SI (reg:SI sp)) (reg:SI r4))
14019            (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
14020            (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
14021 	   ...
14022         ])
14023 
14024      FIXME:: In an ideal world the PRE_MODIFY would not exist and
14025      instead we'd have a parallel expression detailing all
14026      the stores to the various memory addresses so that debug
14027      information is more up-to-date. Remember however while writing
14028      this to take care of the constraints with the push instruction.
14029 
14030      Note also that this has to be taken care of for the VFP registers.
14031 
14032      For more see PR43399.  */
14033 
14034   par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
14035   dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
14036   dwarf_par_index = 1;
14037 
14038   for (i = 0; i <= LAST_ARM_REGNUM; i++)
14039     {
14040       if (mask & (1 << i))
14041 	{
14042 	  reg = gen_rtx_REG (SImode, i);
14043 
14044 	  XVECEXP (par, 0, 0)
14045 	    = gen_rtx_SET (VOIDmode,
14046 			   gen_frame_mem
14047 			   (BLKmode,
14048 			    gen_rtx_PRE_MODIFY (Pmode,
14049 						stack_pointer_rtx,
14050 						plus_constant
14051 						(stack_pointer_rtx,
14052 						 -4 * num_regs))
14053 			    ),
14054 			   gen_rtx_UNSPEC (BLKmode,
14055 					   gen_rtvec (1, reg),
14056 					   UNSPEC_PUSH_MULT));
14057 
14058 	  if (i != PC_REGNUM)
14059 	    {
14060 	      tmp = gen_rtx_SET (VOIDmode,
14061 				 gen_frame_mem (SImode, stack_pointer_rtx),
14062 				 reg);
14063 	      RTX_FRAME_RELATED_P (tmp) = 1;
14064 	      XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
14065 	      dwarf_par_index++;
14066 	    }
14067 
14068 	  break;
14069 	}
14070     }
14071 
14072   for (j = 1, i++; j < num_regs; i++)
14073     {
14074       if (mask & (1 << i))
14075 	{
14076 	  reg = gen_rtx_REG (SImode, i);
14077 
14078 	  XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
14079 
14080 	  if (i != PC_REGNUM)
14081 	    {
14082 	      tmp
14083 		= gen_rtx_SET (VOIDmode,
14084 			       gen_frame_mem
14085 			       (SImode,
14086 				plus_constant (stack_pointer_rtx,
14087 					       4 * j)),
14088 			       reg);
14089 	      RTX_FRAME_RELATED_P (tmp) = 1;
14090 	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
14091 	    }
14092 
14093 	  j++;
14094 	}
14095     }
14096 
14097   par = emit_insn (par);
14098 
14099   tmp = gen_rtx_SET (VOIDmode,
14100 		     stack_pointer_rtx,
14101 		     plus_constant (stack_pointer_rtx, -4 * num_regs));
14102   RTX_FRAME_RELATED_P (tmp) = 1;
14103   XVECEXP (dwarf, 0, 0) = tmp;
14104 
14105   add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14106 
14107   return par;
14108 }
14109 
14110 /* Calculate the size of the return value that is passed in registers.  */
14111 static unsigned
14112 arm_size_return_regs (void)
14113 {
14114   enum machine_mode mode;
14115 
14116   if (crtl->return_rtx != 0)
14117     mode = GET_MODE (crtl->return_rtx);
14118   else
14119     mode = DECL_MODE (DECL_RESULT (current_function_decl));
14120 
14121   return GET_MODE_SIZE (mode);
14122 }
14123 
14124 static rtx
14125 emit_sfm (int base_reg, int count)
14126 {
14127   rtx par;
14128   rtx dwarf;
14129   rtx tmp, reg;
14130   int i;
14131 
14132   par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
14133   dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
14134 
14135   reg = gen_rtx_REG (XFmode, base_reg++);
14136 
14137   XVECEXP (par, 0, 0)
14138     = gen_rtx_SET (VOIDmode,
14139 		   gen_frame_mem
14140 		   (BLKmode,
14141 		    gen_rtx_PRE_MODIFY (Pmode,
14142 					stack_pointer_rtx,
14143 					plus_constant
14144 					(stack_pointer_rtx,
14145 					 -12 * count))
14146 		    ),
14147 		   gen_rtx_UNSPEC (BLKmode,
14148 				   gen_rtvec (1, reg),
14149 				   UNSPEC_PUSH_MULT));
14150   tmp = gen_rtx_SET (VOIDmode,
14151 		     gen_frame_mem (XFmode, stack_pointer_rtx), reg);
14152   RTX_FRAME_RELATED_P (tmp) = 1;
14153   XVECEXP (dwarf, 0, 1) = tmp;
14154 
14155   for (i = 1; i < count; i++)
14156     {
14157       reg = gen_rtx_REG (XFmode, base_reg++);
14158       XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
14159 
14160       tmp = gen_rtx_SET (VOIDmode,
14161 			 gen_frame_mem (XFmode,
14162 					plus_constant (stack_pointer_rtx,
14163 						       i * 12)),
14164 			 reg);
14165       RTX_FRAME_RELATED_P (tmp) = 1;
14166       XVECEXP (dwarf, 0, i + 1) = tmp;
14167     }
14168 
14169   tmp = gen_rtx_SET (VOIDmode,
14170 		     stack_pointer_rtx,
14171 		     plus_constant (stack_pointer_rtx, -12 * count));
14172 
14173   RTX_FRAME_RELATED_P (tmp) = 1;
14174   XVECEXP (dwarf, 0, 0) = tmp;
14175 
14176   par = emit_insn (par);
14177   add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14178 
14179   return par;
14180 }
14181 
14182 
14183 /* Return true if the current function needs to save/restore LR.  */
14184 
14185 static bool
14186 thumb_force_lr_save (void)
14187 {
14188   return !cfun->machine->lr_save_eliminated
14189 	 && (!leaf_function_p ()
14190 	     || thumb_far_jump_used_p ()
14191 	     || df_regs_ever_live_p (LR_REGNUM));
14192 }
14193 
14194 
14195 /* Compute the distance from register FROM to register TO.
14196    These can be the arg pointer (26), the soft frame pointer (25),
14197    the stack pointer (13) or the hard frame pointer (11).
14198    In thumb mode r7 is used as the soft frame pointer, if needed.
14199    Typical stack layout looks like this:
14200 
14201        old stack pointer -> |    |
14202                              ----
14203                             |    | \
14204                             |    |   saved arguments for
14205                             |    |   vararg functions
14206 			    |    | /
14207                               --
14208    hard FP & arg pointer -> |    | \
14209                             |    |   stack
14210                             |    |   frame
14211                             |    | /
14212                               --
14213                             |    | \
14214                             |    |   call saved
14215                             |    |   registers
14216       soft frame pointer -> |    | /
14217                               --
14218                             |    | \
14219                             |    |   local
14220                             |    |   variables
14221      locals base pointer -> |    | /
14222                               --
14223                             |    | \
14224                             |    |   outgoing
14225                             |    |   arguments
14226    current stack pointer -> |    | /
14227                               --
14228 
14229   For a given function some or all of these stack components
14230   may not be needed, giving rise to the possibility of
14231   eliminating some of the registers.
14232 
14233   The values returned by this function must reflect the behavior
14234   of arm_expand_prologue() and arm_compute_save_reg_mask().
14235 
14236   The sign of the number returned reflects the direction of stack
14237   growth, so the values are positive for all eliminations except
14238   from the soft frame pointer to the hard frame pointer.
14239 
14240   SFP may point just inside the local variables block to ensure correct
14241   alignment.  */
14242 
14243 
14244 /* Calculate stack offsets.  These are used to calculate register elimination
14245    offsets and in prologue/epilogue code.  Also calculates which registers
14246    should be saved.  */
14247 
14248 static arm_stack_offsets *
14249 arm_get_frame_offsets (void)
14250 {
14251   struct arm_stack_offsets *offsets;
14252   unsigned long func_type;
14253   int leaf;
14254   int saved;
14255   int core_saved;
14256   HOST_WIDE_INT frame_size;
14257   int i;
14258 
14259   offsets = &cfun->machine->stack_offsets;
14260 
14261   /* We need to know if we are a leaf function.  Unfortunately, it
14262      is possible to be called after start_sequence has been called,
14263      which causes get_insns to return the insns for the sequence,
14264      not the function, which will cause leaf_function_p to return
14265      the incorrect result.
14266 
14267      to know about leaf functions once reload has completed, and the
14268      frame size cannot be changed after that time, so we can safely
14269      use the cached value.  */
14270 
14271   if (reload_completed)
14272     return offsets;
14273 
14274   /* Initially this is the size of the local variables.  It will translated
14275      into an offset once we have determined the size of preceding data.  */
14276   frame_size = ROUND_UP_WORD (get_frame_size ());
14277 
14278   leaf = leaf_function_p ();
14279 
14280   /* Space for variadic functions.  */
14281   offsets->saved_args = crtl->args.pretend_args_size;
14282 
14283   /* In Thumb mode this is incorrect, but never used.  */
14284   offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
14285                    arm_compute_static_chain_stack_bytes();
14286 
14287   if (TARGET_32BIT)
14288     {
14289       unsigned int regno;
14290 
14291       offsets->saved_regs_mask = arm_compute_save_reg_mask ();
14292       core_saved = bit_count (offsets->saved_regs_mask) * 4;
14293       saved = core_saved;
14294 
14295       /* We know that SP will be doubleword aligned on entry, and we must
14296 	 preserve that condition at any subroutine call.  We also require the
14297 	 soft frame pointer to be doubleword aligned.  */
14298 
14299       if (TARGET_REALLY_IWMMXT)
14300 	{
14301 	  /* Check for the call-saved iWMMXt registers.  */
14302 	  for (regno = FIRST_IWMMXT_REGNUM;
14303 	       regno <= LAST_IWMMXT_REGNUM;
14304 	       regno++)
14305 	    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
14306 	      saved += 8;
14307 	}
14308 
14309       func_type = arm_current_func_type ();
14310       if (! IS_VOLATILE (func_type))
14311 	{
14312 	  /* Space for saved FPA registers.  */
14313 	  for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
14314 	    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
14315 	    saved += 12;
14316 
14317 	  /* Space for saved VFP registers.  */
14318 	  if (TARGET_HARD_FLOAT && TARGET_VFP)
14319 	    saved += arm_get_vfp_saved_size ();
14320 	}
14321     }
14322   else /* TARGET_THUMB1 */
14323     {
14324       offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
14325       core_saved = bit_count (offsets->saved_regs_mask) * 4;
14326       saved = core_saved;
14327       if (TARGET_BACKTRACE)
14328 	saved += 16;
14329     }
14330 
14331   /* Saved registers include the stack frame.  */
14332   offsets->saved_regs = offsets->saved_args + saved +
14333                         arm_compute_static_chain_stack_bytes();
14334   offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
14335   /* A leaf function does not need any stack alignment if it has nothing
14336      on the stack.  */
14337   if (leaf && frame_size == 0)
14338     {
14339       offsets->outgoing_args = offsets->soft_frame;
14340       offsets->locals_base = offsets->soft_frame;
14341       return offsets;
14342     }
14343 
14344   /* Ensure SFP has the correct alignment.  */
14345   if (ARM_DOUBLEWORD_ALIGN
14346       && (offsets->soft_frame & 7))
14347     {
14348       offsets->soft_frame += 4;
14349       /* Try to align stack by pushing an extra reg.  Don't bother doing this
14350          when there is a stack frame as the alignment will be rolled into
14351 	 the normal stack adjustment.  */
14352       if (frame_size + crtl->outgoing_args_size == 0)
14353 	{
14354 	  int reg = -1;
14355 
14356 	  /* If it is safe to use r3, then do so.  This sometimes
14357 	     generates better code on Thumb-2 by avoiding the need to
14358 	     use 32-bit push/pop instructions.  */
14359 	  if (!crtl->tail_call_emit
14360 	      && arm_size_return_regs () <= 12
14361 	      && (offsets->saved_regs_mask & (1 << 3)) == 0)
14362 	    {
14363 	      reg = 3;
14364 	    }
14365 	  else
14366 	    for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
14367 	      {
14368 		if ((offsets->saved_regs_mask & (1 << i)) == 0)
14369 		  {
14370 		    reg = i;
14371 		    break;
14372 		  }
14373 	      }
14374 
14375 	  if (reg != -1)
14376 	    {
14377 	      offsets->saved_regs += 4;
14378 	      offsets->saved_regs_mask |= (1 << reg);
14379 	    }
14380 	}
14381     }
14382 
14383   offsets->locals_base = offsets->soft_frame + frame_size;
14384   offsets->outgoing_args = (offsets->locals_base
14385 			    + crtl->outgoing_args_size);
14386 
14387   if (ARM_DOUBLEWORD_ALIGN)
14388     {
14389       /* Ensure SP remains doubleword aligned.  */
14390       if (offsets->outgoing_args & 7)
14391 	offsets->outgoing_args += 4;
14392       gcc_assert (!(offsets->outgoing_args & 7));
14393     }
14394 
14395   return offsets;
14396 }
14397 
14398 
14399 /* Calculate the relative offsets for the different stack pointers.  Positive
14400    offsets are in the direction of stack growth.  */
14401 
14402 HOST_WIDE_INT
14403 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
14404 {
14405   arm_stack_offsets *offsets;
14406 
14407   offsets = arm_get_frame_offsets ();
14408 
14409   /* OK, now we have enough information to compute the distances.
14410      There must be an entry in these switch tables for each pair
14411      of registers in ELIMINABLE_REGS, even if some of the entries
14412      seem to be redundant or useless.  */
14413   switch (from)
14414     {
14415     case ARG_POINTER_REGNUM:
14416       switch (to)
14417 	{
14418 	case THUMB_HARD_FRAME_POINTER_REGNUM:
14419 	  return 0;
14420 
14421 	case FRAME_POINTER_REGNUM:
14422 	  /* This is the reverse of the soft frame pointer
14423 	     to hard frame pointer elimination below.  */
14424 	  return offsets->soft_frame - offsets->saved_args;
14425 
14426 	case ARM_HARD_FRAME_POINTER_REGNUM:
14427 	  /* This is only non-zero in the case where the static chain register
14428 	     is stored above the frame.  */
14429 	  return offsets->frame - offsets->saved_args - 4;
14430 
14431 	case STACK_POINTER_REGNUM:
14432 	  /* If nothing has been pushed on the stack at all
14433 	     then this will return -4.  This *is* correct!  */
14434 	  return offsets->outgoing_args - (offsets->saved_args + 4);
14435 
14436 	default:
14437 	  gcc_unreachable ();
14438 	}
14439       gcc_unreachable ();
14440 
14441     case FRAME_POINTER_REGNUM:
14442       switch (to)
14443 	{
14444 	case THUMB_HARD_FRAME_POINTER_REGNUM:
14445 	  return 0;
14446 
14447 	case ARM_HARD_FRAME_POINTER_REGNUM:
14448 	  /* The hard frame pointer points to the top entry in the
14449 	     stack frame.  The soft frame pointer to the bottom entry
14450 	     in the stack frame.  If there is no stack frame at all,
14451 	     then they are identical.  */
14452 
14453 	  return offsets->frame - offsets->soft_frame;
14454 
14455 	case STACK_POINTER_REGNUM:
14456 	  return offsets->outgoing_args - offsets->soft_frame;
14457 
14458 	default:
14459 	  gcc_unreachable ();
14460 	}
14461       gcc_unreachable ();
14462 
14463     default:
14464       /* You cannot eliminate from the stack pointer.
14465 	 In theory you could eliminate from the hard frame
14466 	 pointer to the stack pointer, but this will never
14467 	 happen, since if a stack frame is not needed the
14468 	 hard frame pointer will never be used.  */
14469       gcc_unreachable ();
14470     }
14471 }
14472 
14473 /* Given FROM and TO register numbers, say whether this elimination is
14474    allowed.  Frame pointer elimination is automatically handled.
14475 
14476    All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
14477    HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
14478    pointer, we must eliminate FRAME_POINTER_REGNUM into
14479    HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
14480    ARG_POINTER_REGNUM.  */
14481 
14482 bool
14483 arm_can_eliminate (const int from, const int to)
14484 {
14485   return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
14486           (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
14487           (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
14488           (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
14489            true);
14490 }
14491 
14492 /* Emit RTL to save coprocessor registers on function entry.  Returns the
14493    number of bytes pushed.  */
14494 
14495 static int
14496 arm_save_coproc_regs(void)
14497 {
14498   int saved_size = 0;
14499   unsigned reg;
14500   unsigned start_reg;
14501   rtx insn;
14502 
14503   for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14504     if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
14505       {
14506 	insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
14507 	insn = gen_rtx_MEM (V2SImode, insn);
14508 	insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
14509 	RTX_FRAME_RELATED_P (insn) = 1;
14510 	saved_size += 8;
14511       }
14512 
14513   /* Save any floating point call-saved registers used by this
14514      function.  */
14515   if (TARGET_FPA_EMU2)
14516     {
14517       for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14518 	if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14519 	  {
14520 	    insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
14521 	    insn = gen_rtx_MEM (XFmode, insn);
14522 	    insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
14523 	    RTX_FRAME_RELATED_P (insn) = 1;
14524 	    saved_size += 12;
14525 	  }
14526     }
14527   else
14528     {
14529       start_reg = LAST_FPA_REGNUM;
14530 
14531       for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14532 	{
14533 	  if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14534 	    {
14535 	      if (start_reg - reg == 3)
14536 		{
14537 		  insn = emit_sfm (reg, 4);
14538 		  RTX_FRAME_RELATED_P (insn) = 1;
14539 		  saved_size += 48;
14540 		  start_reg = reg - 1;
14541 		}
14542 	    }
14543 	  else
14544 	    {
14545 	      if (start_reg != reg)
14546 		{
14547 		  insn = emit_sfm (reg + 1, start_reg - reg);
14548 		  RTX_FRAME_RELATED_P (insn) = 1;
14549 		  saved_size += (start_reg - reg) * 12;
14550 		}
14551 	      start_reg = reg - 1;
14552 	    }
14553 	}
14554 
14555       if (start_reg != reg)
14556 	{
14557 	  insn = emit_sfm (reg + 1, start_reg - reg);
14558 	  saved_size += (start_reg - reg) * 12;
14559 	  RTX_FRAME_RELATED_P (insn) = 1;
14560 	}
14561     }
14562   if (TARGET_HARD_FLOAT && TARGET_VFP)
14563     {
14564       start_reg = FIRST_VFP_REGNUM;
14565 
14566       for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14567 	{
14568 	  if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14569 	      && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14570 	    {
14571 	      if (start_reg != reg)
14572 		saved_size += vfp_emit_fstmd (start_reg,
14573 					      (reg - start_reg) / 2);
14574 	      start_reg = reg + 2;
14575 	    }
14576 	}
14577       if (start_reg != reg)
14578 	saved_size += vfp_emit_fstmd (start_reg,
14579 				      (reg - start_reg) / 2);
14580     }
14581   return saved_size;
14582 }
14583 
14584 
14585 /* Set the Thumb frame pointer from the stack pointer.  */
14586 
14587 static void
14588 thumb_set_frame_pointer (arm_stack_offsets *offsets)
14589 {
14590   HOST_WIDE_INT amount;
14591   rtx insn, dwarf;
14592 
14593   amount = offsets->outgoing_args - offsets->locals_base;
14594   if (amount < 1024)
14595     insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14596 				  stack_pointer_rtx, GEN_INT (amount)));
14597   else
14598     {
14599       emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
14600       /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
14601          expects the first two operands to be the same.  */
14602       if (TARGET_THUMB2)
14603 	{
14604 	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14605 					stack_pointer_rtx,
14606 					hard_frame_pointer_rtx));
14607 	}
14608       else
14609 	{
14610 	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14611 					hard_frame_pointer_rtx,
14612 					stack_pointer_rtx));
14613 	}
14614       dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
14615 			   plus_constant (stack_pointer_rtx, amount));
14616       RTX_FRAME_RELATED_P (dwarf) = 1;
14617       add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14618     }
14619 
14620   RTX_FRAME_RELATED_P (insn) = 1;
14621 }
14622 
14623 /* Generate the prologue instructions for entry into an ARM or Thumb-2
14624    function.  */
14625 void
14626 arm_expand_prologue (void)
14627 {
14628   rtx amount;
14629   rtx insn;
14630   rtx ip_rtx;
14631   unsigned long live_regs_mask;
14632   unsigned long func_type;
14633   int fp_offset = 0;
14634   int saved_pretend_args = 0;
14635   int saved_regs = 0;
14636   unsigned HOST_WIDE_INT args_to_push;
14637   arm_stack_offsets *offsets;
14638 
14639   func_type = arm_current_func_type ();
14640 
14641   /* Naked functions don't have prologues.  */
14642   if (IS_NAKED (func_type))
14643     return;
14644 
14645   /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
14646   args_to_push = crtl->args.pretend_args_size;
14647 
14648   /* Compute which register we will have to save onto the stack.  */
14649   offsets = arm_get_frame_offsets ();
14650   live_regs_mask = offsets->saved_regs_mask;
14651 
14652   ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
14653 
14654   if (IS_STACKALIGN (func_type))
14655     {
14656       rtx dwarf;
14657       rtx r0;
14658       rtx r1;
14659       /* Handle a word-aligned stack pointer.  We generate the following:
14660 
14661 	  mov r0, sp
14662 	  bic r1, r0, #7
14663 	  mov sp, r1
14664 	  <save and restore r0 in normal prologue/epilogue>
14665 	  mov sp, r0
14666 	  bx lr
14667 
14668 	 The unwinder doesn't need to know about the stack realignment.
14669 	 Just tell it we saved SP in r0.  */
14670       gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
14671 
14672       r0 = gen_rtx_REG (SImode, 0);
14673       r1 = gen_rtx_REG (SImode, 1);
14674       /* Use a real rtvec rather than NULL_RTVEC so the rest of the
14675 	 compiler won't choke.  */
14676       dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
14677       dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
14678       insn = gen_movsi (r0, stack_pointer_rtx);
14679       RTX_FRAME_RELATED_P (insn) = 1;
14680       add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14681       emit_insn (insn);
14682       emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
14683       emit_insn (gen_movsi (stack_pointer_rtx, r1));
14684     }
14685 
14686   /* For APCS frames, if IP register is clobbered
14687      when creating frame, save that register in a special
14688      way.  */
14689   if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14690     {
14691       if (IS_INTERRUPT (func_type))
14692 	{
14693 	  /* Interrupt functions must not corrupt any registers.
14694 	     Creating a frame pointer however, corrupts the IP
14695 	     register, so we must push it first.  */
14696 	  insn = emit_multi_reg_push (1 << IP_REGNUM);
14697 
14698 	  /* Do not set RTX_FRAME_RELATED_P on this insn.
14699 	     The dwarf stack unwinding code only wants to see one
14700 	     stack decrement per function, and this is not it.  If
14701 	     this instruction is labeled as being part of the frame
14702 	     creation sequence then dwarf2out_frame_debug_expr will
14703 	     die when it encounters the assignment of IP to FP
14704 	     later on, since the use of SP here establishes SP as
14705 	     the CFA register and not IP.
14706 
14707 	     Anyway this instruction is not really part of the stack
14708 	     frame creation although it is part of the prologue.  */
14709 	}
14710       else if (IS_NESTED (func_type))
14711 	{
14712 	  /* The Static chain register is the same as the IP register
14713 	     used as a scratch register during stack frame creation.
14714 	     To get around this need to find somewhere to store IP
14715 	     whilst the frame is being created.  We try the following
14716 	     places in order:
14717 
14718 	       1. The last argument register.
14719 	       2. A slot on the stack above the frame.  (This only
14720 	          works if the function is not a varargs function).
14721 	       3. Register r3, after pushing the argument registers
14722 	          onto the stack.
14723 
14724 	     Note - we only need to tell the dwarf2 backend about the SP
14725 	     adjustment in the second variant; the static chain register
14726 	     doesn't need to be unwound, as it doesn't contain a value
14727 	     inherited from the caller.  */
14728 
14729 	  if (df_regs_ever_live_p (3) == false)
14730 	    insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
14731 	  else if (args_to_push == 0)
14732 	    {
14733 	      rtx dwarf;
14734 
14735 	      gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
14736 	      saved_regs += 4;
14737 
14738 	      insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
14739 	      insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
14740 	      fp_offset = 4;
14741 
14742 	      /* Just tell the dwarf backend that we adjusted SP.  */
14743 	      dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14744 				   plus_constant (stack_pointer_rtx,
14745 						  -fp_offset));
14746 	      RTX_FRAME_RELATED_P (insn) = 1;
14747 	      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14748 	    }
14749 	  else
14750 	    {
14751 	      /* Store the args on the stack.  */
14752 	      if (cfun->machine->uses_anonymous_args)
14753 		insn = emit_multi_reg_push
14754 		  ((0xf0 >> (args_to_push / 4)) & 0xf);
14755 	      else
14756 		insn = emit_insn
14757 		  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
14758 			       GEN_INT (- args_to_push)));
14759 
14760 	      RTX_FRAME_RELATED_P (insn) = 1;
14761 
14762 	      saved_pretend_args = 1;
14763 	      fp_offset = args_to_push;
14764 	      args_to_push = 0;
14765 
14766 	      /* Now reuse r3 to preserve IP.  */
14767 	      emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
14768 	    }
14769 	}
14770 
14771       insn = emit_set_insn (ip_rtx,
14772 			    plus_constant (stack_pointer_rtx, fp_offset));
14773       RTX_FRAME_RELATED_P (insn) = 1;
14774     }
14775 
14776   if (args_to_push)
14777     {
14778       /* Push the argument registers, or reserve space for them.  */
14779       if (cfun->machine->uses_anonymous_args)
14780 	insn = emit_multi_reg_push
14781 	  ((0xf0 >> (args_to_push / 4)) & 0xf);
14782       else
14783 	insn = emit_insn
14784 	  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
14785 		       GEN_INT (- args_to_push)));
14786       RTX_FRAME_RELATED_P (insn) = 1;
14787     }
14788 
14789   /* If this is an interrupt service routine, and the link register
14790      is going to be pushed, and we're not generating extra
14791      push of IP (needed when frame is needed and frame layout if apcs),
14792      subtracting four from LR now will mean that the function return
14793      can be done with a single instruction.  */
14794   if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
14795       && (live_regs_mask & (1 << LR_REGNUM)) != 0
14796       && !(frame_pointer_needed && TARGET_APCS_FRAME)
14797       && TARGET_ARM)
14798     {
14799       rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
14800 
14801       emit_set_insn (lr, plus_constant (lr, -4));
14802     }
14803 
14804   if (live_regs_mask)
14805     {
14806       saved_regs += bit_count (live_regs_mask) * 4;
14807       if (optimize_size && !frame_pointer_needed
14808 	  && saved_regs == offsets->saved_regs - offsets->saved_args)
14809 	{
14810 	  /* If no coprocessor registers are being pushed and we don't have
14811 	     to worry about a frame pointer then push extra registers to
14812 	     create the stack frame.  This is done is a way that does not
14813 	     alter the frame layout, so is independent of the epilogue.  */
14814 	  int n;
14815 	  int frame;
14816 	  n = 0;
14817 	  while (n < 8 && (live_regs_mask & (1 << n)) == 0)
14818 	    n++;
14819 	  frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
14820 	  if (frame && n * 4 >= frame)
14821 	    {
14822 	      n = frame / 4;
14823 	      live_regs_mask |= (1 << n) - 1;
14824 	      saved_regs += frame;
14825 	    }
14826 	}
14827       insn = emit_multi_reg_push (live_regs_mask);
14828       RTX_FRAME_RELATED_P (insn) = 1;
14829     }
14830 
14831   if (! IS_VOLATILE (func_type))
14832     saved_regs += arm_save_coproc_regs ();
14833 
14834   if (frame_pointer_needed && TARGET_ARM)
14835     {
14836       /* Create the new frame pointer.  */
14837       if (TARGET_APCS_FRAME)
14838 	{
14839 	  insn = GEN_INT (-(4 + args_to_push + fp_offset));
14840 	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
14841 	  RTX_FRAME_RELATED_P (insn) = 1;
14842 
14843 	  if (IS_NESTED (func_type))
14844 	    {
14845 	      /* Recover the static chain register.  */
14846 	      if (!df_regs_ever_live_p (3)
14847 		  || saved_pretend_args)
14848 		insn = gen_rtx_REG (SImode, 3);
14849 	      else /* if (crtl->args.pretend_args_size == 0) */
14850 		{
14851 		  insn = plus_constant (hard_frame_pointer_rtx, 4);
14852 		  insn = gen_frame_mem (SImode, insn);
14853 		}
14854 	      emit_set_insn (ip_rtx, insn);
14855 	      /* Add a USE to stop propagate_one_insn() from barfing.  */
14856 	      emit_insn (gen_prologue_use (ip_rtx));
14857 	    }
14858 	}
14859       else
14860 	{
14861 	  insn = GEN_INT (saved_regs - 4);
14862 	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14863 					stack_pointer_rtx, insn));
14864 	  RTX_FRAME_RELATED_P (insn) = 1;
14865 	}
14866     }
14867 
14868   if (offsets->outgoing_args != offsets->saved_args + saved_regs)
14869     {
14870       /* This add can produce multiple insns for a large constant, so we
14871 	 need to get tricky.  */
14872       rtx last = get_last_insn ();
14873 
14874       amount = GEN_INT (offsets->saved_args + saved_regs
14875 			- offsets->outgoing_args);
14876 
14877       insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
14878 				    amount));
14879       do
14880 	{
14881 	  last = last ? NEXT_INSN (last) : get_insns ();
14882 	  RTX_FRAME_RELATED_P (last) = 1;
14883 	}
14884       while (last != insn);
14885 
14886       /* If the frame pointer is needed, emit a special barrier that
14887 	 will prevent the scheduler from moving stores to the frame
14888 	 before the stack adjustment.  */
14889       if (frame_pointer_needed)
14890 	insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
14891 					 hard_frame_pointer_rtx));
14892     }
14893 
14894 
14895   if (frame_pointer_needed && TARGET_THUMB2)
14896     thumb_set_frame_pointer (offsets);
14897 
14898   if (flag_pic && arm_pic_register != INVALID_REGNUM)
14899     {
14900       unsigned long mask;
14901 
14902       mask = live_regs_mask;
14903       mask &= THUMB2_WORK_REGS;
14904       if (!IS_NESTED (func_type))
14905 	mask |= (1 << IP_REGNUM);
14906       arm_load_pic_register (mask);
14907     }
14908 
14909   /* If we are profiling, make sure no instructions are scheduled before
14910      the call to mcount.  Similarly if the user has requested no
14911      scheduling in the prolog.  Similarly if we want non-call exceptions
14912      using the EABI unwinder, to prevent faulting instructions from being
14913      swapped with a stack adjustment.  */
14914   if (crtl->profile || !TARGET_SCHED_PROLOG
14915       || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
14916     emit_insn (gen_blockage ());
14917 
14918   /* If the link register is being kept alive, with the return address in it,
14919      then make sure that it does not get reused by the ce2 pass.  */
14920   if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
14921     cfun->machine->lr_save_eliminated = 1;
14922 }
14923 
14924 /* Print condition code to STREAM.  Helper function for arm_print_operand.  */
14925 static void
14926 arm_print_condition (FILE *stream)
14927 {
14928   if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
14929     {
14930       /* Branch conversion is not implemented for Thumb-2.  */
14931       if (TARGET_THUMB)
14932 	{
14933 	  output_operand_lossage ("predicated Thumb instruction");
14934 	  return;
14935 	}
14936       if (current_insn_predicate != NULL)
14937 	{
14938 	  output_operand_lossage
14939 	    ("predicated instruction in conditional sequence");
14940 	  return;
14941 	}
14942 
14943       fputs (arm_condition_codes[arm_current_cc], stream);
14944     }
14945   else if (current_insn_predicate)
14946     {
14947       enum arm_cond_code code;
14948 
14949       if (TARGET_THUMB1)
14950 	{
14951 	  output_operand_lossage ("predicated Thumb instruction");
14952 	  return;
14953 	}
14954 
14955       code = get_arm_condition_code (current_insn_predicate);
14956       fputs (arm_condition_codes[code], stream);
14957     }
14958 }
14959 
14960 
14961 /* If CODE is 'd', then the X is a condition operand and the instruction
14962    should only be executed if the condition is true.
14963    if CODE is 'D', then the X is a condition operand and the instruction
14964    should only be executed if the condition is false: however, if the mode
14965    of the comparison is CCFPEmode, then always execute the instruction -- we
14966    do this because in these circumstances !GE does not necessarily imply LT;
14967    in these cases the instruction pattern will take care to make sure that
14968    an instruction containing %d will follow, thereby undoing the effects of
14969    doing this instruction unconditionally.
14970    If CODE is 'N' then X is a floating point operand that must be negated
14971    before output.
14972    If CODE is 'B' then output a bitwise inverted value of X (a const int).
14973    If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
14974 void
14975 arm_print_operand (FILE *stream, rtx x, int code)
14976 {
14977   switch (code)
14978     {
14979     case '@':
14980       fputs (ASM_COMMENT_START, stream);
14981       return;
14982 
14983     case '_':
14984       fputs (user_label_prefix, stream);
14985       return;
14986 
14987     case '|':
14988       fputs (REGISTER_PREFIX, stream);
14989       return;
14990 
14991     case '?':
14992       arm_print_condition (stream);
14993       return;
14994 
14995     case '(':
14996       /* Nothing in unified syntax, otherwise the current condition code.  */
14997       if (!TARGET_UNIFIED_ASM)
14998 	arm_print_condition (stream);
14999       break;
15000 
15001     case ')':
15002       /* The current condition code in unified syntax, otherwise nothing.  */
15003       if (TARGET_UNIFIED_ASM)
15004 	arm_print_condition (stream);
15005       break;
15006 
15007     case '.':
15008       /* The current condition code for a condition code setting instruction.
15009 	 Preceded by 's' in unified syntax, otherwise followed by 's'.  */
15010       if (TARGET_UNIFIED_ASM)
15011 	{
15012 	  fputc('s', stream);
15013 	  arm_print_condition (stream);
15014 	}
15015       else
15016 	{
15017 	  arm_print_condition (stream);
15018 	  fputc('s', stream);
15019 	}
15020       return;
15021 
15022     case '!':
15023       /* If the instruction is conditionally executed then print
15024 	 the current condition code, otherwise print 's'.  */
15025       gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
15026       if (current_insn_predicate)
15027 	arm_print_condition (stream);
15028       else
15029 	fputc('s', stream);
15030       break;
15031 
15032     /* %# is a "break" sequence. It doesn't output anything, but is used to
15033        separate e.g. operand numbers from following text, if that text consists
15034        of further digits which we don't want to be part of the operand
15035        number.  */
15036     case '#':
15037       return;
15038 
15039     case 'N':
15040       {
15041 	REAL_VALUE_TYPE r;
15042 	REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15043 	r = REAL_VALUE_NEGATE (r);
15044 	fprintf (stream, "%s", fp_const_from_val (&r));
15045       }
15046       return;
15047 
15048     /* An integer or symbol address without a preceding # sign.  */
15049     case 'c':
15050       switch (GET_CODE (x))
15051 	{
15052 	case CONST_INT:
15053 	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15054 	  break;
15055 
15056 	case SYMBOL_REF:
15057 	  output_addr_const (stream, x);
15058 	  break;
15059 
15060 	default:
15061 	  gcc_unreachable ();
15062 	}
15063       return;
15064 
15065     case 'B':
15066       if (GET_CODE (x) == CONST_INT)
15067 	{
15068 	  HOST_WIDE_INT val;
15069 	  val = ARM_SIGN_EXTEND (~INTVAL (x));
15070 	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
15071 	}
15072       else
15073 	{
15074 	  putc ('~', stream);
15075 	  output_addr_const (stream, x);
15076 	}
15077       return;
15078 
15079     case 'L':
15080       /* The low 16 bits of an immediate constant.  */
15081       fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
15082       return;
15083 
15084     case 'i':
15085       fprintf (stream, "%s", arithmetic_instr (x, 1));
15086       return;
15087 
15088     /* Truncate Cirrus shift counts.  */
15089     case 's':
15090       if (GET_CODE (x) == CONST_INT)
15091 	{
15092 	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
15093 	  return;
15094 	}
15095       arm_print_operand (stream, x, 0);
15096       return;
15097 
15098     case 'I':
15099       fprintf (stream, "%s", arithmetic_instr (x, 0));
15100       return;
15101 
15102     case 'S':
15103       {
15104 	HOST_WIDE_INT val;
15105 	const char *shift;
15106 
15107 	if (!shift_operator (x, SImode))
15108 	  {
15109 	    output_operand_lossage ("invalid shift operand");
15110 	    break;
15111 	  }
15112 
15113 	shift = shift_op (x, &val);
15114 
15115 	if (shift)
15116 	  {
15117 	    fprintf (stream, ", %s ", shift);
15118 	    if (val == -1)
15119 	      arm_print_operand (stream, XEXP (x, 1), 0);
15120 	    else
15121 	      fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
15122 	  }
15123       }
15124       return;
15125 
15126       /* An explanation of the 'Q', 'R' and 'H' register operands:
15127 
15128 	 In a pair of registers containing a DI or DF value the 'Q'
15129 	 operand returns the register number of the register containing
15130 	 the least significant part of the value.  The 'R' operand returns
15131 	 the register number of the register containing the most
15132 	 significant part of the value.
15133 
15134 	 The 'H' operand returns the higher of the two register numbers.
15135 	 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
15136 	 same as the 'Q' operand, since the most significant part of the
15137 	 value is held in the lower number register.  The reverse is true
15138 	 on systems where WORDS_BIG_ENDIAN is false.
15139 
15140 	 The purpose of these operands is to distinguish between cases
15141 	 where the endian-ness of the values is important (for example
15142 	 when they are added together), and cases where the endian-ness
15143 	 is irrelevant, but the order of register operations is important.
15144 	 For example when loading a value from memory into a register
15145 	 pair, the endian-ness does not matter.  Provided that the value
15146 	 from the lower memory address is put into the lower numbered
15147 	 register, and the value from the higher address is put into the
15148 	 higher numbered register, the load will work regardless of whether
15149 	 the value being loaded is big-wordian or little-wordian.  The
15150 	 order of the two register loads can matter however, if the address
15151 	 of the memory location is actually held in one of the registers
15152 	 being overwritten by the load.  */
15153     case 'Q':
15154       if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15155 	{
15156 	  output_operand_lossage ("invalid operand for code '%c'", code);
15157 	  return;
15158 	}
15159 
15160       asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
15161       return;
15162 
15163     case 'R':
15164       if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15165 	{
15166 	  output_operand_lossage ("invalid operand for code '%c'", code);
15167 	  return;
15168 	}
15169 
15170       asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
15171       return;
15172 
15173     case 'H':
15174       if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15175 	{
15176 	  output_operand_lossage ("invalid operand for code '%c'", code);
15177 	  return;
15178 	}
15179 
15180       asm_fprintf (stream, "%r", REGNO (x) + 1);
15181       return;
15182 
15183     case 'J':
15184       if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15185 	{
15186 	  output_operand_lossage ("invalid operand for code '%c'", code);
15187 	  return;
15188 	}
15189 
15190       asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
15191       return;
15192 
15193     case 'K':
15194       if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15195 	{
15196 	  output_operand_lossage ("invalid operand for code '%c'", code);
15197 	  return;
15198 	}
15199 
15200       asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
15201       return;
15202 
15203     case 'm':
15204       asm_fprintf (stream, "%r",
15205 		   GET_CODE (XEXP (x, 0)) == REG
15206 		   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
15207       return;
15208 
15209     case 'M':
15210       asm_fprintf (stream, "{%r-%r}",
15211 		   REGNO (x),
15212 		   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
15213       return;
15214 
15215     /* Like 'M', but writing doubleword vector registers, for use by Neon
15216        insns.  */
15217     case 'h':
15218       {
15219         int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
15220         int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
15221         if (numregs == 1)
15222           asm_fprintf (stream, "{d%d}", regno);
15223         else
15224           asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
15225       }
15226       return;
15227 
15228     case 'd':
15229       /* CONST_TRUE_RTX means always -- that's the default.  */
15230       if (x == const_true_rtx)
15231 	return;
15232 
15233       if (!COMPARISON_P (x))
15234 	{
15235 	  output_operand_lossage ("invalid operand for code '%c'", code);
15236 	  return;
15237 	}
15238 
15239       fputs (arm_condition_codes[get_arm_condition_code (x)],
15240 	     stream);
15241       return;
15242 
15243     case 'D':
15244       /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
15245 	 want to do that.  */
15246       if (x == const_true_rtx)
15247 	{
15248 	  output_operand_lossage ("instruction never executed");
15249 	  return;
15250 	}
15251       if (!COMPARISON_P (x))
15252 	{
15253 	  output_operand_lossage ("invalid operand for code '%c'", code);
15254 	  return;
15255 	}
15256 
15257       fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
15258 				 (get_arm_condition_code (x))],
15259 	     stream);
15260       return;
15261 
15262     /* Cirrus registers can be accessed in a variety of ways:
15263          single floating point (f)
15264 	 double floating point (d)
15265 	 32bit integer         (fx)
15266 	 64bit integer         (dx).  */
15267     case 'W':			/* Cirrus register in F mode.  */
15268     case 'X':			/* Cirrus register in D mode.  */
15269     case 'Y':			/* Cirrus register in FX mode.  */
15270     case 'Z':			/* Cirrus register in DX mode.  */
15271       gcc_assert (GET_CODE (x) == REG
15272 		  && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
15273 
15274       fprintf (stream, "mv%s%s",
15275 	       code == 'W' ? "f"
15276 	       : code == 'X' ? "d"
15277 	       : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
15278 
15279       return;
15280 
15281     /* Print cirrus register in the mode specified by the register's mode.  */
15282     case 'V':
15283       {
15284 	int mode = GET_MODE (x);
15285 
15286 	if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
15287 	  {
15288 	    output_operand_lossage ("invalid operand for code '%c'", code);
15289 	    return;
15290 	  }
15291 
15292 	fprintf (stream, "mv%s%s",
15293 		 mode == DFmode ? "d"
15294 		 : mode == SImode ? "fx"
15295 		 : mode == DImode ? "dx"
15296 		 : "f", reg_names[REGNO (x)] + 2);
15297 
15298 	return;
15299       }
15300 
15301     case 'U':
15302       if (GET_CODE (x) != REG
15303 	  || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
15304 	  || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
15305 	/* Bad value for wCG register number.  */
15306 	{
15307 	  output_operand_lossage ("invalid operand for code '%c'", code);
15308 	  return;
15309 	}
15310 
15311       else
15312 	fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
15313       return;
15314 
15315       /* Print an iWMMXt control register name.  */
15316     case 'w':
15317       if (GET_CODE (x) != CONST_INT
15318 	  || INTVAL (x) < 0
15319 	  || INTVAL (x) >= 16)
15320 	/* Bad value for wC register number.  */
15321 	{
15322 	  output_operand_lossage ("invalid operand for code '%c'", code);
15323 	  return;
15324 	}
15325 
15326       else
15327 	{
15328 	  static const char * wc_reg_names [16] =
15329 	    {
15330 	      "wCID",  "wCon",  "wCSSF", "wCASF",
15331 	      "wC4",   "wC5",   "wC6",   "wC7",
15332 	      "wCGR0", "wCGR1", "wCGR2", "wCGR3",
15333 	      "wC12",  "wC13",  "wC14",  "wC15"
15334 	    };
15335 
15336 	  fprintf (stream, wc_reg_names [INTVAL (x)]);
15337 	}
15338       return;
15339 
15340     /* Print the high single-precision register of a VFP double-precision
15341        register.  */
15342     case 'p':
15343       {
15344         int mode = GET_MODE (x);
15345         int regno;
15346 
15347         if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
15348           {
15349 	    output_operand_lossage ("invalid operand for code '%c'", code);
15350 	    return;
15351           }
15352 
15353         regno = REGNO (x);
15354         if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
15355           {
15356 	    output_operand_lossage ("invalid operand for code '%c'", code);
15357 	    return;
15358           }
15359 
15360 	fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
15361       }
15362       return;
15363 
15364     /* Print a VFP/Neon double precision or quad precision register name.  */
15365     case 'P':
15366     case 'q':
15367       {
15368 	int mode = GET_MODE (x);
15369 	int is_quad = (code == 'q');
15370 	int regno;
15371 
15372 	if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
15373 	  {
15374 	    output_operand_lossage ("invalid operand for code '%c'", code);
15375 	    return;
15376 	  }
15377 
15378 	if (GET_CODE (x) != REG
15379 	    || !IS_VFP_REGNUM (REGNO (x)))
15380 	  {
15381 	    output_operand_lossage ("invalid operand for code '%c'", code);
15382 	    return;
15383 	  }
15384 
15385 	regno = REGNO (x);
15386 	if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
15387             || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
15388 	  {
15389 	    output_operand_lossage ("invalid operand for code '%c'", code);
15390 	    return;
15391 	  }
15392 
15393 	fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
15394 	  (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
15395       }
15396       return;
15397 
15398     /* These two codes print the low/high doubleword register of a Neon quad
15399        register, respectively.  For pair-structure types, can also print
15400        low/high quadword registers.  */
15401     case 'e':
15402     case 'f':
15403       {
15404         int mode = GET_MODE (x);
15405         int regno;
15406 
15407         if ((GET_MODE_SIZE (mode) != 16
15408 	     && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
15409           {
15410 	    output_operand_lossage ("invalid operand for code '%c'", code);
15411 	    return;
15412           }
15413 
15414         regno = REGNO (x);
15415         if (!NEON_REGNO_OK_FOR_QUAD (regno))
15416           {
15417 	    output_operand_lossage ("invalid operand for code '%c'", code);
15418 	    return;
15419           }
15420 
15421         if (GET_MODE_SIZE (mode) == 16)
15422           fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
15423 				  + (code == 'f' ? 1 : 0));
15424         else
15425           fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
15426 				  + (code == 'f' ? 1 : 0));
15427       }
15428       return;
15429 
15430     /* Print a VFPv3 floating-point constant, represented as an integer
15431        index.  */
15432     case 'G':
15433       {
15434         int index = vfp3_const_double_index (x);
15435 	gcc_assert (index != -1);
15436 	fprintf (stream, "%d", index);
15437       }
15438       return;
15439 
15440     /* Print bits representing opcode features for Neon.
15441 
15442        Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
15443        and polynomials as unsigned.
15444 
15445        Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
15446 
15447        Bit 2 is 1 for rounding functions, 0 otherwise.  */
15448 
15449     /* Identify the type as 's', 'u', 'p' or 'f'.  */
15450     case 'T':
15451       {
15452         HOST_WIDE_INT bits = INTVAL (x);
15453         fputc ("uspf"[bits & 3], stream);
15454       }
15455       return;
15456 
15457     /* Likewise, but signed and unsigned integers are both 'i'.  */
15458     case 'F':
15459       {
15460         HOST_WIDE_INT bits = INTVAL (x);
15461         fputc ("iipf"[bits & 3], stream);
15462       }
15463       return;
15464 
15465     /* As for 'T', but emit 'u' instead of 'p'.  */
15466     case 't':
15467       {
15468         HOST_WIDE_INT bits = INTVAL (x);
15469         fputc ("usuf"[bits & 3], stream);
15470       }
15471       return;
15472 
15473     /* Bit 2: rounding (vs none).  */
15474     case 'O':
15475       {
15476         HOST_WIDE_INT bits = INTVAL (x);
15477         fputs ((bits & 4) != 0 ? "r" : "", stream);
15478       }
15479       return;
15480 
15481     /* Memory operand for vld1/vst1 instruction.  */
15482     case 'A':
15483       {
15484 	rtx addr;
15485 	bool postinc = FALSE;
15486 	gcc_assert (GET_CODE (x) == MEM);
15487 	addr = XEXP (x, 0);
15488 	if (GET_CODE (addr) == POST_INC)
15489 	  {
15490 	    postinc = 1;
15491 	    addr = XEXP (addr, 0);
15492 	  }
15493 	asm_fprintf (stream, "[%r]", REGNO (addr));
15494 	if (postinc)
15495 	  fputs("!", stream);
15496       }
15497       return;
15498 
15499     /* Translate an S register number into a D register number and element index.  */
15500     case 'y':
15501       {
15502         int mode = GET_MODE (x);
15503         int regno;
15504 
15505         if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
15506           {
15507 	    output_operand_lossage ("invalid operand for code '%c'", code);
15508 	    return;
15509           }
15510 
15511         regno = REGNO (x);
15512         if (!VFP_REGNO_OK_FOR_SINGLE (regno))
15513           {
15514 	    output_operand_lossage ("invalid operand for code '%c'", code);
15515 	    return;
15516           }
15517 
15518 	regno = regno - FIRST_VFP_REGNUM;
15519 	fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
15520       }
15521       return;
15522 
15523     /* Register specifier for vld1.16/vst1.16.  Translate the S register
15524        number into a D register number and element index.  */
15525     case 'z':
15526       {
15527         int mode = GET_MODE (x);
15528         int regno;
15529 
15530         if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
15531           {
15532 	    output_operand_lossage ("invalid operand for code '%c'", code);
15533 	    return;
15534           }
15535 
15536         regno = REGNO (x);
15537         if (!VFP_REGNO_OK_FOR_SINGLE (regno))
15538           {
15539 	    output_operand_lossage ("invalid operand for code '%c'", code);
15540 	    return;
15541           }
15542 
15543 	regno = regno - FIRST_VFP_REGNUM;
15544 	fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
15545       }
15546       return;
15547 
15548     default:
15549       if (x == 0)
15550 	{
15551 	  output_operand_lossage ("missing operand");
15552 	  return;
15553 	}
15554 
15555       switch (GET_CODE (x))
15556 	{
15557 	case REG:
15558 	  asm_fprintf (stream, "%r", REGNO (x));
15559 	  break;
15560 
15561 	case MEM:
15562 	  output_memory_reference_mode = GET_MODE (x);
15563 	  output_address (XEXP (x, 0));
15564 	  break;
15565 
15566 	case CONST_DOUBLE:
15567           if (TARGET_NEON)
15568             {
15569               char fpstr[20];
15570               real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
15571 			       sizeof (fpstr), 0, 1);
15572               fprintf (stream, "#%s", fpstr);
15573             }
15574           else
15575 	    fprintf (stream, "#%s", fp_immediate_constant (x));
15576 	  break;
15577 
15578 	default:
15579 	  gcc_assert (GET_CODE (x) != NEG);
15580 	  fputc ('#', stream);
15581 	  if (GET_CODE (x) == HIGH)
15582 	    {
15583 	      fputs (":lower16:", stream);
15584 	      x = XEXP (x, 0);
15585 	    }
15586 
15587 	  output_addr_const (stream, x);
15588 	  break;
15589 	}
15590     }
15591 }
15592 
15593 /* Target hook for assembling integer objects.  The ARM version needs to
15594    handle word-sized values specially.  */
15595 static bool
15596 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
15597 {
15598   enum machine_mode mode;
15599 
15600   if (size == UNITS_PER_WORD && aligned_p)
15601     {
15602       fputs ("\t.word\t", asm_out_file);
15603       output_addr_const (asm_out_file, x);
15604 
15605       /* Mark symbols as position independent.  We only do this in the
15606 	 .text segment, not in the .data segment.  */
15607       if (NEED_GOT_RELOC && flag_pic && making_const_table &&
15608 	  (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
15609 	{
15610 	  /* See legitimize_pic_address for an explanation of the
15611 	     TARGET_VXWORKS_RTP check.  */
15612 	  if (TARGET_VXWORKS_RTP
15613 	      || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
15614 	    fputs ("(GOT)", asm_out_file);
15615 	  else
15616 	    fputs ("(GOTOFF)", asm_out_file);
15617 	}
15618       fputc ('\n', asm_out_file);
15619       return true;
15620     }
15621 
15622   mode = GET_MODE (x);
15623 
15624   if (arm_vector_mode_supported_p (mode))
15625     {
15626       int i, units;
15627 
15628       gcc_assert (GET_CODE (x) == CONST_VECTOR);
15629 
15630       units = CONST_VECTOR_NUNITS (x);
15631       size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15632 
15633       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15634         for (i = 0; i < units; i++)
15635 	  {
15636 	    rtx elt = CONST_VECTOR_ELT (x, i);
15637 	    assemble_integer
15638 	      (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
15639 	  }
15640       else
15641         for (i = 0; i < units; i++)
15642           {
15643             rtx elt = CONST_VECTOR_ELT (x, i);
15644             REAL_VALUE_TYPE rval;
15645 
15646             REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
15647 
15648             assemble_real
15649               (rval, GET_MODE_INNER (mode),
15650               i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
15651           }
15652 
15653       return true;
15654     }
15655 
15656   return default_assemble_integer (x, size, aligned_p);
15657 }
15658 
15659 static void
15660 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
15661 {
15662   section *s;
15663 
15664   if (!TARGET_AAPCS_BASED)
15665     {
15666       (is_ctor ?
15667        default_named_section_asm_out_constructor
15668        : default_named_section_asm_out_destructor) (symbol, priority);
15669       return;
15670     }
15671 
15672   /* Put these in the .init_array section, using a special relocation.  */
15673   if (priority != DEFAULT_INIT_PRIORITY)
15674     {
15675       char buf[18];
15676       sprintf (buf, "%s.%.5u",
15677 	       is_ctor ? ".init_array" : ".fini_array",
15678 	       priority);
15679       s = get_section (buf, SECTION_WRITE, NULL_TREE);
15680     }
15681   else if (is_ctor)
15682     s = ctors_section;
15683   else
15684     s = dtors_section;
15685 
15686   switch_to_section (s);
15687   assemble_align (POINTER_SIZE);
15688   fputs ("\t.word\t", asm_out_file);
15689   output_addr_const (asm_out_file, symbol);
15690   fputs ("(target1)\n", asm_out_file);
15691 }
15692 
15693 /* Add a function to the list of static constructors.  */
15694 
15695 static void
15696 arm_elf_asm_constructor (rtx symbol, int priority)
15697 {
15698   arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
15699 }
15700 
15701 /* Add a function to the list of static destructors.  */
15702 
15703 static void
15704 arm_elf_asm_destructor (rtx symbol, int priority)
15705 {
15706   arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
15707 }
15708 
15709 /* A finite state machine takes care of noticing whether or not instructions
15710    can be conditionally executed, and thus decrease execution time and code
15711    size by deleting branch instructions.  The fsm is controlled by
15712    final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */
15713 
15714 /* The state of the fsm controlling condition codes are:
15715    0: normal, do nothing special
15716    1: make ASM_OUTPUT_OPCODE not output this instruction
15717    2: make ASM_OUTPUT_OPCODE not output this instruction
15718    3: make instructions conditional
15719    4: make instructions conditional
15720 
15721    State transitions (state->state by whom under condition):
15722    0 -> 1 final_prescan_insn if the `target' is a label
15723    0 -> 2 final_prescan_insn if the `target' is an unconditional branch
15724    1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
15725    2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
15726    3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
15727           (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
15728    4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
15729           (the target insn is arm_target_insn).
15730 
15731    If the jump clobbers the conditions then we use states 2 and 4.
15732 
15733    A similar thing can be done with conditional return insns.
15734 
15735    XXX In case the `target' is an unconditional branch, this conditionalising
15736    of the instructions always reduces code size, but not always execution
15737    time.  But then, I want to reduce the code size to somewhere near what
15738    /bin/cc produces.  */
15739 
15740 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
15741    instructions.  When a COND_EXEC instruction is seen the subsequent
15742    instructions are scanned so that multiple conditional instructions can be
15743    combined into a single IT block.  arm_condexec_count and arm_condexec_mask
15744    specify the length and true/false mask for the IT block.  These will be
15745    decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
15746 
15747 /* Returns the index of the ARM condition code string in
15748    `arm_condition_codes'.  COMPARISON should be an rtx like
15749    `(eq (...) (...))'.  */
15750 static enum arm_cond_code
15751 get_arm_condition_code (rtx comparison)
15752 {
15753   enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
15754   enum arm_cond_code code;
15755   enum rtx_code comp_code = GET_CODE (comparison);
15756 
15757   if (GET_MODE_CLASS (mode) != MODE_CC)
15758     mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
15759 			   XEXP (comparison, 1));
15760 
15761   switch (mode)
15762     {
15763     case CC_DNEmode: code = ARM_NE; goto dominance;
15764     case CC_DEQmode: code = ARM_EQ; goto dominance;
15765     case CC_DGEmode: code = ARM_GE; goto dominance;
15766     case CC_DGTmode: code = ARM_GT; goto dominance;
15767     case CC_DLEmode: code = ARM_LE; goto dominance;
15768     case CC_DLTmode: code = ARM_LT; goto dominance;
15769     case CC_DGEUmode: code = ARM_CS; goto dominance;
15770     case CC_DGTUmode: code = ARM_HI; goto dominance;
15771     case CC_DLEUmode: code = ARM_LS; goto dominance;
15772     case CC_DLTUmode: code = ARM_CC;
15773 
15774     dominance:
15775       gcc_assert (comp_code == EQ || comp_code == NE);
15776 
15777       if (comp_code == EQ)
15778 	return ARM_INVERSE_CONDITION_CODE (code);
15779       return code;
15780 
15781     case CC_NOOVmode:
15782       switch (comp_code)
15783 	{
15784 	case NE: return ARM_NE;
15785 	case EQ: return ARM_EQ;
15786 	case GE: return ARM_PL;
15787 	case LT: return ARM_MI;
15788 	default: gcc_unreachable ();
15789 	}
15790 
15791     case CC_Zmode:
15792       switch (comp_code)
15793 	{
15794 	case NE: return ARM_NE;
15795 	case EQ: return ARM_EQ;
15796 	default: gcc_unreachable ();
15797 	}
15798 
15799     case CC_Nmode:
15800       switch (comp_code)
15801 	{
15802 	case NE: return ARM_MI;
15803 	case EQ: return ARM_PL;
15804 	default: gcc_unreachable ();
15805 	}
15806 
15807     case CCFPEmode:
15808     case CCFPmode:
15809       /* These encodings assume that AC=1 in the FPA system control
15810 	 byte.  This allows us to handle all cases except UNEQ and
15811 	 LTGT.  */
15812       switch (comp_code)
15813 	{
15814 	case GE: return ARM_GE;
15815 	case GT: return ARM_GT;
15816 	case LE: return ARM_LS;
15817 	case LT: return ARM_MI;
15818 	case NE: return ARM_NE;
15819 	case EQ: return ARM_EQ;
15820 	case ORDERED: return ARM_VC;
15821 	case UNORDERED: return ARM_VS;
15822 	case UNLT: return ARM_LT;
15823 	case UNLE: return ARM_LE;
15824 	case UNGT: return ARM_HI;
15825 	case UNGE: return ARM_PL;
15826 	  /* UNEQ and LTGT do not have a representation.  */
15827 	case UNEQ: /* Fall through.  */
15828 	case LTGT: /* Fall through.  */
15829 	default: gcc_unreachable ();
15830 	}
15831 
15832     case CC_SWPmode:
15833       switch (comp_code)
15834 	{
15835 	case NE: return ARM_NE;
15836 	case EQ: return ARM_EQ;
15837 	case GE: return ARM_LE;
15838 	case GT: return ARM_LT;
15839 	case LE: return ARM_GE;
15840 	case LT: return ARM_GT;
15841 	case GEU: return ARM_LS;
15842 	case GTU: return ARM_CC;
15843 	case LEU: return ARM_CS;
15844 	case LTU: return ARM_HI;
15845 	default: gcc_unreachable ();
15846 	}
15847 
15848     case CC_Cmode:
15849       switch (comp_code)
15850       {
15851       case LTU: return ARM_CS;
15852       case GEU: return ARM_CC;
15853       default: gcc_unreachable ();
15854       }
15855 
15856     case CCmode:
15857       switch (comp_code)
15858 	{
15859 	case NE: return ARM_NE;
15860 	case EQ: return ARM_EQ;
15861 	case GE: return ARM_GE;
15862 	case GT: return ARM_GT;
15863 	case LE: return ARM_LE;
15864 	case LT: return ARM_LT;
15865 	case GEU: return ARM_CS;
15866 	case GTU: return ARM_HI;
15867 	case LEU: return ARM_LS;
15868 	case LTU: return ARM_CC;
15869 	default: gcc_unreachable ();
15870 	}
15871 
15872     default: gcc_unreachable ();
15873     }
15874 }
15875 
15876 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
15877    instructions.  */
15878 void
15879 thumb2_final_prescan_insn (rtx insn)
15880 {
15881   rtx first_insn = insn;
15882   rtx body = PATTERN (insn);
15883   rtx predicate;
15884   enum arm_cond_code code;
15885   int n;
15886   int mask;
15887 
15888   /* Remove the previous insn from the count of insns to be output.  */
15889   if (arm_condexec_count)
15890       arm_condexec_count--;
15891 
15892   /* Nothing to do if we are already inside a conditional block.  */
15893   if (arm_condexec_count)
15894     return;
15895 
15896   if (GET_CODE (body) != COND_EXEC)
15897     return;
15898 
15899   /* Conditional jumps are implemented directly.  */
15900   if (GET_CODE (insn) == JUMP_INSN)
15901     return;
15902 
15903   predicate = COND_EXEC_TEST (body);
15904   arm_current_cc = get_arm_condition_code (predicate);
15905 
15906   n = get_attr_ce_count (insn);
15907   arm_condexec_count = 1;
15908   arm_condexec_mask = (1 << n) - 1;
15909   arm_condexec_masklen = n;
15910   /* See if subsequent instructions can be combined into the same block.  */
15911   for (;;)
15912     {
15913       insn = next_nonnote_insn (insn);
15914 
15915       /* Jumping into the middle of an IT block is illegal, so a label or
15916          barrier terminates the block.  */
15917       if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
15918 	break;
15919 
15920       body = PATTERN (insn);
15921       /* USE and CLOBBER aren't really insns, so just skip them.  */
15922       if (GET_CODE (body) == USE
15923 	  || GET_CODE (body) == CLOBBER)
15924 	continue;
15925 
15926       /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
15927       if (GET_CODE (body) != COND_EXEC)
15928 	break;
15929       /* Allow up to 4 conditionally executed instructions in a block.  */
15930       n = get_attr_ce_count (insn);
15931       if (arm_condexec_masklen + n > 4)
15932 	break;
15933 
15934       predicate = COND_EXEC_TEST (body);
15935       code = get_arm_condition_code (predicate);
15936       mask = (1 << n) - 1;
15937       if (arm_current_cc == code)
15938 	arm_condexec_mask |= (mask << arm_condexec_masklen);
15939       else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
15940 	break;
15941 
15942       arm_condexec_count++;
15943       arm_condexec_masklen += n;
15944 
15945       /* A jump must be the last instruction in a conditional block.  */
15946       if (GET_CODE(insn) == JUMP_INSN)
15947 	break;
15948     }
15949   /* Restore recog_data (getting the attributes of other insns can
15950      destroy this array, but final.c assumes that it remains intact
15951      across this call).  */
15952   extract_constrain_insn_cached (first_insn);
15953 }
15954 
15955 void
15956 arm_final_prescan_insn (rtx insn)
15957 {
15958   /* BODY will hold the body of INSN.  */
15959   rtx body = PATTERN (insn);
15960 
15961   /* This will be 1 if trying to repeat the trick, and things need to be
15962      reversed if it appears to fail.  */
15963   int reverse = 0;
15964 
15965   /* If we start with a return insn, we only succeed if we find another one.  */
15966   int seeking_return = 0;
15967 
15968   /* START_INSN will hold the insn from where we start looking.  This is the
15969      first insn after the following code_label if REVERSE is true.  */
15970   rtx start_insn = insn;
15971 
15972   /* If in state 4, check if the target branch is reached, in order to
15973      change back to state 0.  */
15974   if (arm_ccfsm_state == 4)
15975     {
15976       if (insn == arm_target_insn)
15977 	{
15978 	  arm_target_insn = NULL;
15979 	  arm_ccfsm_state = 0;
15980 	}
15981       return;
15982     }
15983 
15984   /* If in state 3, it is possible to repeat the trick, if this insn is an
15985      unconditional branch to a label, and immediately following this branch
15986      is the previous target label which is only used once, and the label this
15987      branch jumps to is not too far off.  */
15988   if (arm_ccfsm_state == 3)
15989     {
15990       if (simplejump_p (insn))
15991 	{
15992 	  start_insn = next_nonnote_insn (start_insn);
15993 	  if (GET_CODE (start_insn) == BARRIER)
15994 	    {
15995 	      /* XXX Isn't this always a barrier?  */
15996 	      start_insn = next_nonnote_insn (start_insn);
15997 	    }
15998 	  if (GET_CODE (start_insn) == CODE_LABEL
15999 	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
16000 	      && LABEL_NUSES (start_insn) == 1)
16001 	    reverse = TRUE;
16002 	  else
16003 	    return;
16004 	}
16005       else if (GET_CODE (body) == RETURN)
16006         {
16007 	  start_insn = next_nonnote_insn (start_insn);
16008 	  if (GET_CODE (start_insn) == BARRIER)
16009 	    start_insn = next_nonnote_insn (start_insn);
16010 	  if (GET_CODE (start_insn) == CODE_LABEL
16011 	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
16012 	      && LABEL_NUSES (start_insn) == 1)
16013 	    {
16014 	      reverse = TRUE;
16015 	      seeking_return = 1;
16016 	    }
16017 	  else
16018 	    return;
16019         }
16020       else
16021 	return;
16022     }
16023 
16024   gcc_assert (!arm_ccfsm_state || reverse);
16025   if (GET_CODE (insn) != JUMP_INSN)
16026     return;
16027 
16028   /* This jump might be paralleled with a clobber of the condition codes
16029      the jump should always come first */
16030   if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
16031     body = XVECEXP (body, 0, 0);
16032 
16033   if (reverse
16034       || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
16035 	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
16036     {
16037       int insns_skipped;
16038       int fail = FALSE, succeed = FALSE;
16039       /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
16040       int then_not_else = TRUE;
16041       rtx this_insn = start_insn, label = 0;
16042 
16043       /* Register the insn jumped to.  */
16044       if (reverse)
16045         {
16046 	  if (!seeking_return)
16047 	    label = XEXP (SET_SRC (body), 0);
16048         }
16049       else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
16050 	label = XEXP (XEXP (SET_SRC (body), 1), 0);
16051       else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
16052 	{
16053 	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
16054 	  then_not_else = FALSE;
16055 	}
16056       else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
16057 	seeking_return = 1;
16058       else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
16059         {
16060 	  seeking_return = 1;
16061 	  then_not_else = FALSE;
16062         }
16063       else
16064 	gcc_unreachable ();
16065 
16066       /* See how many insns this branch skips, and what kind of insns.  If all
16067 	 insns are okay, and the label or unconditional branch to the same
16068 	 label is not too far away, succeed.  */
16069       for (insns_skipped = 0;
16070 	   !fail && !succeed && insns_skipped++ < max_insns_skipped;)
16071 	{
16072 	  rtx scanbody;
16073 
16074 	  this_insn = next_nonnote_insn (this_insn);
16075 	  if (!this_insn)
16076 	    break;
16077 
16078 	  switch (GET_CODE (this_insn))
16079 	    {
16080 	    case CODE_LABEL:
16081 	      /* Succeed if it is the target label, otherwise fail since
16082 		 control falls in from somewhere else.  */
16083 	      if (this_insn == label)
16084 		{
16085 		  arm_ccfsm_state = 1;
16086 		  succeed = TRUE;
16087 		}
16088 	      else
16089 		fail = TRUE;
16090 	      break;
16091 
16092 	    case BARRIER:
16093 	      /* Succeed if the following insn is the target label.
16094 		 Otherwise fail.
16095 		 If return insns are used then the last insn in a function
16096 		 will be a barrier.  */
16097 	      this_insn = next_nonnote_insn (this_insn);
16098 	      if (this_insn && this_insn == label)
16099 		{
16100 		  arm_ccfsm_state = 1;
16101 		  succeed = TRUE;
16102 		}
16103 	      else
16104 		fail = TRUE;
16105 	      break;
16106 
16107 	    case CALL_INSN:
16108 	      /* The AAPCS says that conditional calls should not be
16109 		 used since they make interworking inefficient (the
16110 		 linker can't transform BL<cond> into BLX).  That's
16111 		 only a problem if the machine has BLX.  */
16112 	      if (arm_arch5)
16113 		{
16114 		  fail = TRUE;
16115 		  break;
16116 		}
16117 
16118 	      /* Succeed if the following insn is the target label, or
16119 		 if the following two insns are a barrier and the
16120 		 target label.  */
16121 	      this_insn = next_nonnote_insn (this_insn);
16122 	      if (this_insn && GET_CODE (this_insn) == BARRIER)
16123 		this_insn = next_nonnote_insn (this_insn);
16124 
16125 	      if (this_insn && this_insn == label
16126 		  && insns_skipped < max_insns_skipped)
16127 		{
16128 		  arm_ccfsm_state = 1;
16129 		  succeed = TRUE;
16130 		}
16131 	      else
16132 		fail = TRUE;
16133 	      break;
16134 
16135 	    case JUMP_INSN:
16136       	      /* If this is an unconditional branch to the same label, succeed.
16137 		 If it is to another label, do nothing.  If it is conditional,
16138 		 fail.  */
16139 	      /* XXX Probably, the tests for SET and the PC are
16140 		 unnecessary.  */
16141 
16142 	      scanbody = PATTERN (this_insn);
16143 	      if (GET_CODE (scanbody) == SET
16144 		  && GET_CODE (SET_DEST (scanbody)) == PC)
16145 		{
16146 		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
16147 		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
16148 		    {
16149 		      arm_ccfsm_state = 2;
16150 		      succeed = TRUE;
16151 		    }
16152 		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
16153 		    fail = TRUE;
16154 		}
16155 	      /* Fail if a conditional return is undesirable (e.g. on a
16156 		 StrongARM), but still allow this if optimizing for size.  */
16157 	      else if (GET_CODE (scanbody) == RETURN
16158 		       && !use_return_insn (TRUE, NULL)
16159 		       && !optimize_size)
16160 		fail = TRUE;
16161 	      else if (GET_CODE (scanbody) == RETURN
16162 		       && seeking_return)
16163 	        {
16164 		  arm_ccfsm_state = 2;
16165 		  succeed = TRUE;
16166 	        }
16167 	      else if (GET_CODE (scanbody) == PARALLEL)
16168 	        {
16169 		  switch (get_attr_conds (this_insn))
16170 		    {
16171 		    case CONDS_NOCOND:
16172 		      break;
16173 		    default:
16174 		      fail = TRUE;
16175 		      break;
16176 		    }
16177 		}
16178 	      else
16179 		fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */
16180 
16181 	      break;
16182 
16183 	    case INSN:
16184 	      /* Instructions using or affecting the condition codes make it
16185 		 fail.  */
16186 	      scanbody = PATTERN (this_insn);
16187 	      if (!(GET_CODE (scanbody) == SET
16188 		    || GET_CODE (scanbody) == PARALLEL)
16189 		  || get_attr_conds (this_insn) != CONDS_NOCOND)
16190 		fail = TRUE;
16191 
16192 	      /* A conditional cirrus instruction must be followed by
16193 		 a non Cirrus instruction.  However, since we
16194 		 conditionalize instructions in this function and by
16195 		 the time we get here we can't add instructions
16196 		 (nops), because shorten_branches() has already been
16197 		 called, we will disable conditionalizing Cirrus
16198 		 instructions to be safe.  */
16199 	      if (GET_CODE (scanbody) != USE
16200 		  && GET_CODE (scanbody) != CLOBBER
16201 		  && get_attr_cirrus (this_insn) != CIRRUS_NOT)
16202 		fail = TRUE;
16203 	      break;
16204 
16205 	    default:
16206 	      break;
16207 	    }
16208 	}
16209       if (succeed)
16210 	{
16211 	  if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
16212 	    arm_target_label = CODE_LABEL_NUMBER (label);
16213 	  else
16214 	    {
16215 	      gcc_assert (seeking_return || arm_ccfsm_state == 2);
16216 
16217 	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
16218 	        {
16219 		  this_insn = next_nonnote_insn (this_insn);
16220 		  gcc_assert (!this_insn
16221 			      || (GET_CODE (this_insn) != BARRIER
16222 				  && GET_CODE (this_insn) != CODE_LABEL));
16223 	        }
16224 	      if (!this_insn)
16225 	        {
16226 		  /* Oh, dear! we ran off the end.. give up.  */
16227 		  extract_constrain_insn_cached (insn);
16228 		  arm_ccfsm_state = 0;
16229 		  arm_target_insn = NULL;
16230 		  return;
16231 	        }
16232 	      arm_target_insn = this_insn;
16233 	    }
16234 
16235 	  /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
16236 	     what it was.  */
16237 	  if (!reverse)
16238 	    arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
16239 
16240 	  if (reverse || then_not_else)
16241 	    arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
16242 	}
16243 
16244       /* Restore recog_data (getting the attributes of other insns can
16245 	 destroy this array, but final.c assumes that it remains intact
16246 	 across this call.  */
16247       extract_constrain_insn_cached (insn);
16248     }
16249 }
16250 
16251 /* Output IT instructions.  */
16252 void
16253 thumb2_asm_output_opcode (FILE * stream)
16254 {
16255   char buff[5];
16256   int n;
16257 
16258   if (arm_condexec_mask)
16259     {
16260       for (n = 0; n < arm_condexec_masklen; n++)
16261 	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
16262       buff[n] = 0;
16263       asm_fprintf(stream, "i%s\t%s\n\t", buff,
16264 		  arm_condition_codes[arm_current_cc]);
16265       arm_condexec_mask = 0;
16266     }
16267 }
16268 
16269 /* Returns true if REGNO is a valid register
16270    for holding a quantity of type MODE.  */
16271 int
16272 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
16273 {
16274   if (GET_MODE_CLASS (mode) == MODE_CC)
16275     return (regno == CC_REGNUM
16276 	    || (TARGET_HARD_FLOAT && TARGET_VFP
16277 		&& regno == VFPCC_REGNUM));
16278 
16279   if (TARGET_THUMB1)
16280     /* For the Thumb we only allow values bigger than SImode in
16281        registers 0 - 6, so that there is always a second low
16282        register available to hold the upper part of the value.
16283        We probably we ought to ensure that the register is the
16284        start of an even numbered register pair.  */
16285     return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
16286 
16287   if (TARGET_HARD_FLOAT && TARGET_MAVERICK
16288       && IS_CIRRUS_REGNUM (regno))
16289     /* We have outlawed SI values in Cirrus registers because they
16290        reside in the lower 32 bits, but SF values reside in the
16291        upper 32 bits.  This causes gcc all sorts of grief.  We can't
16292        even split the registers into pairs because Cirrus SI values
16293        get sign extended to 64bits-- aldyh.  */
16294     return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
16295 
16296   if (TARGET_HARD_FLOAT && TARGET_VFP
16297       && IS_VFP_REGNUM (regno))
16298     {
16299       if (mode == SFmode || mode == SImode)
16300 	return VFP_REGNO_OK_FOR_SINGLE (regno);
16301 
16302       if (mode == DFmode)
16303 	return VFP_REGNO_OK_FOR_DOUBLE (regno);
16304 
16305       /* VFP registers can hold HFmode values, but there is no point in
16306 	 putting them there unless we have hardware conversion insns. */
16307       if (mode == HFmode)
16308 	return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
16309 
16310       if (TARGET_NEON)
16311         return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
16312                || (VALID_NEON_QREG_MODE (mode)
16313                    && NEON_REGNO_OK_FOR_QUAD (regno))
16314 	       || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
16315 	       || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
16316 	       || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
16317 	       || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
16318 	       || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
16319 
16320       return FALSE;
16321     }
16322 
16323   if (TARGET_REALLY_IWMMXT)
16324     {
16325       if (IS_IWMMXT_GR_REGNUM (regno))
16326 	return mode == SImode;
16327 
16328       if (IS_IWMMXT_REGNUM (regno))
16329 	return VALID_IWMMXT_REG_MODE (mode);
16330     }
16331 
16332   /* We allow almost any value to be stored in the general registers.
16333      Restrict doubleword quantities to even register pairs so that we can
16334      use ldrd.  Do not allow very large Neon structure opaque modes in
16335      general registers; they would use too many.  */
16336   if (regno <= LAST_ARM_REGNUM)
16337     return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
16338       && ARM_NUM_REGS (mode) <= 4;
16339 
16340   if (regno == FRAME_POINTER_REGNUM
16341       || regno == ARG_POINTER_REGNUM)
16342     /* We only allow integers in the fake hard registers.  */
16343     return GET_MODE_CLASS (mode) == MODE_INT;
16344 
16345   /* The only registers left are the FPA registers
16346      which we only allow to hold FP values.  */
16347   return (TARGET_HARD_FLOAT && TARGET_FPA
16348 	  && GET_MODE_CLASS (mode) == MODE_FLOAT
16349 	  && regno >= FIRST_FPA_REGNUM
16350 	  && regno <= LAST_FPA_REGNUM);
16351 }
16352 
16353 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
16354    not used in arm mode.  */
16355 
16356 enum reg_class
16357 arm_regno_class (int regno)
16358 {
16359   if (TARGET_THUMB1)
16360     {
16361       if (regno == STACK_POINTER_REGNUM)
16362 	return STACK_REG;
16363       if (regno == CC_REGNUM)
16364 	return CC_REG;
16365       if (regno < 8)
16366 	return LO_REGS;
16367       return HI_REGS;
16368     }
16369 
16370   if (TARGET_THUMB2 && regno < 8)
16371     return LO_REGS;
16372 
16373   if (   regno <= LAST_ARM_REGNUM
16374       || regno == FRAME_POINTER_REGNUM
16375       || regno == ARG_POINTER_REGNUM)
16376     return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
16377 
16378   if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
16379     return TARGET_THUMB2 ? CC_REG : NO_REGS;
16380 
16381   if (IS_CIRRUS_REGNUM (regno))
16382     return CIRRUS_REGS;
16383 
16384   if (IS_VFP_REGNUM (regno))
16385     {
16386       if (regno <= D7_VFP_REGNUM)
16387 	return VFP_D0_D7_REGS;
16388       else if (regno <= LAST_LO_VFP_REGNUM)
16389         return VFP_LO_REGS;
16390       else
16391         return VFP_HI_REGS;
16392     }
16393 
16394   if (IS_IWMMXT_REGNUM (regno))
16395     return IWMMXT_REGS;
16396 
16397   if (IS_IWMMXT_GR_REGNUM (regno))
16398     return IWMMXT_GR_REGS;
16399 
16400   return FPA_REGS;
16401 }
16402 
16403 /* Handle a special case when computing the offset
16404    of an argument from the frame pointer.  */
16405 int
16406 arm_debugger_arg_offset (int value, rtx addr)
16407 {
16408   rtx insn;
16409 
16410   /* We are only interested if dbxout_parms() failed to compute the offset.  */
16411   if (value != 0)
16412     return 0;
16413 
16414   /* We can only cope with the case where the address is held in a register.  */
16415   if (GET_CODE (addr) != REG)
16416     return 0;
16417 
16418   /* If we are using the frame pointer to point at the argument, then
16419      an offset of 0 is correct.  */
16420   if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
16421     return 0;
16422 
16423   /* If we are using the stack pointer to point at the
16424      argument, then an offset of 0 is correct.  */
16425   /* ??? Check this is consistent with thumb2 frame layout.  */
16426   if ((TARGET_THUMB || !frame_pointer_needed)
16427       && REGNO (addr) == SP_REGNUM)
16428     return 0;
16429 
16430   /* Oh dear.  The argument is pointed to by a register rather
16431      than being held in a register, or being stored at a known
16432      offset from the frame pointer.  Since GDB only understands
16433      those two kinds of argument we must translate the address
16434      held in the register into an offset from the frame pointer.
16435      We do this by searching through the insns for the function
16436      looking to see where this register gets its value.  If the
16437      register is initialized from the frame pointer plus an offset
16438      then we are in luck and we can continue, otherwise we give up.
16439 
16440      This code is exercised by producing debugging information
16441      for a function with arguments like this:
16442 
16443            double func (double a, double b, int c, double d) {return d;}
16444 
16445      Without this code the stab for parameter 'd' will be set to
16446      an offset of 0 from the frame pointer, rather than 8.  */
16447 
16448   /* The if() statement says:
16449 
16450      If the insn is a normal instruction
16451      and if the insn is setting the value in a register
16452      and if the register being set is the register holding the address of the argument
16453      and if the address is computing by an addition
16454      that involves adding to a register
16455      which is the frame pointer
16456      a constant integer
16457 
16458      then...  */
16459 
16460   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16461     {
16462       if (   GET_CODE (insn) == INSN
16463 	  && GET_CODE (PATTERN (insn)) == SET
16464 	  && REGNO    (XEXP (PATTERN (insn), 0)) == REGNO (addr)
16465 	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
16466 	  && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
16467 	  && REGNO    (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
16468 	  && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
16469 	     )
16470 	{
16471 	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
16472 
16473 	  break;
16474 	}
16475     }
16476 
16477   if (value == 0)
16478     {
16479       debug_rtx (addr);
16480       warning (0, "unable to compute real location of stacked parameter");
16481       value = 8; /* XXX magic hack */
16482     }
16483 
16484   return value;
16485 }
16486 
16487 #define def_mbuiltin(MASK, NAME, TYPE, CODE)				\
16488   do									\
16489     {									\
16490       if ((MASK) & insn_flags)						\
16491         add_builtin_function ((NAME), (TYPE), (CODE),			\
16492 			     BUILT_IN_MD, NULL, NULL_TREE);		\
16493     }									\
16494   while (0)
16495 
16496 struct builtin_description
16497 {
16498   const unsigned int       mask;
16499   const enum insn_code     icode;
16500   const char * const       name;
16501   const enum arm_builtins  code;
16502   const enum rtx_code      comparison;
16503   const unsigned int       flag;
16504 };
16505 
16506 static const struct builtin_description bdesc_2arg[] =
16507 {
16508 #define IWMMXT_BUILTIN(code, string, builtin) \
16509   { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
16510     ARM_BUILTIN_##builtin, UNKNOWN, 0 },
16511 
16512   IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
16513   IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
16514   IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
16515   IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
16516   IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
16517   IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
16518   IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
16519   IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
16520   IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
16521   IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
16522   IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
16523   IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
16524   IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
16525   IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
16526   IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
16527   IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
16528   IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
16529   IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
16530   IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
16531   IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
16532   IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
16533   IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
16534   IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
16535   IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
16536   IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
16537   IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
16538   IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
16539   IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
16540   IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
16541   IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
16542   IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
16543   IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
16544   IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
16545   IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
16546   IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
16547   IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
16548   IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
16549   IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
16550   IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
16551   IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
16552   IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
16553   IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
16554   IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
16555   IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
16556   IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
16557   IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
16558   IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
16559   IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
16560   IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
16561   IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
16562   IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
16563   IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
16564   IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
16565   IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
16566   IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
16567   IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
16568   IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
16569   IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
16570 
16571 #define IWMMXT_BUILTIN2(code, builtin) \
16572   { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
16573 
16574   IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
16575   IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
16576   IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
16577   IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
16578   IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
16579   IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
16580   IWMMXT_BUILTIN2 (ashlv4hi3_di,    WSLLH)
16581   IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
16582   IWMMXT_BUILTIN2 (ashlv2si3_di,    WSLLW)
16583   IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
16584   IWMMXT_BUILTIN2 (ashldi3_di,      WSLLD)
16585   IWMMXT_BUILTIN2 (ashldi3_iwmmxt,  WSLLDI)
16586   IWMMXT_BUILTIN2 (lshrv4hi3_di,    WSRLH)
16587   IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
16588   IWMMXT_BUILTIN2 (lshrv2si3_di,    WSRLW)
16589   IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
16590   IWMMXT_BUILTIN2 (lshrdi3_di,      WSRLD)
16591   IWMMXT_BUILTIN2 (lshrdi3_iwmmxt,  WSRLDI)
16592   IWMMXT_BUILTIN2 (ashrv4hi3_di,    WSRAH)
16593   IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
16594   IWMMXT_BUILTIN2 (ashrv2si3_di,    WSRAW)
16595   IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
16596   IWMMXT_BUILTIN2 (ashrdi3_di,      WSRAD)
16597   IWMMXT_BUILTIN2 (ashrdi3_iwmmxt,  WSRADI)
16598   IWMMXT_BUILTIN2 (rorv4hi3_di,     WRORH)
16599   IWMMXT_BUILTIN2 (rorv4hi3,        WRORHI)
16600   IWMMXT_BUILTIN2 (rorv2si3_di,     WRORW)
16601   IWMMXT_BUILTIN2 (rorv2si3,        WRORWI)
16602   IWMMXT_BUILTIN2 (rordi3_di,       WRORD)
16603   IWMMXT_BUILTIN2 (rordi3,          WRORDI)
16604   IWMMXT_BUILTIN2 (iwmmxt_wmacuz,   WMACUZ)
16605   IWMMXT_BUILTIN2 (iwmmxt_wmacsz,   WMACSZ)
16606 };
16607 
16608 static const struct builtin_description bdesc_1arg[] =
16609 {
16610   IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
16611   IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
16612   IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
16613   IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
16614   IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
16615   IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
16616   IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
16617   IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
16618   IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
16619   IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
16620   IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
16621   IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
16622   IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
16623   IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
16624   IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
16625   IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
16626   IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
16627   IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
16628 };
16629 
16630 /* Set up all the iWMMXt builtins.  This is
16631    not called if TARGET_IWMMXT is zero.  */
16632 
16633 static void
16634 arm_init_iwmmxt_builtins (void)
16635 {
16636   const struct builtin_description * d;
16637   size_t i;
16638   tree endlink = void_list_node;
16639 
16640   tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16641   tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16642   tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
16643 
16644   tree int_ftype_int
16645     = build_function_type (integer_type_node,
16646 			   tree_cons (NULL_TREE, integer_type_node, endlink));
16647   tree v8qi_ftype_v8qi_v8qi_int
16648     = build_function_type (V8QI_type_node,
16649 			   tree_cons (NULL_TREE, V8QI_type_node,
16650 				      tree_cons (NULL_TREE, V8QI_type_node,
16651 						 tree_cons (NULL_TREE,
16652 							    integer_type_node,
16653 							    endlink))));
16654   tree v4hi_ftype_v4hi_int
16655     = build_function_type (V4HI_type_node,
16656 			   tree_cons (NULL_TREE, V4HI_type_node,
16657 				      tree_cons (NULL_TREE, integer_type_node,
16658 						 endlink)));
16659   tree v2si_ftype_v2si_int
16660     = build_function_type (V2SI_type_node,
16661 			   tree_cons (NULL_TREE, V2SI_type_node,
16662 				      tree_cons (NULL_TREE, integer_type_node,
16663 						 endlink)));
16664   tree v2si_ftype_di_di
16665     = build_function_type (V2SI_type_node,
16666 			   tree_cons (NULL_TREE, long_long_integer_type_node,
16667 				      tree_cons (NULL_TREE, long_long_integer_type_node,
16668 						 endlink)));
16669   tree di_ftype_di_int
16670     = build_function_type (long_long_integer_type_node,
16671 			   tree_cons (NULL_TREE, long_long_integer_type_node,
16672 				      tree_cons (NULL_TREE, integer_type_node,
16673 						 endlink)));
16674   tree di_ftype_di_int_int
16675     = build_function_type (long_long_integer_type_node,
16676 			   tree_cons (NULL_TREE, long_long_integer_type_node,
16677 				      tree_cons (NULL_TREE, integer_type_node,
16678 						 tree_cons (NULL_TREE,
16679 							    integer_type_node,
16680 							    endlink))));
16681   tree int_ftype_v8qi
16682     = build_function_type (integer_type_node,
16683 			   tree_cons (NULL_TREE, V8QI_type_node,
16684 				      endlink));
16685   tree int_ftype_v4hi
16686     = build_function_type (integer_type_node,
16687 			   tree_cons (NULL_TREE, V4HI_type_node,
16688 				      endlink));
16689   tree int_ftype_v2si
16690     = build_function_type (integer_type_node,
16691 			   tree_cons (NULL_TREE, V2SI_type_node,
16692 				      endlink));
16693   tree int_ftype_v8qi_int
16694     = build_function_type (integer_type_node,
16695 			   tree_cons (NULL_TREE, V8QI_type_node,
16696 				      tree_cons (NULL_TREE, integer_type_node,
16697 						 endlink)));
16698   tree int_ftype_v4hi_int
16699     = build_function_type (integer_type_node,
16700 			   tree_cons (NULL_TREE, V4HI_type_node,
16701 				      tree_cons (NULL_TREE, integer_type_node,
16702 						 endlink)));
16703   tree int_ftype_v2si_int
16704     = build_function_type (integer_type_node,
16705 			   tree_cons (NULL_TREE, V2SI_type_node,
16706 				      tree_cons (NULL_TREE, integer_type_node,
16707 						 endlink)));
16708   tree v8qi_ftype_v8qi_int_int
16709     = build_function_type (V8QI_type_node,
16710 			   tree_cons (NULL_TREE, V8QI_type_node,
16711 				      tree_cons (NULL_TREE, integer_type_node,
16712 						 tree_cons (NULL_TREE,
16713 							    integer_type_node,
16714 							    endlink))));
16715   tree v4hi_ftype_v4hi_int_int
16716     = build_function_type (V4HI_type_node,
16717 			   tree_cons (NULL_TREE, V4HI_type_node,
16718 				      tree_cons (NULL_TREE, integer_type_node,
16719 						 tree_cons (NULL_TREE,
16720 							    integer_type_node,
16721 							    endlink))));
16722   tree v2si_ftype_v2si_int_int
16723     = build_function_type (V2SI_type_node,
16724 			   tree_cons (NULL_TREE, V2SI_type_node,
16725 				      tree_cons (NULL_TREE, integer_type_node,
16726 						 tree_cons (NULL_TREE,
16727 							    integer_type_node,
16728 							    endlink))));
16729   /* Miscellaneous.  */
16730   tree v8qi_ftype_v4hi_v4hi
16731     = build_function_type (V8QI_type_node,
16732 			   tree_cons (NULL_TREE, V4HI_type_node,
16733 				      tree_cons (NULL_TREE, V4HI_type_node,
16734 						 endlink)));
16735   tree v4hi_ftype_v2si_v2si
16736     = build_function_type (V4HI_type_node,
16737 			   tree_cons (NULL_TREE, V2SI_type_node,
16738 				      tree_cons (NULL_TREE, V2SI_type_node,
16739 						 endlink)));
16740   tree v2si_ftype_v4hi_v4hi
16741     = build_function_type (V2SI_type_node,
16742 			   tree_cons (NULL_TREE, V4HI_type_node,
16743 				      tree_cons (NULL_TREE, V4HI_type_node,
16744 						 endlink)));
16745   tree v2si_ftype_v8qi_v8qi
16746     = build_function_type (V2SI_type_node,
16747 			   tree_cons (NULL_TREE, V8QI_type_node,
16748 				      tree_cons (NULL_TREE, V8QI_type_node,
16749 						 endlink)));
16750   tree v4hi_ftype_v4hi_di
16751     = build_function_type (V4HI_type_node,
16752 			   tree_cons (NULL_TREE, V4HI_type_node,
16753 				      tree_cons (NULL_TREE,
16754 						 long_long_integer_type_node,
16755 						 endlink)));
16756   tree v2si_ftype_v2si_di
16757     = build_function_type (V2SI_type_node,
16758 			   tree_cons (NULL_TREE, V2SI_type_node,
16759 				      tree_cons (NULL_TREE,
16760 						 long_long_integer_type_node,
16761 						 endlink)));
16762   tree void_ftype_int_int
16763     = build_function_type (void_type_node,
16764 			   tree_cons (NULL_TREE, integer_type_node,
16765 				      tree_cons (NULL_TREE, integer_type_node,
16766 						 endlink)));
16767   tree di_ftype_void
16768     = build_function_type (long_long_unsigned_type_node, endlink);
16769   tree di_ftype_v8qi
16770     = build_function_type (long_long_integer_type_node,
16771 			   tree_cons (NULL_TREE, V8QI_type_node,
16772 				      endlink));
16773   tree di_ftype_v4hi
16774     = build_function_type (long_long_integer_type_node,
16775 			   tree_cons (NULL_TREE, V4HI_type_node,
16776 				      endlink));
16777   tree di_ftype_v2si
16778     = build_function_type (long_long_integer_type_node,
16779 			   tree_cons (NULL_TREE, V2SI_type_node,
16780 				      endlink));
16781   tree v2si_ftype_v4hi
16782     = build_function_type (V2SI_type_node,
16783 			   tree_cons (NULL_TREE, V4HI_type_node,
16784 				      endlink));
16785   tree v4hi_ftype_v8qi
16786     = build_function_type (V4HI_type_node,
16787 			   tree_cons (NULL_TREE, V8QI_type_node,
16788 				      endlink));
16789 
16790   tree di_ftype_di_v4hi_v4hi
16791     = build_function_type (long_long_unsigned_type_node,
16792 			   tree_cons (NULL_TREE,
16793 				      long_long_unsigned_type_node,
16794 				      tree_cons (NULL_TREE, V4HI_type_node,
16795 						 tree_cons (NULL_TREE,
16796 							    V4HI_type_node,
16797 							    endlink))));
16798 
16799   tree di_ftype_v4hi_v4hi
16800     = build_function_type (long_long_unsigned_type_node,
16801 			   tree_cons (NULL_TREE, V4HI_type_node,
16802 				      tree_cons (NULL_TREE, V4HI_type_node,
16803 						 endlink)));
16804 
16805   /* Normal vector binops.  */
16806   tree v8qi_ftype_v8qi_v8qi
16807     = build_function_type (V8QI_type_node,
16808 			   tree_cons (NULL_TREE, V8QI_type_node,
16809 				      tree_cons (NULL_TREE, V8QI_type_node,
16810 						 endlink)));
16811   tree v4hi_ftype_v4hi_v4hi
16812     = build_function_type (V4HI_type_node,
16813 			   tree_cons (NULL_TREE, V4HI_type_node,
16814 				      tree_cons (NULL_TREE, V4HI_type_node,
16815 						 endlink)));
16816   tree v2si_ftype_v2si_v2si
16817     = build_function_type (V2SI_type_node,
16818 			   tree_cons (NULL_TREE, V2SI_type_node,
16819 				      tree_cons (NULL_TREE, V2SI_type_node,
16820 						 endlink)));
16821   tree di_ftype_di_di
16822     = build_function_type (long_long_unsigned_type_node,
16823 			   tree_cons (NULL_TREE, long_long_unsigned_type_node,
16824 				      tree_cons (NULL_TREE,
16825 						 long_long_unsigned_type_node,
16826 						 endlink)));
16827 
16828   /* Add all builtins that are more or less simple operations on two
16829      operands.  */
16830   for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16831     {
16832       /* Use one of the operands; the target can have a different mode for
16833 	 mask-generating compares.  */
16834       enum machine_mode mode;
16835       tree type;
16836 
16837       if (d->name == 0)
16838 	continue;
16839 
16840       mode = insn_data[d->icode].operand[1].mode;
16841 
16842       switch (mode)
16843 	{
16844 	case V8QImode:
16845 	  type = v8qi_ftype_v8qi_v8qi;
16846 	  break;
16847 	case V4HImode:
16848 	  type = v4hi_ftype_v4hi_v4hi;
16849 	  break;
16850 	case V2SImode:
16851 	  type = v2si_ftype_v2si_v2si;
16852 	  break;
16853 	case DImode:
16854 	  type = di_ftype_di_di;
16855 	  break;
16856 
16857 	default:
16858 	  gcc_unreachable ();
16859 	}
16860 
16861       def_mbuiltin (d->mask, d->name, type, d->code);
16862     }
16863 
16864   /* Add the remaining MMX insns with somewhat more complicated types.  */
16865   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
16866   def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
16867   def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
16868 
16869   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
16870   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
16871   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
16872   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
16873   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
16874   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
16875 
16876   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
16877   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
16878   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
16879   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
16880   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
16881   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
16882 
16883   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
16884   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
16885   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
16886   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
16887   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
16888   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
16889 
16890   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
16891   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
16892   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
16893   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
16894   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
16895   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
16896 
16897   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
16898 
16899   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
16900   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
16901   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
16902   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
16903 
16904   def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
16905   def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
16906   def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
16907   def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
16908   def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
16909   def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
16910   def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
16911   def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
16912   def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
16913 
16914   def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
16915   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
16916   def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
16917 
16918   def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
16919   def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
16920   def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
16921 
16922   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
16923   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
16924   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
16925   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
16926   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
16927   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
16928 
16929   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
16930   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
16931   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
16932   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
16933   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
16934   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
16935   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
16936   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
16937   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
16938   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
16939   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
16940   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
16941 
16942   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
16943   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
16944   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
16945   def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
16946 
16947   def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
16948   def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
16949   def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
16950   def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
16951   def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
16952   def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
16953   def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
16954 }
16955 
16956 static void
16957 arm_init_tls_builtins (void)
16958 {
16959   tree ftype, decl;
16960 
16961   ftype = build_function_type (ptr_type_node, void_list_node);
16962   decl = add_builtin_function ("__builtin_thread_pointer", ftype,
16963 			       ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
16964 			       NULL, NULL_TREE);
16965   TREE_NOTHROW (decl) = 1;
16966   TREE_READONLY (decl) = 1;
16967 }
16968 
16969 enum neon_builtin_type_bits {
16970   T_V8QI  = 0x0001,
16971   T_V4HI  = 0x0002,
16972   T_V2SI  = 0x0004,
16973   T_V2SF  = 0x0008,
16974   T_DI    = 0x0010,
16975   T_V16QI = 0x0020,
16976   T_V8HI  = 0x0040,
16977   T_V4SI  = 0x0080,
16978   T_V4SF  = 0x0100,
16979   T_V2DI  = 0x0200,
16980   T_TI	  = 0x0400,
16981   T_EI	  = 0x0800,
16982   T_OI	  = 0x1000
16983 };
16984 
16985 #define v8qi_UP  T_V8QI
16986 #define v4hi_UP  T_V4HI
16987 #define v2si_UP  T_V2SI
16988 #define v2sf_UP  T_V2SF
16989 #define di_UP    T_DI
16990 #define v16qi_UP T_V16QI
16991 #define v8hi_UP  T_V8HI
16992 #define v4si_UP  T_V4SI
16993 #define v4sf_UP  T_V4SF
16994 #define v2di_UP  T_V2DI
16995 #define ti_UP	 T_TI
16996 #define ei_UP	 T_EI
16997 #define oi_UP	 T_OI
16998 
16999 #define UP(X) X##_UP
17000 
17001 #define T_MAX 13
17002 
17003 typedef enum {
17004   NEON_BINOP,
17005   NEON_TERNOP,
17006   NEON_UNOP,
17007   NEON_GETLANE,
17008   NEON_SETLANE,
17009   NEON_CREATE,
17010   NEON_DUP,
17011   NEON_DUPLANE,
17012   NEON_COMBINE,
17013   NEON_SPLIT,
17014   NEON_LANEMUL,
17015   NEON_LANEMULL,
17016   NEON_LANEMULH,
17017   NEON_LANEMAC,
17018   NEON_SCALARMUL,
17019   NEON_SCALARMULL,
17020   NEON_SCALARMULH,
17021   NEON_SCALARMAC,
17022   NEON_CONVERT,
17023   NEON_FIXCONV,
17024   NEON_SELECT,
17025   NEON_RESULTPAIR,
17026   NEON_REINTERP,
17027   NEON_VTBL,
17028   NEON_VTBX,
17029   NEON_LOAD1,
17030   NEON_LOAD1LANE,
17031   NEON_STORE1,
17032   NEON_STORE1LANE,
17033   NEON_LOADSTRUCT,
17034   NEON_LOADSTRUCTLANE,
17035   NEON_STORESTRUCT,
17036   NEON_STORESTRUCTLANE,
17037   NEON_LOGICBINOP,
17038   NEON_SHIFTINSERT,
17039   NEON_SHIFTIMM,
17040   NEON_SHIFTACC
17041 } neon_itype;
17042 
17043 typedef struct {
17044   const char *name;
17045   const neon_itype itype;
17046   const int bits;
17047   const enum insn_code codes[T_MAX];
17048   const unsigned int num_vars;
17049   unsigned int base_fcode;
17050 } neon_builtin_datum;
17051 
17052 #define CF(N,X) CODE_FOR_neon_##N##X
17053 
17054 #define VAR1(T, N, A) \
17055   #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
17056 #define VAR2(T, N, A, B) \
17057   #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
17058 #define VAR3(T, N, A, B, C) \
17059   #N, NEON_##T, UP (A) | UP (B) | UP (C), \
17060   { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
17061 #define VAR4(T, N, A, B, C, D) \
17062   #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
17063   { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
17064 #define VAR5(T, N, A, B, C, D, E) \
17065   #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
17066   { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
17067 #define VAR6(T, N, A, B, C, D, E, F) \
17068   #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
17069   { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
17070 #define VAR7(T, N, A, B, C, D, E, F, G) \
17071   #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
17072   { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17073     CF (N, G) }, 7, 0
17074 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
17075   #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17076                 | UP (H), \
17077   { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17078     CF (N, G), CF (N, H) }, 8, 0
17079 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
17080   #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17081                 | UP (H) | UP (I), \
17082   { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17083     CF (N, G), CF (N, H), CF (N, I) }, 9, 0
17084 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
17085   #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17086                 | UP (H) | UP (I) | UP (J), \
17087   { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17088     CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
17089 
17090 /* The mode entries in the following table correspond to the "key" type of the
17091    instruction variant, i.e. equivalent to that which would be specified after
17092    the assembler mnemonic, which usually refers to the last vector operand.
17093    (Signed/unsigned/polynomial types are not differentiated between though, and
17094    are all mapped onto the same mode for a given element size.) The modes
17095    listed per instruction should be the same as those defined for that
17096    instruction's pattern in neon.md.
17097    WARNING: Variants should be listed in the same increasing order as
17098    neon_builtin_type_bits.  */
17099 
17100 static neon_builtin_datum neon_builtin_data[] =
17101 {
17102   { VAR10 (BINOP, vadd,
17103 	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17104   { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
17105   { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
17106   { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17107   { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17108   { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
17109   { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17110   { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17111   { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
17112   { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17113   { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
17114   { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
17115   { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
17116   { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
17117   { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
17118   { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
17119   { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
17120   { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
17121   { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
17122   { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
17123   { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
17124   { VAR2 (BINOP, vqdmull, v4hi, v2si) },
17125   { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17126   { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17127   { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17128   { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
17129   { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
17130   { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
17131   { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17132   { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17133   { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17134   { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
17135   { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17136   { VAR10 (BINOP, vsub,
17137 	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17138   { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
17139   { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
17140   { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17141   { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17142   { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
17143   { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17144   { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17145   { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17146   { VAR2 (BINOP, vcage, v2sf, v4sf) },
17147   { VAR2 (BINOP, vcagt, v2sf, v4sf) },
17148   { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17149   { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17150   { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
17151   { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17152   { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
17153   { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17154   { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17155   { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
17156   { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17157   { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17158   { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
17159   { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
17160   { VAR2 (BINOP, vrecps, v2sf, v4sf) },
17161   { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
17162   { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17163   { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17164   { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17165   { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17166   { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17167   { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17168   { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17169   { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17170   { VAR2 (UNOP, vcnt, v8qi, v16qi) },
17171   { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
17172   { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
17173   { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17174   /* FIXME: vget_lane supports more variants than this!  */
17175   { VAR10 (GETLANE, vget_lane,
17176 	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17177   { VAR10 (SETLANE, vset_lane,
17178 	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17179   { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
17180   { VAR10 (DUP, vdup_n,
17181 	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17182   { VAR10 (DUPLANE, vdup_lane,
17183 	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17184   { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
17185   { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
17186   { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
17187   { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
17188   { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
17189   { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
17190   { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
17191   { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17192   { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17193   { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
17194   { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
17195   { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17196   { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
17197   { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
17198   { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17199   { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17200   { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
17201   { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
17202   { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17203   { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
17204   { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
17205   { VAR10 (BINOP, vext,
17206 	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17207   { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17208   { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
17209   { VAR2 (UNOP, vrev16, v8qi, v16qi) },
17210   { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
17211   { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
17212   { VAR10 (SELECT, vbsl,
17213 	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17214   { VAR1 (VTBL, vtbl1, v8qi) },
17215   { VAR1 (VTBL, vtbl2, v8qi) },
17216   { VAR1 (VTBL, vtbl3, v8qi) },
17217   { VAR1 (VTBL, vtbl4, v8qi) },
17218   { VAR1 (VTBX, vtbx1, v8qi) },
17219   { VAR1 (VTBX, vtbx2, v8qi) },
17220   { VAR1 (VTBX, vtbx3, v8qi) },
17221   { VAR1 (VTBX, vtbx4, v8qi) },
17222   { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17223   { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17224   { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17225   { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
17226   { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
17227   { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
17228   { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
17229   { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
17230   { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
17231   { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
17232   { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
17233   { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
17234   { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
17235   { VAR10 (LOAD1, vld1,
17236            v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17237   { VAR10 (LOAD1LANE, vld1_lane,
17238 	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17239   { VAR10 (LOAD1, vld1_dup,
17240 	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17241   { VAR10 (STORE1, vst1,
17242 	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17243   { VAR10 (STORE1LANE, vst1_lane,
17244 	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17245   { VAR9 (LOADSTRUCT,
17246 	  vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17247   { VAR7 (LOADSTRUCTLANE, vld2_lane,
17248 	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17249   { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
17250   { VAR9 (STORESTRUCT, vst2,
17251 	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17252   { VAR7 (STORESTRUCTLANE, vst2_lane,
17253 	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17254   { VAR9 (LOADSTRUCT,
17255 	  vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17256   { VAR7 (LOADSTRUCTLANE, vld3_lane,
17257 	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17258   { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
17259   { VAR9 (STORESTRUCT, vst3,
17260 	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17261   { VAR7 (STORESTRUCTLANE, vst3_lane,
17262 	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17263   { VAR9 (LOADSTRUCT, vld4,
17264 	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17265   { VAR7 (LOADSTRUCTLANE, vld4_lane,
17266 	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17267   { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
17268   { VAR9 (STORESTRUCT, vst4,
17269 	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17270   { VAR7 (STORESTRUCTLANE, vst4_lane,
17271 	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17272   { VAR10 (LOGICBINOP, vand,
17273 	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17274   { VAR10 (LOGICBINOP, vorr,
17275 	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17276   { VAR10 (BINOP, veor,
17277 	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17278   { VAR10 (LOGICBINOP, vbic,
17279 	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17280   { VAR10 (LOGICBINOP, vorn,
17281 	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
17282 };
17283 
17284 #undef CF
17285 #undef VAR1
17286 #undef VAR2
17287 #undef VAR3
17288 #undef VAR4
17289 #undef VAR5
17290 #undef VAR6
17291 #undef VAR7
17292 #undef VAR8
17293 #undef VAR9
17294 #undef VAR10
17295 
17296 static void
17297 arm_init_neon_builtins (void)
17298 {
17299   unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
17300 
17301   tree neon_intQI_type_node;
17302   tree neon_intHI_type_node;
17303   tree neon_polyQI_type_node;
17304   tree neon_polyHI_type_node;
17305   tree neon_intSI_type_node;
17306   tree neon_intDI_type_node;
17307   tree neon_float_type_node;
17308 
17309   tree intQI_pointer_node;
17310   tree intHI_pointer_node;
17311   tree intSI_pointer_node;
17312   tree intDI_pointer_node;
17313   tree float_pointer_node;
17314 
17315   tree const_intQI_node;
17316   tree const_intHI_node;
17317   tree const_intSI_node;
17318   tree const_intDI_node;
17319   tree const_float_node;
17320 
17321   tree const_intQI_pointer_node;
17322   tree const_intHI_pointer_node;
17323   tree const_intSI_pointer_node;
17324   tree const_intDI_pointer_node;
17325   tree const_float_pointer_node;
17326 
17327   tree V8QI_type_node;
17328   tree V4HI_type_node;
17329   tree V2SI_type_node;
17330   tree V2SF_type_node;
17331   tree V16QI_type_node;
17332   tree V8HI_type_node;
17333   tree V4SI_type_node;
17334   tree V4SF_type_node;
17335   tree V2DI_type_node;
17336 
17337   tree intUQI_type_node;
17338   tree intUHI_type_node;
17339   tree intUSI_type_node;
17340   tree intUDI_type_node;
17341 
17342   tree intEI_type_node;
17343   tree intOI_type_node;
17344   tree intCI_type_node;
17345   tree intXI_type_node;
17346 
17347   tree V8QI_pointer_node;
17348   tree V4HI_pointer_node;
17349   tree V2SI_pointer_node;
17350   tree V2SF_pointer_node;
17351   tree V16QI_pointer_node;
17352   tree V8HI_pointer_node;
17353   tree V4SI_pointer_node;
17354   tree V4SF_pointer_node;
17355   tree V2DI_pointer_node;
17356 
17357   tree void_ftype_pv8qi_v8qi_v8qi;
17358   tree void_ftype_pv4hi_v4hi_v4hi;
17359   tree void_ftype_pv2si_v2si_v2si;
17360   tree void_ftype_pv2sf_v2sf_v2sf;
17361   tree void_ftype_pdi_di_di;
17362   tree void_ftype_pv16qi_v16qi_v16qi;
17363   tree void_ftype_pv8hi_v8hi_v8hi;
17364   tree void_ftype_pv4si_v4si_v4si;
17365   tree void_ftype_pv4sf_v4sf_v4sf;
17366   tree void_ftype_pv2di_v2di_v2di;
17367 
17368   tree reinterp_ftype_dreg[5][5];
17369   tree reinterp_ftype_qreg[5][5];
17370   tree dreg_types[5], qreg_types[5];
17371 
17372   /* Create distinguished type nodes for NEON vector element types,
17373      and pointers to values of such types, so we can detect them later.  */
17374   neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
17375   neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
17376   neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
17377   neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
17378   neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
17379   neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
17380   neon_float_type_node = make_node (REAL_TYPE);
17381   TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
17382   layout_type (neon_float_type_node);
17383 
17384   /* Define typedefs which exactly correspond to the modes we are basing vector
17385      types on.  If you change these names you'll need to change
17386      the table used by arm_mangle_type too.  */
17387   (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
17388 					     "__builtin_neon_qi");
17389   (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
17390 					     "__builtin_neon_hi");
17391   (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
17392 					     "__builtin_neon_si");
17393   (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
17394 					     "__builtin_neon_sf");
17395   (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
17396 					     "__builtin_neon_di");
17397   (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
17398 					     "__builtin_neon_poly8");
17399   (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
17400 					     "__builtin_neon_poly16");
17401 
17402   intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
17403   intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
17404   intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
17405   intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
17406   float_pointer_node = build_pointer_type (neon_float_type_node);
17407 
17408   /* Next create constant-qualified versions of the above types.  */
17409   const_intQI_node = build_qualified_type (neon_intQI_type_node,
17410 					   TYPE_QUAL_CONST);
17411   const_intHI_node = build_qualified_type (neon_intHI_type_node,
17412 					   TYPE_QUAL_CONST);
17413   const_intSI_node = build_qualified_type (neon_intSI_type_node,
17414 					   TYPE_QUAL_CONST);
17415   const_intDI_node = build_qualified_type (neon_intDI_type_node,
17416 					   TYPE_QUAL_CONST);
17417   const_float_node = build_qualified_type (neon_float_type_node,
17418 					   TYPE_QUAL_CONST);
17419 
17420   const_intQI_pointer_node = build_pointer_type (const_intQI_node);
17421   const_intHI_pointer_node = build_pointer_type (const_intHI_node);
17422   const_intSI_pointer_node = build_pointer_type (const_intSI_node);
17423   const_intDI_pointer_node = build_pointer_type (const_intDI_node);
17424   const_float_pointer_node = build_pointer_type (const_float_node);
17425 
17426   /* Now create vector types based on our NEON element types.  */
17427   /* 64-bit vectors.  */
17428   V8QI_type_node =
17429     build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
17430   V4HI_type_node =
17431     build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
17432   V2SI_type_node =
17433     build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
17434   V2SF_type_node =
17435     build_vector_type_for_mode (neon_float_type_node, V2SFmode);
17436   /* 128-bit vectors.  */
17437   V16QI_type_node =
17438     build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
17439   V8HI_type_node =
17440     build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
17441   V4SI_type_node =
17442     build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
17443   V4SF_type_node =
17444     build_vector_type_for_mode (neon_float_type_node, V4SFmode);
17445   V2DI_type_node =
17446     build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
17447 
17448   /* Unsigned integer types for various mode sizes.  */
17449   intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
17450   intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
17451   intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
17452   intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
17453 
17454   (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
17455 					     "__builtin_neon_uqi");
17456   (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
17457 					     "__builtin_neon_uhi");
17458   (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
17459 					     "__builtin_neon_usi");
17460   (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
17461 					     "__builtin_neon_udi");
17462 
17463   /* Opaque integer types for structures of vectors.  */
17464   intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
17465   intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
17466   intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
17467   intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
17468 
17469   (*lang_hooks.types.register_builtin_type) (intTI_type_node,
17470 					     "__builtin_neon_ti");
17471   (*lang_hooks.types.register_builtin_type) (intEI_type_node,
17472 					     "__builtin_neon_ei");
17473   (*lang_hooks.types.register_builtin_type) (intOI_type_node,
17474 					     "__builtin_neon_oi");
17475   (*lang_hooks.types.register_builtin_type) (intCI_type_node,
17476 					     "__builtin_neon_ci");
17477   (*lang_hooks.types.register_builtin_type) (intXI_type_node,
17478 					     "__builtin_neon_xi");
17479 
17480   /* Pointers to vector types.  */
17481   V8QI_pointer_node = build_pointer_type (V8QI_type_node);
17482   V4HI_pointer_node = build_pointer_type (V4HI_type_node);
17483   V2SI_pointer_node = build_pointer_type (V2SI_type_node);
17484   V2SF_pointer_node = build_pointer_type (V2SF_type_node);
17485   V16QI_pointer_node = build_pointer_type (V16QI_type_node);
17486   V8HI_pointer_node = build_pointer_type (V8HI_type_node);
17487   V4SI_pointer_node = build_pointer_type (V4SI_type_node);
17488   V4SF_pointer_node = build_pointer_type (V4SF_type_node);
17489   V2DI_pointer_node = build_pointer_type (V2DI_type_node);
17490 
17491   /* Operations which return results as pairs.  */
17492   void_ftype_pv8qi_v8qi_v8qi =
17493     build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
17494   			      V8QI_type_node, NULL);
17495   void_ftype_pv4hi_v4hi_v4hi =
17496     build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
17497   			      V4HI_type_node, NULL);
17498   void_ftype_pv2si_v2si_v2si =
17499     build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
17500   			      V2SI_type_node, NULL);
17501   void_ftype_pv2sf_v2sf_v2sf =
17502     build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
17503   			      V2SF_type_node, NULL);
17504   void_ftype_pdi_di_di =
17505     build_function_type_list (void_type_node, intDI_pointer_node,
17506 			      neon_intDI_type_node, neon_intDI_type_node, NULL);
17507   void_ftype_pv16qi_v16qi_v16qi =
17508     build_function_type_list (void_type_node, V16QI_pointer_node,
17509 			      V16QI_type_node, V16QI_type_node, NULL);
17510   void_ftype_pv8hi_v8hi_v8hi =
17511     build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
17512   			      V8HI_type_node, NULL);
17513   void_ftype_pv4si_v4si_v4si =
17514     build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
17515   			      V4SI_type_node, NULL);
17516   void_ftype_pv4sf_v4sf_v4sf =
17517     build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
17518   			      V4SF_type_node, NULL);
17519   void_ftype_pv2di_v2di_v2di =
17520     build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
17521 			      V2DI_type_node, NULL);
17522 
17523   dreg_types[0] = V8QI_type_node;
17524   dreg_types[1] = V4HI_type_node;
17525   dreg_types[2] = V2SI_type_node;
17526   dreg_types[3] = V2SF_type_node;
17527   dreg_types[4] = neon_intDI_type_node;
17528 
17529   qreg_types[0] = V16QI_type_node;
17530   qreg_types[1] = V8HI_type_node;
17531   qreg_types[2] = V4SI_type_node;
17532   qreg_types[3] = V4SF_type_node;
17533   qreg_types[4] = V2DI_type_node;
17534 
17535   for (i = 0; i < 5; i++)
17536     {
17537       int j;
17538       for (j = 0; j < 5; j++)
17539         {
17540           reinterp_ftype_dreg[i][j]
17541             = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
17542           reinterp_ftype_qreg[i][j]
17543             = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
17544         }
17545     }
17546 
17547   for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
17548     {
17549       neon_builtin_datum *d = &neon_builtin_data[i];
17550       unsigned int j, codeidx = 0;
17551 
17552       d->base_fcode = fcode;
17553 
17554       for (j = 0; j < T_MAX; j++)
17555 	{
17556 	  const char* const modenames[] = {
17557 	    "v8qi", "v4hi", "v2si", "v2sf", "di",
17558 	    "v16qi", "v8hi", "v4si", "v4sf", "v2di"
17559 	  };
17560 	  char namebuf[60];
17561 	  tree ftype = NULL;
17562 	  enum insn_code icode;
17563 	  int is_load = 0, is_store = 0;
17564 
17565           if ((d->bits & (1 << j)) == 0)
17566             continue;
17567 
17568           icode = d->codes[codeidx++];
17569 
17570           switch (d->itype)
17571             {
17572 	    case NEON_LOAD1:
17573 	    case NEON_LOAD1LANE:
17574 	    case NEON_LOADSTRUCT:
17575 	    case NEON_LOADSTRUCTLANE:
17576 	      is_load = 1;
17577 	      /* Fall through.  */
17578 	    case NEON_STORE1:
17579 	    case NEON_STORE1LANE:
17580 	    case NEON_STORESTRUCT:
17581 	    case NEON_STORESTRUCTLANE:
17582 	      if (!is_load)
17583 	        is_store = 1;
17584 	      /* Fall through.  */
17585             case NEON_UNOP:
17586 	    case NEON_BINOP:
17587 	    case NEON_LOGICBINOP:
17588 	    case NEON_SHIFTINSERT:
17589 	    case NEON_TERNOP:
17590 	    case NEON_GETLANE:
17591 	    case NEON_SETLANE:
17592 	    case NEON_CREATE:
17593 	    case NEON_DUP:
17594 	    case NEON_DUPLANE:
17595 	    case NEON_SHIFTIMM:
17596 	    case NEON_SHIFTACC:
17597 	    case NEON_COMBINE:
17598 	    case NEON_SPLIT:
17599 	    case NEON_CONVERT:
17600 	    case NEON_FIXCONV:
17601 	    case NEON_LANEMUL:
17602 	    case NEON_LANEMULL:
17603 	    case NEON_LANEMULH:
17604 	    case NEON_LANEMAC:
17605 	    case NEON_SCALARMUL:
17606 	    case NEON_SCALARMULL:
17607 	    case NEON_SCALARMULH:
17608 	    case NEON_SCALARMAC:
17609 	    case NEON_SELECT:
17610 	    case NEON_VTBL:
17611 	    case NEON_VTBX:
17612 	      {
17613 		int k;
17614 		tree return_type = void_type_node, args = void_list_node;
17615 
17616 		/* Build a function type directly from the insn_data for this
17617 		   builtin.  The build_function_type() function takes care of
17618 		   removing duplicates for us.  */
17619 		for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
17620 		  {
17621 		    tree eltype;
17622 
17623 		    if (is_load && k == 1)
17624 		      {
17625 		        /* Neon load patterns always have the memory operand
17626 			   (a SImode pointer) in the operand 1 position.  We
17627 			   want a const pointer to the element type in that
17628 			   position.  */
17629 		        gcc_assert (insn_data[icode].operand[k].mode == SImode);
17630 
17631 			switch (1 << j)
17632 			  {
17633 			  case T_V8QI:
17634 			  case T_V16QI:
17635 			    eltype = const_intQI_pointer_node;
17636 			    break;
17637 
17638 			  case T_V4HI:
17639 			  case T_V8HI:
17640 			    eltype = const_intHI_pointer_node;
17641 			    break;
17642 
17643 			  case T_V2SI:
17644 			  case T_V4SI:
17645 			    eltype = const_intSI_pointer_node;
17646 			    break;
17647 
17648 			  case T_V2SF:
17649 			  case T_V4SF:
17650 			    eltype = const_float_pointer_node;
17651 			    break;
17652 
17653 			  case T_DI:
17654 			  case T_V2DI:
17655 			    eltype = const_intDI_pointer_node;
17656 			    break;
17657 
17658 			  default: gcc_unreachable ();
17659 			  }
17660   		      }
17661 		    else if (is_store && k == 0)
17662 		      {
17663 		        /* Similarly, Neon store patterns use operand 0 as
17664 			   the memory location to store to (a SImode pointer).
17665 			   Use a pointer to the element type of the store in
17666 			   that position.  */
17667 			gcc_assert (insn_data[icode].operand[k].mode == SImode);
17668 
17669 			switch (1 << j)
17670 			  {
17671 			  case T_V8QI:
17672 			  case T_V16QI:
17673 			    eltype = intQI_pointer_node;
17674 			    break;
17675 
17676 			  case T_V4HI:
17677 			  case T_V8HI:
17678 			    eltype = intHI_pointer_node;
17679 			    break;
17680 
17681 			  case T_V2SI:
17682 			  case T_V4SI:
17683 			    eltype = intSI_pointer_node;
17684 			    break;
17685 
17686 			  case T_V2SF:
17687 			  case T_V4SF:
17688 			    eltype = float_pointer_node;
17689 			    break;
17690 
17691 			  case T_DI:
17692 			  case T_V2DI:
17693 			    eltype = intDI_pointer_node;
17694 			    break;
17695 
17696 			  default: gcc_unreachable ();
17697 			  }
17698 		      }
17699 		    else
17700 		      {
17701 			switch (insn_data[icode].operand[k].mode)
17702 	        	  {
17703 			  case VOIDmode: eltype = void_type_node; break;
17704 			  /* Scalars.  */
17705 			  case QImode: eltype = neon_intQI_type_node; break;
17706 			  case HImode: eltype = neon_intHI_type_node; break;
17707 			  case SImode: eltype = neon_intSI_type_node; break;
17708 			  case SFmode: eltype = neon_float_type_node; break;
17709 			  case DImode: eltype = neon_intDI_type_node; break;
17710 			  case TImode: eltype = intTI_type_node; break;
17711 			  case EImode: eltype = intEI_type_node; break;
17712 			  case OImode: eltype = intOI_type_node; break;
17713 			  case CImode: eltype = intCI_type_node; break;
17714 			  case XImode: eltype = intXI_type_node; break;
17715 			  /* 64-bit vectors.  */
17716 			  case V8QImode: eltype = V8QI_type_node; break;
17717 			  case V4HImode: eltype = V4HI_type_node; break;
17718 			  case V2SImode: eltype = V2SI_type_node; break;
17719 			  case V2SFmode: eltype = V2SF_type_node; break;
17720 			  /* 128-bit vectors.  */
17721 			  case V16QImode: eltype = V16QI_type_node; break;
17722 			  case V8HImode: eltype = V8HI_type_node; break;
17723 			  case V4SImode: eltype = V4SI_type_node; break;
17724 			  case V4SFmode: eltype = V4SF_type_node; break;
17725 			  case V2DImode: eltype = V2DI_type_node; break;
17726 			  default: gcc_unreachable ();
17727 			  }
17728 		      }
17729 
17730 		    if (k == 0 && !is_store)
17731 	              return_type = eltype;
17732 		    else
17733 		      args = tree_cons (NULL_TREE, eltype, args);
17734 		  }
17735 
17736 		ftype = build_function_type (return_type, args);
17737 	      }
17738 	      break;
17739 
17740 	    case NEON_RESULTPAIR:
17741               {
17742                 switch (insn_data[icode].operand[1].mode)
17743                   {
17744 		  case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
17745                   case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
17746                   case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
17747                   case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
17748                   case DImode: ftype = void_ftype_pdi_di_di; break;
17749                   case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
17750                   case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
17751                   case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
17752                   case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
17753                   case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
17754                   default: gcc_unreachable ();
17755                   }
17756               }
17757               break;
17758 
17759 	    case NEON_REINTERP:
17760               {
17761                 /* We iterate over 5 doubleword types, then 5 quadword
17762                    types.  */
17763                 int rhs = j % 5;
17764                 switch (insn_data[icode].operand[0].mode)
17765                   {
17766                   case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
17767                   case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
17768                   case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
17769                   case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
17770                   case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
17771                   case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
17772                   case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
17773                   case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
17774 		  case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
17775                   case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
17776                   default: gcc_unreachable ();
17777                   }
17778               }
17779               break;
17780 
17781             default:
17782               gcc_unreachable ();
17783             }
17784 
17785           gcc_assert (ftype != NULL);
17786 
17787           sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
17788 
17789           add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
17790 				NULL_TREE);
17791         }
17792     }
17793 }
17794 
17795 static void
17796 arm_init_fp16_builtins (void)
17797 {
17798   tree fp16_type = make_node (REAL_TYPE);
17799   TYPE_PRECISION (fp16_type) = 16;
17800   layout_type (fp16_type);
17801   (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
17802 }
17803 
17804 static void
17805 arm_init_builtins (void)
17806 {
17807   arm_init_tls_builtins ();
17808 
17809   if (TARGET_REALLY_IWMMXT)
17810     arm_init_iwmmxt_builtins ();
17811 
17812   if (TARGET_NEON)
17813     arm_init_neon_builtins ();
17814 
17815   if (arm_fp16_format)
17816     arm_init_fp16_builtins ();
17817 }
17818 
17819 /* Implement TARGET_INVALID_PARAMETER_TYPE.  */
17820 
17821 static const char *
17822 arm_invalid_parameter_type (const_tree t)
17823 {
17824   if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17825     return N_("function parameters cannot have __fp16 type");
17826   return NULL;
17827 }
17828 
17829 /* Implement TARGET_INVALID_PARAMETER_TYPE.  */
17830 
17831 static const char *
17832 arm_invalid_return_type (const_tree t)
17833 {
17834   if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17835     return N_("functions cannot return __fp16 type");
17836   return NULL;
17837 }
17838 
17839 /* Implement TARGET_PROMOTED_TYPE.  */
17840 
17841 static tree
17842 arm_promoted_type (const_tree t)
17843 {
17844   if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17845     return float_type_node;
17846   return NULL_TREE;
17847 }
17848 
17849 /* Implement TARGET_CONVERT_TO_TYPE.
17850    Specifically, this hook implements the peculiarity of the ARM
17851    half-precision floating-point C semantics that requires conversions between
17852    __fp16 to or from double to do an intermediate conversion to float.  */
17853 
17854 static tree
17855 arm_convert_to_type (tree type, tree expr)
17856 {
17857   tree fromtype = TREE_TYPE (expr);
17858   if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
17859     return NULL_TREE;
17860   if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
17861       || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
17862     return convert (type, convert (float_type_node, expr));
17863   return NULL_TREE;
17864 }
17865 
17866 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
17867    This simply adds HFmode as a supported mode; even though we don't
17868    implement arithmetic on this type directly, it's supported by
17869    optabs conversions, much the way the double-word arithmetic is
17870    special-cased in the default hook.  */
17871 
17872 static bool
17873 arm_scalar_mode_supported_p (enum machine_mode mode)
17874 {
17875   if (mode == HFmode)
17876     return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
17877   else
17878     return default_scalar_mode_supported_p (mode);
17879 }
17880 
17881 /* Errors in the source file can cause expand_expr to return const0_rtx
17882    where we expect a vector.  To avoid crashing, use one of the vector
17883    clear instructions.  */
17884 
17885 static rtx
17886 safe_vector_operand (rtx x, enum machine_mode mode)
17887 {
17888   if (x != const0_rtx)
17889     return x;
17890   x = gen_reg_rtx (mode);
17891 
17892   emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
17893 			       : gen_rtx_SUBREG (DImode, x, 0)));
17894   return x;
17895 }
17896 
17897 /* Subroutine of arm_expand_builtin to take care of binop insns.  */
17898 
17899 static rtx
17900 arm_expand_binop_builtin (enum insn_code icode,
17901 			  tree exp, rtx target)
17902 {
17903   rtx pat;
17904   tree arg0 = CALL_EXPR_ARG (exp, 0);
17905   tree arg1 = CALL_EXPR_ARG (exp, 1);
17906   rtx op0 = expand_normal (arg0);
17907   rtx op1 = expand_normal (arg1);
17908   enum machine_mode tmode = insn_data[icode].operand[0].mode;
17909   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17910   enum machine_mode mode1 = insn_data[icode].operand[2].mode;
17911 
17912   if (VECTOR_MODE_P (mode0))
17913     op0 = safe_vector_operand (op0, mode0);
17914   if (VECTOR_MODE_P (mode1))
17915     op1 = safe_vector_operand (op1, mode1);
17916 
17917   if (! target
17918       || GET_MODE (target) != tmode
17919       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17920     target = gen_reg_rtx (tmode);
17921 
17922   gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
17923 
17924   if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17925     op0 = copy_to_mode_reg (mode0, op0);
17926   if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
17927     op1 = copy_to_mode_reg (mode1, op1);
17928 
17929   pat = GEN_FCN (icode) (target, op0, op1);
17930   if (! pat)
17931     return 0;
17932   emit_insn (pat);
17933   return target;
17934 }
17935 
17936 /* Subroutine of arm_expand_builtin to take care of unop insns.  */
17937 
17938 static rtx
17939 arm_expand_unop_builtin (enum insn_code icode,
17940 			 tree exp, rtx target, int do_load)
17941 {
17942   rtx pat;
17943   tree arg0 = CALL_EXPR_ARG (exp, 0);
17944   rtx op0 = expand_normal (arg0);
17945   enum machine_mode tmode = insn_data[icode].operand[0].mode;
17946   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17947 
17948   if (! target
17949       || GET_MODE (target) != tmode
17950       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17951     target = gen_reg_rtx (tmode);
17952   if (do_load)
17953     op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17954   else
17955     {
17956       if (VECTOR_MODE_P (mode0))
17957 	op0 = safe_vector_operand (op0, mode0);
17958 
17959       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17960 	op0 = copy_to_mode_reg (mode0, op0);
17961     }
17962 
17963   pat = GEN_FCN (icode) (target, op0);
17964   if (! pat)
17965     return 0;
17966   emit_insn (pat);
17967   return target;
17968 }
17969 
17970 static int
17971 neon_builtin_compare (const void *a, const void *b)
17972 {
17973   const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
17974   const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
17975   unsigned int soughtcode = key->base_fcode;
17976 
17977   if (soughtcode >= memb->base_fcode
17978       && soughtcode < memb->base_fcode + memb->num_vars)
17979     return 0;
17980   else if (soughtcode < memb->base_fcode)
17981     return -1;
17982   else
17983     return 1;
17984 }
17985 
17986 static enum insn_code
17987 locate_neon_builtin_icode (int fcode, neon_itype *itype)
17988 {
17989   neon_builtin_datum key, *found;
17990   int idx;
17991 
17992   key.base_fcode = fcode;
17993   found = (neon_builtin_datum *)
17994     bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
17995 		   sizeof (neon_builtin_data[0]), neon_builtin_compare);
17996   gcc_assert (found);
17997   idx = fcode - (int) found->base_fcode;
17998   gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
17999 
18000   if (itype)
18001     *itype = found->itype;
18002 
18003   return found->codes[idx];
18004 }
18005 
18006 typedef enum {
18007   NEON_ARG_COPY_TO_REG,
18008   NEON_ARG_CONSTANT,
18009   NEON_ARG_STOP
18010 } builtin_arg;
18011 
18012 #define NEON_MAX_BUILTIN_ARGS 5
18013 
18014 /* Expand a Neon builtin.  */
18015 static rtx
18016 arm_expand_neon_args (rtx target, int icode, int have_retval,
18017 		      tree exp, ...)
18018 {
18019   va_list ap;
18020   rtx pat;
18021   tree arg[NEON_MAX_BUILTIN_ARGS];
18022   rtx op[NEON_MAX_BUILTIN_ARGS];
18023   enum machine_mode tmode = insn_data[icode].operand[0].mode;
18024   enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
18025   int argc = 0;
18026 
18027   if (have_retval
18028       && (!target
18029 	  || GET_MODE (target) != tmode
18030 	  || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
18031     target = gen_reg_rtx (tmode);
18032 
18033   va_start (ap, exp);
18034 
18035   for (;;)
18036     {
18037       builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
18038 
18039       if (thisarg == NEON_ARG_STOP)
18040         break;
18041       else
18042         {
18043           arg[argc] = CALL_EXPR_ARG (exp, argc);
18044           op[argc] = expand_normal (arg[argc]);
18045           mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
18046 
18047           switch (thisarg)
18048             {
18049             case NEON_ARG_COPY_TO_REG:
18050               /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
18051               if (!(*insn_data[icode].operand[argc + have_retval].predicate)
18052                      (op[argc], mode[argc]))
18053                 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
18054               break;
18055 
18056             case NEON_ARG_CONSTANT:
18057               /* FIXME: This error message is somewhat unhelpful.  */
18058               if (!(*insn_data[icode].operand[argc + have_retval].predicate)
18059                     (op[argc], mode[argc]))
18060 		error ("argument must be a constant");
18061               break;
18062 
18063             case NEON_ARG_STOP:
18064               gcc_unreachable ();
18065             }
18066 
18067           argc++;
18068         }
18069     }
18070 
18071   va_end (ap);
18072 
18073   if (have_retval)
18074     switch (argc)
18075       {
18076       case 1:
18077 	pat = GEN_FCN (icode) (target, op[0]);
18078 	break;
18079 
18080       case 2:
18081 	pat = GEN_FCN (icode) (target, op[0], op[1]);
18082 	break;
18083 
18084       case 3:
18085 	pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
18086 	break;
18087 
18088       case 4:
18089 	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
18090 	break;
18091 
18092       case 5:
18093 	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
18094 	break;
18095 
18096       default:
18097 	gcc_unreachable ();
18098       }
18099   else
18100     switch (argc)
18101       {
18102       case 1:
18103 	pat = GEN_FCN (icode) (op[0]);
18104 	break;
18105 
18106       case 2:
18107 	pat = GEN_FCN (icode) (op[0], op[1]);
18108 	break;
18109 
18110       case 3:
18111 	pat = GEN_FCN (icode) (op[0], op[1], op[2]);
18112 	break;
18113 
18114       case 4:
18115 	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
18116 	break;
18117 
18118       case 5:
18119 	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
18120         break;
18121 
18122       default:
18123 	gcc_unreachable ();
18124       }
18125 
18126   if (!pat)
18127     return 0;
18128 
18129   emit_insn (pat);
18130 
18131   return target;
18132 }
18133 
18134 /* Expand a Neon builtin. These are "special" because they don't have symbolic
18135    constants defined per-instruction or per instruction-variant. Instead, the
18136    required info is looked up in the table neon_builtin_data.  */
18137 static rtx
18138 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
18139 {
18140   neon_itype itype;
18141   enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
18142 
18143   switch (itype)
18144     {
18145     case NEON_UNOP:
18146     case NEON_CONVERT:
18147     case NEON_DUPLANE:
18148       return arm_expand_neon_args (target, icode, 1, exp,
18149         NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
18150 
18151     case NEON_BINOP:
18152     case NEON_SETLANE:
18153     case NEON_SCALARMUL:
18154     case NEON_SCALARMULL:
18155     case NEON_SCALARMULH:
18156     case NEON_SHIFTINSERT:
18157     case NEON_LOGICBINOP:
18158       return arm_expand_neon_args (target, icode, 1, exp,
18159         NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18160         NEON_ARG_STOP);
18161 
18162     case NEON_TERNOP:
18163       return arm_expand_neon_args (target, icode, 1, exp,
18164         NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18165         NEON_ARG_CONSTANT, NEON_ARG_STOP);
18166 
18167     case NEON_GETLANE:
18168     case NEON_FIXCONV:
18169     case NEON_SHIFTIMM:
18170       return arm_expand_neon_args (target, icode, 1, exp,
18171         NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
18172         NEON_ARG_STOP);
18173 
18174     case NEON_CREATE:
18175       return arm_expand_neon_args (target, icode, 1, exp,
18176         NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18177 
18178     case NEON_DUP:
18179     case NEON_SPLIT:
18180     case NEON_REINTERP:
18181       return arm_expand_neon_args (target, icode, 1, exp,
18182         NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18183 
18184     case NEON_COMBINE:
18185     case NEON_VTBL:
18186       return arm_expand_neon_args (target, icode, 1, exp,
18187         NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18188 
18189     case NEON_RESULTPAIR:
18190       return arm_expand_neon_args (target, icode, 0, exp,
18191         NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18192         NEON_ARG_STOP);
18193 
18194     case NEON_LANEMUL:
18195     case NEON_LANEMULL:
18196     case NEON_LANEMULH:
18197       return arm_expand_neon_args (target, icode, 1, exp,
18198         NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18199         NEON_ARG_CONSTANT, NEON_ARG_STOP);
18200 
18201     case NEON_LANEMAC:
18202       return arm_expand_neon_args (target, icode, 1, exp,
18203         NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18204         NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
18205 
18206     case NEON_SHIFTACC:
18207       return arm_expand_neon_args (target, icode, 1, exp,
18208         NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18209         NEON_ARG_CONSTANT, NEON_ARG_STOP);
18210 
18211     case NEON_SCALARMAC:
18212       return arm_expand_neon_args (target, icode, 1, exp,
18213 	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18214         NEON_ARG_CONSTANT, NEON_ARG_STOP);
18215 
18216     case NEON_SELECT:
18217     case NEON_VTBX:
18218       return arm_expand_neon_args (target, icode, 1, exp,
18219 	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18220         NEON_ARG_STOP);
18221 
18222     case NEON_LOAD1:
18223     case NEON_LOADSTRUCT:
18224       return arm_expand_neon_args (target, icode, 1, exp,
18225 	NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18226 
18227     case NEON_LOAD1LANE:
18228     case NEON_LOADSTRUCTLANE:
18229       return arm_expand_neon_args (target, icode, 1, exp,
18230 	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18231 	NEON_ARG_STOP);
18232 
18233     case NEON_STORE1:
18234     case NEON_STORESTRUCT:
18235       return arm_expand_neon_args (target, icode, 0, exp,
18236 	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18237 
18238     case NEON_STORE1LANE:
18239     case NEON_STORESTRUCTLANE:
18240       return arm_expand_neon_args (target, icode, 0, exp,
18241 	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18242 	NEON_ARG_STOP);
18243     }
18244 
18245   gcc_unreachable ();
18246 }
18247 
18248 /* Emit code to reinterpret one Neon type as another, without altering bits.  */
18249 void
18250 neon_reinterpret (rtx dest, rtx src)
18251 {
18252   emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
18253 }
18254 
18255 /* Emit code to place a Neon pair result in memory locations (with equal
18256    registers).  */
18257 void
18258 neon_emit_pair_result_insn (enum machine_mode mode,
18259 			    rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
18260                             rtx op1, rtx op2)
18261 {
18262   rtx mem = gen_rtx_MEM (mode, destaddr);
18263   rtx tmp1 = gen_reg_rtx (mode);
18264   rtx tmp2 = gen_reg_rtx (mode);
18265 
18266   emit_insn (intfn (tmp1, op1, op2, tmp2));
18267 
18268   emit_move_insn (mem, tmp1);
18269   mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
18270   emit_move_insn (mem, tmp2);
18271 }
18272 
18273 /* Set up operands for a register copy from src to dest, taking care not to
18274    clobber registers in the process.
18275    FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
18276    be called with a large N, so that should be OK.  */
18277 
18278 void
18279 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
18280 {
18281   unsigned int copied = 0, opctr = 0;
18282   unsigned int done = (1 << count) - 1;
18283   unsigned int i, j;
18284 
18285   while (copied != done)
18286     {
18287       for (i = 0; i < count; i++)
18288         {
18289           int good = 1;
18290 
18291           for (j = 0; good && j < count; j++)
18292             if (i != j && (copied & (1 << j)) == 0
18293                 && reg_overlap_mentioned_p (src[j], dest[i]))
18294               good = 0;
18295 
18296           if (good)
18297             {
18298               operands[opctr++] = dest[i];
18299               operands[opctr++] = src[i];
18300               copied |= 1 << i;
18301             }
18302         }
18303     }
18304 
18305   gcc_assert (opctr == count * 2);
18306 }
18307 
18308 /* Expand an expression EXP that calls a built-in function,
18309    with result going to TARGET if that's convenient
18310    (and in mode MODE if that's convenient).
18311    SUBTARGET may be used as the target for computing one of EXP's operands.
18312    IGNORE is nonzero if the value is to be ignored.  */
18313 
18314 static rtx
18315 arm_expand_builtin (tree exp,
18316 		    rtx target,
18317 		    rtx subtarget ATTRIBUTE_UNUSED,
18318 		    enum machine_mode mode ATTRIBUTE_UNUSED,
18319 		    int ignore ATTRIBUTE_UNUSED)
18320 {
18321   const struct builtin_description * d;
18322   enum insn_code    icode;
18323   tree              fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
18324   tree              arg0;
18325   tree              arg1;
18326   tree              arg2;
18327   rtx               op0;
18328   rtx               op1;
18329   rtx               op2;
18330   rtx               pat;
18331   int               fcode = DECL_FUNCTION_CODE (fndecl);
18332   size_t            i;
18333   enum machine_mode tmode;
18334   enum machine_mode mode0;
18335   enum machine_mode mode1;
18336   enum machine_mode mode2;
18337 
18338   if (fcode >= ARM_BUILTIN_NEON_BASE)
18339     return arm_expand_neon_builtin (fcode, exp, target);
18340 
18341   switch (fcode)
18342     {
18343     case ARM_BUILTIN_TEXTRMSB:
18344     case ARM_BUILTIN_TEXTRMUB:
18345     case ARM_BUILTIN_TEXTRMSH:
18346     case ARM_BUILTIN_TEXTRMUH:
18347     case ARM_BUILTIN_TEXTRMSW:
18348     case ARM_BUILTIN_TEXTRMUW:
18349       icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
18350 	       : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
18351 	       : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
18352 	       : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
18353 	       : CODE_FOR_iwmmxt_textrmw);
18354 
18355       arg0 = CALL_EXPR_ARG (exp, 0);
18356       arg1 = CALL_EXPR_ARG (exp, 1);
18357       op0 = expand_normal (arg0);
18358       op1 = expand_normal (arg1);
18359       tmode = insn_data[icode].operand[0].mode;
18360       mode0 = insn_data[icode].operand[1].mode;
18361       mode1 = insn_data[icode].operand[2].mode;
18362 
18363       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18364 	op0 = copy_to_mode_reg (mode0, op0);
18365       if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18366 	{
18367 	  /* @@@ better error message */
18368 	  error ("selector must be an immediate");
18369 	  return gen_reg_rtx (tmode);
18370 	}
18371       if (target == 0
18372 	  || GET_MODE (target) != tmode
18373 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18374 	target = gen_reg_rtx (tmode);
18375       pat = GEN_FCN (icode) (target, op0, op1);
18376       if (! pat)
18377 	return 0;
18378       emit_insn (pat);
18379       return target;
18380 
18381     case ARM_BUILTIN_TINSRB:
18382     case ARM_BUILTIN_TINSRH:
18383     case ARM_BUILTIN_TINSRW:
18384       icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
18385 	       : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
18386 	       : CODE_FOR_iwmmxt_tinsrw);
18387       arg0 = CALL_EXPR_ARG (exp, 0);
18388       arg1 = CALL_EXPR_ARG (exp, 1);
18389       arg2 = CALL_EXPR_ARG (exp, 2);
18390       op0 = expand_normal (arg0);
18391       op1 = expand_normal (arg1);
18392       op2 = expand_normal (arg2);
18393       tmode = insn_data[icode].operand[0].mode;
18394       mode0 = insn_data[icode].operand[1].mode;
18395       mode1 = insn_data[icode].operand[2].mode;
18396       mode2 = insn_data[icode].operand[3].mode;
18397 
18398       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18399 	op0 = copy_to_mode_reg (mode0, op0);
18400       if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18401 	op1 = copy_to_mode_reg (mode1, op1);
18402       if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18403 	{
18404 	  /* @@@ better error message */
18405 	  error ("selector must be an immediate");
18406 	  return const0_rtx;
18407 	}
18408       if (target == 0
18409 	  || GET_MODE (target) != tmode
18410 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18411 	target = gen_reg_rtx (tmode);
18412       pat = GEN_FCN (icode) (target, op0, op1, op2);
18413       if (! pat)
18414 	return 0;
18415       emit_insn (pat);
18416       return target;
18417 
18418     case ARM_BUILTIN_SETWCX:
18419       arg0 = CALL_EXPR_ARG (exp, 0);
18420       arg1 = CALL_EXPR_ARG (exp, 1);
18421       op0 = force_reg (SImode, expand_normal (arg0));
18422       op1 = expand_normal (arg1);
18423       emit_insn (gen_iwmmxt_tmcr (op1, op0));
18424       return 0;
18425 
18426     case ARM_BUILTIN_GETWCX:
18427       arg0 = CALL_EXPR_ARG (exp, 0);
18428       op0 = expand_normal (arg0);
18429       target = gen_reg_rtx (SImode);
18430       emit_insn (gen_iwmmxt_tmrc (target, op0));
18431       return target;
18432 
18433     case ARM_BUILTIN_WSHUFH:
18434       icode = CODE_FOR_iwmmxt_wshufh;
18435       arg0 = CALL_EXPR_ARG (exp, 0);
18436       arg1 = CALL_EXPR_ARG (exp, 1);
18437       op0 = expand_normal (arg0);
18438       op1 = expand_normal (arg1);
18439       tmode = insn_data[icode].operand[0].mode;
18440       mode1 = insn_data[icode].operand[1].mode;
18441       mode2 = insn_data[icode].operand[2].mode;
18442 
18443       if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18444 	op0 = copy_to_mode_reg (mode1, op0);
18445       if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18446 	{
18447 	  /* @@@ better error message */
18448 	  error ("mask must be an immediate");
18449 	  return const0_rtx;
18450 	}
18451       if (target == 0
18452 	  || GET_MODE (target) != tmode
18453 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18454 	target = gen_reg_rtx (tmode);
18455       pat = GEN_FCN (icode) (target, op0, op1);
18456       if (! pat)
18457 	return 0;
18458       emit_insn (pat);
18459       return target;
18460 
18461     case ARM_BUILTIN_WSADB:
18462       return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
18463     case ARM_BUILTIN_WSADH:
18464       return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
18465     case ARM_BUILTIN_WSADBZ:
18466       return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
18467     case ARM_BUILTIN_WSADHZ:
18468       return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
18469 
18470       /* Several three-argument builtins.  */
18471     case ARM_BUILTIN_WMACS:
18472     case ARM_BUILTIN_WMACU:
18473     case ARM_BUILTIN_WALIGN:
18474     case ARM_BUILTIN_TMIA:
18475     case ARM_BUILTIN_TMIAPH:
18476     case ARM_BUILTIN_TMIATT:
18477     case ARM_BUILTIN_TMIATB:
18478     case ARM_BUILTIN_TMIABT:
18479     case ARM_BUILTIN_TMIABB:
18480       icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
18481 	       : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
18482 	       : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
18483 	       : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
18484 	       : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
18485 	       : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
18486 	       : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
18487 	       : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
18488 	       : CODE_FOR_iwmmxt_walign);
18489       arg0 = CALL_EXPR_ARG (exp, 0);
18490       arg1 = CALL_EXPR_ARG (exp, 1);
18491       arg2 = CALL_EXPR_ARG (exp, 2);
18492       op0 = expand_normal (arg0);
18493       op1 = expand_normal (arg1);
18494       op2 = expand_normal (arg2);
18495       tmode = insn_data[icode].operand[0].mode;
18496       mode0 = insn_data[icode].operand[1].mode;
18497       mode1 = insn_data[icode].operand[2].mode;
18498       mode2 = insn_data[icode].operand[3].mode;
18499 
18500       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18501 	op0 = copy_to_mode_reg (mode0, op0);
18502       if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18503 	op1 = copy_to_mode_reg (mode1, op1);
18504       if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18505 	op2 = copy_to_mode_reg (mode2, op2);
18506       if (target == 0
18507 	  || GET_MODE (target) != tmode
18508 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18509 	target = gen_reg_rtx (tmode);
18510       pat = GEN_FCN (icode) (target, op0, op1, op2);
18511       if (! pat)
18512 	return 0;
18513       emit_insn (pat);
18514       return target;
18515 
18516     case ARM_BUILTIN_WZERO:
18517       target = gen_reg_rtx (DImode);
18518       emit_insn (gen_iwmmxt_clrdi (target));
18519       return target;
18520 
18521     case ARM_BUILTIN_THREAD_POINTER:
18522       return arm_load_tp (target);
18523 
18524     default:
18525       break;
18526     }
18527 
18528   for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18529     if (d->code == (const enum arm_builtins) fcode)
18530       return arm_expand_binop_builtin (d->icode, exp, target);
18531 
18532   for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18533     if (d->code == (const enum arm_builtins) fcode)
18534       return arm_expand_unop_builtin (d->icode, exp, target, 0);
18535 
18536   /* @@@ Should really do something sensible here.  */
18537   return NULL_RTX;
18538 }
18539 
18540 /* Return the number (counting from 0) of
18541    the least significant set bit in MASK.  */
18542 
18543 inline static int
18544 number_of_first_bit_set (unsigned mask)
18545 {
18546   int bit;
18547 
18548   for (bit = 0;
18549        (mask & (1 << bit)) == 0;
18550        ++bit)
18551     continue;
18552 
18553   return bit;
18554 }
18555 
18556 /* Emit code to push or pop registers to or from the stack.  F is the
18557    assembly file.  MASK is the registers to push or pop.  PUSH is
18558    nonzero if we should push, and zero if we should pop.  For debugging
18559    output, if pushing, adjust CFA_OFFSET by the amount of space added
18560    to the stack.  REAL_REGS should have the same number of bits set as
18561    MASK, and will be used instead (in the same order) to describe which
18562    registers were saved - this is used to mark the save slots when we
18563    push high registers after moving them to low registers.  */
18564 static void
18565 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
18566 	       unsigned long real_regs)
18567 {
18568   int regno;
18569   int lo_mask = mask & 0xFF;
18570   int pushed_words = 0;
18571 
18572   gcc_assert (mask);
18573 
18574   if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
18575     {
18576       /* Special case.  Do not generate a POP PC statement here, do it in
18577 	 thumb_exit() */
18578       thumb_exit (f, -1);
18579       return;
18580     }
18581 
18582   if (ARM_EABI_UNWIND_TABLES && push)
18583     {
18584       fprintf (f, "\t.save\t{");
18585       for (regno = 0; regno < 15; regno++)
18586 	{
18587 	  if (real_regs & (1 << regno))
18588 	    {
18589 	      if (real_regs & ((1 << regno) -1))
18590 		fprintf (f, ", ");
18591 	      asm_fprintf (f, "%r", regno);
18592 	    }
18593 	}
18594       fprintf (f, "}\n");
18595     }
18596 
18597   fprintf (f, "\t%s\t{", push ? "push" : "pop");
18598 
18599   /* Look at the low registers first.  */
18600   for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
18601     {
18602       if (lo_mask & 1)
18603 	{
18604 	  asm_fprintf (f, "%r", regno);
18605 
18606 	  if ((lo_mask & ~1) != 0)
18607 	    fprintf (f, ", ");
18608 
18609 	  pushed_words++;
18610 	}
18611     }
18612 
18613   if (push && (mask & (1 << LR_REGNUM)))
18614     {
18615       /* Catch pushing the LR.  */
18616       if (mask & 0xFF)
18617 	fprintf (f, ", ");
18618 
18619       asm_fprintf (f, "%r", LR_REGNUM);
18620 
18621       pushed_words++;
18622     }
18623   else if (!push && (mask & (1 << PC_REGNUM)))
18624     {
18625       /* Catch popping the PC.  */
18626       if (TARGET_INTERWORK || TARGET_BACKTRACE
18627 	  || crtl->calls_eh_return)
18628 	{
18629 	  /* The PC is never poped directly, instead
18630 	     it is popped into r3 and then BX is used.  */
18631 	  fprintf (f, "}\n");
18632 
18633 	  thumb_exit (f, -1);
18634 
18635 	  return;
18636 	}
18637       else
18638 	{
18639 	  if (mask & 0xFF)
18640 	    fprintf (f, ", ");
18641 
18642 	  asm_fprintf (f, "%r", PC_REGNUM);
18643 	}
18644     }
18645 
18646   fprintf (f, "}\n");
18647 
18648   if (push && pushed_words && dwarf2out_do_frame ())
18649     {
18650       char *l = dwarf2out_cfi_label (false);
18651       int pushed_mask = real_regs;
18652 
18653       *cfa_offset += pushed_words * 4;
18654       dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
18655 
18656       pushed_words = 0;
18657       pushed_mask = real_regs;
18658       for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
18659 	{
18660 	  if (pushed_mask & 1)
18661 	    dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
18662 	}
18663     }
18664 }
18665 
18666 /* Generate code to return from a thumb function.
18667    If 'reg_containing_return_addr' is -1, then the return address is
18668    actually on the stack, at the stack pointer.  */
18669 static void
18670 thumb_exit (FILE *f, int reg_containing_return_addr)
18671 {
18672   unsigned regs_available_for_popping;
18673   unsigned regs_to_pop;
18674   int pops_needed;
18675   unsigned available;
18676   unsigned required;
18677   int mode;
18678   int size;
18679   int restore_a4 = FALSE;
18680 
18681   /* Compute the registers we need to pop.  */
18682   regs_to_pop = 0;
18683   pops_needed = 0;
18684 
18685   if (reg_containing_return_addr == -1)
18686     {
18687       regs_to_pop |= 1 << LR_REGNUM;
18688       ++pops_needed;
18689     }
18690 
18691   if (TARGET_BACKTRACE)
18692     {
18693       /* Restore the (ARM) frame pointer and stack pointer.  */
18694       regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
18695       pops_needed += 2;
18696     }
18697 
18698   /* If there is nothing to pop then just emit the BX instruction and
18699      return.  */
18700   if (pops_needed == 0)
18701     {
18702       if (crtl->calls_eh_return)
18703 	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
18704 
18705       asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
18706       return;
18707     }
18708   /* Otherwise if we are not supporting interworking and we have not created
18709      a backtrace structure and the function was not entered in ARM mode then
18710      just pop the return address straight into the PC.  */
18711   else if (!TARGET_INTERWORK
18712 	   && !TARGET_BACKTRACE
18713 	   && !is_called_in_ARM_mode (current_function_decl)
18714 	   && !crtl->calls_eh_return)
18715     {
18716       asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
18717       return;
18718     }
18719 
18720   /* Find out how many of the (return) argument registers we can corrupt.  */
18721   regs_available_for_popping = 0;
18722 
18723   /* If returning via __builtin_eh_return, the bottom three registers
18724      all contain information needed for the return.  */
18725   if (crtl->calls_eh_return)
18726     size = 12;
18727   else
18728     {
18729       /* If we can deduce the registers used from the function's
18730 	 return value.  This is more reliable that examining
18731 	 df_regs_ever_live_p () because that will be set if the register is
18732 	 ever used in the function, not just if the register is used
18733 	 to hold a return value.  */
18734 
18735       if (crtl->return_rtx != 0)
18736 	mode = GET_MODE (crtl->return_rtx);
18737       else
18738 	mode = DECL_MODE (DECL_RESULT (current_function_decl));
18739 
18740       size = GET_MODE_SIZE (mode);
18741 
18742       if (size == 0)
18743 	{
18744 	  /* In a void function we can use any argument register.
18745 	     In a function that returns a structure on the stack
18746 	     we can use the second and third argument registers.  */
18747 	  if (mode == VOIDmode)
18748 	    regs_available_for_popping =
18749 	      (1 << ARG_REGISTER (1))
18750 	      | (1 << ARG_REGISTER (2))
18751 	      | (1 << ARG_REGISTER (3));
18752 	  else
18753 	    regs_available_for_popping =
18754 	      (1 << ARG_REGISTER (2))
18755 	      | (1 << ARG_REGISTER (3));
18756 	}
18757       else if (size <= 4)
18758 	regs_available_for_popping =
18759 	  (1 << ARG_REGISTER (2))
18760 	  | (1 << ARG_REGISTER (3));
18761       else if (size <= 8)
18762 	regs_available_for_popping =
18763 	  (1 << ARG_REGISTER (3));
18764     }
18765 
18766   /* Match registers to be popped with registers into which we pop them.  */
18767   for (available = regs_available_for_popping,
18768        required  = regs_to_pop;
18769        required != 0 && available != 0;
18770        available &= ~(available & - available),
18771        required  &= ~(required  & - required))
18772     -- pops_needed;
18773 
18774   /* If we have any popping registers left over, remove them.  */
18775   if (available > 0)
18776     regs_available_for_popping &= ~available;
18777 
18778   /* Otherwise if we need another popping register we can use
18779      the fourth argument register.  */
18780   else if (pops_needed)
18781     {
18782       /* If we have not found any free argument registers and
18783 	 reg a4 contains the return address, we must move it.  */
18784       if (regs_available_for_popping == 0
18785 	  && reg_containing_return_addr == LAST_ARG_REGNUM)
18786 	{
18787 	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
18788 	  reg_containing_return_addr = LR_REGNUM;
18789 	}
18790       else if (size > 12)
18791 	{
18792 	  /* Register a4 is being used to hold part of the return value,
18793 	     but we have dire need of a free, low register.  */
18794 	  restore_a4 = TRUE;
18795 
18796 	  asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
18797 	}
18798 
18799       if (reg_containing_return_addr != LAST_ARG_REGNUM)
18800 	{
18801 	  /* The fourth argument register is available.  */
18802 	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
18803 
18804 	  --pops_needed;
18805 	}
18806     }
18807 
18808   /* Pop as many registers as we can.  */
18809   thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
18810 		 regs_available_for_popping);
18811 
18812   /* Process the registers we popped.  */
18813   if (reg_containing_return_addr == -1)
18814     {
18815       /* The return address was popped into the lowest numbered register.  */
18816       regs_to_pop &= ~(1 << LR_REGNUM);
18817 
18818       reg_containing_return_addr =
18819 	number_of_first_bit_set (regs_available_for_popping);
18820 
18821       /* Remove this register for the mask of available registers, so that
18822          the return address will not be corrupted by further pops.  */
18823       regs_available_for_popping &= ~(1 << reg_containing_return_addr);
18824     }
18825 
18826   /* If we popped other registers then handle them here.  */
18827   if (regs_available_for_popping)
18828     {
18829       int frame_pointer;
18830 
18831       /* Work out which register currently contains the frame pointer.  */
18832       frame_pointer = number_of_first_bit_set (regs_available_for_popping);
18833 
18834       /* Move it into the correct place.  */
18835       asm_fprintf (f, "\tmov\t%r, %r\n",
18836 		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
18837 
18838       /* (Temporarily) remove it from the mask of popped registers.  */
18839       regs_available_for_popping &= ~(1 << frame_pointer);
18840       regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
18841 
18842       if (regs_available_for_popping)
18843 	{
18844 	  int stack_pointer;
18845 
18846 	  /* We popped the stack pointer as well,
18847 	     find the register that contains it.  */
18848 	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);
18849 
18850 	  /* Move it into the stack register.  */
18851 	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
18852 
18853 	  /* At this point we have popped all necessary registers, so
18854 	     do not worry about restoring regs_available_for_popping
18855 	     to its correct value:
18856 
18857 	     assert (pops_needed == 0)
18858 	     assert (regs_available_for_popping == (1 << frame_pointer))
18859 	     assert (regs_to_pop == (1 << STACK_POINTER))  */
18860 	}
18861       else
18862 	{
18863 	  /* Since we have just move the popped value into the frame
18864 	     pointer, the popping register is available for reuse, and
18865 	     we know that we still have the stack pointer left to pop.  */
18866 	  regs_available_for_popping |= (1 << frame_pointer);
18867 	}
18868     }
18869 
18870   /* If we still have registers left on the stack, but we no longer have
18871      any registers into which we can pop them, then we must move the return
18872      address into the link register and make available the register that
18873      contained it.  */
18874   if (regs_available_for_popping == 0 && pops_needed > 0)
18875     {
18876       regs_available_for_popping |= 1 << reg_containing_return_addr;
18877 
18878       asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
18879 		   reg_containing_return_addr);
18880 
18881       reg_containing_return_addr = LR_REGNUM;
18882     }
18883 
18884   /* If we have registers left on the stack then pop some more.
18885      We know that at most we will want to pop FP and SP.  */
18886   if (pops_needed > 0)
18887     {
18888       int  popped_into;
18889       int  move_to;
18890 
18891       thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
18892 		     regs_available_for_popping);
18893 
18894       /* We have popped either FP or SP.
18895 	 Move whichever one it is into the correct register.  */
18896       popped_into = number_of_first_bit_set (regs_available_for_popping);
18897       move_to     = number_of_first_bit_set (regs_to_pop);
18898 
18899       asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
18900 
18901       regs_to_pop &= ~(1 << move_to);
18902 
18903       --pops_needed;
18904     }
18905 
18906   /* If we still have not popped everything then we must have only
18907      had one register available to us and we are now popping the SP.  */
18908   if (pops_needed > 0)
18909     {
18910       int  popped_into;
18911 
18912       thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
18913 		     regs_available_for_popping);
18914 
18915       popped_into = number_of_first_bit_set (regs_available_for_popping);
18916 
18917       asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
18918       /*
18919 	assert (regs_to_pop == (1 << STACK_POINTER))
18920 	assert (pops_needed == 1)
18921       */
18922     }
18923 
18924   /* If necessary restore the a4 register.  */
18925   if (restore_a4)
18926     {
18927       if (reg_containing_return_addr != LR_REGNUM)
18928 	{
18929 	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
18930 	  reg_containing_return_addr = LR_REGNUM;
18931 	}
18932 
18933       asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
18934     }
18935 
18936   if (crtl->calls_eh_return)
18937     asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
18938 
18939   /* Return to caller.  */
18940   asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
18941 }
18942 
18943 
18944 void
18945 thumb1_final_prescan_insn (rtx insn)
18946 {
18947   if (flag_print_asm_name)
18948     asm_fprintf (asm_out_file, "%@ 0x%04x\n",
18949 		 INSN_ADDRESSES (INSN_UID (insn)));
18950 }
18951 
18952 int
18953 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
18954 {
18955   unsigned HOST_WIDE_INT mask = 0xff;
18956   int i;
18957 
18958   val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
18959   if (val == 0) /* XXX */
18960     return 0;
18961 
18962   for (i = 0; i < 25; i++)
18963     if ((val & (mask << i)) == val)
18964       return 1;
18965 
18966   return 0;
18967 }
18968 
18969 /* Returns nonzero if the current function contains,
18970    or might contain a far jump.  */
18971 static int
18972 thumb_far_jump_used_p (void)
18973 {
18974   rtx insn;
18975 
18976   /* This test is only important for leaf functions.  */
18977   /* assert (!leaf_function_p ()); */
18978 
18979   /* If we have already decided that far jumps may be used,
18980      do not bother checking again, and always return true even if
18981      it turns out that they are not being used.  Once we have made
18982      the decision that far jumps are present (and that hence the link
18983      register will be pushed onto the stack) we cannot go back on it.  */
18984   if (cfun->machine->far_jump_used)
18985     return 1;
18986 
18987   /* If this function is not being called from the prologue/epilogue
18988      generation code then it must be being called from the
18989      INITIAL_ELIMINATION_OFFSET macro.  */
18990   if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
18991     {
18992       /* In this case we know that we are being asked about the elimination
18993 	 of the arg pointer register.  If that register is not being used,
18994 	 then there are no arguments on the stack, and we do not have to
18995 	 worry that a far jump might force the prologue to push the link
18996 	 register, changing the stack offsets.  In this case we can just
18997 	 return false, since the presence of far jumps in the function will
18998 	 not affect stack offsets.
18999 
19000 	 If the arg pointer is live (or if it was live, but has now been
19001 	 eliminated and so set to dead) then we do have to test to see if
19002 	 the function might contain a far jump.  This test can lead to some
19003 	 false negatives, since before reload is completed, then length of
19004 	 branch instructions is not known, so gcc defaults to returning their
19005 	 longest length, which in turn sets the far jump attribute to true.
19006 
19007 	 A false negative will not result in bad code being generated, but it
19008 	 will result in a needless push and pop of the link register.  We
19009 	 hope that this does not occur too often.
19010 
19011 	 If we need doubleword stack alignment this could affect the other
19012 	 elimination offsets so we can't risk getting it wrong.  */
19013       if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
19014 	cfun->machine->arg_pointer_live = 1;
19015       else if (!cfun->machine->arg_pointer_live)
19016 	return 0;
19017     }
19018 
19019   /* Check to see if the function contains a branch
19020      insn with the far jump attribute set.  */
19021   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19022     {
19023       if (GET_CODE (insn) == JUMP_INSN
19024 	  /* Ignore tablejump patterns.  */
19025 	  && GET_CODE (PATTERN (insn)) != ADDR_VEC
19026 	  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
19027 	  && get_attr_far_jump (insn) == FAR_JUMP_YES
19028 	  )
19029 	{
19030 	  /* Record the fact that we have decided that
19031 	     the function does use far jumps.  */
19032 	  cfun->machine->far_jump_used = 1;
19033 	  return 1;
19034 	}
19035     }
19036 
19037   return 0;
19038 }
19039 
19040 /* Return nonzero if FUNC must be entered in ARM mode.  */
19041 int
19042 is_called_in_ARM_mode (tree func)
19043 {
19044   gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
19045 
19046   /* Ignore the problem about functions whose address is taken.  */
19047   if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
19048     return TRUE;
19049 
19050 #ifdef ARM_PE
19051   return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
19052 #else
19053   return FALSE;
19054 #endif
19055 }
19056 
19057 /* The bits which aren't usefully expanded as rtl.  */
19058 const char *
19059 thumb_unexpanded_epilogue (void)
19060 {
19061   arm_stack_offsets *offsets;
19062   int regno;
19063   unsigned long live_regs_mask = 0;
19064   int high_regs_pushed = 0;
19065   int had_to_push_lr;
19066   int size;
19067 
19068   if (cfun->machine->return_used_this_function != 0)
19069     return "";
19070 
19071   if (IS_NAKED (arm_current_func_type ()))
19072     return "";
19073 
19074   offsets = arm_get_frame_offsets ();
19075   live_regs_mask = offsets->saved_regs_mask;
19076   high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
19077 
19078   /* If we can deduce the registers used from the function's return value.
19079      This is more reliable that examining df_regs_ever_live_p () because that
19080      will be set if the register is ever used in the function, not just if
19081      the register is used to hold a return value.  */
19082   size = arm_size_return_regs ();
19083 
19084   /* The prolog may have pushed some high registers to use as
19085      work registers.  e.g. the testsuite file:
19086      gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
19087      compiles to produce:
19088 	push	{r4, r5, r6, r7, lr}
19089 	mov	r7, r9
19090 	mov	r6, r8
19091 	push	{r6, r7}
19092      as part of the prolog.  We have to undo that pushing here.  */
19093 
19094   if (high_regs_pushed)
19095     {
19096       unsigned long mask = live_regs_mask & 0xff;
19097       int next_hi_reg;
19098 
19099       /* The available low registers depend on the size of the value we are
19100          returning.  */
19101       if (size <= 12)
19102 	mask |=  1 << 3;
19103       if (size <= 8)
19104 	mask |= 1 << 2;
19105 
19106       if (mask == 0)
19107 	/* Oh dear!  We have no low registers into which we can pop
19108            high registers!  */
19109 	internal_error
19110 	  ("no low registers available for popping high registers");
19111 
19112       for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
19113 	if (live_regs_mask & (1 << next_hi_reg))
19114 	  break;
19115 
19116       while (high_regs_pushed)
19117 	{
19118 	  /* Find lo register(s) into which the high register(s) can
19119              be popped.  */
19120 	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
19121 	    {
19122 	      if (mask & (1 << regno))
19123 		high_regs_pushed--;
19124 	      if (high_regs_pushed == 0)
19125 		break;
19126 	    }
19127 
19128 	  mask &= (2 << regno) - 1;	/* A noop if regno == 8 */
19129 
19130 	  /* Pop the values into the low register(s).  */
19131 	  thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
19132 
19133 	  /* Move the value(s) into the high registers.  */
19134 	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
19135 	    {
19136 	      if (mask & (1 << regno))
19137 		{
19138 		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
19139 			       regno);
19140 
19141 		  for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
19142 		    if (live_regs_mask & (1 << next_hi_reg))
19143 		      break;
19144 		}
19145 	    }
19146 	}
19147       live_regs_mask &= ~0x0f00;
19148     }
19149 
19150   had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
19151   live_regs_mask &= 0xff;
19152 
19153   if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
19154     {
19155       /* Pop the return address into the PC.  */
19156       if (had_to_push_lr)
19157 	live_regs_mask |= 1 << PC_REGNUM;
19158 
19159       /* Either no argument registers were pushed or a backtrace
19160 	 structure was created which includes an adjusted stack
19161 	 pointer, so just pop everything.  */
19162       if (live_regs_mask)
19163 	thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
19164 		       live_regs_mask);
19165 
19166       /* We have either just popped the return address into the
19167 	 PC or it is was kept in LR for the entire function.  */
19168       if (!had_to_push_lr)
19169 	thumb_exit (asm_out_file, LR_REGNUM);
19170     }
19171   else
19172     {
19173       /* Pop everything but the return address.  */
19174       if (live_regs_mask)
19175 	thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
19176 		       live_regs_mask);
19177 
19178       if (had_to_push_lr)
19179 	{
19180 	  if (size > 12)
19181 	    {
19182 	      /* We have no free low regs, so save one.  */
19183 	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
19184 			   LAST_ARG_REGNUM);
19185 	    }
19186 
19187 	  /* Get the return address into a temporary register.  */
19188 	  thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
19189 			 1 << LAST_ARG_REGNUM);
19190 
19191 	  if (size > 12)
19192 	    {
19193 	      /* Move the return address to lr.  */
19194 	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
19195 			   LAST_ARG_REGNUM);
19196 	      /* Restore the low register.  */
19197 	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
19198 			   IP_REGNUM);
19199 	      regno = LR_REGNUM;
19200 	    }
19201 	  else
19202 	    regno = LAST_ARG_REGNUM;
19203 	}
19204       else
19205 	regno = LR_REGNUM;
19206 
19207       /* Remove the argument registers that were pushed onto the stack.  */
19208       asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
19209 		   SP_REGNUM, SP_REGNUM,
19210 		   crtl->args.pretend_args_size);
19211 
19212       thumb_exit (asm_out_file, regno);
19213     }
19214 
19215   return "";
19216 }
19217 
19218 /* Functions to save and restore machine-specific function data.  */
19219 static struct machine_function *
19220 arm_init_machine_status (void)
19221 {
19222   struct machine_function *machine;
19223   machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
19224 
19225 #if ARM_FT_UNKNOWN != 0
19226   machine->func_type = ARM_FT_UNKNOWN;
19227 #endif
19228   return machine;
19229 }
19230 
19231 /* Return an RTX indicating where the return address to the
19232    calling function can be found.  */
19233 rtx
19234 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
19235 {
19236   if (count != 0)
19237     return NULL_RTX;
19238 
19239   return get_hard_reg_initial_val (Pmode, LR_REGNUM);
19240 }
19241 
19242 /* Do anything needed before RTL is emitted for each function.  */
19243 void
19244 arm_init_expanders (void)
19245 {
19246   /* Arrange to initialize and mark the machine per-function status.  */
19247   init_machine_status = arm_init_machine_status;
19248 
19249   /* This is to stop the combine pass optimizing away the alignment
19250      adjustment of va_arg.  */
19251   /* ??? It is claimed that this should not be necessary.  */
19252   if (cfun)
19253     mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
19254 }
19255 
19256 
19257 /* Like arm_compute_initial_elimination offset.  Simpler because there
19258    isn't an ABI specified frame pointer for Thumb.  Instead, we set it
19259    to point at the base of the local variables after static stack
19260    space for a function has been allocated.  */
19261 
19262 HOST_WIDE_INT
19263 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
19264 {
19265   arm_stack_offsets *offsets;
19266 
19267   offsets = arm_get_frame_offsets ();
19268 
19269   switch (from)
19270     {
19271     case ARG_POINTER_REGNUM:
19272       switch (to)
19273 	{
19274 	case STACK_POINTER_REGNUM:
19275 	  return offsets->outgoing_args - offsets->saved_args;
19276 
19277 	case FRAME_POINTER_REGNUM:
19278 	  return offsets->soft_frame - offsets->saved_args;
19279 
19280 	case ARM_HARD_FRAME_POINTER_REGNUM:
19281 	  return offsets->saved_regs - offsets->saved_args;
19282 
19283 	case THUMB_HARD_FRAME_POINTER_REGNUM:
19284 	  return offsets->locals_base - offsets->saved_args;
19285 
19286 	default:
19287 	  gcc_unreachable ();
19288 	}
19289       break;
19290 
19291     case FRAME_POINTER_REGNUM:
19292       switch (to)
19293 	{
19294 	case STACK_POINTER_REGNUM:
19295 	  return offsets->outgoing_args - offsets->soft_frame;
19296 
19297 	case ARM_HARD_FRAME_POINTER_REGNUM:
19298 	  return offsets->saved_regs - offsets->soft_frame;
19299 
19300 	case THUMB_HARD_FRAME_POINTER_REGNUM:
19301 	  return offsets->locals_base - offsets->soft_frame;
19302 
19303 	default:
19304 	  gcc_unreachable ();
19305 	}
19306       break;
19307 
19308     default:
19309       gcc_unreachable ();
19310     }
19311 }
19312 
19313 /* Generate the rest of a function's prologue.  */
19314 void
19315 thumb1_expand_prologue (void)
19316 {
19317   rtx insn, dwarf;
19318 
19319   HOST_WIDE_INT amount;
19320   arm_stack_offsets *offsets;
19321   unsigned long func_type;
19322   int regno;
19323   unsigned long live_regs_mask;
19324 
19325   func_type = arm_current_func_type ();
19326 
19327   /* Naked functions don't have prologues.  */
19328   if (IS_NAKED (func_type))
19329     return;
19330 
19331   if (IS_INTERRUPT (func_type))
19332     {
19333       error ("interrupt Service Routines cannot be coded in Thumb mode");
19334       return;
19335     }
19336 
19337   offsets = arm_get_frame_offsets ();
19338   live_regs_mask = offsets->saved_regs_mask;
19339   /* Load the pic register before setting the frame pointer,
19340      so we can use r7 as a temporary work register.  */
19341   if (flag_pic && arm_pic_register != INVALID_REGNUM)
19342     arm_load_pic_register (live_regs_mask);
19343 
19344   if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19345     emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
19346 		    stack_pointer_rtx);
19347 
19348   amount = offsets->outgoing_args - offsets->saved_regs;
19349   if (amount)
19350     {
19351       if (amount < 512)
19352 	{
19353 	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
19354 					GEN_INT (- amount)));
19355 	  RTX_FRAME_RELATED_P (insn) = 1;
19356 	}
19357       else
19358 	{
19359 	  rtx reg;
19360 
19361 	  /* The stack decrement is too big for an immediate value in a single
19362 	     insn.  In theory we could issue multiple subtracts, but after
19363 	     three of them it becomes more space efficient to place the full
19364 	     value in the constant pool and load into a register.  (Also the
19365 	     ARM debugger really likes to see only one stack decrement per
19366 	     function).  So instead we look for a scratch register into which
19367 	     we can load the decrement, and then we subtract this from the
19368 	     stack pointer.  Unfortunately on the thumb the only available
19369 	     scratch registers are the argument registers, and we cannot use
19370 	     these as they may hold arguments to the function.  Instead we
19371 	     attempt to locate a call preserved register which is used by this
19372 	     function.  If we can find one, then we know that it will have
19373 	     been pushed at the start of the prologue and so we can corrupt
19374 	     it now.  */
19375 	  for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
19376 	    if (live_regs_mask & (1 << regno))
19377 	      break;
19378 
19379 	  gcc_assert(regno <= LAST_LO_REGNUM);
19380 
19381 	  reg = gen_rtx_REG (SImode, regno);
19382 
19383 	  emit_insn (gen_movsi (reg, GEN_INT (- amount)));
19384 
19385 	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
19386 					stack_pointer_rtx, reg));
19387 	  RTX_FRAME_RELATED_P (insn) = 1;
19388 	  dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19389 			       plus_constant (stack_pointer_rtx,
19390 					      -amount));
19391 	  RTX_FRAME_RELATED_P (dwarf) = 1;
19392 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19393 	}
19394     }
19395 
19396   if (frame_pointer_needed)
19397     thumb_set_frame_pointer (offsets);
19398 
19399   /* If we are profiling, make sure no instructions are scheduled before
19400      the call to mcount.  Similarly if the user has requested no
19401      scheduling in the prolog.  Similarly if we want non-call exceptions
19402      using the EABI unwinder, to prevent faulting instructions from being
19403      swapped with a stack adjustment.  */
19404   if (crtl->profile || !TARGET_SCHED_PROLOG
19405       || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
19406     emit_insn (gen_blockage ());
19407 
19408   cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
19409   if (live_regs_mask & 0xff)
19410     cfun->machine->lr_save_eliminated = 0;
19411 }
19412 
19413 
19414 void
19415 thumb1_expand_epilogue (void)
19416 {
19417   HOST_WIDE_INT amount;
19418   arm_stack_offsets *offsets;
19419   int regno;
19420 
19421   /* Naked functions don't have prologues.  */
19422   if (IS_NAKED (arm_current_func_type ()))
19423     return;
19424 
19425   offsets = arm_get_frame_offsets ();
19426   amount = offsets->outgoing_args - offsets->saved_regs;
19427 
19428   if (frame_pointer_needed)
19429     {
19430       emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
19431       amount = offsets->locals_base - offsets->saved_regs;
19432     }
19433 
19434   gcc_assert (amount >= 0);
19435   if (amount)
19436     {
19437       if (amount < 512)
19438 	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
19439 			       GEN_INT (amount)));
19440       else
19441 	{
19442 	  /* r3 is always free in the epilogue.  */
19443 	  rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
19444 
19445 	  emit_insn (gen_movsi (reg, GEN_INT (amount)));
19446 	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
19447 	}
19448     }
19449 
19450   /* Emit a USE (stack_pointer_rtx), so that
19451      the stack adjustment will not be deleted.  */
19452   emit_insn (gen_prologue_use (stack_pointer_rtx));
19453 
19454   if (crtl->profile || !TARGET_SCHED_PROLOG)
19455     emit_insn (gen_blockage ());
19456 
19457   /* Emit a clobber for each insn that will be restored in the epilogue,
19458      so that flow2 will get register lifetimes correct.  */
19459   for (regno = 0; regno < 13; regno++)
19460     if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
19461       emit_clobber (gen_rtx_REG (SImode, regno));
19462 
19463   if (! df_regs_ever_live_p (LR_REGNUM))
19464     emit_use (gen_rtx_REG (SImode, LR_REGNUM));
19465 }
19466 
19467 static void
19468 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
19469 {
19470   arm_stack_offsets *offsets;
19471   unsigned long live_regs_mask = 0;
19472   unsigned long l_mask;
19473   unsigned high_regs_pushed = 0;
19474   int cfa_offset = 0;
19475   int regno;
19476 
19477   if (IS_NAKED (arm_current_func_type ()))
19478     return;
19479 
19480   if (is_called_in_ARM_mode (current_function_decl))
19481     {
19482       const char * name;
19483 
19484       gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
19485       gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
19486 		  == SYMBOL_REF);
19487       name = XSTR  (XEXP (DECL_RTL (current_function_decl), 0), 0);
19488 
19489       /* Generate code sequence to switch us into Thumb mode.  */
19490       /* The .code 32 directive has already been emitted by
19491 	 ASM_DECLARE_FUNCTION_NAME.  */
19492       asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
19493       asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
19494 
19495       /* Generate a label, so that the debugger will notice the
19496 	 change in instruction sets.  This label is also used by
19497 	 the assembler to bypass the ARM code when this function
19498 	 is called from a Thumb encoded function elsewhere in the
19499 	 same file.  Hence the definition of STUB_NAME here must
19500 	 agree with the definition in gas/config/tc-arm.c.  */
19501 
19502 #define STUB_NAME ".real_start_of"
19503 
19504       fprintf (f, "\t.code\t16\n");
19505 #ifdef ARM_PE
19506       if (arm_dllexport_name_p (name))
19507         name = arm_strip_name_encoding (name);
19508 #endif
19509       asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
19510       fprintf (f, "\t.thumb_func\n");
19511       asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
19512     }
19513 
19514   if (crtl->args.pretend_args_size)
19515     {
19516       /* Output unwind directive for the stack adjustment.  */
19517       if (ARM_EABI_UNWIND_TABLES)
19518 	fprintf (f, "\t.pad #%d\n",
19519 		 crtl->args.pretend_args_size);
19520 
19521       if (cfun->machine->uses_anonymous_args)
19522 	{
19523 	  int num_pushes;
19524 
19525 	  fprintf (f, "\tpush\t{");
19526 
19527 	  num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
19528 
19529 	  for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
19530 	       regno <= LAST_ARG_REGNUM;
19531 	       regno++)
19532 	    asm_fprintf (f, "%r%s", regno,
19533 			 regno == LAST_ARG_REGNUM ? "" : ", ");
19534 
19535 	  fprintf (f, "}\n");
19536 	}
19537       else
19538 	asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
19539 		     SP_REGNUM, SP_REGNUM,
19540 		     crtl->args.pretend_args_size);
19541 
19542       /* We don't need to record the stores for unwinding (would it
19543 	 help the debugger any if we did?), but record the change in
19544 	 the stack pointer.  */
19545       if (dwarf2out_do_frame ())
19546 	{
19547 	  char *l = dwarf2out_cfi_label (false);
19548 
19549 	  cfa_offset = cfa_offset + crtl->args.pretend_args_size;
19550 	  dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
19551 	}
19552     }
19553 
19554   /* Get the registers we are going to push.  */
19555   offsets = arm_get_frame_offsets ();
19556   live_regs_mask = offsets->saved_regs_mask;
19557   /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
19558   l_mask = live_regs_mask & 0x40ff;
19559   /* Then count how many other high registers will need to be pushed.  */
19560   high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
19561 
19562   if (TARGET_BACKTRACE)
19563     {
19564       unsigned offset;
19565       unsigned work_register;
19566 
19567       /* We have been asked to create a stack backtrace structure.
19568          The code looks like this:
19569 
19570 	 0   .align 2
19571 	 0   func:
19572          0     sub   SP, #16         Reserve space for 4 registers.
19573 	 2     push  {R7}            Push low registers.
19574          4     add   R7, SP, #20     Get the stack pointer before the push.
19575          6     str   R7, [SP, #8]    Store the stack pointer (before reserving the space).
19576          8     mov   R7, PC          Get hold of the start of this code plus 12.
19577         10     str   R7, [SP, #16]   Store it.
19578         12     mov   R7, FP          Get hold of the current frame pointer.
19579         14     str   R7, [SP, #4]    Store it.
19580         16     mov   R7, LR          Get hold of the current return address.
19581         18     str   R7, [SP, #12]   Store it.
19582         20     add   R7, SP, #16     Point at the start of the backtrace structure.
19583         22     mov   FP, R7          Put this value into the frame pointer.  */
19584 
19585       work_register = thumb_find_work_register (live_regs_mask);
19586 
19587       if (ARM_EABI_UNWIND_TABLES)
19588 	asm_fprintf (f, "\t.pad #16\n");
19589 
19590       asm_fprintf
19591 	(f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
19592 	 SP_REGNUM, SP_REGNUM);
19593 
19594       if (dwarf2out_do_frame ())
19595 	{
19596 	  char *l = dwarf2out_cfi_label (false);
19597 
19598 	  cfa_offset = cfa_offset + 16;
19599 	  dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
19600 	}
19601 
19602       if (l_mask)
19603 	{
19604 	  thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
19605 	  offset = bit_count (l_mask) * UNITS_PER_WORD;
19606 	}
19607       else
19608 	offset = 0;
19609 
19610       asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
19611 		   offset + 16 + crtl->args.pretend_args_size);
19612 
19613       asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19614 		   offset + 4);
19615 
19616       /* Make sure that the instruction fetching the PC is in the right place
19617 	 to calculate "start of backtrace creation code + 12".  */
19618       if (l_mask)
19619 	{
19620 	  asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
19621 	  asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19622 		       offset + 12);
19623 	  asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
19624 		       ARM_HARD_FRAME_POINTER_REGNUM);
19625 	  asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19626 		       offset);
19627 	}
19628       else
19629 	{
19630 	  asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
19631 		       ARM_HARD_FRAME_POINTER_REGNUM);
19632 	  asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19633 		       offset);
19634 	  asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
19635 	  asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19636 		       offset + 12);
19637 	}
19638 
19639       asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
19640       asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19641 		   offset + 8);
19642       asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
19643 		   offset + 12);
19644       asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
19645 		   ARM_HARD_FRAME_POINTER_REGNUM, work_register);
19646     }
19647   /* Optimization:  If we are not pushing any low registers but we are going
19648      to push some high registers then delay our first push.  This will just
19649      be a push of LR and we can combine it with the push of the first high
19650      register.  */
19651   else if ((l_mask & 0xff) != 0
19652 	   || (high_regs_pushed == 0 && l_mask))
19653     thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
19654 
19655   if (high_regs_pushed)
19656     {
19657       unsigned pushable_regs;
19658       unsigned next_hi_reg;
19659 
19660       for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
19661 	if (live_regs_mask & (1 << next_hi_reg))
19662 	  break;
19663 
19664       pushable_regs = l_mask & 0xff;
19665 
19666       if (pushable_regs == 0)
19667 	pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
19668 
19669       while (high_regs_pushed > 0)
19670 	{
19671 	  unsigned long real_regs_mask = 0;
19672 
19673 	  for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
19674 	    {
19675 	      if (pushable_regs & (1 << regno))
19676 		{
19677 		  asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
19678 
19679 		  high_regs_pushed --;
19680 		  real_regs_mask |= (1 << next_hi_reg);
19681 
19682 		  if (high_regs_pushed)
19683 		    {
19684 		      for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
19685 			   next_hi_reg --)
19686 			if (live_regs_mask & (1 << next_hi_reg))
19687 			  break;
19688 		    }
19689 		  else
19690 		    {
19691 		      pushable_regs &= ~((1 << regno) - 1);
19692 		      break;
19693 		    }
19694 		}
19695 	    }
19696 
19697 	  /* If we had to find a work register and we have not yet
19698 	     saved the LR then add it to the list of regs to push.  */
19699 	  if (l_mask == (1 << LR_REGNUM))
19700 	    {
19701 	      thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
19702 			     1, &cfa_offset,
19703 			     real_regs_mask | (1 << LR_REGNUM));
19704 	      l_mask = 0;
19705 	    }
19706 	  else
19707 	    thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
19708 	}
19709     }
19710 }
19711 
19712 /* Handle the case of a double word load into a low register from
19713    a computed memory address.  The computed address may involve a
19714    register which is overwritten by the load.  */
19715 const char *
19716 thumb_load_double_from_address (rtx *operands)
19717 {
19718   rtx addr;
19719   rtx base;
19720   rtx offset;
19721   rtx arg1;
19722   rtx arg2;
19723 
19724   gcc_assert (GET_CODE (operands[0]) == REG);
19725   gcc_assert (GET_CODE (operands[1]) == MEM);
19726 
19727   /* Get the memory address.  */
19728   addr = XEXP (operands[1], 0);
19729 
19730   /* Work out how the memory address is computed.  */
19731   switch (GET_CODE (addr))
19732     {
19733     case REG:
19734       operands[2] = adjust_address (operands[1], SImode, 4);
19735 
19736       if (REGNO (operands[0]) == REGNO (addr))
19737 	{
19738 	  output_asm_insn ("ldr\t%H0, %2", operands);
19739 	  output_asm_insn ("ldr\t%0, %1", operands);
19740 	}
19741       else
19742 	{
19743 	  output_asm_insn ("ldr\t%0, %1", operands);
19744 	  output_asm_insn ("ldr\t%H0, %2", operands);
19745 	}
19746       break;
19747 
19748     case CONST:
19749       /* Compute <address> + 4 for the high order load.  */
19750       operands[2] = adjust_address (operands[1], SImode, 4);
19751 
19752       output_asm_insn ("ldr\t%0, %1", operands);
19753       output_asm_insn ("ldr\t%H0, %2", operands);
19754       break;
19755 
19756     case PLUS:
19757       arg1   = XEXP (addr, 0);
19758       arg2   = XEXP (addr, 1);
19759 
19760       if (CONSTANT_P (arg1))
19761 	base = arg2, offset = arg1;
19762       else
19763 	base = arg1, offset = arg2;
19764 
19765       gcc_assert (GET_CODE (base) == REG);
19766 
19767       /* Catch the case of <address> = <reg> + <reg> */
19768       if (GET_CODE (offset) == REG)
19769 	{
19770 	  int reg_offset = REGNO (offset);
19771 	  int reg_base   = REGNO (base);
19772 	  int reg_dest   = REGNO (operands[0]);
19773 
19774 	  /* Add the base and offset registers together into the
19775              higher destination register.  */
19776 	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
19777 		       reg_dest + 1, reg_base, reg_offset);
19778 
19779 	  /* Load the lower destination register from the address in
19780              the higher destination register.  */
19781 	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
19782 		       reg_dest, reg_dest + 1);
19783 
19784 	  /* Load the higher destination register from its own address
19785              plus 4.  */
19786 	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
19787 		       reg_dest + 1, reg_dest + 1);
19788 	}
19789       else
19790 	{
19791 	  /* Compute <address> + 4 for the high order load.  */
19792 	  operands[2] = adjust_address (operands[1], SImode, 4);
19793 
19794 	  /* If the computed address is held in the low order register
19795 	     then load the high order register first, otherwise always
19796 	     load the low order register first.  */
19797 	  if (REGNO (operands[0]) == REGNO (base))
19798 	    {
19799 	      output_asm_insn ("ldr\t%H0, %2", operands);
19800 	      output_asm_insn ("ldr\t%0, %1", operands);
19801 	    }
19802 	  else
19803 	    {
19804 	      output_asm_insn ("ldr\t%0, %1", operands);
19805 	      output_asm_insn ("ldr\t%H0, %2", operands);
19806 	    }
19807 	}
19808       break;
19809 
19810     case LABEL_REF:
19811       /* With no registers to worry about we can just load the value
19812          directly.  */
19813       operands[2] = adjust_address (operands[1], SImode, 4);
19814 
19815       output_asm_insn ("ldr\t%H0, %2", operands);
19816       output_asm_insn ("ldr\t%0, %1", operands);
19817       break;
19818 
19819     default:
19820       gcc_unreachable ();
19821     }
19822 
19823   return "";
19824 }
19825 
19826 const char *
19827 thumb_output_move_mem_multiple (int n, rtx *operands)
19828 {
19829   rtx tmp;
19830 
19831   switch (n)
19832     {
19833     case 2:
19834       if (REGNO (operands[4]) > REGNO (operands[5]))
19835 	{
19836 	  tmp = operands[4];
19837 	  operands[4] = operands[5];
19838 	  operands[5] = tmp;
19839 	}
19840       output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
19841       output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
19842       break;
19843 
19844     case 3:
19845       if (REGNO (operands[4]) > REGNO (operands[5]))
19846 	{
19847 	  tmp = operands[4];
19848 	  operands[4] = operands[5];
19849 	  operands[5] = tmp;
19850 	}
19851       if (REGNO (operands[5]) > REGNO (operands[6]))
19852 	{
19853 	  tmp = operands[5];
19854 	  operands[5] = operands[6];
19855 	  operands[6] = tmp;
19856 	}
19857       if (REGNO (operands[4]) > REGNO (operands[5]))
19858 	{
19859 	  tmp = operands[4];
19860 	  operands[4] = operands[5];
19861 	  operands[5] = tmp;
19862 	}
19863 
19864       output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
19865       output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
19866       break;
19867 
19868     default:
19869       gcc_unreachable ();
19870     }
19871 
19872   return "";
19873 }
19874 
19875 /* Output a call-via instruction for thumb state.  */
19876 const char *
19877 thumb_call_via_reg (rtx reg)
19878 {
19879   int regno = REGNO (reg);
19880   rtx *labelp;
19881 
19882   gcc_assert (regno < LR_REGNUM);
19883 
19884   /* If we are in the normal text section we can use a single instance
19885      per compilation unit.  If we are doing function sections, then we need
19886      an entry per section, since we can't rely on reachability.  */
19887   if (in_section == text_section)
19888     {
19889       thumb_call_reg_needed = 1;
19890 
19891       if (thumb_call_via_label[regno] == NULL)
19892 	thumb_call_via_label[regno] = gen_label_rtx ();
19893       labelp = thumb_call_via_label + regno;
19894     }
19895   else
19896     {
19897       if (cfun->machine->call_via[regno] == NULL)
19898 	cfun->machine->call_via[regno] = gen_label_rtx ();
19899       labelp = cfun->machine->call_via + regno;
19900     }
19901 
19902   output_asm_insn ("bl\t%a0", labelp);
19903   return "";
19904 }
19905 
19906 /* Routines for generating rtl.  */
19907 void
19908 thumb_expand_movmemqi (rtx *operands)
19909 {
19910   rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
19911   rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
19912   HOST_WIDE_INT len = INTVAL (operands[2]);
19913   HOST_WIDE_INT offset = 0;
19914 
19915   while (len >= 12)
19916     {
19917       emit_insn (gen_movmem12b (out, in, out, in));
19918       len -= 12;
19919     }
19920 
19921   if (len >= 8)
19922     {
19923       emit_insn (gen_movmem8b (out, in, out, in));
19924       len -= 8;
19925     }
19926 
19927   if (len >= 4)
19928     {
19929       rtx reg = gen_reg_rtx (SImode);
19930       emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
19931       emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
19932       len -= 4;
19933       offset += 4;
19934     }
19935 
19936   if (len >= 2)
19937     {
19938       rtx reg = gen_reg_rtx (HImode);
19939       emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
19940 					      plus_constant (in, offset))));
19941       emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
19942 			    reg));
19943       len -= 2;
19944       offset += 2;
19945     }
19946 
19947   if (len)
19948     {
19949       rtx reg = gen_reg_rtx (QImode);
19950       emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
19951 					      plus_constant (in, offset))));
19952       emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
19953 			    reg));
19954     }
19955 }
19956 
19957 void
19958 thumb_reload_out_hi (rtx *operands)
19959 {
19960   emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
19961 }
19962 
19963 /* Handle reading a half-word from memory during reload.  */
19964 void
19965 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
19966 {
19967   gcc_unreachable ();
19968 }
19969 
19970 /* Return the length of a function name prefix
19971     that starts with the character 'c'.  */
19972 static int
19973 arm_get_strip_length (int c)
19974 {
19975   switch (c)
19976     {
19977     ARM_NAME_ENCODING_LENGTHS
19978       default: return 0;
19979     }
19980 }
19981 
19982 /* Return a pointer to a function's name with any
19983    and all prefix encodings stripped from it.  */
19984 const char *
19985 arm_strip_name_encoding (const char *name)
19986 {
19987   int skip;
19988 
19989   while ((skip = arm_get_strip_length (* name)))
19990     name += skip;
19991 
19992   return name;
19993 }
19994 
19995 /* If there is a '*' anywhere in the name's prefix, then
19996    emit the stripped name verbatim, otherwise prepend an
19997    underscore if leading underscores are being used.  */
19998 void
19999 arm_asm_output_labelref (FILE *stream, const char *name)
20000 {
20001   int skip;
20002   int verbatim = 0;
20003 
20004   while ((skip = arm_get_strip_length (* name)))
20005     {
20006       verbatim |= (*name == '*');
20007       name += skip;
20008     }
20009 
20010   if (verbatim)
20011     fputs (name, stream);
20012   else
20013     asm_fprintf (stream, "%U%s", name);
20014 }
20015 
20016 static void
20017 arm_file_start (void)
20018 {
20019   int val;
20020 
20021   if (TARGET_UNIFIED_ASM)
20022     asm_fprintf (asm_out_file, "\t.syntax unified\n");
20023 
20024   if (TARGET_BPABI)
20025     {
20026       const char *fpu_name;
20027       if (arm_select[0].string)
20028 	asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
20029       else if (arm_select[1].string)
20030 	asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
20031       else
20032 	asm_fprintf (asm_out_file, "\t.cpu %s\n",
20033 		     all_cores[arm_default_cpu].name);
20034 
20035       if (TARGET_SOFT_FLOAT)
20036 	{
20037 	  if (TARGET_VFP)
20038 	    fpu_name = "softvfp";
20039 	  else
20040 	    fpu_name = "softfpa";
20041 	}
20042       else
20043 	{
20044 	  fpu_name = arm_fpu_desc->name;
20045 	  if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
20046 	    {
20047 	      if (TARGET_HARD_FLOAT)
20048 		asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
20049 	      if (TARGET_HARD_FLOAT_ABI)
20050 		asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
20051 	    }
20052 	}
20053       asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
20054 
20055       /* Some of these attributes only apply when the corresponding features
20056          are used.  However we don't have any easy way of figuring this out.
20057 	 Conservatively record the setting that would have been used.  */
20058 
20059       /* Tag_ABI_FP_rounding.  */
20060       if (flag_rounding_math)
20061 	asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
20062       if (!flag_unsafe_math_optimizations)
20063 	{
20064 	  /* Tag_ABI_FP_denomal.  */
20065 	  asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
20066 	  /* Tag_ABI_FP_exceptions.  */
20067 	  asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
20068 	}
20069       /* Tag_ABI_FP_user_exceptions.  */
20070       if (flag_signaling_nans)
20071 	asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
20072       /* Tag_ABI_FP_number_model.  */
20073       asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
20074 		   flag_finite_math_only ? 1 : 3);
20075 
20076       /* Tag_ABI_align8_needed.  */
20077       asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
20078       /* Tag_ABI_align8_preserved.  */
20079       asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
20080       /* Tag_ABI_enum_size.  */
20081       asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
20082 		   flag_short_enums ? 1 : 2);
20083 
20084       /* Tag_ABI_optimization_goals.  */
20085       if (optimize_size)
20086 	val = 4;
20087       else if (optimize >= 2)
20088 	val = 2;
20089       else if (optimize)
20090 	val = 1;
20091       else
20092 	val = 6;
20093       asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
20094 
20095       /* Tag_ABI_FP_16bit_format.  */
20096       if (arm_fp16_format)
20097 	asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
20098 		     (int)arm_fp16_format);
20099 
20100       if (arm_lang_output_object_attributes_hook)
20101 	arm_lang_output_object_attributes_hook();
20102     }
20103   default_file_start();
20104 }
20105 
20106 static void
20107 arm_file_end (void)
20108 {
20109   int regno;
20110 
20111   if (NEED_INDICATE_EXEC_STACK)
20112     /* Add .note.GNU-stack.  */
20113     file_end_indicate_exec_stack ();
20114 
20115   if (! thumb_call_reg_needed)
20116     return;
20117 
20118   switch_to_section (text_section);
20119   asm_fprintf (asm_out_file, "\t.code 16\n");
20120   ASM_OUTPUT_ALIGN (asm_out_file, 1);
20121 
20122   for (regno = 0; regno < LR_REGNUM; regno++)
20123     {
20124       rtx label = thumb_call_via_label[regno];
20125 
20126       if (label != 0)
20127 	{
20128 	  targetm.asm_out.internal_label (asm_out_file, "L",
20129 					  CODE_LABEL_NUMBER (label));
20130 	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
20131 	}
20132     }
20133 }
20134 
20135 #ifndef ARM_PE
20136 /* Symbols in the text segment can be accessed without indirecting via the
20137    constant pool; it may take an extra binary operation, but this is still
20138    faster than indirecting via memory.  Don't do this when not optimizing,
20139    since we won't be calculating al of the offsets necessary to do this
20140    simplification.  */
20141 
20142 static void
20143 arm_encode_section_info (tree decl, rtx rtl, int first)
20144 {
20145   if (optimize > 0 && TREE_CONSTANT (decl))
20146     SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
20147 
20148   default_encode_section_info (decl, rtl, first);
20149 }
20150 #endif /* !ARM_PE */
20151 
20152 static void
20153 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
20154 {
20155   if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
20156       && !strcmp (prefix, "L"))
20157     {
20158       arm_ccfsm_state = 0;
20159       arm_target_insn = NULL;
20160     }
20161   default_internal_label (stream, prefix, labelno);
20162 }
20163 
20164 /* Output code to add DELTA to the first argument, and then jump
20165    to FUNCTION.  Used for C++ multiple inheritance.  */
20166 static void
20167 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
20168 		     HOST_WIDE_INT delta,
20169 		     HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
20170 		     tree function)
20171 {
20172   static int thunk_label = 0;
20173   char label[256];
20174   char labelpc[256];
20175   int mi_delta = delta;
20176   const char *const mi_op = mi_delta < 0 ? "sub" : "add";
20177   int shift = 0;
20178   int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
20179                     ? 1 : 0);
20180   if (mi_delta < 0)
20181     mi_delta = - mi_delta;
20182 
20183   if (TARGET_THUMB1)
20184     {
20185       int labelno = thunk_label++;
20186       ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
20187       /* Thunks are entered in arm mode when avaiable.  */
20188       if (TARGET_THUMB1_ONLY)
20189 	{
20190 	  /* push r3 so we can use it as a temporary.  */
20191 	  /* TODO: Omit this save if r3 is not used.  */
20192 	  fputs ("\tpush {r3}\n", file);
20193 	  fputs ("\tldr\tr3, ", file);
20194 	}
20195       else
20196 	{
20197 	  fputs ("\tldr\tr12, ", file);
20198 	}
20199       assemble_name (file, label);
20200       fputc ('\n', file);
20201       if (flag_pic)
20202 	{
20203 	  /* If we are generating PIC, the ldr instruction below loads
20204 	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
20205 	     the address of the add + 8, so we have:
20206 
20207 	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
20208 	         = target + 1.
20209 
20210 	     Note that we have "+ 1" because some versions of GNU ld
20211 	     don't set the low bit of the result for R_ARM_REL32
20212 	     relocations against thumb function symbols.
20213 	     On ARMv6M this is +4, not +8.  */
20214 	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
20215 	  assemble_name (file, labelpc);
20216 	  fputs (":\n", file);
20217 	  if (TARGET_THUMB1_ONLY)
20218 	    {
20219 	      /* This is 2 insns after the start of the thunk, so we know it
20220 	         is 4-byte aligned.  */
20221 	      fputs ("\tadd\tr3, pc, r3\n", file);
20222 	      fputs ("\tmov r12, r3\n", file);
20223 	    }
20224 	  else
20225 	    fputs ("\tadd\tr12, pc, r12\n", file);
20226 	}
20227       else if (TARGET_THUMB1_ONLY)
20228 	fputs ("\tmov r12, r3\n", file);
20229     }
20230   if (TARGET_THUMB1_ONLY)
20231     {
20232       if (mi_delta > 255)
20233 	{
20234 	  fputs ("\tldr\tr3, ", file);
20235 	  assemble_name (file, label);
20236 	  fputs ("+4\n", file);
20237 	  asm_fprintf (file, "\t%s\t%r, %r, r3\n",
20238 		       mi_op, this_regno, this_regno);
20239 	}
20240       else if (mi_delta != 0)
20241 	{
20242 	  asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
20243 		       mi_op, this_regno, this_regno,
20244 		       mi_delta);
20245 	}
20246     }
20247   else
20248     {
20249       /* TODO: Use movw/movt for large constants when available.  */
20250       while (mi_delta != 0)
20251 	{
20252 	  if ((mi_delta & (3 << shift)) == 0)
20253 	    shift += 2;
20254 	  else
20255 	    {
20256 	      asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
20257 			   mi_op, this_regno, this_regno,
20258 			   mi_delta & (0xff << shift));
20259 	      mi_delta &= ~(0xff << shift);
20260 	      shift += 8;
20261 	    }
20262 	}
20263     }
20264   if (TARGET_THUMB1)
20265     {
20266       if (TARGET_THUMB1_ONLY)
20267 	fputs ("\tpop\t{r3}\n", file);
20268 
20269       fprintf (file, "\tbx\tr12\n");
20270       ASM_OUTPUT_ALIGN (file, 2);
20271       assemble_name (file, label);
20272       fputs (":\n", file);
20273       if (flag_pic)
20274 	{
20275 	  /* Output ".word .LTHUNKn-7-.LTHUNKPCn".  */
20276 	  rtx tem = XEXP (DECL_RTL (function), 0);
20277 	  tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
20278 	  tem = gen_rtx_MINUS (GET_MODE (tem),
20279 			       tem,
20280 			       gen_rtx_SYMBOL_REF (Pmode,
20281 						   ggc_strdup (labelpc)));
20282 	  assemble_integer (tem, 4, BITS_PER_WORD, 1);
20283 	}
20284       else
20285 	/* Output ".word .LTHUNKn".  */
20286 	assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
20287 
20288       if (TARGET_THUMB1_ONLY && mi_delta > 255)
20289 	assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
20290     }
20291   else
20292     {
20293       fputs ("\tb\t", file);
20294       assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
20295       if (NEED_PLT_RELOC)
20296         fputs ("(PLT)", file);
20297       fputc ('\n', file);
20298     }
20299 }
20300 
20301 int
20302 arm_emit_vector_const (FILE *file, rtx x)
20303 {
20304   int i;
20305   const char * pattern;
20306 
20307   gcc_assert (GET_CODE (x) == CONST_VECTOR);
20308 
20309   switch (GET_MODE (x))
20310     {
20311     case V2SImode: pattern = "%08x"; break;
20312     case V4HImode: pattern = "%04x"; break;
20313     case V8QImode: pattern = "%02x"; break;
20314     default:       gcc_unreachable ();
20315     }
20316 
20317   fprintf (file, "0x");
20318   for (i = CONST_VECTOR_NUNITS (x); i--;)
20319     {
20320       rtx element;
20321 
20322       element = CONST_VECTOR_ELT (x, i);
20323       fprintf (file, pattern, INTVAL (element));
20324     }
20325 
20326   return 1;
20327 }
20328 
20329 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
20330    HFmode constant pool entries are actually loaded with ldr.  */
20331 void
20332 arm_emit_fp16_const (rtx c)
20333 {
20334   REAL_VALUE_TYPE r;
20335   long bits;
20336 
20337   REAL_VALUE_FROM_CONST_DOUBLE (r, c);
20338   bits = real_to_target (NULL, &r, HFmode);
20339   if (WORDS_BIG_ENDIAN)
20340     assemble_zeros (2);
20341   assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
20342   if (!WORDS_BIG_ENDIAN)
20343     assemble_zeros (2);
20344 }
20345 
20346 const char *
20347 arm_output_load_gr (rtx *operands)
20348 {
20349   rtx reg;
20350   rtx offset;
20351   rtx wcgr;
20352   rtx sum;
20353 
20354   if (GET_CODE (operands [1]) != MEM
20355       || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
20356       || GET_CODE (reg = XEXP (sum, 0)) != REG
20357       || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
20358       || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
20359     return "wldrw%?\t%0, %1";
20360 
20361   /* Fix up an out-of-range load of a GR register.  */
20362   output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
20363   wcgr = operands[0];
20364   operands[0] = reg;
20365   output_asm_insn ("ldr%?\t%0, %1", operands);
20366 
20367   operands[0] = wcgr;
20368   operands[1] = reg;
20369   output_asm_insn ("tmcr%?\t%0, %1", operands);
20370   output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
20371 
20372   return "";
20373 }
20374 
20375 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
20376 
20377    On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
20378    named arg and all anonymous args onto the stack.
20379    XXX I know the prologue shouldn't be pushing registers, but it is faster
20380    that way.  */
20381 
20382 static void
20383 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
20384 			    enum machine_mode mode,
20385 			    tree type,
20386 			    int *pretend_size,
20387 			    int second_time ATTRIBUTE_UNUSED)
20388 {
20389   int nregs;
20390 
20391   cfun->machine->uses_anonymous_args = 1;
20392   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
20393     {
20394       nregs = pcum->aapcs_ncrn;
20395       if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
20396 	nregs++;
20397     }
20398   else
20399     nregs = pcum->nregs;
20400 
20401   if (nregs < NUM_ARG_REGS)
20402     *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
20403 }
20404 
20405 /* Return nonzero if the CONSUMER instruction (a store) does not need
20406    PRODUCER's value to calculate the address.  */
20407 
20408 int
20409 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
20410 {
20411   rtx value = PATTERN (producer);
20412   rtx addr = PATTERN (consumer);
20413 
20414   if (GET_CODE (value) == COND_EXEC)
20415     value = COND_EXEC_CODE (value);
20416   if (GET_CODE (value) == PARALLEL)
20417     value = XVECEXP (value, 0, 0);
20418   value = XEXP (value, 0);
20419   if (GET_CODE (addr) == COND_EXEC)
20420     addr = COND_EXEC_CODE (addr);
20421   if (GET_CODE (addr) == PARALLEL)
20422     addr = XVECEXP (addr, 0, 0);
20423   addr = XEXP (addr, 0);
20424 
20425   return !reg_overlap_mentioned_p (value, addr);
20426 }
20427 
20428 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
20429    have an early register shift value or amount dependency on the
20430    result of PRODUCER.  */
20431 
20432 int
20433 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
20434 {
20435   rtx value = PATTERN (producer);
20436   rtx op = PATTERN (consumer);
20437   rtx early_op;
20438 
20439   if (GET_CODE (value) == COND_EXEC)
20440     value = COND_EXEC_CODE (value);
20441   if (GET_CODE (value) == PARALLEL)
20442     value = XVECEXP (value, 0, 0);
20443   value = XEXP (value, 0);
20444   if (GET_CODE (op) == COND_EXEC)
20445     op = COND_EXEC_CODE (op);
20446   if (GET_CODE (op) == PARALLEL)
20447     op = XVECEXP (op, 0, 0);
20448   op = XEXP (op, 1);
20449 
20450   early_op = XEXP (op, 0);
20451   /* This is either an actual independent shift, or a shift applied to
20452      the first operand of another operation.  We want the whole shift
20453      operation.  */
20454   if (GET_CODE (early_op) == REG)
20455     early_op = op;
20456 
20457   return !reg_overlap_mentioned_p (value, early_op);
20458 }
20459 
20460 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
20461    have an early register shift value dependency on the result of
20462    PRODUCER.  */
20463 
20464 int
20465 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
20466 {
20467   rtx value = PATTERN (producer);
20468   rtx op = PATTERN (consumer);
20469   rtx early_op;
20470 
20471   if (GET_CODE (value) == COND_EXEC)
20472     value = COND_EXEC_CODE (value);
20473   if (GET_CODE (value) == PARALLEL)
20474     value = XVECEXP (value, 0, 0);
20475   value = XEXP (value, 0);
20476   if (GET_CODE (op) == COND_EXEC)
20477     op = COND_EXEC_CODE (op);
20478   if (GET_CODE (op) == PARALLEL)
20479     op = XVECEXP (op, 0, 0);
20480   op = XEXP (op, 1);
20481 
20482   early_op = XEXP (op, 0);
20483 
20484   /* This is either an actual independent shift, or a shift applied to
20485      the first operand of another operation.  We want the value being
20486      shifted, in either case.  */
20487   if (GET_CODE (early_op) != REG)
20488     early_op = XEXP (early_op, 0);
20489 
20490   return !reg_overlap_mentioned_p (value, early_op);
20491 }
20492 
20493 /* Return nonzero if the CONSUMER (a mul or mac op) does not
20494    have an early register mult dependency on the result of
20495    PRODUCER.  */
20496 
20497 int
20498 arm_no_early_mul_dep (rtx producer, rtx consumer)
20499 {
20500   rtx value = PATTERN (producer);
20501   rtx op = PATTERN (consumer);
20502 
20503   if (GET_CODE (value) == COND_EXEC)
20504     value = COND_EXEC_CODE (value);
20505   if (GET_CODE (value) == PARALLEL)
20506     value = XVECEXP (value, 0, 0);
20507   value = XEXP (value, 0);
20508   if (GET_CODE (op) == COND_EXEC)
20509     op = COND_EXEC_CODE (op);
20510   if (GET_CODE (op) == PARALLEL)
20511     op = XVECEXP (op, 0, 0);
20512   op = XEXP (op, 1);
20513 
20514   if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
20515     {
20516       if (GET_CODE (XEXP (op, 0)) == MULT)
20517 	return !reg_overlap_mentioned_p (value, XEXP (op, 0));
20518       else
20519 	return !reg_overlap_mentioned_p (value, XEXP (op, 1));
20520     }
20521 
20522   return 0;
20523 }
20524 
20525 /* We can't rely on the caller doing the proper promotion when
20526    using APCS or ATPCS.  */
20527 
20528 static bool
20529 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
20530 {
20531     return !TARGET_AAPCS_BASED;
20532 }
20533 
20534 static enum machine_mode
20535 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
20536                            enum machine_mode mode,
20537                            int *punsignedp ATTRIBUTE_UNUSED,
20538                            const_tree fntype ATTRIBUTE_UNUSED,
20539                            int for_return ATTRIBUTE_UNUSED)
20540 {
20541   if (GET_MODE_CLASS (mode) == MODE_INT
20542       && GET_MODE_SIZE (mode) < 4)
20543     return SImode;
20544 
20545   return mode;
20546 }
20547 
20548 /* AAPCS based ABIs use short enums by default.  */
20549 
20550 static bool
20551 arm_default_short_enums (void)
20552 {
20553   return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
20554 }
20555 
20556 
20557 /* AAPCS requires that anonymous bitfields affect structure alignment.  */
20558 
20559 static bool
20560 arm_align_anon_bitfield (void)
20561 {
20562   return TARGET_AAPCS_BASED;
20563 }
20564 
20565 
20566 /* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */
20567 
20568 static tree
20569 arm_cxx_guard_type (void)
20570 {
20571   return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
20572 }
20573 
20574 /* Return non-zero if the consumer (a multiply-accumulate instruction)
20575    has an accumulator dependency on the result of the producer (a
20576    multiplication instruction) and no other dependency on that result.  */
20577 int
20578 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
20579 {
20580   rtx mul = PATTERN (producer);
20581   rtx mac = PATTERN (consumer);
20582   rtx mul_result;
20583   rtx mac_op0, mac_op1, mac_acc;
20584 
20585   if (GET_CODE (mul) == COND_EXEC)
20586     mul = COND_EXEC_CODE (mul);
20587   if (GET_CODE (mac) == COND_EXEC)
20588     mac = COND_EXEC_CODE (mac);
20589 
20590   /* Check that mul is of the form (set (...) (mult ...))
20591      and mla is of the form (set (...) (plus (mult ...) (...))).  */
20592   if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
20593       || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
20594           || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
20595     return 0;
20596 
20597   mul_result = XEXP (mul, 0);
20598   mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
20599   mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
20600   mac_acc = XEXP (XEXP (mac, 1), 1);
20601 
20602   return (reg_overlap_mentioned_p (mul_result, mac_acc)
20603           && !reg_overlap_mentioned_p (mul_result, mac_op0)
20604           && !reg_overlap_mentioned_p (mul_result, mac_op1));
20605 }
20606 
20607 
20608 /* The EABI says test the least significant bit of a guard variable.  */
20609 
20610 static bool
20611 arm_cxx_guard_mask_bit (void)
20612 {
20613   return TARGET_AAPCS_BASED;
20614 }
20615 
20616 
20617 /* The EABI specifies that all array cookies are 8 bytes long.  */
20618 
20619 static tree
20620 arm_get_cookie_size (tree type)
20621 {
20622   tree size;
20623 
20624   if (!TARGET_AAPCS_BASED)
20625     return default_cxx_get_cookie_size (type);
20626 
20627   size = build_int_cst (sizetype, 8);
20628   return size;
20629 }
20630 
20631 
20632 /* The EABI says that array cookies should also contain the element size.  */
20633 
20634 static bool
20635 arm_cookie_has_size (void)
20636 {
20637   return TARGET_AAPCS_BASED;
20638 }
20639 
20640 
20641 /* The EABI says constructors and destructors should return a pointer to
20642    the object constructed/destroyed.  */
20643 
20644 static bool
20645 arm_cxx_cdtor_returns_this (void)
20646 {
20647   return TARGET_AAPCS_BASED;
20648 }
20649 
20650 /* The EABI says that an inline function may never be the key
20651    method.  */
20652 
20653 static bool
20654 arm_cxx_key_method_may_be_inline (void)
20655 {
20656   return !TARGET_AAPCS_BASED;
20657 }
20658 
20659 static void
20660 arm_cxx_determine_class_data_visibility (tree decl)
20661 {
20662   if (!TARGET_AAPCS_BASED
20663       || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
20664     return;
20665 
20666   /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
20667      is exported.  However, on systems without dynamic vague linkage,
20668      \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
20669   if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
20670     DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
20671   else
20672     DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
20673   DECL_VISIBILITY_SPECIFIED (decl) = 1;
20674 }
20675 
20676 static bool
20677 arm_cxx_class_data_always_comdat (void)
20678 {
20679   /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
20680      vague linkage if the class has no key function.  */
20681   return !TARGET_AAPCS_BASED;
20682 }
20683 
20684 
20685 /* The EABI says __aeabi_atexit should be used to register static
20686    destructors.  */
20687 
20688 static bool
20689 arm_cxx_use_aeabi_atexit (void)
20690 {
20691   return TARGET_AAPCS_BASED;
20692 }
20693 
20694 
20695 void
20696 arm_set_return_address (rtx source, rtx scratch)
20697 {
20698   arm_stack_offsets *offsets;
20699   HOST_WIDE_INT delta;
20700   rtx addr;
20701   unsigned long saved_regs;
20702 
20703   offsets = arm_get_frame_offsets ();
20704   saved_regs = offsets->saved_regs_mask;
20705 
20706   if ((saved_regs & (1 << LR_REGNUM)) == 0)
20707     emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
20708   else
20709     {
20710       if (frame_pointer_needed)
20711 	addr = plus_constant(hard_frame_pointer_rtx, -4);
20712       else
20713 	{
20714 	  /* LR will be the first saved register.  */
20715 	  delta = offsets->outgoing_args - (offsets->frame + 4);
20716 
20717 
20718 	  if (delta >= 4096)
20719 	    {
20720 	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
20721 				     GEN_INT (delta & ~4095)));
20722 	      addr = scratch;
20723 	      delta &= 4095;
20724 	    }
20725 	  else
20726 	    addr = stack_pointer_rtx;
20727 
20728 	  addr = plus_constant (addr, delta);
20729 	}
20730       emit_move_insn (gen_frame_mem (Pmode, addr), source);
20731     }
20732 }
20733 
20734 
20735 void
20736 thumb_set_return_address (rtx source, rtx scratch)
20737 {
20738   arm_stack_offsets *offsets;
20739   HOST_WIDE_INT delta;
20740   HOST_WIDE_INT limit;
20741   int reg;
20742   rtx addr;
20743   unsigned long mask;
20744 
20745   emit_use (source);
20746 
20747   offsets = arm_get_frame_offsets ();
20748   mask = offsets->saved_regs_mask;
20749   if (mask & (1 << LR_REGNUM))
20750     {
20751       limit = 1024;
20752       /* Find the saved regs.  */
20753       if (frame_pointer_needed)
20754 	{
20755 	  delta = offsets->soft_frame - offsets->saved_args;
20756 	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
20757 	  if (TARGET_THUMB1)
20758 	    limit = 128;
20759 	}
20760       else
20761 	{
20762 	  delta = offsets->outgoing_args - offsets->saved_args;
20763 	  reg = SP_REGNUM;
20764 	}
20765       /* Allow for the stack frame.  */
20766       if (TARGET_THUMB1 && TARGET_BACKTRACE)
20767 	delta -= 16;
20768       /* The link register is always the first saved register.  */
20769       delta -= 4;
20770 
20771       /* Construct the address.  */
20772       addr = gen_rtx_REG (SImode, reg);
20773       if (delta > limit)
20774 	{
20775 	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
20776 	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
20777 	  addr = scratch;
20778 	}
20779       else
20780 	addr = plus_constant (addr, delta);
20781 
20782       emit_move_insn (gen_frame_mem (Pmode, addr), source);
20783     }
20784   else
20785     emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
20786 }
20787 
20788 /* Implements target hook vector_mode_supported_p.  */
20789 bool
20790 arm_vector_mode_supported_p (enum machine_mode mode)
20791 {
20792   /* Neon also supports V2SImode, etc. listed in the clause below.  */
20793   if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
20794       || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
20795     return true;
20796 
20797   if ((TARGET_NEON || TARGET_IWMMXT)
20798       && ((mode == V2SImode)
20799 	  || (mode == V4HImode)
20800 	  || (mode == V8QImode)))
20801     return true;
20802 
20803   return false;
20804 }
20805 
20806 /* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
20807    ARM insns and therefore guarantee that the shift count is modulo 256.
20808    DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
20809    guarantee no particular behavior for out-of-range counts.  */
20810 
20811 static unsigned HOST_WIDE_INT
20812 arm_shift_truncation_mask (enum machine_mode mode)
20813 {
20814   return mode == SImode ? 255 : 0;
20815 }
20816 
20817 
20818 /* Map internal gcc register numbers to DWARF2 register numbers.  */
20819 
20820 unsigned int
20821 arm_dbx_register_number (unsigned int regno)
20822 {
20823   if (regno < 16)
20824     return regno;
20825 
20826   /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
20827      compatibility.  The EABI defines them as registers 96-103.  */
20828   if (IS_FPA_REGNUM (regno))
20829     return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
20830 
20831   if (IS_VFP_REGNUM (regno))
20832     {
20833       /* See comment in arm_dwarf_register_span.  */
20834       if (VFP_REGNO_OK_FOR_SINGLE (regno))
20835 	return 64 + regno - FIRST_VFP_REGNUM;
20836       else
20837 	return 256 + (regno - FIRST_VFP_REGNUM) / 2;
20838     }
20839 
20840   if (IS_IWMMXT_GR_REGNUM (regno))
20841     return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
20842 
20843   if (IS_IWMMXT_REGNUM (regno))
20844     return 112 + regno - FIRST_IWMMXT_REGNUM;
20845 
20846   gcc_unreachable ();
20847 }
20848 
20849 /* Dwarf models VFPv3 registers as 32 64-bit registers.
20850    GCC models tham as 64 32-bit registers, so we need to describe this to
20851    the DWARF generation code.  Other registers can use the default.  */
20852 static rtx
20853 arm_dwarf_register_span (rtx rtl)
20854 {
20855   unsigned regno;
20856   int nregs;
20857   int i;
20858   rtx p;
20859 
20860   regno = REGNO (rtl);
20861   if (!IS_VFP_REGNUM (regno))
20862     return NULL_RTX;
20863 
20864   /* XXX FIXME: The EABI defines two VFP register ranges:
20865 	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
20866 	256-287: D0-D31
20867      The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
20868      corresponding D register.  Until GDB supports this, we shall use the
20869      legacy encodings.  We also use these encodings for D0-D15 for
20870      compatibility with older debuggers.  */
20871   if (VFP_REGNO_OK_FOR_SINGLE (regno))
20872     return NULL_RTX;
20873 
20874   nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
20875   p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
20876   regno = (regno - FIRST_VFP_REGNUM) / 2;
20877   for (i = 0; i < nregs; i++)
20878     XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
20879 
20880   return p;
20881 }
20882 
20883 #ifdef TARGET_UNWIND_INFO
20884 /* Emit unwind directives for a store-multiple instruction or stack pointer
20885    push during alignment.
20886    These should only ever be generated by the function prologue code, so
20887    expect them to have a particular form.  */
20888 
20889 static void
20890 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
20891 {
20892   int i;
20893   HOST_WIDE_INT offset;
20894   HOST_WIDE_INT nregs;
20895   int reg_size;
20896   unsigned reg;
20897   unsigned lastreg;
20898   rtx e;
20899 
20900   e = XVECEXP (p, 0, 0);
20901   if (GET_CODE (e) != SET)
20902     abort ();
20903 
20904   /* First insn will adjust the stack pointer.  */
20905   if (GET_CODE (e) != SET
20906       || GET_CODE (XEXP (e, 0)) != REG
20907       || REGNO (XEXP (e, 0)) != SP_REGNUM
20908       || GET_CODE (XEXP (e, 1)) != PLUS)
20909     abort ();
20910 
20911   offset = -INTVAL (XEXP (XEXP (e, 1), 1));
20912   nregs = XVECLEN (p, 0) - 1;
20913 
20914   reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
20915   if (reg < 16)
20916     {
20917       /* The function prologue may also push pc, but not annotate it as it is
20918 	 never restored.  We turn this into a stack pointer adjustment.  */
20919       if (nregs * 4 == offset - 4)
20920 	{
20921 	  fprintf (asm_out_file, "\t.pad #4\n");
20922 	  offset -= 4;
20923 	}
20924       reg_size = 4;
20925       fprintf (asm_out_file, "\t.save {");
20926     }
20927   else if (IS_VFP_REGNUM (reg))
20928     {
20929       reg_size = 8;
20930       fprintf (asm_out_file, "\t.vsave {");
20931     }
20932   else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
20933     {
20934       /* FPA registers are done differently.  */
20935       asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
20936       return;
20937     }
20938   else
20939     /* Unknown register type.  */
20940     abort ();
20941 
20942   /* If the stack increment doesn't match the size of the saved registers,
20943      something has gone horribly wrong.  */
20944   if (offset != nregs * reg_size)
20945     abort ();
20946 
20947   offset = 0;
20948   lastreg = 0;
20949   /* The remaining insns will describe the stores.  */
20950   for (i = 1; i <= nregs; i++)
20951     {
20952       /* Expect (set (mem <addr>) (reg)).
20953          Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
20954       e = XVECEXP (p, 0, i);
20955       if (GET_CODE (e) != SET
20956 	  || GET_CODE (XEXP (e, 0)) != MEM
20957 	  || GET_CODE (XEXP (e, 1)) != REG)
20958 	abort ();
20959 
20960       reg = REGNO (XEXP (e, 1));
20961       if (reg < lastreg)
20962 	abort ();
20963 
20964       if (i != 1)
20965 	fprintf (asm_out_file, ", ");
20966       /* We can't use %r for vfp because we need to use the
20967 	 double precision register names.  */
20968       if (IS_VFP_REGNUM (reg))
20969 	asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
20970       else
20971 	asm_fprintf (asm_out_file, "%r", reg);
20972 
20973 #ifdef ENABLE_CHECKING
20974       /* Check that the addresses are consecutive.  */
20975       e = XEXP (XEXP (e, 0), 0);
20976       if (GET_CODE (e) == PLUS)
20977 	{
20978 	  offset += reg_size;
20979 	  if (GET_CODE (XEXP (e, 0)) != REG
20980 	      || REGNO (XEXP (e, 0)) != SP_REGNUM
20981 	      || GET_CODE (XEXP (e, 1)) != CONST_INT
20982 	      || offset != INTVAL (XEXP (e, 1)))
20983 	    abort ();
20984 	}
20985       else if (i != 1
20986 	       || GET_CODE (e) != REG
20987 	       || REGNO (e) != SP_REGNUM)
20988 	abort ();
20989 #endif
20990     }
20991   fprintf (asm_out_file, "}\n");
20992 }
20993 
20994 /*  Emit unwind directives for a SET.  */
20995 
20996 static void
20997 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
20998 {
20999   rtx e0;
21000   rtx e1;
21001   unsigned reg;
21002 
21003   e0 = XEXP (p, 0);
21004   e1 = XEXP (p, 1);
21005   switch (GET_CODE (e0))
21006     {
21007     case MEM:
21008       /* Pushing a single register.  */
21009       if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
21010 	  || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
21011 	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
21012 	abort ();
21013 
21014       asm_fprintf (asm_out_file, "\t.save ");
21015       if (IS_VFP_REGNUM (REGNO (e1)))
21016 	asm_fprintf(asm_out_file, "{d%d}\n",
21017 		    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
21018       else
21019 	asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
21020       break;
21021 
21022     case REG:
21023       if (REGNO (e0) == SP_REGNUM)
21024 	{
21025 	  /* A stack increment.  */
21026 	  if (GET_CODE (e1) != PLUS
21027 	      || GET_CODE (XEXP (e1, 0)) != REG
21028 	      || REGNO (XEXP (e1, 0)) != SP_REGNUM
21029 	      || GET_CODE (XEXP (e1, 1)) != CONST_INT)
21030 	    abort ();
21031 
21032 	  asm_fprintf (asm_out_file, "\t.pad #%wd\n",
21033 		       -INTVAL (XEXP (e1, 1)));
21034 	}
21035       else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
21036 	{
21037 	  HOST_WIDE_INT offset;
21038 
21039 	  if (GET_CODE (e1) == PLUS)
21040 	    {
21041 	      if (GET_CODE (XEXP (e1, 0)) != REG
21042 		  || GET_CODE (XEXP (e1, 1)) != CONST_INT)
21043 		abort ();
21044 	      reg = REGNO (XEXP (e1, 0));
21045 	      offset = INTVAL (XEXP (e1, 1));
21046 	      asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
21047 			   HARD_FRAME_POINTER_REGNUM, reg,
21048 			   INTVAL (XEXP (e1, 1)));
21049 	    }
21050 	  else if (GET_CODE (e1) == REG)
21051 	    {
21052 	      reg = REGNO (e1);
21053 	      asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
21054 			   HARD_FRAME_POINTER_REGNUM, reg);
21055 	    }
21056 	  else
21057 	    abort ();
21058 	}
21059       else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
21060 	{
21061 	  /* Move from sp to reg.  */
21062 	  asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
21063 	}
21064      else if (GET_CODE (e1) == PLUS
21065 	      && GET_CODE (XEXP (e1, 0)) == REG
21066 	      && REGNO (XEXP (e1, 0)) == SP_REGNUM
21067 	      && GET_CODE (XEXP (e1, 1)) == CONST_INT)
21068 	{
21069 	  /* Set reg to offset from sp.  */
21070 	  asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
21071 		       REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
21072 	}
21073       else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
21074 	{
21075 	  /* Stack pointer save before alignment.  */
21076 	  reg = REGNO (e0);
21077 	  asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
21078 		       reg + 0x90, reg);
21079 	}
21080       else
21081 	abort ();
21082       break;
21083 
21084     default:
21085       abort ();
21086     }
21087 }
21088 
21089 
21090 /* Emit unwind directives for the given insn.  */
21091 
21092 static void
21093 arm_unwind_emit (FILE * asm_out_file, rtx insn)
21094 {
21095   rtx pat;
21096 
21097   if (!ARM_EABI_UNWIND_TABLES)
21098     return;
21099 
21100   if (!(flag_unwind_tables || crtl->uses_eh_lsda)
21101       && (TREE_NOTHROW (current_function_decl)
21102 	  || crtl->all_throwers_are_sibcalls))
21103     return;
21104 
21105   if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
21106     return;
21107 
21108   pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
21109   if (pat)
21110     pat = XEXP (pat, 0);
21111   else
21112     pat = PATTERN (insn);
21113 
21114   switch (GET_CODE (pat))
21115     {
21116     case SET:
21117       arm_unwind_emit_set (asm_out_file, pat);
21118       break;
21119 
21120     case SEQUENCE:
21121       /* Store multiple.  */
21122       arm_unwind_emit_sequence (asm_out_file, pat);
21123       break;
21124 
21125     default:
21126       abort();
21127     }
21128 }
21129 
21130 
21131 /* Output a reference from a function exception table to the type_info
21132    object X.  The EABI specifies that the symbol should be relocated by
21133    an R_ARM_TARGET2 relocation.  */
21134 
21135 static bool
21136 arm_output_ttype (rtx x)
21137 {
21138   fputs ("\t.word\t", asm_out_file);
21139   output_addr_const (asm_out_file, x);
21140   /* Use special relocations for symbol references.  */
21141   if (GET_CODE (x) != CONST_INT)
21142     fputs ("(TARGET2)", asm_out_file);
21143   fputc ('\n', asm_out_file);
21144 
21145   return TRUE;
21146 }
21147 #endif /* TARGET_UNWIND_INFO */
21148 
21149 
21150 /* Handle UNSPEC DWARF call frame instructions.  These are needed for dynamic
21151    stack alignment.  */
21152 
21153 static void
21154 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
21155 {
21156   rtx unspec = SET_SRC (pattern);
21157   gcc_assert (GET_CODE (unspec) == UNSPEC);
21158 
21159   switch (index)
21160     {
21161     case UNSPEC_STACK_ALIGN:
21162       /* ??? We should set the CFA = (SP & ~7).  At this point we haven't
21163          put anything on the stack, so hopefully it won't matter.
21164          CFA = SP will be correct after alignment.  */
21165       dwarf2out_reg_save_reg (label, stack_pointer_rtx,
21166                               SET_DEST (pattern));
21167       break;
21168     default:
21169       gcc_unreachable ();
21170     }
21171 }
21172 
21173 
21174 /* Output unwind directives for the start/end of a function.  */
21175 
21176 void
21177 arm_output_fn_unwind (FILE * f, bool prologue)
21178 {
21179   if (!ARM_EABI_UNWIND_TABLES)
21180     return;
21181 
21182   if (prologue)
21183     fputs ("\t.fnstart\n", f);
21184   else
21185     {
21186       /* If this function will never be unwound, then mark it as such.
21187          The came condition is used in arm_unwind_emit to suppress
21188 	 the frame annotations.  */
21189       if (!(flag_unwind_tables || crtl->uses_eh_lsda)
21190 	  && (TREE_NOTHROW (current_function_decl)
21191 	      || crtl->all_throwers_are_sibcalls))
21192 	fputs("\t.cantunwind\n", f);
21193 
21194       fputs ("\t.fnend\n", f);
21195     }
21196 }
21197 
21198 static bool
21199 arm_emit_tls_decoration (FILE *fp, rtx x)
21200 {
21201   enum tls_reloc reloc;
21202   rtx val;
21203 
21204   val = XVECEXP (x, 0, 0);
21205   reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
21206 
21207   output_addr_const (fp, val);
21208 
21209   switch (reloc)
21210     {
21211     case TLS_GD32:
21212       fputs ("(tlsgd)", fp);
21213       break;
21214     case TLS_LDM32:
21215       fputs ("(tlsldm)", fp);
21216       break;
21217     case TLS_LDO32:
21218       fputs ("(tlsldo)", fp);
21219       break;
21220     case TLS_IE32:
21221       fputs ("(gottpoff)", fp);
21222       break;
21223     case TLS_LE32:
21224       fputs ("(tpoff)", fp);
21225       break;
21226     default:
21227       gcc_unreachable ();
21228     }
21229 
21230   switch (reloc)
21231     {
21232     case TLS_GD32:
21233     case TLS_LDM32:
21234     case TLS_IE32:
21235       fputs (" + (. - ", fp);
21236       output_addr_const (fp, XVECEXP (x, 0, 2));
21237       fputs (" - ", fp);
21238       output_addr_const (fp, XVECEXP (x, 0, 3));
21239       fputc (')', fp);
21240       break;
21241     default:
21242       break;
21243     }
21244 
21245   return TRUE;
21246 }
21247 
21248 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */
21249 
21250 static void
21251 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
21252 {
21253   gcc_assert (size == 4);
21254   fputs ("\t.word\t", file);
21255   output_addr_const (file, x);
21256   fputs ("(tlsldo)", file);
21257 }
21258 
21259 bool
21260 arm_output_addr_const_extra (FILE *fp, rtx x)
21261 {
21262   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
21263     return arm_emit_tls_decoration (fp, x);
21264   else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
21265     {
21266       char label[256];
21267       int labelno = INTVAL (XVECEXP (x, 0, 0));
21268 
21269       ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
21270       assemble_name_raw (fp, label);
21271 
21272       return TRUE;
21273     }
21274   else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
21275     {
21276       assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
21277       if (GOT_PCREL)
21278 	fputs ("+.", fp);
21279       fputs ("-(", fp);
21280       output_addr_const (fp, XVECEXP (x, 0, 0));
21281       fputc (')', fp);
21282       return TRUE;
21283     }
21284   else if (GET_CODE (x) == CONST_VECTOR)
21285     return arm_emit_vector_const (fp, x);
21286 
21287   return FALSE;
21288 }
21289 
21290 /* Output assembly for a shift instruction.
21291    SET_FLAGS determines how the instruction modifies the condition codes.
21292    0 - Do not set condition codes.
21293    1 - Set condition codes.
21294    2 - Use smallest instruction.  */
21295 const char *
21296 arm_output_shift(rtx * operands, int set_flags)
21297 {
21298   char pattern[100];
21299   static const char flag_chars[3] = {'?', '.', '!'};
21300   const char *shift;
21301   HOST_WIDE_INT val;
21302   char c;
21303 
21304   c = flag_chars[set_flags];
21305   if (TARGET_UNIFIED_ASM)
21306     {
21307       shift = shift_op(operands[3], &val);
21308       if (shift)
21309 	{
21310 	  if (val != -1)
21311 	    operands[2] = GEN_INT(val);
21312 	  sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
21313 	}
21314       else
21315 	sprintf (pattern, "mov%%%c\t%%0, %%1", c);
21316     }
21317   else
21318     sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
21319   output_asm_insn (pattern, operands);
21320   return "";
21321 }
21322 
21323 /* Output a Thumb-1 casesi dispatch sequence.  */
21324 const char *
21325 thumb1_output_casesi (rtx *operands)
21326 {
21327   rtx diff_vec = PATTERN (next_real_insn (operands[0]));
21328   addr_diff_vec_flags flags;
21329 
21330   gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
21331 
21332   flags = ADDR_DIFF_VEC_FLAGS (diff_vec);
21333 
21334   switch (GET_MODE(diff_vec))
21335     {
21336     case QImode:
21337       return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
21338 	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
21339     case HImode:
21340       return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
21341 	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
21342     case SImode:
21343       return "bl\t%___gnu_thumb1_case_si";
21344     default:
21345       gcc_unreachable ();
21346     }
21347 }
21348 
21349 /* Output a Thumb-2 casesi instruction.  */
21350 const char *
21351 thumb2_output_casesi (rtx *operands)
21352 {
21353   rtx diff_vec = PATTERN (next_real_insn (operands[2]));
21354 
21355   gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
21356 
21357   output_asm_insn ("cmp\t%0, %1", operands);
21358   output_asm_insn ("bhi\t%l3", operands);
21359   switch (GET_MODE(diff_vec))
21360     {
21361     case QImode:
21362       return "tbb\t[%|pc, %0]";
21363     case HImode:
21364       return "tbh\t[%|pc, %0, lsl #1]";
21365     case SImode:
21366       if (flag_pic)
21367 	{
21368 	  output_asm_insn ("adr\t%4, %l2", operands);
21369 	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
21370 	  output_asm_insn ("add\t%4, %4, %5", operands);
21371 	  return "bx\t%4";
21372 	}
21373       else
21374 	{
21375 	  output_asm_insn ("adr\t%4, %l2", operands);
21376 	  return "ldr\t%|pc, [%4, %0, lsl #2]";
21377 	}
21378     default:
21379       gcc_unreachable ();
21380     }
21381 }
21382 
21383 /* Most ARM cores are single issue, but some newer ones can dual issue.
21384    The scheduler descriptions rely on this being correct.  */
21385 static int
21386 arm_issue_rate (void)
21387 {
21388   switch (arm_tune)
21389     {
21390     case cortexr4:
21391     case cortexr4f:
21392     case cortexa8:
21393     case cortexa9:
21394       return 2;
21395 
21396     default:
21397       return 1;
21398     }
21399 }
21400 
21401 /* A table and a function to perform ARM-specific name mangling for
21402    NEON vector types in order to conform to the AAPCS (see "Procedure
21403    Call Standard for the ARM Architecture", Appendix A).  To qualify
21404    for emission with the mangled names defined in that document, a
21405    vector type must not only be of the correct mode but also be
21406    composed of NEON vector element types (e.g. __builtin_neon_qi).  */
21407 typedef struct
21408 {
21409   enum machine_mode mode;
21410   const char *element_type_name;
21411   const char *aapcs_name;
21412 } arm_mangle_map_entry;
21413 
21414 static arm_mangle_map_entry arm_mangle_map[] = {
21415   /* 64-bit containerized types.  */
21416   { V8QImode,  "__builtin_neon_qi",     "15__simd64_int8_t" },
21417   { V8QImode,  "__builtin_neon_uqi",    "16__simd64_uint8_t" },
21418   { V4HImode,  "__builtin_neon_hi",     "16__simd64_int16_t" },
21419   { V4HImode,  "__builtin_neon_uhi",    "17__simd64_uint16_t" },
21420   { V2SImode,  "__builtin_neon_si",     "16__simd64_int32_t" },
21421   { V2SImode,  "__builtin_neon_usi",    "17__simd64_uint32_t" },
21422   { V2SFmode,  "__builtin_neon_sf",     "18__simd64_float32_t" },
21423   { V8QImode,  "__builtin_neon_poly8",  "16__simd64_poly8_t" },
21424   { V4HImode,  "__builtin_neon_poly16", "17__simd64_poly16_t" },
21425   /* 128-bit containerized types.  */
21426   { V16QImode, "__builtin_neon_qi",     "16__simd128_int8_t" },
21427   { V16QImode, "__builtin_neon_uqi",    "17__simd128_uint8_t" },
21428   { V8HImode,  "__builtin_neon_hi",     "17__simd128_int16_t" },
21429   { V8HImode,  "__builtin_neon_uhi",    "18__simd128_uint16_t" },
21430   { V4SImode,  "__builtin_neon_si",     "17__simd128_int32_t" },
21431   { V4SImode,  "__builtin_neon_usi",    "18__simd128_uint32_t" },
21432   { V4SFmode,  "__builtin_neon_sf",     "19__simd128_float32_t" },
21433   { V16QImode, "__builtin_neon_poly8",  "17__simd128_poly8_t" },
21434   { V8HImode,  "__builtin_neon_poly16", "18__simd128_poly16_t" },
21435   { VOIDmode, NULL, NULL }
21436 };
21437 
21438 const char *
21439 arm_mangle_type (const_tree type)
21440 {
21441   arm_mangle_map_entry *pos = arm_mangle_map;
21442 
21443   /* The ARM ABI documents (10th October 2008) say that "__va_list"
21444      has to be managled as if it is in the "std" namespace.  */
21445   if (TARGET_AAPCS_BASED
21446       && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
21447     {
21448       static bool warned;
21449       if (!warned && warn_psabi && !in_system_header)
21450 	{
21451 	  warned = true;
21452 	  inform (input_location,
21453 		  "the mangling of %<va_list%> has changed in GCC 4.4");
21454 	}
21455       return "St9__va_list";
21456     }
21457 
21458   /* Half-precision float.  */
21459   if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
21460     return "Dh";
21461 
21462   if (TREE_CODE (type) != VECTOR_TYPE)
21463     return NULL;
21464 
21465   /* Check the mode of the vector type, and the name of the vector
21466      element type, against the table.  */
21467   while (pos->mode != VOIDmode)
21468     {
21469       tree elt_type = TREE_TYPE (type);
21470 
21471       if (pos->mode == TYPE_MODE (type)
21472 	  && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
21473 	  && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
21474 		      pos->element_type_name))
21475         return pos->aapcs_name;
21476 
21477       pos++;
21478     }
21479 
21480   /* Use the default mangling for unrecognized (possibly user-defined)
21481      vector types.  */
21482   return NULL;
21483 }
21484 
21485 /* Order of allocation of core registers for Thumb: this allocation is
21486    written over the corresponding initial entries of the array
21487    initialized with REG_ALLOC_ORDER.  We allocate all low registers
21488    first.  Saving and restoring a low register is usually cheaper than
21489    using a call-clobbered high register.  */
21490 
21491 static const int thumb_core_reg_alloc_order[] =
21492 {
21493    3,  2,  1,  0,  4,  5,  6,  7,
21494   14, 12,  8,  9, 10, 11, 13, 15
21495 };
21496 
21497 /* Adjust register allocation order when compiling for Thumb.  */
21498 
21499 void
21500 arm_order_regs_for_local_alloc (void)
21501 {
21502   const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
21503   memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
21504   if (TARGET_THUMB)
21505     memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
21506             sizeof (thumb_core_reg_alloc_order));
21507 }
21508 
21509 /* Set default optimization options.  */
21510 void
21511 arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
21512 {
21513   /* Enable section anchors by default at -O1 or higher.
21514      Use 2 to distinguish from an explicit -fsection-anchors
21515      given on the command line.  */
21516   if (level > 0)
21517     flag_section_anchors = 2;
21518 }
21519 
21520 /* Implement TARGET_FRAME_POINTER_REQUIRED.  */
21521 
21522 bool
21523 arm_frame_pointer_required (void)
21524 {
21525   return (cfun->has_nonlocal_label
21526           || SUBTARGET_FRAME_POINTER_REQUIRED
21527           || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
21528 }
21529 
21530 /* Only thumb1 can't support conditional execution, so return true if
21531    the target is not thumb1.  */
21532 static bool
21533 arm_have_conditional_execution (void)
21534 {
21535   return !TARGET_THUMB1;
21536 }
21537 
21538 #include "gt-arm.h"
21539