xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/config/arm/arm.c (revision 6cd39ddb8550f6fa1bff3fed32053d7f19fd0453)
1 /* Output routines for GCC for ARM.
2    Copyright (C) 1991-2013 Free Software Foundation, Inc.
3    Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4    and Martin Simmons (@harleqn.co.uk).
5    More major hacks by Richard Earnshaw (rearnsha@arm.com).
6 
7    This file is part of GCC.
8 
9    GCC is free software; you can redistribute it and/or modify it
10    under the terms of the GNU General Public License as published
11    by the Free Software Foundation; either version 3, or (at your
12    option) any later version.
13 
14    GCC is distributed in the hope that it will be useful, but WITHOUT
15    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
17    License for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with GCC; see the file COPYING3.  If not see
21    <http://www.gnu.org/licenses/>.  */
22 
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "obstack.h"
30 #include "regs.h"
31 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "reload.h"
38 #include "function.h"
39 #include "expr.h"
40 #include "optabs.h"
41 #include "diagnostic-core.h"
42 #include "recog.h"
43 #include "cgraph.h"
44 #include "ggc.h"
45 #include "except.h"
46 #include "tm_p.h"
47 #include "target.h"
48 #include "target-def.h"
49 #include "debug.h"
50 #include "langhooks.h"
51 #include "df.h"
52 #include "intl.h"
53 #include "libfuncs.h"
54 #include "params.h"
55 #include "opts.h"
56 #include "dumpfile.h"
57 
58 /* Forward definitions of types.  */
59 typedef struct minipool_node    Mnode;
60 typedef struct minipool_fixup   Mfix;
61 
62 void (*arm_lang_output_object_attributes_hook)(void);
63 
64 struct four_ints
65 {
66   int i[4];
67 };
68 
69 /* Forward function declarations.  */
70 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
71 static int arm_compute_static_chain_stack_bytes (void);
72 static arm_stack_offsets *arm_get_frame_offsets (void);
73 static void arm_add_gc_roots (void);
74 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
75 			     HOST_WIDE_INT, rtx, rtx, int, int);
76 static unsigned bit_count (unsigned long);
77 static int arm_address_register_rtx_p (rtx, int);
78 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
79 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
80 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
81 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
82 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
83 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
84 inline static int thumb1_index_register_rtx_p (rtx, int);
85 static int thumb_far_jump_used_p (void);
86 static bool thumb_force_lr_save (void);
87 static unsigned arm_size_return_regs (void);
88 static bool arm_assemble_integer (rtx, unsigned int, int);
89 static void arm_print_operand (FILE *, rtx, int);
90 static void arm_print_operand_address (FILE *, rtx);
91 static bool arm_print_operand_punct_valid_p (unsigned char code);
92 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
93 static arm_cc get_arm_condition_code (rtx);
94 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
95 static rtx is_jump_table (rtx);
96 static const char *output_multi_immediate (rtx *, const char *, const char *,
97 					   int, HOST_WIDE_INT);
98 static const char *shift_op (rtx, HOST_WIDE_INT *);
99 static struct machine_function *arm_init_machine_status (void);
100 static void thumb_exit (FILE *, int);
101 static rtx is_jump_table (rtx);
102 static HOST_WIDE_INT get_jump_table_size (rtx);
103 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
104 static Mnode *add_minipool_forward_ref (Mfix *);
105 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
106 static Mnode *add_minipool_backward_ref (Mfix *);
107 static void assign_minipool_offsets (Mfix *);
108 static void arm_print_value (FILE *, rtx);
109 static void dump_minipool (rtx);
110 static int arm_barrier_cost (rtx);
111 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
112 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
113 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
114 			       rtx);
115 static void arm_reorg (void);
116 static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
117 static unsigned long arm_compute_save_reg0_reg12_mask (void);
118 static unsigned long arm_compute_save_reg_mask (void);
119 static unsigned long arm_isr_value (tree);
120 static unsigned long arm_compute_func_type (void);
121 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
122 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
123 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
124 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
125 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
126 #endif
127 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
128 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
129 static int arm_comp_type_attributes (const_tree, const_tree);
130 static void arm_set_default_type_attributes (tree);
131 static int arm_adjust_cost (rtx, rtx, rtx, int);
132 static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
133 static int optimal_immediate_sequence (enum rtx_code code,
134 				       unsigned HOST_WIDE_INT val,
135 				       struct four_ints *return_sequence);
136 static int optimal_immediate_sequence_1 (enum rtx_code code,
137 					 unsigned HOST_WIDE_INT val,
138 					 struct four_ints *return_sequence,
139 					 int i);
140 static int arm_get_strip_length (int);
141 static bool arm_function_ok_for_sibcall (tree, tree);
142 static enum machine_mode arm_promote_function_mode (const_tree,
143 						    enum machine_mode, int *,
144 						    const_tree, int);
145 static bool arm_return_in_memory (const_tree, const_tree);
146 static rtx arm_function_value (const_tree, const_tree, bool);
147 static rtx arm_libcall_value_1 (enum machine_mode);
148 static rtx arm_libcall_value (enum machine_mode, const_rtx);
149 static bool arm_function_value_regno_p (const unsigned int);
150 static void arm_internal_label (FILE *, const char *, unsigned long);
151 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
152 				 tree);
153 static bool arm_have_conditional_execution (void);
154 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
155 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
156 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
157 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
158 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
159 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
160 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
161 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
162 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
163 static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
164 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
165 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
166 static void arm_init_builtins (void);
167 static void arm_init_iwmmxt_builtins (void);
168 static rtx safe_vector_operand (rtx, enum machine_mode);
169 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
170 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
171 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
172 static tree arm_builtin_decl (unsigned, bool);
173 static void emit_constant_insn (rtx cond, rtx pattern);
174 static rtx emit_set_insn (rtx, rtx);
175 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
176 				  tree, bool);
177 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
178 			     const_tree, bool);
179 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
180 				      const_tree, bool);
181 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
182 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
183 				      const_tree);
184 static rtx aapcs_libcall_value (enum machine_mode);
185 static int aapcs_select_return_coproc (const_tree, const_tree);
186 
187 #ifdef OBJECT_FORMAT_ELF
188 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
189 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
190 #endif
191 #ifndef ARM_PE
192 static void arm_encode_section_info (tree, rtx, int);
193 #endif
194 
195 static void arm_file_end (void);
196 static void arm_file_start (void);
197 
198 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
199 					tree, int *, int);
200 static bool arm_pass_by_reference (cumulative_args_t,
201 				   enum machine_mode, const_tree, bool);
202 static bool arm_promote_prototypes (const_tree);
203 static bool arm_default_short_enums (void);
204 static bool arm_align_anon_bitfield (void);
205 static bool arm_return_in_msb (const_tree);
206 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
207 static bool arm_return_in_memory (const_tree, const_tree);
208 #if ARM_UNWIND_INFO
209 static void arm_unwind_emit (FILE *, rtx);
210 static bool arm_output_ttype (rtx);
211 static void arm_asm_emit_except_personality (rtx);
212 static void arm_asm_init_sections (void);
213 #endif
214 static rtx arm_dwarf_register_span (rtx);
215 
216 static tree arm_cxx_guard_type (void);
217 static bool arm_cxx_guard_mask_bit (void);
218 static tree arm_get_cookie_size (tree);
219 static bool arm_cookie_has_size (void);
220 static bool arm_cxx_cdtor_returns_this (void);
221 static bool arm_cxx_key_method_may_be_inline (void);
222 static void arm_cxx_determine_class_data_visibility (tree);
223 static bool arm_cxx_class_data_always_comdat (void);
224 static bool arm_cxx_use_aeabi_atexit (void);
225 static void arm_init_libfuncs (void);
226 static tree arm_build_builtin_va_list (void);
227 static void arm_expand_builtin_va_start (tree, rtx);
228 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
229 static void arm_option_override (void);
230 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
231 static bool arm_cannot_copy_insn_p (rtx);
232 static int arm_issue_rate (void);
233 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
234 static bool arm_output_addr_const_extra (FILE *, rtx);
235 static bool arm_allocate_stack_slots_for_args (void);
236 static bool arm_warn_func_return (tree);
237 static const char *arm_invalid_parameter_type (const_tree t);
238 static const char *arm_invalid_return_type (const_tree t);
239 static tree arm_promoted_type (const_tree t);
240 static tree arm_convert_to_type (tree type, tree expr);
241 static bool arm_scalar_mode_supported_p (enum machine_mode);
242 static bool arm_frame_pointer_required (void);
243 static bool arm_can_eliminate (const int, const int);
244 static void arm_asm_trampoline_template (FILE *);
245 static void arm_trampoline_init (rtx, tree, rtx);
246 static rtx arm_trampoline_adjust_address (rtx);
247 static rtx arm_pic_static_addr (rtx orig, rtx reg);
248 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
249 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
250 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
251 static bool arm_array_mode_supported_p (enum machine_mode,
252 					unsigned HOST_WIDE_INT);
253 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
254 static bool arm_class_likely_spilled_p (reg_class_t);
255 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
256 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
257 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
258 						     const_tree type,
259 						     int misalignment,
260 						     bool is_packed);
261 static void arm_conditional_register_usage (void);
262 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
263 static unsigned int arm_autovectorize_vector_sizes (void);
264 static int arm_default_branch_cost (bool, bool);
265 static int arm_cortex_a5_branch_cost (bool, bool);
266 
267 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
268 					     const unsigned char *sel);
269 
270 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
271 					   tree vectype,
272 					   int misalign ATTRIBUTE_UNUSED);
273 static unsigned arm_add_stmt_cost (void *data, int count,
274 				   enum vect_cost_for_stmt kind,
275 				   struct _stmt_vec_info *stmt_info,
276 				   int misalign,
277 				   enum vect_cost_model_location where);
278 
279 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
280 					 bool op0_preserve_value);
281 
282 /* Table of machine attributes.  */
283 static const struct attribute_spec arm_attribute_table[] =
284 {
285   /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
286        affects_type_identity } */
287   /* Function calls made to this symbol must be done indirectly, because
288      it may lie outside of the 26 bit addressing range of a normal function
289      call.  */
290   { "long_call",    0, 0, false, true,  true,  NULL, false },
291   /* Whereas these functions are always known to reside within the 26 bit
292      addressing range.  */
293   { "short_call",   0, 0, false, true,  true,  NULL, false },
294   /* Specify the procedure call conventions for a function.  */
295   { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
296     false },
297   /* Interrupt Service Routines have special prologue and epilogue requirements.  */
298   { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
299     false },
300   { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
301     false },
302   { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
303     false },
304 #ifdef ARM_PE
305   /* ARM/PE has three new attributes:
306      interfacearm - ?
307      dllexport - for exporting a function/variable that will live in a dll
308      dllimport - for importing a function/variable from a dll
309 
310      Microsoft allows multiple declspecs in one __declspec, separating
311      them with spaces.  We do NOT support this.  Instead, use __declspec
312      multiple times.
313   */
314   { "dllimport",    0, 0, true,  false, false, NULL, false },
315   { "dllexport",    0, 0, true,  false, false, NULL, false },
316   { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
317     false },
318 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
319   { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
320   { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
321   { "notshared",    0, 0, false, true, false, arm_handle_notshared_attribute,
322     false },
323 #endif
324   { NULL,           0, 0, false, false, false, NULL, false }
325 };
326 
327 /* Initialize the GCC target structure.  */
328 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
329 #undef  TARGET_MERGE_DECL_ATTRIBUTES
330 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
331 #endif
332 
333 #undef TARGET_LEGITIMIZE_ADDRESS
334 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
335 
336 #undef  TARGET_ATTRIBUTE_TABLE
337 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
338 
339 #undef TARGET_ASM_FILE_START
340 #define TARGET_ASM_FILE_START arm_file_start
341 #undef TARGET_ASM_FILE_END
342 #define TARGET_ASM_FILE_END arm_file_end
343 
344 #undef  TARGET_ASM_ALIGNED_SI_OP
345 #define TARGET_ASM_ALIGNED_SI_OP NULL
346 #undef  TARGET_ASM_INTEGER
347 #define TARGET_ASM_INTEGER arm_assemble_integer
348 
349 #undef TARGET_PRINT_OPERAND
350 #define TARGET_PRINT_OPERAND arm_print_operand
351 #undef TARGET_PRINT_OPERAND_ADDRESS
352 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
353 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
354 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
355 
356 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
357 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
358 
359 #undef  TARGET_ASM_FUNCTION_PROLOGUE
360 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
361 
362 #undef  TARGET_ASM_FUNCTION_EPILOGUE
363 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
364 
365 #undef  TARGET_OPTION_OVERRIDE
366 #define TARGET_OPTION_OVERRIDE arm_option_override
367 
368 #undef  TARGET_COMP_TYPE_ATTRIBUTES
369 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
370 
371 #undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
372 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
373 
374 #undef  TARGET_SCHED_ADJUST_COST
375 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
376 
377 #undef TARGET_SCHED_REORDER
378 #define TARGET_SCHED_REORDER arm_sched_reorder
379 
380 #undef TARGET_REGISTER_MOVE_COST
381 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
382 
383 #undef TARGET_MEMORY_MOVE_COST
384 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
385 
386 #undef TARGET_ENCODE_SECTION_INFO
387 #ifdef ARM_PE
388 #define TARGET_ENCODE_SECTION_INFO  arm_pe_encode_section_info
389 #else
390 #define TARGET_ENCODE_SECTION_INFO  arm_encode_section_info
391 #endif
392 
393 #undef  TARGET_STRIP_NAME_ENCODING
394 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
395 
396 #undef  TARGET_ASM_INTERNAL_LABEL
397 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
398 
399 #undef  TARGET_FUNCTION_OK_FOR_SIBCALL
400 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
401 
402 #undef  TARGET_FUNCTION_VALUE
403 #define TARGET_FUNCTION_VALUE arm_function_value
404 
405 #undef  TARGET_LIBCALL_VALUE
406 #define TARGET_LIBCALL_VALUE arm_libcall_value
407 
408 #undef TARGET_FUNCTION_VALUE_REGNO_P
409 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
410 
411 #undef  TARGET_ASM_OUTPUT_MI_THUNK
412 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
413 #undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
414 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
415 
416 #undef  TARGET_RTX_COSTS
417 #define TARGET_RTX_COSTS arm_rtx_costs
418 #undef  TARGET_ADDRESS_COST
419 #define TARGET_ADDRESS_COST arm_address_cost
420 
421 #undef TARGET_SHIFT_TRUNCATION_MASK
422 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
423 #undef TARGET_VECTOR_MODE_SUPPORTED_P
424 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
425 #undef TARGET_ARRAY_MODE_SUPPORTED_P
426 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
427 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
428 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
429 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
430 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
431   arm_autovectorize_vector_sizes
432 
433 #undef  TARGET_MACHINE_DEPENDENT_REORG
434 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
435 
436 #undef  TARGET_INIT_BUILTINS
437 #define TARGET_INIT_BUILTINS  arm_init_builtins
438 #undef  TARGET_EXPAND_BUILTIN
439 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
440 #undef  TARGET_BUILTIN_DECL
441 #define TARGET_BUILTIN_DECL arm_builtin_decl
442 
443 #undef TARGET_INIT_LIBFUNCS
444 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
445 
446 #undef TARGET_PROMOTE_FUNCTION_MODE
447 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
448 #undef TARGET_PROMOTE_PROTOTYPES
449 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
450 #undef TARGET_PASS_BY_REFERENCE
451 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
452 #undef TARGET_ARG_PARTIAL_BYTES
453 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
454 #undef TARGET_FUNCTION_ARG
455 #define TARGET_FUNCTION_ARG arm_function_arg
456 #undef TARGET_FUNCTION_ARG_ADVANCE
457 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
458 #undef TARGET_FUNCTION_ARG_BOUNDARY
459 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
460 
461 #undef  TARGET_SETUP_INCOMING_VARARGS
462 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
463 
464 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
465 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
466 
467 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
468 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
469 #undef TARGET_TRAMPOLINE_INIT
470 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
471 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
472 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
473 
474 #undef TARGET_WARN_FUNC_RETURN
475 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
476 
477 #undef TARGET_DEFAULT_SHORT_ENUMS
478 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
479 
480 #undef TARGET_ALIGN_ANON_BITFIELD
481 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
482 
483 #undef TARGET_NARROW_VOLATILE_BITFIELD
484 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
485 
486 #undef TARGET_CXX_GUARD_TYPE
487 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
488 
489 #undef TARGET_CXX_GUARD_MASK_BIT
490 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
491 
492 #undef TARGET_CXX_GET_COOKIE_SIZE
493 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
494 
495 #undef TARGET_CXX_COOKIE_HAS_SIZE
496 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
497 
498 #undef TARGET_CXX_CDTOR_RETURNS_THIS
499 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
500 
501 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
502 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
503 
504 #undef TARGET_CXX_USE_AEABI_ATEXIT
505 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
506 
507 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
508 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
509   arm_cxx_determine_class_data_visibility
510 
511 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
512 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
513 
514 #undef TARGET_RETURN_IN_MSB
515 #define TARGET_RETURN_IN_MSB arm_return_in_msb
516 
517 #undef TARGET_RETURN_IN_MEMORY
518 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
519 
520 #undef TARGET_MUST_PASS_IN_STACK
521 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
522 
523 #if ARM_UNWIND_INFO
524 #undef TARGET_ASM_UNWIND_EMIT
525 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
526 
527 /* EABI unwinding tables use a different format for the typeinfo tables.  */
528 #undef TARGET_ASM_TTYPE
529 #define TARGET_ASM_TTYPE arm_output_ttype
530 
531 #undef TARGET_ARM_EABI_UNWINDER
532 #define TARGET_ARM_EABI_UNWINDER true
533 
534 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
535 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
536 
537 #undef TARGET_ASM_INIT_SECTIONS
538 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
539 #endif /* ARM_UNWIND_INFO */
540 
541 #undef TARGET_DWARF_REGISTER_SPAN
542 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
543 
544 #undef  TARGET_CANNOT_COPY_INSN_P
545 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
546 
547 #ifdef HAVE_AS_TLS
548 #undef TARGET_HAVE_TLS
549 #define TARGET_HAVE_TLS true
550 #endif
551 
552 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
553 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
554 
555 #undef TARGET_LEGITIMATE_CONSTANT_P
556 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
557 
558 #undef TARGET_CANNOT_FORCE_CONST_MEM
559 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
560 
561 #undef TARGET_MAX_ANCHOR_OFFSET
562 #define TARGET_MAX_ANCHOR_OFFSET 4095
563 
564 /* The minimum is set such that the total size of the block
565    for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
566    divisible by eight, ensuring natural spacing of anchors.  */
567 #undef TARGET_MIN_ANCHOR_OFFSET
568 #define TARGET_MIN_ANCHOR_OFFSET -4088
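/* For illustration (not part of the original source): with these values an
   anchor's block spans offsets [-4088, 4095], i.e.
     4095 - (-4088) + 1 = 8184 bytes,
   and 8184 = 8 * 1023, so the block size is a multiple of eight and anchors
   stay naturally spaced.  */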
569 
570 #undef TARGET_SCHED_ISSUE_RATE
571 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
572 
573 #undef TARGET_MANGLE_TYPE
574 #define TARGET_MANGLE_TYPE arm_mangle_type
575 
576 #undef TARGET_BUILD_BUILTIN_VA_LIST
577 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
578 #undef TARGET_EXPAND_BUILTIN_VA_START
579 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
580 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
581 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
582 
583 #ifdef HAVE_AS_TLS
584 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
585 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
586 #endif
587 
588 #undef TARGET_LEGITIMATE_ADDRESS_P
589 #define TARGET_LEGITIMATE_ADDRESS_P	arm_legitimate_address_p
590 
591 #undef TARGET_PREFERRED_RELOAD_CLASS
592 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
593 
594 #undef TARGET_INVALID_PARAMETER_TYPE
595 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
596 
597 #undef TARGET_INVALID_RETURN_TYPE
598 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
599 
600 #undef TARGET_PROMOTED_TYPE
601 #define TARGET_PROMOTED_TYPE arm_promoted_type
602 
603 #undef TARGET_CONVERT_TO_TYPE
604 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
605 
606 #undef TARGET_SCALAR_MODE_SUPPORTED_P
607 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
608 
609 #undef TARGET_FRAME_POINTER_REQUIRED
610 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
611 
612 #undef TARGET_CAN_ELIMINATE
613 #define TARGET_CAN_ELIMINATE arm_can_eliminate
614 
615 #undef TARGET_CONDITIONAL_REGISTER_USAGE
616 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
617 
618 #undef TARGET_CLASS_LIKELY_SPILLED_P
619 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
620 
621 #undef TARGET_VECTOR_ALIGNMENT
622 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
623 
624 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
625 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
626   arm_vector_alignment_reachable
627 
628 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
629 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
630   arm_builtin_support_vector_misalignment
631 
632 #undef TARGET_PREFERRED_RENAME_CLASS
633 #define TARGET_PREFERRED_RENAME_CLASS \
634   arm_preferred_rename_class
635 
636 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
637 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
638   arm_vectorize_vec_perm_const_ok
639 
640 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
641 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
642   arm_builtin_vectorization_cost
643 #undef TARGET_VECTORIZE_ADD_STMT_COST
644 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
645 
646 #undef TARGET_CANONICALIZE_COMPARISON
647 #define TARGET_CANONICALIZE_COMPARISON \
648   arm_canonicalize_comparison
649 
650 struct gcc_target targetm = TARGET_INITIALIZER;
651 
652 /* Obstack for minipool constant handling.  */
653 static struct obstack minipool_obstack;
654 static char *         minipool_startobj;
655 
656 /* The maximum number of insns skipped which
657    will be conditionalised if possible.  */
658 static int max_insns_skipped = 5;
659 
660 extern FILE * asm_out_file;
661 
662 /* True if we are currently building a constant table.  */
663 int making_const_table;
664 
665 /* The processor for which instructions should be scheduled.  */
666 enum processor_type arm_tune = arm_none;
667 
668 /* The current tuning set.  */
669 const struct tune_params *current_tune;
670 
671 /* Which floating point hardware to schedule for.  */
672 int arm_fpu_attr;
673 
674 /* Which floating point hardware to use.  */
675 const struct arm_fpu_desc *arm_fpu_desc;
676 
677 /* Used for Thumb call_via trampolines.  */
678 rtx thumb_call_via_label[14];
679 static int thumb_call_reg_needed;
680 
681 /* Bit values used to identify processor capabilities.  */
682 #define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
683 #define FL_ARCH3M     (1 << 1)        /* Extended multiply */
684 #define FL_MODE26     (1 << 2)        /* 26-bit mode support */
685 #define FL_MODE32     (1 << 3)        /* 32-bit mode support */
686 #define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
687 #define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
688 #define FL_THUMB      (1 << 6)        /* Thumb aware */
689 #define FL_LDSCHED    (1 << 7)	      /* Load scheduling necessary */
690 #define FL_STRONG     (1 << 8)	      /* StrongARM */
691 #define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
692 #define FL_XSCALE     (1 << 10)	      /* XScale */
693 /* spare	      (1 << 11)	*/
694 #define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
695 					 media instructions.  */
696 #define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
697 #define FL_WBUF	      (1 << 14)	      /* Schedule for write buffer ops.
698 					 Note: ARM6 & 7 derivatives only.  */
699 #define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
700 #define FL_THUMB2     (1 << 16)	      /* Thumb-2.  */
701 #define FL_NOTM	      (1 << 17)	      /* Instructions not present in the 'M'
702 					 profile.  */
703 #define FL_THUMB_DIV  (1 << 18)	      /* Hardware divide (Thumb mode).  */
704 #define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
705 #define FL_NEON       (1 << 20)       /* Neon instructions.  */
706 #define FL_ARCH7EM    (1 << 21)	      /* Instructions present in the ARMv7E-M
707 					 architecture.  */
708 #define FL_ARCH7      (1 << 22)       /* Architecture 7.  */
709 #define FL_ARM_DIV    (1 << 23)	      /* Hardware divide (ARM mode).  */
710 #define FL_ARCH8      (1 << 24)       /* Architecture 8.  */
711 
712 #define FL_IWMMXT     (1 << 29)	      /* XScale v2 or "Intel Wireless MMX technology".  */
713 #define FL_IWMMXT2    (1 << 30)       /* "Intel Wireless MMX2 technology".  */
714 
715 /* Flags that only affect tuning, not available instructions.  */
716 #define FL_TUNE		(FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
717 			 | FL_CO_PROC)
718 
719 #define FL_FOR_ARCH2	FL_NOTM
720 #define FL_FOR_ARCH3	(FL_FOR_ARCH2 | FL_MODE32)
721 #define FL_FOR_ARCH3M	(FL_FOR_ARCH3 | FL_ARCH3M)
722 #define FL_FOR_ARCH4	(FL_FOR_ARCH3M | FL_ARCH4)
723 #define FL_FOR_ARCH4T	(FL_FOR_ARCH4 | FL_THUMB)
724 #define FL_FOR_ARCH5	(FL_FOR_ARCH4 | FL_ARCH5)
725 #define FL_FOR_ARCH5T	(FL_FOR_ARCH5 | FL_THUMB)
726 #define FL_FOR_ARCH5E	(FL_FOR_ARCH5 | FL_ARCH5E)
727 #define FL_FOR_ARCH5TE	(FL_FOR_ARCH5E | FL_THUMB)
728 #define FL_FOR_ARCH5TEJ	FL_FOR_ARCH5TE
729 #define FL_FOR_ARCH6	(FL_FOR_ARCH5TE | FL_ARCH6)
730 #define FL_FOR_ARCH6J	FL_FOR_ARCH6
731 #define FL_FOR_ARCH6K	(FL_FOR_ARCH6 | FL_ARCH6K)
732 #define FL_FOR_ARCH6Z	FL_FOR_ARCH6
733 #define FL_FOR_ARCH6ZK	FL_FOR_ARCH6K
734 #define FL_FOR_ARCH6T2	(FL_FOR_ARCH6 | FL_THUMB2)
735 #define FL_FOR_ARCH6M	(FL_FOR_ARCH6 & ~FL_NOTM)
736 #define FL_FOR_ARCH7	((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
737 #define FL_FOR_ARCH7A	(FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
738 #define FL_FOR_ARCH7R	(FL_FOR_ARCH7A | FL_THUMB_DIV)
739 #define FL_FOR_ARCH7M	(FL_FOR_ARCH7 | FL_THUMB_DIV)
740 #define FL_FOR_ARCH7EM  (FL_FOR_ARCH7M | FL_ARCH7EM)
741 #define FL_FOR_ARCH8A	(FL_FOR_ARCH7 | FL_ARCH6K | FL_ARCH8 | FL_THUMB_DIV \
742 			 | FL_ARM_DIV | FL_NOTM)
743 
744 /* The bits in this mask specify which
745    instructions we are allowed to generate.  */
746 static unsigned long insn_flags = 0;
747 
748 /* The bits in this mask specify which instruction scheduling options should
749    be used.  */
750 static unsigned long tune_flags = 0;
751 
752 /* The highest ARM architecture version supported by the
753    target.  */
754 enum base_architecture arm_base_arch = BASE_ARCH_0;
755 
756 /* The following are used in the arm.md file as equivalents to bits
757    in the above two flag variables.  */
758 
759 /* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
760 int arm_arch3m = 0;
761 
762 /* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
763 int arm_arch4 = 0;
764 
765 /* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
766 int arm_arch4t = 0;
767 
768 /* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
769 int arm_arch5 = 0;
770 
771 /* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
772 int arm_arch5e = 0;
773 
774 /* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
775 int arm_arch6 = 0;
776 
777 /* Nonzero if this chip supports the ARM 6K extensions.  */
778 int arm_arch6k = 0;
779 
780 /* Nonzero if instructions present in ARMv6-M can be used.  */
781 int arm_arch6m = 0;
782 
783 /* Nonzero if this chip supports the ARM 7 extensions.  */
784 int arm_arch7 = 0;
785 
786 /* Nonzero if instructions not present in the 'M' profile can be used.  */
787 int arm_arch_notm = 0;
788 
789 /* Nonzero if instructions present in ARMv7E-M can be used.  */
790 int arm_arch7em = 0;
791 
792 /* Nonzero if instructions present in ARMv8 can be used.  */
793 int arm_arch8 = 0;
794 
795 /* Nonzero if this chip can benefit from load scheduling.  */
796 int arm_ld_sched = 0;
797 
798 /* Nonzero if this chip is a StrongARM.  */
799 int arm_tune_strongarm = 0;
800 
801 /* Nonzero if this chip supports Intel Wireless MMX technology.  */
802 int arm_arch_iwmmxt = 0;
803 
804 /* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
805 int arm_arch_iwmmxt2 = 0;
806 
807 /* Nonzero if this chip is an XScale.  */
808 int arm_arch_xscale = 0;
809 
810 /* Nonzero if tuning for XScale.  */
811 int arm_tune_xscale = 0;
812 
813 /* Nonzero if we want to tune for stores that access the write-buffer.
814    This typically means an ARM6 or ARM7 with MMU or MPU.  */
815 int arm_tune_wbuf = 0;
816 
817 /* Nonzero if tuning for Cortex-A9.  */
818 int arm_tune_cortex_a9 = 0;
819 
820 /* Nonzero if generating Thumb instructions.  */
821 int thumb_code = 0;
822 
823 /* Nonzero if generating Thumb-1 instructions.  */
824 int thumb1_code = 0;
825 
826 /* Nonzero if we should define __THUMB_INTERWORK__ in the
827    preprocessor.
828    XXX This is a bit of a hack, it's intended to help work around
829    problems in GLD which doesn't understand that armv5t code is
830    interworking clean.  */
831 int arm_cpp_interwork = 0;
832 
833 /* Nonzero if chip supports Thumb 2.  */
834 int arm_arch_thumb2;
835 
836 /* Nonzero if chip supports integer division instruction.  */
837 int arm_arch_arm_hwdiv;
838 int arm_arch_thumb_hwdiv;
839 
840 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
841    we must report the mode of the memory reference from
842    TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
843 enum machine_mode output_memory_reference_mode;
844 
845 /* The register number to be used for the PIC offset register.  */
846 unsigned arm_pic_register = INVALID_REGNUM;
847 
848 /* Set to 1 after arm_reorg has started.  Reset at the start of
849    the next function.  */
850 static int after_arm_reorg = 0;
851 
852 enum arm_pcs arm_pcs_default;
853 
854 /* For an explanation of these variables, see final_prescan_insn below.  */
855 int arm_ccfsm_state;
856 /* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
857 enum arm_cond_code arm_current_cc;
858 
859 rtx arm_target_insn;
860 int arm_target_label;
861 /* The number of conditionally executed insns, including the current insn.  */
862 int arm_condexec_count = 0;
863 /* A bitmask specifying the patterns for the IT block.
864    Zero means do not output an IT block before this insn. */
865 int arm_condexec_mask = 0;
866 /* The number of bits used in arm_condexec_mask.  */
867 int arm_condexec_masklen = 0;
868 
869 /* The condition codes of the ARM, and the inverse function.  */
870 static const char * const arm_condition_codes[] =
871 {
872   "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
873   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
874 };
875 
876 /* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
877 int arm_regs_in_sequence[] =
878 {
879   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
880 };
881 
882 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
883 #define streq(string1, string2) (strcmp (string1, string2) == 0)
884 
885 #define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
886 				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
887 				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
888 
889 /* Initialization code.  */
890 
891 struct processors
892 {
893   const char *const name;
894   enum processor_type core;
895   const char *arch;
896   enum base_architecture base_arch;
897   const unsigned long flags;
898   const struct tune_params *const tune;
899 };
900 
901 
902 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
903 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
904   prefetch_slots, \
905   l1_size, \
906   l1_line_size
907 
908 /* arm generic vectorizer costs.  */
909 static const
910 struct cpu_vec_costs arm_default_vec_cost = {
911   1,					/* scalar_stmt_cost.  */
912   1,					/* scalar_load_cost.  */
913   1,					/* scalar_store_cost.  */
914   1,					/* vec_stmt_cost.  */
915   1,					/* vec_to_scalar_cost.  */
916   1,					/* scalar_to_vec_cost.  */
917   1,					/* vec_align_load_cost.  */
918   1,					/* vec_unalign_load_cost.  */
919   1,					/* vec_unalign_store_cost.  */
920   1,					/* vec_store_cost.  */
921   3,					/* cond_taken_branch_cost.  */
922   1,					/* cond_not_taken_branch_cost.  */
923 };
924 
925 const struct tune_params arm_slowmul_tune =
926 {
927   arm_slowmul_rtx_costs,
928   NULL,
929   3,						/* Constant limit.  */
930   5,						/* Max cond insns.  */
931   ARM_PREFETCH_NOT_BENEFICIAL,
932   true,						/* Prefer constant pool.  */
933   arm_default_branch_cost,
934   false,					/* Prefer LDRD/STRD.  */
935   {true, true},					/* Prefer non short circuit.  */
936   &arm_default_vec_cost,                        /* Vectorizer costs.  */
937 };
938 
939 const struct tune_params arm_fastmul_tune =
940 {
941   arm_fastmul_rtx_costs,
942   NULL,
943   1,						/* Constant limit.  */
944   5,						/* Max cond insns.  */
945   ARM_PREFETCH_NOT_BENEFICIAL,
946   true,						/* Prefer constant pool.  */
947   arm_default_branch_cost,
948   false,					/* Prefer LDRD/STRD.  */
949   {true, true},					/* Prefer non short circuit.  */
950   &arm_default_vec_cost,                        /* Vectorizer costs.  */
951 };
952 
953 /* StrongARM has early execution of branches, so a sequence that is worth
954    skipping is shorter.  Set max_insns_skipped to a lower value.  */
955 
956 const struct tune_params arm_strongarm_tune =
957 {
958   arm_fastmul_rtx_costs,
959   NULL,
960   1,						/* Constant limit.  */
961   3,						/* Max cond insns.  */
962   ARM_PREFETCH_NOT_BENEFICIAL,
963   true,						/* Prefer constant pool.  */
964   arm_default_branch_cost,
965   false,					/* Prefer LDRD/STRD.  */
966   {true, true},					/* Prefer non short circuit.  */
967   &arm_default_vec_cost,                        /* Vectorizer costs.  */
968 };
969 
970 const struct tune_params arm_xscale_tune =
971 {
972   arm_xscale_rtx_costs,
973   xscale_sched_adjust_cost,
974   2,						/* Constant limit.  */
975   3,						/* Max cond insns.  */
976   ARM_PREFETCH_NOT_BENEFICIAL,
977   true,						/* Prefer constant pool.  */
978   arm_default_branch_cost,
979   false,					/* Prefer LDRD/STRD.  */
980   {true, true},					/* Prefer non short circuit.  */
981   &arm_default_vec_cost,                        /* Vectorizer costs.  */
982 };
983 
984 const struct tune_params arm_9e_tune =
985 {
986   arm_9e_rtx_costs,
987   NULL,
988   1,						/* Constant limit.  */
989   5,						/* Max cond insns.  */
990   ARM_PREFETCH_NOT_BENEFICIAL,
991   true,						/* Prefer constant pool.  */
992   arm_default_branch_cost,
993   false,					/* Prefer LDRD/STRD.  */
994   {true, true},					/* Prefer non short circuit.  */
995   &arm_default_vec_cost,                        /* Vectorizer costs.  */
996 };
997 
998 const struct tune_params arm_v6t2_tune =
999 {
1000   arm_9e_rtx_costs,
1001   NULL,
1002   1,						/* Constant limit.  */
1003   5,						/* Max cond insns.  */
1004   ARM_PREFETCH_NOT_BENEFICIAL,
1005   false,					/* Prefer constant pool.  */
1006   arm_default_branch_cost,
1007   false,					/* Prefer LDRD/STRD.  */
1008   {true, true},					/* Prefer non short circuit.  */
1009   &arm_default_vec_cost,                        /* Vectorizer costs.  */
1010 };
1011 
1012 /* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
1013 const struct tune_params arm_cortex_tune =
1014 {
1015   arm_9e_rtx_costs,
1016   NULL,
1017   1,						/* Constant limit.  */
1018   5,						/* Max cond insns.  */
1019   ARM_PREFETCH_NOT_BENEFICIAL,
1020   false,					/* Prefer constant pool.  */
1021   arm_default_branch_cost,
1022   false,					/* Prefer LDRD/STRD.  */
1023   {true, true},					/* Prefer non short circuit.  */
1024   &arm_default_vec_cost,                        /* Vectorizer costs.  */
1025 };
1026 
1027 const struct tune_params arm_cortex_a15_tune =
1028 {
1029   arm_9e_rtx_costs,
1030   NULL,
1031   1,						/* Constant limit.  */
1032   5,						/* Max cond insns.  */
1033   ARM_PREFETCH_NOT_BENEFICIAL,
1034   false,					/* Prefer constant pool.  */
1035   arm_default_branch_cost,
1036   true,						/* Prefer LDRD/STRD.  */
1037   {true, true},					/* Prefer non short circuit.  */
1038   &arm_default_vec_cost,                        /* Vectorizer costs.  */
1039 };
1040 
1041 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1042    less appealing.  Set max_insns_skipped to a low value.  */
1043 
1044 const struct tune_params arm_cortex_a5_tune =
1045 {
1046   arm_9e_rtx_costs,
1047   NULL,
1048   1,						/* Constant limit.  */
1049   1,						/* Max cond insns.  */
1050   ARM_PREFETCH_NOT_BENEFICIAL,
1051   false,					/* Prefer constant pool.  */
1052   arm_cortex_a5_branch_cost,
1053   false,					/* Prefer LDRD/STRD.  */
1054   {false, false},				/* Prefer non short circuit.  */
1055   &arm_default_vec_cost,                        /* Vectorizer costs.  */
1056 };
1057 
1058 const struct tune_params arm_cortex_a9_tune =
1059 {
1060   arm_9e_rtx_costs,
1061   cortex_a9_sched_adjust_cost,
1062   1,						/* Constant limit.  */
1063   5,						/* Max cond insns.  */
1064   ARM_PREFETCH_BENEFICIAL(4,32,32),
1065   false,					/* Prefer constant pool.  */
1066   arm_default_branch_cost,
1067   false,					/* Prefer LDRD/STRD.  */
1068   {true, true},					/* Prefer non short circuit.  */
1069   &arm_default_vec_cost,                        /* Vectorizer costs.  */
1070 };
1071 
1072 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1073    arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus.  */
1074 const struct tune_params arm_v6m_tune =
1075 {
1076   arm_9e_rtx_costs,
1077   NULL,
1078   1,						/* Constant limit.  */
1079   5,						/* Max cond insns.  */
1080   ARM_PREFETCH_NOT_BENEFICIAL,
1081   false,					/* Prefer constant pool.  */
1082   arm_default_branch_cost,
1083   false,					/* Prefer LDRD/STRD.  */
1084   {false, false},				/* Prefer non short circuit.  */
1085   &arm_default_vec_cost,                        /* Vectorizer costs.  */
1086 };
1087 
1088 const struct tune_params arm_fa726te_tune =
1089 {
1090   arm_9e_rtx_costs,
1091   fa726te_sched_adjust_cost,
1092   1,						/* Constant limit.  */
1093   5,						/* Max cond insns.  */
1094   ARM_PREFETCH_NOT_BENEFICIAL,
1095   true,						/* Prefer constant pool.  */
1096   arm_default_branch_cost,
1097   false,					/* Prefer LDRD/STRD.  */
1098   {true, true},					/* Prefer non short circuit.  */
1099   &arm_default_vec_cost,                        /* Vectorizer costs.  */
1100 };
1101 
1102 
1103 /* Not all of these give usefully different compilation alternatives,
1104    but there is no simple way of generalizing them.  */
1105 static const struct processors all_cores[] =
1106 {
1107   /* ARM Cores */
1108 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
1109   {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH,	  \
1110     FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1111 #include "arm-cores.def"
1112 #undef ARM_CORE
1113   {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1114 };
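/* For illustration (hypothetical entry, not taken from arm-cores.def): a line
   such as
     ARM_CORE("example", example, 7A, FL_LDSCHED, cortex)
   would expand through the ARM_CORE macro above to the initializer
     {"example", example, "7A", BASE_ARCH_7A,
      FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_tune},
   i.e. the architecture name is stringized and the corresponding FL_FOR_ARCH*
   flags and tuning table are filled in automatically.  */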
1115 
1116 static const struct processors all_architectures[] =
1117 {
1118   /* ARM Architectures */
1119   /* We don't specify tuning costs here as they will be figured out
1120      from the core.  */
1121 
1122 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1123   {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1124 #include "arm-arches.def"
1125 #undef ARM_ARCH
1126   {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1127 };
1128 
1129 
1130 /* These are populated as commandline arguments are processed, or NULL
1131    if not specified.  */
1132 static const struct processors *arm_selected_arch;
1133 static const struct processors *arm_selected_cpu;
1134 static const struct processors *arm_selected_tune;
1135 
1136 /* The name of the preprocessor macro to define for this architecture.  */
1137 
1138 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
1139 
1140 /* Available values for -mfpu=.  */
1141 
1142 static const struct arm_fpu_desc all_fpus[] =
1143 {
1144 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
1145   { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
1146 #include "arm-fpus.def"
1147 #undef ARM_FPU
1148 };
1149 
1150 
1151 /* Supported TLS relocations.  */
1152 
1153 enum tls_reloc {
1154   TLS_GD32,
1155   TLS_LDM32,
1156   TLS_LDO32,
1157   TLS_IE32,
1158   TLS_LE32,
1159   TLS_DESCSEQ	/* GNU scheme */
1160 };
1161 
1162 /* The maximum number of insns to be used when loading a constant.  */
1163 inline static int
1164 arm_constant_limit (bool size_p)
1165 {
1166   return size_p ? 1 : current_tune->constant_limit;
1167 }
1168 
1169 /* Emit an insn that's a simple single-set.  Both the operands must be known
1170    to be valid.  */
1171 inline static rtx
1172 emit_set_insn (rtx x, rtx y)
1173 {
1174   return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1175 }
1176 
1177 /* Return the number of bits set in VALUE.  */
1178 static unsigned
1179 bit_count (unsigned long value)
1180 {
1181   unsigned long count = 0;
1182 
1183   while (value)
1184     {
1185       count++;
1186       value &= value - 1;  /* Clear the least-significant set bit.  */
1187     }
1188 
1189   return count;
1190 }
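/* Worked example of the loop above (illustrative only): for
   value = 0x2C (binary 101100),
     101100 & 101011 = 101000   (count == 1)
     101000 & 100111 = 100000   (count == 2)
     100000 & 011111 = 000000   (count == 3)
   so bit_count (0x2C) returns 3; each "value &= value - 1" step clears
   exactly one set bit.  */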
1191 
1192 typedef struct
1193 {
1194   enum machine_mode mode;
1195   const char *name;
1196 } arm_fixed_mode_set;
1197 
1198 /* A small helper for setting fixed-point libfuncs.  */
1199 
1200 static void
1201 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
1202 			     const char *funcname, const char *modename,
1203 			     int num_suffix)
1204 {
1205   char buffer[50];
1206 
1207   if (num_suffix == 0)
1208     sprintf (buffer, "__gnu_%s%s", funcname, modename);
1209   else
1210     sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
1211 
1212   set_optab_libfunc (optable, mode, buffer);
1213 }
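/* For example (illustrative only), the call
     arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3);
   registers "__gnu_addqq3" as the QQmode addition libcall, while passing a
   num_suffix of 0 would drop the trailing digit and register "__gnu_addqq".  */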
1214 
1215 static void
1216 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
1217 			    enum machine_mode from, const char *funcname,
1218 			    const char *toname, const char *fromname)
1219 {
1220   char buffer[50];
1221   const char *maybe_suffix_2 = "";
1222 
1223   /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
1224   if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
1225       && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
1226       && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
1227     maybe_suffix_2 = "2";
1228 
1229   sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
1230 	   maybe_suffix_2);
1231 
1232   set_conv_libfunc (optable, to, from, buffer);
1233 }
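/* For example (illustrative only): QQmode and HQmode are both signed fract
   modes, so the test above selects the "2" suffix and
     arm_set_fixed_conv_libfunc (fract_optab, HQmode, QQmode,
				 "fract", "hq", "qq");
   registers "__gnu_fractqqhq2".  A conversion to or from an integer or float
   mode fails the test and gets no suffix.  */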
1234 
1235 /* Set up library functions unique to ARM.  */
1236 
1237 static void
1238 arm_init_libfuncs (void)
1239 {
1240   /* For Linux, we have access to kernel support for atomic operations.  */
1241   if (arm_abi == ARM_ABI_AAPCS_LINUX)
1242     init_sync_libfuncs (2 * UNITS_PER_WORD);
1243 
1244   /* There are no special library functions unless we are using the
1245      ARM BPABI.  */
1246   if (!TARGET_BPABI)
1247     return;
1248 
1249   /* The functions below are described in Section 4 of the "Run-Time
1250      ABI for the ARM architecture", Version 1.0.  */
1251 
1252   /* Double-precision floating-point arithmetic.  Table 2.  */
1253   set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1254   set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1255   set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1256   set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1257   set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1258 
1259   /* Double-precision comparisons.  Table 3.  */
1260   set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1261   set_optab_libfunc (ne_optab, DFmode, NULL);
1262   set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1263   set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1264   set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1265   set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1266   set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1267 
1268   /* Single-precision floating-point arithmetic.  Table 4.  */
1269   set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1270   set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1271   set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1272   set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1273   set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1274 
1275   /* Single-precision comparisons.  Table 5.  */
1276   set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1277   set_optab_libfunc (ne_optab, SFmode, NULL);
1278   set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1279   set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1280   set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1281   set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1282   set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1283 
1284   /* Floating-point to integer conversions.  Table 6.  */
1285   set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1286   set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1287   set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1288   set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1289   set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1290   set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1291   set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1292   set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1293 
1294   /* Conversions between floating types.  Table 7.  */
1295   set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1296   set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1297 
1298   /* Integer to floating-point conversions.  Table 8.  */
1299   set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1300   set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1301   set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1302   set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1303   set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1304   set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1305   set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1306   set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1307 
1308   /* Long long.  Table 9.  */
1309   set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1310   set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1311   set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1312   set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1313   set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1314   set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1315   set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1316   set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1317 
1318   /* Integer (32/32->32) division.  \S 4.3.1.  */
1319   set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1320   set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1321 
1322   /* The divmod functions are designed so that they can be used for
1323      plain division, even though they return both the quotient and the
1324      remainder.  The quotient is returned in the usual location (i.e.,
1325      r0 for SImode, {r0, r1} for DImode), just as would be expected
1326      for an ordinary division routine.  Because the AAPCS calling
1327      conventions specify that all of { r0, r1, r2, r3 } are
1328      call-clobbered registers, there is no need to tell the compiler
1329      explicitly that those registers are clobbered by these
1330      routines.  */
1331   set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1332   set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
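/* Illustrative consequence of the above (not from the original source): a
   plain DImode division such as

     long long quot (long long a, long long b) { return a / b; }

   can be compiled into a single call to __aeabi_ldivmod; the quotient comes
   back in {r0, r1} like any other DImode return value, and the remainder
   left in {r2, r3} is simply ignored, which is safe because those registers
   are call-clobbered under the AAPCS.  */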
1333 
1334   /* For SImode division the ABI provides div-without-mod routines,
1335      which are faster.  */
1336   set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1337   set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1338 
1339   /* We don't have mod libcalls.  Fortunately gcc knows how to use the
1340      divmod libcalls instead.  */
1341   set_optab_libfunc (smod_optab, DImode, NULL);
1342   set_optab_libfunc (umod_optab, DImode, NULL);
1343   set_optab_libfunc (smod_optab, SImode, NULL);
1344   set_optab_libfunc (umod_optab, SImode, NULL);
1345 
1346   /* Half-precision float operations.  The compiler handles all operations
1347      with NULL libfuncs by converting to SFmode.  */
1348   switch (arm_fp16_format)
1349     {
1350     case ARM_FP16_FORMAT_IEEE:
1351     case ARM_FP16_FORMAT_ALTERNATIVE:
1352 
1353       /* Conversions.  */
1354       set_conv_libfunc (trunc_optab, HFmode, SFmode,
1355 			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
1356 			 ? "__gnu_f2h_ieee"
1357 			 : "__gnu_f2h_alternative"));
1358       set_conv_libfunc (sext_optab, SFmode, HFmode,
1359 			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
1360 			 ? "__gnu_h2f_ieee"
1361 			 : "__gnu_h2f_alternative"));
1362 
1363       /* Arithmetic.  */
1364       set_optab_libfunc (add_optab, HFmode, NULL);
1365       set_optab_libfunc (sdiv_optab, HFmode, NULL);
1366       set_optab_libfunc (smul_optab, HFmode, NULL);
1367       set_optab_libfunc (neg_optab, HFmode, NULL);
1368       set_optab_libfunc (sub_optab, HFmode, NULL);
1369 
1370       /* Comparisons.  */
1371       set_optab_libfunc (eq_optab, HFmode, NULL);
1372       set_optab_libfunc (ne_optab, HFmode, NULL);
1373       set_optab_libfunc (lt_optab, HFmode, NULL);
1374       set_optab_libfunc (le_optab, HFmode, NULL);
1375       set_optab_libfunc (ge_optab, HFmode, NULL);
1376       set_optab_libfunc (gt_optab, HFmode, NULL);
1377       set_optab_libfunc (unord_optab, HFmode, NULL);
1378       break;
1379 
1380     default:
1381       break;
1382     }
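/* Illustrative effect of the NULL HFmode libfuncs above (not from the
   original source): an HFmode addition "a + b" is carried out by widening
   both operands to SFmode (e.g. through __gnu_h2f_ieee for the IEEE format),
   adding in SFmode (via __aeabi_fadd when no FP hardware is available), and
   truncating the result back to HFmode (e.g. through __gnu_f2h_ieee).  */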
1383 
1384   /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
1385   {
1386     const arm_fixed_mode_set fixed_arith_modes[] =
1387       {
1388 	{ QQmode, "qq" },
1389 	{ UQQmode, "uqq" },
1390 	{ HQmode, "hq" },
1391 	{ UHQmode, "uhq" },
1392 	{ SQmode, "sq" },
1393 	{ USQmode, "usq" },
1394 	{ DQmode, "dq" },
1395 	{ UDQmode, "udq" },
1396 	{ TQmode, "tq" },
1397 	{ UTQmode, "utq" },
1398 	{ HAmode, "ha" },
1399 	{ UHAmode, "uha" },
1400 	{ SAmode, "sa" },
1401 	{ USAmode, "usa" },
1402 	{ DAmode, "da" },
1403 	{ UDAmode, "uda" },
1404 	{ TAmode, "ta" },
1405 	{ UTAmode, "uta" }
1406       };
1407     const arm_fixed_mode_set fixed_conv_modes[] =
1408       {
1409 	{ QQmode, "qq" },
1410 	{ UQQmode, "uqq" },
1411 	{ HQmode, "hq" },
1412 	{ UHQmode, "uhq" },
1413 	{ SQmode, "sq" },
1414 	{ USQmode, "usq" },
1415 	{ DQmode, "dq" },
1416 	{ UDQmode, "udq" },
1417 	{ TQmode, "tq" },
1418 	{ UTQmode, "utq" },
1419 	{ HAmode, "ha" },
1420 	{ UHAmode, "uha" },
1421 	{ SAmode, "sa" },
1422 	{ USAmode, "usa" },
1423 	{ DAmode, "da" },
1424 	{ UDAmode, "uda" },
1425 	{ TAmode, "ta" },
1426 	{ UTAmode, "uta" },
1427 	{ QImode, "qi" },
1428 	{ HImode, "hi" },
1429 	{ SImode, "si" },
1430 	{ DImode, "di" },
1431 	{ TImode, "ti" },
1432 	{ SFmode, "sf" },
1433 	{ DFmode, "df" }
1434       };
1435     unsigned int i, j;
1436 
1437     for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
1438       {
1439 	arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
1440 				     "add", fixed_arith_modes[i].name, 3);
1441 	arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
1442 				     "ssadd", fixed_arith_modes[i].name, 3);
1443 	arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
1444 				     "usadd", fixed_arith_modes[i].name, 3);
1445 	arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
1446 				     "sub", fixed_arith_modes[i].name, 3);
1447 	arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
1448 				     "sssub", fixed_arith_modes[i].name, 3);
1449 	arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
1450 				     "ussub", fixed_arith_modes[i].name, 3);
1451 	arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
1452 				     "mul", fixed_arith_modes[i].name, 3);
1453 	arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
1454 				     "ssmul", fixed_arith_modes[i].name, 3);
1455 	arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
1456 				     "usmul", fixed_arith_modes[i].name, 3);
1457 	arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
1458 				     "div", fixed_arith_modes[i].name, 3);
1459 	arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
1460 				     "udiv", fixed_arith_modes[i].name, 3);
1461 	arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
1462 				     "ssdiv", fixed_arith_modes[i].name, 3);
1463 	arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
1464 				     "usdiv", fixed_arith_modes[i].name, 3);
1465 	arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
1466 				     "neg", fixed_arith_modes[i].name, 2);
1467 	arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
1468 				     "ssneg", fixed_arith_modes[i].name, 2);
1469 	arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
1470 				     "usneg", fixed_arith_modes[i].name, 2);
1471 	arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
1472 				     "ashl", fixed_arith_modes[i].name, 3);
1473 	arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
1474 				     "ashr", fixed_arith_modes[i].name, 3);
1475 	arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
1476 				     "lshr", fixed_arith_modes[i].name, 3);
1477 	arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
1478 				     "ssashl", fixed_arith_modes[i].name, 3);
1479 	arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
1480 				     "usashl", fixed_arith_modes[i].name, 3);
1481 	arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
1482 				     "cmp", fixed_arith_modes[i].name, 2);
1483       }
1484 
1485     for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
1486       for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
1487 	{
1488 	  if (i == j
1489 	      || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
1490 		  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
1491 	    continue;
1492 
1493 	  arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
1494 				      fixed_conv_modes[j].mode, "fract",
1495 				      fixed_conv_modes[i].name,
1496 				      fixed_conv_modes[j].name);
1497 	  arm_set_fixed_conv_libfunc (satfract_optab,
1498 				      fixed_conv_modes[i].mode,
1499 				      fixed_conv_modes[j].mode, "satfract",
1500 				      fixed_conv_modes[i].name,
1501 				      fixed_conv_modes[j].name);
1502 	  arm_set_fixed_conv_libfunc (fractuns_optab,
1503 				      fixed_conv_modes[i].mode,
1504 				      fixed_conv_modes[j].mode, "fractuns",
1505 				      fixed_conv_modes[i].name,
1506 				      fixed_conv_modes[j].name);
1507 	  arm_set_fixed_conv_libfunc (satfractuns_optab,
1508 				      fixed_conv_modes[i].mode,
1509 				      fixed_conv_modes[j].mode, "satfractuns",
1510 				      fixed_conv_modes[i].name,
1511 				      fixed_conv_modes[j].name);
1512 	}
1513   }
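  /* By way of example, and assuming arm_set_fixed_optab_libfunc composes
     names as "__gnu_" + operation + mode suffix + operand count (the
     helper itself is defined elsewhere): the add_optab entry for QQmode
     becomes "__gnu_addqq3", and the fract conversion from SQmode to
     SFmode becomes "__gnu_fractsqsf".  */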
1514 
1515   if (TARGET_AAPCS_BASED)
1516     synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1517 }
1518 
1519 /* On AAPCS systems, this is the "struct __va_list".  */
1520 static GTY(()) tree va_list_type;
1521 
1522 /* Return the type to use as __builtin_va_list.  */
1523 static tree
1524 arm_build_builtin_va_list (void)
1525 {
1526   tree va_list_name;
1527   tree ap_field;
1528 
1529   if (!TARGET_AAPCS_BASED)
1530     return std_build_builtin_va_list ();
1531 
1532   /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1533      defined as:
1534 
1535        struct __va_list
1536        {
1537 	 void *__ap;
1538        };
1539 
1540      The C Library ABI further reinforces this definition in \S
1541      4.1.
1542 
1543      We must follow this definition exactly.  The structure tag
1544      name is visible in C++ mangled names, and thus forms a part
1545      of the ABI.  The field name may be used by people who
1546      #include <stdarg.h>.  */
1547   /* Create the type.  */
1548   va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1549   /* Give it the required name.  */
1550   va_list_name = build_decl (BUILTINS_LOCATION,
1551 			     TYPE_DECL,
1552 			     get_identifier ("__va_list"),
1553 			     va_list_type);
1554   DECL_ARTIFICIAL (va_list_name) = 1;
1555   TYPE_NAME (va_list_type) = va_list_name;
1556   TYPE_STUB_DECL (va_list_type) = va_list_name;
1557   /* Create the __ap field.  */
1558   ap_field = build_decl (BUILTINS_LOCATION,
1559 			 FIELD_DECL,
1560 			 get_identifier ("__ap"),
1561 			 ptr_type_node);
1562   DECL_ARTIFICIAL (ap_field) = 1;
1563   DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1564   TYPE_FIELDS (va_list_type) = ap_field;
1565   /* Compute its layout.  */
1566   layout_type (va_list_type);
1567 
1568   return va_list_type;
1569 }
1570 
1571 /* Return an expression of type "void *" pointing to the next
1572    available argument in a variable-argument list.  VALIST is the
1573    user-level va_list object, of type __builtin_va_list.  */
1574 static tree
1575 arm_extract_valist_ptr (tree valist)
1576 {
1577   if (TREE_TYPE (valist) == error_mark_node)
1578     return error_mark_node;
1579 
1580   /* On an AAPCS target, the pointer is stored within "struct
1581      va_list".  */
1582   if (TARGET_AAPCS_BASED)
1583     {
1584       tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1585       valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1586 		       valist, ap_field, NULL_TREE);
1587     }
1588 
1589   return valist;
1590 }
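/* In user-level terms the COMPONENT_REF built above is simply "ap.__ap".
   As a rough sketch (ignoring alignment handling), the generic lowering
   of va_arg (ap, int) on an AAPCS target then amounts to:

	   void *p = ap.__ap;
	   ap.__ap = (char *) p + sizeof (int);
	   result = *(int *) p;  */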
1591 
1592 /* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
1593 static void
1594 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1595 {
1596   valist = arm_extract_valist_ptr (valist);
1597   std_expand_builtin_va_start (valist, nextarg);
1598 }
1599 
1600 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
1601 static tree
1602 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1603 			  gimple_seq *post_p)
1604 {
1605   valist = arm_extract_valist_ptr (valist);
1606   return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1607 }
1608 
1609 /* Fix up any incompatible options that the user has specified.  */
1610 static void
1611 arm_option_override (void)
1612 {
1613   if (global_options_set.x_arm_arch_option)
1614     arm_selected_arch = &all_architectures[arm_arch_option];
1615 
1616   if (global_options_set.x_arm_cpu_option)
1617     arm_selected_cpu = &all_cores[(int) arm_cpu_option];
1618 
1619   if (global_options_set.x_arm_tune_option)
1620     arm_selected_tune = &all_cores[(int) arm_tune_option];
1621 
1622 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1623   SUBTARGET_OVERRIDE_OPTIONS;
1624 #endif
1625 
1626   if (arm_selected_arch)
1627     {
1628       if (arm_selected_cpu)
1629 	{
1630 	  /* Check for conflict between mcpu and march.  */
1631 	  if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1632 	    {
1633 	      warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1634 		       arm_selected_cpu->name, arm_selected_arch->name);
1635 	      /* -march wins for code generation.
1636 	         -mcpu wins for default tuning.  */
1637 	      if (!arm_selected_tune)
1638 		arm_selected_tune = arm_selected_cpu;
1639 
1640 	      arm_selected_cpu = arm_selected_arch;
1641 	    }
1642 	  else
1643 	    /* -mcpu wins.  */
1644 	    arm_selected_arch = NULL;
1645 	}
1646       else
1647 	/* Pick a CPU based on the architecture.  */
1648 	arm_selected_cpu = arm_selected_arch;
1649     }
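  /* For example, "-mcpu=cortex-a8 -march=armv5te" differs in more than the
     tuning bits, so the warning above fires; code is then generated for
     the architecture named by -march, while the -mcpu value is kept as
     the default for tuning.  */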
1650 
1651   /* If the user did not specify a processor, choose one for them.  */
1652   if (!arm_selected_cpu)
1653     {
1654       const struct processors * sel;
1655       unsigned int        sought;
1656 
1657       arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1658       if (!arm_selected_cpu->name)
1659 	{
1660 #ifdef SUBTARGET_CPU_DEFAULT
1661 	  /* Use the subtarget default CPU if none was specified by
1662 	     configure.  */
1663 	  arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1664 #endif
1665 	  /* Default to ARM6.  */
1666 	  if (!arm_selected_cpu->name)
1667 	    arm_selected_cpu = &all_cores[arm6];
1668 	}
1669 
1670       sel = arm_selected_cpu;
1671       insn_flags = sel->flags;
1672 
1673       /* Now check to see if the user has specified some command line
1674 	 switches that require certain abilities from the cpu.  */
1675       sought = 0;
1676 
1677       if (TARGET_INTERWORK || TARGET_THUMB)
1678 	{
1679 	  sought |= (FL_THUMB | FL_MODE32);
1680 
1681 	  /* There are no ARM processors that support both APCS-26 and
1682 	     interworking.  Therefore we force FL_MODE26 to be removed
1683 	     from insn_flags here (if it was set), so that the search
1684 	     below will always be able to find a compatible processor.  */
1685 	  insn_flags &= ~FL_MODE26;
1686 	}
1687 
1688       if (sought != 0 && ((sought & insn_flags) != sought))
1689 	{
1690 	  /* Try to locate a CPU type that supports all of the abilities
1691 	     of the default CPU, plus the extra abilities requested by
1692 	     the user.  */
1693 	  for (sel = all_cores; sel->name != NULL; sel++)
1694 	    if ((sel->flags & sought) == (sought | insn_flags))
1695 	      break;
1696 
1697 	  if (sel->name == NULL)
1698 	    {
1699 	      unsigned current_bit_count = 0;
1700 	      const struct processors * best_fit = NULL;
1701 
1702 	      /* Ideally we would like to issue an error message here
1703 		 saying that it was not possible to find a CPU compatible
1704 		 with the default CPU, but which also supports the command
1705 		 line options specified by the programmer, and so they
1706 		 ought to use the -mcpu=<name> command line option to
1707 		 override the default CPU type.
1708 
1709 		 If we cannot find a cpu that has both the
1710 		 characteristics of the default cpu and the given
1711 		 command line options, we scan the array again looking
1712 		 for a best match.  */
1713 	      for (sel = all_cores; sel->name != NULL; sel++)
1714 		if ((sel->flags & sought) == sought)
1715 		  {
1716 		    unsigned count;
1717 
1718 		    count = bit_count (sel->flags & insn_flags);
1719 
1720 		    if (count >= current_bit_count)
1721 		      {
1722 			best_fit = sel;
1723 			current_bit_count = count;
1724 		      }
1725 		  }
1726 
1727 	      gcc_assert (best_fit);
1728 	      sel = best_fit;
1729 	    }
1730 
1731 	  arm_selected_cpu = sel;
1732 	}
1733     }
1734 
1735   gcc_assert (arm_selected_cpu);
1736   /* The selected cpu may be an architecture, so look up tuning by core ID.  */
1737   if (!arm_selected_tune)
1738     arm_selected_tune = &all_cores[arm_selected_cpu->core];
1739 
1740   sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1741   insn_flags = arm_selected_cpu->flags;
1742   arm_base_arch = arm_selected_cpu->base_arch;
1743 
1744   arm_tune = arm_selected_tune->core;
1745   tune_flags = arm_selected_tune->flags;
1746   current_tune = arm_selected_tune->tune;
1747 
1748   /* Make sure that the processor choice does not conflict with any of the
1749      other command line choices.  */
1750   if (TARGET_ARM && !(insn_flags & FL_NOTM))
1751     error ("target CPU does not support ARM mode");
1752 
1753   /* BPABI targets use linker tricks to allow interworking on cores
1754      without thumb support.  */
1755   if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1756     {
1757       warning (0, "target CPU does not support interworking" );
1758       target_flags &= ~MASK_INTERWORK;
1759     }
1760 
1761   if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1762     {
1763       warning (0, "target CPU does not support THUMB instructions");
1764       target_flags &= ~MASK_THUMB;
1765     }
1766 
1767   if (TARGET_APCS_FRAME && TARGET_THUMB)
1768     {
1769       /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1770       target_flags &= ~MASK_APCS_FRAME;
1771     }
1772 
1773   /* Callee super interworking implies thumb interworking.  Adding
1774      this to the flags here simplifies the logic elsewhere.  */
1775   if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1776     target_flags |= MASK_INTERWORK;
1777 
1778   /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1779      from here where no function is being compiled currently.  */
1780   if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1781     warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1782 
1783   if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1784     warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1785 
1786   if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1787     {
1788       warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1789       target_flags |= MASK_APCS_FRAME;
1790     }
1791 
1792   if (TARGET_POKE_FUNCTION_NAME)
1793     target_flags |= MASK_APCS_FRAME;
1794 
1795   if (TARGET_APCS_REENT && flag_pic)
1796     error ("-fpic and -mapcs-reent are incompatible");
1797 
1798   if (TARGET_APCS_REENT)
1799     warning (0, "APCS reentrant code not supported.  Ignored");
1800 
1801   /* If this target is normally configured to use APCS frames, warn if they
1802      are turned off and debugging is turned on.  */
1803   if (TARGET_ARM
1804       && write_symbols != NO_DEBUG
1805       && !TARGET_APCS_FRAME
1806       && (TARGET_DEFAULT & MASK_APCS_FRAME))
1807     warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1808 
1809   if (TARGET_APCS_FLOAT)
1810     warning (0, "passing floating point arguments in fp regs not yet supported");
1811 
1812   if (TARGET_LITTLE_WORDS)
1813     warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
1814 	     "will be removed in a future release");
1815 
1816   /* Initialize boolean versions of the flags, for use in the arm.md file.  */
1817   arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1818   arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1819   arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1820   arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1821   arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1822   arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1823   arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1824   arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1825   arm_arch6m = arm_arch6 && !arm_arch_notm;
1826   arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1827   arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1828   arm_arch8 = (insn_flags & FL_ARCH8) != 0;
1829   arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1830   arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1831 
1832   arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1833   arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1834   thumb_code = TARGET_ARM == 0;
1835   thumb1_code = TARGET_THUMB1 != 0;
1836   arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1837   arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1838   arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1839   arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
1840   arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
1841   arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
1842   arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1843 
1844   /* If we are not using the default (ARM mode) section anchor offset
1845      ranges, then set the correct ranges now.  */
1846   if (TARGET_THUMB1)
1847     {
1848       /* Thumb-1 LDR instructions cannot have negative offsets.
1849          Permissible positive offset ranges are 5-bit (for byte loads),
1850          6-bit (for halfword loads), or 7-bit (for word loads).
1851          Empirical results suggest a 7-bit anchor range gives the best
1852          overall code size.  */
1853       targetm.min_anchor_offset = 0;
1854       targetm.max_anchor_offset = 127;
1855     }
1856   else if (TARGET_THUMB2)
1857     {
1858       /* The minimum is set such that the total size of the block
1859          for a particular anchor is 248 + 1 + 4095 bytes, which is
1860          divisible by eight, ensuring natural spacing of anchors.  */
1861       targetm.min_anchor_offset = -248;
1862       targetm.max_anchor_offset = 4095;
1863     }
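  /* For instance, with -fsection-anchors on Thumb-1 the range chosen above
     lets a word 124 bytes past an anchor be loaded directly (a Thumb-1
     "ldr rN, [rM, #124]" uses its 5-bit immediate scaled by 4), while data
     more than 127 bytes away needs a new anchor.  */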
1864 
1865   /* V5 code we generate is completely interworking capable, so we turn off
1866      TARGET_INTERWORK here to avoid many tests later on.  */
1867 
1868   /* XXX However, we must pass the right pre-processor defines to CPP
1869      or GLD can get confused.  This is a hack.  */
1870   if (TARGET_INTERWORK)
1871     arm_cpp_interwork = 1;
1872 
1873   if (arm_arch5)
1874     target_flags &= ~MASK_INTERWORK;
1875 
1876   if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1877     error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1878 
1879   if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1880     error ("iwmmxt abi requires an iwmmxt capable cpu");
1881 
1882   if (!global_options_set.x_arm_fpu_index)
1883     {
1884       const char *target_fpu_name;
1885       bool ok;
1886 
1887 #ifdef FPUTYPE_DEFAULT
1888       target_fpu_name = FPUTYPE_DEFAULT;
1889 #else
1890       target_fpu_name = "vfp";
1891 #endif
1892 
1893       ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
1894 				  CL_TARGET);
1895       gcc_assert (ok);
1896     }
1897 
1898   arm_fpu_desc = &all_fpus[arm_fpu_index];
1899 
1900   switch (arm_fpu_desc->model)
1901     {
1902     case ARM_FP_MODEL_VFP:
1903       arm_fpu_attr = FPU_VFP;
1904       break;
1905 
1906     default:
1907       gcc_unreachable();
1908     }
1909 
1910   if (TARGET_AAPCS_BASED)
1911     {
1912       if (TARGET_CALLER_INTERWORKING)
1913 	error ("AAPCS does not support -mcaller-super-interworking");
1914       else
1915 	if (TARGET_CALLEE_INTERWORKING)
1916 	  error ("AAPCS does not support -mcallee-super-interworking");
1917     }
1918 
1919   /* iWMMXt and NEON are incompatible.  */
1920   if (TARGET_IWMMXT && TARGET_NEON)
1921     error ("iWMMXt and NEON are incompatible");
1922 
1923   /* iWMMXt unsupported under Thumb mode.  */
1924   if (TARGET_THUMB && TARGET_IWMMXT)
1925     error ("iWMMXt unsupported under Thumb mode");
1926 
1927   /* __fp16 support currently assumes the core has ldrh.  */
1928   if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1929     sorry ("__fp16 and no ldrh");
1930 
1931   /* If soft-float is specified then don't use FPU.  */
1932   if (TARGET_SOFT_FLOAT)
1933     arm_fpu_attr = FPU_NONE;
1934 
1935   if (TARGET_AAPCS_BASED)
1936     {
1937       if (arm_abi == ARM_ABI_IWMMXT)
1938 	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1939       else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1940 	       && TARGET_HARD_FLOAT
1941 	       && TARGET_VFP)
1942 	arm_pcs_default = ARM_PCS_AAPCS_VFP;
1943       else
1944 	arm_pcs_default = ARM_PCS_AAPCS;
1945     }
1946   else
1947     {
1948       if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1949 	sorry ("-mfloat-abi=hard and VFP");
1950 
1951       if (arm_abi == ARM_ABI_APCS)
1952 	arm_pcs_default = ARM_PCS_APCS;
1953       else
1954 	arm_pcs_default = ARM_PCS_ATPCS;
1955     }
1956 
1957   /* For arm2/3 there is no need to do any scheduling if we are doing
1958      software floating-point.  */
1959   if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
1960     flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1961 
1962   /* Use the cp15 method if it is available.  */
1963   if (target_thread_pointer == TP_AUTO)
1964     {
1965       if (arm_arch6k && !TARGET_THUMB1)
1966 	target_thread_pointer = TP_CP15;
1967       else
1968 	target_thread_pointer = TP_SOFT;
1969     }
1970 
1971   if (TARGET_HARD_TP && TARGET_THUMB1)
1972     error ("can not use -mtp=cp15 with 16-bit Thumb");
1973 
1974   /* Override the default structure alignment for AAPCS ABI.  */
1975   if (!global_options_set.x_arm_structure_size_boundary)
1976     {
1977       if (TARGET_AAPCS_BASED)
1978 	arm_structure_size_boundary = 8;
1979     }
1980   else
1981     {
1982       if (arm_structure_size_boundary != 8
1983 	  && arm_structure_size_boundary != 32
1984 	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
1985 	{
1986 	  if (ARM_DOUBLEWORD_ALIGN)
1987 	    warning (0,
1988 		     "structure size boundary can only be set to 8, 32 or 64");
1989 	  else
1990 	    warning (0, "structure size boundary can only be set to 8 or 32");
1991 	  arm_structure_size_boundary
1992 	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
1993 	}
1994     }
1995 
1996   if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1997     {
1998       error ("RTP PIC is incompatible with Thumb");
1999       flag_pic = 0;
2000     }
2001 
2002   /* If stack checking is disabled, we can use r10 as the PIC register,
2003      which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
2004   if (flag_pic && TARGET_SINGLE_PIC_BASE)
2005     {
2006       if (TARGET_VXWORKS_RTP)
2007 	warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2008       arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2009     }
2010 
2011   if (flag_pic && TARGET_VXWORKS_RTP)
2012     arm_pic_register = 9;
2013 
2014   if (arm_pic_register_string != NULL)
2015     {
2016       int pic_register = decode_reg_name (arm_pic_register_string);
2017 
2018       if (!flag_pic)
2019 	warning (0, "-mpic-register= is useless without -fpic");
2020 
2021       /* Prevent the user from choosing an obviously stupid PIC register.  */
2022       else if (pic_register < 0 || call_used_regs[pic_register]
2023 	       || pic_register == HARD_FRAME_POINTER_REGNUM
2024 	       || pic_register == STACK_POINTER_REGNUM
2025 	       || pic_register >= PC_REGNUM
2026 	       || (TARGET_VXWORKS_RTP
2027 		   && (unsigned int) pic_register != arm_pic_register))
2028 	error ("unable to use '%s' for PIC register", arm_pic_register_string);
2029       else
2030 	arm_pic_register = pic_register;
2031     }
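  /* As an illustration of the check above, "-fpic -mpic-register=r5" is
     accepted because r5 is call-saved in the default ABI, whereas
     "-mpic-register=r0" is rejected because r0 appears in call_used_regs.  */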
2032 
2033   /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
2034   if (fix_cm3_ldrd == 2)
2035     {
2036       if (arm_selected_cpu->core == cortexm3)
2037 	fix_cm3_ldrd = 1;
2038       else
2039 	fix_cm3_ldrd = 0;
2040     }
2041 
2042   /* Enable -munaligned-access by default for
2043      - all ARMv6 architecture-based processors
2044      - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2045      - ARMv8 architecture-based processors.
2046 
2047      Disable -munaligned-access by default for
2048      - all pre-ARMv6 architecture-based processors
2049      - ARMv6-M architecture-based processors.  */
2050 
2051   if (unaligned_access == 2)
2052     {
2053       if (arm_arch6 && (arm_arch_notm || arm_arch7))
2054 	unaligned_access = 1;
2055       else
2056 	unaligned_access = 0;
2057     }
2058   else if (unaligned_access == 1
2059 	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2060     {
2061       warning (0, "target CPU does not support unaligned accesses");
2062       unaligned_access = 0;
2063     }
2064 
2065   if (TARGET_THUMB1 && flag_schedule_insns)
2066     {
2067       /* Don't warn since it's on by default in -O2.  */
2068       flag_schedule_insns = 0;
2069     }
2070 
2071   if (optimize_size)
2072     {
2073       /* If optimizing for size, bump the number of instructions that we
2074          are prepared to conditionally execute (even on a StrongARM).  */
2075       max_insns_skipped = 6;
2076     }
2077   else
2078     max_insns_skipped = current_tune->max_insns_skipped;
2079 
2080   /* Hot/Cold partitioning is not currently supported, since we can't
2081      handle literal pool placement in that case.  */
2082   if (flag_reorder_blocks_and_partition)
2083     {
2084       inform (input_location,
2085 	      "-freorder-blocks-and-partition not supported on this architecture");
2086       flag_reorder_blocks_and_partition = 0;
2087       flag_reorder_blocks = 1;
2088     }
2089 
2090   if (flag_pic)
2091     /* Hoisting PIC address calculations more aggressively provides a small,
2092        but measurable, size reduction for PIC code.  Therefore, we decrease
2093        the bar for unrestricted expression hoisting to the cost of PIC address
2094        calculation, which is 2 instructions.  */
2095     maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2096 			   global_options.x_param_values,
2097 			   global_options_set.x_param_values);
2098 
2099   /* ARM EABI defaults to strict volatile bitfields.  */
2100   if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2101       && abi_version_at_least(2))
2102     flag_strict_volatile_bitfields = 1;
2103 
2104   /* Enable sw prefetching at -O3 for CPUs that have prefetch and for which we
2105      have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more).  */
2106   if (flag_prefetch_loop_arrays < 0
2107       && HAVE_prefetch
2108       && optimize >= 3
2109       && current_tune->num_prefetch_slots > 0)
2110     flag_prefetch_loop_arrays = 1;
2111 
2112   /* Set up parameters to be used in the prefetching algorithm.  Do not override the
2113      defaults unless we are tuning for a core we have researched values for.  */
2114   if (current_tune->num_prefetch_slots > 0)
2115     maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2116                            current_tune->num_prefetch_slots,
2117                            global_options.x_param_values,
2118                            global_options_set.x_param_values);
2119   if (current_tune->l1_cache_line_size >= 0)
2120     maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2121                            current_tune->l1_cache_line_size,
2122                            global_options.x_param_values,
2123                            global_options_set.x_param_values);
2124   if (current_tune->l1_cache_size >= 0)
2125     maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2126                            current_tune->l1_cache_size,
2127                            global_options.x_param_values,
2128                            global_options_set.x_param_values);
2129 
2130   /* Use the alternative scheduling-pressure algorithm by default.  */
2131   maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
2132                          global_options.x_param_values,
2133                          global_options_set.x_param_values);
2134 
2135   /* Register global variables with the garbage collector.  */
2136   arm_add_gc_roots ();
2137 }
2138 
2139 static void
2140 arm_add_gc_roots (void)
2141 {
2142   gcc_obstack_init(&minipool_obstack);
2143   minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2144 }
2145 
2146 /* A table of known ARM exception types.
2147    For use with the interrupt function attribute.  */
2148 
2149 typedef struct
2150 {
2151   const char *const arg;
2152   const unsigned long return_value;
2153 }
2154 isr_attribute_arg;
2155 
2156 static const isr_attribute_arg isr_attribute_args [] =
2157 {
2158   { "IRQ",   ARM_FT_ISR },
2159   { "irq",   ARM_FT_ISR },
2160   { "FIQ",   ARM_FT_FIQ },
2161   { "fiq",   ARM_FT_FIQ },
2162   { "ABORT", ARM_FT_ISR },
2163   { "abort", ARM_FT_ISR },
2164   { "ABORT", ARM_FT_ISR },
2165   { "abort", ARM_FT_ISR },
2166   { "UNDEF", ARM_FT_EXCEPTION },
2167   { "undef", ARM_FT_EXCEPTION },
2168   { "SWI",   ARM_FT_EXCEPTION },
2169   { "swi",   ARM_FT_EXCEPTION },
2170   { NULL,    ARM_FT_NORMAL }
2171 };
2172 
2173 /* Returns the (interrupt) function type of the current
2174    function, or ARM_FT_UNKNOWN if the type cannot be determined.  */
2175 
2176 static unsigned long
2177 arm_isr_value (tree argument)
2178 {
2179   const isr_attribute_arg * ptr;
2180   const char *              arg;
2181 
2182   if (!arm_arch_notm)
2183     return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2184 
2185   /* No argument - default to IRQ.  */
2186   if (argument == NULL_TREE)
2187     return ARM_FT_ISR;
2188 
2189   /* Get the value of the argument.  */
2190   if (TREE_VALUE (argument) == NULL_TREE
2191       || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2192     return ARM_FT_UNKNOWN;
2193 
2194   arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2195 
2196   /* Check it against the list of known arguments.  */
2197   for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2198     if (streq (arg, ptr->arg))
2199       return ptr->return_value;
2200 
2201   /* An unrecognized interrupt type.  */
2202   return ARM_FT_UNKNOWN;
2203 }
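/* For reference, the strings in the table are the spellings accepted in
   source code, e.g.

	   void __attribute__ ((interrupt ("IRQ"))) irq_handler (void);

   An unlisted spelling (or a non-string argument) yields ARM_FT_UNKNOWN,
   and omitting the argument altogether defaults to ARM_FT_ISR.  */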
2204 
2205 /* Computes the type of the current function.  */
2206 
2207 static unsigned long
2208 arm_compute_func_type (void)
2209 {
2210   unsigned long type = ARM_FT_UNKNOWN;
2211   tree a;
2212   tree attr;
2213 
2214   gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2215 
2216   /* Decide if the current function is volatile.  Such functions
2217      never return, and many memory cycles can be saved by not storing
2218      register values that will never be needed again.  This optimization
2219      was added to speed up context switching in a kernel application.  */
2220   if (optimize > 0
2221       && (TREE_NOTHROW (current_function_decl)
2222           || !(flag_unwind_tables
2223                || (flag_exceptions
2224 		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2225       && TREE_THIS_VOLATILE (current_function_decl))
2226     type |= ARM_FT_VOLATILE;
2227 
2228   if (cfun->static_chain_decl != NULL)
2229     type |= ARM_FT_NESTED;
2230 
2231   attr = DECL_ATTRIBUTES (current_function_decl);
2232 
2233   a = lookup_attribute ("naked", attr);
2234   if (a != NULL_TREE)
2235     type |= ARM_FT_NAKED;
2236 
2237   a = lookup_attribute ("isr", attr);
2238   if (a == NULL_TREE)
2239     a = lookup_attribute ("interrupt", attr);
2240 
2241   if (a == NULL_TREE)
2242     type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2243   else
2244     type |= arm_isr_value (TREE_VALUE (a));
2245 
2246   return type;
2247 }
2248 
2249 /* Returns the type of the current function.  */
2250 
2251 unsigned long
2252 arm_current_func_type (void)
2253 {
2254   if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2255     cfun->machine->func_type = arm_compute_func_type ();
2256 
2257   return cfun->machine->func_type;
2258 }
2259 
2260 bool
2261 arm_allocate_stack_slots_for_args (void)
2262 {
2263   /* Naked functions should not allocate stack slots for arguments.  */
2264   return !IS_NAKED (arm_current_func_type ());
2265 }
2266 
2267 static bool
2268 arm_warn_func_return (tree decl)
2269 {
2270   /* Naked functions are implemented entirely in assembly, including the
2271      return sequence, so suppress warnings about this.  */
2272   return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
2273 }
2274 
2275 
2276 /* Output assembler code for a block containing the constant parts
2277    of a trampoline, leaving space for the variable parts.
2278 
2279    On the ARM (if r8 is the static chain regnum, and remembering that
2280    referencing pc adds an offset of 8), the trampoline looks like:
2281 	   ldr 		r8, [pc, #0]
2282 	   ldr		pc, [pc]
2283 	   .word	static chain value
2284 	   .word	function's address
2285    XXX FIXME: When the trampoline returns, r8 will be clobbered.  */
2286 
2287 static void
2288 arm_asm_trampoline_template (FILE *f)
2289 {
2290   if (TARGET_ARM)
2291     {
2292       asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2293       asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2294     }
2295   else if (TARGET_THUMB2)
2296     {
2297       /* The Thumb-2 trampoline is similar to the ARM implementation.
2298 	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
2299       asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2300 		   STATIC_CHAIN_REGNUM, PC_REGNUM);
2301       asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2302     }
2303   else
2304     {
2305       ASM_OUTPUT_ALIGN (f, 2);
2306       fprintf (f, "\t.code\t16\n");
2307       fprintf (f, ".Ltrampoline_start:\n");
2308       asm_fprintf (f, "\tpush\t{r0, r1}\n");
2309       asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2310       asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2311       asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2312       asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2313       asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2314     }
2315   assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2316   assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2317 }
2318 
2319 /* Emit RTL insns to initialize the variable parts of a trampoline.  */
2320 
2321 static void
2322 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2323 {
2324   rtx fnaddr, mem, a_tramp;
2325 
2326   emit_block_move (m_tramp, assemble_trampoline_template (),
2327 		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2328 
2329   mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2330   emit_move_insn (mem, chain_value);
2331 
2332   mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2333   fnaddr = XEXP (DECL_RTL (fndecl), 0);
2334   emit_move_insn (mem, fnaddr);
2335 
2336   a_tramp = XEXP (m_tramp, 0);
2337   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2338 		     LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2339 		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
2340 }
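/* Putting the two routines above together on a 32-bit target, the
   initialised trampoline occupies four words: the two template
   instructions, the static chain value at byte offset 8 and the target
   function's address at byte offset 12; __clear_cache is then run over
   the whole block so that the freshly written words are visible to
   instruction fetch.  */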
2341 
2342 /* Thumb trampolines should be entered in thumb mode, so set
2343    the bottom bit of the address.  */
2344 
2345 static rtx
2346 arm_trampoline_adjust_address (rtx addr)
2347 {
2348   if (TARGET_THUMB)
2349     addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2350 				NULL, 0, OPTAB_LIB_WIDEN);
2351   return addr;
2352 }
2353 
2354 /* Return 1 if it is possible to return using a single instruction.
2355    If SIBLING is non-null, this is a test for a return before a sibling
2356    call.  SIBLING is the call insn, so we can examine its register usage.  */
2357 
2358 int
2359 use_return_insn (int iscond, rtx sibling)
2360 {
2361   int regno;
2362   unsigned int func_type;
2363   unsigned long saved_int_regs;
2364   unsigned HOST_WIDE_INT stack_adjust;
2365   arm_stack_offsets *offsets;
2366 
2367   /* Never use a return instruction before reload has run.  */
2368   if (!reload_completed)
2369     return 0;
2370 
2371   func_type = arm_current_func_type ();
2372 
2373   /* Naked, volatile and stack alignment functions need special
2374      consideration.  */
2375   if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2376     return 0;
2377 
2378   /* So do interrupt functions that use the frame pointer and Thumb
2379      interrupt functions.  */
2380   if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2381     return 0;
2382 
2383   offsets = arm_get_frame_offsets ();
2384   stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2385 
2386   /* As do variadic functions.  */
2387   if (crtl->args.pretend_args_size
2388       || cfun->machine->uses_anonymous_args
2389       /* Or if the function calls __builtin_eh_return () */
2390       || crtl->calls_eh_return
2391       /* Or if the function calls alloca */
2392       || cfun->calls_alloca
2393       /* Or if there is a stack adjustment.  However, if the stack pointer
2394 	 is saved on the stack, we can use a pre-incrementing stack load.  */
2395       || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2396 				 && stack_adjust == 4)))
2397     return 0;
2398 
2399   saved_int_regs = offsets->saved_regs_mask;
2400 
2401   /* Unfortunately, the insn
2402 
2403        ldmib sp, {..., sp, ...}
2404 
2405      triggers a bug on most SA-110 based devices, such that the stack
2406      pointer won't be correctly restored if the instruction takes a
2407      page fault.  We work around this problem by popping r3 along with
2408      the other registers, since that is never slower than executing
2409      another instruction.
2410 
2411      We test for !arm_arch5 here, because code for any architecture
2412      less than this could potentially be run on one of the buggy
2413      chips.  */
2414   if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2415     {
2416       /* Validate that r3 is a call-clobbered register (always true in
2417 	 the default abi) ...  */
2418       if (!call_used_regs[3])
2419 	return 0;
2420 
2421       /* ... that it isn't being used for a return value ... */
2422       if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2423 	return 0;
2424 
2425       /* ... or for a tail-call argument ...  */
2426       if (sibling)
2427 	{
2428 	  gcc_assert (CALL_P (sibling));
2429 
2430 	  if (find_regno_fusage (sibling, USE, 3))
2431 	    return 0;
2432 	}
2433 
2434       /* ... and that there are no call-saved registers in r0-r2
2435 	 (always true in the default ABI).  */
2436       if (saved_int_regs & 0x7)
2437 	return 0;
2438     }
2439 
2440   /* Can't be done if interworking with Thumb, and any registers have been
2441      stacked.  */
2442   if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2443     return 0;
2444 
2445   /* On StrongARM, conditional returns are expensive if they aren't
2446      taken and multiple registers have been stacked.  */
2447   if (iscond && arm_tune_strongarm)
2448     {
2449       /* Conditional return when just the LR is stored is a simple
2450 	 conditional-load instruction, that's not expensive.  */
2451       if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2452 	return 0;
2453 
2454       if (flag_pic
2455 	  && arm_pic_register != INVALID_REGNUM
2456 	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2457 	return 0;
2458     }
2459 
2460   /* If there are saved registers but the LR isn't saved, then we need
2461      two instructions for the return.  */
2462   if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2463     return 0;
2464 
2465   /* Can't be done if any of the VFP regs are pushed,
2466      since this also requires an insn.  */
2467   if (TARGET_HARD_FLOAT && TARGET_VFP)
2468     for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2469       if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2470 	return 0;
2471 
2472   if (TARGET_REALLY_IWMMXT)
2473     for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2474       if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2475 	return 0;
2476 
2477   return 1;
2478 }
2479 
2480 /* Return TRUE if int I is a valid immediate ARM constant.  */
2481 
2482 int
2483 const_ok_for_arm (HOST_WIDE_INT i)
2484 {
2485   int lowbit;
2486 
2487   /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2488      be all zero, or all one.  */
2489   if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2490       && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2491 	  != ((~(unsigned HOST_WIDE_INT) 0)
2492 	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2493     return FALSE;
2494 
2495   i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2496 
2497   /* Fast return for 0 and small values.  We must do this for zero, since
2498      the code below can't handle that one case.  */
2499   if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2500     return TRUE;
2501 
2502   /* Get the number of trailing zeros.  */
2503   lowbit = ffs((int) i) - 1;
2504 
2505   /* Only even shifts are allowed in ARM mode so round down to the
2506      nearest even number.  */
2507   if (TARGET_ARM)
2508     lowbit &= ~1;
2509 
2510   if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2511     return TRUE;
2512 
2513   if (TARGET_ARM)
2514     {
2515       /* Allow rotated constants in ARM mode.  */
2516       if (lowbit <= 4
2517 	   && ((i & ~0xc000003f) == 0
2518 	       || (i & ~0xf000000f) == 0
2519 	       || (i & ~0xfc000003) == 0))
2520 	return TRUE;
2521     }
2522   else
2523     {
2524       HOST_WIDE_INT v;
2525 
2526       /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
2527       v = i & 0xff;
2528       v |= v << 16;
2529       if (i == v || i == (v | (v << 8)))
2530 	return TRUE;
2531 
2532       /* Allow repeated pattern 0xXY00XY00.  */
2533       v = i & 0xff00;
2534       v |= v << 16;
2535       if (i == v)
2536 	return TRUE;
2537     }
2538 
2539   return FALSE;
2540 }
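/* Some worked examples of the test above, for ARM mode: 0x000000ff,
   0x0003fc00 (0xff << 10) and 0xf000000f (0xff rotated right by 4) are
   all valid immediates, while 0x00000101 is not.  In Thumb mode the
   replicated patterns additionally accept, e.g., 0x01010101 and
   0x00ab00ab.  */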
2541 
2542 /* Return true if I is a valid constant for the operation CODE.  */
2543 int
2544 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2545 {
2546   if (const_ok_for_arm (i))
2547     return 1;
2548 
2549   switch (code)
2550     {
2551     case SET:
2552       /* See if we can use movw.  */
2553       if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
2554 	return 1;
2555       else
2556 	/* Otherwise, try mvn.  */
2557 	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2558 
2559     case PLUS:
2560       /* See if we can use addw or subw.  */
2561       if (TARGET_THUMB2
2562 	  && ((i & 0xfffff000) == 0
2563 	      || ((-i) & 0xfffff000) == 0))
2564 	return 1;
2565       /* else fall through.  */
2566 
2567     case COMPARE:
2568     case EQ:
2569     case NE:
2570     case GT:
2571     case LE:
2572     case LT:
2573     case GE:
2574     case GEU:
2575     case LTU:
2576     case GTU:
2577     case LEU:
2578     case UNORDERED:
2579     case ORDERED:
2580     case UNEQ:
2581     case UNGE:
2582     case UNLT:
2583     case UNGT:
2584     case UNLE:
2585       return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2586 
2587     case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
2588     case XOR:
2589       return 0;
2590 
2591     case IOR:
2592       if (TARGET_THUMB2)
2593 	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2594       return 0;
2595 
2596     case AND:
2597       return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2598 
2599     default:
2600       gcc_unreachable ();
2601     }
2602 }
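/* For instance, (SET, 0xffffff00) is accepted because ~0xffffff00 == 0xff
   is a valid mvn immediate, and (PLUS, -1) is accepted because the negated
   value 1 fits an add/sub immediate, even though neither constant passes
   const_ok_for_arm on its own.  */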
2603 
2604 /* Return true if I is a valid di mode constant for the operation CODE.  */
2605 int
2606 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
2607 {
2608   HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
2609   HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
2610   rtx hi = GEN_INT (hi_val);
2611   rtx lo = GEN_INT (lo_val);
2612 
2613   if (TARGET_THUMB1)
2614     return 0;
2615 
2616   switch (code)
2617     {
2618     case PLUS:
2619       return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
2620 
2621     default:
2622       return 0;
2623     }
2624 }
2625 
2626 /* Emit a sequence of insns to handle a large constant.
2627    CODE is the code of the operation required, it can be any of SET, PLUS,
2628    IOR, AND, XOR, MINUS;
2629    MODE is the mode in which the operation is being performed;
2630    VAL is the integer to operate on;
2631    SOURCE is the other operand (a register, or a null-pointer for SET);
2632    SUBTARGETS means it is safe to create scratch registers if that will
2633    either produce a simpler sequence, or we will want to cse the values.
2634    Return value is the number of insns emitted.  */
2635 
2636 /* ??? Tweak this for thumb2.  */
2637 int
2638 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2639 		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2640 {
2641   rtx cond;
2642 
2643   if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2644     cond = COND_EXEC_TEST (PATTERN (insn));
2645   else
2646     cond = NULL_RTX;
2647 
2648   if (subtargets || code == SET
2649       || (REG_P (target) && REG_P (source)
2650 	  && REGNO (target) != REGNO (source)))
2651     {
2652       /* After arm_reorg has been called, we can't fix up expensive
2653 	 constants by pushing them into memory so we must synthesize
2654 	 them in-line, regardless of the cost.  This is only likely to
2655 	 be more costly on chips that have load delay slots and we are
2656 	 compiling without running the scheduler (so no splitting
2657 	 occurred before the final instruction emission).
2658 
2659 	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2660       */
2661       if (!after_arm_reorg
2662 	  && !cond
2663 	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2664 				1, 0)
2665 	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
2666 		 + (code != SET))))
2667 	{
2668 	  if (code == SET)
2669 	    {
2670 	      /* Currently SET is the only monadic value for CODE, all
2671 		 the rest are dyadic.  */
2672 	      if (TARGET_USE_MOVT)
2673 		arm_emit_movpair (target, GEN_INT (val));
2674 	      else
2675 		emit_set_insn (target, GEN_INT (val));
2676 
2677 	      return 1;
2678 	    }
2679 	  else
2680 	    {
2681 	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2682 
2683 	      if (TARGET_USE_MOVT)
2684 		arm_emit_movpair (temp, GEN_INT (val));
2685 	      else
2686 		emit_set_insn (temp, GEN_INT (val));
2687 
2688 	      /* For MINUS, the value is subtracted from, since we never
2689 		 have subtraction of a constant.  */
2690 	      if (code == MINUS)
2691 		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2692 	      else
2693 		emit_set_insn (target,
2694 			       gen_rtx_fmt_ee (code, mode, source, temp));
2695 	      return 2;
2696 	    }
2697 	}
2698     }
2699 
2700   return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2701 			   1);
2702 }
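/* A worked example of the splitting this drives: loading 0x12345678 into
   a register without movw/movt takes four instructions, each with a valid
   8-bit rotated immediate,

	   mov	r0, #0x12000000
	   orr	r0, r0, #0x00340000
	   orr	r0, r0, #0x00005600
	   orr	r0, r0, #0x00000078

   whereas a target with TARGET_USE_MOVT gets the same constant from
   arm_emit_movpair as a movw/movt pair.  */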
2703 
2704 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
2705    ARM/THUMB2 immediates, and add up to VAL.
2706    The function return value gives the number of insns required.  */
2707 static int
2708 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
2709 			    struct four_ints *return_sequence)
2710 {
2711   int best_consecutive_zeros = 0;
2712   int i;
2713   int best_start = 0;
2714   int insns1, insns2;
2715   struct four_ints tmp_sequence;
2716 
2717   /* If we aren't targeting ARM, the best place to start is always at
2718      the bottom, otherwise look more closely.  */
2719   if (TARGET_ARM)
2720     {
2721       for (i = 0; i < 32; i += 2)
2722 	{
2723 	  int consecutive_zeros = 0;
2724 
2725 	  if (!(val & (3 << i)))
2726 	    {
2727 	      while ((i < 32) && !(val & (3 << i)))
2728 		{
2729 		  consecutive_zeros += 2;
2730 		  i += 2;
2731 		}
2732 	      if (consecutive_zeros > best_consecutive_zeros)
2733 		{
2734 		  best_consecutive_zeros = consecutive_zeros;
2735 		  best_start = i - consecutive_zeros;
2736 		}
2737 	      i -= 2;
2738 	    }
2739 	}
2740     }
2741 
2742   /* So long as it won't require any more insns to do so, it's
2743      desirable to emit a small constant (in bits 0...9) in the last
2744      insn.  This way there is more chance that it can be combined with
2745      a later addressing insn to form a pre-indexed load or store
2746      operation.  Consider:
2747 
2748 	   *((volatile int *)0xe0000100) = 1;
2749 	   *((volatile int *)0xe0000110) = 2;
2750 
2751      We want this to wind up as:
2752 
2753 	    mov rA, #0xe0000000
2754 	    mov rB, #1
2755 	    str rB, [rA, #0x100]
2756 	    mov rB, #2
2757 	    str rB, [rA, #0x110]
2758 
2759      rather than having to synthesize both large constants from scratch.
2760 
2761      Therefore, we calculate how many insns would be required to emit
2762      the constant starting from `best_start', and also starting from
2763      zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
2764      yield a shorter sequence, we may as well use zero.  */
2765   insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
2766   if (best_start != 0
2767       && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
2768     {
2769       insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
2770       if (insns2 <= insns1)
2771 	{
2772 	  *return_sequence = tmp_sequence;
2773 	  insns1 = insns2;
2774 	}
2775     }
2776 
2777   return insns1;
2778 }
2779 
2780 /* As for optimal_immediate_sequence, but starting at bit-position I.  */
2781 static int
2782 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
2783 			     struct four_ints *return_sequence, int i)
2784 {
2785   int remainder = val & 0xffffffff;
2786   int insns = 0;
2787 
2788   /* Try to find a way of doing the job in either two or three
2789      instructions.
2790 
2791      In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
2792      location.  We start at position I.  This may be the MSB, or
2793      optimal_immediate_sequence may have positioned it at the largest block
2794      of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
2795      wrapping around to the top of the word when we drop off the bottom.
2796      In the worst case this code should produce no more than four insns.
2797 
2798      In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
2799      constants, shifted to any arbitrary location.  We should always start
2800      at the MSB.  */
2801   do
2802     {
2803       int end;
2804       unsigned int b1, b2, b3, b4;
2805       unsigned HOST_WIDE_INT result;
2806       int loc;
2807 
2808       gcc_assert (insns < 4);
2809 
2810       if (i <= 0)
2811 	i += 32;
2812 
2813       /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
2814       if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
2815 	{
2816 	  loc = i;
2817 	  if (i <= 12 && TARGET_THUMB2 && code == PLUS)
2818 	    /* We can use addw/subw for the last 12 bits.  */
2819 	    result = remainder;
2820 	  else
2821 	    {
2822 	      /* Use an 8-bit shifted/rotated immediate.  */
2823 	      end = i - 8;
2824 	      if (end < 0)
2825 		end += 32;
2826 	      result = remainder & ((0x0ff << end)
2827 				   | ((i < end) ? (0xff >> (32 - end))
2828 						: 0));
2829 	      i -= 8;
2830 	    }
2831 	}
2832       else
2833 	{
2834 	  /* Arm allows rotates by a multiple of two. Thumb-2 allows
2835 	     arbitrary shifts.  */
2836 	  i -= TARGET_ARM ? 2 : 1;
2837 	  continue;
2838 	}
2839 
2840       /* Next, see if we can do a better job with a thumb2 replicated
2841 	 constant.
2842 
2843          We do it this way around to catch the cases like 0x01F001E0 where
2844 	 two 8-bit immediates would work, but a replicated constant would
2845 	 make it worse.
2846 
2847          TODO: 16-bit constants that don't clear all the bits, but still win.
2848          TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
2849       if (TARGET_THUMB2)
2850 	{
2851 	  b1 = (remainder & 0xff000000) >> 24;
2852 	  b2 = (remainder & 0x00ff0000) >> 16;
2853 	  b3 = (remainder & 0x0000ff00) >> 8;
2854 	  b4 = remainder & 0xff;
2855 
2856 	  if (loc > 24)
2857 	    {
2858 	      /* The 8-bit immediate already found clears b1 (and maybe b2),
2859 		 but must leave b3 and b4 alone.  */
2860 
2861 	      /* First try to find a 32-bit replicated constant that clears
2862 		 almost everything.  We can assume that we can't do it in one,
2863 		 or else we wouldn't be here.  */
2864 	      unsigned int tmp = b1 & b2 & b3 & b4;
2865 	      unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
2866 				  + (tmp << 24);
2867 	      unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
2868 					    + (tmp == b3) + (tmp == b4);
2869 	      if (tmp
2870 		  && (matching_bytes >= 3
2871 		      || (matching_bytes == 2
2872 			  && const_ok_for_op (remainder & ~tmp2, code))))
2873 		{
2874 		  /* At least 3 of the bytes match, and the fourth has at
2875 		     least as many bits set, or two of the bytes match
2876 		     and it will only require one more insn to finish.  */
2877 		  result = tmp2;
2878 		  i = tmp != b1 ? 32
2879 		      : tmp != b2 ? 24
2880 		      : tmp != b3 ? 16
2881 		      : 8;
2882 		}
2883 
2884 	      /* Second, try to find a 16-bit replicated constant that can
2885 		 leave three of the bytes clear.  If b2 or b4 is already
2886 		 zero, then we can.  If the 8-bit from above would not
2887 		 clear b2 anyway, then we still win.  */
2888 	      else if (b1 == b3 && (!b2 || !b4
2889 			       || (remainder & 0x00ff0000 & ~result)))
2890 		{
2891 		  result = remainder & 0xff00ff00;
2892 		  i = 24;
2893 		}
2894 	    }
2895 	  else if (loc > 16)
2896 	    {
2897 	      /* The 8-bit immediate already found clears b2 (and maybe b3)
2898 		 and we don't get here unless b1 is already clear, but it will
2899 		 leave b4 unchanged.  */
2900 
2901 	      /* If we can clear b2 and b4 at once, then we win, since the
2902 		 8-bits couldn't possibly reach that far.  */
2903 	      if (b2 == b4)
2904 		{
2905 		  result = remainder & 0x00ff00ff;
2906 		  i = 16;
2907 		}
2908 	    }
2909 	}
2910 
2911       return_sequence->i[insns++] = result;
2912       remainder &= ~result;
2913 
2914       if (code == SET || code == MINUS)
2915 	code = PLUS;
2916     }
2917   while (remainder);
2918 
2919   return insns;
2920 }
2921 
2922 /* Emit an instruction with the indicated PATTERN.  If COND is
2923    non-NULL, conditionalize the execution of the instruction on COND
2924    being true.  */
2925 
2926 static void
2927 emit_constant_insn (rtx cond, rtx pattern)
2928 {
2929   if (cond)
2930     pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2931   emit_insn (pattern);
2932 }
2933 
2934 /* As above, but extra parameter GENERATE which, if clear, suppresses
2935    RTL generation.  */
2936 
2937 static int
2938 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2939 		  HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2940 		  int generate)
2941 {
2942   int can_invert = 0;
2943   int can_negate = 0;
2944   int final_invert = 0;
2945   int i;
2946   int set_sign_bit_copies = 0;
2947   int clear_sign_bit_copies = 0;
2948   int clear_zero_bit_copies = 0;
2949   int set_zero_bit_copies = 0;
2950   int insns = 0, neg_insns, inv_insns;
2951   unsigned HOST_WIDE_INT temp1, temp2;
2952   unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2953   struct four_ints *immediates;
2954   struct four_ints pos_immediates, neg_immediates, inv_immediates;
2955 
2956   /* Find out which operations are safe for a given CODE.  Also do a quick
2957      check for degenerate cases; these can occur when DImode operations
2958      are split.  */
2959   switch (code)
2960     {
2961     case SET:
2962       can_invert = 1;
2963       break;
2964 
2965     case PLUS:
2966       can_negate = 1;
2967       break;
2968 
2969     case IOR:
2970       if (remainder == 0xffffffff)
2971 	{
2972 	  if (generate)
2973 	    emit_constant_insn (cond,
2974 				gen_rtx_SET (VOIDmode, target,
2975 					     GEN_INT (ARM_SIGN_EXTEND (val))));
2976 	  return 1;
2977 	}
2978 
2979       if (remainder == 0)
2980 	{
2981 	  if (reload_completed && rtx_equal_p (target, source))
2982 	    return 0;
2983 
2984 	  if (generate)
2985 	    emit_constant_insn (cond,
2986 				gen_rtx_SET (VOIDmode, target, source));
2987 	  return 1;
2988 	}
2989       break;
2990 
2991     case AND:
2992       if (remainder == 0)
2993 	{
2994 	  if (generate)
2995 	    emit_constant_insn (cond,
2996 				gen_rtx_SET (VOIDmode, target, const0_rtx));
2997 	  return 1;
2998 	}
2999       if (remainder == 0xffffffff)
3000 	{
3001 	  if (reload_completed && rtx_equal_p (target, source))
3002 	    return 0;
3003 	  if (generate)
3004 	    emit_constant_insn (cond,
3005 				gen_rtx_SET (VOIDmode, target, source));
3006 	  return 1;
3007 	}
3008       can_invert = 1;
3009       break;
3010 
3011     case XOR:
3012       if (remainder == 0)
3013 	{
3014 	  if (reload_completed && rtx_equal_p (target, source))
3015 	    return 0;
3016 	  if (generate)
3017 	    emit_constant_insn (cond,
3018 				gen_rtx_SET (VOIDmode, target, source));
3019 	  return 1;
3020 	}
3021 
3022       if (remainder == 0xffffffff)
3023 	{
3024 	  if (generate)
3025 	    emit_constant_insn (cond,
3026 				gen_rtx_SET (VOIDmode, target,
3027 					     gen_rtx_NOT (mode, source)));
3028 	  return 1;
3029 	}
3030       final_invert = 1;
3031       break;
3032 
3033     case MINUS:
3034       /* We treat MINUS as (val - source), since (source - val) is always
3035 	 passed as (source + (-val)).  */
3036       if (remainder == 0)
3037 	{
3038 	  if (generate)
3039 	    emit_constant_insn (cond,
3040 				gen_rtx_SET (VOIDmode, target,
3041 					     gen_rtx_NEG (mode, source)));
3042 	  return 1;
3043 	}
3044       if (const_ok_for_arm (val))
3045 	{
3046 	  if (generate)
3047 	    emit_constant_insn (cond,
3048 				gen_rtx_SET (VOIDmode, target,
3049 					     gen_rtx_MINUS (mode, GEN_INT (val),
3050 							    source)));
3051 	  return 1;
3052 	}
3053 
3054       break;
3055 
3056     default:
3057       gcc_unreachable ();
3058     }
3059 
3060   /* If we can do it in one insn get out quickly.  */
3061   if (const_ok_for_op (val, code))
3062     {
3063       if (generate)
3064 	emit_constant_insn (cond,
3065 			    gen_rtx_SET (VOIDmode, target,
3066 					 (source
3067 					  ? gen_rtx_fmt_ee (code, mode, source,
3068 							    GEN_INT (val))
3069 					  : GEN_INT (val))));
3070       return 1;
3071     }
3072 
3073   /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
3074      insn.  */
3075   if (code == AND && (i = exact_log2 (remainder + 1)) > 0
3076       && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
3077     {
3078       if (generate)
3079 	{
3080 	  if (mode == SImode && i == 16)
3081 	    /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3082 	       smaller insn.  */
3083 	    emit_constant_insn (cond,
3084 				gen_zero_extendhisi2
3085 				(target, gen_lowpart (HImode, source)));
3086 	  else
3087 	    /* Extzv only supports SImode, but we can coerce the operands
3088 	       into that mode.  */
3089 	    emit_constant_insn (cond,
3090 				gen_extzv_t2 (gen_lowpart (SImode, target),
3091 					      gen_lowpart (SImode, source),
3092 					      GEN_INT (i), const0_rtx));
3093 	}
3094 
3095       return 1;
3096     }
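  /* Worked example (added annotation, not from the original source):
     "x & 0xffff" on an ARMv6 core takes the UXTH form above, while
     "x & 0x1ffff" on Thumb-2 (exact_log2 (0x1ffff + 1) == 17) becomes a
     single UBFX extracting 17 bits starting at bit 0.  */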
3097 
3098   /* Calculate a few attributes that may be useful for specific
3099      optimizations.  */
3100   /* Count number of leading zeros.  */
3101   for (i = 31; i >= 0; i--)
3102     {
3103       if ((remainder & (1 << i)) == 0)
3104 	clear_sign_bit_copies++;
3105       else
3106 	break;
3107     }
3108 
3109   /* Count number of leading 1's.  */
3110   for (i = 31; i >= 0; i--)
3111     {
3112       if ((remainder & (1 << i)) != 0)
3113 	set_sign_bit_copies++;
3114       else
3115 	break;
3116     }
3117 
3118   /* Count number of trailing zeros.  */
3119   for (i = 0; i <= 31; i++)
3120     {
3121       if ((remainder & (1 << i)) == 0)
3122 	clear_zero_bit_copies++;
3123       else
3124 	break;
3125     }
3126 
3127   /* Count number of trailing 1's.  */
3128   for (i = 0; i <= 31; i++)
3129     {
3130       if ((remainder & (1 << i)) != 0)
3131 	set_zero_bit_copies++;
3132       else
3133 	break;
3134     }
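  /* Worked example (added annotation, not from the original source): for
     remainder == 0x0007fffe the four attributes are
     clear_sign_bit_copies == 13 (leading zeros),
     set_sign_bit_copies == 0,
     clear_zero_bit_copies == 1 (one trailing zero) and
     set_zero_bit_copies == 0.  */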
3135 
3136   switch (code)
3137     {
3138     case SET:
3139       /* See if we can do this by sign_extending a constant that is known
3140 	 to be negative.  This is a good, way of doing it, since the shift
3141 	 to be negative.  This is a good way of doing it, since the shift
3142       if (set_sign_bit_copies > 1)
3143 	{
3144 	  if (const_ok_for_arm
3145 	      (temp1 = ARM_SIGN_EXTEND (remainder
3146 					<< (set_sign_bit_copies - 1))))
3147 	    {
3148 	      if (generate)
3149 		{
3150 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3151 		  emit_constant_insn (cond,
3152 				      gen_rtx_SET (VOIDmode, new_src,
3153 						   GEN_INT (temp1)));
3154 		  emit_constant_insn (cond,
3155 				      gen_ashrsi3 (target, new_src,
3156 						   GEN_INT (set_sign_bit_copies - 1)));
3157 		}
3158 	      return 2;
3159 	    }
3160 	  /* For an inverted constant, we will need to set the low bits,
3161 	     these will be shifted out of harm's way.  */
3162 	  temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
3163 	  if (const_ok_for_arm (~temp1))
3164 	    {
3165 	      if (generate)
3166 		{
3167 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3168 		  emit_constant_insn (cond,
3169 				      gen_rtx_SET (VOIDmode, new_src,
3170 						   GEN_INT (temp1)));
3171 		  emit_constant_insn (cond,
3172 				      gen_ashrsi3 (target, new_src,
3173 						   GEN_INT (set_sign_bit_copies - 1)));
3174 		}
3175 	      return 2;
3176 	    }
3177 	}
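      /* Worked example (added annotation, not from the original source):
	 for a SET of 0xfffff0a0 (20 leading ones; neither the value nor
	 its complement is a valid immediate), remainder << 19 gives
	 0x85000000, which is valid, so this emits roughly
	     mov  rd, #0x85000000
	     mov  rd, rd, asr #19
	 and the arithmetic shift recreates the leading ones.  */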
3178 
3179       /* See if we can calculate the value as the difference between two
3180 	 valid immediates.  */
3181       if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3182 	{
3183 	  int topshift = clear_sign_bit_copies & ~1;
3184 
3185 	  temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3186 				   & (0xff000000 >> topshift));
3187 
3188 	  /* If temp1 is zero, then that means the 9 most significant
3189 	     bits of remainder were 1 and we've caused it to overflow.
3190 	     When topshift is 0 we don't need to do anything since we
3191 	     can borrow from 'bit 32'.  */
3192 	  if (temp1 == 0 && topshift != 0)
3193 	    temp1 = 0x80000000 >> (topshift - 1);
3194 
3195 	  temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3196 
3197 	  if (const_ok_for_arm (temp2))
3198 	    {
3199 	      if (generate)
3200 		{
3201 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3202 		  emit_constant_insn (cond,
3203 				      gen_rtx_SET (VOIDmode, new_src,
3204 						   GEN_INT (temp1)));
3205 		  emit_constant_insn (cond,
3206 				      gen_addsi3 (target, new_src,
3207 						  GEN_INT (-temp2)));
3208 		}
3209 
3210 	      return 2;
3211 	    }
3212 	}
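      /* Worked example (added annotation, not from the original source):
	 for a SET of 0x00ffff00, temp1 becomes 0x01000000 and temp2
	 becomes 0x100, so the value is built roughly as
	     mov  rd, #0x01000000
	     sub  rd, rd, #0x100
	 i.e. two instructions for a value that is not itself a valid
	 immediate.  */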
3213 
3214       /* See if we can generate this by setting the bottom (or the top)
3215 	 16 bits, and then shifting these into the other half of the
3216 	 word.  We only look for the simplest cases, to do more would cost
3217 	 too much.  Be careful, however, not to generate this when the
3218 	 alternative would take fewer insns.  */
3219       if (val & 0xffff0000)
3220 	{
3221 	  temp1 = remainder & 0xffff0000;
3222 	  temp2 = remainder & 0x0000ffff;
3223 
3224 	  /* Overlaps outside this range are best done using other methods.  */
3225 	  for (i = 9; i < 24; i++)
3226 	    {
3227 	      if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3228 		  && !const_ok_for_arm (temp2))
3229 		{
3230 		  rtx new_src = (subtargets
3231 				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3232 				 : target);
3233 		  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3234 					    source, subtargets, generate);
3235 		  source = new_src;
3236 		  if (generate)
3237 		    emit_constant_insn
3238 		      (cond,
3239 		       gen_rtx_SET
3240 		       (VOIDmode, target,
3241 			gen_rtx_IOR (mode,
3242 				     gen_rtx_ASHIFT (mode, source,
3243 						     GEN_INT (i)),
3244 				     source)));
3245 		  return insns + 1;
3246 		}
3247 	    }
3248 
3249 	  /* Don't duplicate cases already considered.  */
3250 	  for (i = 17; i < 24; i++)
3251 	    {
3252 	      if (((temp1 | (temp1 >> i)) == remainder)
3253 		  && !const_ok_for_arm (temp1))
3254 		{
3255 		  rtx new_src = (subtargets
3256 				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3257 				 : target);
3258 		  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3259 					    source, subtargets, generate);
3260 		  source = new_src;
3261 		  if (generate)
3262 		    emit_constant_insn
3263 		      (cond,
3264 		       gen_rtx_SET (VOIDmode, target,
3265 				    gen_rtx_IOR
3266 				    (mode,
3267 				     gen_rtx_LSHIFTRT (mode, source,
3268 						       GEN_INT (i)),
3269 				     source)));
3270 		  return insns + 1;
3271 		}
3272 	    }
3273 	}
3274       break;
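      /* Worked example (added annotation, not from the original source):
	 for a SET of 0x01010101 the low half 0x0101 matches at i == 16,
	 so 0x0101 is built first (two instructions on cores without movw,
	 one with it) and then a single
	     orr  rd, rn, rn, lsl #16
	 completes the constant.  */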
3275 
3276     case IOR:
3277     case XOR:
3278       /* If we have IOR or XOR, and the constant can be loaded in a
3279 	 single instruction, and we can find a temporary to put it in,
3280 	 then this can be done in two instructions instead of 3-4.  */
3281       if (subtargets
3282 	  /* TARGET can't be NULL if SUBTARGETS is 0 */
3283 	  || (reload_completed && !reg_mentioned_p (target, source)))
3284 	{
3285 	  if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3286 	    {
3287 	      if (generate)
3288 		{
3289 		  rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3290 
3291 		  emit_constant_insn (cond,
3292 				      gen_rtx_SET (VOIDmode, sub,
3293 						   GEN_INT (val)));
3294 		  emit_constant_insn (cond,
3295 				      gen_rtx_SET (VOIDmode, target,
3296 						   gen_rtx_fmt_ee (code, mode,
3297 								   source, sub)));
3298 		}
3299 	      return 2;
3300 	    }
3301 	}
3302 
3303       if (code == XOR)
3304 	break;
3305 
3306       /* Convert
3307 	  x = y | constant (which is composed of set_sign_bit_copies leading 1s
3308 	                     followed by 0s, e.g. 0xfff00000) into
3309 	  x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
3310 
3311 	  This can be done in 2 instructions by using shifts with mov or mvn.
3312 	  e.g. for
3313 	  x = x | 0xfff00000;
3314 	  we generate:
3315 	  mvn	r0, r0, asl #12
3316 	  mvn	r0, r0, lsr #12  */
3317       if (set_sign_bit_copies > 8
3318 	  && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3319 	{
3320 	  if (generate)
3321 	    {
3322 	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3323 	      rtx shift = GEN_INT (set_sign_bit_copies);
3324 
3325 	      emit_constant_insn
3326 		(cond,
3327 		 gen_rtx_SET (VOIDmode, sub,
3328 			      gen_rtx_NOT (mode,
3329 					   gen_rtx_ASHIFT (mode,
3330 							   source,
3331 							   shift))));
3332 	      emit_constant_insn
3333 		(cond,
3334 		 gen_rtx_SET (VOIDmode, target,
3335 			      gen_rtx_NOT (mode,
3336 					   gen_rtx_LSHIFTRT (mode, sub,
3337 							     shift))));
3338 	    }
3339 	  return 2;
3340 	}
3341 
3342       /* Convert
3343 	  x = y | constant (which has set_zero_bit_copies number of trailing ones).
3344 	   to
3345 	  x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3346 
3347 	  For example, r0 = r0 | 0xfff
3348 	       mvn	r0, r0, lsr #12
3349 	       mvn	r0, r0, asl #12
3350 
3351       */
3352       if (set_zero_bit_copies > 8
3353 	  && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3354 	{
3355 	  if (generate)
3356 	    {
3357 	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3358 	      rtx shift = GEN_INT (set_zero_bit_copies);
3359 
3360 	      emit_constant_insn
3361 		(cond,
3362 		 gen_rtx_SET (VOIDmode, sub,
3363 			      gen_rtx_NOT (mode,
3364 					   gen_rtx_LSHIFTRT (mode,
3365 							     source,
3366 							     shift))));
3367 	      emit_constant_insn
3368 		(cond,
3369 		 gen_rtx_SET (VOIDmode, target,
3370 			      gen_rtx_NOT (mode,
3371 					   gen_rtx_ASHIFT (mode, sub,
3372 							   shift))));
3373 	    }
3374 	  return 2;
3375 	}
3376 
3377       /* This will never be reached for Thumb2 because orn is a valid
3378 	 instruction. This is for Thumb1 and the ARM 32 bit cases.
3379 
3380 	 x = y | constant (such that ~constant is a valid constant)
3381 	 Transform this to
3382 	 x = ~(~y & ~constant).
3383       */
3384       if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3385 	{
3386 	  if (generate)
3387 	    {
3388 	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3389 	      emit_constant_insn (cond,
3390 				  gen_rtx_SET (VOIDmode, sub,
3391 					       gen_rtx_NOT (mode, source)));
3392 	      source = sub;
3393 	      if (subtargets)
3394 		sub = gen_reg_rtx (mode);
3395 	      emit_constant_insn (cond,
3396 				  gen_rtx_SET (VOIDmode, sub,
3397 					       gen_rtx_AND (mode, source,
3398 							    GEN_INT (temp1))));
3399 	      emit_constant_insn (cond,
3400 				  gen_rtx_SET (VOIDmode, target,
3401 					       gen_rtx_NOT (mode, sub)));
3402 	    }
3403 	  return 3;
3404 	}
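      /* Worked example (added annotation, not from the original source):
	 with no scratch register available, "x = y | 0xff00ffff" (where
	 ~0xff00ffff == 0x00ff0000 is a valid immediate) becomes roughly
	     mvn  rd, ry
	     and  rd, rd, #0x00ff0000
	     mvn  rd, rd
	 on ARM; Thumb-2 would instead have been handled earlier with a
	 single ORN.  */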
3405       break;
3406 
3407     case AND:
3408       /* See if two shifts will do 2 or more insns' worth of work.  */
3409       if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3410 	{
3411 	  HOST_WIDE_INT shift_mask = ((0xffffffff
3412 				       << (32 - clear_sign_bit_copies))
3413 				      & 0xffffffff);
3414 
3415 	  if ((remainder | shift_mask) != 0xffffffff)
3416 	    {
3417 	      if (generate)
3418 		{
3419 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3420 		  insns = arm_gen_constant (AND, mode, cond,
3421 					    remainder | shift_mask,
3422 					    new_src, source, subtargets, 1);
3423 		  source = new_src;
3424 		}
3425 	      else
3426 		{
3427 		  rtx targ = subtargets ? NULL_RTX : target;
3428 		  insns = arm_gen_constant (AND, mode, cond,
3429 					    remainder | shift_mask,
3430 					    targ, source, subtargets, 0);
3431 		}
3432 	    }
3433 
3434 	  if (generate)
3435 	    {
3436 	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3437 	      rtx shift = GEN_INT (clear_sign_bit_copies);
3438 
3439 	      emit_insn (gen_ashlsi3 (new_src, source, shift));
3440 	      emit_insn (gen_lshrsi3 (target, new_src, shift));
3441 	    }
3442 
3443 	  return insns + 2;
3444 	}
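      /* Worked example (added annotation, not from the original source):
	 "x & 0x0000fffe" has 16 clear sign-bit copies, so it is emitted
	 roughly as
	     bic  rd, ry, #1
	     mov  rd, rd, lsl #16
	     mov  rd, rd, lsr #16
	 where the recursive call produces the BIC and the two shifts
	 clear the upper half.  */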
3445 
3446       if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3447 	{
3448 	  HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3449 
3450 	  if ((remainder | shift_mask) != 0xffffffff)
3451 	    {
3452 	      if (generate)
3453 		{
3454 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3455 
3456 		  insns = arm_gen_constant (AND, mode, cond,
3457 					    remainder | shift_mask,
3458 					    new_src, source, subtargets, 1);
3459 		  source = new_src;
3460 		}
3461 	      else
3462 		{
3463 		  rtx targ = subtargets ? NULL_RTX : target;
3464 
3465 		  insns = arm_gen_constant (AND, mode, cond,
3466 					    remainder | shift_mask,
3467 					    targ, source, subtargets, 0);
3468 		}
3469 	    }
3470 
3471 	  if (generate)
3472 	    {
3473 	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3474 	      rtx shift = GEN_INT (clear_zero_bit_copies);
3475 
3476 	      emit_insn (gen_lshrsi3 (new_src, source, shift));
3477 	      emit_insn (gen_ashlsi3 (target, new_src, shift));
3478 	    }
3479 
3480 	  return insns + 2;
3481 	}
3482 
3483       break;
3484 
3485     default:
3486       break;
3487     }
3488 
3489   /* Calculate what the instruction sequences would be if we generated it
3490      normally, negated, or inverted.  */
3491   if (code == AND)
3492     /* AND cannot be split into multiple insns, so invert and use BIC.  */
3493     insns = 99;
3494   else
3495     insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
3496 
3497   if (can_negate)
3498     neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
3499 					    &neg_immediates);
3500   else
3501     neg_insns = 99;
3502 
3503   if (can_invert || final_invert)
3504     inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
3505 					    &inv_immediates);
3506   else
3507     inv_insns = 99;
3508 
3509   immediates = &pos_immediates;
3510 
3511   /* Is the negated immediate sequence more efficient?  */
3512   if (neg_insns < insns && neg_insns <= inv_insns)
3513     {
3514       insns = neg_insns;
3515       immediates = &neg_immediates;
3516     }
3517   else
3518     can_negate = 0;
3519 
3520   /* Is the inverted immediate sequence more efficient?
3521      We must allow for an extra NOT instruction for XOR operations, although
3522      there is some chance that the final 'mvn' will get optimized later.  */
3523   if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
3524     {
3525       insns = inv_insns;
3526       immediates = &inv_immediates;
3527     }
3528   else
3529     {
3530       can_invert = 0;
3531       final_invert = 0;
3532     }
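  /* Worked example (added annotation, not from the original source): for
     "x & 0xff00f00f" the positive form is disallowed (insns == 99), but
     the inverted constant 0x00ff0ff0 splits into the two valid immediates
     0x00ff0000 and 0x00000ff0, so the loop below emits roughly
	 bic  rd, ry, #0x00ff0000
	 bic  rd, rd, #0x00000ff0  */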
3533 
3534   /* Now output the chosen sequence as instructions.  */
3535   if (generate)
3536     {
3537       for (i = 0; i < insns; i++)
3538 	{
3539 	  rtx new_src, temp1_rtx;
3540 
3541 	  temp1 = immediates->i[i];
3542 
3543 	  if (code == SET || code == MINUS)
3544 	    new_src = (subtargets ? gen_reg_rtx (mode) : target);
3545 	  else if ((final_invert || i < (insns - 1)) && subtargets)
3546 	    new_src = gen_reg_rtx (mode);
3547 	  else
3548 	    new_src = target;
3549 
3550 	  if (can_invert)
3551 	    temp1 = ~temp1;
3552 	  else if (can_negate)
3553 	    temp1 = -temp1;
3554 
3555 	  temp1 = trunc_int_for_mode (temp1, mode);
3556 	  temp1_rtx = GEN_INT (temp1);
3557 
3558 	  if (code == SET)
3559 	    ;
3560 	  else if (code == MINUS)
3561 	    temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3562 	  else
3563 	    temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3564 
3565 	  emit_constant_insn (cond,
3566 			      gen_rtx_SET (VOIDmode, new_src,
3567 					   temp1_rtx));
3568 	  source = new_src;
3569 
3570 	  if (code == SET)
3571 	    {
3572 	      can_negate = can_invert;
3573 	      can_invert = 0;
3574 	      code = PLUS;
3575 	    }
3576 	  else if (code == MINUS)
3577 	    code = PLUS;
3578 	}
3579     }
3580 
3581   if (final_invert)
3582     {
3583       if (generate)
3584 	emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3585 					       gen_rtx_NOT (mode, source)));
3586       insns++;
3587     }
3588 
3589   return insns;
3590 }
3591 
3592 /* Canonicalize a comparison so that we are more likely to recognize it.
3593    This can be done for a few constant compares, where we can make the
3594    immediate value easier to load.  */
3595 
3596 static void
3597 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
3598 			     bool op0_preserve_value)
3599 {
3600   enum machine_mode mode;
3601   unsigned HOST_WIDE_INT i, maxval;
3602 
3603   mode = GET_MODE (*op0);
3604   if (mode == VOIDmode)
3605     mode = GET_MODE (*op1);
3606 
3607   maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3608 
3609   /* For DImode, we have GE/LT/GEU/LTU comparisons.  In ARM mode
3610      we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be either
3611      reversed or (for constant OP1) adjusted to GE/LT.  Similarly
3612      for GTU/LEU in Thumb mode.  */
3613   if (mode == DImode)
3614     {
3615       rtx tem;
3616 
3617       if (*code == GT || *code == LE
3618 	  || (!TARGET_ARM && (*code == GTU || *code == LEU)))
3619 	{
3620 	  /* Missing comparison.  First try to use an available
3621 	     comparison.  */
3622 	  if (CONST_INT_P (*op1))
3623 	    {
3624 	      i = INTVAL (*op1);
3625 	      switch (*code)
3626 		{
3627 		case GT:
3628 		case LE:
3629 		  if (i != maxval
3630 		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
3631 		    {
3632 		      *op1 = GEN_INT (i + 1);
3633 		      *code = *code == GT ? GE : LT;
3634 		      return;
3635 		    }
3636 		  break;
3637 		case GTU:
3638 		case LEU:
3639 		  if (i != ~((unsigned HOST_WIDE_INT) 0)
3640 		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
3641 		    {
3642 		      *op1 = GEN_INT (i + 1);
3643 		      *code = *code == GTU ? GEU : LTU;
3644 		      return;
3645 		    }
3646 		  break;
3647 		default:
3648 		  gcc_unreachable ();
3649 		}
3650 	    }
3651 
3652 	  /* If that did not work, reverse the condition.  */
3653 	  if (!op0_preserve_value)
3654 	    {
3655 	      tem = *op0;
3656 	      *op0 = *op1;
3657 	      *op1 = tem;
3658 	      *code = (int)swap_condition ((enum rtx_code)*code);
3659 	    }
3660 	}
3661       return;
3662     }
3663 
3664   /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3665      with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3666      to facilitate possible combining with a cmp into 'ands'.  */
3667   if (mode == SImode
3668       && GET_CODE (*op0) == ZERO_EXTEND
3669       && GET_CODE (XEXP (*op0, 0)) == SUBREG
3670       && GET_MODE (XEXP (*op0, 0)) == QImode
3671       && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
3672       && subreg_lowpart_p (XEXP (*op0, 0))
3673       && *op1 == const0_rtx)
3674     *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
3675 			GEN_INT (255));
3676 
3677   /* Comparisons smaller than DImode.  Only adjust comparisons against
3678      an out-of-range constant.  */
3679   if (!CONST_INT_P (*op1)
3680       || const_ok_for_arm (INTVAL (*op1))
3681       || const_ok_for_arm (- INTVAL (*op1)))
3682     return;
3683 
3684   i = INTVAL (*op1);
3685 
3686   switch (*code)
3687     {
3688     case EQ:
3689     case NE:
3690       return;
3691 
3692     case GT:
3693     case LE:
3694       if (i != maxval
3695 	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3696 	{
3697 	  *op1 = GEN_INT (i + 1);
3698 	  *code = *code == GT ? GE : LT;
3699 	  return;
3700 	}
3701       break;
3702 
3703     case GE:
3704     case LT:
3705       if (i != ~maxval
3706 	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3707 	{
3708 	  *op1 = GEN_INT (i - 1);
3709 	  *code = *code == GE ? GT : LE;
3710 	  return;
3711 	}
3712       break;
3713 
3714     case GTU:
3715     case LEU:
3716       if (i != ~((unsigned HOST_WIDE_INT) 0)
3717 	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3718 	{
3719 	  *op1 = GEN_INT (i + 1);
3720 	  *code = *code == GTU ? GEU : LTU;
3721 	  return;
3722 	}
3723       break;
3724 
3725     case GEU:
3726     case LTU:
3727       if (i != 0
3728 	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3729 	{
3730 	  *op1 = GEN_INT (i - 1);
3731 	  *code = *code == GEU ? GTU : LEU;
3732 	  return;
3733 	}
3734       break;
3735 
3736     default:
3737       gcc_unreachable ();
3738     }
3739 }
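/* Worked example (added annotation, not from the original source): for
   "x > 1023" (signed SImode), neither 1023 nor -1023 is a valid immediate
   for CMP/CMN, but 1024 is, so the comparison is rewritten as "x >= 1024"
   (GT becomes GE) and a single "cmp x, #1024" suffices.  */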
3740 
3741 
3742 /* Define how to find the value returned by a function.  */
3743 
3744 static rtx
3745 arm_function_value(const_tree type, const_tree func,
3746 		   bool outgoing ATTRIBUTE_UNUSED)
3747 {
3748   enum machine_mode mode;
3749   int unsignedp ATTRIBUTE_UNUSED;
3750   rtx r ATTRIBUTE_UNUSED;
3751 
3752   mode = TYPE_MODE (type);
3753 
3754   if (TARGET_AAPCS_BASED)
3755     return aapcs_allocate_return_reg (mode, type, func);
3756 
3757   /* Promote integer types.  */
3758   if (INTEGRAL_TYPE_P (type))
3759     mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3760 
3761   /* Promotes small structs returned in a register to full-word size
3762      for big-endian AAPCS.  */
3763   if (arm_return_in_msb (type))
3764     {
3765       HOST_WIDE_INT size = int_size_in_bytes (type);
3766       if (size % UNITS_PER_WORD != 0)
3767 	{
3768 	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3769 	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3770 	}
3771     }
3772 
3773   return arm_libcall_value_1 (mode);
3774 }
3775 
3776 static int
3777 libcall_eq (const void *p1, const void *p2)
3778 {
3779   return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3780 }
3781 
3782 static hashval_t
3783 libcall_hash (const void *p1)
3784 {
3785   return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3786 }
3787 
3788 static void
3789 add_libcall (htab_t htab, rtx libcall)
3790 {
3791   *htab_find_slot (htab, libcall, INSERT) = libcall;
3792 }
3793 
3794 static bool
3795 arm_libcall_uses_aapcs_base (const_rtx libcall)
3796 {
3797   static bool init_done = false;
3798   static htab_t libcall_htab;
3799 
3800   if (!init_done)
3801     {
3802       init_done = true;
3803 
3804       libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3805 				  NULL);
3806       add_libcall (libcall_htab,
3807 		   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3808       add_libcall (libcall_htab,
3809 		   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3810       add_libcall (libcall_htab,
3811 		   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3812       add_libcall (libcall_htab,
3813 		   convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3814 
3815       add_libcall (libcall_htab,
3816 		   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3817       add_libcall (libcall_htab,
3818 		   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3819       add_libcall (libcall_htab,
3820 		   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3821       add_libcall (libcall_htab,
3822 		   convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3823 
3824       add_libcall (libcall_htab,
3825 		   convert_optab_libfunc (sext_optab, SFmode, HFmode));
3826       add_libcall (libcall_htab,
3827 		   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3828       add_libcall (libcall_htab,
3829 		   convert_optab_libfunc (sfix_optab, SImode, DFmode));
3830       add_libcall (libcall_htab,
3831 		   convert_optab_libfunc (ufix_optab, SImode, DFmode));
3832       add_libcall (libcall_htab,
3833 		   convert_optab_libfunc (sfix_optab, DImode, DFmode));
3834       add_libcall (libcall_htab,
3835 		   convert_optab_libfunc (ufix_optab, DImode, DFmode));
3836       add_libcall (libcall_htab,
3837 		   convert_optab_libfunc (sfix_optab, DImode, SFmode));
3838       add_libcall (libcall_htab,
3839 		   convert_optab_libfunc (ufix_optab, DImode, SFmode));
3840 
3841       /* Values from double-precision helper functions are returned in core
3842 	 registers if the selected core only supports single-precision
3843 	 arithmetic, even if we are using the hard-float ABI.  The same is
3844 	 true for single-precision helpers, but we will never be using the
3845 	 hard-float ABI on a CPU which doesn't support single-precision
3846 	 operations in hardware.  */
3847       add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
3848       add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
3849       add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
3850       add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
3851       add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
3852       add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
3853       add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
3854       add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
3855       add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
3856       add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
3857       add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
3858       add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
3859 							SFmode));
3860       add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
3861 							DFmode));
3862     }
3863 
3864   return libcall && htab_find (libcall_htab, libcall) != NULL;
3865 }
3866 
3867 static rtx
3868 arm_libcall_value_1 (enum machine_mode mode)
3869 {
3870   if (TARGET_AAPCS_BASED)
3871     return aapcs_libcall_value (mode);
3872   else if (TARGET_IWMMXT_ABI
3873 	   && arm_vector_mode_supported_p (mode))
3874     return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
3875   else
3876     return gen_rtx_REG (mode, ARG_REGISTER (1));
3877 }
3878 
3879 /* Define how to find the value returned by a library function
3880    assuming the value has mode MODE.  */
3881 
3882 static rtx
3883 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3884 {
3885   if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3886       && GET_MODE_CLASS (mode) == MODE_FLOAT)
3887     {
3888       /* The following libcalls return their result in integer registers,
3889 	 even though they return a floating point value.  */
3890       if (arm_libcall_uses_aapcs_base (libcall))
3891 	return gen_rtx_REG (mode, ARG_REGISTER(1));
3892 
3893     }
3894 
3895   return arm_libcall_value_1 (mode);
3896 }
3897 
3898 /* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */
3899 
3900 static bool
3901 arm_function_value_regno_p (const unsigned int regno)
3902 {
3903   if (regno == ARG_REGISTER (1)
3904       || (TARGET_32BIT
3905 	  && TARGET_AAPCS_BASED
3906 	  && TARGET_VFP
3907 	  && TARGET_HARD_FLOAT
3908 	  && regno == FIRST_VFP_REGNUM)
3909       || (TARGET_IWMMXT_ABI
3910 	  && regno == FIRST_IWMMXT_REGNUM))
3911     return true;
3912 
3913   return false;
3914 }
3915 
3916 /* Determine the amount of memory needed to store the possible return
3917    registers of an untyped call.  */
3918 int
3919 arm_apply_result_size (void)
3920 {
3921   int size = 16;
3922 
3923   if (TARGET_32BIT)
3924     {
3925       if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
3926 	size += 32;
3927       if (TARGET_IWMMXT_ABI)
3928 	size += 8;
3929     }
3930 
3931   return size;
3932 }
3933 
3934 /* Decide whether TYPE should be returned in memory (true)
3935    or in a register (false).  FNTYPE is the type of the function making
3936    the call.  */
3937 static bool
3938 arm_return_in_memory (const_tree type, const_tree fntype)
3939 {
3940   HOST_WIDE_INT size;
3941 
3942   size = int_size_in_bytes (type);  /* Negative if not fixed size.  */
3943 
3944   if (TARGET_AAPCS_BASED)
3945     {
3946       /* Simple, non-aggregate types (i.e. not including vectors and
3947 	 complex) are always returned in a register (or registers).
3948 	 We don't care about which register here, so we can short-cut
3949 	 some of the detail.  */
3950       if (!AGGREGATE_TYPE_P (type)
3951 	  && TREE_CODE (type) != VECTOR_TYPE
3952 	  && TREE_CODE (type) != COMPLEX_TYPE)
3953 	return false;
3954 
3955       /* Any return value that is no larger than one word can be
3956 	 returned in r0.  */
3957       if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3958 	return false;
3959 
3960       /* Check any available co-processors to see if they accept the
3961 	 type as a register candidate (VFP, for example, can return
3962 	 some aggregates in consecutive registers).  These aren't
3963 	 available if the call is variadic.  */
3964       if (aapcs_select_return_coproc (type, fntype) >= 0)
3965 	return false;
3966 
3967       /* Vector values should be returned using ARM registers, not
3968 	 memory (unless they're over 16 bytes, which will break since
3969 	 we only have four call-clobbered registers to play with).  */
3970       if (TREE_CODE (type) == VECTOR_TYPE)
3971 	return (size < 0 || size > (4 * UNITS_PER_WORD));
3972 
3973       /* The rest go in memory.  */
3974       return true;
3975     }
3976 
3977   if (TREE_CODE (type) == VECTOR_TYPE)
3978     return (size < 0 || size > (4 * UNITS_PER_WORD));
3979 
3980   if (!AGGREGATE_TYPE_P (type) &&
3981       (TREE_CODE (type) != VECTOR_TYPE))
3982     /* All simple types are returned in registers.  */
3983     return false;
3984 
3985   if (arm_abi != ARM_ABI_APCS)
3986     {
3987       /* ATPCS and later return aggregate types in memory only if they are
3988 	 larger than a word (or are variable size).  */
3989       return (size < 0 || size > UNITS_PER_WORD);
3990     }
3991 
3992   /* For the arm-wince targets we choose to be compatible with Microsoft's
3993      ARM and Thumb compilers, which always return aggregates in memory.  */
3994 #ifndef ARM_WINCE
3995   /* All structures/unions bigger than one word are returned in memory.
3996      Also catch the case where int_size_in_bytes returns -1.  In this case
3997      the aggregate is either huge or of variable size, and in either case
3998      we will want to return it via memory and not in a register.  */
3999   if (size < 0 || size > UNITS_PER_WORD)
4000     return true;
4001 
4002   if (TREE_CODE (type) == RECORD_TYPE)
4003     {
4004       tree field;
4005 
4006       /* For a struct the APCS says that we only return in a register
4007 	 if the type is 'integer like' and every addressable element
4008 	 has an offset of zero.  For practical purposes this means
4009 	 that the structure can have at most one non bit-field element
4010 	 and that this element must be the first one in the structure.  */
4011 
4012       /* Find the first field, ignoring non FIELD_DECL things which will
4013 	 have been created by C++.  */
4014       for (field = TYPE_FIELDS (type);
4015 	   field && TREE_CODE (field) != FIELD_DECL;
4016 	   field = DECL_CHAIN (field))
4017 	continue;
4018 
4019       if (field == NULL)
4020 	return false; /* An empty structure.  Allowed by an extension to ANSI C.  */
4021 
4022       /* Check that the first field is valid for returning in a register.  */
4023 
4024       /* ... Floats are not allowed */
4025       if (FLOAT_TYPE_P (TREE_TYPE (field)))
4026 	return true;
4027 
4028       /* ... Aggregates that are not themselves valid for returning in
4029 	 a register are not allowed.  */
4030       if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4031 	return true;
4032 
4033       /* Now check the remaining fields, if any.  Only bitfields are allowed,
4034 	 since they are not addressable.  */
4035       for (field = DECL_CHAIN (field);
4036 	   field;
4037 	   field = DECL_CHAIN (field))
4038 	{
4039 	  if (TREE_CODE (field) != FIELD_DECL)
4040 	    continue;
4041 
4042 	  if (!DECL_BIT_FIELD_TYPE (field))
4043 	    return true;
4044 	}
4045 
4046       return false;
4047     }
4048 
4049   if (TREE_CODE (type) == UNION_TYPE)
4050     {
4051       tree field;
4052 
4053       /* Unions can be returned in registers if every element is
4054 	 integral, or can be returned in an integer register.  */
4055       for (field = TYPE_FIELDS (type);
4056 	   field;
4057 	   field = DECL_CHAIN (field))
4058 	{
4059 	  if (TREE_CODE (field) != FIELD_DECL)
4060 	    continue;
4061 
4062 	  if (FLOAT_TYPE_P (TREE_TYPE (field)))
4063 	    return true;
4064 
4065 	  if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4066 	    return true;
4067 	}
4068 
4069       return false;
4070     }
4071 #endif /* not ARM_WINCE */
4072 
4073   /* Return all other types in memory.  */
4074   return true;
4075 }
4076 
4077 const struct pcs_attribute_arg
4078 {
4079   const char *arg;
4080   enum arm_pcs value;
4081 } pcs_attribute_args[] =
4082   {
4083     {"aapcs", ARM_PCS_AAPCS},
4084     {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
4085 #if 0
4086     /* We could recognize these, but changes would be needed elsewhere
4087      * to implement them.  */
4088     {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
4089     {"atpcs", ARM_PCS_ATPCS},
4090     {"apcs", ARM_PCS_APCS},
4091 #endif
4092     {NULL, ARM_PCS_UNKNOWN}
4093   };
4094 
4095 static enum arm_pcs
4096 arm_pcs_from_attribute (tree attr)
4097 {
4098   const struct pcs_attribute_arg *ptr;
4099   const char *arg;
4100 
4101   /* Get the value of the argument.  */
4102   if (TREE_VALUE (attr) == NULL_TREE
4103       || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
4104     return ARM_PCS_UNKNOWN;
4105 
4106   arg = TREE_STRING_POINTER (TREE_VALUE (attr));
4107 
4108   /* Check it against the list of known arguments.  */
4109   for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4110     if (streq (arg, ptr->arg))
4111       return ptr->value;
4112 
4113   /* An unrecognized PCS variant name.  */
4114   return ARM_PCS_UNKNOWN;
4115 }
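/* Usage sketch (added annotation, not from the original source): the
   "pcs" attribute parsed above is attached to function declarations, e.g.

       double f (double) __attribute__ ((pcs ("aapcs")));

   which selects the base AAPCS variant for f, so its argument and return
   value travel in core registers even when the target default is the VFP
   variant.  */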
4116 
4117 /* Get the PCS variant to use for this call.  TYPE is the function's type
4118    specification, DECL is the specific declaration.  DECL may be null if
4119    the call could be indirect or if this is a library call.  */
4120 static enum arm_pcs
4121 arm_get_pcs_model (const_tree type, const_tree decl)
4122 {
4123   bool user_convention = false;
4124   enum arm_pcs user_pcs = arm_pcs_default;
4125   tree attr;
4126 
4127   gcc_assert (type);
4128 
4129   attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
4130   if (attr)
4131     {
4132       user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
4133       user_convention = true;
4134     }
4135 
4136   if (TARGET_AAPCS_BASED)
4137     {
4138       /* Detect varargs functions.  These always use the base rules
4139 	 (no argument is ever a candidate for a co-processor
4140 	 register).  */
4141       bool base_rules = stdarg_p (type);
4142 
4143       if (user_convention)
4144 	{
4145 	  if (user_pcs > ARM_PCS_AAPCS_LOCAL)
4146 	    sorry ("non-AAPCS derived PCS variant");
4147 	  else if (base_rules && user_pcs != ARM_PCS_AAPCS)
4148 	    error ("variadic functions must use the base AAPCS variant");
4149 	}
4150 
4151       if (base_rules)
4152 	return ARM_PCS_AAPCS;
4153       else if (user_convention)
4154 	return user_pcs;
4155       else if (decl && flag_unit_at_a_time)
4156 	{
4157 	  /* Local functions never leak outside this compilation unit,
4158 	     so we are free to use whatever conventions are
4159 	     appropriate.  */
4160 	  /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
4161 	  struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4162 	  if (i && i->local)
4163 	    return ARM_PCS_AAPCS_LOCAL;
4164 	}
4165     }
4166   else if (user_convention && user_pcs != arm_pcs_default)
4167     sorry ("PCS variant");
4168 
4169   /* For everything else we use the target's default.  */
4170   return arm_pcs_default;
4171 }
4172 
4173 
4174 static void
4175 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
4176 		    const_tree fntype ATTRIBUTE_UNUSED,
4177 		    rtx libcall ATTRIBUTE_UNUSED,
4178 		    const_tree fndecl ATTRIBUTE_UNUSED)
4179 {
4180   /* Record the unallocated VFP registers.  */
4181   pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4182   pcum->aapcs_vfp_reg_alloc = 0;
4183 }
4184 
4185 /* Walk down the type tree of TYPE counting consecutive base elements.
4186    If *MODEP is VOIDmode, then set it to the first valid floating point
4187    type.  If a non-floating point type is found, or if a floating point
4188    type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4189    otherwise return the count in the sub-tree.  */
4190 static int
4191 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4192 {
4193   enum machine_mode mode;
4194   HOST_WIDE_INT size;
4195 
4196   switch (TREE_CODE (type))
4197     {
4198     case REAL_TYPE:
4199       mode = TYPE_MODE (type);
4200       if (mode != DFmode && mode != SFmode)
4201 	return -1;
4202 
4203       if (*modep == VOIDmode)
4204 	*modep = mode;
4205 
4206       if (*modep == mode)
4207 	return 1;
4208 
4209       break;
4210 
4211     case COMPLEX_TYPE:
4212       mode = TYPE_MODE (TREE_TYPE (type));
4213       if (mode != DFmode && mode != SFmode)
4214 	return -1;
4215 
4216       if (*modep == VOIDmode)
4217 	*modep = mode;
4218 
4219       if (*modep == mode)
4220 	return 2;
4221 
4222       break;
4223 
4224     case VECTOR_TYPE:
4225       /* Use V2SImode and V4SImode as representatives of all 64-bit
4226 	 and 128-bit vector types, whether or not those modes are
4227 	 supported with the present options.  */
4228       size = int_size_in_bytes (type);
4229       switch (size)
4230 	{
4231 	case 8:
4232 	  mode = V2SImode;
4233 	  break;
4234 	case 16:
4235 	  mode = V4SImode;
4236 	  break;
4237 	default:
4238 	  return -1;
4239 	}
4240 
4241       if (*modep == VOIDmode)
4242 	*modep = mode;
4243 
4244       /* Vector modes are considered to be opaque: two vectors are
4245 	 equivalent for the purposes of being homogeneous aggregates
4246 	 if they are the same size.  */
4247       if (*modep == mode)
4248 	return 1;
4249 
4250       break;
4251 
4252     case ARRAY_TYPE:
4253       {
4254 	int count;
4255 	tree index = TYPE_DOMAIN (type);
4256 
4257 	/* Can't handle incomplete types.  */
4258 	if (!COMPLETE_TYPE_P (type))
4259 	  return -1;
4260 
4261 	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4262 	if (count == -1
4263 	    || !index
4264 	    || !TYPE_MAX_VALUE (index)
4265 	    || !host_integerp (TYPE_MAX_VALUE (index), 1)
4266 	    || !TYPE_MIN_VALUE (index)
4267 	    || !host_integerp (TYPE_MIN_VALUE (index), 1)
4268 	    || count < 0)
4269 	  return -1;
4270 
4271 	count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
4272 		      - tree_low_cst (TYPE_MIN_VALUE (index), 1));
4273 
4274 	/* There must be no padding.  */
4275 	if (!host_integerp (TYPE_SIZE (type), 1)
4276 	    || (tree_low_cst (TYPE_SIZE (type), 1)
4277 		!= count * GET_MODE_BITSIZE (*modep)))
4278 	  return -1;
4279 
4280 	return count;
4281       }
4282 
4283     case RECORD_TYPE:
4284       {
4285 	int count = 0;
4286 	int sub_count;
4287 	tree field;
4288 
4289 	/* Can't handle incomplete types.  */
4290 	if (!COMPLETE_TYPE_P (type))
4291 	  return -1;
4292 
4293 	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4294 	  {
4295 	    if (TREE_CODE (field) != FIELD_DECL)
4296 	      continue;
4297 
4298 	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4299 	    if (sub_count < 0)
4300 	      return -1;
4301 	    count += sub_count;
4302 	  }
4303 
4304 	/* There must be no padding.  */
4305 	if (!host_integerp (TYPE_SIZE (type), 1)
4306 	    || (tree_low_cst (TYPE_SIZE (type), 1)
4307 		!= count * GET_MODE_BITSIZE (*modep)))
4308 	  return -1;
4309 
4310 	return count;
4311       }
4312 
4313     case UNION_TYPE:
4314     case QUAL_UNION_TYPE:
4315       {
4316 	/* These aren't very interesting except in a degenerate case.  */
4317 	int count = 0;
4318 	int sub_count;
4319 	tree field;
4320 
4321 	/* Can't handle incomplete types.  */
4322 	if (!COMPLETE_TYPE_P (type))
4323 	  return -1;
4324 
4325 	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4326 	  {
4327 	    if (TREE_CODE (field) != FIELD_DECL)
4328 	      continue;
4329 
4330 	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4331 	    if (sub_count < 0)
4332 	      return -1;
4333 	    count = count > sub_count ? count : sub_count;
4334 	  }
4335 
4336 	/* There must be no padding.  */
4337 	if (!host_integerp (TYPE_SIZE (type), 1)
4338 	    || (tree_low_cst (TYPE_SIZE (type), 1)
4339 		!= count * GET_MODE_BITSIZE (*modep)))
4340 	  return -1;
4341 
4342 	return count;
4343       }
4344 
4345     default:
4346       break;
4347     }
4348 
4349   return -1;
4350 }
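/* Worked examples (added annotation, not from the original source):
   struct { float x, y, z; } yields 3 with *MODEP == SFmode (a homogeneous
   aggregate of three floats), whereas struct { double d; float f; }
   yields -1 because the element modes do not all match.  */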
4351 
4352 /* Return true if PCS_VARIANT should use VFP registers.  */
4353 static bool
4354 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4355 {
4356   if (pcs_variant == ARM_PCS_AAPCS_VFP)
4357     {
4358       static bool seen_thumb1_vfp = false;
4359 
4360       if (TARGET_THUMB1 && !seen_thumb1_vfp)
4361 	{
4362 	  sorry ("Thumb-1 hard-float VFP ABI");
4363 	  /* sorry() is not immediately fatal, so only display this once.  */
4364 	  seen_thumb1_vfp = true;
4365 	}
4366 
4367       return true;
4368     }
4369 
4370   if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4371     return false;
4372 
4373   return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
4374 	  (TARGET_VFP_DOUBLE || !is_double));
4375 }
4376 
4377 /* Return true if an argument whose type is TYPE, or mode is MODE, is
4378    suitable for passing or returning in VFP registers for the PCS
4379    variant selected.  If it is, then *BASE_MODE is updated to contain
4380    a machine mode describing each element of the argument's type and
4381    *COUNT to hold the number of such elements.  */
4382 static bool
4383 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4384 				       enum machine_mode mode, const_tree type,
4385 				       enum machine_mode *base_mode, int *count)
4386 {
4387   enum machine_mode new_mode = VOIDmode;
4388 
4389   /* If we have the type information, prefer that to working things
4390      out from the mode.  */
4391   if (type)
4392     {
4393       int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4394 
4395       if (ag_count > 0 && ag_count <= 4)
4396 	*count = ag_count;
4397       else
4398 	return false;
4399     }
4400   else if (GET_MODE_CLASS (mode) == MODE_FLOAT
4401 	   || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4402 	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4403     {
4404       *count = 1;
4405       new_mode = mode;
4406     }
4407   else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4408     {
4409       *count = 2;
4410       new_mode = (mode == DCmode ? DFmode : SFmode);
4411     }
4412   else
4413     return false;
4414 
4415 
4416   if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4417     return false;
4418 
4419   *base_mode = new_mode;
4420   return true;
4421 }
4422 
4423 static bool
4424 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4425 			       enum machine_mode mode, const_tree type)
4426 {
4427   int count ATTRIBUTE_UNUSED;
4428   enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4429 
4430   if (!use_vfp_abi (pcs_variant, false))
4431     return false;
4432   return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4433 						&ag_mode, &count);
4434 }
4435 
4436 static bool
4437 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4438 			     const_tree type)
4439 {
4440   if (!use_vfp_abi (pcum->pcs_variant, false))
4441     return false;
4442 
4443   return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4444 						&pcum->aapcs_vfp_rmode,
4445 						&pcum->aapcs_vfp_rcount);
4446 }
4447 
4448 static bool
4449 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4450 		    const_tree type  ATTRIBUTE_UNUSED)
4451 {
4452   int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4453   unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4454   int regno;
4455 
4456   for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4457     if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4458       {
4459 	pcum->aapcs_vfp_reg_alloc = mask << regno;
4460 	if (mode == BLKmode
4461 	    || (mode == TImode && ! TARGET_NEON)
4462 	    || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
4463 	  {
4464 	    int i;
4465 	    int rcount = pcum->aapcs_vfp_rcount;
4466 	    int rshift = shift;
4467 	    enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4468 	    rtx par;
4469 	    if (!TARGET_NEON)
4470 	      {
4471 		/* Avoid using unsupported vector modes.  */
4472 		if (rmode == V2SImode)
4473 		  rmode = DImode;
4474 		else if (rmode == V4SImode)
4475 		  {
4476 		    rmode = DImode;
4477 		    rcount *= 2;
4478 		    rshift /= 2;
4479 		  }
4480 	      }
4481 	    par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4482 	    for (i = 0; i < rcount; i++)
4483 	      {
4484 		rtx tmp = gen_rtx_REG (rmode,
4485 				       FIRST_VFP_REGNUM + regno + i * rshift);
4486 		tmp = gen_rtx_EXPR_LIST
4487 		  (VOIDmode, tmp,
4488 		   GEN_INT (i * GET_MODE_SIZE (rmode)));
4489 		XVECEXP (par, 0, i) = tmp;
4490 	      }
4491 
4492 	    pcum->aapcs_reg = par;
4493 	  }
4494 	else
4495 	  pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4496 	return true;
4497       }
4498   return false;
4499 }
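/* Worked example (added annotation, not from the original source): with
   arguments (float, double, float) under the VFP variant, the first float
   takes s0, the double then skips to d1 (s2/s3) because s0 is in use, and
   the second float back-fills s1, since only s0 has been removed from
   aapcs_vfp_regs_free.  */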
4500 
4501 static rtx
4502 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4503 			       enum machine_mode mode,
4504 			       const_tree type ATTRIBUTE_UNUSED)
4505 {
4506   if (!use_vfp_abi (pcs_variant, false))
4507     return NULL;
4508 
4509   if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4510     {
4511       int count;
4512       enum machine_mode ag_mode;
4513       int i;
4514       rtx par;
4515       int shift;
4516 
4517       aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4518 					     &ag_mode, &count);
4519 
4520       if (!TARGET_NEON)
4521 	{
4522 	  if (ag_mode == V2SImode)
4523 	    ag_mode = DImode;
4524 	  else if (ag_mode == V4SImode)
4525 	    {
4526 	      ag_mode = DImode;
4527 	      count *= 2;
4528 	    }
4529 	}
4530       shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4531       par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4532       for (i = 0; i < count; i++)
4533 	{
4534 	  rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4535 	  tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4536 				   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4537 	  XVECEXP (par, 0, i) = tmp;
4538 	}
4539 
4540       return par;
4541     }
4542 
4543   return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4544 }
4545 
4546 static void
4547 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
4548 		   enum machine_mode mode  ATTRIBUTE_UNUSED,
4549 		   const_tree type  ATTRIBUTE_UNUSED)
4550 {
4551   pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4552   pcum->aapcs_vfp_reg_alloc = 0;
4553   return;
4554 }
4555 
4556 #define AAPCS_CP(X)				\
4557   {						\
4558     aapcs_ ## X ## _cum_init,			\
4559     aapcs_ ## X ## _is_call_candidate,		\
4560     aapcs_ ## X ## _allocate,			\
4561     aapcs_ ## X ## _is_return_candidate,	\
4562     aapcs_ ## X ## _allocate_return_reg,	\
4563     aapcs_ ## X ## _advance			\
4564   }
4565 
4566 /* Table of co-processors that can be used to pass arguments in
4567    registers.  Idealy no arugment should be a candidate for more than
4568    registers.  Ideally no argument should be a candidate for more than
4569    and stops after the first match.  If that entry then fails to put
4570    the argument into a co-processor register, the argument will go on
4571    the stack.  */
4572 static struct
4573 {
4574   /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
4575   void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4576 
4577   /* Return true if an argument of mode MODE (or type TYPE if MODE is
4578      BLKmode) is a candidate for this co-processor's registers; this
4579      function should ignore any position-dependent state in
4580      CUMULATIVE_ARGS and only use call-type dependent information.  */
4581   bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4582 
4583   /* Return true if the argument does get a co-processor register; it
4584      should set aapcs_reg to an RTX of the register allocated as is
4585      required for a return from FUNCTION_ARG.  */
4586   bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4587 
4588   /* Return true if a result of mode MODE (or type TYPE if MODE is
4589      BLKmode) can be returned in this co-processor's registers.  */
4590   bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4591 
4592   /* Allocate and return an RTX element to hold the return type of a
4593      call; this routine must not fail and will only be called if
4594      is_return_candidate returned true with the same parameters.  */
4595   rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4596 
4597   /* Finish processing this argument and prepare to start processing
4598      the next one.  */
4599   void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4600 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4601   {
4602     AAPCS_CP(vfp)
4603   };
4604 
4605 #undef AAPCS_CP
4606 
4607 static int
4608 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4609 			  const_tree type)
4610 {
4611   int i;
4612 
4613   for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4614     if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4615       return i;
4616 
4617   return -1;
4618 }
4619 
4620 static int
4621 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4622 {
4623   /* We aren't passed a decl, so we can't check that a call is local.
4624      However, it isn't clear that that would be a win anyway, since it
4625      might limit some tail-calling opportunities.  */
4626   enum arm_pcs pcs_variant;
4627 
4628   if (fntype)
4629     {
4630       const_tree fndecl = NULL_TREE;
4631 
4632       if (TREE_CODE (fntype) == FUNCTION_DECL)
4633 	{
4634 	  fndecl = fntype;
4635 	  fntype = TREE_TYPE (fntype);
4636 	}
4637 
4638       pcs_variant = arm_get_pcs_model (fntype, fndecl);
4639     }
4640   else
4641     pcs_variant = arm_pcs_default;
4642 
4643   if (pcs_variant != ARM_PCS_AAPCS)
4644     {
4645       int i;
4646 
4647       for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4648 	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4649 							TYPE_MODE (type),
4650 							type))
4651 	  return i;
4652     }
4653   return -1;
4654 }
4655 
4656 static rtx
4657 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4658 			   const_tree fntype)
4659 {
4660   /* We aren't passed a decl, so we can't check that a call is local.
4661      However, it isn't clear that that would be a win anyway, since it
4662      might limit some tail-calling opportunities.  */
4663   enum arm_pcs pcs_variant;
4664   int unsignedp ATTRIBUTE_UNUSED;
4665 
4666   if (fntype)
4667     {
4668       const_tree fndecl = NULL_TREE;
4669 
4670       if (TREE_CODE (fntype) == FUNCTION_DECL)
4671 	{
4672 	  fndecl = fntype;
4673 	  fntype = TREE_TYPE (fntype);
4674 	}
4675 
4676       pcs_variant = arm_get_pcs_model (fntype, fndecl);
4677     }
4678   else
4679     pcs_variant = arm_pcs_default;
4680 
4681   /* Promote integer types.  */
4682   if (type && INTEGRAL_TYPE_P (type))
4683     mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4684 
4685   if (pcs_variant != ARM_PCS_AAPCS)
4686     {
4687       int i;
4688 
4689       for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4690 	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4691 							type))
4692 	  return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4693 							     mode, type);
4694     }
4695 
4696   /* Promotes small structs returned in a register to full-word size
4697      for big-endian AAPCS.  */
4698   if (type && arm_return_in_msb (type))
4699     {
4700       HOST_WIDE_INT size = int_size_in_bytes (type);
4701       if (size % UNITS_PER_WORD != 0)
4702 	{
4703 	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4704 	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4705 	}
4706     }
4707 
4708   return gen_rtx_REG (mode, R0_REGNUM);
4709 }
4710 
4711 static rtx
4712 aapcs_libcall_value (enum machine_mode mode)
4713 {
4714   if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
4715       && GET_MODE_SIZE (mode) <= 4)
4716     mode = SImode;
4717 
4718   return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4719 }
4720 
4721 /* Lay out a function argument using the AAPCS rules.  The rule
4722    numbers referred to here are those in the AAPCS.  */
4723 static void
4724 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4725 		  const_tree type, bool named)
4726 {
4727   int nregs, nregs2;
4728   int ncrn;
4729 
4730   /* We only need to do this once per argument.  */
4731   if (pcum->aapcs_arg_processed)
4732     return;
4733 
4734   pcum->aapcs_arg_processed = true;
4735 
4736   /* Special case: if named is false then we are handling an incoming
4737      anonymous argument which is on the stack.  */
4738   if (!named)
4739     return;
4740 
4741   /* Is this a potential co-processor register candidate?  */
4742   if (pcum->pcs_variant != ARM_PCS_AAPCS)
4743     {
4744       int slot = aapcs_select_call_coproc (pcum, mode, type);
4745       pcum->aapcs_cprc_slot = slot;
4746 
4747       /* We don't have to apply any of the rules from part B of the
4748 	 preparation phase, these are handled elsewhere in the
4749 	 compiler.  */
4750 
4751       if (slot >= 0)
4752 	{
4753 	  /* A Co-processor register candidate goes either in its own
4754 	     class of registers or on the stack.  */
4755 	  if (!pcum->aapcs_cprc_failed[slot])
4756 	    {
4757 	      /* C1.cp - Try to allocate the argument to co-processor
4758 		 registers.  */
4759 	      if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4760 		return;
4761 
4762 	      /* C2.cp - Put the argument on the stack and note that we
4763 		 can't assign any more candidates in this slot.  We also
4764 		 need to note that we have allocated stack space, so that
4765 		 we won't later try to split a non-cprc candidate between
4766 		 core registers and the stack.  */
4767 	      pcum->aapcs_cprc_failed[slot] = true;
4768 	      pcum->can_split = false;
4769 	    }
4770 
4771 	  /* We didn't get a register, so this argument goes on the
4772 	     stack.  */
4773 	  gcc_assert (pcum->can_split == false);
4774 	  return;
4775 	}
4776     }
4777 
4778   /* C3 - For double-word aligned arguments, round the NCRN up to the
4779      next even number.  */
4780   ncrn = pcum->aapcs_ncrn;
4781   if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4782     ncrn++;
4783 
4784   nregs = ARM_NUM_REGS2(mode, type);
4785 
4786   /* Sigh, this test should really assert that nregs > 0, but a GCC
4787      extension allows empty structs and then gives them zero size; it
4788      then allows such a structure to be passed by value.  For some of
4789      the code below we have to pretend that such an argument has
4790      non-zero size so that we 'locate' it correctly either in
4791      registers or on the stack.  */
4792   gcc_assert (nregs >= 0);
4793 
4794   nregs2 = nregs ? nregs : 1;
4795 
4796   /* C4 - Argument fits entirely in core registers.  */
4797   if (ncrn + nregs2 <= NUM_ARG_REGS)
4798     {
4799       pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4800       pcum->aapcs_next_ncrn = ncrn + nregs;
4801       return;
4802     }
4803 
4804   /* C5 - Some core registers left and there are no arguments already
4805      on the stack: split this argument between the remaining core
4806      registers and the stack.  */
4807   if (ncrn < NUM_ARG_REGS && pcum->can_split)
4808     {
4809       pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4810       pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4811       pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4812       return;
4813     }
4814 
4815   /* C6 - NCRN is set to 4.  */
4816   pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4817 
4818   /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
4819   return;
4820 }
4821 
4822 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4823    for a call to a function whose data type is FNTYPE.
4824    For a library call, FNTYPE is NULL.  */
4825 void
4826 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4827 			  rtx libname,
4828 			  tree fndecl ATTRIBUTE_UNUSED)
4829 {
4830   /* Long call handling.  */
4831   if (fntype)
4832     pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4833   else
4834     pcum->pcs_variant = arm_pcs_default;
4835 
4836   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4837     {
4838       if (arm_libcall_uses_aapcs_base (libname))
4839 	pcum->pcs_variant = ARM_PCS_AAPCS;
4840 
4841       pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4842       pcum->aapcs_reg = NULL_RTX;
4843       pcum->aapcs_partial = 0;
4844       pcum->aapcs_arg_processed = false;
4845       pcum->aapcs_cprc_slot = -1;
4846       pcum->can_split = true;
4847 
4848       if (pcum->pcs_variant != ARM_PCS_AAPCS)
4849 	{
4850 	  int i;
4851 
4852 	  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4853 	    {
4854 	      pcum->aapcs_cprc_failed[i] = false;
4855 	      aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4856 	    }
4857 	}
4858       return;
4859     }
4860 
4861   /* Legacy ABIs */
4862 
4863   /* On the ARM, the offset starts at 0.  */
4864   pcum->nregs = 0;
4865   pcum->iwmmxt_nregs = 0;
4866   pcum->can_split = true;
4867 
4868   /* Varargs vectors are treated the same as long long.
4869      named_count avoids having to change the way arm handles 'named'.  */
4870   pcum->named_count = 0;
4871   pcum->nargs = 0;
4872 
4873   if (TARGET_REALLY_IWMMXT && fntype)
4874     {
4875       tree fn_arg;
4876 
4877       for (fn_arg = TYPE_ARG_TYPES (fntype);
4878 	   fn_arg;
4879 	   fn_arg = TREE_CHAIN (fn_arg))
4880 	pcum->named_count += 1;
4881 
4882       if (! pcum->named_count)
4883 	pcum->named_count = INT_MAX;
4884     }
4885 }
4886 
4887 
4888 /* Return true if mode/type need doubleword alignment.  */
4889 static bool
4890 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4891 {
4892   return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4893 	  || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4894 }
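
/* The check above means that, for example, an argument declared as

     long long x;

   (DImode, 64-bit alignment) needs doubleword alignment on ARM, since
   PARM_BOUNDARY is 32, while an int or a pointer argument does not.  */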
4895 
4896 
4897 /* Determine where to put an argument to a function.
4898    Value is zero to push the argument on the stack,
4899    or a hard register in which to store the argument.
4900 
4901    MODE is the argument's machine mode.
4902    TYPE is the data type of the argument (as a tree).
4903     This is null for libcalls where that information may
4904     not be available.
4905    CUM is a variable of type CUMULATIVE_ARGS which gives info about
4906     the preceding args and about the function being called.
4907    NAMED is nonzero if this argument is a named parameter
4908     (otherwise it is an extra parameter matching an ellipsis).
4909 
4910    On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4911    other arguments are passed on the stack.  If (NAMED == 0) (which happens
4912    only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4913    defined), say it is passed on the stack (function_prologue will
4914    indeed make it be passed on the stack if necessary).  */
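/* For instance, a hypothetical call

     extern void g (int, int, int, int, int);
     g (1, 2, 3, 4, 5);

   passes the first four arguments in r0-r3 and the fifth on the stack.
   For the AAPCS variants the detailed layout is delegated to
   aapcs_layout_arg above.  */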
4915 
4916 static rtx
4917 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
4918 		  const_tree type, bool named)
4919 {
4920   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4921   int nregs;
4922 
4923   /* Handle the special case quickly.  Pick an arbitrary value for op2 of
4924      a call insn (op3 of a call_value insn).  */
4925   if (mode == VOIDmode)
4926     return const0_rtx;
4927 
4928   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4929     {
4930       aapcs_layout_arg (pcum, mode, type, named);
4931       return pcum->aapcs_reg;
4932     }
4933 
4934   /* Varargs vectors are treated the same as long long.
4935      named_count avoids having to change the way arm handles 'named'.  */
4936   if (TARGET_IWMMXT_ABI
4937       && arm_vector_mode_supported_p (mode)
4938       && pcum->named_count > pcum->nargs + 1)
4939     {
4940       if (pcum->iwmmxt_nregs <= 9)
4941 	return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4942       else
4943 	{
4944 	  pcum->can_split = false;
4945 	  return NULL_RTX;
4946 	}
4947     }
4948 
4949   /* Put doubleword aligned quantities in even register pairs.  */
4950   if (pcum->nregs & 1
4951       && ARM_DOUBLEWORD_ALIGN
4952       && arm_needs_doubleword_align (mode, type))
4953     pcum->nregs++;
4954 
4955   /* Only allow splitting an arg between regs and memory if all preceding
4956      args were allocated to regs.  For args passed by reference we only count
4957      the reference pointer.  */
4958   if (pcum->can_split)
4959     nregs = 1;
4960   else
4961     nregs = ARM_NUM_REGS2 (mode, type);
4962 
4963   if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4964     return NULL_RTX;
4965 
4966   return gen_rtx_REG (mode, pcum->nregs);
4967 }
4968 
4969 static unsigned int
4970 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4971 {
4972   return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4973 	  ? DOUBLEWORD_ALIGNMENT
4974 	  : PARM_BOUNDARY);
4975 }
4976 
4977 static int
4978 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
4979 		       tree type, bool named)
4980 {
4981   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4982   int nregs = pcum->nregs;
4983 
4984   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4985     {
4986       aapcs_layout_arg (pcum, mode, type, named);
4987       return pcum->aapcs_partial;
4988     }
4989 
4990   if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4991     return 0;
4992 
4993   if (NUM_ARG_REGS > nregs
4994       && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4995       && pcum->can_split)
4996     return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4997 
4998   return 0;
4999 }
5000 
5001 /* Update the data in PCUM to advance over an argument
5002    of mode MODE and data type TYPE.
5003    (TYPE is null for libcalls where that information may not be available.)  */
5004 
5005 static void
5006 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
5007 			  const_tree type, bool named)
5008 {
5009   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5010 
5011   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5012     {
5013       aapcs_layout_arg (pcum, mode, type, named);
5014 
5015       if (pcum->aapcs_cprc_slot >= 0)
5016 	{
5017 	  aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
5018 							      type);
5019 	  pcum->aapcs_cprc_slot = -1;
5020 	}
5021 
5022       /* Generic stuff.  */
5023       pcum->aapcs_arg_processed = false;
5024       pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
5025       pcum->aapcs_reg = NULL_RTX;
5026       pcum->aapcs_partial = 0;
5027     }
5028   else
5029     {
5030       pcum->nargs += 1;
5031       if (arm_vector_mode_supported_p (mode)
5032 	  && pcum->named_count > pcum->nargs
5033 	  && TARGET_IWMMXT_ABI)
5034 	pcum->iwmmxt_nregs += 1;
5035       else
5036 	pcum->nregs += ARM_NUM_REGS2 (mode, type);
5037     }
5038 }
5039 
5040 /* Variable sized types are passed by reference.  This is a GCC
5041    extension to the ARM ABI.  */
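/* For example (a sketch using GCC's variable-sized struct extension,
   where "callee" is a hypothetical function taking struct s by value):

     void caller (int n)
     {
       struct s { int len; char data[n]; } v;
       callee (v);
     }

   TYPE_SIZE of struct s is not an INTEGER_CST here, so the hook below
   returns true and V is passed by invisible reference rather than by
   value.  */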
5042 
5043 static bool
5044 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
5045 		       enum machine_mode mode ATTRIBUTE_UNUSED,
5046 		       const_tree type, bool named ATTRIBUTE_UNUSED)
5047 {
5048   return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
5049 }
5050 
5051 /* Encode the current state of the #pragma [no_]long_calls.  */
5052 typedef enum
5053 {
5054   OFF,		/* No #pragma [no_]long_calls is in effect.  */
5055   LONG,		/* #pragma long_calls is in effect.  */
5056   SHORT		/* #pragma no_long_calls is in effect.  */
5057 } arm_pragma_enum;
5058 
5059 static arm_pragma_enum arm_pragma_long_calls = OFF;
5060 
5061 void
5062 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5063 {
5064   arm_pragma_long_calls = LONG;
5065 }
5066 
5067 void
5068 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5069 {
5070   arm_pragma_long_calls = SHORT;
5071 }
5072 
5073 void
5074 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5075 {
5076   arm_pragma_long_calls = OFF;
5077 }
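
/* In user code the pragmas handled above are used like this (a sketch;
   the function names are hypothetical):

     #pragma long_calls
     void far_helper (void);        (calls use the long-call sequence)
     #pragma no_long_calls
     void near_helper (void);       (calls use a plain BL)
     #pragma long_calls_off         (revert to the command-line default)

   The resulting long_call/short_call type attributes are attached by
   arm_set_default_type_attributes below.  */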
5078 
5079 /* Handle an attribute requiring a FUNCTION_DECL;
5080    arguments as in struct attribute_spec.handler.  */
5081 static tree
5082 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
5083 			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5084 {
5085   if (TREE_CODE (*node) != FUNCTION_DECL)
5086     {
5087       warning (OPT_Wattributes, "%qE attribute only applies to functions",
5088 	       name);
5089       *no_add_attrs = true;
5090     }
5091 
5092   return NULL_TREE;
5093 }
5094 
5095 /* Handle an "interrupt" or "isr" attribute;
5096    arguments as in struct attribute_spec.handler.  */
5097 static tree
5098 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
5099 			  bool *no_add_attrs)
5100 {
5101   if (DECL_P (*node))
5102     {
5103       if (TREE_CODE (*node) != FUNCTION_DECL)
5104 	{
5105 	  warning (OPT_Wattributes, "%qE attribute only applies to functions",
5106 		   name);
5107 	  *no_add_attrs = true;
5108 	}
5109       /* FIXME: the argument, if any, is checked for type attributes;
5110 	 should it be checked for decl ones?  */
5111     }
5112   else
5113     {
5114       if (TREE_CODE (*node) == FUNCTION_TYPE
5115 	  || TREE_CODE (*node) == METHOD_TYPE)
5116 	{
5117 	  if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5118 	    {
5119 	      warning (OPT_Wattributes, "%qE attribute ignored",
5120 		       name);
5121 	      *no_add_attrs = true;
5122 	    }
5123 	}
5124       else if (TREE_CODE (*node) == POINTER_TYPE
5125 	       && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
5126 		   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
5127 	       && arm_isr_value (args) != ARM_FT_UNKNOWN)
5128 	{
5129 	  *node = build_variant_type_copy (*node);
5130 	  TREE_TYPE (*node) = build_type_attribute_variant
5131 	    (TREE_TYPE (*node),
5132 	     tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
5133 	  *no_add_attrs = true;
5134 	}
5135       else
5136 	{
5137 	  /* Possibly pass this attribute on from the type to a decl.  */
5138 	  if (flags & ((int) ATTR_FLAG_DECL_NEXT
5139 		       | (int) ATTR_FLAG_FUNCTION_NEXT
5140 		       | (int) ATTR_FLAG_ARRAY_NEXT))
5141 	    {
5142 	      *no_add_attrs = true;
5143 	      return tree_cons (name, args, NULL_TREE);
5144 	    }
5145 	  else
5146 	    {
5147 	      warning (OPT_Wattributes, "%qE attribute ignored",
5148 		       name);
5149 	    }
5150 	}
5151     }
5152 
5153   return NULL_TREE;
5154 }
5155 
5156 /* Handle a "pcs" attribute; arguments as in struct
5157    attribute_spec.handler.  */
5158 static tree
5159 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5160 			  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5161 {
5162   if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
5163     {
5164       warning (OPT_Wattributes, "%qE attribute ignored", name);
5165       *no_add_attrs = true;
5166     }
5167   return NULL_TREE;
5168 }
5169 
5170 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5171 /* Handle the "notshared" attribute.  This attribute is another way of
5172    requesting hidden visibility.  ARM's compiler supports
5173    "__declspec(notshared)"; we support the same thing via an
5174    attribute.  */
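/* For example (a C++ sketch):

     struct __attribute__ ((notshared)) S { virtual void f (); };

   requests hidden visibility for S, much as __declspec(notshared) does
   with ARM's compiler.  */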
5175 
5176 static tree
5177 arm_handle_notshared_attribute (tree *node,
5178 				tree name ATTRIBUTE_UNUSED,
5179 				tree args ATTRIBUTE_UNUSED,
5180 				int flags ATTRIBUTE_UNUSED,
5181 				bool *no_add_attrs)
5182 {
5183   tree decl = TYPE_NAME (*node);
5184 
5185   if (decl)
5186     {
5187       DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5188       DECL_VISIBILITY_SPECIFIED (decl) = 1;
5189       *no_add_attrs = false;
5190     }
5191   return NULL_TREE;
5192 }
5193 #endif
5194 
5195 /* Return 0 if the attributes for two types are incompatible, 1 if they
5196    are compatible, and 2 if they are nearly compatible (which causes a
5197    warning to be generated).  */
5198 static int
5199 arm_comp_type_attributes (const_tree type1, const_tree type2)
5200 {
5201   int l1, l2, s1, s2;
5202 
5203   /* Check for mismatch of non-default calling convention.  */
5204   if (TREE_CODE (type1) != FUNCTION_TYPE)
5205     return 1;
5206 
5207   /* Check for mismatched call attributes.  */
5208   l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5209   l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5210   s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5211   s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5212 
5213   /* Only bother to check if an attribute is defined.  */
5214   if (l1 | l2 | s1 | s2)
5215     {
5216       /* If one type has an attribute, the other must have the same attribute.  */
5217       if ((l1 != l2) || (s1 != s2))
5218 	return 0;
5219 
5220       /* Disallow mixed attributes.  */
5221       if ((l1 & s2) || (l2 & s1))
5222 	return 0;
5223     }
5224 
5225   /* Check for mismatched ISR attribute.  */
5226   l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5227   if (! l1)
5228     l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5229   l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5230   if (! l2)
5231     l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5232   if (l1 != l2)
5233     return 0;
5234 
5235   return 1;
5236 }
5237 
5238 /*  Assigns default attributes to newly defined type.  This is used to
5239     set short_call/long_call attributes for function types of
5240     functions defined inside corresponding #pragma scopes.  */
5241 static void
5242 arm_set_default_type_attributes (tree type)
5243 {
5244   /* Add __attribute__ ((long_call)) to all functions when inside
5245      #pragma long_calls, or __attribute__ ((short_call)) when inside
5246      #pragma no_long_calls.  */
5247   if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5248     {
5249       tree type_attr_list, attr_name;
5250       type_attr_list = TYPE_ATTRIBUTES (type);
5251 
5252       if (arm_pragma_long_calls == LONG)
5253  	attr_name = get_identifier ("long_call");
5254       else if (arm_pragma_long_calls == SHORT)
5255  	attr_name = get_identifier ("short_call");
5256       else
5257  	return;
5258 
5259       type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5260       TYPE_ATTRIBUTES (type) = type_attr_list;
5261     }
5262 }
5263 
5264 /* Return true if DECL is known to be linked into section SECTION.  */
5265 
5266 static bool
5267 arm_function_in_section_p (tree decl, section *section)
5268 {
5269   /* We can only be certain about functions defined in the same
5270      compilation unit.  */
5271   if (!TREE_STATIC (decl))
5272     return false;
5273 
5274   /* Make sure that SYMBOL always binds to the definition in this
5275      compilation unit.  */
5276   if (!targetm.binds_local_p (decl))
5277     return false;
5278 
5279   /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
5280   if (!DECL_SECTION_NAME (decl))
5281     {
5282       /* Make sure that we will not create a unique section for DECL.  */
5283       if (flag_function_sections || DECL_ONE_ONLY (decl))
5284 	return false;
5285     }
5286 
5287   return function_section (decl) == section;
5288 }
5289 
5290 /* Return nonzero if a 32-bit "long_call" should be generated for
5291    a call from the current function to DECL.  We generate a long_call
5292    if the function:
5293 
5294         a.  has an __attribute__((long_call))
5295      or b.  is within the scope of a #pragma long_calls
5296      or c.  the -mlong-calls command line switch has been specified
5297 
5298    However we do not generate a long call if the function:
5299 
5300         d.  has an __attribute__ ((short_call))
5301      or e.  is inside the scope of a #pragma no_long_calls
5302      or f.  is defined in the same section as the current function.  */
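/* For example (a sketch; the function names are hypothetical):

     void far_target (void) __attribute__ ((long_call));
     void near_target (void) __attribute__ ((short_call));

   Calls to far_target are always emitted as long calls, while calls to
   near_target use a plain BL even under -mlong-calls.  */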
5303 
5304 bool
5305 arm_is_long_call_p (tree decl)
5306 {
5307   tree attrs;
5308 
5309   if (!decl)
5310     return TARGET_LONG_CALLS;
5311 
5312   attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5313   if (lookup_attribute ("short_call", attrs))
5314     return false;
5315 
5316   /* For "f", be conservative, and only cater for cases in which the
5317      whole of the current function is placed in the same section.  */
5318   if (!flag_reorder_blocks_and_partition
5319       && TREE_CODE (decl) == FUNCTION_DECL
5320       && arm_function_in_section_p (decl, current_function_section ()))
5321     return false;
5322 
5323   if (lookup_attribute ("long_call", attrs))
5324     return true;
5325 
5326   return TARGET_LONG_CALLS;
5327 }
5328 
5329 /* Return nonzero if it is ok to make a tail-call to DECL.  */
5330 static bool
5331 arm_function_ok_for_sibcall (tree decl, tree exp)
5332 {
5333   unsigned long func_type;
5334 
5335   if (cfun->machine->sibcall_blocked)
5336     return false;
5337 
5338   /* Never tailcall something for which we have no decl, or if we
5339      are generating code for Thumb-1.  */
5340   if (decl == NULL || TARGET_THUMB1)
5341     return false;
5342 
5343   /* The PIC register is live on entry to VxWorks PLT entries, so we
5344      must make the call before restoring the PIC register.  */
5345   if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5346     return false;
5347 
5348   /* Cannot tail-call to long calls, since these are out of range of
5349      a branch instruction.  */
5350   if (arm_is_long_call_p (decl))
5351     return false;
5352 
5353   /* If we are interworking and the function is not declared static
5354      then we can't tail-call it unless we know that it exists in this
5355      compilation unit (since it might be a Thumb routine).  */
5356   if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
5357     return false;
5358 
5359   func_type = arm_current_func_type ();
5360   /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
5361   if (IS_INTERRUPT (func_type))
5362     return false;
5363 
5364   if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5365     {
5366       /* Check that the return value locations are the same.  For
5367 	 example that we aren't returning a value from the sibling in
5368 	 a VFP register but then need to transfer it to a core
5369 	 register.  */
5370       rtx a, b;
5371 
5372       a = arm_function_value (TREE_TYPE (exp), decl, false);
5373       b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5374 			      cfun->decl, false);
5375       if (!rtx_equal_p (a, b))
5376 	return false;
5377     }
5378 
5379   /* Never tailcall if function may be called with a misaligned SP.  */
5380   if (IS_STACKALIGN (func_type))
5381     return false;
5382 
5383   /* The AAPCS says that, on bare-metal, calls to unresolved weak
5384      references should become a NOP.  Don't convert such calls into
5385      sibling calls.  */
5386   if (TARGET_AAPCS_BASED
5387       && arm_abi == ARM_ABI_AAPCS
5388       && DECL_WEAK (decl))
5389     return false;
5390 
5391   /* Everything else is ok.  */
5392   return true;
5393 }
5394 
5395 
5396 /* Addressing mode support functions.  */
5397 
5398 /* Return nonzero if X is a legitimate immediate operand when compiling
5399    for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
5400 int
5401 legitimate_pic_operand_p (rtx x)
5402 {
5403   if (GET_CODE (x) == SYMBOL_REF
5404       || (GET_CODE (x) == CONST
5405 	  && GET_CODE (XEXP (x, 0)) == PLUS
5406 	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5407     return 0;
5408 
5409   return 1;
5410 }
5411 
5412 /* Record that the current function needs a PIC register.  Initialize
5413    cfun->machine->pic_reg if we have not already done so.  */
5414 
5415 static void
5416 require_pic_register (void)
5417 {
5418   /* A lot of the logic here is made obscure by the fact that this
5419      routine gets called as part of the rtx cost estimation process.
5420      We don't want those calls to affect any assumptions about the real
5421      function; and further, we can't call entry_of_function() until we
5422      start the real expansion process.  */
5423   if (!crtl->uses_pic_offset_table)
5424     {
5425       gcc_assert (can_create_pseudo_p ());
5426       if (arm_pic_register != INVALID_REGNUM
5427 	  && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
5428 	{
5429 	  if (!cfun->machine->pic_reg)
5430 	    cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5431 
5432 	  /* Play games to avoid marking the function as needing pic
5433 	     if we are being called as part of the cost-estimation
5434 	     process.  */
5435 	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5436 	    crtl->uses_pic_offset_table = 1;
5437 	}
5438       else
5439 	{
5440 	  rtx seq, insn;
5441 
5442 	  if (!cfun->machine->pic_reg)
5443 	    cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5444 
5445 	  /* Play games to avoid marking the function as needing pic
5446 	     if we are being called as part of the cost-estimation
5447 	     process.  */
5448 	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5449 	    {
5450 	      crtl->uses_pic_offset_table = 1;
5451 	      start_sequence ();
5452 
5453 	      if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
5454 		  && arm_pic_register > LAST_LO_REGNUM)
5455 		emit_move_insn (cfun->machine->pic_reg,
5456 				gen_rtx_REG (Pmode, arm_pic_register));
5457 	      else
5458 		arm_load_pic_register (0UL);
5459 
5460 	      seq = get_insns ();
5461 	      end_sequence ();
5462 
5463 	      for (insn = seq; insn; insn = NEXT_INSN (insn))
5464 		if (INSN_P (insn))
5465 		  INSN_LOCATION (insn) = prologue_location;
5466 
5467 	      /* We can be called during expansion of PHI nodes, where
5468 	         we can't yet emit instructions directly in the final
5469 		 insn stream.  Queue the insns on the entry edge, they will
5470 		 be committed after everything else is expanded.  */
5471 	      insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5472 	    }
5473 	}
5474     }
5475 }
5476 
5477 rtx
5478 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5479 {
5480   if (GET_CODE (orig) == SYMBOL_REF
5481       || GET_CODE (orig) == LABEL_REF)
5482     {
5483       rtx insn;
5484 
5485       if (reg == 0)
5486 	{
5487 	  gcc_assert (can_create_pseudo_p ());
5488 	  reg = gen_reg_rtx (Pmode);
5489 	}
5490 
5491       /* VxWorks does not impose a fixed gap between segments; the run-time
5492 	 gap can be different from the object-file gap.  We therefore can't
5493 	 use GOTOFF unless we are absolutely sure that the symbol is in the
5494 	 same segment as the GOT.  Unfortunately, the flexibility of linker
5495 	 scripts means that we can't be sure of that in general, so assume
5496 	 that GOTOFF is never valid on VxWorks.  */
5497       if ((GET_CODE (orig) == LABEL_REF
5498 	   || (GET_CODE (orig) == SYMBOL_REF &&
5499 	       SYMBOL_REF_LOCAL_P (orig)))
5500 	  && NEED_GOT_RELOC
5501 	  && !TARGET_VXWORKS_RTP)
5502 	insn = arm_pic_static_addr (orig, reg);
5503       else
5504 	{
5505 	  rtx pat;
5506 	  rtx mem;
5507 
5508 	  /* If this function doesn't have a pic register, create one now.  */
5509 	  require_pic_register ();
5510 
5511 	  pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5512 
5513 	  /* Make the MEM as close to a constant as possible.  */
5514 	  mem = SET_SRC (pat);
5515 	  gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5516 	  MEM_READONLY_P (mem) = 1;
5517 	  MEM_NOTRAP_P (mem) = 1;
5518 
5519 	  insn = emit_insn (pat);
5520 	}
5521 
5522       /* Put a REG_EQUAL note on this insn, so that it can be optimized
5523 	 by the loop optimizer.  */
5524       set_unique_reg_note (insn, REG_EQUAL, orig);
5525 
5526       return reg;
5527     }
5528   else if (GET_CODE (orig) == CONST)
5529     {
5530       rtx base, offset;
5531 
5532       if (GET_CODE (XEXP (orig, 0)) == PLUS
5533 	  && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5534 	return orig;
5535 
5536       /* Handle the case where we have: const (UNSPEC_TLS).  */
5537       if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5538 	  && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5539 	return orig;
5540 
5541       /* Handle the case where we have:
5542          const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
5543          CONST_INT.  */
5544       if (GET_CODE (XEXP (orig, 0)) == PLUS
5545           && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5546           && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5547         {
5548 	  gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
5549 	  return orig;
5550 	}
5551 
5552       if (reg == 0)
5553 	{
5554 	  gcc_assert (can_create_pseudo_p ());
5555 	  reg = gen_reg_rtx (Pmode);
5556 	}
5557 
5558       gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5559 
5560       base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5561       offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5562 				       base == reg ? 0 : reg);
5563 
5564       if (CONST_INT_P (offset))
5565 	{
5566 	  /* The base register doesn't really matter, we only want to
5567 	     test the index for the appropriate mode.  */
5568 	  if (!arm_legitimate_index_p (mode, offset, SET, 0))
5569 	    {
5570 	      gcc_assert (can_create_pseudo_p ());
5571 	      offset = force_reg (Pmode, offset);
5572 	    }
5573 
5574 	  if (CONST_INT_P (offset))
5575 	    return plus_constant (Pmode, base, INTVAL (offset));
5576 	}
5577 
5578       if (GET_MODE_SIZE (mode) > 4
5579 	  && (GET_MODE_CLASS (mode) == MODE_INT
5580 	      || TARGET_SOFT_FLOAT))
5581 	{
5582 	  emit_insn (gen_addsi3 (reg, base, offset));
5583 	  return reg;
5584 	}
5585 
5586       return gen_rtx_PLUS (Pmode, base, offset);
5587     }
5588 
5589   return orig;
5590 }
5591 
5592 
5593 /* Find a spare register to use during the prologue of a function.  */
5594 
5595 static int
5596 thumb_find_work_register (unsigned long pushed_regs_mask)
5597 {
5598   int reg;
5599 
5600   /* Check the argument registers first as these are call-used.  The
5601      register allocation order means that sometimes r3 might be used
5602      but earlier argument registers might not, so check them all.  */
5603   for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5604     if (!df_regs_ever_live_p (reg))
5605       return reg;
5606 
5607   /* Before going on to check the call-saved registers we can try a couple
5608      more ways of deducing that r3 is available.  The first is when we are
5609      pushing anonymous arguments onto the stack and we have fewer than 4
5610      registers' worth of fixed arguments (*).  In this case r3 will be part of
5611      the variable argument list and so we can be sure that it will be
5612      pushed right at the start of the function.  Hence it will be available
5613      for the rest of the prologue.
5614      (*): i.e. crtl->args.pretend_args_size is greater than 0.  */
5615   if (cfun->machine->uses_anonymous_args
5616       && crtl->args.pretend_args_size > 0)
5617     return LAST_ARG_REGNUM;
5618 
5619   /* The other case is when we have fixed arguments but less than 4 registers
5620      worth.  In this case r3 might be used in the body of the function, but
5621      it is not being used to convey an argument into the function.  In theory
5622      we could just check crtl->args.size to see how many bytes are
5623      being passed in argument registers, but it seems that it is unreliable.
5624      Sometimes it will have the value 0 when in fact arguments are being
5625      passed.  (See testcase execute/20021111-1.c for an example).  So we also
5626      check the args_info.nregs field as well.  The problem with this field is
5627      that it makes no allowances for arguments that are passed to the
5628      function but which are not used.  Hence we could miss an opportunity
5629      when a function has an unused argument in r3.  But it is better to be
5630      safe than to be sorry.  */
5631   if (! cfun->machine->uses_anonymous_args
5632       && crtl->args.size >= 0
5633       && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5634       && (TARGET_AAPCS_BASED
5635 	  ? crtl->args.info.aapcs_ncrn < 4
5636 	  : crtl->args.info.nregs < 4))
5637     return LAST_ARG_REGNUM;
5638 
5639   /* Otherwise look for a call-saved register that is going to be pushed.  */
5640   for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5641     if (pushed_regs_mask & (1 << reg))
5642       return reg;
5643 
5644   if (TARGET_THUMB2)
5645     {
5646       /* Thumb-2 can use high regs.  */
5647       for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5648 	if (pushed_regs_mask & (1 << reg))
5649 	  return reg;
5650     }
5651   /* Something went wrong - thumb_compute_save_reg_mask()
5652      should have arranged for a suitable register to be pushed.  */
5653   gcc_unreachable ();
5654 }
5655 
5656 static GTY(()) int pic_labelno;
5657 
5658 /* Generate code to load the PIC register.  In thumb mode SCRATCH is a
5659    low register.  */
5660 
5661 void
5662 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5663 {
5664   rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5665 
5666   if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5667     return;
5668 
5669   gcc_assert (flag_pic);
5670 
5671   pic_reg = cfun->machine->pic_reg;
5672   if (TARGET_VXWORKS_RTP)
5673     {
5674       pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5675       pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5676       emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5677 
5678       emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5679 
5680       pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5681       emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5682     }
5683   else
5684     {
5685       /* We use an UNSPEC rather than a LABEL_REF because this label
5686 	 never appears in the code stream.  */
5687 
5688       labelno = GEN_INT (pic_labelno++);
5689       l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5690       l1 = gen_rtx_CONST (VOIDmode, l1);
5691 
5692       /* On the ARM the PC register contains 'dot + 8' at the time of the
5693 	 addition, on the Thumb it is 'dot + 4'.  */
5694       pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
5695       pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5696 				UNSPEC_GOTSYM_OFF);
5697       pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5698 
5699       if (TARGET_32BIT)
5700 	{
5701 	  emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5702 	}
5703       else /* TARGET_THUMB1 */
5704 	{
5705 	  if (arm_pic_register != INVALID_REGNUM
5706 	      && REGNO (pic_reg) > LAST_LO_REGNUM)
5707 	    {
5708 	      /* We will have pushed the pic register, so we should always be
5709 		 able to find a work register.  */
5710 	      pic_tmp = gen_rtx_REG (SImode,
5711 				     thumb_find_work_register (saved_regs));
5712 	      emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5713 	      emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5714 	      emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5715 	    }
5716 	  else if (arm_pic_register != INVALID_REGNUM
5717 		   && arm_pic_register > LAST_LO_REGNUM
5718 		   && REGNO (pic_reg) <= LAST_LO_REGNUM)
5719 	    {
5720 	      emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5721 	      emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
5722 	      emit_use (gen_rtx_REG (Pmode, arm_pic_register));
5723 	    }
5724 	  else
5725 	    emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5726 	}
5727     }
5728 
5729   /* Need to emit this whether or not we obey regdecls,
5730      since setjmp/longjmp can cause life info to screw up.  */
5731   emit_use (pic_reg);
5732 }
5733 
5734 /* Generate code to load the address of a static var when flag_pic is set.  */
5735 static rtx
5736 arm_pic_static_addr (rtx orig, rtx reg)
5737 {
5738   rtx l1, labelno, offset_rtx, insn;
5739 
5740   gcc_assert (flag_pic);
5741 
5742   /* We use an UNSPEC rather than a LABEL_REF because this label
5743      never appears in the code stream.  */
5744   labelno = GEN_INT (pic_labelno++);
5745   l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5746   l1 = gen_rtx_CONST (VOIDmode, l1);
5747 
5748   /* On the ARM the PC register contains 'dot + 8' at the time of the
5749      addition, on the Thumb it is 'dot + 4'.  */
5750   offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
5751   offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5752                                UNSPEC_SYMBOL_OFFSET);
5753   offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5754 
5755   insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
5756   return insn;
5757 }
5758 
5759 /* Return nonzero if X is valid as an ARM state addressing register.  */
5760 static int
5761 arm_address_register_rtx_p (rtx x, int strict_p)
5762 {
5763   int regno;
5764 
5765   if (!REG_P (x))
5766     return 0;
5767 
5768   regno = REGNO (x);
5769 
5770   if (strict_p)
5771     return ARM_REGNO_OK_FOR_BASE_P (regno);
5772 
5773   return (regno <= LAST_ARM_REGNUM
5774 	  || regno >= FIRST_PSEUDO_REGISTER
5775 	  || regno == FRAME_POINTER_REGNUM
5776 	  || regno == ARG_POINTER_REGNUM);
5777 }
5778 
5779 /* Return TRUE if this rtx is the difference of a symbol and a label,
5780    and will reduce to a PC-relative relocation in the object file.
5781    Expressions like this can be left alone when generating PIC, rather
5782    than forced through the GOT.  */
5783 static int
5784 pcrel_constant_p (rtx x)
5785 {
5786   if (GET_CODE (x) == MINUS)
5787     return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5788 
5789   return FALSE;
5790 }
5791 
5792 /* Return true if X will surely end up in an index register after next
5793    splitting pass.  */
5794 static bool
5795 will_be_in_index_register (const_rtx x)
5796 {
5797   /* arm.md: calculate_pic_address will split this into a register.  */
5798   return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
5799 }
5800 
5801 /* Return nonzero if X is a valid ARM state address operand.  */
5802 int
5803 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5804 			        int strict_p)
5805 {
5806   bool use_ldrd;
5807   enum rtx_code code = GET_CODE (x);
5808 
5809   if (arm_address_register_rtx_p (x, strict_p))
5810     return 1;
5811 
5812   use_ldrd = (TARGET_LDRD
5813 	      && (mode == DImode
5814 		  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5815 
5816   if (code == POST_INC || code == PRE_DEC
5817       || ((code == PRE_INC || code == POST_DEC)
5818 	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5819     return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5820 
5821   else if ((code == POST_MODIFY || code == PRE_MODIFY)
5822 	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5823 	   && GET_CODE (XEXP (x, 1)) == PLUS
5824 	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5825     {
5826       rtx addend = XEXP (XEXP (x, 1), 1);
5827 
5828       /* Don't allow ldrd post increment by register because it's hard
5829 	 to fixup invalid register choices.  */
5830       if (use_ldrd
5831 	  && GET_CODE (x) == POST_MODIFY
5832 	  && REG_P (addend))
5833 	return 0;
5834 
5835       return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5836 	      && arm_legitimate_index_p (mode, addend, outer, strict_p));
5837     }
5838 
5839   /* After reload constants split into minipools will have addresses
5840      from a LABEL_REF.  */
5841   else if (reload_completed
5842 	   && (code == LABEL_REF
5843 	       || (code == CONST
5844 		   && GET_CODE (XEXP (x, 0)) == PLUS
5845 		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5846 		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
5847     return 1;
5848 
5849   else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5850     return 0;
5851 
5852   else if (code == PLUS)
5853     {
5854       rtx xop0 = XEXP (x, 0);
5855       rtx xop1 = XEXP (x, 1);
5856 
5857       return ((arm_address_register_rtx_p (xop0, strict_p)
5858 	       && ((CONST_INT_P (xop1)
5859 		    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5860 		   || (!strict_p && will_be_in_index_register (xop1))))
5861 	      || (arm_address_register_rtx_p (xop1, strict_p)
5862 		  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5863     }
5864 
5865 #if 0
5866   /* Reload currently can't handle MINUS, so disable this for now */
5867   else if (GET_CODE (x) == MINUS)
5868     {
5869       rtx xop0 = XEXP (x, 0);
5870       rtx xop1 = XEXP (x, 1);
5871 
5872       return (arm_address_register_rtx_p (xop0, strict_p)
5873 	      && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5874     }
5875 #endif
5876 
5877   else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5878 	   && code == SYMBOL_REF
5879 	   && CONSTANT_POOL_ADDRESS_P (x)
5880 	   && ! (flag_pic
5881 		 && symbol_mentioned_p (get_pool_constant (x))
5882 		 && ! pcrel_constant_p (get_pool_constant (x))))
5883     return 1;
5884 
5885   return 0;
5886 }
5887 
5888 /* Return nonzero if X is a valid Thumb-2 address operand.  */
5889 static int
5890 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5891 {
5892   bool use_ldrd;
5893   enum rtx_code code = GET_CODE (x);
5894 
5895   if (arm_address_register_rtx_p (x, strict_p))
5896     return 1;
5897 
5898   use_ldrd = (TARGET_LDRD
5899 	      && (mode == DImode
5900 		  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5901 
5902   if (code == POST_INC || code == PRE_DEC
5903       || ((code == PRE_INC || code == POST_DEC)
5904 	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5905     return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5906 
5907   else if ((code == POST_MODIFY || code == PRE_MODIFY)
5908 	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5909 	   && GET_CODE (XEXP (x, 1)) == PLUS
5910 	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5911     {
5912       /* Thumb-2 only has autoincrement by constant.  */
5913       rtx addend = XEXP (XEXP (x, 1), 1);
5914       HOST_WIDE_INT offset;
5915 
5916       if (!CONST_INT_P (addend))
5917 	return 0;
5918 
5919       offset = INTVAL(addend);
5920       if (GET_MODE_SIZE (mode) <= 4)
5921 	return (offset > -256 && offset < 256);
5922 
5923       return (use_ldrd && offset > -1024 && offset < 1024
5924 	      && (offset & 3) == 0);
5925     }
5926 
5927   /* After reload constants split into minipools will have addresses
5928      from a LABEL_REF.  */
5929   else if (reload_completed
5930 	   && (code == LABEL_REF
5931 	       || (code == CONST
5932 		   && GET_CODE (XEXP (x, 0)) == PLUS
5933 		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5934 		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
5935     return 1;
5936 
5937   else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5938     return 0;
5939 
5940   else if (code == PLUS)
5941     {
5942       rtx xop0 = XEXP (x, 0);
5943       rtx xop1 = XEXP (x, 1);
5944 
5945       return ((arm_address_register_rtx_p (xop0, strict_p)
5946 	       && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5947 		   || (!strict_p && will_be_in_index_register (xop1))))
5948 	      || (arm_address_register_rtx_p (xop1, strict_p)
5949 		  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5950     }
5951 
5952   else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5953 	   && code == SYMBOL_REF
5954 	   && CONSTANT_POOL_ADDRESS_P (x)
5955 	   && ! (flag_pic
5956 		 && symbol_mentioned_p (get_pool_constant (x))
5957 		 && ! pcrel_constant_p (get_pool_constant (x))))
5958     return 1;
5959 
5960   return 0;
5961 }
5962 
5963 /* Return nonzero if INDEX is valid for an address index operand in
5964    ARM state.  */
5965 static int
5966 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5967 			int strict_p)
5968 {
5969   HOST_WIDE_INT range;
5970   enum rtx_code code = GET_CODE (index);
5971 
5972   /* Standard coprocessor addressing modes.  */
5973   if (TARGET_HARD_FLOAT
5974       && TARGET_VFP
5975       && (mode == SFmode || mode == DFmode))
5976     return (code == CONST_INT && INTVAL (index) < 1024
5977 	    && INTVAL (index) > -1024
5978 	    && (INTVAL (index) & 3) == 0);
5979 
5980   /* For quad modes, we restrict the constant offset to be slightly less
5981      than what the instruction format permits.  We do this because for
5982      quad mode moves, we will actually decompose them into two separate
5983      double-mode reads or writes.  INDEX must therefore be a valid
5984      (double-mode) offset and so should INDEX+8.  */
5985   if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5986     return (code == CONST_INT
5987 	    && INTVAL (index) < 1016
5988 	    && INTVAL (index) > -1024
5989 	    && (INTVAL (index) & 3) == 0);
5990 
5991   /* We have no such constraint on double mode offsets, so we permit the
5992      full range of the instruction format.  */
5993   if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5994     return (code == CONST_INT
5995 	    && INTVAL (index) < 1024
5996 	    && INTVAL (index) > -1024
5997 	    && (INTVAL (index) & 3) == 0);
5998 
5999   if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6000     return (code == CONST_INT
6001 	    && INTVAL (index) < 1024
6002 	    && INTVAL (index) > -1024
6003 	    && (INTVAL (index) & 3) == 0);
6004 
6005   if (arm_address_register_rtx_p (index, strict_p)
6006       && (GET_MODE_SIZE (mode) <= 4))
6007     return 1;
6008 
6009   if (mode == DImode || mode == DFmode)
6010     {
6011       if (code == CONST_INT)
6012 	{
6013 	  HOST_WIDE_INT val = INTVAL (index);
6014 
6015 	  if (TARGET_LDRD)
6016 	    return val > -256 && val < 256;
6017 	  else
6018 	    return val > -4096 && val < 4092;
6019 	}
6020 
6021       return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
6022     }
6023 
6024   if (GET_MODE_SIZE (mode) <= 4
6025       && ! (arm_arch4
6026 	    && (mode == HImode
6027 		|| mode == HFmode
6028 		|| (mode == QImode && outer == SIGN_EXTEND))))
6029     {
6030       if (code == MULT)
6031 	{
6032 	  rtx xiop0 = XEXP (index, 0);
6033 	  rtx xiop1 = XEXP (index, 1);
6034 
6035 	  return ((arm_address_register_rtx_p (xiop0, strict_p)
6036 		   && power_of_two_operand (xiop1, SImode))
6037 		  || (arm_address_register_rtx_p (xiop1, strict_p)
6038 		      && power_of_two_operand (xiop0, SImode)));
6039 	}
6040       else if (code == LSHIFTRT || code == ASHIFTRT
6041 	       || code == ASHIFT || code == ROTATERT)
6042 	{
6043 	  rtx op = XEXP (index, 1);
6044 
6045 	  return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6046 		  && CONST_INT_P (op)
6047 		  && INTVAL (op) > 0
6048 		  && INTVAL (op) <= 31);
6049 	}
6050     }
6051 
6052   /* For ARM v4 we may be doing a sign-extend operation during the
6053      load.  */
6054   if (arm_arch4)
6055     {
6056       if (mode == HImode
6057 	  || mode == HFmode
6058 	  || (outer == SIGN_EXTEND && mode == QImode))
6059 	range = 256;
6060       else
6061 	range = 4096;
6062     }
6063   else
6064     range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
6065 
6066   return (code == CONST_INT
6067 	  && INTVAL (index) < range
6068 	  && INTVAL (index) > -range);
6069 }
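
/* As examples of what the checks above accept in ARM state for SImode:
   [r0, #4095], [r0, r1] and [r0, r1, lsl #2] are all legitimate index
   forms, while on ARMv4 a halfword (HImode) access is limited to the
   +/-255 immediate range of LDRH/STRH.  */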
6070 
6071 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
6072    index operand, i.e. 1, 2, 4 or 8.  */
6073 static bool
6074 thumb2_index_mul_operand (rtx op)
6075 {
6076   HOST_WIDE_INT val;
6077 
6078   if (!CONST_INT_P (op))
6079     return false;
6080 
6081   val = INTVAL(op);
6082   return (val == 1 || val == 2 || val == 4 || val == 8);
6083 }
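
/* For instance, a Thumb-2 address such as [r1, r2, lsl #2] scales the
   index register by 4; the scale factors 1, 2, 4 and 8 correspond to
   the LSL #0..#3 forms accepted by thumb2_legitimate_index_p below.  */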
6084 
6085 /* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
6086 static int
6087 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
6088 {
6089   enum rtx_code code = GET_CODE (index);
6090 
6091   /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
6092   /* Standard coprocessor addressing modes.  */
6093   if (TARGET_HARD_FLOAT
6094       && TARGET_VFP
6095       && (mode == SFmode || mode == DFmode))
6096     return (code == CONST_INT && INTVAL (index) < 1024
6097 	    /* Thumb-2 allows only > -256 index range for its core register
6098 	       load/stores. Since we allow SF/DF in core registers, we have
6099 	       to use the intersection between -256~4096 (core) and -1024~1024
6100 	       (coprocessor).  */
6101 	    && INTVAL (index) > -256
6102 	    && (INTVAL (index) & 3) == 0);
6103 
6104   if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6105     {
6106       /* For DImode assume values will usually live in core regs
6107 	 and only allow LDRD addressing modes.  */
6108       if (!TARGET_LDRD || mode != DImode)
6109 	return (code == CONST_INT
6110 		&& INTVAL (index) < 1024
6111 		&& INTVAL (index) > -1024
6112 		&& (INTVAL (index) & 3) == 0);
6113     }
6114 
6115   /* For quad modes, we restrict the constant offset to be slightly less
6116      than what the instruction format permits.  We do this because for
6117      quad mode moves, we will actually decompose them into two separate
6118      double-mode reads or writes.  INDEX must therefore be a valid
6119      (double-mode) offset and so should INDEX+8.  */
6120   if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6121     return (code == CONST_INT
6122 	    && INTVAL (index) < 1016
6123 	    && INTVAL (index) > -1024
6124 	    && (INTVAL (index) & 3) == 0);
6125 
6126   /* We have no such constraint on double mode offsets, so we permit the
6127      full range of the instruction format.  */
6128   if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6129     return (code == CONST_INT
6130 	    && INTVAL (index) < 1024
6131 	    && INTVAL (index) > -1024
6132 	    && (INTVAL (index) & 3) == 0);
6133 
6134   if (arm_address_register_rtx_p (index, strict_p)
6135       && (GET_MODE_SIZE (mode) <= 4))
6136     return 1;
6137 
6138   if (mode == DImode || mode == DFmode)
6139     {
6140       if (code == CONST_INT)
6141 	{
6142 	  HOST_WIDE_INT val = INTVAL (index);
6143 	  /* ??? Can we assume ldrd for thumb2?  */
6144 	  /* Thumb-2 ldrd only has reg+const addressing modes.  */
6145 	  /* ldrd supports offsets of +-1020.
6146 	     However the ldr fallback does not.  */
6147 	  return val > -256 && val < 256 && (val & 3) == 0;
6148 	}
6149       else
6150 	return 0;
6151     }
6152 
6153   if (code == MULT)
6154     {
6155       rtx xiop0 = XEXP (index, 0);
6156       rtx xiop1 = XEXP (index, 1);
6157 
6158       return ((arm_address_register_rtx_p (xiop0, strict_p)
6159 	       && thumb2_index_mul_operand (xiop1))
6160 	      || (arm_address_register_rtx_p (xiop1, strict_p)
6161 		  && thumb2_index_mul_operand (xiop0)));
6162     }
6163   else if (code == ASHIFT)
6164     {
6165       rtx op = XEXP (index, 1);
6166 
6167       return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6168 	      && CONST_INT_P (op)
6169 	      && INTVAL (op) > 0
6170 	      && INTVAL (op) <= 3);
6171     }
6172 
6173   return (code == CONST_INT
6174 	  && INTVAL (index) < 4096
6175 	  && INTVAL (index) > -256);
6176 }
6177 
6178 /* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
6179 static int
6180 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
6181 {
6182   int regno;
6183 
6184   if (!REG_P (x))
6185     return 0;
6186 
6187   regno = REGNO (x);
6188 
6189   if (strict_p)
6190     return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
6191 
6192   return (regno <= LAST_LO_REGNUM
6193 	  || regno > LAST_VIRTUAL_REGISTER
6194 	  || regno == FRAME_POINTER_REGNUM
6195 	  || (GET_MODE_SIZE (mode) >= 4
6196 	      && (regno == STACK_POINTER_REGNUM
6197 		  || regno >= FIRST_PSEUDO_REGISTER
6198 		  || x == hard_frame_pointer_rtx
6199 		  || x == arg_pointer_rtx)));
6200 }
6201 
6202 /* Return nonzero if x is a legitimate index register.  This is the case
6203    for any base register that can access a QImode object.  */
6204 inline static int
6205 thumb1_index_register_rtx_p (rtx x, int strict_p)
6206 {
6207   return thumb1_base_register_rtx_p (x, QImode, strict_p);
6208 }
6209 
6210 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6211 
6212    The AP may be eliminated to either the SP or the FP, so we use the
6213    least common denominator, e.g. SImode, and offsets from 0 to 64.
6214 
6215    ??? Verify whether the above is the right approach.
6216 
6217    ??? Also, the FP may be eliminated to the SP, so perhaps that
6218    needs special handling also.
6219 
6220    ??? Look at how the mips16 port solves this problem.  It probably uses
6221    better ways to solve some of these problems.
6222 
6223    Although it is not incorrect, we don't accept QImode and HImode
6224    addresses based on the frame pointer or arg pointer until the
6225    reload pass starts.  This is so that eliminating such addresses
6226    into stack based ones won't produce impossible code.  */
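/* Typical addresses accepted below for SImode include [r0, r1] (two low
   registers), [r2, #28] (a small word-aligned immediate) and [sp, #1020];
   QImode and HImode accesses have the tighter offset ranges checked by
   thumb_legitimate_offset_p.  */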
6227 int
6228 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6229 {
6230   /* ??? Not clear if this is right.  Experiment.  */
6231   if (GET_MODE_SIZE (mode) < 4
6232       && !(reload_in_progress || reload_completed)
6233       && (reg_mentioned_p (frame_pointer_rtx, x)
6234 	  || reg_mentioned_p (arg_pointer_rtx, x)
6235 	  || reg_mentioned_p (virtual_incoming_args_rtx, x)
6236 	  || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6237 	  || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6238 	  || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6239     return 0;
6240 
6241   /* Accept any base register.  SP only in SImode or larger.  */
6242   else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6243     return 1;
6244 
6245   /* This is PC relative data before arm_reorg runs.  */
6246   else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
6247 	   && GET_CODE (x) == SYMBOL_REF
6248            && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
6249     return 1;
6250 
6251   /* This is PC relative data after arm_reorg runs.  */
6252   else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
6253 	   && reload_completed
6254 	   && (GET_CODE (x) == LABEL_REF
6255 	       || (GET_CODE (x) == CONST
6256 		   && GET_CODE (XEXP (x, 0)) == PLUS
6257 		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6258 		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6259     return 1;
6260 
6261   /* Post-inc indexing only supported for SImode and larger.  */
6262   else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
6263 	   && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
6264     return 1;
6265 
6266   else if (GET_CODE (x) == PLUS)
6267     {
6268       /* REG+REG address can be any two index registers.  */
6269       /* We disallow FRAME+REG addressing since we know that FRAME
6270 	 will be replaced with STACK, and SP relative addressing only
6271 	 permits SP+OFFSET.  */
6272       if (GET_MODE_SIZE (mode) <= 4
6273 	  && XEXP (x, 0) != frame_pointer_rtx
6274 	  && XEXP (x, 1) != frame_pointer_rtx
6275 	  && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6276 	  && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
6277 	      || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
6278 	return 1;
6279 
6280       /* REG+const has 5-7 bit offset for non-SP registers.  */
6281       else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6282 		|| XEXP (x, 0) == arg_pointer_rtx)
6283 	       && CONST_INT_P (XEXP (x, 1))
6284 	       && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6285 	return 1;
6286 
6287       /* REG+const has 10-bit offset for SP, but only SImode and
6288 	 larger is supported.  */
6289       /* ??? Should probably check for DI/DFmode overflow here
6290 	 just like GO_IF_LEGITIMATE_OFFSET does.  */
6291       else if (REG_P (XEXP (x, 0))
6292 	       && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
6293 	       && GET_MODE_SIZE (mode) >= 4
6294 	       && CONST_INT_P (XEXP (x, 1))
6295 	       && INTVAL (XEXP (x, 1)) >= 0
6296 	       && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
6297 	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
6298 	return 1;
6299 
6300       else if (REG_P (XEXP (x, 0))
6301 	       && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
6302 		   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
6303 		   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
6304 		       && REGNO (XEXP (x, 0))
6305 			  <= LAST_VIRTUAL_POINTER_REGISTER))
6306 	       && GET_MODE_SIZE (mode) >= 4
6307 	       && CONST_INT_P (XEXP (x, 1))
6308 	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
6309 	return 1;
6310     }
6311 
6312   else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6313 	   && GET_MODE_SIZE (mode) == 4
6314 	   && GET_CODE (x) == SYMBOL_REF
6315 	   && CONSTANT_POOL_ADDRESS_P (x)
6316 	   && ! (flag_pic
6317 		 && symbol_mentioned_p (get_pool_constant (x))
6318 		 && ! pcrel_constant_p (get_pool_constant (x))))
6319     return 1;
6320 
6321   return 0;
6322 }
6323 
6324 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6325    instruction of mode MODE.  */
6326 int
6327 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
6328 {
6329   switch (GET_MODE_SIZE (mode))
6330     {
6331     case 1:
6332       return val >= 0 && val < 32;
6333 
6334     case 2:
6335       return val >= 0 && val < 64 && (val & 1) == 0;
6336 
6337     default:
6338       return (val >= 0
6339 	      && (val + GET_MODE_SIZE (mode)) <= 128
6340 	      && (val & 3) == 0);
6341     }
6342 }
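
/* So, for example, [r0, #31] is a valid byte (QImode) offset but not a
   halfword or word one; [r0, #62] is the largest halfword offset; and
   [r0, #124] is the largest word offset, since 124 + 4 == 128.  */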
6343 
6344 bool
6345 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6346 {
6347   if (TARGET_ARM)
6348     return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6349   else if (TARGET_THUMB2)
6350     return thumb2_legitimate_address_p (mode, x, strict_p);
6351   else /* if (TARGET_THUMB1) */
6352     return thumb1_legitimate_address_p (mode, x, strict_p);
6353 }
6354 
6355 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
6356 
6357    Given an rtx X being reloaded into a reg required to be
6358    in class CLASS, return the class of reg to actually use.
6359    In general this is just CLASS, but for the Thumb core registers and
6360    immediate constants we prefer a LO_REGS class or a subset.  */
6361 
6362 static reg_class_t
6363 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
6364 {
6365   if (TARGET_32BIT)
6366     return rclass;
6367   else
6368     {
6369       if (rclass == GENERAL_REGS
6370 	  || rclass == HI_REGS
6371 	  || rclass == NO_REGS
6372 	  || rclass == STACK_REG)
6373 	return LO_REGS;
6374       else
6375 	return rclass;
6376     }
6377 }
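/* Purely as an illustration: on Thumb-1 a reload whose requested class is
   GENERAL_REGS is steered into LO_REGS above, since most Thumb-1
   instructions, including move-immediate (e.g. "movs r3, #42"), only
   operate on the low registers r0-r7.  */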
6378 
6379 /* Build the SYMBOL_REF for __tls_get_addr.  */
6380 
6381 static GTY(()) rtx tls_get_addr_libfunc;
6382 
6383 static rtx
6384 get_tls_get_addr (void)
6385 {
6386   if (!tls_get_addr_libfunc)
6387     tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6388   return tls_get_addr_libfunc;
6389 }
6390 
6391 rtx
6392 arm_load_tp (rtx target)
6393 {
6394   if (!target)
6395     target = gen_reg_rtx (SImode);
6396 
6397   if (TARGET_HARD_TP)
6398     {
6399       /* Can return in any reg.  */
6400       emit_insn (gen_load_tp_hard (target));
6401     }
6402   else
6403     {
6404       /* Always returned in r0.  Immediately copy the result into a pseudo;
6405 	 otherwise other uses of r0 (e.g. setting up function arguments) may
6406 	 clobber the value.  */
6407 
6408       rtx tmp;
6409 
6410       emit_insn (gen_load_tp_soft ());
6411 
6412       tmp = gen_rtx_REG (SImode, 0);
6413       emit_move_insn (target, tmp);
6414     }
6415   return target;
6416 }
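/* As a rough sketch of what the two cases above expand to (the exact forms
   are given by the load_tp_hard and load_tp_soft patterns in arm.md):

     mrc   p15, 0, rN, c13, c0, 3   // TARGET_HARD_TP: read the hardware TP
     bl    __aeabi_read_tp          // soft TP: helper call, result in r0.  */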
6417 
6418 static rtx
6419 load_tls_operand (rtx x, rtx reg)
6420 {
6421   rtx tmp;
6422 
6423   if (reg == NULL_RTX)
6424     reg = gen_reg_rtx (SImode);
6425 
6426   tmp = gen_rtx_CONST (SImode, x);
6427 
6428   emit_move_insn (reg, tmp);
6429 
6430   return reg;
6431 }
6432 
6433 static rtx
6434 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6435 {
6436   rtx insns, label, labelno, sum;
6437 
6438   gcc_assert (reloc != TLS_DESCSEQ);
6439   start_sequence ();
6440 
6441   labelno = GEN_INT (pic_labelno++);
6442   label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6443   label = gen_rtx_CONST (VOIDmode, label);
6444 
6445   sum = gen_rtx_UNSPEC (Pmode,
6446 			gen_rtvec (4, x, GEN_INT (reloc), label,
6447 				   GEN_INT (TARGET_ARM ? 8 : 4)),
6448 			UNSPEC_TLS);
6449   reg = load_tls_operand (sum, reg);
6450 
6451   if (TARGET_ARM)
6452     emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6453   else
6454     emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6455 
6456   *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
6457 				     LCT_PURE, /* LCT_CONST?  */
6458 				     Pmode, 1, reg, Pmode);
6459 
6460   insns = get_insns ();
6461   end_sequence ();
6462 
6463   return insns;
6464 }
6465 
6466 static rtx
6467 arm_tls_descseq_addr (rtx x, rtx reg)
6468 {
6469   rtx labelno = GEN_INT (pic_labelno++);
6470   rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6471   rtx sum = gen_rtx_UNSPEC (Pmode,
6472 			    gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
6473 				       gen_rtx_CONST (VOIDmode, label),
6474 				       GEN_INT (!TARGET_ARM)),
6475 			    UNSPEC_TLS);
6476   rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
6477 
6478   emit_insn (gen_tlscall (x, labelno));
6479   if (!reg)
6480     reg = gen_reg_rtx (SImode);
6481   else
6482     gcc_assert (REGNO (reg) != 0);
6483 
6484   emit_move_insn (reg, reg0);
6485 
6486   return reg;
6487 }
6488 
6489 rtx
6490 legitimize_tls_address (rtx x, rtx reg)
6491 {
6492   rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6493   unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6494 
6495   switch (model)
6496     {
6497     case TLS_MODEL_GLOBAL_DYNAMIC:
6498       if (TARGET_GNU2_TLS)
6499 	{
6500 	  reg = arm_tls_descseq_addr (x, reg);
6501 
6502 	  tp = arm_load_tp (NULL_RTX);
6503 
6504 	  dest = gen_rtx_PLUS (Pmode, tp, reg);
6505 	}
6506       else
6507 	{
6508 	  /* Original scheme */
6509 	  insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6510 	  dest = gen_reg_rtx (Pmode);
6511 	  emit_libcall_block (insns, dest, ret, x);
6512 	}
6513       return dest;
6514 
6515     case TLS_MODEL_LOCAL_DYNAMIC:
6516       if (TARGET_GNU2_TLS)
6517 	{
6518 	  reg = arm_tls_descseq_addr (x, reg);
6519 
6520 	  tp = arm_load_tp (NULL_RTX);
6521 
6522 	  dest = gen_rtx_PLUS (Pmode, tp, reg);
6523 	}
6524       else
6525 	{
6526 	  insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6527 
6528 	  /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6529 	     share the LDM result with other LD model accesses.  */
6530 	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6531 				UNSPEC_TLS);
6532 	  dest = gen_reg_rtx (Pmode);
6533 	  emit_libcall_block (insns, dest, ret, eqv);
6534 
6535 	  /* Load the addend.  */
6536 	  addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
6537 						     GEN_INT (TLS_LDO32)),
6538 				   UNSPEC_TLS);
6539 	  addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6540 	  dest = gen_rtx_PLUS (Pmode, dest, addend);
6541 	}
6542       return dest;
6543 
6544     case TLS_MODEL_INITIAL_EXEC:
6545       labelno = GEN_INT (pic_labelno++);
6546       label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6547       label = gen_rtx_CONST (VOIDmode, label);
6548       sum = gen_rtx_UNSPEC (Pmode,
6549 			    gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6550 				       GEN_INT (TARGET_ARM ? 8 : 4)),
6551 			    UNSPEC_TLS);
6552       reg = load_tls_operand (sum, reg);
6553 
6554       if (TARGET_ARM)
6555 	emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6556       else if (TARGET_THUMB2)
6557 	emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6558       else
6559 	{
6560 	  emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6561 	  emit_move_insn (reg, gen_const_mem (SImode, reg));
6562 	}
6563 
6564       tp = arm_load_tp (NULL_RTX);
6565 
6566       return gen_rtx_PLUS (Pmode, tp, reg);
6567 
6568     case TLS_MODEL_LOCAL_EXEC:
6569       tp = arm_load_tp (NULL_RTX);
6570 
6571       reg = gen_rtx_UNSPEC (Pmode,
6572 			    gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6573 			    UNSPEC_TLS);
6574       reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6575 
6576       return gen_rtx_PLUS (Pmode, tp, reg);
6577 
6578     default:
6579       abort ();
6580     }
6581 }
6582 
6583 /* Try machine-dependent ways of modifying an illegitimate address
6584    to be legitimate.  If we find one, return the new, valid address.  */
6585 rtx
6586 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6587 {
6588   if (arm_tls_referenced_p (x))
6589     {
6590       rtx addend = NULL;
6591 
6592       if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
6593 	{
6594 	  addend = XEXP (XEXP (x, 0), 1);
6595 	  x = XEXP (XEXP (x, 0), 0);
6596 	}
6597 
6598       if (GET_CODE (x) != SYMBOL_REF)
6599 	return x;
6600 
6601       gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
6602 
6603       x = legitimize_tls_address (x, NULL_RTX);
6604 
6605       if (addend)
6606 	{
6607 	  x = gen_rtx_PLUS (SImode, x, addend);
6608 	  orig_x = x;
6609 	}
6610       else
6611 	return x;
6612     }
6613 
6614   if (!TARGET_ARM)
6615     {
6616       /* TODO: legitimize_address for Thumb2.  */
6617       if (TARGET_THUMB2)
6618         return x;
6619       return thumb_legitimize_address (x, orig_x, mode);
6620     }
6621 
6622   if (GET_CODE (x) == PLUS)
6623     {
6624       rtx xop0 = XEXP (x, 0);
6625       rtx xop1 = XEXP (x, 1);
6626 
6627       if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6628 	xop0 = force_reg (SImode, xop0);
6629 
6630       if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6631 	xop1 = force_reg (SImode, xop1);
6632 
6633       if (ARM_BASE_REGISTER_RTX_P (xop0)
6634 	  && CONST_INT_P (xop1))
6635 	{
6636 	  HOST_WIDE_INT n, low_n;
6637 	  rtx base_reg, val;
6638 	  n = INTVAL (xop1);
6639 
6640 	  /* VFP addressing modes actually allow greater offsets, but for
6641 	     now we just stick with the lowest common denominator.  */
6642 	  if (mode == DImode
6643 	      || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6644 	    {
6645 	      low_n = n & 0x0f;
6646 	      n &= ~0x0f;
6647 	      if (low_n > 4)
6648 		{
6649 		  n += 16;
6650 		  low_n -= 16;
6651 		}
6652 	    }
6653 	  else
6654 	    {
6655 	      low_n = ((mode) == TImode ? 0
6656 		       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6657 	      n -= low_n;
6658 	    }
6659 
6660 	  base_reg = gen_reg_rtx (SImode);
6661 	  val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
6662 	  emit_move_insn (base_reg, val);
6663 	  x = plus_constant (Pmode, base_reg, low_n);
6664 	}
6665       else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6666 	x = gen_rtx_PLUS (SImode, xop0, xop1);
6667     }
6668 
6669   /* XXX We don't allow MINUS any more -- see comment in
6670      arm_legitimate_address_outer_p ().  */
6671   else if (GET_CODE (x) == MINUS)
6672     {
6673       rtx xop0 = XEXP (x, 0);
6674       rtx xop1 = XEXP (x, 1);
6675 
6676       if (CONSTANT_P (xop0))
6677 	xop0 = force_reg (SImode, xop0);
6678 
6679       if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6680 	xop1 = force_reg (SImode, xop1);
6681 
6682       if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6683 	x = gen_rtx_MINUS (SImode, xop0, xop1);
6684     }
6685 
6686   /* Make sure to take full advantage of the pre-indexed addressing mode
6687      with absolute addresses, which often allows the base register to
6688      be factorized across multiple adjacent memory references, and might
6689      even allow the minipool to be avoided entirely.  */
6690   else if (CONST_INT_P (x) && optimize > 0)
6691     {
6692       unsigned int bits;
6693       HOST_WIDE_INT mask, base, index;
6694       rtx base_reg;
6695 
6696       /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
6697          use an 8-bit index.  So let's use a 12-bit index for SImode only and
6698          hope that arm_gen_constant will enable ldrb to use more bits.  */
6699       bits = (mode == SImode) ? 12 : 8;
6700       mask = (1 << bits) - 1;
6701       base = INTVAL (x) & ~mask;
6702       index = INTVAL (x) & mask;
6703       if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6704         {
6705 	  /* It'll most probably be more efficient to generate the base
6706 	     with more bits set and use a negative index instead. */
6707 	  base |= mask;
6708 	  index -= mask;
6709 	}
6710       base_reg = force_reg (SImode, GEN_INT (base));
6711       x = plus_constant (Pmode, base_reg, index);
6712     }
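  /* A worked example of the split above (illustrative only): for an SImode
     access to absolute address 0x1234, bits = 12, so base = 0x1000 and
     index = 0x234; 0x1000 has only one bit set, so the base is kept as is
     and the final address is (reg holding 0x1000) + 0x234, which fits the
     12-bit ldr/str offset field.  */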
6713 
6714   if (flag_pic)
6715     {
6716       /* We need to find and carefully transform any SYMBOL and LABEL
6717 	 references, so go back to the original address expression.  */
6718       rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6719 
6720       if (new_x != orig_x)
6721 	x = new_x;
6722     }
6723 
6724   return x;
6725 }
6726 
6727 
6728 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6729    to be legitimate.  If we find one, return the new, valid address.  */
6730 rtx
6731 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6732 {
6733   if (GET_CODE (x) == PLUS
6734       && CONST_INT_P (XEXP (x, 1))
6735       && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6736 	  || INTVAL (XEXP (x, 1)) < 0))
6737     {
6738       rtx xop0 = XEXP (x, 0);
6739       rtx xop1 = XEXP (x, 1);
6740       HOST_WIDE_INT offset = INTVAL (xop1);
6741 
6742       /* Try to fold the offset into a biasing of the base register and
6743 	 then offsetting that.  Don't do this when optimizing for space
6744 	 since it can cause too many CSEs.  */
6745       if (optimize_size && offset >= 0
6746 	  && offset < 256 + 31 * GET_MODE_SIZE (mode))
6747 	{
6748 	  HOST_WIDE_INT delta;
6749 
6750 	  if (offset >= 256)
6751 	    delta = offset - (256 - GET_MODE_SIZE (mode));
6752 	  else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6753 	    delta = 31 * GET_MODE_SIZE (mode);
6754 	  else
6755 	    delta = offset & (~31 * GET_MODE_SIZE (mode));
6756 
6757 	  xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
6758 				NULL_RTX);
6759 	  x = plus_constant (Pmode, xop0, delta);
6760 	}
6761       else if (offset < 0 && offset > -256)
6762 	/* Small negative offsets are best done with a subtract before the
6763 	   dereference; forcing these into a register normally takes two
6764 	   instructions.  */
6765 	x = force_operand (x, NULL_RTX);
6766       else
6767 	{
6768 	  /* For the remaining cases, force the constant into a register.  */
6769 	  xop1 = force_reg (SImode, xop1);
6770 	  x = gen_rtx_PLUS (SImode, xop0, xop1);
6771 	}
6772     }
6773   else if (GET_CODE (x) == PLUS
6774 	   && s_register_operand (XEXP (x, 1), SImode)
6775 	   && !s_register_operand (XEXP (x, 0), SImode))
6776     {
6777       rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6778 
6779       x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6780     }
6781 
6782   if (flag_pic)
6783     {
6784       /* We need to find and carefully transform any SYMBOL and LABEL
6785 	 references, so go back to the original address expression.  */
6786       rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6787 
6788       if (new_x != orig_x)
6789 	x = new_x;
6790     }
6791 
6792   return x;
6793 }
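/* Illustrative example of the offset folding in thumb_legitimize_address
   above: at -Os, an SImode access at base + 260 is rewritten as
   (base + 252) + 8, so the bias fits a Thumb-1 8-bit add immediate and the
   remaining offset fits the scaled 5-bit ldr/str field.  */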
6794 
6795 bool
6796 arm_legitimize_reload_address (rtx *p,
6797 			       enum machine_mode mode,
6798 			       int opnum, int type,
6799 			       int ind_levels ATTRIBUTE_UNUSED)
6800 {
6801   /* We must recognize output that we have already generated ourselves.  */
6802   if (GET_CODE (*p) == PLUS
6803       && GET_CODE (XEXP (*p, 0)) == PLUS
6804       && REG_P (XEXP (XEXP (*p, 0), 0))
6805       && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
6806       && CONST_INT_P (XEXP (*p, 1)))
6807     {
6808       push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6809 		   MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6810 		   VOIDmode, 0, 0, opnum, (enum reload_type) type);
6811       return true;
6812     }
6813 
6814   if (GET_CODE (*p) == PLUS
6815       && REG_P (XEXP (*p, 0))
6816       && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6817       /* If the base register is equivalent to a constant, let the generic
6818 	 code handle it.  Otherwise we will run into problems if a future
6819 	 reload pass decides to rematerialize the constant.  */
6820       && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
6821       && CONST_INT_P (XEXP (*p, 1)))
6822     {
6823       HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6824       HOST_WIDE_INT low, high;
6825 
6826       /* Detect coprocessor load/stores.  */
6827       bool coproc_p = ((TARGET_HARD_FLOAT
6828 			&& TARGET_VFP
6829 			&& (mode == SFmode || mode == DFmode))
6830 		       || (TARGET_REALLY_IWMMXT
6831 			   && VALID_IWMMXT_REG_MODE (mode))
6832 		       || (TARGET_NEON
6833 			   && (VALID_NEON_DREG_MODE (mode)
6834 			       || VALID_NEON_QREG_MODE (mode))));
6835 
6836       /* For some cases, bail out when the low two bits of the offset are set.  */
6837       if ((val & 0x3) != 0
6838 	  /* Coprocessor load/store indexes are 8-bits + '00' appended.  */
6839 	  && (coproc_p
6840 	      /* For DI, and DF under soft-float: */
6841 	      || ((mode == DImode || mode == DFmode)
6842 		  /* Without ldrd, we use stm/ldm, which does not
6843 		     fare well with unaligned bits.  */
6844 		  && (! TARGET_LDRD
6845 		      /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4.  */
6846 		      || TARGET_THUMB2))))
6847 	return false;
6848 
6849       /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6850 	 of which the (reg+high) gets turned into a reload add insn,
6851 	 we try to decompose the index into high/low values that can often
6852 	 also lead to better reload CSE.
6853 	 For example:
6854 	         ldr r0, [r2, #4100]  // Offset too large
6855 		 ldr r1, [r2, #4104]  // Offset too large
6856 
6857 	 is best reloaded as:
6858 	         add t1, r2, #4096
6859 		 ldr r0, [t1, #4]
6860 		 add t2, r2, #4096
6861 		 ldr r1, [t2, #8]
6862 
6863 	 which post-reload CSE can simplify in most cases to eliminate the
6864 	 second add instruction:
6865 	         add t1, r2, #4096
6866 		 ldr r0, [t1, #4]
6867 		 ldr r1, [t1, #8]
6868 
6869 	 The idea here is that we want to split out the bits of the constant
6870 	 as a mask, rather than as subtracting the maximum offset that the
6871 	 respective type of load/store used can handle.
6872 
6873 	 When encountering negative offsets, we can still use them even if
6874 	 the overall offset is positive; sometimes this leads to an immediate
6875 	 that can be constructed with fewer instructions.
6876 	 For example:
6877 	         ldr r0, [r2, #0x3FFFFC]
6878 
6879 	 This is best reloaded as:
6880 	         add t1, r2, #0x400000
6881 		 ldr r0, [t1, #-4]
6882 
6883 	 The trick for spotting this for a load insn with N bits of offset
6884 	 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
6885 	 negative offset that is going to make bit N and all the bits below
6886 	 it become zero in the remainder part.
6887 
6888 	 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6889 	 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6890 	 used in most cases of ARM load/store instructions.  */
6891 
6892 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N)					\
6893       (((VAL) & ((1 << (N)) - 1))					\
6894        ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N))	\
6895        : 0)
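      /* Tracing the macro on the 0x3FFFFC example in the comment above
	 (assuming a 12-bit offset field):
	     SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 12)
	       = ((0x3FFFFC & 0x1FFF) ^ 0x1000) - 0x1000
	       = (0x1FFC ^ 0x1000) - 0x1000
	       = -4
	 so the high part becomes 0x3FFFFC - (-4) = 0x400000, matching the
	 "add t1, r2, #0x400000; ldr r0, [t1, #-4]" reload shown above.  */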
6896 
6897       if (coproc_p)
6898 	{
6899 	  low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
6900 
6901 	  /* NEON quad-word load/stores are made of two double-word accesses,
6902 	     so the valid index range is reduced by 8. Treat as 9-bit range if
6903 	     we go over it.  */
6904 	  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
6905 	    low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
6906 	}
6907       else if (GET_MODE_SIZE (mode) == 8)
6908 	{
6909 	  if (TARGET_LDRD)
6910 	    low = (TARGET_THUMB2
6911 		   ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
6912 		   : SIGN_MAG_LOW_ADDR_BITS (val, 8));
6913 	  else
6914 	    /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6915 	       to access doublewords. The supported load/store offsets are
6916 	       -8, -4, and 4, which we try to produce here.  */
6917 	    low = ((val & 0xf) ^ 0x8) - 0x8;
6918 	}
6919       else if (GET_MODE_SIZE (mode) < 8)
6920 	{
6921 	  /* NEON element load/stores do not have an offset.  */
6922 	  if (TARGET_NEON_FP16 && mode == HFmode)
6923 	    return false;
6924 
6925 	  if (TARGET_THUMB2)
6926 	    {
6927 	      /* Thumb-2 has an asymmetrical index range of (-256,4096).
6928 		 Try the wider 12-bit range first, and re-try if the result
6929 		 is out of range.  */
6930 	      low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6931 	      if (low < -255)
6932 		low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6933 	    }
6934 	  else
6935 	    {
6936 	      if (mode == HImode || mode == HFmode)
6937 		{
6938 		  if (arm_arch4)
6939 		    low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6940 		  else
6941 		    {
6942 		      /* The storehi/movhi_bytes fallbacks can use only
6943 			 [-4094,+4094] of the full ldrb/strb index range.  */
6944 		      low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6945 		      if (low == 4095 || low == -4095)
6946 			return false;
6947 		    }
6948 		}
6949 	      else
6950 		low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6951 	    }
6952 	}
6953       else
6954 	return false;
6955 
6956       high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6957 	       ^ (unsigned HOST_WIDE_INT) 0x80000000)
6958 	      - (unsigned HOST_WIDE_INT) 0x80000000);
6959       /* Check for overflow or zero */
6960       if (low == 0 || high == 0 || (high + low != val))
6961 	return false;
6962 
6963       /* Reload the high part into a base reg; leave the low part
6964 	 in the mem.  */
6965       *p = gen_rtx_PLUS (GET_MODE (*p),
6966 			 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6967 				       GEN_INT (high)),
6968 			 GEN_INT (low));
6969       push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6970 		   MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6971 		   VOIDmode, 0, 0, opnum, (enum reload_type) type);
6972       return true;
6973     }
6974 
6975   return false;
6976 }
6977 
6978 rtx
6979 thumb_legitimize_reload_address (rtx *x_p,
6980 				 enum machine_mode mode,
6981 				 int opnum, int type,
6982 				 int ind_levels ATTRIBUTE_UNUSED)
6983 {
6984   rtx x = *x_p;
6985 
6986   if (GET_CODE (x) == PLUS
6987       && GET_MODE_SIZE (mode) < 4
6988       && REG_P (XEXP (x, 0))
6989       && XEXP (x, 0) == stack_pointer_rtx
6990       && CONST_INT_P (XEXP (x, 1))
6991       && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6992     {
6993       rtx orig_x = x;
6994 
6995       x = copy_rtx (x);
6996       push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6997 		   Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6998       return x;
6999     }
7000 
7001   /* If both registers are hi-regs, then it's better to reload the
7002      entire expression rather than each register individually.  That
7003      only requires one reload register rather than two.  */
7004   if (GET_CODE (x) == PLUS
7005       && REG_P (XEXP (x, 0))
7006       && REG_P (XEXP (x, 1))
7007       && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
7008       && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
7009     {
7010       rtx orig_x = x;
7011 
7012       x = copy_rtx (x);
7013       push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7014 		   Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7015       return x;
7016     }
7017 
7018   return NULL;
7019 }
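/* For example (illustrative only): a QImode access to [sp, #40] fails
   thumb_legitimate_offset_p (byte offsets must be below 32), so the first
   case above reloads the whole sp+40 sum into a low register and the
   access becomes a plain register-indirect one.  Likewise an address
   formed from two hi registers is reloaded as a single sum rather than
   one register at a time.  */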
7020 
7021 /* Test for various thread-local symbols.  */
7022 
7023 /* Helper for arm_tls_referenced_p.  */
7024 
7025 static int
7026 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
7027 {
7028   if (GET_CODE (*x) == SYMBOL_REF)
7029     return SYMBOL_REF_TLS_MODEL (*x) != 0;
7030 
7031   /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7032      TLS offsets, not real symbol references.  */
7033   if (GET_CODE (*x) == UNSPEC
7034       && XINT (*x, 1) == UNSPEC_TLS)
7035     return -1;
7036 
7037   return 0;
7038 }
7039 
7040 /* Return TRUE if X contains any TLS symbol references.  */
7041 
7042 bool
7043 arm_tls_referenced_p (rtx x)
7044 {
7045   if (! TARGET_HAVE_TLS)
7046     return false;
7047 
7048   return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
7049 }
7050 
7051 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
7052 
7053    On the ARM, allow any integer (invalid ones are removed later by insn
7054    patterns), nice doubles and symbol_refs which refer to the function's
7055    constant pool XXX.
7056 
7057    When generating pic allow anything.  */
7058 
7059 static bool
7060 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
7061 {
7062   /* At present, we have no support for Neon structure constants, so forbid
7063      them here.  It might be possible to handle simple cases like 0 and -1
7064      in the future.  */
7065   if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
7066     return false;
7067 
7068   return flag_pic || !label_mentioned_p (x);
7069 }
7070 
7071 static bool
7072 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7073 {
7074   return (CONST_INT_P (x)
7075 	  || CONST_DOUBLE_P (x)
7076 	  || CONSTANT_ADDRESS_P (x)
7077 	  || flag_pic);
7078 }
7079 
7080 static bool
7081 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
7082 {
7083   return (!arm_cannot_force_const_mem (mode, x)
7084 	  && (TARGET_32BIT
7085 	      ? arm_legitimate_constant_p_1 (mode, x)
7086 	      : thumb_legitimate_constant_p (mode, x)));
7087 }
7088 
7089 /* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
7090 
7091 static bool
7092 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7093 {
7094   rtx base, offset;
7095 
7096   if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
7097     {
7098       split_const (x, &base, &offset);
7099       if (GET_CODE (base) == SYMBOL_REF
7100 	  && !offset_within_block_p (base, INTVAL (offset)))
7101 	return true;
7102     }
7103   return arm_tls_referenced_p (x);
7104 }
7105 
7106 #define REG_OR_SUBREG_REG(X)						\
7107   (REG_P (X)							\
7108    || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
7109 
7110 #define REG_OR_SUBREG_RTX(X)			\
7111    (REG_P (X) ? (X) : SUBREG_REG (X))
7112 
7113 static inline int
7114 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7115 {
7116   enum machine_mode mode = GET_MODE (x);
7117   int total, words;
7118 
7119   switch (code)
7120     {
7121     case ASHIFT:
7122     case ASHIFTRT:
7123     case LSHIFTRT:
7124     case ROTATERT:
7125       return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
7126 
7127     case PLUS:
7128     case MINUS:
7129     case COMPARE:
7130     case NEG:
7131     case NOT:
7132       return COSTS_N_INSNS (1);
7133 
7134     case MULT:
7135       if (CONST_INT_P (XEXP (x, 1)))
7136 	{
7137 	  int cycles = 0;
7138 	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7139 
7140 	  while (i)
7141 	    {
7142 	      i >>= 2;
7143 	      cycles++;
7144 	    }
7145 	  return COSTS_N_INSNS (2) + cycles;
7146 	}
7147       return COSTS_N_INSNS (1) + 16;
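      /* Worked example (illustrative): for a multiply by the constant 100
	 the loop above iterates 100 -> 25 -> 6 -> 1 -> 0, i.e. four times,
	 giving COSTS_N_INSNS (2) + 4 = 12 with the usual COSTS_N_INSNS
	 scale of 4 units per instruction.  */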
7148 
7149     case SET:
7150       /* A SET doesn't have a mode, so let's look at the SET_DEST to get
7151 	 the mode.  */
7152       words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
7153       return (COSTS_N_INSNS (words)
7154 	      + 4 * ((MEM_P (SET_SRC (x)))
7155 		     + MEM_P (SET_DEST (x))));
7156 
7157     case CONST_INT:
7158       if (outer == SET)
7159 	{
7160 	  if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7161 	    return 0;
7162 	  if (thumb_shiftable_const (INTVAL (x)))
7163 	    return COSTS_N_INSNS (2);
7164 	  return COSTS_N_INSNS (3);
7165 	}
7166       else if ((outer == PLUS || outer == COMPARE)
7167 	       && INTVAL (x) < 256 && INTVAL (x) > -256)
7168 	return 0;
7169       else if ((outer == IOR || outer == XOR || outer == AND)
7170 	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
7171 	return COSTS_N_INSNS (1);
7172       else if (outer == AND)
7173 	{
7174 	  int i;
7175 	  /* This duplicates the tests in the andsi3 expander.  */
7176 	  for (i = 9; i <= 31; i++)
7177 	    if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7178 		|| (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7179 	      return COSTS_N_INSNS (2);
7180 	}
7181       else if (outer == ASHIFT || outer == ASHIFTRT
7182 	       || outer == LSHIFTRT)
7183 	return 0;
7184       return COSTS_N_INSNS (2);
7185 
7186     case CONST:
7187     case CONST_DOUBLE:
7188     case LABEL_REF:
7189     case SYMBOL_REF:
7190       return COSTS_N_INSNS (3);
7191 
7192     case UDIV:
7193     case UMOD:
7194     case DIV:
7195     case MOD:
7196       return 100;
7197 
7198     case TRUNCATE:
7199       return 99;
7200 
7201     case AND:
7202     case XOR:
7203     case IOR:
7204       /* XXX guess.  */
7205       return 8;
7206 
7207     case MEM:
7208       /* XXX another guess.  */
7209       /* Memory costs quite a lot for the first word, but subsequent words
7210 	 load at the equivalent of a single insn each.  */
7211       return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7212 	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7213 		 ? 4 : 0));
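      /* E.g. (illustrative, with 4-byte words): an SImode load costs
	 10 + 4 * ((4 - 1) / 4) = 10, a DImode load costs 10 + 4 = 14, and
	 either costs 4 more when it addresses the constant pool.  */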
7214 
7215     case IF_THEN_ELSE:
7216       /* XXX a guess.  */
7217       if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7218 	return 14;
7219       return 2;
7220 
7221     case SIGN_EXTEND:
7222     case ZERO_EXTEND:
7223       total = mode == DImode ? COSTS_N_INSNS (1) : 0;
7224       total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
7225 
7226       if (mode == SImode)
7227 	return total;
7228 
7229       if (arm_arch6)
7230 	return total + COSTS_N_INSNS (1);
7231 
7232       /* Assume a two-shift sequence.  Increase the cost slightly so
7233 	 we prefer actual shifts over an extend operation.  */
7234       return total + 1 + COSTS_N_INSNS (2);
7235 
7236     default:
7237       return 99;
7238     }
7239 }
7240 
7241 static inline bool
7242 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
7243 {
7244   enum machine_mode mode = GET_MODE (x);
7245   enum rtx_code subcode;
7246   rtx operand;
7247   enum rtx_code code = GET_CODE (x);
7248   *total = 0;
7249 
7250   switch (code)
7251     {
7252     case MEM:
7253       /* Memory costs quite a lot for the first word, but subsequent words
7254 	 load at the equivalent of a single insn each.  */
7255       *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7256       return true;
7257 
7258     case DIV:
7259     case MOD:
7260     case UDIV:
7261     case UMOD:
7262       if (TARGET_HARD_FLOAT && mode == SFmode)
7263 	*total = COSTS_N_INSNS (2);
7264       else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
7265 	*total = COSTS_N_INSNS (4);
7266       else
7267 	*total = COSTS_N_INSNS (20);
7268       return false;
7269 
7270     case ROTATE:
7271       if (REG_P (XEXP (x, 1)))
7272 	*total = COSTS_N_INSNS (1); /* Need to subtract the count from 32.  */
7273       else if (!CONST_INT_P (XEXP (x, 1)))
7274 	*total = rtx_cost (XEXP (x, 1), code, 1, speed);
7275 
7276       /* Fall through */
7277     case ROTATERT:
7278       if (mode != SImode)
7279 	{
7280 	  *total += COSTS_N_INSNS (4);
7281 	  return true;
7282 	}
7283 
7284       /* Fall through */
7285     case ASHIFT: case LSHIFTRT: case ASHIFTRT:
7286       *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7287       if (mode == DImode)
7288 	{
7289 	  *total += COSTS_N_INSNS (3);
7290 	  return true;
7291 	}
7292 
7293       *total += COSTS_N_INSNS (1);
7294       /* Increase the cost of complex shifts because they aren't any faster
7295          and they reduce dual-issue opportunities.  */
7296       if (arm_tune_cortex_a9
7297 	  && outer != SET && !CONST_INT_P (XEXP (x, 1)))
7298 	++*total;
7299 
7300       return true;
7301 
7302     case MINUS:
7303       if (mode == DImode)
7304 	{
7305 	  *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7306 	  if (CONST_INT_P (XEXP (x, 0))
7307 	      && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7308 	    {
7309 	      *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7310 	      return true;
7311 	    }
7312 
7313 	  if (CONST_INT_P (XEXP (x, 1))
7314 	      && const_ok_for_arm (INTVAL (XEXP (x, 1))))
7315 	    {
7316 	      *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7317 	      return true;
7318 	    }
7319 
7320 	  return false;
7321 	}
7322 
7323       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7324 	{
7325 	  if (TARGET_HARD_FLOAT
7326 	      && (mode == SFmode
7327 		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
7328 	    {
7329 	      *total = COSTS_N_INSNS (1);
7330 	      if (CONST_DOUBLE_P (XEXP (x, 0))
7331 		  && arm_const_double_rtx (XEXP (x, 0)))
7332 		{
7333 		  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7334 		  return true;
7335 		}
7336 
7337 	      if (CONST_DOUBLE_P (XEXP (x, 1))
7338 		  && arm_const_double_rtx (XEXP (x, 1)))
7339 		{
7340 		  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7341 		  return true;
7342 		}
7343 
7344 	      return false;
7345 	    }
7346 	  *total = COSTS_N_INSNS (20);
7347 	  return false;
7348 	}
7349 
7350       *total = COSTS_N_INSNS (1);
7351       if (CONST_INT_P (XEXP (x, 0))
7352 	  && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7353 	{
7354 	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7355 	  return true;
7356 	}
7357 
7358       subcode = GET_CODE (XEXP (x, 1));
7359       if (subcode == ASHIFT || subcode == ASHIFTRT
7360 	  || subcode == LSHIFTRT
7361 	  || subcode == ROTATE || subcode == ROTATERT)
7362 	{
7363 	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7364 	  *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7365 	  return true;
7366 	}
7367 
7368       /* A shift as a part of RSB costs no more than RSB itself.  */
7369       if (GET_CODE (XEXP (x, 0)) == MULT
7370 	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7371 	{
7372 	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
7373 	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7374 	  return true;
7375 	}
7376 
7377       if (subcode == MULT
7378 	  && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
7379 	{
7380 	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7381 	  *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7382 	  return true;
7383 	}
7384 
7385       if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
7386 	  || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
7387 	{
7388 	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7389 	  if (REG_P (XEXP (XEXP (x, 1), 0))
7390 	      && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
7391 	    *total += COSTS_N_INSNS (1);
7392 
7393 	  return true;
7394 	}
7395 
7396       /* Fall through */
7397 
7398     case PLUS:
7399       if (code == PLUS && arm_arch6 && mode == SImode
7400 	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7401 	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7402 	{
7403 	  *total = COSTS_N_INSNS (1);
7404 	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
7405 			      0, speed);
7406 	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7407 	  return true;
7408 	}
7409 
7410       /* MLA: All arguments must be registers.  We filter out
7411 	 multiplication by a power of two, so that we fall through to
7412 	 the code below.  */
7413       if (GET_CODE (XEXP (x, 0)) == MULT
7414 	  && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7415 	{
7416 	  /* The cost comes from the cost of the multiply.  */
7417 	  return false;
7418 	}
7419 
7420       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7421 	{
7422 	  if (TARGET_HARD_FLOAT
7423 	      && (mode == SFmode
7424 		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
7425 	    {
7426 	      *total = COSTS_N_INSNS (1);
7427 	      if (CONST_DOUBLE_P (XEXP (x, 1))
7428 		  && arm_const_double_rtx (XEXP (x, 1)))
7429 		{
7430 		  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7431 		  return true;
7432 		}
7433 
7434 	      return false;
7435 	    }
7436 
7437 	  *total = COSTS_N_INSNS (20);
7438 	  return false;
7439 	}
7440 
7441       if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
7442 	  || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
7443 	{
7444 	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
7445 	  if (REG_P (XEXP (XEXP (x, 0), 0))
7446 	      && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
7447 	    *total += COSTS_N_INSNS (1);
7448 	  return true;
7449 	}
7450 
7451       /* Fall through */
7452 
7453     case AND: case XOR: case IOR:
7454 
7455       /* Normally the frame registers will be split into reg+const during
7456 	 reload, so it is a bad idea to combine them with other instructions,
7457 	 since then they might not be moved outside of loops.  As a compromise
7458 	 we allow integration with ops that have a constant as their second
7459 	 operand.  */
7460       if (REG_OR_SUBREG_REG (XEXP (x, 0))
7461 	  && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
7462 	  && !CONST_INT_P (XEXP (x, 1)))
7463 	*total = COSTS_N_INSNS (1);
7464 
7465       if (mode == DImode)
7466 	{
7467 	  *total += COSTS_N_INSNS (2);
7468 	  if (CONST_INT_P (XEXP (x, 1))
7469 	      && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7470 	    {
7471 	      *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7472 	      return true;
7473 	    }
7474 
7475 	  return false;
7476 	}
7477 
7478       *total += COSTS_N_INSNS (1);
7479       if (CONST_INT_P (XEXP (x, 1))
7480 	  && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7481 	{
7482 	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7483 	  return true;
7484 	}
7485       subcode = GET_CODE (XEXP (x, 0));
7486       if (subcode == ASHIFT || subcode == ASHIFTRT
7487 	  || subcode == LSHIFTRT
7488 	  || subcode == ROTATE || subcode == ROTATERT)
7489 	{
7490 	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7491 	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7492 	  return true;
7493 	}
7494 
7495       if (subcode == MULT
7496 	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7497 	{
7498 	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7499 	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7500 	  return true;
7501 	}
7502 
7503       if (subcode == UMIN || subcode == UMAX
7504 	  || subcode == SMIN || subcode == SMAX)
7505 	{
7506 	  *total = COSTS_N_INSNS (3);
7507 	  return true;
7508 	}
7509 
7510       return false;
7511 
7512     case MULT:
7513       /* This should have been handled by the CPU specific routines.  */
7514       gcc_unreachable ();
7515 
7516     case TRUNCATE:
7517       if (arm_arch3m && mode == SImode
7518 	  && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7519 	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7520 	  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
7521 	      == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
7522 	  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7523 	      || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
7524 	{
7525 	  *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
7526 	  return true;
7527 	}
7528       *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
7529       return false;
7530 
7531     case NEG:
7532       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7533 	{
7534 	  if (TARGET_HARD_FLOAT
7535 	      && (mode == SFmode
7536 		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
7537 	    {
7538 	      *total = COSTS_N_INSNS (1);
7539 	      return false;
7540 	    }
7541 	  *total = COSTS_N_INSNS (2);
7542 	  return false;
7543 	}
7544 
7545       /* Fall through */
7546     case NOT:
7547       *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
7548       if (mode == SImode && code == NOT)
7549 	{
7550 	  subcode = GET_CODE (XEXP (x, 0));
7551 	  if (subcode == ASHIFT || subcode == ASHIFTRT
7552 	      || subcode == LSHIFTRT
7553 	      || subcode == ROTATE || subcode == ROTATERT
7554 	      || (subcode == MULT
7555 		  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
7556 	    {
7557 	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7558 	      /* Register shifts cost an extra cycle.  */
7559 	      if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
7560 		*total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
7561 							subcode, 1, speed);
7562 	      return true;
7563 	    }
7564 	}
7565 
7566       return false;
7567 
7568     case IF_THEN_ELSE:
7569       if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7570 	{
7571 	  *total = COSTS_N_INSNS (4);
7572 	  return true;
7573 	}
7574 
7575       operand = XEXP (x, 0);
7576 
7577       if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
7578 	     || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
7579 	    && REG_P (XEXP (operand, 0))
7580 	    && REGNO (XEXP (operand, 0)) == CC_REGNUM))
7581 	*total += COSTS_N_INSNS (1);
7582       *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
7583 		 + rtx_cost (XEXP (x, 2), code, 2, speed));
7584       return true;
7585 
7586     case NE:
7587       if (mode == SImode && XEXP (x, 1) == const0_rtx)
7588 	{
7589 	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7590 	  return true;
7591 	}
7592       goto scc_insn;
7593 
7594     case GE:
7595       if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
7596 	  && mode == SImode && XEXP (x, 1) == const0_rtx)
7597 	{
7598 	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7599 	  return true;
7600 	}
7601       goto scc_insn;
7602 
7603     case LT:
7604       if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
7605 	  && mode == SImode && XEXP (x, 1) == const0_rtx)
7606 	{
7607 	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7608 	  return true;
7609 	}
7610       goto scc_insn;
7611 
7612     case EQ:
7613     case GT:
7614     case LE:
7615     case GEU:
7616     case LTU:
7617     case GTU:
7618     case LEU:
7619     case UNORDERED:
7620     case ORDERED:
7621     case UNEQ:
7622     case UNGE:
7623     case UNLT:
7624     case UNGT:
7625     case UNLE:
7626     scc_insn:
7627       /* SCC insns.  In the case where the comparison has already been
7628 	 performed, then they cost 2 instructions.  Otherwise they need
7629 	 an additional comparison before them.  */
7630       *total = COSTS_N_INSNS (2);
7631       if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
7632 	{
7633 	  return true;
7634 	}
7635 
7636       /* Fall through */
7637     case COMPARE:
7638       if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
7639 	{
7640 	  *total = 0;
7641 	  return true;
7642 	}
7643 
7644       *total += COSTS_N_INSNS (1);
7645       if (CONST_INT_P (XEXP (x, 1))
7646 	  && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7647 	{
7648 	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7649 	  return true;
7650 	}
7651 
7652       subcode = GET_CODE (XEXP (x, 0));
7653       if (subcode == ASHIFT || subcode == ASHIFTRT
7654 	  || subcode == LSHIFTRT
7655 	  || subcode == ROTATE || subcode == ROTATERT)
7656 	{
7657 	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7658 	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7659 	  return true;
7660 	}
7661 
7662       if (subcode == MULT
7663 	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7664 	{
7665 	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7666 	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7667 	  return true;
7668 	}
7669 
7670       return false;
7671 
7672     case UMIN:
7673     case UMAX:
7674     case SMIN:
7675     case SMAX:
7676       *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7677       if (!CONST_INT_P (XEXP (x, 1))
7678 	  || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7679 	*total += rtx_cost (XEXP (x, 1), code, 1, speed);
7680       return true;
7681 
7682     case ABS:
7683       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7684 	{
7685 	  if (TARGET_HARD_FLOAT
7686 	      && (mode == SFmode
7687 		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
7688 	    {
7689 	      *total = COSTS_N_INSNS (1);
7690 	      return false;
7691 	    }
7692 	  *total = COSTS_N_INSNS (20);
7693 	  return false;
7694 	}
7695       *total = COSTS_N_INSNS (1);
7696       if (mode == DImode)
7697 	*total += COSTS_N_INSNS (3);
7698       return false;
7699 
7700     case SIGN_EXTEND:
7701     case ZERO_EXTEND:
7702       *total = 0;
7703       if (GET_MODE_CLASS (mode) == MODE_INT)
7704 	{
7705 	  rtx op = XEXP (x, 0);
7706 	  enum machine_mode opmode = GET_MODE (op);
7707 
7708 	  if (mode == DImode)
7709 	    *total += COSTS_N_INSNS (1);
7710 
7711 	  if (opmode != SImode)
7712 	    {
7713 	      if (MEM_P (op))
7714 		{
7715 		  /* If !arm_arch4, we use one of the extendhisi2_mem
7716 		     or movhi_bytes patterns for HImode.  For a QImode
7717 		     sign extension, we first zero-extend from memory
7718 		     and then perform a shift sequence.  */
7719 		  if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7720 		    *total += COSTS_N_INSNS (2);
7721 		}
7722 	      else if (arm_arch6)
7723 		*total += COSTS_N_INSNS (1);
7724 
7725 	      /* We don't have the necessary insn, so we need to perform some
7726 		 other operation.  */
7727 	      else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7728 		/* An and with constant 255.  */
7729 		*total += COSTS_N_INSNS (1);
7730 	      else
7731 		/* A shift sequence.  Increase costs slightly to avoid
7732 		   combining two shifts into an extend operation.  */
7733 		*total += COSTS_N_INSNS (2) + 1;
7734 	    }
7735 
7736 	  return false;
7737 	}
7738 
7739       switch (GET_MODE (XEXP (x, 0)))
7740 	{
7741 	case V8QImode:
7742 	case V4HImode:
7743 	case V2SImode:
7744 	case V4QImode:
7745 	case V2HImode:
7746 	  *total = COSTS_N_INSNS (1);
7747 	  return false;
7748 
7749 	default:
7750 	  gcc_unreachable ();
7751 	}
7752       gcc_unreachable ();
7753 
7754     case ZERO_EXTRACT:
7755     case SIGN_EXTRACT:
7756       *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7757       return true;
7758 
7759     case CONST_INT:
7760       if (const_ok_for_arm (INTVAL (x))
7761 	  || const_ok_for_arm (~INTVAL (x)))
7762 	*total = COSTS_N_INSNS (1);
7763       else
7764 	*total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7765 						  INTVAL (x), NULL_RTX,
7766 						  NULL_RTX, 0, 0));
7767       return true;
7768 
7769     case CONST:
7770     case LABEL_REF:
7771     case SYMBOL_REF:
7772       *total = COSTS_N_INSNS (3);
7773       return true;
7774 
7775     case HIGH:
7776       *total = COSTS_N_INSNS (1);
7777       return true;
7778 
7779     case LO_SUM:
7780       *total = COSTS_N_INSNS (1);
7781       *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7782       return true;
7783 
7784     case CONST_DOUBLE:
7785       if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7786 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
7787 	*total = COSTS_N_INSNS (1);
7788       else
7789 	*total = COSTS_N_INSNS (4);
7790       return true;
7791 
7792     case SET:
7793       /* The vec_extract patterns accept memory operands that require an
7794 	 address reload.  Account for the cost of that reload to give the
7795 	 auto-inc-dec pass an incentive to try to replace them.  */
7796       if (TARGET_NEON && MEM_P (SET_DEST (x))
7797 	  && GET_CODE (SET_SRC (x)) == VEC_SELECT)
7798 	{
7799 	  *total = rtx_cost (SET_DEST (x), code, 0, speed);
7800 	  if (!neon_vector_mem_operand (SET_DEST (x), 2))
7801 	    *total += COSTS_N_INSNS (1);
7802 	  return true;
7803 	}
7804       /* Likewise for the vec_set patterns.  */
7805       if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
7806 	  && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
7807 	  && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
7808 	{
7809 	  rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
7810 	  *total = rtx_cost (mem, code, 0, speed);
7811 	  if (!neon_vector_mem_operand (mem, 2))
7812 	    *total += COSTS_N_INSNS (1);
7813 	  return true;
7814 	}
7815       return false;
7816 
7817     case UNSPEC:
7818       /* Cost this the same as a memory access, so that it can be
7819 	 hoisted out of loops.  */
7820       if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
7821 	{
7822 	  *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7823 	}
7824       return true;
7825 
7826     case CONST_VECTOR:
7827       if (TARGET_NEON
7828 	  && TARGET_HARD_FLOAT
7829 	  && outer == SET
7830 	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
7831 	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
7832 	*total = COSTS_N_INSNS (1);
7833       else
7834 	*total = COSTS_N_INSNS (4);
7835       return true;
7836 
7837     default:
7838       *total = COSTS_N_INSNS (4);
7839       return false;
7840     }
7841 }
7842 
7843 /* Estimate the size cost of Thumb-1 instructions.
7844    For now most of the code is copied from thumb1_rtx_costs.  We need more
7845    fine-grained tuning when we have more related test cases.  */
7846 static inline int
7847 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7848 {
7849   enum machine_mode mode = GET_MODE (x);
7850   int words;
7851 
7852   switch (code)
7853     {
7854     case ASHIFT:
7855     case ASHIFTRT:
7856     case LSHIFTRT:
7857     case ROTATERT:
7858       return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
7859 
7860     case PLUS:
7861     case MINUS:
7862       /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/
7863 	 shiftsub1 patterns produced by RTL expansion, especially when
7864 	 expanding multiplication.  */
7865       if ((GET_CODE (XEXP (x, 0)) == MULT
7866 	   && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
7867 	  || (GET_CODE (XEXP (x, 1)) == MULT
7868 	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
7869 	return COSTS_N_INSNS (2);
7870       /* Deliberately fall through for normal RTX.  */
7871     case COMPARE:
7872     case NEG:
7873     case NOT:
7874       return COSTS_N_INSNS (1);
7875 
7876     case MULT:
7877       if (CONST_INT_P (XEXP (x, 1)))
7878         {
7879           /* The Thumb-1 mul instruction can't operate on a constant; we must
7880              load it into a register first.  */
7881           int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7882           return COSTS_N_INSNS (1) + const_size;
7883         }
7884       return COSTS_N_INSNS (1);
7885 
7886     case SET:
7887       /* A SET doesn't have a mode, so let's look at the SET_DEST to get
7888 	 the mode.  */
7889       words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
7890       return (COSTS_N_INSNS (words)
7891               + 4 * ((MEM_P (SET_SRC (x)))
7892                      + MEM_P (SET_DEST (x))));
7893 
7894     case CONST_INT:
7895       if (outer == SET)
7896         {
7897           if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7898             return COSTS_N_INSNS (1);
7899 	  /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
7900 	  if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7901             return COSTS_N_INSNS (2);
7902 	  /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
7903           if (thumb_shiftable_const (INTVAL (x)))
7904             return COSTS_N_INSNS (2);
7905           return COSTS_N_INSNS (3);
7906         }
7907       else if ((outer == PLUS || outer == COMPARE)
7908                && INTVAL (x) < 256 && INTVAL (x) > -256)
7909         return 0;
7910       else if ((outer == IOR || outer == XOR || outer == AND)
7911                && INTVAL (x) < 256 && INTVAL (x) >= -256)
7912         return COSTS_N_INSNS (1);
7913       else if (outer == AND)
7914         {
7915           int i;
7916           /* This duplicates the tests in the andsi3 expander.  */
7917           for (i = 9; i <= 31; i++)
7918             if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7919                 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7920               return COSTS_N_INSNS (2);
7921         }
7922       else if (outer == ASHIFT || outer == ASHIFTRT
7923                || outer == LSHIFTRT)
7924         return 0;
7925       return COSTS_N_INSNS (2);
7926 
7927     case CONST:
7928     case CONST_DOUBLE:
7929     case LABEL_REF:
7930     case SYMBOL_REF:
7931       return COSTS_N_INSNS (3);
7932 
7933     case UDIV:
7934     case UMOD:
7935     case DIV:
7936     case MOD:
7937       return 100;
7938 
7939     case TRUNCATE:
7940       return 99;
7941 
7942     case AND:
7943     case XOR:
7944     case IOR:
7945       /* XXX guess.  */
7946       return 8;
7947 
7948     case MEM:
7949       /* XXX another guess.  */
7950       /* Memory costs quite a lot for the first word, but subsequent words
7951          load at the equivalent of a single insn each.  */
7952       return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7953               + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7954                  ? 4 : 0));
7955 
7956     case IF_THEN_ELSE:
7957       /* XXX a guess.  */
7958       if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7959         return 14;
7960       return 2;
7961 
7962     case ZERO_EXTEND:
7963       /* XXX still guessing.  */
7964       switch (GET_MODE (XEXP (x, 0)))
7965         {
7966           case QImode:
7967             return (1 + (mode == DImode ? 4 : 0)
7968                     + (MEM_P (XEXP (x, 0)) ? 10 : 0));
7969 
7970           case HImode:
7971             return (4 + (mode == DImode ? 4 : 0)
7972                     + (MEM_P (XEXP (x, 0)) ? 10 : 0));
7973 
7974           case SImode:
7975             return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
7976 
7977           default:
7978             return 99;
7979         }
7980 
7981     default:
7982       return 99;
7983     }
7984 }
7985 
7986 /* RTX costs when optimizing for size.  */
7987 static bool
7988 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7989 		    int *total)
7990 {
7991   enum machine_mode mode = GET_MODE (x);
7992   if (TARGET_THUMB1)
7993     {
7994       *total = thumb1_size_rtx_costs (x, code, outer_code);
7995       return true;
7996     }
7997 
7998   /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions.  */
7999   switch (code)
8000     {
8001     case MEM:
8002       /* A memory access costs 1 insn if the mode is small or the address is
8003 	 a single register; otherwise it costs one insn per word.  */
8004       if (REG_P (XEXP (x, 0)))
8005 	*total = COSTS_N_INSNS (1);
8006       else if (flag_pic
8007 	       && GET_CODE (XEXP (x, 0)) == PLUS
8008 	       && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
8009 	/* This will be split into two instructions.
8010 	   See arm.md:calculate_pic_address.  */
8011 	*total = COSTS_N_INSNS (2);
8012       else
8013 	*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8014       return true;
8015 
8016     case DIV:
8017     case MOD:
8018     case UDIV:
8019     case UMOD:
8020       /* Needs a libcall, so it costs about this.  */
8021       *total = COSTS_N_INSNS (2);
8022       return false;
8023 
8024     case ROTATE:
8025       if (mode == SImode && REG_P (XEXP (x, 1)))
8026 	{
8027 	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
8028 	  return true;
8029 	}
8030       /* Fall through */
8031     case ROTATERT:
8032     case ASHIFT:
8033     case LSHIFTRT:
8034     case ASHIFTRT:
8035       if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
8036 	{
8037 	  *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
8038 	  return true;
8039 	}
8040       else if (mode == SImode)
8041 	{
8042 	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
8043 	  /* Slightly disparage register shifts, but not by much.  */
8044 	  if (!CONST_INT_P (XEXP (x, 1)))
8045 	    *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
8046 	  return true;
8047 	}
8048 
8049       /* Needs a libcall.  */
8050       *total = COSTS_N_INSNS (2);
8051       return false;
8052 
8053     case MINUS:
8054       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8055 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
8056 	{
8057 	  *total = COSTS_N_INSNS (1);
8058 	  return false;
8059 	}
8060 
8061       if (mode == SImode)
8062 	{
8063 	  enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
8064 	  enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
8065 
8066 	  if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
8067 	      || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
8068 	      || subcode1 == ROTATE || subcode1 == ROTATERT
8069 	      || subcode1 == ASHIFT || subcode1 == LSHIFTRT
8070 	      || subcode1 == ASHIFTRT)
8071 	    {
8072 	      /* It's just the cost of the two operands.  */
8073 	      *total = 0;
8074 	      return false;
8075 	    }
8076 
8077 	  *total = COSTS_N_INSNS (1);
8078 	  return false;
8079 	}
8080 
8081       *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8082       return false;
8083 
8084     case PLUS:
8085       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8086 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
8087 	{
8088 	  *total = COSTS_N_INSNS (1);
8089 	  return false;
8090 	}
8091 
8092       /* A shift as a part of ADD costs nothing.  */
8093       if (GET_CODE (XEXP (x, 0)) == MULT
8094 	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8095 	{
8096 	  *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
8097 	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
8098 	  *total += rtx_cost (XEXP (x, 1), code, 1, false);
8099 	  return true;
8100 	}
8101 
8102       /* Fall through */
8103     case AND: case XOR: case IOR:
8104       if (mode == SImode)
8105 	{
8106 	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));
8107 
8108 	  if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
8109 	      || subcode == LSHIFTRT || subcode == ASHIFTRT
8110 	      || (code == AND && subcode == NOT))
8111 	    {
8112 	      /* It's just the cost of the two operands.  */
8113 	      *total = 0;
8114 	      return false;
8115 	    }
8116 	}
8117 
8118       *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8119       return false;
8120 
8121     case MULT:
8122       *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8123       return false;
8124 
8125     case NEG:
8126       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8127 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
8128 	{
8129 	  *total = COSTS_N_INSNS (1);
8130 	  return false;
8131 	}
8132 
8133       /* Fall through */
8134     case NOT:
8135       *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8136 
8137       return false;
8138 
8139     case IF_THEN_ELSE:
8140       *total = 0;
8141       return false;
8142 
8143     case COMPARE:
8144       if (cc_register (XEXP (x, 0), VOIDmode))
8145 	*total = 0;
8146       else
8147 	*total = COSTS_N_INSNS (1);
8148       return false;
8149 
8150     case ABS:
8151       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8152 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
8153 	*total = COSTS_N_INSNS (1);
8154       else
8155 	*total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
8156       return false;
8157 
8158     case SIGN_EXTEND:
8159     case ZERO_EXTEND:
8160       return arm_rtx_costs_1 (x, outer_code, total, 0);
8161 
8162     case CONST_INT:
8163       if (const_ok_for_arm (INTVAL (x)))
8164 	/* A multiplication by a constant requires another instruction
8165 	   to load the constant to a register.  */
8166 	*total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
8167 				? 1 : 0);
8168       else if (const_ok_for_arm (~INTVAL (x)))
8169 	*total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
8170       else if (const_ok_for_arm (-INTVAL (x)))
8171 	{
8172 	  if (outer_code == COMPARE || outer_code == PLUS
8173 	      || outer_code == MINUS)
8174 	    *total = 0;
8175 	  else
8176 	    *total = COSTS_N_INSNS (1);
8177 	}
8178       else
8179 	*total = COSTS_N_INSNS (2);
8180       return true;
8181 
8182     case CONST:
8183     case LABEL_REF:
8184     case SYMBOL_REF:
8185       *total = COSTS_N_INSNS (2);
8186       return true;
8187 
8188     case CONST_DOUBLE:
8189       *total = COSTS_N_INSNS (4);
8190       return true;
8191 
8192     case CONST_VECTOR:
8193       if (TARGET_NEON
8194 	  && TARGET_HARD_FLOAT
8195 	  && outer_code == SET
8196 	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8197 	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8198 	*total = COSTS_N_INSNS (1);
8199       else
8200 	*total = COSTS_N_INSNS (4);
8201       return true;
8202 
8203     case HIGH:
8204     case LO_SUM:
8205       /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
8206 	 cost of these slightly.  */
8207       *total = COSTS_N_INSNS (1) + 1;
8208       return true;
8209 
8210     case SET:
8211       return false;
8212 
8213     default:
8214       if (mode != VOIDmode)
8215 	*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8216       else
8217 	*total = COSTS_N_INSNS (4); /* Who knows?  */
8218       return false;
8219     }
8220 }
8221 
8222 /* RTX costs.  Use the size costs when optimizing for size, otherwise the
8222    per-core speed costs.  */
8223 static bool
8224 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
8225 	       int *total, bool speed)
8226 {
8227   if (!speed)
8228     return arm_size_rtx_costs (x, (enum rtx_code) code,
8229 			       (enum rtx_code) outer_code, total);
8230   else
8231     return current_tune->rtx_costs (x, (enum rtx_code) code,
8232 				    (enum rtx_code) outer_code,
8233 				    total, speed);
8234 }
8235 
8236 /* RTX costs for cores with a slow MUL implementation.  Thumb-2 is not
8237    supported on any "slowmul" cores, so it can be ignored.  */
8238 
8239 static bool
8240 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8241 		       int *total, bool speed)
8242 {
8243   enum machine_mode mode = GET_MODE (x);
8244 
8245   if (TARGET_THUMB)
8246     {
8247       *total = thumb1_rtx_costs (x, code, outer_code);
8248       return true;
8249     }
8250 
8251   switch (code)
8252     {
8253     case MULT:
8254       if (GET_MODE_CLASS (mode) == MODE_FLOAT
8255 	  || mode == DImode)
8256 	{
8257 	  *total = COSTS_N_INSNS (20);
8258 	  return false;
8259 	}
8260 
8261       if (CONST_INT_P (XEXP (x, 1)))
8262 	{
8263 	  unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8264 				      & (unsigned HOST_WIDE_INT) 0xffffffff);
8265 	  int cost, const_ok = const_ok_for_arm (i);
8266 	  int j, booth_unit_size;
8267 
8268 	  /* Tune as appropriate.  */
8269 	  cost = const_ok ? 4 : 8;
8270 	  booth_unit_size = 2;
8271 	  for (j = 0; i && j < 32; j += booth_unit_size)
8272 	    {
8273 	      i >>= booth_unit_size;
8274 	      cost++;
8275 	    }
8276 
8277 	  *total = COSTS_N_INSNS (cost);
8278 	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8279 	  return true;
8280 	}
8281 
8282       *total = COSTS_N_INSNS (20);
8283       return false;
8284 
8285     default:
8286       return arm_rtx_costs_1 (x, outer_code, total, speed);
8287     }
8288 }
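
/* An illustrative walk through the Booth-style pricing loop above, assuming
   a constant multiplier of 0xff (which const_ok_for_arm accepts, so the base
   cost is 4): with booth_unit_size == 2 the value is shifted four times
   (0xff -> 0x3f -> 0xf -> 0x3 -> 0), bumping the cost to 8, so the MULT is
   priced at COSTS_N_INSNS (8) plus the cost of operand 0.  */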
8289 
8290 
8291 /* RTX cost for cores with a fast multiply unit (M variants).  */
8292 
8293 static bool
8294 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8295 		       int *total, bool speed)
8296 {
8297   enum machine_mode mode = GET_MODE (x);
8298 
8299   if (TARGET_THUMB1)
8300     {
8301       *total = thumb1_rtx_costs (x, code, outer_code);
8302       return true;
8303     }
8304 
8305   /* ??? Should Thumb-2 use different costs?  */
8306   switch (code)
8307     {
8308     case MULT:
8309       /* There is no point basing this on the tuning, since it is always the
8310 	 fast variant if it exists at all.  */
8311       if (mode == DImode
8312 	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8313 	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8314 	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8315 	{
8316 	  *total = COSTS_N_INSNS (2);
8317 	  return false;
8318 	}
8319 
8320 
8321       if (mode == DImode)
8322 	{
8323 	  *total = COSTS_N_INSNS (5);
8324 	  return false;
8325 	}
8326 
8327       if (CONST_INT_P (XEXP (x, 1)))
8328 	{
8329 	  unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8330 				      & (unsigned HOST_WIDE_INT) 0xffffffff);
8331 	  int cost, const_ok = const_ok_for_arm (i);
8332 	  int j, booth_unit_size;
8333 
8334 	  /* Tune as appropriate.  */
8335 	  cost = const_ok ? 4 : 8;
8336 	  booth_unit_size = 8;
8337 	  for (j = 0; i && j < 32; j += booth_unit_size)
8338 	    {
8339 	      i >>= booth_unit_size;
8340 	      cost++;
8341 	    }
8342 
8343 	  *total = COSTS_N_INSNS (cost);
8344 	  return false;
8345 	}
8346 
8347       if (mode == SImode)
8348 	{
8349 	  *total = COSTS_N_INSNS (4);
8350 	  return false;
8351 	}
8352 
8353       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8354 	{
8355 	  if (TARGET_HARD_FLOAT
8356 	      && (mode == SFmode
8357 		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
8358 	    {
8359 	      *total = COSTS_N_INSNS (1);
8360 	      return false;
8361 	    }
8362 	}
8363 
8364       /* Requires a libcall.  */
8365       *total = COSTS_N_INSNS (20);
8366       return false;
8367 
8368     default:
8369       return arm_rtx_costs_1 (x, outer_code, total, speed);
8370     }
8371 }
8372 
8373 
8374 /* RTX cost for XScale CPUs.  Thumb-2 is not supported on any XScale cores,
8375    so it can be ignored.  */
8376 
8377 static bool
8378 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8379 		      int *total, bool speed)
8380 {
8381   enum machine_mode mode = GET_MODE (x);
8382 
8383   if (TARGET_THUMB)
8384     {
8385       *total = thumb1_rtx_costs (x, code, outer_code);
8386       return true;
8387     }
8388 
8389   switch (code)
8390     {
8391     case COMPARE:
8392       if (GET_CODE (XEXP (x, 0)) != MULT)
8393 	return arm_rtx_costs_1 (x, outer_code, total, speed);
8394 
8395       /* A COMPARE of a MULT is slow on XScale; the muls instruction
8396 	 will stall until the multiplication is complete.  */
8397       *total = COSTS_N_INSNS (3);
8398       return false;
8399 
8400     case MULT:
8401       /* There is no point basing this on the tuning, since it is always the
8402 	 fast variant if it exists at all.  */
8403       if (mode == DImode
8404 	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8405 	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8406 	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8407 	{
8408 	  *total = COSTS_N_INSNS (2);
8409 	  return false;
8410 	}
8411 
8412 
8413       if (mode == DImode)
8414 	{
8415 	  *total = COSTS_N_INSNS (5);
8416 	  return false;
8417 	}
8418 
8419       if (CONST_INT_P (XEXP (x, 1)))
8420 	{
8421 	  /* If operand 1 is a constant we can more accurately
8422 	     calculate the cost of the multiply.  The multiplier can
8423 	     retire 15 bits on the first cycle and a further 12 on the
8424 	     second.  We do, of course, have to load the constant into
8425 	     a register first.  */
8426 	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8427 	  /* There's a general overhead of one cycle.  */
8428 	  int cost = 1;
8429 	  unsigned HOST_WIDE_INT masked_const;
8430 
8431 	  if (i & 0x80000000)
8432 	    i = ~i;
8433 
8434 	  i &= (unsigned HOST_WIDE_INT) 0xffffffff;
8435 
8436 	  masked_const = i & 0xffff8000;
8437 	  if (masked_const != 0)
8438 	    {
8439 	      cost++;
8440 	      masked_const = i & 0xf8000000;
8441 	      if (masked_const != 0)
8442 		cost++;
8443 	    }
8444 	  *total = COSTS_N_INSNS (cost);
8445 	  return false;
8446 	}
8447 
8448       if (mode == SImode)
8449 	{
8450 	  *total = COSTS_N_INSNS (3);
8451 	  return false;
8452 	}
8453 
8454       /* Requires a libcall.  */
8455       *total = COSTS_N_INSNS (20);
8456       return false;
8457 
8458     default:
8459       return arm_rtx_costs_1 (x, outer_code, total, speed);
8460     }
8461 }
8462 
8463 
8464 /* RTX costs for 9e (and later) cores.  */
8465 
8466 static bool
8467 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8468 		  int *total, bool speed)
8469 {
8470   enum machine_mode mode = GET_MODE (x);
8471 
8472   if (TARGET_THUMB1)
8473     {
8474       switch (code)
8475 	{
8476 	case MULT:
8477 	  *total = COSTS_N_INSNS (3);
8478 	  return true;
8479 
8480 	default:
8481 	  *total = thumb1_rtx_costs (x, code, outer_code);
8482 	  return true;
8483 	}
8484     }
8485 
8486   switch (code)
8487     {
8488     case MULT:
8489       /* There is no point basing this on the tuning, since it is always the
8490 	 fast variant if it exists at all.  */
8491       if (mode == DImode
8492 	  && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8493 	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8494 	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8495 	{
8496 	  *total = COSTS_N_INSNS (2);
8497 	  return false;
8498 	}
8499 
8500 
8501       if (mode == DImode)
8502 	{
8503 	  *total = COSTS_N_INSNS (5);
8504 	  return false;
8505 	}
8506 
8507       if (mode == SImode)
8508 	{
8509 	  *total = COSTS_N_INSNS (2);
8510 	  return false;
8511 	}
8512 
8513       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8514 	{
8515 	  if (TARGET_HARD_FLOAT
8516 	      && (mode == SFmode
8517 		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
8518 	    {
8519 	      *total = COSTS_N_INSNS (1);
8520 	      return false;
8521 	    }
8522 	}
8523 
8524       *total = COSTS_N_INSNS (20);
8525       return false;
8526 
8527     default:
8528       return arm_rtx_costs_1 (x, outer_code, total, speed);
8529     }
8530 }
8531 /* All address computations that can be done are free, but rtx cost returns
8532    the same for practically all of them.  So we weight the different types
8533    of address here in the order (most pref first):
8534    PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
8535 static inline int
8536 arm_arm_address_cost (rtx x)
8537 {
8538   enum rtx_code c  = GET_CODE (x);
8539 
8540   if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
8541     return 0;
8542   if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
8543     return 10;
8544 
8545   if (c == PLUS)
8546     {
8547       if (CONST_INT_P (XEXP (x, 1)))
8548 	return 2;
8549 
8550       if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
8551 	return 3;
8552 
8553       return 4;
8554     }
8555 
8556   return 6;
8557 }
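
/* Illustrative values from the weighting above (ARM state), with rN and rM
   standing for arbitrary registers:

     (post_inc rN)                        -> 0
     (plus rN (const_int 8))              -> 2
     (plus rN (ashift rM (const_int 2)))  -> 3
     (plus rN rM)                         -> 4
     rN                                   -> 6
     (symbol_ref "foo")                   -> 10  */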
8558 
8559 static inline int
8560 arm_thumb_address_cost (rtx x)
8561 {
8562   enum rtx_code c  = GET_CODE (x);
8563 
8564   if (c == REG)
8565     return 1;
8566   if (c == PLUS
8567       && REG_P (XEXP (x, 0))
8568       && CONST_INT_P (XEXP (x, 1)))
8569     return 1;
8570 
8571   return 2;
8572 }
8573 
8574 static int
8575 arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
8576 		  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
8577 {
8578   return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
8579 }
8580 
8581 /* Adjust cost hook for XScale.  */
8582 static bool
8583 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8584 {
8585   /* Some true dependencies can have a higher cost depending
8586      on precisely how certain input operands are used.  */
8587   if (REG_NOTE_KIND(link) == 0
8588       && recog_memoized (insn) >= 0
8589       && recog_memoized (dep) >= 0)
8590     {
8591       int shift_opnum = get_attr_shift (insn);
8592       enum attr_type attr_type = get_attr_type (dep);
8593 
8594       /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
8595 	 operand for INSN.  If we have a shifted input operand and the
8596 	 instruction we depend on is another ALU instruction, then we may
8597 	 have to account for an additional stall.  */
8598       if (shift_opnum != 0
8599 	  && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
8600 	{
8601 	  rtx shifted_operand;
8602 	  int opno;
8603 
8604 	  /* Get the shifted operand.  */
8605 	  extract_insn (insn);
8606 	  shifted_operand = recog_data.operand[shift_opnum];
8607 
8608 	  /* Iterate over all the operands in DEP.  If we write an operand
8609 	     that overlaps with SHIFTED_OPERAND, then we have increase the
8610 	     cost of this dependency.  */
8611 	  extract_insn (dep);
8612 	  preprocess_constraints ();
8613 	  for (opno = 0; opno < recog_data.n_operands; opno++)
8614 	    {
8615 	      /* We can ignore strict inputs.  */
8616 	      if (recog_data.operand_type[opno] == OP_IN)
8617 		continue;
8618 
8619 	      if (reg_overlap_mentioned_p (recog_data.operand[opno],
8620 					   shifted_operand))
8621 		{
8622 		  *cost = 2;
8623 		  return false;
8624 		}
8625 	    }
8626 	}
8627     }
8628   return true;
8629 }
8630 
8631 /* Adjust cost hook for Cortex A9.  */
8632 static bool
8633 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8634 {
8635   switch (REG_NOTE_KIND (link))
8636     {
8637     case REG_DEP_ANTI:
8638       *cost = 0;
8639       return false;
8640 
8641     case REG_DEP_TRUE:
8642     case REG_DEP_OUTPUT:
8643 	if (recog_memoized (insn) >= 0
8644 	    && recog_memoized (dep) >= 0)
8645 	  {
8646 	    if (GET_CODE (PATTERN (insn)) == SET)
8647 	      {
8648 		if (GET_MODE_CLASS
8649 		    (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
8650 		  || GET_MODE_CLASS
8651 		    (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
8652 		  {
8653 		    enum attr_type attr_type_insn = get_attr_type (insn);
8654 		    enum attr_type attr_type_dep = get_attr_type (dep);
8655 
8656 		    /* By default all dependencies of the form
8657 		       s0 = s0 <op> s1
8658 		       s0 = s0 <op> s2
8659 		       have an extra latency of 1 cycle because
8660 		       of the input and output dependency in this
8661 		       case.  However, this gets modeled as a true
8662 		       dependency and hence all these checks.  */
8663 		    if (REG_P (SET_DEST (PATTERN (insn)))
8664 			&& REG_P (SET_DEST (PATTERN (dep)))
8665 			&& reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
8666 						    SET_DEST (PATTERN (dep))))
8667 		      {
8668 			/* FMACS is a special case where the dependent
8669 			   instruction can be issued 3 cycles before
8670 			   the normal latency in case of an output
8671 			   dependency.  */
8672 			if ((attr_type_insn == TYPE_FMACS
8673 			     || attr_type_insn == TYPE_FMACD)
8674 			    && (attr_type_dep == TYPE_FMACS
8675 				|| attr_type_dep == TYPE_FMACD))
8676 			  {
8677 			    if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8678 			      *cost = insn_default_latency (dep) - 3;
8679 			    else
8680 			      *cost = insn_default_latency (dep);
8681 			    return false;
8682 			  }
8683 			else
8684 			  {
8685 			    if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8686 			      *cost = insn_default_latency (dep) + 1;
8687 			    else
8688 			      *cost = insn_default_latency (dep);
8689 			  }
8690 			return false;
8691 		      }
8692 		  }
8693 	      }
8694 	  }
8695 	break;
8696 
8697     default:
8698       gcc_unreachable ();
8699     }
8700 
8701   return true;
8702 }
8703 
8704 /* Adjust cost hook for FA726TE.  */
8705 static bool
8706 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8707 {
8708   /* For FA726TE, a true dependency on CPSR (i.e. a set-cond insn followed by
8709      a predicated one) has a penalty of 3.  */
8710   if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8711       && recog_memoized (insn) >= 0
8712       && recog_memoized (dep) >= 0
8713       && get_attr_conds (dep) == CONDS_SET)
8714     {
8715       /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
8716       if (get_attr_conds (insn) == CONDS_USE
8717           && get_attr_type (insn) != TYPE_BRANCH)
8718         {
8719           *cost = 3;
8720           return false;
8721         }
8722 
8723       if (GET_CODE (PATTERN (insn)) == COND_EXEC
8724           || get_attr_conds (insn) == CONDS_USE)
8725         {
8726           *cost = 0;
8727           return false;
8728         }
8729     }
8730 
8731   return true;
8732 }
8733 
8734 /* Implement TARGET_REGISTER_MOVE_COST.
8735 
8736    Moves between VFP_REGS and GENERAL_REGS are a single insn, but
8737    such a move is typically more expensive than a single memory access.  We set
8738    the cost to less than two memory accesses so that floating
8739    point to integer conversion does not go through memory.  */
8740 
8741 int
8742 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
8743 			reg_class_t from, reg_class_t to)
8744 {
8745   if (TARGET_32BIT)
8746     {
8747       if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
8748 	  || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
8749 	return 15;
8750       else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
8751 	       || (from != IWMMXT_REGS && to == IWMMXT_REGS))
8752 	return 4;
8753       else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
8754 	return 20;
8755       else
8756 	return 2;
8757     }
8758   else
8759     {
8760       if (from == HI_REGS || to == HI_REGS)
8761 	return 4;
8762       else
8763 	return 2;
8764     }
8765 }
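
/* With the figures above and the memory move cost of 10 below, a transfer
   between a VFP register and a core register (15) stays cheaper than a round
   trip through memory (two accesses, 20), which is what keeps
   float-to-integer conversions out of memory.  */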
8766 
8767 /* Implement TARGET_MEMORY_MOVE_COST.  */
8768 
8769 int
8770 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
8771 		      bool in ATTRIBUTE_UNUSED)
8772 {
8773   if (TARGET_32BIT)
8774     return 10;
8775   else
8776     {
8777       if (GET_MODE_SIZE (mode) < 4)
8778 	return 8;
8779       else
8780 	return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
8781     }
8782 }
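
/* For illustration: every 32-bit target reports 10 here; on Thumb-1 a QImode
   or HImode access costs 8, while an SImode access costs 2 * 4 * 1 = 8 from
   LO_REGS and 2 * 4 * 2 = 16 from any other class.  */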
8783 
8784 /* Vectorizer cost model implementation.  */
8785 
8786 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
8787 static int
8788 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
8789 				tree vectype,
8790 				int misalign ATTRIBUTE_UNUSED)
8791 {
8792   unsigned elements;
8793 
8794   switch (type_of_cost)
8795     {
8796       case scalar_stmt:
8797         return current_tune->vec_costs->scalar_stmt_cost;
8798 
8799       case scalar_load:
8800         return current_tune->vec_costs->scalar_load_cost;
8801 
8802       case scalar_store:
8803         return current_tune->vec_costs->scalar_store_cost;
8804 
8805       case vector_stmt:
8806         return current_tune->vec_costs->vec_stmt_cost;
8807 
8808       case vector_load:
8809         return current_tune->vec_costs->vec_align_load_cost;
8810 
8811       case vector_store:
8812         return current_tune->vec_costs->vec_store_cost;
8813 
8814       case vec_to_scalar:
8815         return current_tune->vec_costs->vec_to_scalar_cost;
8816 
8817       case scalar_to_vec:
8818         return current_tune->vec_costs->scalar_to_vec_cost;
8819 
8820       case unaligned_load:
8821         return current_tune->vec_costs->vec_unalign_load_cost;
8822 
8823       case unaligned_store:
8824         return current_tune->vec_costs->vec_unalign_store_cost;
8825 
8826       case cond_branch_taken:
8827         return current_tune->vec_costs->cond_taken_branch_cost;
8828 
8829       case cond_branch_not_taken:
8830         return current_tune->vec_costs->cond_not_taken_branch_cost;
8831 
8832       case vec_perm:
8833       case vec_promote_demote:
8834         return current_tune->vec_costs->vec_stmt_cost;
8835 
8836       case vec_construct:
8837 	elements = TYPE_VECTOR_SUBPARTS (vectype);
8838 	return elements / 2 + 1;
8839 
8840       default:
8841         gcc_unreachable ();
8842     }
8843 }
8844 
8845 /* Implement targetm.vectorize.add_stmt_cost.  */
8846 
8847 static unsigned
8848 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
8849 		   struct _stmt_vec_info *stmt_info, int misalign,
8850 		   enum vect_cost_model_location where)
8851 {
8852   unsigned *cost = (unsigned *) data;
8853   unsigned retval = 0;
8854 
8855   if (flag_vect_cost_model)
8856     {
8857       tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
8858       int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
8859 
8860       /* Statements in an inner loop relative to the loop being
8861 	 vectorized are weighted more heavily.  The value here is
8862 	 arbitrary and could potentially be improved with analysis.  */
8863       if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
8864 	count *= 50;  /* FIXME.  */
8865 
8866       retval = (unsigned) (count * stmt_cost);
8867       cost[where] += retval;
8868     }
8869 
8870   return retval;
8871 }
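
/* A rough example of the weighting above: with the cost model enabled, a
   vector_stmt counted twice inside the inner loop of a nest contributes
   2 * 50 * vec_stmt_cost (taken from the current tuning) to cost[vect_body],
   whereas the same statement outside an inner loop contributes only
   2 * vec_stmt_cost.  */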
8872 
8873 /* Return true if and only if this insn can dual-issue only as older.  */
8874 static bool
8875 cortexa7_older_only (rtx insn)
8876 {
8877   if (recog_memoized (insn) < 0)
8878     return false;
8879 
8880   if (get_attr_insn (insn) == INSN_MOV)
8881     return false;
8882 
8883   switch (get_attr_type (insn))
8884     {
8885     case TYPE_ALU_REG:
8886     case TYPE_LOAD_BYTE:
8887     case TYPE_LOAD1:
8888     case TYPE_STORE1:
8889     case TYPE_FFARITHS:
8890     case TYPE_FADDS:
8891     case TYPE_FFARITHD:
8892     case TYPE_FADDD:
8893     case TYPE_FCPYS:
8894     case TYPE_F_CVT:
8895     case TYPE_FCMPS:
8896     case TYPE_FCMPD:
8897     case TYPE_FCONSTS:
8898     case TYPE_FCONSTD:
8899     case TYPE_FMULS:
8900     case TYPE_FMACS:
8901     case TYPE_FMULD:
8902     case TYPE_FMACD:
8903     case TYPE_FDIVS:
8904     case TYPE_FDIVD:
8905     case TYPE_F_2_R:
8906     case TYPE_F_FLAG:
8907     case TYPE_F_LOADS:
8908     case TYPE_F_STORES:
8909       return true;
8910     default:
8911       return false;
8912     }
8913 }
8914 
8915 /* Return true if and only if this insn can dual-issue as younger.  */
8916 static bool
8917 cortexa7_younger (FILE *file, int verbose, rtx insn)
8918 {
8919   if (recog_memoized (insn) < 0)
8920     {
8921       if (verbose > 5)
8922         fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
8923       return false;
8924     }
8925 
8926   if (get_attr_insn (insn) == INSN_MOV)
8927     return true;
8928 
8929   switch (get_attr_type (insn))
8930     {
8931     case TYPE_SIMPLE_ALU_IMM:
8932     case TYPE_SIMPLE_ALU_SHIFT:
8933     case TYPE_BRANCH:
8934     case TYPE_CALL:
8935       return true;
8936     default:
8937       return false;
8938     }
8939 }
8940 
8941 
8942 /* Look for an instruction that can dual issue only as an older
8943    instruction, and move it in front of any instructions that can
8944    dual-issue as younger, while preserving the relative order of all
8945    other instructions in the ready list.  This is a heuristic to help
8946    dual-issue in later cycles, by postponing issue of more flexible
8947    instructions.  This heuristic may affect dual issue opportunities
8948    in the current cycle.  */
8949 static void
8950 cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
8951                         int clock)
8952 {
8953   int i;
8954   int first_older_only = -1, first_younger = -1;
8955 
8956   if (verbose > 5)
8957     fprintf (file,
8958              ";; sched_reorder for cycle %d with %d insns in ready list\n",
8959              clock,
8960              *n_readyp);
8961 
8962   /* Traverse the ready list from the head (the instruction to issue
8963      first), looking for the first instruction that can issue as
8964      younger and the first instruction that can dual-issue only as
8965      older.  */
8966   for (i = *n_readyp - 1; i >= 0; i--)
8967     {
8968       rtx insn = ready[i];
8969       if (cortexa7_older_only (insn))
8970         {
8971           first_older_only = i;
8972           if (verbose > 5)
8973             fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
8974           break;
8975         }
8976       else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
8977         first_younger = i;
8978     }
8979 
8980   /* Nothing to reorder because either no younger insn was found, or the
8981      insn that can dual-issue only as older appears before any insn that
8982      can dual-issue as younger.  */
8983   if (first_younger == -1)
8984     {
8985       if (verbose > 5)
8986         fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
8987       return;
8988     }
8989 
8990   /* Nothing to reorder because no older-only insn in the ready list.  */
8991   if (first_older_only == -1)
8992     {
8993       if (verbose > 5)
8994         fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
8995       return;
8996     }
8997 
8998   /* Move first_older_only insn before first_younger.  */
8999   if (verbose > 5)
9000     fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
9001              INSN_UID (ready[first_older_only]),
9002              INSN_UID (ready[first_younger]));
9003   rtx first_older_only_insn = ready[first_older_only];
9004   for (i = first_older_only; i < first_younger; i++)
9005     {
9006       ready[i] = ready[i+1];
9007     }
9008 
9009   ready[i] = first_older_only_insn;
9010   return;
9011 }
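
/* A small trace of the reordering above.  Suppose the ready list holds three
   insns, with the highest index issued first:

     ready[2] = Y2 (younger)   ready[1] = Y1 (younger)   ready[0] = O (older-only)

   The backwards scan records first_younger = 2 and first_older_only = 0, so
   Y1 and Y2 are shifted down one slot and O lands at index 2:

     ready[2] = O   ready[1] = Y2   ready[0] = Y1

   i.e. the older-only insn is now considered for issue first.  */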
9012 
9013 /* Implement TARGET_SCHED_REORDER. */
9014 static int
9015 arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
9016                    int clock)
9017 {
9018   switch (arm_tune)
9019     {
9020     case cortexa7:
9021       cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
9022       break;
9023     default:
9024       /* Do nothing for other cores.  */
9025       break;
9026     }
9027 
9028   return arm_issue_rate ();
9029 }
9030 
9031 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
9032    It corrects the value of COST based on the relationship between
9033    INSN and DEP through the dependence LINK.  It returns the new
9034    value. There is a per-core adjust_cost hook to adjust scheduler costs
9035    and the per-core hook can choose to completely override the generic
9036    adjust_cost function. Only put bits of code into arm_adjust_cost that
9037    are common across all cores.  */
9038 static int
9039 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
9040 {
9041   rtx i_pat, d_pat;
9042 
9043  /* When generating Thumb-1 code, we want to place flag-setting operations
9044     close to a conditional branch which depends on them, so that we can
9045     omit the comparison. */
9046   if (TARGET_THUMB1
9047       && REG_NOTE_KIND (link) == 0
9048       && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
9049       && recog_memoized (dep) >= 0
9050       && get_attr_conds (dep) == CONDS_SET)
9051     return 0;
9052 
9053   if (current_tune->sched_adjust_cost != NULL)
9054     {
9055       if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
9056 	return cost;
9057     }
9058 
9059   /* XXX Is this strictly true?  */
9060   if (REG_NOTE_KIND (link) == REG_DEP_ANTI
9061       || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
9062     return 0;
9063 
9064   /* Call insns don't incur a stall, even if they follow a load.  */
9065   if (REG_NOTE_KIND (link) == 0
9066       && CALL_P (insn))
9067     return 1;
9068 
9069   if ((i_pat = single_set (insn)) != NULL
9070       && MEM_P (SET_SRC (i_pat))
9071       && (d_pat = single_set (dep)) != NULL
9072       && MEM_P (SET_DEST (d_pat)))
9073     {
9074       rtx src_mem = XEXP (SET_SRC (i_pat), 0);
9075       /* This is a load after a store; there is no conflict if the load reads
9076 	 from a cached area.  Assume that loads from the stack, and from the
9077 	 constant pool are cached, and that others will miss.  This is a
9078 	 hack.  */
9079 
9080       if ((GET_CODE (src_mem) == SYMBOL_REF
9081 	   && CONSTANT_POOL_ADDRESS_P (src_mem))
9082 	  || reg_mentioned_p (stack_pointer_rtx, src_mem)
9083 	  || reg_mentioned_p (frame_pointer_rtx, src_mem)
9084 	  || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
9085 	return 1;
9086     }
9087 
9088   return cost;
9089 }
9090 
9091 static int
9092 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
9093 {
9094   if (TARGET_32BIT)
9095     return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
9096   else
9097     return (optimize > 0) ? 2 : 0;
9098 }
9099 
9100 static int
9101 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
9102 {
9103   return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
9104 }
9105 
9106 static bool fp_consts_inited = false;
9107 
9108 static REAL_VALUE_TYPE value_fp0;
9109 
9110 static void
9111 init_fp_table (void)
9112 {
9113   REAL_VALUE_TYPE r;
9114 
9115   r = REAL_VALUE_ATOF ("0", DFmode);
9116   value_fp0 = r;
9117   fp_consts_inited = true;
9118 }
9119 
9120 /* Return TRUE if rtx X is a valid immediate FP constant.  */
9121 int
9122 arm_const_double_rtx (rtx x)
9123 {
9124   REAL_VALUE_TYPE r;
9125 
9126   if (!fp_consts_inited)
9127     init_fp_table ();
9128 
9129   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9130   if (REAL_VALUE_MINUS_ZERO (r))
9131     return 0;
9132 
9133   if (REAL_VALUES_EQUAL (r, value_fp0))
9134     return 1;
9135 
9136   return 0;
9137 }
9138 
9139 /* VFPv3 has a fairly wide range of representable immediates, formed from
9140    "quarter-precision" floating-point values. These can be evaluated using this
9141    formula (with ^ for exponentiation):
9142 
9143      -1^s * n * 2^-r
9144 
9145    Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
9146    16 <= n <= 31 and 0 <= r <= 7.
9147 
9148    These values are mapped onto an 8-bit integer ABCDEFGH s.t.
9149 
9150      - A (most-significant) is the sign bit.
9151      - BCD are the exponent (encoded as r XOR 3).
9152      - EFGH are the mantissa (encoded as n - 16).
9153 */
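
/* For example, 1.0 is -1^0 * 16 * 2^-4, so s = 0, n = 16 and r = 4, giving
   A = 0, BCD = 4 XOR 3 = 0b111 and EFGH = 16 - 16 = 0b0000; the encoded
   index is therefore 0x70, which is what vfp3_const_double_index below
   computes for a CONST_DOUBLE holding 1.0.  */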
9154 
9155 /* Return an integer index for a VFPv3 immediate operand X suitable for the
9156    fconst[sd] instruction, or -1 if X isn't suitable.  */
9157 static int
9158 vfp3_const_double_index (rtx x)
9159 {
9160   REAL_VALUE_TYPE r, m;
9161   int sign, exponent;
9162   unsigned HOST_WIDE_INT mantissa, mant_hi;
9163   unsigned HOST_WIDE_INT mask;
9164   HOST_WIDE_INT m1, m2;
9165   int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
9166 
9167   if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
9168     return -1;
9169 
9170   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9171 
9172   /* We can't represent these things, so detect them first.  */
9173   if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
9174     return -1;
9175 
9176   /* Extract sign, exponent and mantissa.  */
9177   sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
9178   r = real_value_abs (&r);
9179   exponent = REAL_EXP (&r);
9180   /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
9181      highest (sign) bit, with a fixed binary point at bit point_pos.
9182      WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
9183      bits for the mantissa, this may fail (low bits would be lost).  */
9184   real_ldexp (&m, &r, point_pos - exponent);
9185   REAL_VALUE_TO_INT (&m1, &m2, m);
9186   mantissa = m1;
9187   mant_hi = m2;
9188 
9189   /* If there are bits set in the low part of the mantissa, we can't
9190      represent this value.  */
9191   if (mantissa != 0)
9192     return -1;
9193 
9194   /* Now make it so that mantissa contains the most-significant bits, and move
9195      the point_pos to indicate that the least-significant bits have been
9196      discarded.  */
9197   point_pos -= HOST_BITS_PER_WIDE_INT;
9198   mantissa = mant_hi;
9199 
9200   /* We can permit four significant bits of mantissa only, plus a high bit
9201      which is always 1.  */
9202   mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
9203   if ((mantissa & mask) != 0)
9204     return -1;
9205 
9206   /* Now we know the mantissa is in range, chop off the unneeded bits.  */
9207   mantissa >>= point_pos - 5;
9208 
9209   /* The mantissa may be zero. Disallow that case. (It's possible to load the
9210      floating-point immediate zero with Neon using an integer-zero load, but
9211      that case is handled elsewhere.)  */
9212   if (mantissa == 0)
9213     return -1;
9214 
9215   gcc_assert (mantissa >= 16 && mantissa <= 31);
9216 
9217   /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
9218      normalized significands are in the range [1, 2). (Our mantissa is shifted
9219      left 4 places at this point relative to normalized IEEE754 values).  GCC
9220      internally uses [0.5, 1) (see real.c), so the exponent returned from
9221      REAL_EXP must be altered.  */
9222   exponent = 5 - exponent;
9223 
9224   if (exponent < 0 || exponent > 7)
9225     return -1;
9226 
9227   /* Sign, mantissa and exponent are now in the correct form to plug into the
9228      formula described in the comment above.  */
9229   return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
9230 }
9231 
9232 /* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
9233 int
9234 vfp3_const_double_rtx (rtx x)
9235 {
9236   if (!TARGET_VFP3)
9237     return 0;
9238 
9239   return vfp3_const_double_index (x) != -1;
9240 }
9241 
9242 /* Recognize immediates which can be used in various Neon instructions. Legal
9243    immediates are described by the following table (for VMVN variants, the
9244    bitwise inverse of the constant shown is recognized. In either case, VMOV
9245    is output and the correct instruction to use for a given constant is chosen
9246    by the assembler). The constant shown is replicated across all elements of
9247    the destination vector.
9248 
9249    insn elems variant constant (binary)
9250    ---- ----- ------- -----------------
9251    vmov  i32     0    00000000 00000000 00000000 abcdefgh
9252    vmov  i32     1    00000000 00000000 abcdefgh 00000000
9253    vmov  i32     2    00000000 abcdefgh 00000000 00000000
9254    vmov  i32     3    abcdefgh 00000000 00000000 00000000
9255    vmov  i16     4    00000000 abcdefgh
9256    vmov  i16     5    abcdefgh 00000000
9257    vmvn  i32     6    00000000 00000000 00000000 abcdefgh
9258    vmvn  i32     7    00000000 00000000 abcdefgh 00000000
9259    vmvn  i32     8    00000000 abcdefgh 00000000 00000000
9260    vmvn  i32     9    abcdefgh 00000000 00000000 00000000
9261    vmvn  i16    10    00000000 abcdefgh
9262    vmvn  i16    11    abcdefgh 00000000
9263    vmov  i32    12    00000000 00000000 abcdefgh 11111111
9264    vmvn  i32    13    00000000 00000000 abcdefgh 11111111
9265    vmov  i32    14    00000000 abcdefgh 11111111 11111111
9266    vmvn  i32    15    00000000 abcdefgh 11111111 11111111
9267    vmov   i8    16    abcdefgh
9268    vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
9269                       eeeeeeee ffffffff gggggggg hhhhhhhh
9270    vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
9271    vmov  f32    19    00000000 00000000 00000000 00000000
9272 
9273    For case 18, B = !b. Representable values are exactly those accepted by
9274    vfp3_const_double_index, but are output as floating-point numbers rather
9275    than indices.
9276 
9277    For case 19, we will change it to vmov.i32 when assembling.
9278 
9279    Variants 0-5 (inclusive) may also be used as immediates for the second
9280    operand of VORR/VBIC instructions.
9281 
9282    The INVERSE argument causes the bitwise inverse of the given operand to be
9283    recognized instead (used for recognizing legal immediates for the VAND/VORN
9284    pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
9285    *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
9286    output, rather than the real insns vbic/vorr).
9287 
9288    INVERSE makes no difference to the recognition of float vectors.
9289 
9290    The return value is the variant of immediate as shown in the above table, or
9291    -1 if the given value doesn't match any of the listed patterns.
9292 */
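
/* A couple of illustrative matches against the table above: a V4SImode
   vector whose elements are all 0x0000002a is variant 0 (output as vmov.i32
   with abcdefgh = 0x2a and element width 32), while a V8QImode vector whose
   bytes are all 0x2a is variant 16 (vmov.i8).  */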
9293 static int
9294 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
9295 		      rtx *modconst, int *elementwidth)
9296 {
9297 #define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
9298   matches = 1;					\
9299   for (i = 0; i < idx; i += (STRIDE))		\
9300     if (!(TEST))				\
9301       matches = 0;				\
9302   if (matches)					\
9303     {						\
9304       immtype = (CLASS);			\
9305       elsize = (ELSIZE);			\
9306       break;					\
9307     }
9308 
9309   unsigned int i, elsize = 0, idx = 0, n_elts;
9310   unsigned int innersize;
9311   unsigned char bytes[16];
9312   int immtype = -1, matches;
9313   unsigned int invmask = inverse ? 0xff : 0;
9314   bool vector = GET_CODE (op) == CONST_VECTOR;
9315 
9316   if (vector)
9317     {
9318       n_elts = CONST_VECTOR_NUNITS (op);
9319       innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
9320     }
9321   else
9322     {
9323       n_elts = 1;
9324       if (mode == VOIDmode)
9325 	mode = DImode;
9326       innersize = GET_MODE_SIZE (mode);
9327     }
9328 
9329   /* Vectors of float constants.  */
9330   if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
9331     {
9332       rtx el0 = CONST_VECTOR_ELT (op, 0);
9333       REAL_VALUE_TYPE r0;
9334 
9335       if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
9336         return -1;
9337 
9338       REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
9339 
9340       for (i = 1; i < n_elts; i++)
9341         {
9342           rtx elt = CONST_VECTOR_ELT (op, i);
9343           REAL_VALUE_TYPE re;
9344 
9345           REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
9346 
9347           if (!REAL_VALUES_EQUAL (r0, re))
9348             return -1;
9349         }
9350 
9351       if (modconst)
9352         *modconst = CONST_VECTOR_ELT (op, 0);
9353 
9354       if (elementwidth)
9355         *elementwidth = 0;
9356 
9357       if (el0 == CONST0_RTX (GET_MODE (el0)))
9358 	return 19;
9359       else
9360 	return 18;
9361     }
9362 
9363   /* Splat vector constant out into a byte vector.  */
9364   for (i = 0; i < n_elts; i++)
9365     {
9366       rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
9367       unsigned HOST_WIDE_INT elpart;
9368       unsigned int part, parts;
9369 
9370       if (CONST_INT_P (el))
9371         {
9372           elpart = INTVAL (el);
9373           parts = 1;
9374         }
9375       else if (CONST_DOUBLE_P (el))
9376         {
9377           elpart = CONST_DOUBLE_LOW (el);
9378           parts = 2;
9379         }
9380       else
9381         gcc_unreachable ();
9382 
9383       for (part = 0; part < parts; part++)
9384         {
9385           unsigned int byte;
9386           for (byte = 0; byte < innersize; byte++)
9387             {
9388               bytes[idx++] = (elpart & 0xff) ^ invmask;
9389               elpart >>= BITS_PER_UNIT;
9390             }
9391           if (CONST_DOUBLE_P (el))
9392             elpart = CONST_DOUBLE_HIGH (el);
9393         }
9394     }
9395 
9396   /* Sanity check.  */
9397   gcc_assert (idx == GET_MODE_SIZE (mode));
9398 
9399   do
9400     {
9401       CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
9402 		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9403 
9404       CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
9405 		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9406 
9407       CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
9408 		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
9409 
9410       CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
9411 		       && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
9412 
9413       CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
9414 
9415       CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
9416 
9417       CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
9418 		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9419 
9420       CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
9421 		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9422 
9423       CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
9424 		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
9425 
9426       CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
9427 		       && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
9428 
9429       CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
9430 
9431       CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
9432 
9433       CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
9434 			&& bytes[i + 2] == 0 && bytes[i + 3] == 0);
9435 
9436       CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
9437 			&& bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9438 
9439       CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
9440 			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
9441 
9442       CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
9443 			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
9444 
9445       CHECK (1, 8, 16, bytes[i] == bytes[0]);
9446 
9447       CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
9448 			&& bytes[i] == bytes[(i + 8) % idx]);
9449     }
9450   while (0);
9451 
9452   if (immtype == -1)
9453     return -1;
9454 
9455   if (elementwidth)
9456     *elementwidth = elsize;
9457 
9458   if (modconst)
9459     {
9460       unsigned HOST_WIDE_INT imm = 0;
9461 
9462       /* Un-invert bytes of recognized vector, if necessary.  */
9463       if (invmask != 0)
9464         for (i = 0; i < idx; i++)
9465           bytes[i] ^= invmask;
9466 
9467       if (immtype == 17)
9468         {
9469           /* FIXME: Broken on 32-bit H_W_I hosts.  */
9470           gcc_assert (sizeof (HOST_WIDE_INT) == 8);
9471 
9472           for (i = 0; i < 8; i++)
9473             imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
9474                    << (i * BITS_PER_UNIT);
9475 
9476           *modconst = GEN_INT (imm);
9477         }
9478       else
9479         {
9480           unsigned HOST_WIDE_INT imm = 0;
9481 
9482           for (i = 0; i < elsize / BITS_PER_UNIT; i++)
9483             imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
9484 
9485           *modconst = GEN_INT (imm);
9486         }
9487     }
9488 
9489   return immtype;
9490 #undef CHECK
9491 }
9492 
9493 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
9494    VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
9495    float elements), and a modified constant (whatever should be output for a
9496    VMOV) in *MODCONST.  */
9497 
9498 int
9499 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
9500 			       rtx *modconst, int *elementwidth)
9501 {
9502   rtx tmpconst;
9503   int tmpwidth;
9504   int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
9505 
9506   if (retval == -1)
9507     return 0;
9508 
9509   if (modconst)
9510     *modconst = tmpconst;
9511 
9512   if (elementwidth)
9513     *elementwidth = tmpwidth;
9514 
9515   return 1;
9516 }
9517 
9518 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
9519    the immediate is valid, write a constant suitable for using as an operand
9520    to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
9521    *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE.  */
9522 
9523 int
9524 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
9525 				rtx *modconst, int *elementwidth)
9526 {
9527   rtx tmpconst;
9528   int tmpwidth;
9529   int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
9530 
9531   if (retval < 0 || retval > 5)
9532     return 0;
9533 
9534   if (modconst)
9535     *modconst = tmpconst;
9536 
9537   if (elementwidth)
9538     *elementwidth = tmpwidth;
9539 
9540   return 1;
9541 }
9542 
9543 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
9544    the immediate is valid, write a constant suitable for using as an operand
9545    to VSHR/VSHL to *MODCONST and the corresponding element width to
9546    *ELEMENTWIDTH.  ISLEFTSHIFT says whether this is a left or a right shift,
9547    because the two have different limits on the immediate.  */
9548 
9549 int
9550 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
9551 				rtx *modconst, int *elementwidth,
9552 				bool isleftshift)
9553 {
9554   unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
9555   unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
9556   unsigned HOST_WIDE_INT last_elt = 0;
9557   unsigned HOST_WIDE_INT maxshift;
9558 
9559   /* Split vector constant out into a byte vector.  */
9560   for (i = 0; i < n_elts; i++)
9561     {
9562       rtx el = CONST_VECTOR_ELT (op, i);
9563       unsigned HOST_WIDE_INT elpart;
9564 
9565       if (CONST_INT_P (el))
9566         elpart = INTVAL (el);
9567       else if (CONST_DOUBLE_P (el))
9568         return 0;
9569       else
9570         gcc_unreachable ();
9571 
9572       if (i != 0 && elpart != last_elt)
9573         return 0;
9574 
9575       last_elt = elpart;
9576     }
9577 
9578   /* Shift less than element size.  */
9579   maxshift = innersize * 8;
9580 
9581   if (isleftshift)
9582     {
9583       /* Left shift immediate value can be from 0 to <size>-1.  */
9584       if (last_elt >= maxshift)
9585         return 0;
9586     }
9587   else
9588     {
9589       /* Right shift immediate value can be from 1 to <size>.  */
9590       if (last_elt == 0 || last_elt > maxshift)
9591 	return 0;
9592     }
9593 
9594   if (elementwidth)
9595     *elementwidth = innersize * 8;
9596 
9597   if (modconst)
9598     *modconst = CONST_VECTOR_ELT (op, 0);
9599 
9600   return 1;
9601 }
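
/* As a concrete illustration: for a V4SImode shift-count vector the element
   size is 32 bits, so maxshift is 32; all four elements must be identical,
   a left shift accepts counts 0..31 and a right shift accepts 1..32.  */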
9602 
9603 /* Return a string suitable for output of Neon immediate logic operation
9604    MNEM.  */
9605 
9606 char *
9607 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
9608 			     int inverse, int quad)
9609 {
9610   int width, is_valid;
9611   static char templ[40];
9612 
9613   is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
9614 
9615   gcc_assert (is_valid != 0);
9616 
9617   if (quad)
9618     sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
9619   else
9620     sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
9621 
9622   return templ;
9623 }
9624 
9625 /* Return a string suitable for output of Neon immediate shift operation
9626    (VSHR or VSHL) MNEM.  */
9627 
9628 char *
9629 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
9630 			     enum machine_mode mode, int quad,
9631 			     bool isleftshift)
9632 {
9633   int width, is_valid;
9634   static char templ[40];
9635 
9636   is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
9637   gcc_assert (is_valid != 0);
9638 
9639   if (quad)
9640     sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
9641   else
9642     sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
9643 
9644   return templ;
9645 }
9646 
9647 /* Output a sequence of pairwise operations to implement a reduction.
9648    NOTE: We do "too much work" here, because pairwise operations work on two
9649    registers-worth of operands in one go.  Unfortunately, I don't think we can
9650    exploit those extra calculations to do the full operation in fewer steps.
9651    Although all vector elements of the result but the first are ignored, we
9652    actually calculate the same result in each of the elements. An alternative
9653    such as initially loading a vector with zero to use as each of the second
9654    operands would use up an additional register and take an extra instruction,
9655    for no particular gain.  */
9656 
9657 void
9658 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
9659 		      rtx (*reduc) (rtx, rtx, rtx))
9660 {
9661   enum machine_mode inner = GET_MODE_INNER (mode);
9662   unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
9663   rtx tmpsum = op1;
9664 
9665   for (i = parts / 2; i >= 1; i /= 2)
9666     {
9667       rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
9668       emit_insn (reduc (dest, tmpsum, tmpsum));
9669       tmpsum = dest;
9670     }
9671 }
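
/* For instance, reducing a V4SImode value takes two pairwise steps (parts is
   4, so the loop counter runs over 2 and then 1): the first step combines
   the input with itself into a fresh register and the second writes the
   final result into OP0; a V8QImode reduction would take three steps.  */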
9672 
9673 /* If VALS is a vector constant that can be loaded into a register
9674    using VDUP, generate instructions to do so and return an RTX to
9675    assign to the register.  Otherwise return NULL_RTX.  */
9676 
9677 static rtx
9678 neon_vdup_constant (rtx vals)
9679 {
9680   enum machine_mode mode = GET_MODE (vals);
9681   enum machine_mode inner_mode = GET_MODE_INNER (mode);
9682   int n_elts = GET_MODE_NUNITS (mode);
9683   bool all_same = true;
9684   rtx x;
9685   int i;
9686 
9687   if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
9688     return NULL_RTX;
9689 
9690   for (i = 0; i < n_elts; ++i)
9691     {
9692       x = XVECEXP (vals, 0, i);
9693       if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9694 	all_same = false;
9695     }
9696 
9697   if (!all_same)
9698     /* The elements are not all the same.  We could handle repeating
9699        patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
9700        {0, C, 0, C, 0, C, 0, C} which can be loaded using
9701        vdup.i16).  */
9702     return NULL_RTX;
9703 
9704   /* We can load this constant by using VDUP and a constant in a
9705      single ARM register.  This will be cheaper than a vector
9706      load.  */
9707 
9708   x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9709   return gen_rtx_VEC_DUPLICATE (mode, x);
9710 }
9711 
9712 /* Generate code to load VALS, which is a PARALLEL containing only
9713    constants (for vec_init) or CONST_VECTOR, efficiently into a
9714    register.  Returns an RTX to copy into the register, or NULL_RTX
9715    for a PARALLEL that cannot be converted into a CONST_VECTOR.  */
9716 
9717 rtx
9718 neon_make_constant (rtx vals)
9719 {
9720   enum machine_mode mode = GET_MODE (vals);
9721   rtx target;
9722   rtx const_vec = NULL_RTX;
9723   int n_elts = GET_MODE_NUNITS (mode);
9724   int n_const = 0;
9725   int i;
9726 
9727   if (GET_CODE (vals) == CONST_VECTOR)
9728     const_vec = vals;
9729   else if (GET_CODE (vals) == PARALLEL)
9730     {
9731       /* A CONST_VECTOR must contain only CONST_INTs and
9732 	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
9733 	 Only store valid constants in a CONST_VECTOR.  */
9734       for (i = 0; i < n_elts; ++i)
9735 	{
9736 	  rtx x = XVECEXP (vals, 0, i);
9737 	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
9738 	    n_const++;
9739 	}
9740       if (n_const == n_elts)
9741 	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
9742     }
9743   else
9744     gcc_unreachable ();
9745 
9746   if (const_vec != NULL
9747       && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
9748     /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
9749     return const_vec;
9750   else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
9751     /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
9752        pipeline cycle; creating the constant takes one or two ARM
9753        pipeline cycles.  */
9754     return target;
9755   else if (const_vec != NULL_RTX)
9756     /* Load from constant pool.  On Cortex-A8 this takes two cycles
9757        (for either double or quad vectors).  We cannot take advantage
9758        of single-cycle VLD1 because we need a PC-relative addressing
9759        mode.  */
9760     return const_vec;
9761   else
9762     /* A PARALLEL containing something not valid inside CONST_VECTOR.
9763        We can not construct an initializer.  */
9764     return NULL_RTX;
9765 }
9766 
9767 /* Initialize vector TARGET to VALS.  */
9768 
9769 void
9770 neon_expand_vector_init (rtx target, rtx vals)
9771 {
9772   enum machine_mode mode = GET_MODE (target);
9773   enum machine_mode inner_mode = GET_MODE_INNER (mode);
9774   int n_elts = GET_MODE_NUNITS (mode);
9775   int n_var = 0, one_var = -1;
9776   bool all_same = true;
9777   rtx x, mem;
9778   int i;
9779 
9780   for (i = 0; i < n_elts; ++i)
9781     {
9782       x = XVECEXP (vals, 0, i);
9783       if (!CONSTANT_P (x))
9784 	++n_var, one_var = i;
9785 
9786       if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9787 	all_same = false;
9788     }
9789 
9790   if (n_var == 0)
9791     {
9792       rtx constant = neon_make_constant (vals);
9793       if (constant != NULL_RTX)
9794 	{
9795 	  emit_move_insn (target, constant);
9796 	  return;
9797 	}
9798     }
9799 
9800   /* Splat a single non-constant element if we can.  */
9801   if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
9802     {
9803       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9804       emit_insn (gen_rtx_SET (VOIDmode, target,
9805 			      gen_rtx_VEC_DUPLICATE (mode, x)));
9806       return;
9807     }
9808 
9809   /* One field is non-constant.  Load constant then overwrite varying
9810      field.  This is more efficient than using the stack.  */
9811   if (n_var == 1)
9812     {
9813       rtx copy = copy_rtx (vals);
9814       rtx index = GEN_INT (one_var);
9815 
9816       /* Load constant part of vector, substitute neighboring value for
9817 	 varying element.  */
9818       XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
9819       neon_expand_vector_init (target, copy);
9820 
9821       /* Insert variable.  */
9822       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
9823       switch (mode)
9824 	{
9825 	case V8QImode:
9826 	  emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
9827 	  break;
9828 	case V16QImode:
9829 	  emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
9830 	  break;
9831 	case V4HImode:
9832 	  emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
9833 	  break;
9834 	case V8HImode:
9835 	  emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
9836 	  break;
9837 	case V2SImode:
9838 	  emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
9839 	  break;
9840 	case V4SImode:
9841 	  emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
9842 	  break;
9843 	case V2SFmode:
9844 	  emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
9845 	  break;
9846 	case V4SFmode:
9847 	  emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
9848 	  break;
9849 	case V2DImode:
9850 	  emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
9851 	  break;
9852 	default:
9853 	  gcc_unreachable ();
9854 	}
9855       return;
9856     }
9857 
9858   /* Construct the vector in memory one field at a time
9859      and load the whole vector.  */
9860   mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
9861   for (i = 0; i < n_elts; i++)
9862     emit_move_insn (adjust_address_nv (mem, inner_mode,
9863 				    i * GET_MODE_SIZE (inner_mode)),
9864 		    XVECEXP (vals, 0, i));
9865   emit_move_insn (target, mem);
9866 }
9867 
9868 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
9869    ERR if it doesn't.  FIXME: NEON bounds checks occur late in compilation, so
9870    reported source locations are bogus.  */
9871 
9872 static void
9873 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
9874 	      const char *err)
9875 {
9876   HOST_WIDE_INT lane;
9877 
9878   gcc_assert (CONST_INT_P (operand));
9879 
9880   lane = INTVAL (operand);
9881 
9882   if (lane < low || lane >= high)
9883     error (err);
9884 }
9885 
9886 /* Bounds-check lanes.  */
9887 
9888 void
9889 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9890 {
9891   bounds_check (operand, low, high, "lane out of range");
9892 }
9893 
9894 /* Bounds-check constants.  */
9895 
9896 void
9897 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9898 {
9899   bounds_check (operand, low, high, "constant out of range");
9900 }
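/* Usage sketch (illustrative; the operand number is hypothetical): an
   intrinsic expander for a four-lane vector might call

     neon_lane_bounds (operands[3], 0, 4);

   so that a lane index outside 0..3 is reported with the "lane out of
   range" error above.  */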
9901 
9902 HOST_WIDE_INT
9903 neon_element_bits (enum machine_mode mode)
9904 {
9905   if (mode == DImode)
9906     return GET_MODE_BITSIZE (mode);
9907   else
9908     return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
9909 }
9910 
9911 
9912 /* Predicates for `match_operand' and `match_operator'.  */
9913 
9914 /* Return TRUE if OP is a valid coprocessor memory address pattern.
9915    WB is true if full writeback address modes are allowed and is false
9916    if limited writeback address modes (POST_INC and PRE_DEC) are
9917    allowed.  */
9918 
9919 int
9920 arm_coproc_mem_operand (rtx op, bool wb)
9921 {
9922   rtx ind;
9923 
9924   /* Reject eliminable registers.  */
9925   if (! (reload_in_progress || reload_completed)
9926       && (   reg_mentioned_p (frame_pointer_rtx, op)
9927 	  || reg_mentioned_p (arg_pointer_rtx, op)
9928 	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
9929 	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9930 	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9931 	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9932     return FALSE;
9933 
9934   /* Constants are converted into offsets from labels.  */
9935   if (!MEM_P (op))
9936     return FALSE;
9937 
9938   ind = XEXP (op, 0);
9939 
9940   if (reload_completed
9941       && (GET_CODE (ind) == LABEL_REF
9942 	  || (GET_CODE (ind) == CONST
9943 	      && GET_CODE (XEXP (ind, 0)) == PLUS
9944 	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9945 	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
9946     return TRUE;
9947 
9948   /* Match: (mem (reg)).  */
9949   if (REG_P (ind))
9950     return arm_address_register_rtx_p (ind, 0);
9951 
9952   /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
9953      acceptable in any case (subject to verification by
9954      arm_address_register_rtx_p).  We need WB to be true to accept
9955      PRE_INC and POST_DEC.  */
9956   if (GET_CODE (ind) == POST_INC
9957       || GET_CODE (ind) == PRE_DEC
9958       || (wb
9959 	  && (GET_CODE (ind) == PRE_INC
9960 	      || GET_CODE (ind) == POST_DEC)))
9961     return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9962 
9963   if (wb
9964       && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
9965       && arm_address_register_rtx_p (XEXP (ind, 0), 0)
9966       && GET_CODE (XEXP (ind, 1)) == PLUS
9967       && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
9968     ind = XEXP (ind, 1);
9969 
9970   /* Match:
9971      (plus (reg)
9972 	   (const)).  */
9973   if (GET_CODE (ind) == PLUS
9974       && REG_P (XEXP (ind, 0))
9975       && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9976       && CONST_INT_P (XEXP (ind, 1))
9977       && INTVAL (XEXP (ind, 1)) > -1024
9978       && INTVAL (XEXP (ind, 1)) <  1024
9979       && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9980     return TRUE;
9981 
9982   return FALSE;
9983 }
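/* For illustration, after the eliminable-register and MEM checks the address
   forms accepted above are: a plain base register; POST_INC and PRE_DEC
   (always); PRE_INC and POST_DEC (only when WB is true); POST_MODIFY and
   PRE_MODIFY (when WB is true) whose update adds a constant to the base; and
   (plus (reg) (const_int N)) with N a multiple of 4 in the range
   -1020 .. 1020.  Label-relative addresses are also accepted once reload
   has completed.  */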
9984 
9985 /* Return TRUE if OP is a memory operand from or to which we can load or
9986    store a vector.  TYPE is one of the following values:
9987     0 - Vector load/store (vldr)
9988     1 - Core registers (ldm)
9989     2 - Element/structure loads (vld1)
9990  */
9991 int
9992 neon_vector_mem_operand (rtx op, int type)
9993 {
9994   rtx ind;
9995 
9996   /* Reject eliminable registers.  */
9997   if (! (reload_in_progress || reload_completed)
9998       && (   reg_mentioned_p (frame_pointer_rtx, op)
9999 	  || reg_mentioned_p (arg_pointer_rtx, op)
10000 	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
10001 	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
10002 	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
10003 	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
10004     return FALSE;
10005 
10006   /* Constants are converted into offsets from labels.  */
10007   if (!MEM_P (op))
10008     return FALSE;
10009 
10010   ind = XEXP (op, 0);
10011 
10012   if (reload_completed
10013       && (GET_CODE (ind) == LABEL_REF
10014 	  || (GET_CODE (ind) == CONST
10015 	      && GET_CODE (XEXP (ind, 0)) == PLUS
10016 	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
10017 	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
10018     return TRUE;
10019 
10020   /* Match: (mem (reg)).  */
10021   if (REG_P (ind))
10022     return arm_address_register_rtx_p (ind, 0);
10023 
10024   /* Allow post-increment with Neon registers.  */
10025   if ((type != 1 && GET_CODE (ind) == POST_INC)
10026       || (type == 0 && GET_CODE (ind) == PRE_DEC))
10027     return arm_address_register_rtx_p (XEXP (ind, 0), 0);
10028 
10029   /* FIXME: vld1 allows register post-modify.  */
10030 
10031   /* Match:
10032      (plus (reg)
10033           (const)).  */
10034   if (type == 0
10035       && GET_CODE (ind) == PLUS
10036       && REG_P (XEXP (ind, 0))
10037       && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
10038       && CONST_INT_P (XEXP (ind, 1))
10039       && INTVAL (XEXP (ind, 1)) > -1024
10040       /* For quad modes, we restrict the constant offset to be slightly less
10041 	 than what the instruction format permits.  We have no such constraint
10042 	 on double mode offsets.  (This must match arm_legitimate_index_p.)  */
10043       && (INTVAL (XEXP (ind, 1))
10044 	  < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
10045       && (INTVAL (XEXP (ind, 1)) & 3) == 0)
10046     return TRUE;
10047 
10048   return FALSE;
10049 }
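/* For example (illustrative): with TYPE == 0, a double-word operand such as
   V2SImode may use (plus (reg) (const_int)) offsets up to 1020, whereas a
   quad-word operand such as V4SImode is restricted by the check above to
   offsets below 1016 (so at most 1012), mirroring arm_legitimate_index_p.  */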
10050 
10051 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
10052    type.  */
10053 int
10054 neon_struct_mem_operand (rtx op)
10055 {
10056   rtx ind;
10057 
10058   /* Reject eliminable registers.  */
10059   if (! (reload_in_progress || reload_completed)
10060       && (   reg_mentioned_p (frame_pointer_rtx, op)
10061 	  || reg_mentioned_p (arg_pointer_rtx, op)
10062 	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
10063 	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
10064 	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
10065 	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
10066     return FALSE;
10067 
10068   /* Constants are converted into offsets from labels.  */
10069   if (!MEM_P (op))
10070     return FALSE;
10071 
10072   ind = XEXP (op, 0);
10073 
10074   if (reload_completed
10075       && (GET_CODE (ind) == LABEL_REF
10076 	  || (GET_CODE (ind) == CONST
10077 	      && GET_CODE (XEXP (ind, 0)) == PLUS
10078 	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
10079 	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
10080     return TRUE;
10081 
10082   /* Match: (mem (reg)).  */
10083   if (REG_P (ind))
10084     return arm_address_register_rtx_p (ind, 0);
10085 
10086   /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
10087   if (GET_CODE (ind) == POST_INC
10088       || GET_CODE (ind) == PRE_DEC)
10089     return arm_address_register_rtx_p (XEXP (ind, 0), 0);
10090 
10091   return FALSE;
10092 }
10093 
10094 /* Return true if X is a register that will be eliminated later on.  */
10095 int
10096 arm_eliminable_register (rtx x)
10097 {
10098   return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
10099 		       || REGNO (x) == ARG_POINTER_REGNUM
10100 		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
10101 			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
10102 }
10103 
10104 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
10105    coprocessor registers.  Otherwise return NO_REGS.  */
10106 
10107 enum reg_class
10108 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
10109 {
10110   if (mode == HFmode)
10111     {
10112       if (!TARGET_NEON_FP16)
10113 	return GENERAL_REGS;
10114       if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
10115 	return NO_REGS;
10116       return GENERAL_REGS;
10117     }
10118 
10119   /* The neon move patterns handle all legitimate vector and struct
10120      addresses.  */
10121   if (TARGET_NEON
10122       && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
10123       && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
10124 	  || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
10125 	  || VALID_NEON_STRUCT_MODE (mode)))
10126     return NO_REGS;
10127 
10128   if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
10129     return NO_REGS;
10130 
10131   return GENERAL_REGS;
10132 }
10133 
10134 /* Return TRUE if a value of type VALTYPE must be returned in the
10135    most-significant end of the return register.  */
10136 
10137 static bool
10138 arm_return_in_msb (const_tree valtype)
10139 {
10140   return (TARGET_AAPCS_BASED
10141           && BYTES_BIG_ENDIAN
10142 	  && (AGGREGATE_TYPE_P (valtype)
10143 	      || TREE_CODE (valtype) == COMPLEX_TYPE
10144 	      || FIXED_POINT_TYPE_P (valtype)));
10145 }
10146 
10147 /* Return TRUE if X references a SYMBOL_REF.  */
10148 int
10149 symbol_mentioned_p (rtx x)
10150 {
10151   const char * fmt;
10152   int i;
10153 
10154   if (GET_CODE (x) == SYMBOL_REF)
10155     return 1;
10156 
10157   /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
10158      are constant offsets, not symbols.  */
10159   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
10160     return 0;
10161 
10162   fmt = GET_RTX_FORMAT (GET_CODE (x));
10163 
10164   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10165     {
10166       if (fmt[i] == 'E')
10167 	{
10168 	  int j;
10169 
10170 	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10171 	    if (symbol_mentioned_p (XVECEXP (x, i, j)))
10172 	      return 1;
10173 	}
10174       else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
10175 	return 1;
10176     }
10177 
10178   return 0;
10179 }
10180 
10181 /* Return TRUE if X references a LABEL_REF.  */
10182 int
10183 label_mentioned_p (rtx x)
10184 {
10185   const char * fmt;
10186   int i;
10187 
10188   if (GET_CODE (x) == LABEL_REF)
10189     return 1;
10190 
10191   /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
10192      instruction, but they are constant offsets, not symbols.  */
10193   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
10194     return 0;
10195 
10196   fmt = GET_RTX_FORMAT (GET_CODE (x));
10197   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10198     {
10199       if (fmt[i] == 'E')
10200 	{
10201 	  int j;
10202 
10203 	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10204 	    if (label_mentioned_p (XVECEXP (x, i, j)))
10205 	      return 1;
10206 	}
10207       else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
10208 	return 1;
10209     }
10210 
10211   return 0;
10212 }
10213 
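/* Return TRUE if X contains an UNSPEC_TLS reference.  */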
10214 int
10215 tls_mentioned_p (rtx x)
10216 {
10217   switch (GET_CODE (x))
10218     {
10219     case CONST:
10220       return tls_mentioned_p (XEXP (x, 0));
10221 
10222     case UNSPEC:
10223       if (XINT (x, 1) == UNSPEC_TLS)
10224 	return 1;
10225 
10226     default:
10227       return 0;
10228     }
10229 }
10230 
10231 /* Must not copy any rtx that uses a pc-relative address.  */
10232 
10233 static int
10234 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
10235 {
10236   if (GET_CODE (*x) == UNSPEC
10237       && (XINT (*x, 1) == UNSPEC_PIC_BASE
10238 	  || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
10239     return 1;
10240   return 0;
10241 }
10242 
10243 static bool
10244 arm_cannot_copy_insn_p (rtx insn)
10245 {
10246   /* The tls call insn cannot be copied, as it is paired with a data
10247      word.  */
10248   if (recog_memoized (insn) == CODE_FOR_tlscall)
10249     return true;
10250 
10251   return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
10252 }
10253 
10254 enum rtx_code
10255 minmax_code (rtx x)
10256 {
10257   enum rtx_code code = GET_CODE (x);
10258 
10259   switch (code)
10260     {
10261     case SMAX:
10262       return GE;
10263     case SMIN:
10264       return LE;
10265     case UMIN:
10266       return LEU;
10267     case UMAX:
10268       return GEU;
10269     default:
10270       gcc_unreachable ();
10271     }
10272 }
10273 
10274 /* Match pair of min/max operators that can be implemented via usat/ssat.  */
10275 
10276 bool
10277 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
10278 			int *mask, bool *signed_sat)
10279 {
10280   /* The high bound must be a power of two minus one.  */
10281   int log = exact_log2 (INTVAL (hi_bound) + 1);
10282   if (log == -1)
10283     return false;
10284 
10285   /* The low bound is either zero (for usat) or one less than the
10286      negation of the high bound (for ssat).  */
10287   if (INTVAL (lo_bound) == 0)
10288     {
10289       if (mask)
10290         *mask = log;
10291       if (signed_sat)
10292         *signed_sat = false;
10293 
10294       return true;
10295     }
10296 
10297   if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
10298     {
10299       if (mask)
10300         *mask = log + 1;
10301       if (signed_sat)
10302         *signed_sat = true;
10303 
10304       return true;
10305     }
10306 
10307   return false;
10308 }
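/* Worked example (illustrative): clamping to [0, 255] uses HI_BOUND == 255
   (2^8 - 1) and LO_BOUND == 0, giving *MASK == 8 and *SIGNED_SAT == false
   (a usat #8); clamping to [-128, 127] uses HI_BOUND == 127 and
   LO_BOUND == -128 == -HI_BOUND - 1, giving *MASK == 8 and
   *SIGNED_SAT == true (an ssat #8).  */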
10309 
10310 /* Return 1 if memory locations are adjacent.  */
10311 int
10312 adjacent_mem_locations (rtx a, rtx b)
10313 {
10314   /* We don't guarantee to preserve the order of these memory refs.  */
10315   if (volatile_refs_p (a) || volatile_refs_p (b))
10316     return 0;
10317 
10318   if ((REG_P (XEXP (a, 0))
10319        || (GET_CODE (XEXP (a, 0)) == PLUS
10320 	   && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
10321       && (REG_P (XEXP (b, 0))
10322 	  || (GET_CODE (XEXP (b, 0)) == PLUS
10323 	      && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
10324     {
10325       HOST_WIDE_INT val0 = 0, val1 = 0;
10326       rtx reg0, reg1;
10327       int val_diff;
10328 
10329       if (GET_CODE (XEXP (a, 0)) == PLUS)
10330         {
10331 	  reg0 = XEXP (XEXP (a, 0), 0);
10332 	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
10333         }
10334       else
10335 	reg0 = XEXP (a, 0);
10336 
10337       if (GET_CODE (XEXP (b, 0)) == PLUS)
10338         {
10339 	  reg1 = XEXP (XEXP (b, 0), 0);
10340 	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
10341         }
10342       else
10343 	reg1 = XEXP (b, 0);
10344 
10345       /* Don't accept any offset that will require multiple
10346 	 instructions to handle, since this would cause the
10347 	 arith_adjacentmem pattern to output an overlong sequence.  */
10348       if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
10349 	return 0;
10350 
10351       /* Don't allow an eliminable register: register elimination can make
10352 	 the offset too large.  */
10353       if (arm_eliminable_register (reg0))
10354 	return 0;
10355 
10356       val_diff = val1 - val0;
10357 
10358       if (arm_ld_sched)
10359 	{
10360 	  /* If the target has load delay slots, then there's no benefit
10361 	     to using an ldm instruction unless the offset is zero and
10362 	     we are optimizing for size.  */
10363 	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
10364 		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
10365 		  && (val_diff == 4 || val_diff == -4));
10366 	}
10367 
10368       return ((REGNO (reg0) == REGNO (reg1))
10369 	      && (val_diff == 4 || val_diff == -4));
10370     }
10371 
10372   return 0;
10373 }
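/* For example (illustrative): MEMs addressed as [r3] and [r3, #4] (in either
   order) are adjacent and can feed the arith_adjacentmem pattern, while
   [r3] and [r3, #8], or accesses off different base registers, are not; on
   cores with load delay slots the additional -Os restriction above applies.  */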
10374 
10375 /* Return true if OP is a valid load or store multiple operation.  LOAD is true
10376    for load operations, false for store operations.  CONSECUTIVE is true
10377    if the register numbers in the operation must be consecutive in the register
10378    bank. RETURN_PC is true if value is to be loaded in PC.
10379    The pattern we are trying to match for load is:
10380      [(SET (R_d0) (MEM (PLUS (addr) (offset))))
10381       (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
10382        :
10383        :
10384       (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
10385      ]
10386      where
10387      1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
10388      2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
10389      3.  If consecutive is TRUE, then for kth register being loaded,
10390          REGNO (R_dk) = REGNO (R_d0) + k.
10391    The pattern for store is similar.  */
10392 bool
10393 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
10394                      bool consecutive, bool return_pc)
10395 {
10396   HOST_WIDE_INT count = XVECLEN (op, 0);
10397   rtx reg, mem, addr;
10398   unsigned regno;
10399   unsigned first_regno;
10400   HOST_WIDE_INT i = 1, base = 0, offset = 0;
10401   rtx elt;
10402   bool addr_reg_in_reglist = false;
10403   bool update = false;
10404   int reg_increment;
10405   int offset_adj;
10406   int regs_per_val;
10407 
10408   /* If not in SImode, then registers must be consecutive
10409      (e.g., VLDM instructions for DFmode).  */
10410   gcc_assert ((mode == SImode) || consecutive);
10411   /* Setting return_pc for stores is illegal.  */
10412   gcc_assert (!return_pc || load);
10413 
10414   /* Set up the increments and the regs per val based on the mode.  */
10415   reg_increment = GET_MODE_SIZE (mode);
10416   regs_per_val = reg_increment / 4;
10417   offset_adj = return_pc ? 1 : 0;
10418 
10419   if (count <= 1
10420       || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
10421       || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
10422     return false;
10423 
10424   /* Check if this is a write-back.  */
10425   elt = XVECEXP (op, 0, offset_adj);
10426   if (GET_CODE (SET_SRC (elt)) == PLUS)
10427     {
10428       i++;
10429       base = 1;
10430       update = true;
10431 
10432       /* The offset adjustment must be the number of registers being
10433          popped times the size of a single register.  */
10434       if (!REG_P (SET_DEST (elt))
10435           || !REG_P (XEXP (SET_SRC (elt), 0))
10436           || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
10437           || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
10438           || INTVAL (XEXP (SET_SRC (elt), 1)) !=
10439              ((count - 1 - offset_adj) * reg_increment))
10440         return false;
10441     }
10442 
10443   i = i + offset_adj;
10444   base = base + offset_adj;
10445   /* Perform a quick check so we don't blow up below. If only one reg is loaded,
10446      success depends on the type: VLDM can do just one reg,
10447      LDM must do at least two.  */
10448   if ((count <= i) && (mode == SImode))
10449       return false;
10450 
10451   elt = XVECEXP (op, 0, i - 1);
10452   if (GET_CODE (elt) != SET)
10453     return false;
10454 
10455   if (load)
10456     {
10457       reg = SET_DEST (elt);
10458       mem = SET_SRC (elt);
10459     }
10460   else
10461     {
10462       reg = SET_SRC (elt);
10463       mem = SET_DEST (elt);
10464     }
10465 
10466   if (!REG_P (reg) || !MEM_P (mem))
10467     return false;
10468 
10469   regno = REGNO (reg);
10470   first_regno = regno;
10471   addr = XEXP (mem, 0);
10472   if (GET_CODE (addr) == PLUS)
10473     {
10474       if (!CONST_INT_P (XEXP (addr, 1)))
10475 	return false;
10476 
10477       offset = INTVAL (XEXP (addr, 1));
10478       addr = XEXP (addr, 0);
10479     }
10480 
10481   if (!REG_P (addr))
10482     return false;
10483 
10484   /* Don't allow SP to be loaded unless it is also the base register.  This
10485      guarantees that SP is reset correctly when an LDM instruction
10486      is interrupted.  Otherwise, we might end up with a corrupt stack.  */
10487   if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
10488     return false;
10489 
10490   for (; i < count; i++)
10491     {
10492       elt = XVECEXP (op, 0, i);
10493       if (GET_CODE (elt) != SET)
10494         return false;
10495 
10496       if (load)
10497         {
10498           reg = SET_DEST (elt);
10499           mem = SET_SRC (elt);
10500         }
10501       else
10502         {
10503           reg = SET_SRC (elt);
10504           mem = SET_DEST (elt);
10505         }
10506 
10507       if (!REG_P (reg)
10508           || GET_MODE (reg) != mode
10509           || REGNO (reg) <= regno
10510           || (consecutive
10511               && (REGNO (reg) !=
10512                   (unsigned int) (first_regno + regs_per_val * (i - base))))
10513           /* Don't allow SP to be loaded unless it is also the base register. It
10514              guarantees that SP is reset correctly when an LDM instruction
10515              is interrupted. Otherwise, we might end up with a corrupt stack.  */
10516           || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
10517           || !MEM_P (mem)
10518           || GET_MODE (mem) != mode
10519           || ((GET_CODE (XEXP (mem, 0)) != PLUS
10520 	       || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
10521 	       || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
10522 	       || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
10523                    offset + (i - base) * reg_increment))
10524 	      && (!REG_P (XEXP (mem, 0))
10525 		  || offset + (i - base) * reg_increment != 0)))
10526         return false;
10527 
10528       regno = REGNO (reg);
10529       if (regno == REGNO (addr))
10530         addr_reg_in_reglist = true;
10531     }
10532 
10533   if (load)
10534     {
10535       if (update && addr_reg_in_reglist)
10536         return false;
10537 
10538       /* For Thumb-1, address register is always modified - either by write-back
10539          or by explicit load.  If the pattern does not describe an update,
10540          then the address register must be in the list of loaded registers.  */
10541       if (TARGET_THUMB1)
10542         return update || addr_reg_in_reglist;
10543     }
10544 
10545   return true;
10546 }
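/* An illustrative SImode load pattern with write-back that the checks above
   accept (schematic register notation, not from the original source):

     (parallel
       [(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 12)))
	(set (reg:SI r4) (mem:SI (reg:SI sp)))
	(set (reg:SI r5) (mem:SI (plus:SI (reg:SI sp) (const_int 4))))
	(set (reg:SI r6) (mem:SI (plus:SI (reg:SI sp) (const_int 8))))])

   i.e. a pop of {r4, r5, r6}: ascending register numbers, memory offsets
   increasing by REG_INCREMENT (4), and a base update of 3 * 4 bytes.  */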
10547 
10548 /* Return true iff it would be profitable to turn a sequence of NOPS loads
10549    or stores (depending on IS_STORE) into a load-multiple or store-multiple
10550    instruction.  ADD_OFFSET is nonzero if the base address register needs
10551    to be modified with an add instruction before we can use it.  */
10552 
10553 static bool
10554 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
10555 				 int nops, HOST_WIDE_INT add_offset)
10556 {
10557   /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
10558      if the offset isn't small enough.  The reason 2 ldrs are faster
10559      is because these ARMs are able to do more than one cache access
10560      in a single cycle.  The ARM9 and StrongARM have Harvard caches,
10561      whilst the ARM8 has a double bandwidth cache.  This means that
10562      these cores can do both an instruction fetch and a data fetch in
10563      a single cycle, so the trick of calculating the address into a
10564      scratch register (one of the result regs) and then doing a load
10565      multiple actually becomes slower (and no smaller in code size).
10566      That is the transformation
10567 
10568  	ldr	rd1, [rbase + offset]
10569  	ldr	rd2, [rbase + offset + 4]
10570 
10571      to
10572 
10573  	add	rd1, rbase, offset
10574  	ldmia	rd1, {rd1, rd2}
10575 
10576      produces worse code -- '3 cycles + any stalls on rd2' instead of
10577      '2 cycles + any stalls on rd2'.  On ARMs with only one cache
10578      access per cycle, the first sequence could never complete in less
10579      than 6 cycles, whereas the ldm sequence would only take 5 and
10580      would make better use of sequential accesses if not hitting the
10581      cache.
10582 
10583      We cheat here and test 'arm_ld_sched' which we currently know to
10584      only be true for the ARM8, ARM9 and StrongARM.  If this ever
10585      changes, then the test below needs to be reworked.  */
10586   if (nops == 2 && arm_ld_sched && add_offset != 0)
10587     return false;
10588 
10589   /* XScale has load-store double instructions, but they have stricter
10590      alignment requirements than load-store multiple, so we cannot
10591      use them.
10592 
10593      For XScale ldm requires 2 + NREGS cycles to complete and blocks
10594      the pipeline until completion.
10595 
10596 	NREGS		CYCLES
10597 	  1		  3
10598 	  2		  4
10599 	  3		  5
10600 	  4		  6
10601 
10602      An ldr instruction takes 1-3 cycles, but does not block the
10603      pipeline.
10604 
10605 	NREGS		CYCLES
10606 	  1		 1-3
10607 	  2		 2-6
10608 	  3		 3-9
10609 	  4		 4-12
10610 
10611      Best case ldr will always win.  However, the more ldr instructions
10612      we issue, the less likely we are to be able to schedule them well.
10613      Using ldr instructions also increases code size.
10614 
10615      As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
10616      for counts of 3 or 4 regs.  */
10617   if (nops <= 2 && arm_tune_xscale && !optimize_size)
10618     return false;
10619   return true;
10620 }
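/* For instance, following the tables above: merging two loads on an XScale
   core when not optimizing for size is rejected, because a two-register ldm
   blocks the pipeline for 4 cycles while two independent ldr instructions
   take 2-6 cycles without blocking and schedule better.  */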
10621 
10622 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
10623    Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
10624    an array ORDER which describes the sequence to use when accessing the
10625    offsets that produces an ascending order.  In this sequence, each
10626    offset must be larger by exactly 4 than the previous one.  ORDER[0]
10627    must have been filled in with the lowest offset by the caller.
10628    If UNSORTED_REGS is nonnull, it is an array of register numbers that
10629    we use to verify that ORDER produces an ascending order of registers.
10630    Return true if it was possible to construct such an order, false if
10631    not.  */
10632 
10633 static bool
10634 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
10635 		      int *unsorted_regs)
10636 {
10637   int i;
10638   for (i = 1; i < nops; i++)
10639     {
10640       int j;
10641 
10642       order[i] = order[i - 1];
10643       for (j = 0; j < nops; j++)
10644 	if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
10645 	  {
10646 	    /* We must find exactly one offset that is higher than the
10647 	       previous one by 4.  */
10648 	    if (order[i] != order[i - 1])
10649 	      return false;
10650 	    order[i] = j;
10651 	  }
10652       if (order[i] == order[i - 1])
10653 	return false;
10654       /* The register numbers must be ascending.  */
10655       if (unsorted_regs != NULL
10656 	  && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
10657 	return false;
10658     }
10659   return true;
10660 }
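/* Worked example (illustrative): with NOPS == 4,
   UNSORTED_OFFSETS == { 4, 12, 0, 8 } and ORDER[0] pre-set to 2 (the index
   of offset 0), the loop above produces ORDER == { 2, 0, 3, 1 }, visiting
   offsets 0, 4, 8, 12.  A missing or duplicated offset, or non-ascending
   register numbers in UNSORTED_REGS, makes the function return false.  */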
10661 
10662 /* Used to determine in a peephole whether a sequence of load
10663    instructions can be changed into a load-multiple instruction.
10664    NOPS is the number of separate load instructions we are examining.  The
10665    first NOPS entries in OPERANDS are the destination registers, the
10666    next NOPS entries are memory operands.  If this function is
10667    successful, *BASE is set to the common base register of the memory
10668    accesses; *LOAD_OFFSET is set to the first memory location's offset
10669    from that base register.
10670    REGS is an array filled in with the destination register numbers.
10671    SAVED_ORDER (if nonnull) is an array filled in with an order that maps
10672    insn numbers to an ascending order of loads.  If CHECK_REGS is true,
10673    the sequence of registers in REGS matches the loads from ascending memory
10674    locations, and the function verifies that the register numbers are
10675    themselves ascending.  If CHECK_REGS is false, the register numbers
10676    are stored in the order they are found in the operands.  */
10677 static int
10678 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
10679 			int *base, HOST_WIDE_INT *load_offset, bool check_regs)
10680 {
10681   int unsorted_regs[MAX_LDM_STM_OPS];
10682   HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10683   int order[MAX_LDM_STM_OPS];
10684   rtx base_reg_rtx = NULL;
10685   int base_reg = -1;
10686   int i, ldm_case;
10687 
10688   /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10689      easily extended if required.  */
10690   gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10691 
10692   memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10693 
10694   /* Loop over the operands and check that the memory references are
10695      suitable (i.e. immediate offsets from the same base register).  At
10696      the same time, extract the target register, and the memory
10697      offsets.  */
10698   for (i = 0; i < nops; i++)
10699     {
10700       rtx reg;
10701       rtx offset;
10702 
10703       /* Convert a subreg of a mem into the mem itself.  */
10704       if (GET_CODE (operands[nops + i]) == SUBREG)
10705 	operands[nops + i] = alter_subreg (operands + (nops + i), true);
10706 
10707       gcc_assert (MEM_P (operands[nops + i]));
10708 
10709       /* Don't reorder volatile memory references; it doesn't seem worth
10710 	 looking for the case where the order is ok anyway.  */
10711       if (MEM_VOLATILE_P (operands[nops + i]))
10712 	return 0;
10713 
10714       offset = const0_rtx;
10715 
10716       if ((REG_P (reg = XEXP (operands[nops + i], 0))
10717 	   || (GET_CODE (reg) == SUBREG
10718 	       && REG_P (reg = SUBREG_REG (reg))))
10719 	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10720 	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
10721 		  || (GET_CODE (reg) == SUBREG
10722 		      && REG_P (reg = SUBREG_REG (reg))))
10723 	      && (CONST_INT_P (offset
10724 		  = XEXP (XEXP (operands[nops + i], 0), 1)))))
10725 	{
10726 	  if (i == 0)
10727 	    {
10728 	      base_reg = REGNO (reg);
10729 	      base_reg_rtx = reg;
10730 	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10731 		return 0;
10732 	    }
10733 	  else if (base_reg != (int) REGNO (reg))
10734 	    /* Not addressed from the same base register.  */
10735 	    return 0;
10736 
10737 	  unsorted_regs[i] = (REG_P (operands[i])
10738 			      ? REGNO (operands[i])
10739 			      : REGNO (SUBREG_REG (operands[i])));
10740 
10741 	  /* If it isn't an integer register, or if it overwrites the
10742 	     base register but isn't the last insn in the list, then
10743 	     we can't do this.  */
10744 	  if (unsorted_regs[i] < 0
10745 	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10746 	      || unsorted_regs[i] > 14
10747 	      || (i != nops - 1 && unsorted_regs[i] == base_reg))
10748 	    return 0;
10749 
10750           /* Don't allow SP to be loaded unless it is also the base
10751              register.  It guarantees that SP is reset correctly when
10752              an LDM instruction is interrupted.  Otherwise, we might
10753              end up with a corrupt stack.  */
10754           if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
10755             return 0;
10756 
10757 	  unsorted_offsets[i] = INTVAL (offset);
10758 	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10759 	    order[0] = i;
10760 	}
10761       else
10762 	/* Not a suitable memory address.  */
10763 	return 0;
10764     }
10765 
10766   /* All the useful information has now been extracted from the
10767      operands into unsorted_regs and unsorted_offsets; additionally,
10768      order[0] has been set to the lowest offset in the list.  Sort
10769      the offsets into order, verifying that they are adjacent, and
10770      check that the register numbers are ascending.  */
10771   if (!compute_offset_order (nops, unsorted_offsets, order,
10772 			     check_regs ? unsorted_regs : NULL))
10773     return 0;
10774 
10775   if (saved_order)
10776     memcpy (saved_order, order, sizeof order);
10777 
10778   if (base)
10779     {
10780       *base = base_reg;
10781 
10782       for (i = 0; i < nops; i++)
10783 	regs[i] = unsorted_regs[check_regs ? order[i] : i];
10784 
10785       *load_offset = unsorted_offsets[order[0]];
10786     }
10787 
10788   if (TARGET_THUMB1
10789       && !peep2_reg_dead_p (nops, base_reg_rtx))
10790     return 0;
10791 
10792   if (unsorted_offsets[order[0]] == 0)
10793     ldm_case = 1; /* ldmia */
10794   else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10795     ldm_case = 2; /* ldmib */
10796   else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10797     ldm_case = 3; /* ldmda */
10798   else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10799     ldm_case = 4; /* ldmdb */
10800   else if (const_ok_for_arm (unsorted_offsets[order[0]])
10801 	   || const_ok_for_arm (-unsorted_offsets[order[0]]))
10802     ldm_case = 5;
10803   else
10804     return 0;
10805 
10806   if (!multiple_operation_profitable_p (false, nops,
10807 					ldm_case == 5
10808 					? unsorted_offsets[order[0]] : 0))
10809     return 0;
10810 
10811   return ldm_case;
10812 }
10813 
10814 /* Used to determine in a peephole whether a sequence of store instructions can
10815    be changed into a store-multiple instruction.
10816    NOPS is the number of separate store instructions we are examining.
10817    NOPS_TOTAL is the total number of instructions recognized by the peephole
10818    pattern.
10819    The first NOPS entries in OPERANDS are the source registers, the next
10820    NOPS entries are memory operands.  If this function is successful, *BASE is
10821    set to the common base register of the memory accesses; *LOAD_OFFSET is set
10822    to the first memory location's offset from that base register.  REGS is an
10823    array filled in with the source register numbers, REG_RTXS (if nonnull) is
10824    likewise filled with the corresponding rtx's.
10825    SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
10826    numbers to an ascending order of stores.
10827    If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
10828    from ascending memory locations, and the function verifies that the register
10829    numbers are themselves ascending.  If CHECK_REGS is false, the register
10830    numbers are stored in the order they are found in the operands.  */
10831 static int
10832 store_multiple_sequence (rtx *operands, int nops, int nops_total,
10833 			 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
10834 			 HOST_WIDE_INT *load_offset, bool check_regs)
10835 {
10836   int unsorted_regs[MAX_LDM_STM_OPS];
10837   rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
10838   HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10839   int order[MAX_LDM_STM_OPS];
10840   int base_reg = -1;
10841   rtx base_reg_rtx = NULL;
10842   int i, stm_case;
10843 
10844   /* Write back of base register is currently only supported for Thumb 1.  */
10845   int base_writeback = TARGET_THUMB1;
10846 
10847   /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10848      easily extended if required.  */
10849   gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10850 
10851   memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10852 
10853   /* Loop over the operands and check that the memory references are
10854      suitable (i.e. immediate offsets from the same base register).  At
10855      the same time, extract the target register, and the memory
10856      offsets.  */
10857   for (i = 0; i < nops; i++)
10858     {
10859       rtx reg;
10860       rtx offset;
10861 
10862       /* Convert a subreg of a mem into the mem itself.  */
10863       if (GET_CODE (operands[nops + i]) == SUBREG)
10864 	operands[nops + i] = alter_subreg (operands + (nops + i), true);
10865 
10866       gcc_assert (MEM_P (operands[nops + i]));
10867 
10868       /* Don't reorder volatile memory references; it doesn't seem worth
10869 	 looking for the case where the order is ok anyway.  */
10870       if (MEM_VOLATILE_P (operands[nops + i]))
10871 	return 0;
10872 
10873       offset = const0_rtx;
10874 
10875       if ((REG_P (reg = XEXP (operands[nops + i], 0))
10876 	   || (GET_CODE (reg) == SUBREG
10877 	       && REG_P (reg = SUBREG_REG (reg))))
10878 	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10879 	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
10880 		  || (GET_CODE (reg) == SUBREG
10881 		      && REG_P (reg = SUBREG_REG (reg))))
10882 	      && (CONST_INT_P (offset
10883 		  = XEXP (XEXP (operands[nops + i], 0), 1)))))
10884 	{
10885 	  unsorted_reg_rtxs[i] = (REG_P (operands[i])
10886 				  ? operands[i] : SUBREG_REG (operands[i]));
10887 	  unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
10888 
10889 	  if (i == 0)
10890 	    {
10891 	      base_reg = REGNO (reg);
10892 	      base_reg_rtx = reg;
10893 	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10894 		return 0;
10895 	    }
10896 	  else if (base_reg != (int) REGNO (reg))
10897 	    /* Not addressed from the same base register.  */
10898 	    return 0;
10899 
10900 	  /* If it isn't an integer register, then we can't do this.  */
10901 	  if (unsorted_regs[i] < 0
10902 	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10903 	      /* The effects are unpredictable if the base register is
10904 		 both updated and stored.  */
10905 	      || (base_writeback && unsorted_regs[i] == base_reg)
10906 	      || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
10907 	      || unsorted_regs[i] > 14)
10908 	    return 0;
10909 
10910 	  unsorted_offsets[i] = INTVAL (offset);
10911 	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10912 	    order[0] = i;
10913 	}
10914       else
10915 	/* Not a suitable memory address.  */
10916 	return 0;
10917     }
10918 
10919   /* All the useful information has now been extracted from the
10920      operands into unsorted_regs and unsorted_offsets; additionally,
10921      order[0] has been set to the lowest offset in the list.  Sort
10922      the offsets into order, verifying that they are adjacent, and
10923      check that the register numbers are ascending.  */
10924   if (!compute_offset_order (nops, unsorted_offsets, order,
10925 			     check_regs ? unsorted_regs : NULL))
10926     return 0;
10927 
10928   if (saved_order)
10929     memcpy (saved_order, order, sizeof order);
10930 
10931   if (base)
10932     {
10933       *base = base_reg;
10934 
10935       for (i = 0; i < nops; i++)
10936 	{
10937 	  regs[i] = unsorted_regs[check_regs ? order[i] : i];
10938 	  if (reg_rtxs)
10939 	    reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
10940 	}
10941 
10942       *load_offset = unsorted_offsets[order[0]];
10943     }
10944 
10945   if (TARGET_THUMB1
10946       && !peep2_reg_dead_p (nops_total, base_reg_rtx))
10947     return 0;
10948 
10949   if (unsorted_offsets[order[0]] == 0)
10950     stm_case = 1; /* stmia */
10951   else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10952     stm_case = 2; /* stmib */
10953   else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10954     stm_case = 3; /* stmda */
10955   else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10956     stm_case = 4; /* stmdb */
10957   else
10958     return 0;
10959 
10960   if (!multiple_operation_profitable_p (false, nops, 0))
10961     return 0;
10962 
10963   return stm_case;
10964 }
10965 
10966 /* Routines for use in generating RTL.  */
10967 
10968 /* Generate a load-multiple instruction.  COUNT is the number of loads in
10969    the instruction; REGS and MEMS are arrays containing the operands.
10970    BASEREG is the base register to be used in addressing the memory operands.
10971    WBACK_OFFSET is nonzero if the instruction should update the base
10972    register, and gives the amount of the update.  */
10973 
10974 static rtx
10975 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10976 			 HOST_WIDE_INT wback_offset)
10977 {
10978   int i = 0, j;
10979   rtx result;
10980 
10981   if (!multiple_operation_profitable_p (false, count, 0))
10982     {
10983       rtx seq;
10984 
10985       start_sequence ();
10986 
10987       for (i = 0; i < count; i++)
10988 	emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
10989 
10990       if (wback_offset != 0)
10991 	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
10992 
10993       seq = get_insns ();
10994       end_sequence ();
10995 
10996       return seq;
10997     }
10998 
10999   result = gen_rtx_PARALLEL (VOIDmode,
11000 			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
11001   if (wback_offset != 0)
11002     {
11003       XVECEXP (result, 0, 0)
11004 	= gen_rtx_SET (VOIDmode, basereg,
11005 		       plus_constant (Pmode, basereg, wback_offset));
11006       i = 1;
11007       count++;
11008     }
11009 
11010   for (j = 0; i < count; i++, j++)
11011     XVECEXP (result, 0, i)
11012       = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
11013 
11014   return result;
11015 }
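/* Schematically (an illustration, not part of the original source), a call
   with COUNT == 2, REGS == { 4, 5 } and WBACK_OFFSET == 8 produces

     (parallel
       [(set (reg:SI rb) (plus:SI (reg:SI rb) (const_int 8)))
	(set (reg:SI r4) (mem:SI ...))
	(set (reg:SI r5) (mem:SI ...))])

   which the ldm patterns can match; when the combination is judged
   unprofitable, the fallback above instead returns a plain sequence of
   single loads followed by the base-register update.  */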
11016 
11017 /* Generate a store-multiple instruction.  COUNT is the number of stores in
11018    the instruction; REGS and MEMS are arrays containing the operands.
11019    BASEREG is the base register to be used in addressing the memory operands.
11020    WBACK_OFFSET is nonzero if the instruction should update the base
11021    register, and gives the amount of the update.  */
11022 
11023 static rtx
11024 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
11025 			  HOST_WIDE_INT wback_offset)
11026 {
11027   int i = 0, j;
11028   rtx result;
11029 
11030   if (GET_CODE (basereg) == PLUS)
11031     basereg = XEXP (basereg, 0);
11032 
11033   if (!multiple_operation_profitable_p (false, count, 0))
11034     {
11035       rtx seq;
11036 
11037       start_sequence ();
11038 
11039       for (i = 0; i < count; i++)
11040 	emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
11041 
11042       if (wback_offset != 0)
11043 	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
11044 
11045       seq = get_insns ();
11046       end_sequence ();
11047 
11048       return seq;
11049     }
11050 
11051   result = gen_rtx_PARALLEL (VOIDmode,
11052 			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
11053   if (wback_offset != 0)
11054     {
11055       XVECEXP (result, 0, 0)
11056 	= gen_rtx_SET (VOIDmode, basereg,
11057 		       plus_constant (Pmode, basereg, wback_offset));
11058       i = 1;
11059       count++;
11060     }
11061 
11062   for (j = 0; i < count; i++, j++)
11063     XVECEXP (result, 0, i)
11064       = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
11065 
11066   return result;
11067 }
11068 
11069 /* Generate either a load-multiple or a store-multiple instruction.  This
11070    function can be used in situations where we can start with a single MEM
11071    rtx and adjust its address upwards.
11072    COUNT is the number of operations in the instruction, not counting a
11073    possible update of the base register.  REGS is an array containing the
11074    register operands.
11075    BASEREG is the base register to be used in addressing the memory operands,
11076    which are constructed from BASEMEM.
11077    WRITE_BACK specifies whether the generated instruction should include an
11078    update of the base register.
11079    OFFSETP is used to pass an offset to and from this function; this offset
11080    is not used when constructing the address (instead BASEMEM should have an
11081    appropriate offset in its address); it is used only for setting
11082    MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
11083 
11084 static rtx
11085 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
11086 		     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
11087 {
11088   rtx mems[MAX_LDM_STM_OPS];
11089   HOST_WIDE_INT offset = *offsetp;
11090   int i;
11091 
11092   gcc_assert (count <= MAX_LDM_STM_OPS);
11093 
11094   if (GET_CODE (basereg) == PLUS)
11095     basereg = XEXP (basereg, 0);
11096 
11097   for (i = 0; i < count; i++)
11098     {
11099       rtx addr = plus_constant (Pmode, basereg, i * 4);
11100       mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
11101       offset += 4;
11102     }
11103 
11104   if (write_back)
11105     *offsetp = offset;
11106 
11107   if (is_load)
11108     return arm_gen_load_multiple_1 (count, regs, mems, basereg,
11109 				    write_back ? 4 * count : 0);
11110   else
11111     return arm_gen_store_multiple_1 (count, regs, mems, basereg,
11112 				     write_back ? 4 * count : 0);
11113 }
11114 
11115 rtx
11116 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
11117 		       rtx basemem, HOST_WIDE_INT *offsetp)
11118 {
11119   return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
11120 			      offsetp);
11121 }
11122 
11123 rtx
11124 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
11125 			rtx basemem, HOST_WIDE_INT *offsetp)
11126 {
11127   return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
11128 			      offsetp);
11129 }
11130 
11131 /* Called from a peephole2 expander to turn a sequence of loads into an
11132    LDM instruction.  OPERANDS are the operands found by the peephole matcher;
11133    NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
11134    is true if we can reorder the registers because their subsequent uses are
11135    commutative.
11136    Returns true iff we could generate a new instruction.  */
11137 
11138 bool
11139 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
11140 {
11141   int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
11142   rtx mems[MAX_LDM_STM_OPS];
11143   int i, j, base_reg;
11144   rtx base_reg_rtx;
11145   HOST_WIDE_INT offset;
11146   int write_back = FALSE;
11147   int ldm_case;
11148   rtx addr;
11149 
11150   ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
11151 				     &base_reg, &offset, !sort_regs);
11152 
11153   if (ldm_case == 0)
11154     return false;
11155 
11156   if (sort_regs)
11157     for (i = 0; i < nops - 1; i++)
11158       for (j = i + 1; j < nops; j++)
11159 	if (regs[i] > regs[j])
11160 	  {
11161 	    int t = regs[i];
11162 	    regs[i] = regs[j];
11163 	    regs[j] = t;
11164 	  }
11165   base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
11166 
11167   if (TARGET_THUMB1)
11168     {
11169       gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
11170       gcc_assert (ldm_case == 1 || ldm_case == 5);
11171       write_back = TRUE;
11172     }
11173 
11174   if (ldm_case == 5)
11175     {
11176       rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
11177       emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
11178       offset = 0;
11179       if (!TARGET_THUMB1)
11180 	{
11181 	  base_reg = regs[0];
11182 	  base_reg_rtx = newbase;
11183 	}
11184     }
11185 
11186   for (i = 0; i < nops; i++)
11187     {
11188       addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
11189       mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
11190 					      SImode, addr, 0);
11191     }
11192   emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
11193 				      write_back ? offset + i * 4 : 0));
11194   return true;
11195 }
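/* For example (illustrative): if load_multiple_sequence returns case 5 (the
   common offset only fits an add immediate), the code above first emits
   "add rX, rbase, #offset" -- into the first destination register for
   ARM/Thumb-2, or into the dead base register for Thumb-1 -- and then emits
   the load-multiple relative to the new base with offset 0.  */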
11196 
11197 /* Called from a peephole2 expander to turn a sequence of stores into an
11198    STM instruction.  OPERANDS are the operands found by the peephole matcher;
11199    NOPS indicates how many separate stores we are trying to combine.
11200    Returns true iff we could generate a new instruction.  */
11201 
11202 bool
11203 gen_stm_seq (rtx *operands, int nops)
11204 {
11205   int i;
11206   int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
11207   rtx mems[MAX_LDM_STM_OPS];
11208   int base_reg;
11209   rtx base_reg_rtx;
11210   HOST_WIDE_INT offset;
11211   int write_back = FALSE;
11212   int stm_case;
11213   rtx addr;
11214   bool base_reg_dies;
11215 
11216   stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
11217 				      mem_order, &base_reg, &offset, true);
11218 
11219   if (stm_case == 0)
11220     return false;
11221 
11222   base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
11223 
11224   base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
11225   if (TARGET_THUMB1)
11226     {
11227       gcc_assert (base_reg_dies);
11228       write_back = TRUE;
11229     }
11230 
11231   if (stm_case == 5)
11232     {
11233       gcc_assert (base_reg_dies);
11234       emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
11235       offset = 0;
11236     }
11237 
11238   addr = plus_constant (Pmode, base_reg_rtx, offset);
11239 
11240   for (i = 0; i < nops; i++)
11241     {
11242       addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
11243       mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
11244 					      SImode, addr, 0);
11245     }
11246   emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
11247 				       write_back ? offset + i * 4 : 0));
11248   return true;
11249 }
11250 
11251 /* Called from a peephole2 expander to turn a sequence of stores that are
11252    preceded by constant loads into an STM instruction.  OPERANDS are the
11253    operands found by the peephole matcher; NOPS indicates how many
11254    separate stores we are trying to combine; there are 2 * NOPS
11255    instructions in the peephole.
11256    Returns true iff we could generate a new instruction.  */
11257 
11258 bool
11259 gen_const_stm_seq (rtx *operands, int nops)
11260 {
11261   int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
11262   int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
11263   rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
11264   rtx mems[MAX_LDM_STM_OPS];
11265   int base_reg;
11266   rtx base_reg_rtx;
11267   HOST_WIDE_INT offset;
11268   int write_back = FALSE;
11269   int stm_case;
11270   rtx addr;
11271   bool base_reg_dies;
11272   int i, j;
11273   HARD_REG_SET allocated;
11274 
11275   stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
11276 				      mem_order, &base_reg, &offset, false);
11277 
11278   if (stm_case == 0)
11279     return false;
11280 
11281   memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
11282 
11283   /* If the same register is used more than once, try to find a free
11284      register.  */
11285   CLEAR_HARD_REG_SET (allocated);
11286   for (i = 0; i < nops; i++)
11287     {
11288       for (j = i + 1; j < nops; j++)
11289 	if (regs[i] == regs[j])
11290 	  {
11291 	    rtx t = peep2_find_free_register (0, nops * 2,
11292 					      TARGET_THUMB1 ? "l" : "r",
11293 					      SImode, &allocated);
11294 	    if (t == NULL_RTX)
11295 	      return false;
11296 	    reg_rtxs[i] = t;
11297 	    regs[i] = REGNO (t);
11298 	  }
11299     }
11300 
11301   /* Compute an ordering that maps the register numbers to an ascending
11302      sequence.  */
11303   reg_order[0] = 0;
11304   for (i = 0; i < nops; i++)
11305     if (regs[i] < regs[reg_order[0]])
11306       reg_order[0] = i;
11307 
11308   for (i = 1; i < nops; i++)
11309     {
11310       int this_order = reg_order[i - 1];
11311       for (j = 0; j < nops; j++)
11312 	if (regs[j] > regs[reg_order[i - 1]]
11313 	    && (this_order == reg_order[i - 1]
11314 		|| regs[j] < regs[this_order]))
11315 	  this_order = j;
11316       reg_order[i] = this_order;
11317     }
11318 
11319   /* Ensure that registers that must be live after the instruction end
11320      up with the correct value.  */
11321   for (i = 0; i < nops; i++)
11322     {
11323       int this_order = reg_order[i];
11324       if ((this_order != mem_order[i]
11325 	   || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
11326 	  && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
11327 	return false;
11328     }
11329 
11330   /* Load the constants.  */
11331   for (i = 0; i < nops; i++)
11332     {
11333       rtx op = operands[2 * nops + mem_order[i]];
11334       sorted_regs[i] = regs[reg_order[i]];
11335       emit_move_insn (reg_rtxs[reg_order[i]], op);
11336     }
11337 
11338   base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
11339 
11340   base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
11341   if (TARGET_THUMB1)
11342     {
11343       gcc_assert (base_reg_dies);
11344       write_back = TRUE;
11345     }
11346 
11347   if (stm_case == 5)
11348     {
11349       gcc_assert (base_reg_dies);
11350       emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
11351       offset = 0;
11352     }
11353 
11354   addr = plus_constant (Pmode, base_reg_rtx, offset);
11355 
11356   for (i = 0; i < nops; i++)
11357     {
11358       addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
11359       mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
11360 					      SImode, addr, 0);
11361     }
11362   emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
11363 				       write_back ? offset + i * 4 : 0));
11364   return true;
11365 }
11366 
11367 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
11368    unaligned copies on processors which support unaligned semantics for those
11369    instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
11370    (using more registers) by doing e.g. load/load/store/store for a factor of 2.
11371    An interleave factor of 1 (the minimum) will perform no interleaving.
11372    Load/store multiple are used for aligned addresses where possible.  */
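/* For illustration: with INTERLEAVE_FACTOR == 2 and both ends unaligned,
   each 8-byte chunk of the main loop below becomes roughly

	ldr	r_a, [src]	@ unaligned load
	ldr	r_b, [src, #4]
	str	r_a, [dst]	@ unaligned store
	str	r_b, [dst, #4]

   so the second load can issue before the first store needs its data; when a
   side is word-aligned, ldmia/stmia with write-back is used for that side
   instead.  (Register names are schematic.)  */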
11373 
11374 static void
11375 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
11376 				   HOST_WIDE_INT length,
11377 				   unsigned int interleave_factor)
11378 {
11379   rtx *regs = XALLOCAVEC (rtx, interleave_factor);
11380   int *regnos = XALLOCAVEC (int, interleave_factor);
11381   HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
11382   HOST_WIDE_INT i, j;
11383   HOST_WIDE_INT remaining = length, words;
11384   rtx halfword_tmp = NULL, byte_tmp = NULL;
11385   rtx dst, src;
11386   bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
11387   bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
11388   HOST_WIDE_INT srcoffset, dstoffset;
11389   HOST_WIDE_INT src_autoinc, dst_autoinc;
11390   rtx mem, addr;
11391 
11392   gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
11393 
11394   /* Use hard registers if we have aligned source or destination so we can use
11395      load/store multiple with contiguous registers.  */
11396   if (dst_aligned || src_aligned)
11397     for (i = 0; i < interleave_factor; i++)
11398       regs[i] = gen_rtx_REG (SImode, i);
11399   else
11400     for (i = 0; i < interleave_factor; i++)
11401       regs[i] = gen_reg_rtx (SImode);
11402 
11403   dst = copy_addr_to_reg (XEXP (dstbase, 0));
11404   src = copy_addr_to_reg (XEXP (srcbase, 0));
11405 
11406   srcoffset = dstoffset = 0;
11407 
11408   /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
11409      For copying the last bytes we want to subtract this offset again.  */
11410   src_autoinc = dst_autoinc = 0;
11411 
11412   for (i = 0; i < interleave_factor; i++)
11413     regnos[i] = i;
11414 
11415   /* Copy BLOCK_SIZE_BYTES chunks.  */
11416 
11417   for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
11418     {
11419       /* Load words.  */
11420       if (src_aligned && interleave_factor > 1)
11421 	{
11422 	  emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
11423 					    TRUE, srcbase, &srcoffset));
11424 	  src_autoinc += UNITS_PER_WORD * interleave_factor;
11425 	}
11426       else
11427 	{
11428 	  for (j = 0; j < interleave_factor; j++)
11429 	    {
11430 	      addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
11431 						 - src_autoinc));
11432 	      mem = adjust_automodify_address (srcbase, SImode, addr,
11433 					       srcoffset + j * UNITS_PER_WORD);
11434 	      emit_insn (gen_unaligned_loadsi (regs[j], mem));
11435 	    }
11436 	  srcoffset += block_size_bytes;
11437 	}
11438 
11439       /* Store words.  */
11440       if (dst_aligned && interleave_factor > 1)
11441 	{
11442 	  emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
11443 					     TRUE, dstbase, &dstoffset));
11444 	  dst_autoinc += UNITS_PER_WORD * interleave_factor;
11445 	}
11446       else
11447 	{
11448 	  for (j = 0; j < interleave_factor; j++)
11449 	    {
11450 	      addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
11451 						 - dst_autoinc));
11452 	      mem = adjust_automodify_address (dstbase, SImode, addr,
11453 					       dstoffset + j * UNITS_PER_WORD);
11454 	      emit_insn (gen_unaligned_storesi (mem, regs[j]));
11455 	    }
11456 	  dstoffset += block_size_bytes;
11457 	}
11458 
11459       remaining -= block_size_bytes;
11460     }
11461 
11462   /* Copy any whole words left (note these aren't interleaved with any
11463      subsequent halfword/byte load/stores in the interests of simplicity).  */
11464 
11465   words = remaining / UNITS_PER_WORD;
11466 
11467   gcc_assert (words < interleave_factor);
11468 
11469   if (src_aligned && words > 1)
11470     {
11471       emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
11472 					&srcoffset));
11473       src_autoinc += UNITS_PER_WORD * words;
11474     }
11475   else
11476     {
11477       for (j = 0; j < words; j++)
11478 	{
11479 	  addr = plus_constant (Pmode, src,
11480 				srcoffset + j * UNITS_PER_WORD - src_autoinc);
11481 	  mem = adjust_automodify_address (srcbase, SImode, addr,
11482 					   srcoffset + j * UNITS_PER_WORD);
11483 	  emit_insn (gen_unaligned_loadsi (regs[j], mem));
11484 	}
11485       srcoffset += words * UNITS_PER_WORD;
11486     }
11487 
11488   if (dst_aligned && words > 1)
11489     {
11490       emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
11491 					 &dstoffset));
11492       dst_autoinc += words * UNITS_PER_WORD;
11493     }
11494   else
11495     {
11496       for (j = 0; j < words; j++)
11497 	{
11498 	  addr = plus_constant (Pmode, dst,
11499 				dstoffset + j * UNITS_PER_WORD - dst_autoinc);
11500 	  mem = adjust_automodify_address (dstbase, SImode, addr,
11501 					   dstoffset + j * UNITS_PER_WORD);
11502 	  emit_insn (gen_unaligned_storesi (mem, regs[j]));
11503 	}
11504       dstoffset += words * UNITS_PER_WORD;
11505     }
11506 
11507   remaining -= words * UNITS_PER_WORD;
11508 
11509   gcc_assert (remaining < 4);
11510 
11511   /* Copy a halfword if necessary.  */
11512 
11513   if (remaining >= 2)
11514     {
11515       halfword_tmp = gen_reg_rtx (SImode);
11516 
11517       addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
11518       mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
11519       emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
11520 
11521       /* Either write out immediately, or delay until we've loaded the last
11522 	 byte, depending on interleave factor.  */
11523       if (interleave_factor == 1)
11524 	{
11525 	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11526 	  mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11527 	  emit_insn (gen_unaligned_storehi (mem,
11528 		       gen_lowpart (HImode, halfword_tmp)));
11529 	  halfword_tmp = NULL;
11530 	  dstoffset += 2;
11531 	}
11532 
11533       remaining -= 2;
11534       srcoffset += 2;
11535     }
11536 
11537   gcc_assert (remaining < 2);
11538 
11539   /* Copy last byte.  */
11540 
11541   if ((remaining & 1) != 0)
11542     {
11543       byte_tmp = gen_reg_rtx (SImode);
11544 
11545       addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
11546       mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
11547       emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
11548 
11549       if (interleave_factor == 1)
11550 	{
11551 	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11552 	  mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11553 	  emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11554 	  byte_tmp = NULL;
11555 	  dstoffset++;
11556 	}
11557 
11558       remaining--;
11559       srcoffset++;
11560     }
11561 
11562   /* Store last halfword if we haven't done so already.  */
11563 
11564   if (halfword_tmp)
11565     {
11566       addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11567       mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11568       emit_insn (gen_unaligned_storehi (mem,
11569 		   gen_lowpart (HImode, halfword_tmp)));
11570       dstoffset += 2;
11571     }
11572 
11573   /* Likewise for last byte.  */
11574 
11575   if (byte_tmp)
11576     {
11577       addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11578       mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11579       emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11580       dstoffset++;
11581     }
11582 
11583   gcc_assert (remaining == 0 && srcoffset == dstoffset);
11584 }
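
/* Illustrative sketch, not part of the compiler sources: the schedule the
   function above emits, shown as plain C for an interleave factor of 2.
   All loads for a block are issued before any of its stores, which is what
   hides the load latency; the real code also uses ldm/stm for aligned ends
   and handles the halfword/byte tail, which this sketch collapses into a
   byte loop.  The names here are illustrative only.  */
#if 0
#include <stdint.h>
#include <string.h>

static void
copy_interleave2 (unsigned char *dst, const unsigned char *src, long length)
{
  long i;
  uint32_t r0, r1;

  for (i = 0; i + 8 <= length; i += 8)
    {
      memcpy (&r0, src + i, 4);		/* load, load ...  */
      memcpy (&r1, src + i + 4, 4);
      memcpy (dst + i, &r0, 4);		/* ... then store, store.  */
      memcpy (dst + i + 4, &r1, 4);
    }
  for (; i < length; i++)		/* Simplified tail.  */
    dst[i] = src[i];
}
#endif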
11585 
11586 /* From mips_adjust_block_mem:
11587 
11588    Helper function for doing a loop-based block operation on memory
11589    reference MEM.  Each iteration of the loop will operate on LENGTH
11590    bytes of MEM.
11591 
11592    Create a new base register for use within the loop and point it to
11593    the start of MEM.  Create a new memory reference that uses this
11594    register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */
11595 
11596 static void
11597 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
11598 		      rtx *loop_mem)
11599 {
11600   *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
11601 
11602   /* Although the new mem does not refer to a known location,
11603      it does keep up to LENGTH bytes of alignment.  */
11604   *loop_mem = change_address (mem, BLKmode, *loop_reg);
11605   set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
11606 }
11607 
11608 /* From mips_block_move_loop:
11609 
11610    Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
11611    bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
11612    the memory regions do not overlap.  */
11613 
11614 static void
11615 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
11616 			       unsigned int interleave_factor,
11617 			       HOST_WIDE_INT bytes_per_iter)
11618 {
11619   rtx label, src_reg, dest_reg, final_src, test;
11620   HOST_WIDE_INT leftover;
11621 
11622   leftover = length % bytes_per_iter;
11623   length -= leftover;
11624 
11625   /* Create registers and memory references for use within the loop.  */
11626   arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
11627   arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
11628 
11629   /* Calculate the value that SRC_REG should have after the last iteration of
11630      the loop.  */
11631   final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
11632 				   0, 0, OPTAB_WIDEN);
11633 
11634   /* Emit the start of the loop.  */
11635   label = gen_label_rtx ();
11636   emit_label (label);
11637 
11638   /* Emit the loop body.  */
11639   arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
11640 				     interleave_factor);
11641 
11642   /* Move on to the next block.  */
11643   emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
11644   emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
11645 
11646   /* Emit the loop condition.  */
11647   test = gen_rtx_NE (VOIDmode, src_reg, final_src);
11648   emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
11649 
11650   /* Mop up any left-over bytes.  */
11651   if (leftover)
11652     arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
11653 }
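
/* Illustrative sketch, not part of the compiler sources: the shape of the
   code generated by the function above, written as ordinary C.  copy_block
   is a hypothetical stand-in for the straight-line copy, and LENGTH is
   assumed to be at least BYTES_PER_ITER, matching the precondition stated
   in the comment above.  */
#if 0
static void copy_block (unsigned char *, const unsigned char *, long);

static void
copy_loop (unsigned char *dst, const unsigned char *src,
	   long length, long bytes_per_iter)
{
  long leftover = length % bytes_per_iter;
  const unsigned char *final_src = src + (length - leftover);

  while (src != final_src)		/* The generated cbranchsi4 loop.  */
    {
      copy_block (dst, src, bytes_per_iter);
      src += bytes_per_iter;
      dst += bytes_per_iter;
    }

  if (leftover)				/* Mop up the tail straight-line.  */
    copy_block (dst, src, leftover);
}
#endif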
11654 
11655 /* Emit a block move when either the source or destination is unaligned (not
11656    aligned to a four-byte boundary).  This may need further tuning depending on
11657    core type, optimize_size setting, etc.  */
11658 
11659 static int
11660 arm_movmemqi_unaligned (rtx *operands)
11661 {
11662   HOST_WIDE_INT length = INTVAL (operands[2]);
11663 
11664   if (optimize_size)
11665     {
11666       bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
11667       bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
11668       /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
11669 	 the code size when optimizing for size.  We'll use ldm/stm if src_aligned
11670 	 or dst_aligned though: allow more interleaving in those cases since the
11671 	 resulting code can be smaller.  */
11672       unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
11673       HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
11674 
11675       if (length > 12)
11676 	arm_block_move_unaligned_loop (operands[0], operands[1], length,
11677 				       interleave_factor, bytes_per_iter);
11678       else
11679 	arm_block_move_unaligned_straight (operands[0], operands[1], length,
11680 					   interleave_factor);
11681     }
11682   else
11683     {
11684       /* Note that the loop created by arm_block_move_unaligned_loop may be
11685 	 subject to loop unrolling, which makes tuning this condition a little
11686 	 redundant.  */
11687       if (length > 32)
11688 	arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
11689       else
11690 	arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
11691     }
11692 
11693   return 1;
11694 }
11695 
11696 int
11697 arm_gen_movmemqi (rtx *operands)
11698 {
11699   HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
11700   HOST_WIDE_INT srcoffset, dstoffset;
11701   int i;
11702   rtx src, dst, srcbase, dstbase;
11703   rtx part_bytes_reg = NULL;
11704   rtx mem;
11705 
11706   if (!CONST_INT_P (operands[2])
11707       || !CONST_INT_P (operands[3])
11708       || INTVAL (operands[2]) > 64)
11709     return 0;
11710 
11711   if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
11712     return arm_movmemqi_unaligned (operands);
11713 
11714   if (INTVAL (operands[3]) & 3)
11715     return 0;
11716 
11717   dstbase = operands[0];
11718   srcbase = operands[1];
11719 
11720   dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
11721   src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
11722 
11723   in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
11724   out_words_to_go = INTVAL (operands[2]) / 4;
11725   last_bytes = INTVAL (operands[2]) & 3;
11726   dstoffset = srcoffset = 0;
11727 
11728   if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
11729     part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
11730 
11731   for (i = 0; in_words_to_go >= 2; i+=4)
11732     {
11733       if (in_words_to_go > 4)
11734 	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
11735 					  TRUE, srcbase, &srcoffset));
11736       else
11737 	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
11738 					  src, FALSE, srcbase,
11739 					  &srcoffset));
11740 
11741       if (out_words_to_go)
11742 	{
11743 	  if (out_words_to_go > 4)
11744 	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
11745 					       TRUE, dstbase, &dstoffset));
11746 	  else if (out_words_to_go != 1)
11747 	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
11748 					       out_words_to_go, dst,
11749 					       (last_bytes == 0
11750 						? FALSE : TRUE),
11751 					       dstbase, &dstoffset));
11752 	  else
11753 	    {
11754 	      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11755 	      emit_move_insn (mem, gen_rtx_REG (SImode, 0));
11756 	      if (last_bytes != 0)
11757 		{
11758 		  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
11759 		  dstoffset += 4;
11760 		}
11761 	    }
11762 	}
11763 
11764       in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
11765       out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
11766     }
11767 
11768   /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
11769   if (out_words_to_go)
11770     {
11771       rtx sreg;
11772 
11773       mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11774       sreg = copy_to_reg (mem);
11775 
11776       mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11777       emit_move_insn (mem, sreg);
11778       in_words_to_go--;
11779 
11780       gcc_assert (!in_words_to_go);	/* Sanity check */
11781     }
11782 
11783   if (in_words_to_go)
11784     {
11785       gcc_assert (in_words_to_go > 0);
11786 
11787       mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11788       part_bytes_reg = copy_to_mode_reg (SImode, mem);
11789     }
11790 
11791   gcc_assert (!last_bytes || part_bytes_reg);
11792 
11793   if (BYTES_BIG_ENDIAN && last_bytes)
11794     {
11795       rtx tmp = gen_reg_rtx (SImode);
11796 
11797       /* The bytes we want are in the top end of the word.  */
11798       emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
11799 			      GEN_INT (8 * (4 - last_bytes))));
11800       part_bytes_reg = tmp;
11801 
11802       while (last_bytes)
11803 	{
11804 	  mem = adjust_automodify_address (dstbase, QImode,
11805 					   plus_constant (Pmode, dst,
11806 							  last_bytes - 1),
11807 					   dstoffset + last_bytes - 1);
11808 	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11809 
11810 	  if (--last_bytes)
11811 	    {
11812 	      tmp = gen_reg_rtx (SImode);
11813 	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
11814 	      part_bytes_reg = tmp;
11815 	    }
11816 	}
11817 
11818     }
11819   else
11820     {
11821       if (last_bytes > 1)
11822 	{
11823 	  mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
11824 	  emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
11825 	  last_bytes -= 2;
11826 	  if (last_bytes)
11827 	    {
11828 	      rtx tmp = gen_reg_rtx (SImode);
11829 	      emit_insn (gen_addsi3 (dst, dst, const2_rtx));
11830 	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
11831 	      part_bytes_reg = tmp;
11832 	      dstoffset += 2;
11833 	    }
11834 	}
11835 
11836       if (last_bytes)
11837 	{
11838 	  mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
11839 	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11840 	}
11841     }
11842 
11843   return 1;
11844 }
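
/* Illustrative sketch, not part of the compiler sources: how the final one
   to three bytes are peeled out of the last word register above.  On a
   big-endian target the wanted bytes sit at the top of the word and are
   shifted down first; otherwise they sit at the bottom and are stored
   halfword then byte.  The helper name and the flag are illustrative.  */
#if 0
#include <stdint.h>

static void
store_tail (unsigned char *dst, uint32_t word, int last_bytes, int big_endian)
{
  if (big_endian)
    {
      word >>= 8 * (4 - last_bytes);	/* Wanted bytes now at the bottom.  */
      while (last_bytes--)
	{
	  dst[last_bytes] = (unsigned char) word;
	  word >>= 8;
	}
    }
  else
    {
      int i = 0;
      if (last_bytes >= 2)
	{
	  dst[i++] = (unsigned char) word;	/* Little-endian halfword.  */
	  dst[i++] = (unsigned char) (word >> 8);
	  word >>= 16;
	  last_bytes -= 2;
	}
      if (last_bytes)
	dst[i] = (unsigned char) word;
    }
}
#endif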
11845 
11846 /* Select a dominance comparison mode if possible for a test of the general
11847    form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
11848    COND_OR == DOM_CC_X_AND_Y => (X && Y)
11849    COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
11850    COND_OR == DOM_CC_X_OR_Y => (X || Y)
11851    In all cases OP will be either EQ or NE, but we don't need to know which
11852    here.  If we are unable to support a dominance comparison we return
11853    CC mode.  This will then fail to match for the RTL expressions that
11854    generate this call.  */
11855 enum machine_mode
11856 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
11857 {
11858   enum rtx_code cond1, cond2;
11859   int swapped = 0;
11860 
11861   /* Currently we will probably get the wrong result if the individual
11862      comparisons are not simple.  This also ensures that it is safe to
11863      reverse a comparison if necessary.  */
11864   if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
11865        != CCmode)
11866       || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
11867 	  != CCmode))
11868     return CCmode;
11869 
11870   /* The if_then_else variant of this tests the second condition if the
11871      first passes, but is true if the first fails.  Reverse the first
11872      condition to get a true "inclusive-or" expression.  */
11873   if (cond_or == DOM_CC_NX_OR_Y)
11874     cond1 = reverse_condition (cond1);
11875 
11876   /* If the comparisons are not equal, and one doesn't dominate the other,
11877      then we can't do this.  */
11878   if (cond1 != cond2
11879       && !comparison_dominates_p (cond1, cond2)
11880       && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
11881     return CCmode;
11882 
11883   if (swapped)
11884     {
11885       enum rtx_code temp = cond1;
11886       cond1 = cond2;
11887       cond2 = temp;
11888     }
11889 
11890   switch (cond1)
11891     {
11892     case EQ:
11893       if (cond_or == DOM_CC_X_AND_Y)
11894 	return CC_DEQmode;
11895 
11896       switch (cond2)
11897 	{
11898 	case EQ: return CC_DEQmode;
11899 	case LE: return CC_DLEmode;
11900 	case LEU: return CC_DLEUmode;
11901 	case GE: return CC_DGEmode;
11902 	case GEU: return CC_DGEUmode;
11903 	default: gcc_unreachable ();
11904 	}
11905 
11906     case LT:
11907       if (cond_or == DOM_CC_X_AND_Y)
11908 	return CC_DLTmode;
11909 
11910       switch (cond2)
11911 	{
11912 	case LT:
11913 	  return CC_DLTmode;
11914 	case LE:
11915 	  return CC_DLEmode;
11916 	case NE:
11917 	  return CC_DNEmode;
11918 	default:
11919 	  gcc_unreachable ();
11920 	}
11921 
11922     case GT:
11923       if (cond_or == DOM_CC_X_AND_Y)
11924 	return CC_DGTmode;
11925 
11926       switch (cond2)
11927 	{
11928 	case GT:
11929 	  return CC_DGTmode;
11930 	case GE:
11931 	  return CC_DGEmode;
11932 	case NE:
11933 	  return CC_DNEmode;
11934 	default:
11935 	  gcc_unreachable ();
11936 	}
11937 
11938     case LTU:
11939       if (cond_or == DOM_CC_X_AND_Y)
11940 	return CC_DLTUmode;
11941 
11942       switch (cond2)
11943 	{
11944 	case LTU:
11945 	  return CC_DLTUmode;
11946 	case LEU:
11947 	  return CC_DLEUmode;
11948 	case NE:
11949 	  return CC_DNEmode;
11950 	default:
11951 	  gcc_unreachable ();
11952 	}
11953 
11954     case GTU:
11955       if (cond_or == DOM_CC_X_AND_Y)
11956 	return CC_DGTUmode;
11957 
11958       switch (cond2)
11959 	{
11960 	case GTU:
11961 	  return CC_DGTUmode;
11962 	case GEU:
11963 	  return CC_DGEUmode;
11964 	case NE:
11965 	  return CC_DNEmode;
11966 	default:
11967 	  gcc_unreachable ();
11968 	}
11969 
11970     /* The remaining cases only occur when both comparisons are the
11971        same.  */
11972     case NE:
11973       gcc_assert (cond1 == cond2);
11974       return CC_DNEmode;
11975 
11976     case LE:
11977       gcc_assert (cond1 == cond2);
11978       return CC_DLEmode;
11979 
11980     case GE:
11981       gcc_assert (cond1 == cond2);
11982       return CC_DGEmode;
11983 
11984     case LEU:
11985       gcc_assert (cond1 == cond2);
11986       return CC_DLEUmode;
11987 
11988     case GEU:
11989       gcc_assert (cond1 == cond2);
11990       return CC_DGEUmode;
11991 
11992     default:
11993       gcc_unreachable ();
11994     }
11995 }
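
/* Illustrative note, not part of the compiler sources: the COND2 values
   accepted for a given COND1 above appear to be exactly those that hold
   whenever COND1 holds, so testing COND2 after the conditional second
   comparison still gives the right answer when the first comparison has
   already succeeded.  A trivial host-side check of that implication
   pattern for the signed cases:  */
#if 0
#include <assert.h>

int
main (void)
{
  int a, b;

  for (a = -2; a <= 2; a++)
    for (b = -2; b <= 2; b++)
      {
	if (a == b)		/* EQ pairs with EQ, LE, LEU, GE, GEU.  */
	  assert (a <= b && a >= b);
	if (a < b)		/* LT pairs with LT, LE, NE.  */
	  assert (a <= b && a != b);
	if (a > b)		/* GT pairs with GT, GE, NE.  */
	  assert (a >= b && a != b);
      }
  return 0;
}
#endif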
11996 
11997 enum machine_mode
11998 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
11999 {
12000   /* All floating point compares return CCFP if it is an equality
12001      comparison, and CCFPE otherwise.  */
12002   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12003     {
12004       switch (op)
12005 	{
12006 	case EQ:
12007 	case NE:
12008 	case UNORDERED:
12009 	case ORDERED:
12010 	case UNLT:
12011 	case UNLE:
12012 	case UNGT:
12013 	case UNGE:
12014 	case UNEQ:
12015 	case LTGT:
12016 	  return CCFPmode;
12017 
12018 	case LT:
12019 	case LE:
12020 	case GT:
12021 	case GE:
12022 	  return CCFPEmode;
12023 
12024 	default:
12025 	  gcc_unreachable ();
12026 	}
12027     }
12028 
12029   /* A compare with a shifted operand.  Because of canonicalization, the
12030      comparison will have to be swapped when we emit the assembler.  */
12031   if (GET_MODE (y) == SImode
12032       && (REG_P (y) || (GET_CODE (y) == SUBREG))
12033       && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
12034 	  || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
12035 	  || GET_CODE (x) == ROTATERT))
12036     return CC_SWPmode;
12037 
12038   /* This operation is performed swapped, but since we only rely on the Z
12039      flag we don't need an additional mode.  */
12040   if (GET_MODE (y) == SImode
12041       && (REG_P (y) || (GET_CODE (y) == SUBREG))
12042       && GET_CODE (x) == NEG
12043       && (op == EQ || op == NE))
12044     return CC_Zmode;
12045 
12046   /* This is a special case that is used by combine to allow a
12047      comparison of a shifted byte load to be split into a zero-extend
12048      followed by a comparison of the shifted integer (only valid for
12049      equalities and unsigned inequalities).  */
12050   if (GET_MODE (x) == SImode
12051       && GET_CODE (x) == ASHIFT
12052       && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
12053       && GET_CODE (XEXP (x, 0)) == SUBREG
12054       && MEM_P (SUBREG_REG (XEXP (x, 0)))
12055       && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
12056       && (op == EQ || op == NE
12057 	  || op == GEU || op == GTU || op == LTU || op == LEU)
12058       && CONST_INT_P (y))
12059     return CC_Zmode;
12060 
12061   /* A construct for a conditional compare, if the false arm contains
12062      0, then both conditions must be true, otherwise either condition
12063      must be true.  Not all conditions are possible, so CCmode is
12064      returned if it can't be done.  */
12065   if (GET_CODE (x) == IF_THEN_ELSE
12066       && (XEXP (x, 2) == const0_rtx
12067 	  || XEXP (x, 2) == const1_rtx)
12068       && COMPARISON_P (XEXP (x, 0))
12069       && COMPARISON_P (XEXP (x, 1)))
12070     return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
12071 					 INTVAL (XEXP (x, 2)));
12072 
12073   /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
12074   if (GET_CODE (x) == AND
12075       && (op == EQ || op == NE)
12076       && COMPARISON_P (XEXP (x, 0))
12077       && COMPARISON_P (XEXP (x, 1)))
12078     return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
12079 					 DOM_CC_X_AND_Y);
12080 
12081   if (GET_CODE (x) == IOR
12082       && (op == EQ || op == NE)
12083       && COMPARISON_P (XEXP (x, 0))
12084       && COMPARISON_P (XEXP (x, 1)))
12085     return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
12086 					 DOM_CC_X_OR_Y);
12087 
12088   /* An operation (on Thumb) where we want to test for a single bit.
12089      This is done by shifting that bit up into the top bit of a
12090      scratch register; we can then branch on the sign bit.  */
12091   if (TARGET_THUMB1
12092       && GET_MODE (x) == SImode
12093       && (op == EQ || op == NE)
12094       && GET_CODE (x) == ZERO_EXTRACT
12095       && XEXP (x, 1) == const1_rtx)
12096     return CC_Nmode;
12097 
12098   /* An operation that sets the condition codes as a side-effect, the
12099      V flag is not set correctly, so we can only use comparisons where
12100      this doesn't matter.  (For LT and GE we can use "mi" and "pl"
12101      instead.)  */
12102   /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
12103   if (GET_MODE (x) == SImode
12104       && y == const0_rtx
12105       && (op == EQ || op == NE || op == LT || op == GE)
12106       && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
12107 	  || GET_CODE (x) == AND || GET_CODE (x) == IOR
12108 	  || GET_CODE (x) == XOR || GET_CODE (x) == MULT
12109 	  || GET_CODE (x) == NOT || GET_CODE (x) == NEG
12110 	  || GET_CODE (x) == LSHIFTRT
12111 	  || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
12112 	  || GET_CODE (x) == ROTATERT
12113 	  || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
12114     return CC_NOOVmode;
12115 
12116   if (GET_MODE (x) == QImode && (op == EQ || op == NE))
12117     return CC_Zmode;
12118 
12119   if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
12120       && GET_CODE (x) == PLUS
12121       && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
12122     return CC_Cmode;
12123 
12124   if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
12125     {
12126       switch (op)
12127 	{
12128 	case EQ:
12129 	case NE:
12130 	  /* A DImode comparison against zero can be implemented by
12131 	     or'ing the two halves together.  */
12132 	  if (y == const0_rtx)
12133 	    return CC_Zmode;
12134 
12135 	  /* We can do an equality test in three Thumb instructions.  */
12136 	  if (!TARGET_32BIT)
12137 	    return CC_Zmode;
12138 
12139 	  /* FALLTHROUGH */
12140 
12141 	case LTU:
12142 	case LEU:
12143 	case GTU:
12144 	case GEU:
12145 	  /* DImode unsigned comparisons can be implemented by cmp +
12146 	     cmpeq without a scratch register.  Not worth doing in
12147 	     Thumb-2.  */
12148 	  if (TARGET_32BIT)
12149 	    return CC_CZmode;
12150 
12151 	  /* FALLTHROUGH */
12152 
12153 	case LT:
12154 	case LE:
12155 	case GT:
12156 	case GE:
12157 	  /* DImode signed and unsigned comparisons can be implemented
12158 	     by cmp + sbcs with a scratch register, but that does not
12159 	     set the Z flag - we must reverse GT/LE/GTU/LEU.  */
12160 	  gcc_assert (op != EQ && op != NE);
12161 	  return CC_NCVmode;
12162 
12163 	default:
12164 	  gcc_unreachable ();
12165 	}
12166     }
12167 
12168   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
12169     return GET_MODE (x);
12170 
12171   return CCmode;
12172 }
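
/* Illustrative sketch, not part of the compiler sources: the CC_Cmode case
   above matches the usual unsigned overflow idiom, where the sum is compared
   against one of the addends and only the carry flag matters.  A host-side
   check of the property being relied on; names illustrative only.  */
#if 0
#include <stdint.h>
#include <assert.h>

static int
add_overflows (uint32_t a, uint32_t b)
{
  /* (a + b) < a holds exactly when the 32-bit addition wrapped around,
     i.e. when the hardware add would have set the carry flag.  */
  return (uint32_t) (a + b) < a;
}

int
main (void)
{
  assert (add_overflows (0xffffffffu, 1u));
  assert (!add_overflows (1u, 2u));
  return 0;
}
#endif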
12173 
12174 /* X and Y are two things to compare using CODE.  Emit the compare insn and
12175    return the rtx for register 0 in the proper mode.  FP means this is a
12176    floating point compare; it does not appear to be needed on the ARM.  */
12177 rtx
12178 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
12179 {
12180   enum machine_mode mode;
12181   rtx cc_reg;
12182   int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
12183 
12184   /* We might have X as a constant, Y as a register because of the predicates
12185      used for cmpdi.  If so, force X to a register here.  */
12186   if (dimode_comparison && !REG_P (x))
12187     x = force_reg (DImode, x);
12188 
12189   mode = SELECT_CC_MODE (code, x, y);
12190   cc_reg = gen_rtx_REG (mode, CC_REGNUM);
12191 
12192   if (dimode_comparison
12193       && mode != CC_CZmode)
12194     {
12195       rtx clobber, set;
12196 
12197       /* To compare two non-zero values for equality, XOR them and
12198 	 then compare against zero.  Not used for ARM mode; there
12199 	 CC_CZmode is cheaper.  */
12200       if (mode == CC_Zmode && y != const0_rtx)
12201 	{
12202 	  gcc_assert (!reload_completed);
12203 	  x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
12204 	  y = const0_rtx;
12205 	}
12206 
12207       /* A scratch register is required.  */
12208       if (reload_completed)
12209 	gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
12210       else
12211 	scratch = gen_rtx_SCRATCH (SImode);
12212 
12213       clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
12214       set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
12215       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
12216     }
12217   else
12218     emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
12219 
12220   return cc_reg;
12221 }
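
/* Illustrative sketch, not part of the compiler sources: the equality
   rewrite used above for CC_Zmode turns x == y into (x ^ y) == 0, and a
   64-bit value is zero exactly when the OR of its two 32-bit halves is
   zero, which is what the Thumb sequence tests.  Names illustrative only.  */
#if 0
#include <stdint.h>
#include <assert.h>

static int
di_equal (uint64_t x, uint64_t y)
{
  uint64_t t = x ^ y;
  uint32_t lo = (uint32_t) t;
  uint32_t hi = (uint32_t) (t >> 32);

  return (lo | hi) == 0;
}

int
main (void)
{
  assert (di_equal (0x123456789abcdef0ULL, 0x123456789abcdef0ULL));
  assert (!di_equal (0x123456789abcdef0ULL, 0x123456789abcdef1ULL));
  return 0;
}
#endif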
12222 
12223 /* Generate a sequence of insns that will generate the correct return
12224    address mask depending on the physical architecture that the program
12225    is running on.  */
12226 rtx
12227 arm_gen_return_addr_mask (void)
12228 {
12229   rtx reg = gen_reg_rtx (Pmode);
12230 
12231   emit_insn (gen_return_addr_mask (reg));
12232   return reg;
12233 }
12234 
12235 void
12236 arm_reload_in_hi (rtx *operands)
12237 {
12238   rtx ref = operands[1];
12239   rtx base, scratch;
12240   HOST_WIDE_INT offset = 0;
12241 
12242   if (GET_CODE (ref) == SUBREG)
12243     {
12244       offset = SUBREG_BYTE (ref);
12245       ref = SUBREG_REG (ref);
12246     }
12247 
12248   if (REG_P (ref))
12249     {
12250       /* We have a pseudo which has been spilt onto the stack; there
12251 	 are two cases here: the first where there is a simple
12252 	 stack-slot replacement and a second where the stack-slot is
12253 	 out of range, or is used as a subreg.  */
12254       if (reg_equiv_mem (REGNO (ref)))
12255 	{
12256 	  ref = reg_equiv_mem (REGNO (ref));
12257 	  base = find_replacement (&XEXP (ref, 0));
12258 	}
12259       else
12260 	/* The slot is out of range, or was dressed up in a SUBREG.  */
12261 	base = reg_equiv_address (REGNO (ref));
12262     }
12263   else
12264     base = find_replacement (&XEXP (ref, 0));
12265 
12266   /* Handle the case where the address is too complex to be offset by 1.  */
12267   if (GET_CODE (base) == MINUS
12268       || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
12269     {
12270       rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12271 
12272       emit_set_insn (base_plus, base);
12273       base = base_plus;
12274     }
12275   else if (GET_CODE (base) == PLUS)
12276     {
12277       /* The addend must be CONST_INT, or we would have dealt with it above.  */
12278       HOST_WIDE_INT hi, lo;
12279 
12280       offset += INTVAL (XEXP (base, 1));
12281       base = XEXP (base, 0);
12282 
12283       /* Rework the address into a legal sequence of insns.  */
12284       /* Valid range for lo is -4095 -> 4095 */
12285       lo = (offset >= 0
12286 	    ? (offset & 0xfff)
12287 	    : -((-offset) & 0xfff));
12288 
12289       /* Corner case, if lo is the max offset then we would be out of range
12290 	 once we have added the additional 1 below, so bump the msb into the
12291 	 pre-loading insn(s).  */
12292       if (lo == 4095)
12293 	lo &= 0x7ff;
12294 
12295       hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
12296 	     ^ (HOST_WIDE_INT) 0x80000000)
12297 	    - (HOST_WIDE_INT) 0x80000000);
12298 
12299       gcc_assert (hi + lo == offset);
12300 
12301       if (hi != 0)
12302 	{
12303 	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12304 
12305 	  /* Get the base address; addsi3 knows how to handle constants
12306 	     that require more than one insn.  */
12307 	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
12308 	  base = base_plus;
12309 	  offset = lo;
12310 	}
12311     }
12312 
12313   /* Operands[2] may overlap operands[0] (though it won't overlap
12314      operands[1]), that's why we asked for a DImode reg -- so we can
12315      use the bit that does not overlap.  */
12316   if (REGNO (operands[2]) == REGNO (operands[0]))
12317     scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12318   else
12319     scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
12320 
12321   emit_insn (gen_zero_extendqisi2 (scratch,
12322 				   gen_rtx_MEM (QImode,
12323 						plus_constant (Pmode, base,
12324 							       offset))));
12325   emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
12326 				   gen_rtx_MEM (QImode,
12327 						plus_constant (Pmode, base,
12328 							       offset + 1))));
12329   if (!BYTES_BIG_ENDIAN)
12330     emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
12331 		   gen_rtx_IOR (SImode,
12332 				gen_rtx_ASHIFT
12333 				(SImode,
12334 				 gen_rtx_SUBREG (SImode, operands[0], 0),
12335 				 GEN_INT (8)),
12336 				scratch));
12337   else
12338     emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
12339 		   gen_rtx_IOR (SImode,
12340 				gen_rtx_ASHIFT (SImode, scratch,
12341 						GEN_INT (8)),
12342 				gen_rtx_SUBREG (SImode, operands[0], 0)));
12343 }
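
/* Illustrative sketch, not part of the compiler sources: the offset
   splitting used above (and again in arm_reload_out_hi below).  The low
   part stays within the +/-4095 range of the byte loads/stores, shrinking
   to 2047 when it would otherwise be exactly 4095 so that the later +1
   cannot push it out of range, and the rest goes into a preliminary add.
   This sketch uses a plain subtraction for HI in place of the 32-bit
   sign-extension arithmetic in the code above.  */
#if 0
#include <assert.h>

static void
split_offset (long offset, long *hi, long *lo)
{
  *lo = offset >= 0 ? (offset & 0xfff) : -((-offset) & 0xfff);
  if (*lo == 4095)
    *lo &= 0x7ff;
  *hi = offset - *lo;
  assert (*hi + *lo == offset);
}
#endif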
12344 
12345 /* Handle storing a half-word to memory during reload by synthesizing as two
12346    byte stores.  Take care not to clobber the input values until after we
12347    have moved them somewhere safe.  This code assumes that if the DImode
12348    scratch in operands[2] overlaps either the input value or output address
12349    in some way, then that value must die in this insn (we absolutely need
12350    two scratch registers for some corner cases).  */
12351 void
12352 arm_reload_out_hi (rtx *operands)
12353 {
12354   rtx ref = operands[0];
12355   rtx outval = operands[1];
12356   rtx base, scratch;
12357   HOST_WIDE_INT offset = 0;
12358 
12359   if (GET_CODE (ref) == SUBREG)
12360     {
12361       offset = SUBREG_BYTE (ref);
12362       ref = SUBREG_REG (ref);
12363     }
12364 
12365   if (REG_P (ref))
12366     {
12367       /* We have a pseudo which has been spilt onto the stack; there
12368 	 are two cases here: the first where there is a simple
12369 	 stack-slot replacement and a second where the stack-slot is
12370 	 out of range, or is used as a subreg.  */
12371       if (reg_equiv_mem (REGNO (ref)))
12372 	{
12373 	  ref = reg_equiv_mem (REGNO (ref));
12374 	  base = find_replacement (&XEXP (ref, 0));
12375 	}
12376       else
12377 	/* The slot is out of range, or was dressed up in a SUBREG.  */
12378 	base = reg_equiv_address (REGNO (ref));
12379     }
12380   else
12381     base = find_replacement (&XEXP (ref, 0));
12382 
12383   scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
12384 
12385   /* Handle the case where the address is too complex to be offset by 1.  */
12386   if (GET_CODE (base) == MINUS
12387       || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
12388     {
12389       rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12390 
12391       /* Be careful not to destroy OUTVAL.  */
12392       if (reg_overlap_mentioned_p (base_plus, outval))
12393 	{
12394 	  /* Updating base_plus might destroy outval, see if we can
12395 	     swap the scratch and base_plus.  */
12396 	  if (!reg_overlap_mentioned_p (scratch, outval))
12397 	    {
12398 	      rtx tmp = scratch;
12399 	      scratch = base_plus;
12400 	      base_plus = tmp;
12401 	    }
12402 	  else
12403 	    {
12404 	      rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
12405 
12406 	      /* Be conservative and copy OUTVAL into the scratch now;
12407 		 this should only be necessary if outval is a subreg
12408 		 of something larger than a word.  */
12409 	      /* XXX Might this clobber base?  I can't see how it can,
12410 		 since scratch is known to overlap with OUTVAL, and
12411 		 must be wider than a word.  */
12412 	      emit_insn (gen_movhi (scratch_hi, outval));
12413 	      outval = scratch_hi;
12414 	    }
12415 	}
12416 
12417       emit_set_insn (base_plus, base);
12418       base = base_plus;
12419     }
12420   else if (GET_CODE (base) == PLUS)
12421     {
12422       /* The addend must be CONST_INT, or we would have dealt with it above.  */
12423       HOST_WIDE_INT hi, lo;
12424 
12425       offset += INTVAL (XEXP (base, 1));
12426       base = XEXP (base, 0);
12427 
12428       /* Rework the address into a legal sequence of insns.  */
12429       /* Valid range for lo is -4095 -> 4095 */
12430       lo = (offset >= 0
12431 	    ? (offset & 0xfff)
12432 	    : -((-offset) & 0xfff));
12433 
12434       /* Corner case, if lo is the max offset then we would be out of range
12435 	 once we have added the additional 1 below, so bump the msb into the
12436 	 pre-loading insn(s).  */
12437       if (lo == 4095)
12438 	lo &= 0x7ff;
12439 
12440       hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
12441 	     ^ (HOST_WIDE_INT) 0x80000000)
12442 	    - (HOST_WIDE_INT) 0x80000000);
12443 
12444       gcc_assert (hi + lo == offset);
12445 
12446       if (hi != 0)
12447 	{
12448 	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12449 
12450 	  /* Be careful not to destroy OUTVAL.  */
12451 	  if (reg_overlap_mentioned_p (base_plus, outval))
12452 	    {
12453 	      /* Updating base_plus might destroy outval, see if we
12454 		 can swap the scratch and base_plus.  */
12455 	      if (!reg_overlap_mentioned_p (scratch, outval))
12456 		{
12457 		  rtx tmp = scratch;
12458 		  scratch = base_plus;
12459 		  base_plus = tmp;
12460 		}
12461 	      else
12462 		{
12463 		  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
12464 
12465 		  /* Be conservative and copy outval into scratch now;
12466 		     this should only be necessary if outval is a
12467 		     subreg of something larger than a word.  */
12468 		  /* XXX Might this clobber base?  I can't see how it
12469 		     can, since scratch is known to overlap with
12470 		     outval.  */
12471 		  emit_insn (gen_movhi (scratch_hi, outval));
12472 		  outval = scratch_hi;
12473 		}
12474 	    }
12475 
12476 	  /* Get the base address; addsi3 knows how to handle constants
12477 	     that require more than one insn.  */
12478 	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
12479 	  base = base_plus;
12480 	  offset = lo;
12481 	}
12482     }
12483 
12484   if (BYTES_BIG_ENDIAN)
12485     {
12486       emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12487 					 plus_constant (Pmode, base,
12488 							offset + 1)),
12489 			    gen_lowpart (QImode, outval)));
12490       emit_insn (gen_lshrsi3 (scratch,
12491 			      gen_rtx_SUBREG (SImode, outval, 0),
12492 			      GEN_INT (8)));
12493       emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
12494 								offset)),
12495 			    gen_lowpart (QImode, scratch)));
12496     }
12497   else
12498     {
12499       emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
12500 								offset)),
12501 			    gen_lowpart (QImode, outval)));
12502       emit_insn (gen_lshrsi3 (scratch,
12503 			      gen_rtx_SUBREG (SImode, outval, 0),
12504 			      GEN_INT (8)));
12505       emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12506 					 plus_constant (Pmode, base,
12507 							offset + 1)),
12508 			    gen_lowpart (QImode, scratch)));
12509     }
12510 }
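
/* Illustrative sketch, not part of the compiler sources: the byte placement
   performed by the two store sequences above, written as plain C.  The
   helper name and the BIG_ENDIAN flag are illustrative only.  */
#if 0
#include <stdint.h>

static void
store_hi_bytes (unsigned char *dst, uint32_t val, int big_endian)
{
  if (big_endian)
    {
      dst[1] = (unsigned char) val;		/* Low byte to the higher address.  */
      dst[0] = (unsigned char) (val >> 8);
    }
  else
    {
      dst[0] = (unsigned char) val;		/* Low byte to the lower address.  */
      dst[1] = (unsigned char) (val >> 8);
    }
}
#endif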
12511 
12512 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
12513    (padded to the size of a word) should be passed in a register.  */
12514 
12515 static bool
12516 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
12517 {
12518   if (TARGET_AAPCS_BASED)
12519     return must_pass_in_stack_var_size (mode, type);
12520   else
12521     return must_pass_in_stack_var_size_or_pad (mode, type);
12522 }
12523 
12524 
12525 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
12526    Return true if an argument passed on the stack should be padded upwards,
12527    i.e. if the least-significant byte has useful data.
12528    For legacy APCS ABIs we use the default.  For AAPCS based ABIs small
12529    aggregate types are placed in the lowest memory address.  */
12530 
12531 bool
12532 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
12533 {
12534   if (!TARGET_AAPCS_BASED)
12535     return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
12536 
12537   if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
12538     return false;
12539 
12540   return true;
12541 }
12542 
12543 
12544 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
12545    Return !BYTES_BIG_ENDIAN if the least significant byte of the
12546    register has useful data, and return the opposite if the most
12547    significant byte does.  */
12548 
12549 bool
12550 arm_pad_reg_upward (enum machine_mode mode,
12551                     tree type, int first ATTRIBUTE_UNUSED)
12552 {
12553   if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
12554     {
12555       /* For AAPCS, small aggregates, small fixed-point types,
12556 	 and small complex types are always padded upwards.  */
12557       if (type)
12558 	{
12559 	  if ((AGGREGATE_TYPE_P (type)
12560 	       || TREE_CODE (type) == COMPLEX_TYPE
12561 	       || FIXED_POINT_TYPE_P (type))
12562 	      && int_size_in_bytes (type) <= 4)
12563 	    return true;
12564 	}
12565       else
12566 	{
12567 	  if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
12568 	      && GET_MODE_SIZE (mode) <= 4)
12569 	    return true;
12570 	}
12571     }
12572 
12573   /* Otherwise, use default padding.  */
12574   return !BYTES_BIG_ENDIAN;
12575 }
12576 
12577 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
12578    assuming that the address in the base register is word aligned.  */
12579 bool
12580 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
12581 {
12582   HOST_WIDE_INT max_offset;
12583 
12584   /* Offset must be a multiple of 4 in Thumb mode.  */
12585   if (TARGET_THUMB2 && ((offset & 3) != 0))
12586     return false;
12587 
12588   if (TARGET_THUMB2)
12589     max_offset = 1020;
12590   else if (TARGET_ARM)
12591     max_offset = 255;
12592   else
12593     return false;
12594 
12595   return ((offset <= max_offset) && (offset >= -max_offset));
12596 }
12597 
12598 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
12599    Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
12600    Assumes that the address in the base register RN is word aligned.  Pattern
12601    guarantees that both memory accesses use the same base register,
12602    the offsets are constants within the valid range, and the gap between them is 4.
12603    If reload is complete then check that the registers are legal.  WBACK indicates whether
12604    address is updated.  LOAD indicates whether memory access is load or store.  */
12605 bool
12606 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
12607                        bool wback, bool load)
12608 {
12609   unsigned int t, t2, n;
12610 
12611   if (!reload_completed)
12612     return true;
12613 
12614   if (!offset_ok_for_ldrd_strd (offset))
12615     return false;
12616 
12617   t = REGNO (rt);
12618   t2 = REGNO (rt2);
12619   n = REGNO (rn);
12620 
12621   if ((TARGET_THUMB2)
12622       && ((wback && (n == t || n == t2))
12623           || (t == SP_REGNUM)
12624           || (t == PC_REGNUM)
12625           || (t2 == SP_REGNUM)
12626           || (t2 == PC_REGNUM)
12627           || (!load && (n == PC_REGNUM))
12628           || (load && (t == t2))
12629           /* Triggers Cortex-M3 LDRD errata.  */
12630           || (!wback && load && fix_cm3_ldrd && (n == t))))
12631     return false;
12632 
12633   if ((TARGET_ARM)
12634       && ((wback && (n == t || n == t2))
12635           || (t2 == PC_REGNUM)
12636           || (t % 2 != 0)   /* First destination register is not even.  */
12637           || (t2 != t + 1)
12638           /* PC can be used as base register (for offset addressing only),
12639              but it is deprecated.  */
12640           || (n == PC_REGNUM)))
12641     return false;
12642 
12643   return true;
12644 }
12645 
12646 
12647 /* Print a symbolic form of X to the debug file, F.  */
12648 static void
12649 arm_print_value (FILE *f, rtx x)
12650 {
12651   switch (GET_CODE (x))
12652     {
12653     case CONST_INT:
12654       fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
12655       return;
12656 
12657     case CONST_DOUBLE:
12658       fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
12659       return;
12660 
12661     case CONST_VECTOR:
12662       {
12663 	int i;
12664 
12665 	fprintf (f, "<");
12666 	for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
12667 	  {
12668 	    fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
12669 	    if (i < (CONST_VECTOR_NUNITS (x) - 1))
12670 	      fputc (',', f);
12671 	  }
12672 	fprintf (f, ">");
12673       }
12674       return;
12675 
12676     case CONST_STRING:
12677       fprintf (f, "\"%s\"", XSTR (x, 0));
12678       return;
12679 
12680     case SYMBOL_REF:
12681       fprintf (f, "`%s'", XSTR (x, 0));
12682       return;
12683 
12684     case LABEL_REF:
12685       fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
12686       return;
12687 
12688     case CONST:
12689       arm_print_value (f, XEXP (x, 0));
12690       return;
12691 
12692     case PLUS:
12693       arm_print_value (f, XEXP (x, 0));
12694       fprintf (f, "+");
12695       arm_print_value (f, XEXP (x, 1));
12696       return;
12697 
12698     case PC:
12699       fprintf (f, "pc");
12700       return;
12701 
12702     default:
12703       fprintf (f, "????");
12704       return;
12705     }
12706 }
12707 
12708 /* Routines for manipulation of the constant pool.  */
12709 
12710 /* Arm instructions cannot load a large constant directly into a
12711    register; they have to come from a pc relative load.  The constant
12712    must therefore be placed in the addressable range of the pc
12713    relative load.  Depending on the precise pc relative load
12714    instruction the range is somewhere between 256 bytes and 4k.  This
12715    means that we often have to dump a constant inside a function, and
12716    generate code to branch around it.
12717 
12718    It is important to minimize this, since the branches will slow
12719    things down and make the code larger.
12720 
12721    Normally we can hide the table after an existing unconditional
12722    branch so that there is no interruption of the flow, but in the
12723    worst case the code looks like this:
12724 
12725 	ldr	rn, L1
12726 	...
12727 	b	L2
12728 	align
12729 	L1:	.long value
12730 	L2:
12731 	...
12732 
12733 	ldr	rn, L3
12734 	...
12735 	b	L4
12736 	align
12737 	L3:	.long value
12738 	L4:
12739 	...
12740 
12741    We fix this by performing a scan after scheduling, which notices
12742    which instructions need to have their operands fetched from the
12743    constant table and builds the table.
12744 
12745    The algorithm starts by building a table of all the constants that
12746    need fixing up and all the natural barriers in the function (places
12747    where a constant table can be dropped without breaking the flow).
12748    For each fixup we note how far the pc-relative replacement will be
12749    able to reach and the offset of the instruction into the function.
12750 
12751    Having built the table we then group the fixes together to form
12752    tables that are as large as possible (subject to addressing
12753    constraints) and emit each table of constants after the last
12754    barrier that is within range of all the instructions in the group.
12755    If a group does not contain a barrier, then we forcibly create one
12756    by inserting a jump instruction into the flow.  Once the table has
12757    been inserted, the insns are then modified to reference the
12758    relevant entry in the pool.
12759 
12760    Possible enhancements to the algorithm (not implemented) are:
12761 
12762    1) For some processors and object formats, there may be benefit in
12763    aligning the pools to the start of cache lines; this alignment
12764    would need to be taken into account when calculating addressability
12765    of a pool.  */
12766 
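/* Illustrative sketch, not part of the compiler sources: a rough model of
   the grouping step described above.  Each fix is reduced to the address of
   its insn plus the furthest address its pc-relative load can reach; a group
   is extended while every member can still reach a common pool, and the pool
   goes after the last barrier inside that window.  The real pass works on
   RTL insns and also accounts for the size of the pool entries, which this
   sketch ignores.  */
#if 0
static long
place_pool (const long *fix_addr, const long *fix_range, int nfixes,
	    const long *barriers, int nbarriers, int first, int *group_end)
{
  long limit = fix_addr[first] + fix_range[first];
  int i, b, best = -1;

  for (i = first; i < nfixes; i++)
    {
      long this_limit = fix_addr[i] + fix_range[i];
      long new_limit = this_limit < limit ? this_limit : limit;

      if (new_limit < fix_addr[i])
	break;				/* This fix cannot share the pool.  */
      limit = new_limit;
    }
  *group_end = i;

  for (b = 0; b < nbarriers && barriers[b] <= limit; b++)
    if (barriers[b] >= fix_addr[*group_end - 1])
      best = b;				/* Last barrier within range of all.  */

  return best >= 0 ? barriers[best] : -1;	/* -1: force a new barrier.  */
}
#endif
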
12767 /* These typedefs are located at the start of this file, so that
12768    they can be used in the prototypes there.  This comment is to
12769    remind readers of that fact so that the following structures
12770    can be understood more easily.
12771 
12772      typedef struct minipool_node    Mnode;
12773      typedef struct minipool_fixup   Mfix;  */
12774 
12775 struct minipool_node
12776 {
12777   /* Doubly linked chain of entries.  */
12778   Mnode * next;
12779   Mnode * prev;
12780   /* The maximum offset into the code at which this entry can be placed.  While
12781      pushing fixes for forward references, all entries are sorted in order
12782      of increasing max_address.  */
12783   HOST_WIDE_INT max_address;
12784   /* Similarly for an entry inserted for a backwards ref.  */
12785   HOST_WIDE_INT min_address;
12786   /* The number of fixes referencing this entry.  This can become zero
12787      if we "unpush" an entry.  In this case we ignore the entry when we
12788      come to emit the code.  */
12789   int refcount;
12790   /* The offset from the start of the minipool.  */
12791   HOST_WIDE_INT offset;
12792   /* The value in table.  */
12793   rtx value;
12794   /* The mode of value.  */
12795   enum machine_mode mode;
12796   /* The size of the value.  With iWMMXt enabled
12797      sizes > 4 also imply an alignment of 8 bytes.  */
12798   int fix_size;
12799 };
12800 
12801 struct minipool_fixup
12802 {
12803   Mfix *            next;
12804   rtx               insn;
12805   HOST_WIDE_INT     address;
12806   rtx *             loc;
12807   enum machine_mode mode;
12808   int               fix_size;
12809   rtx               value;
12810   Mnode *           minipool;
12811   HOST_WIDE_INT     forwards;
12812   HOST_WIDE_INT     backwards;
12813 };
12814 
12815 /* Fixes less than a word need padding out to a word boundary.  */
12816 #define MINIPOOL_FIX_SIZE(mode) \
12817   (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
12818 
12819 static Mnode *	minipool_vector_head;
12820 static Mnode *	minipool_vector_tail;
12821 static rtx	minipool_vector_label;
12822 static int	minipool_pad;
12823 
12824 /* The linked list of all minipool fixes required for this function.  */
12825 Mfix * 		minipool_fix_head;
12826 Mfix * 		minipool_fix_tail;
12827 /* The fix entry for the current minipool, once it has been placed.  */
12828 Mfix *		minipool_barrier;
12829 
12830 /* Determines if INSN is the start of a jump table.  Returns the end
12831    of the TABLE or NULL_RTX.  */
12832 static rtx
12833 is_jump_table (rtx insn)
12834 {
12835   rtx table;
12836 
12837   if (jump_to_label_p (insn)
12838       && ((table = next_real_insn (JUMP_LABEL (insn)))
12839 	  == next_real_insn (insn))
12840       && table != NULL
12841       && JUMP_P (table)
12842       && (GET_CODE (PATTERN (table)) == ADDR_VEC
12843 	  || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
12844     return table;
12845 
12846   return NULL_RTX;
12847 }
12848 
12849 #ifndef JUMP_TABLES_IN_TEXT_SECTION
12850 #define JUMP_TABLES_IN_TEXT_SECTION 0
12851 #endif
12852 
12853 static HOST_WIDE_INT
12854 get_jump_table_size (rtx insn)
12855 {
12856   /* ADDR_VECs only take room if read-only data goes into the text
12857      section.  */
12858   if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
12859     {
12860       rtx body = PATTERN (insn);
12861       int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
12862       HOST_WIDE_INT size;
12863       HOST_WIDE_INT modesize;
12864 
12865       modesize = GET_MODE_SIZE (GET_MODE (body));
12866       size = modesize * XVECLEN (body, elt);
12867       switch (modesize)
12868 	{
12869 	case 1:
12870 	  /* Round up the size of a TBB table to a halfword boundary.  */
12871 	  size = (size + 1) & ~(HOST_WIDE_INT)1;
12872 	  break;
12873 	case 2:
12874 	  /* No padding necessary for TBH.  */
12875 	  break;
12876 	case 4:
12877 	  /* Add two bytes for alignment on Thumb.  */
12878 	  if (TARGET_THUMB)
12879 	    size += 2;
12880 	  break;
12881 	default:
12882 	  gcc_unreachable ();
12883 	}
12884       return size;
12885     }
12886 
12887   return 0;
12888 }
12889 
12890 /* Return the maximum amount of padding that will be inserted before
12891    label LABEL.  */
12892 
12893 static HOST_WIDE_INT
12894 get_label_padding (rtx label)
12895 {
12896   HOST_WIDE_INT align, min_insn_size;
12897 
12898   align = 1 << label_to_alignment (label);
12899   min_insn_size = TARGET_THUMB ? 2 : 4;
12900   return align > min_insn_size ? align - min_insn_size : 0;
12901 }
12902 
12903 /* Move a minipool fix MP from its current location to before MAX_MP.
12904    If MAX_MP is NULL, then MP doesn't need moving, but the addressing
12905    constraints may need updating.  */
12906 static Mnode *
12907 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
12908 			       HOST_WIDE_INT max_address)
12909 {
12910   /* The code below assumes these are different.  */
12911   gcc_assert (mp != max_mp);
12912 
12913   if (max_mp == NULL)
12914     {
12915       if (max_address < mp->max_address)
12916 	mp->max_address = max_address;
12917     }
12918   else
12919     {
12920       if (max_address > max_mp->max_address - mp->fix_size)
12921 	mp->max_address = max_mp->max_address - mp->fix_size;
12922       else
12923 	mp->max_address = max_address;
12924 
12925       /* Unlink MP from its current position.  Since max_mp is non-null,
12926        mp->prev must be non-null.  */
12927       mp->prev->next = mp->next;
12928       if (mp->next != NULL)
12929 	mp->next->prev = mp->prev;
12930       else
12931 	minipool_vector_tail = mp->prev;
12932 
12933       /* Re-insert it before MAX_MP.  */
12934       mp->next = max_mp;
12935       mp->prev = max_mp->prev;
12936       max_mp->prev = mp;
12937 
12938       if (mp->prev != NULL)
12939 	mp->prev->next = mp;
12940       else
12941 	minipool_vector_head = mp;
12942     }
12943 
12944   /* Save the new entry.  */
12945   max_mp = mp;
12946 
12947   /* Scan over the preceding entries and adjust their addresses as
12948      required.  */
12949   while (mp->prev != NULL
12950 	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12951     {
12952       mp->prev->max_address = mp->max_address - mp->prev->fix_size;
12953       mp = mp->prev;
12954     }
12955 
12956   return max_mp;
12957 }
12958 
12959 /* Add a constant to the minipool for a forward reference.  Returns the
12960    node added or NULL if the constant will not fit in this pool.  */
12961 static Mnode *
12962 add_minipool_forward_ref (Mfix *fix)
12963 {
12964   /* If set, max_mp is the first pool_entry that has a lower
12965      constraint than the one we are trying to add.  */
12966   Mnode *       max_mp = NULL;
12967   HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
12968   Mnode *       mp;
12969 
12970   /* If the minipool starts before the end of FIX->INSN then this FIX
12971      can not be placed into the current pool.  Furthermore, adding the
12972      new constant pool entry may cause the pool to start FIX_SIZE bytes
12973      earlier.  */
12974   if (minipool_vector_head &&
12975       (fix->address + get_attr_length (fix->insn)
12976        >= minipool_vector_head->max_address - fix->fix_size))
12977     return NULL;
12978 
12979   /* Scan the pool to see if a constant with the same value has
12980      already been added.  While we are doing this, also note the
12981      location where we must insert the constant if it doesn't already
12982      exist.  */
12983   for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12984     {
12985       if (GET_CODE (fix->value) == GET_CODE (mp->value)
12986 	  && fix->mode == mp->mode
12987 	  && (!LABEL_P (fix->value)
12988 	      || (CODE_LABEL_NUMBER (fix->value)
12989 		  == CODE_LABEL_NUMBER (mp->value)))
12990 	  && rtx_equal_p (fix->value, mp->value))
12991 	{
12992 	  /* More than one fix references this entry.  */
12993 	  mp->refcount++;
12994 	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);
12995 	}
12996 
12997       /* Note the insertion point if necessary.  */
12998       if (max_mp == NULL
12999 	  && mp->max_address > max_address)
13000 	max_mp = mp;
13001 
13002       /* If we are inserting an 8-byte aligned quantity and
13003 	 we have not already found an insertion point, then
13004 	 make sure that all such 8-byte aligned quantities are
13005 	 placed at the start of the pool.  */
13006       if (ARM_DOUBLEWORD_ALIGN
13007 	  && max_mp == NULL
13008 	  && fix->fix_size >= 8
13009 	  && mp->fix_size < 8)
13010 	{
13011 	  max_mp = mp;
13012 	  max_address = mp->max_address;
13013 	}
13014     }
13015 
13016   /* The value is not currently in the minipool, so we need to create
13017      a new entry for it.  If MAX_MP is NULL, the entry will be put on
13018      the end of the list since the placement is less constrained than
13019      any existing entry.  Otherwise, we insert the new fix before
13020      MAX_MP and, if necessary, adjust the constraints on the other
13021      entries.  */
13022   mp = XNEW (Mnode);
13023   mp->fix_size = fix->fix_size;
13024   mp->mode = fix->mode;
13025   mp->value = fix->value;
13026   mp->refcount = 1;
13027   /* Not yet required for a backwards ref.  */
13028   mp->min_address = -65536;
13029 
13030   if (max_mp == NULL)
13031     {
13032       mp->max_address = max_address;
13033       mp->next = NULL;
13034       mp->prev = minipool_vector_tail;
13035 
13036       if (mp->prev == NULL)
13037 	{
13038 	  minipool_vector_head = mp;
13039 	  minipool_vector_label = gen_label_rtx ();
13040 	}
13041       else
13042 	mp->prev->next = mp;
13043 
13044       minipool_vector_tail = mp;
13045     }
13046   else
13047     {
13048       if (max_address > max_mp->max_address - mp->fix_size)
13049 	mp->max_address = max_mp->max_address - mp->fix_size;
13050       else
13051 	mp->max_address = max_address;
13052 
13053       mp->next = max_mp;
13054       mp->prev = max_mp->prev;
13055       max_mp->prev = mp;
13056       if (mp->prev != NULL)
13057 	mp->prev->next = mp;
13058       else
13059 	minipool_vector_head = mp;
13060     }
13061 
13062   /* Save the new entry.  */
13063   max_mp = mp;
13064 
13065   /* Scan over the preceding entries and adjust their addresses as
13066      required.  */
13067   while (mp->prev != NULL
13068 	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
13069     {
13070       mp->prev->max_address = mp->max_address - mp->prev->fix_size;
13071       mp = mp->prev;
13072     }
13073 
13074   return max_mp;
13075 }
13076 
13077 static Mnode *
13078 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
13079 				HOST_WIDE_INT  min_address)
13080 {
13081   HOST_WIDE_INT offset;
13082 
13083   /* The code below assumes these are different.  */
13084   gcc_assert (mp != min_mp);
13085 
13086   if (min_mp == NULL)
13087     {
13088       if (min_address > mp->min_address)
13089 	mp->min_address = min_address;
13090     }
13091   else
13092     {
13093       /* We will adjust this below if it is too loose.  */
13094       mp->min_address = min_address;
13095 
13096       /* Unlink MP from its current position.  Since min_mp is non-null,
13097 	 mp->next must be non-null.  */
13098       mp->next->prev = mp->prev;
13099       if (mp->prev != NULL)
13100 	mp->prev->next = mp->next;
13101       else
13102 	minipool_vector_head = mp->next;
13103 
13104       /* Reinsert it after MIN_MP.  */
13105       mp->prev = min_mp;
13106       mp->next = min_mp->next;
13107       min_mp->next = mp;
13108       if (mp->next != NULL)
13109 	mp->next->prev = mp;
13110       else
13111 	minipool_vector_tail = mp;
13112     }
13113 
13114   min_mp = mp;
13115 
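  /* Recompute the offset of every pool entry, and propagate the tightened
     minimum-address constraints forwards through the list.  */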
13116   offset = 0;
13117   for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
13118     {
13119       mp->offset = offset;
13120       if (mp->refcount > 0)
13121 	offset += mp->fix_size;
13122 
13123       if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
13124 	mp->next->min_address = mp->min_address + mp->fix_size;
13125     }
13126 
13127   return min_mp;
13128 }
13129 
13130 /* Add a constant to the minipool for a backward reference.  Returns the
13131    node added or NULL if the constant will not fit in this pool.
13132 
13133    Note that the insertion code for a backwards reference can be
13134    somewhat confusing, because the calculated offsets for each fix do
13135    not take into account the size of the pool (which is still under
13136    construction).  */
13137 static Mnode *
13138 add_minipool_backward_ref (Mfix *fix)
13139 {
13140   /* If set, min_mp is the last pool_entry that has a lower constraint
13141      than the one we are trying to add.  */
13142   Mnode *min_mp = NULL;
13143   /* This can be negative, since it is only a constraint.  */
13144   HOST_WIDE_INT  min_address = fix->address - fix->backwards;
13145   Mnode *mp;
13146 
13147   /* If we can't reach the current pool from this insn, or if we can't
13148      insert this entry at the end of the pool without pushing other
13149      fixes out of range, then we don't try.  This ensures that we
13150      can't fail later on.  */
13151   if (min_address >= minipool_barrier->address
13152       || (minipool_vector_tail->min_address + fix->fix_size
13153 	  >= minipool_barrier->address))
13154     return NULL;
13155 
13156   /* Scan the pool to see if a constant with the same value has
13157      already been added.  While we are doing this, also note the
13158      location where we must insert the constant if it doesn't already
13159      exist.  */
13160   for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
13161     {
13162       if (GET_CODE (fix->value) == GET_CODE (mp->value)
13163 	  && fix->mode == mp->mode
13164 	  && (!LABEL_P (fix->value)
13165 	      || (CODE_LABEL_NUMBER (fix->value)
13166 		  == CODE_LABEL_NUMBER (mp->value)))
13167 	  && rtx_equal_p (fix->value, mp->value)
13168 	  /* Check that there is enough slack to move this entry to the
13169 	     end of the table (this is conservative).  */
13170 	  && (mp->max_address
13171 	      > (minipool_barrier->address
13172 		 + minipool_vector_tail->offset
13173 		 + minipool_vector_tail->fix_size)))
13174 	{
13175 	  mp->refcount++;
13176 	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);
13177 	}
13178 
13179       if (min_mp != NULL)
13180 	mp->min_address += fix->fix_size;
13181       else
13182 	{
13183 	  /* Note the insertion point if necessary.  */
13184 	  if (mp->min_address < min_address)
13185 	    {
13186 	      /* For now, we do not allow the insertion of nodes requiring
13187 		 8-byte alignment anywhere but at the start of the pool.  */
13188 	      if (ARM_DOUBLEWORD_ALIGN
13189 		  && fix->fix_size >= 8 && mp->fix_size < 8)
13190 		return NULL;
13191 	      else
13192 		min_mp = mp;
13193 	    }
13194 	  else if (mp->max_address
13195 		   < minipool_barrier->address + mp->offset + fix->fix_size)
13196 	    {
13197 	      /* Inserting before this entry would push the fix beyond
13198 		 its maximum address (which can happen if we have
13199 		 re-located a forwards fix); force the new fix to come
13200 		 after it.  */
13201 	      if (ARM_DOUBLEWORD_ALIGN
13202 		  && fix->fix_size >= 8 && mp->fix_size < 8)
13203 		return NULL;
13204 	      else
13205 		{
13206 		  min_mp = mp;
13207 		  min_address = mp->min_address + fix->fix_size;
13208 		}
13209 	    }
13210 	  /* Do not insert a non-8-byte aligned quantity before 8-byte
13211 	     aligned quantities.  */
13212 	  else if (ARM_DOUBLEWORD_ALIGN
13213 		   && fix->fix_size < 8
13214 		   && mp->fix_size >= 8)
13215 	    {
13216 	      min_mp = mp;
13217 	      min_address = mp->min_address + fix->fix_size;
13218 	    }
13219 	}
13220     }
13221 
13222   /* We need to create a new entry.  */
13223   mp = XNEW (Mnode);
13224   mp->fix_size = fix->fix_size;
13225   mp->mode = fix->mode;
13226   mp->value = fix->value;
13227   mp->refcount = 1;
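  /* Not yet required for a forwards ref; pick a value safely beyond the
     barrier.  */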
13228   mp->max_address = minipool_barrier->address + 65536;
13229 
13230   mp->min_address = min_address;
13231 
13232   if (min_mp == NULL)
13233     {
13234       mp->prev = NULL;
13235       mp->next = minipool_vector_head;
13236 
13237       if (mp->next == NULL)
13238 	{
13239 	  minipool_vector_tail = mp;
13240 	  minipool_vector_label = gen_label_rtx ();
13241 	}
13242       else
13243 	mp->next->prev = mp;
13244 
13245       minipool_vector_head = mp;
13246     }
13247   else
13248     {
13249       mp->next = min_mp->next;
13250       mp->prev = min_mp;
13251       min_mp->next = mp;
13252 
13253       if (mp->next != NULL)
13254 	mp->next->prev = mp;
13255       else
13256 	minipool_vector_tail = mp;
13257     }
13258 
13259   /* Save the new entry.  */
13260   min_mp = mp;
13261 
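  /* Step back one entry, if there is one, so that the scan below also
     recomputes the offset of the entry just inserted; otherwise the new
     entry is the head of the pool and its offset is simply zero.  */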
13262   if (mp->prev)
13263     mp = mp->prev;
13264   else
13265     mp->offset = 0;
13266 
13267   /* Scan over the following entries and adjust their offsets.  */
13268   while (mp->next != NULL)
13269     {
13270       if (mp->next->min_address < mp->min_address + mp->fix_size)
13271 	mp->next->min_address = mp->min_address + mp->fix_size;
13272 
13273       if (mp->refcount)
13274 	mp->next->offset = mp->offset + mp->fix_size;
13275       else
13276 	mp->next->offset = mp->offset;
13277 
13278       mp = mp->next;
13279     }
13280 
13281   return min_mp;
13282 }
13283 
13284 static void
13285 assign_minipool_offsets (Mfix *barrier)
13286 {
13287   HOST_WIDE_INT offset = 0;
13288   Mnode *mp;
13289 
13290   minipool_barrier = barrier;
13291 
13292   for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
13293     {
13294       mp->offset = offset;
13295 
13296       if (mp->refcount > 0)
13297 	offset += mp->fix_size;
13298     }
13299 }
13300 
13301 /* Output the literal table.  */
13302 static void
13303 dump_minipool (rtx scan)
13304 {
13305   Mnode * mp;
13306   Mnode * nmp;
13307   int align64 = 0;
13308 
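  /* The whole pool needs 8-byte alignment if any live entry does.  */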
13309   if (ARM_DOUBLEWORD_ALIGN)
13310     for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
13311       if (mp->refcount > 0 && mp->fix_size >= 8)
13312 	{
13313 	  align64 = 1;
13314 	  break;
13315 	}
13316 
13317   if (dump_file)
13318     fprintf (dump_file,
13319 	     ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
13320 	     INSN_UID (scan), (long) minipool_barrier->address, align64 ? 8 : 4);
13321 
13322   scan = emit_label_after (gen_label_rtx (), scan);
13323   scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
13324   scan = emit_label_after (minipool_vector_label, scan);
13325 
13326   for (mp = minipool_vector_head; mp != NULL; mp = nmp)
13327     {
13328       if (mp->refcount > 0)
13329 	{
13330 	  if (dump_file)
13331 	    {
13332 	      fprintf (dump_file,
13333 		       ";;  Offset %u, min %ld, max %ld ",
13334 		       (unsigned) mp->offset, (long) mp->min_address,
13335 		       (long) mp->max_address);
13336 	      arm_print_value (dump_file, mp->value);
13337 	      fputc ('\n', dump_file);
13338 	    }
13339 
13340 	  switch (GET_MODE_SIZE (mp->mode))
13341 	    {
13342 #ifdef HAVE_consttable_1
13343 	    case 1:
13344 	      scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
13345 	      break;
13346 
13347 #endif
13348 #ifdef HAVE_consttable_2
13349 	    case 2:
13350 	      scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
13351 	      break;
13352 
13353 #endif
13354 #ifdef HAVE_consttable_4
13355 	    case 4:
13356 	      scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
13357 	      break;
13358 
13359 #endif
13360 #ifdef HAVE_consttable_8
13361 	    case 8:
13362 	      scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
13363 	      break;
13364 
13365 #endif
13366 #ifdef HAVE_consttable_16
13367 	    case 16:
13368 	      scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
13369 	      break;
13370 
13371 #endif
13372 	    default:
13373 	      gcc_unreachable ();
13374 	    }
13375 	}
13376 
13377       nmp = mp->next;
13378       free (mp);
13379     }
13380 
13381   minipool_vector_head = minipool_vector_tail = NULL;
13382   scan = emit_insn_after (gen_consttable_end (), scan);
13383   scan = emit_barrier_after (scan);
13384 }
13385 
13386 /* Return the cost of forcibly inserting a barrier after INSN.  */
13387 static int
13388 arm_barrier_cost (rtx insn)
13389 {
13390   /* Basing the location of the pool on the loop depth is preferable,
13391      but at the moment, the basic block information seems to be
13392      corrupt by this stage of the compilation.  */
13393   int base_cost = 50;
13394   rtx next = next_nonnote_insn (insn);
13395 
13396   if (next != NULL && LABEL_P (next))
13397     base_cost -= 20;
13398 
13399   switch (GET_CODE (insn))
13400     {
13401     case CODE_LABEL:
13402       /* It will always be better to place the table before the label, rather
13403 	 than after it.  */
13404       return 50;
13405 
13406     case INSN:
13407     case CALL_INSN:
13408       return base_cost;
13409 
13410     case JUMP_INSN:
13411       return base_cost - 10;
13412 
13413     default:
13414       return base_cost + 10;
13415     }
13416 }
13417 
13418 /* Find the best place in the insn stream in the range
13419    (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
13420    Create the barrier by inserting a jump and add a new fix entry for
13421    it.  */
13422 static Mfix *
13423 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
13424 {
13425   HOST_WIDE_INT count = 0;
13426   rtx barrier;
13427   rtx from = fix->insn;
13428   /* The instruction after which we will insert the jump.  */
13429   rtx selected = NULL;
13430   int selected_cost;
13431   /* The address at which the jump instruction will be placed.  */
13432   HOST_WIDE_INT selected_address;
13433   Mfix * new_fix;
13434   HOST_WIDE_INT max_count = max_address - fix->address;
13435   rtx label = gen_label_rtx ();
13436 
13437   selected_cost = arm_barrier_cost (from);
13438   selected_address = fix->address;
13439 
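  /* Walk forwards from the fix, remembering the cheapest place found so
     far to insert the barrier, until we run out of range.  */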
13440   while (from && count < max_count)
13441     {
13442       rtx tmp;
13443       int new_cost;
13444 
13445       /* This code shouldn't have been called if there was a natural barrier
13446 	 within range.  */
13447       gcc_assert (!BARRIER_P (from));
13448 
13449       /* Count the length of this insn.  This must stay in sync with the
13450 	 code that pushes minipool fixes.  */
13451       if (LABEL_P (from))
13452 	count += get_label_padding (from);
13453       else
13454 	count += get_attr_length (from);
13455 
13456       /* If there is a jump table, add its length.  */
13457       tmp = is_jump_table (from);
13458       if (tmp != NULL)
13459 	{
13460 	  count += get_jump_table_size (tmp);
13461 
13462 	  /* Jump tables aren't in a basic block, so base the cost on
13463 	     the dispatch insn.  If we select this location, we will
13464 	     still put the pool after the table.  */
13465 	  new_cost = arm_barrier_cost (from);
13466 
13467 	  if (count < max_count
13468 	      && (!selected || new_cost <= selected_cost))
13469 	    {
13470 	      selected = tmp;
13471 	      selected_cost = new_cost;
13472 	      selected_address = fix->address + count;
13473 	    }
13474 
13475 	  /* Continue after the dispatch table.  */
13476 	  from = NEXT_INSN (tmp);
13477 	  continue;
13478 	}
13479 
13480       new_cost = arm_barrier_cost (from);
13481 
13482       if (count < max_count
13483 	  && (!selected || new_cost <= selected_cost))
13484 	{
13485 	  selected = from;
13486 	  selected_cost = new_cost;
13487 	  selected_address = fix->address + count;
13488 	}
13489 
13490       from = NEXT_INSN (from);
13491     }
13492 
13493   /* Make sure that we found a place to insert the jump.  */
13494   gcc_assert (selected);
13495 
13496   /* Make sure we do not split a call and its corresponding
13497      CALL_ARG_LOCATION note.  */
13498   if (CALL_P (selected))
13499     {
13500       rtx next = NEXT_INSN (selected);
13501       if (next && NOTE_P (next)
13502 	  && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
13503 	  selected = next;
13504     }
13505 
13506   /* Create a new JUMP_INSN that branches around a barrier.  */
13507   from = emit_jump_insn_after (gen_jump (label), selected);
13508   JUMP_LABEL (from) = label;
13509   barrier = emit_barrier_after (from);
13510   emit_label_after (label, barrier);
13511 
13512   /* Create a minipool barrier entry for the new barrier.  */
13513   new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
13514   new_fix->insn = barrier;
13515   new_fix->address = selected_address;
13516   new_fix->next = fix->next;
13517   fix->next = new_fix;
13518 
13519   return new_fix;
13520 }
13521 
13522 /* Record that there is a natural barrier in the insn stream at
13523    ADDRESS.  */
13524 static void
13525 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
13526 {
13527   Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
13528 
13529   fix->insn = insn;
13530   fix->address = address;
13531 
13532   fix->next = NULL;
13533   if (minipool_fix_head != NULL)
13534     minipool_fix_tail->next = fix;
13535   else
13536     minipool_fix_head = fix;
13537 
13538   minipool_fix_tail = fix;
13539 }
13540 
13541 /* Record INSN, which will need fixing up to load a value from the
13542    minipool.  ADDRESS is the offset of the insn since the start of the
13543    function; LOC is a pointer to the part of the insn which requires
13544    fixing; VALUE is the constant that must be loaded, which is of type
13545    MODE.  */
13546 static void
13547 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
13548 		   enum machine_mode mode, rtx value)
13549 {
13550   Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
13551 
13552   fix->insn = insn;
13553   fix->address = address;
13554   fix->loc = loc;
13555   fix->mode = mode;
13556   fix->fix_size = MINIPOOL_FIX_SIZE (mode);
13557   fix->value = value;
13558   fix->forwards = get_attr_pool_range (insn);
13559   fix->backwards = get_attr_neg_pool_range (insn);
13560   fix->minipool = NULL;
13561 
13562   /* If an insn doesn't have a range defined for it, then it isn't
13563      expecting to be reworked by this code.  Better to stop now than
13564      to generate duff assembly code.  */
13565   gcc_assert (fix->forwards || fix->backwards);
13566 
13567   /* If an entry requires 8-byte alignment then assume all constant pools
13568      require 4 bytes of padding.  Trying to do this later on a per-pool
13569      basis is awkward because existing pool entries have to be modified.  */
13570   if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
13571     minipool_pad = 4;
13572 
13573   if (dump_file)
13574     {
13575       fprintf (dump_file,
13576 	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
13577 	       GET_MODE_NAME (mode),
13578 	       INSN_UID (insn), (unsigned long) address,
13579 	       -1 * (long)fix->backwards, (long)fix->forwards);
13580       arm_print_value (dump_file, fix->value);
13581       fprintf (dump_file, "\n");
13582     }
13583 
13584   /* Add it to the chain of fixes.  */
13585   fix->next = NULL;
13586 
13587   if (minipool_fix_head != NULL)
13588     minipool_fix_tail->next = fix;
13589   else
13590     minipool_fix_head = fix;
13591 
13592   minipool_fix_tail = fix;
13593 }
13594 
13595 /* Return the cost of synthesizing a 64-bit constant VAL inline.
13596    Returns the number of insns needed, or 99 if we don't know how to
13597    do it.  */
13598 int
13599 arm_const_double_inline_cost (rtx val)
13600 {
13601   rtx lowpart, highpart;
13602   enum machine_mode mode;
13603 
13604   mode = GET_MODE (val);
13605 
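  /* Integer constants carry no mode of their own; treat such a value as
     DImode here.  */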
13606   if (mode == VOIDmode)
13607     mode = DImode;
13608 
13609   gcc_assert (GET_MODE_SIZE (mode) == 8);
13610 
13611   lowpart = gen_lowpart (SImode, val);
13612   highpart = gen_highpart_mode (SImode, mode, val);
13613 
13614   gcc_assert (CONST_INT_P (lowpart));
13615   gcc_assert (CONST_INT_P (highpart));
13616 
13617   return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
13618 			    NULL_RTX, NULL_RTX, 0, 0)
13619 	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
13620 			      NULL_RTX, NULL_RTX, 0, 0));
13621 }
13622 
13623 /* Return true if it is worthwhile to split a 64-bit constant into two
13624    32-bit operations.  This is the case if optimizing for size, or
13625    if we have load delay slots, or if one 32-bit part can be done with
13626    a single data operation.  */
13627 bool
13628 arm_const_double_by_parts (rtx val)
13629 {
13630   enum machine_mode mode = GET_MODE (val);
13631   rtx part;
13632 
13633   if (optimize_size || arm_ld_sched)
13634     return true;
13635 
13636   if (mode == VOIDmode)
13637     mode = DImode;
13638 
13639   part = gen_highpart_mode (SImode, mode, val);
13640 
13641   gcc_assert (CONST_INT_P (part));
13642 
13643   if (const_ok_for_arm (INTVAL (part))
13644       || const_ok_for_arm (~INTVAL (part)))
13645     return true;
13646 
13647   part = gen_lowpart (SImode, val);
13648 
13649   gcc_assert (CONST_INT_P (part));
13650 
13651   if (const_ok_for_arm (INTVAL (part))
13652       || const_ok_for_arm (~INTVAL (part)))
13653     return true;
13654 
13655   return false;
13656 }
13657 
13658 /* Return true if it is possible to inline both the high and low parts
13659    of a 64-bit constant into 32-bit data processing instructions.  */
13660 bool
13661 arm_const_double_by_immediates (rtx val)
13662 {
13663   enum machine_mode mode = GET_MODE (val);
13664   rtx part;
13665 
13666   if (mode == VOIDmode)
13667     mode = DImode;
13668 
13669   part = gen_highpart_mode (SImode, mode, val);
13670 
13671   gcc_assert (CONST_INT_P (part));
13672 
13673   if (!const_ok_for_arm (INTVAL (part)))
13674     return false;
13675 
13676   part = gen_lowpart (SImode, val);
13677 
13678   gcc_assert (CONST_INT_P (part));
13679 
13680   if (!const_ok_for_arm (INTVAL (part)))
13681     return false;
13682 
13683   return true;
13684 }
13685 
13686 /* Scan INSN and note any of its operands that need fixing.
13687    If DO_PUSHES is false we do not actually push any of the fixups
13688    needed.  */
13689 static void
13690 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
13691 {
13692   int opno;
13693 
13694   extract_insn (insn);
13695 
13696   if (!constrain_operands (1))
13697     fatal_insn_not_found (insn);
13698 
13699   if (recog_data.n_alternatives == 0)
13700     return;
13701 
13702   /* Fill in recog_op_alt with information about the constraints of
13703      this insn.  */
13704   preprocess_constraints ();
13705 
13706   for (opno = 0; opno < recog_data.n_operands; opno++)
13707     {
13708       /* Things we need to fix can only occur in inputs.  */
13709       if (recog_data.operand_type[opno] != OP_IN)
13710 	continue;
13711 
13712       /* If this alternative is a memory reference, then any mention
13713 	 of constants in this alternative is really to fool reload
13714 	 into allowing us to accept one there.  We need to fix them up
13715 	 now so that we output the right code.  */
13716       if (recog_op_alt[opno][which_alternative].memory_ok)
13717 	{
13718 	  rtx op = recog_data.operand[opno];
13719 
13720 	  if (CONSTANT_P (op))
13721 	    {
13722 	      if (do_pushes)
13723 		push_minipool_fix (insn, address, recog_data.operand_loc[opno],
13724 				   recog_data.operand_mode[opno], op);
13725 	    }
13726 	  else if (MEM_P (op)
13727 		   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
13728 		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
13729 	    {
13730 	      if (do_pushes)
13731 		{
13732 		  rtx cop = avoid_constant_pool_reference (op);
13733 
13734 		  /* Casting the address of something to a mode narrower
13735 		     than a word can cause avoid_constant_pool_reference()
13736 		     to return the pool reference itself.  That's no good to
13737 		     us here.  Let's just hope that we can use the
13738 		     constant pool value directly.  */
13739 		  if (op == cop)
13740 		    cop = get_pool_constant (XEXP (op, 0));
13741 
13742 		  push_minipool_fix (insn, address,
13743 				     recog_data.operand_loc[opno],
13744 				     recog_data.operand_mode[opno], cop);
13745 		}
13746 
13747 	    }
13748 	}
13749     }
13750 
13751   return;
13752 }
13753 
13754 /* Rewrite move insn into subtract of 0 if the condition codes will
13755    be useful in the next conditional jump insn.  */
13756 
13757 static void
13758 thumb1_reorg (void)
13759 {
13760   basic_block bb;
13761 
13762   FOR_EACH_BB (bb)
13763     {
13764       rtx set, dest, src;
13765       rtx pat, op0;
13766       rtx prev, insn = BB_END (bb);
13767 
13768       while (insn != BB_HEAD (bb) && DEBUG_INSN_P (insn))
13769 	insn = PREV_INSN (insn);
13770 
13771       /* Find the last cbranchsi4_insn in basic block BB.  */
13772       if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
13773 	continue;
13774 
13775       /* Find the first non-note insn before INSN in basic block BB.  */
13776       gcc_assert (insn != BB_HEAD (bb));
13777       prev = PREV_INSN (insn);
13778       while (prev != BB_HEAD (bb) && (NOTE_P (prev) || DEBUG_INSN_P (prev)))
13779 	prev = PREV_INSN (prev);
13780 
13781       set = single_set (prev);
13782       if (!set)
13783 	continue;
13784 
13785       dest = SET_DEST (set);
13786       src = SET_SRC (set);
13787       if (!low_register_operand (dest, SImode)
13788 	  || !low_register_operand (src, SImode))
13789 	continue;
13790 
13791       pat = PATTERN (insn);
13792       op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
13793       /* Rewrite the move into a subtract of 0 if its operand is compared
13794 	 with ZERO in INSN.  We don't need to check dest, since the
13795 	 cprop_hardreg pass propagates src into INSN.  */
13796       if (REGNO (op0) == REGNO (src))
13797 	{
13798 	  dest = copy_rtx (dest);
13799 	  src = copy_rtx (src);
13800 	  src = gen_rtx_MINUS (SImode, src, const0_rtx);
13801 	  PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
13802 	  INSN_CODE (prev) = -1;
13803 	  /* Set test register in INSN to dest.  */
13804 	  XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
13805 	  INSN_CODE (insn) = -1;
13806 	}
13807     }
13808 }
13809 
13810 /* Convert instructions to their cc-clobbering variant if possible, since
13811    that allows us to use smaller encodings.  */
13812 
13813 static void
13814 thumb2_reorg (void)
13815 {
13816   basic_block bb;
13817   regset_head live;
13818 
13819   INIT_REG_SET (&live);
13820 
13821   /* We are freeing block_for_insn in the toplev to keep compatibility
13822      with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
13823   compute_bb_for_insn ();
13824   df_analyze ();
13825 
13826   FOR_EACH_BB (bb)
13827     {
13828       rtx insn;
13829 
13830       COPY_REG_SET (&live, DF_LR_OUT (bb));
13831       df_simulate_initialize_backwards (bb, &live);
13832       FOR_BB_INSNS_REVERSE (bb, insn)
13833 	{
13834 	  if (NONJUMP_INSN_P (insn)
13835 	      && !REGNO_REG_SET_P (&live, CC_REGNUM)
13836 	      && GET_CODE (PATTERN (insn)) == SET)
13837 	    {
13838 	      enum {SKIP, CONV, SWAP_CONV} action = SKIP;
13839 	      rtx pat = PATTERN (insn);
13840 	      rtx dst = XEXP (pat, 0);
13841 	      rtx src = XEXP (pat, 1);
13842 	      rtx op0 = NULL_RTX, op1 = NULL_RTX;
13843 
13844 	      if (!OBJECT_P (src))
13845 		  op0 = XEXP (src, 0);
13846 
13847 	      if (BINARY_P (src))
13848 		  op1 = XEXP (src, 1);
13849 
13850 	      if (low_register_operand (dst, SImode))
13851 		{
13852 		  switch (GET_CODE (src))
13853 		    {
13854 		    case PLUS:
13855 		      /* Adding two registers and storing the result
13856 			 in the first source is already a 16-bit
13857 			 operation.  */
13858 		      if (rtx_equal_p (dst, op0)
13859 			  && register_operand (op1, SImode))
13860 			break;
13861 
13862 		      if (low_register_operand (op0, SImode))
13863 			{
13864 			  /* ADDS <Rd>,<Rn>,<Rm>  */
13865 			  if (low_register_operand (op1, SImode))
13866 			    action = CONV;
13867 			  /* ADDS <Rdn>,#<imm8>  */
13868 			  /* SUBS <Rdn>,#<imm8>  */
13869 			  else if (rtx_equal_p (dst, op0)
13870 				   && CONST_INT_P (op1)
13871 				   && IN_RANGE (INTVAL (op1), -255, 255))
13872 			    action = CONV;
13873 			  /* ADDS <Rd>,<Rn>,#<imm3>  */
13874 			  /* SUBS <Rd>,<Rn>,#<imm3>  */
13875 			  else if (CONST_INT_P (op1)
13876 				   && IN_RANGE (INTVAL (op1), -7, 7))
13877 			    action = CONV;
13878 			}
13879 		      break;
13880 
13881 		    case MINUS:
13882 		      /* RSBS <Rd>,<Rn>,#0
13883 			 Not handled here: see NEG below.  */
13884 		      /* SUBS <Rd>,<Rn>,#<imm3>
13885 			 SUBS <Rdn>,#<imm8>
13886 			 Not handled here: see PLUS above.  */
13887 		      /* SUBS <Rd>,<Rn>,<Rm>  */
13888 		      if (low_register_operand (op0, SImode)
13889 			  && low_register_operand (op1, SImode))
13890 			    action = CONV;
13891 		      break;
13892 
13893 		    case MULT:
13894 		      /* MULS <Rdm>,<Rn>,<Rdm>
13895 			 As an exception to the rule, this is only used
13896 			 when optimizing for size since MULS is slow on all
13897 			 known implementations.  We do not even want to use
13898 			 MULS in cold code, if optimizing for speed, so we
13899 			 test the global flag here.  */
13900 		      if (!optimize_size)
13901 			break;
13902 		      /* else fall through.  */
13903 		    case AND:
13904 		    case IOR:
13905 		    case XOR:
13906 		      /* ANDS <Rdn>,<Rm>  */
13907 		      if (rtx_equal_p (dst, op0)
13908 			  && low_register_operand (op1, SImode))
13909 			action = CONV;
13910 		      else if (rtx_equal_p (dst, op1)
13911 			       && low_register_operand (op0, SImode))
13912 			action = SWAP_CONV;
13913 		      break;
13914 
13915 		    case ASHIFTRT:
13916 		    case ASHIFT:
13917 		    case LSHIFTRT:
13918 		      /* ASRS <Rdn>,<Rm> */
13919 		      /* LSRS <Rdn>,<Rm> */
13920 		      /* LSLS <Rdn>,<Rm> */
13921 		      if (rtx_equal_p (dst, op0)
13922 			  && low_register_operand (op1, SImode))
13923 			action = CONV;
13924 		      /* ASRS <Rd>,<Rm>,#<imm5> */
13925 		      /* LSRS <Rd>,<Rm>,#<imm5> */
13926 		      /* LSLS <Rd>,<Rm>,#<imm5> */
13927 		      else if (low_register_operand (op0, SImode)
13928 			       && CONST_INT_P (op1)
13929 			       && IN_RANGE (INTVAL (op1), 0, 31))
13930 			action = CONV;
13931 		      break;
13932 
13933 		    case ROTATERT:
13934 		      /* RORS <Rdn>,<Rm>  */
13935 		      if (rtx_equal_p (dst, op0)
13936 			  && low_register_operand (op1, SImode))
13937 			action = CONV;
13938 		      break;
13939 
13940 		    case NOT:
13941 		    case NEG:
13942 		      /* MVNS <Rd>,<Rm>  */
13943 		      /* NEGS <Rd>,<Rm>  (a.k.a RSBS)  */
13944 		      if (low_register_operand (op0, SImode))
13945 			action = CONV;
13946 		      break;
13947 
13948 		    case CONST_INT:
13949 		      /* MOVS <Rd>,#<imm8>  */
13950 		      if (CONST_INT_P (src)
13951 			  && IN_RANGE (INTVAL (src), 0, 255))
13952 			action = CONV;
13953 		      break;
13954 
13955 		    case REG:
13956 		      /* MOVS and MOV<c> with registers have different
13957 			 encodings, so are not relevant here.  */
13958 		      break;
13959 
13960 		    default:
13961 		      break;
13962 		    }
13963 		}
13964 
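	      /* For a convertible insn, wrap the SET in a PARALLEL with an
		 explicit clobber of the condition codes (swapping the
		 commutative operands first if required) so that the shorter
		 flag-setting encoding can be selected.  */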
13965 	      if (action != SKIP)
13966 		{
13967 		  rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
13968 		  rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
13969 		  rtvec vec;
13970 
13971 		  if (action == SWAP_CONV)
13972 		    {
13973 		      src = copy_rtx (src);
13974 		      XEXP (src, 0) = op1;
13975 		      XEXP (src, 1) = op0;
13976 		      pat = gen_rtx_SET (VOIDmode, dst, src);
13977 		      vec = gen_rtvec (2, pat, clobber);
13978 		    }
13979 		  else /* action == CONV */
13980 		    vec = gen_rtvec (2, pat, clobber);
13981 
13982 		  PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
13983 		  INSN_CODE (insn) = -1;
13984 		}
13985 	    }
13986 
13987 	  if (NONDEBUG_INSN_P (insn))
13988 	    df_simulate_one_insn_backwards (bb, insn, &live);
13989 	}
13990     }
13991 
13992   CLEAR_REG_SET (&live);
13993 }
13994 
13995 /* GCC puts the pool in the wrong place for ARM, since we can only
13996    load addresses a limited distance around the pc.  We do some
13997    special munging to move the constant pool values to the correct
13998    point in the code.  */
13999 static void
14000 arm_reorg (void)
14001 {
14002   rtx insn;
14003   HOST_WIDE_INT address = 0;
14004   Mfix * fix;
14005 
14006   if (TARGET_THUMB1)
14007     thumb1_reorg ();
14008   else if (TARGET_THUMB2)
14009     thumb2_reorg ();
14010 
14011   /* Ensure all insns that must be split have been split at this point.
14012      Otherwise, the pool placement code below may compute incorrect
14013      insn lengths.  Note that when optimizing, all insns have already
14014      been split at this point.  */
14015   if (!optimize)
14016     split_all_insns_noflow ();
14017 
14018   minipool_fix_head = minipool_fix_tail = NULL;
14019 
14020   /* The first insn must always be a note, or the code below won't
14021      scan it properly.  */
14022   insn = get_insns ();
14023   gcc_assert (NOTE_P (insn));
14024   minipool_pad = 0;
14025 
14026   /* Scan all the insns and record the operands that will need fixing.  */
14027   for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
14028     {
14029       if (BARRIER_P (insn))
14030 	push_minipool_barrier (insn, address);
14031       else if (INSN_P (insn))
14032 	{
14033 	  rtx table;
14034 
14035 	  note_invalid_constants (insn, address, true);
14036 	  address += get_attr_length (insn);
14037 
14038 	  /* If the insn is a vector jump, add the size of the table
14039 	     and skip the table.  */
14040 	  if ((table = is_jump_table (insn)) != NULL)
14041 	    {
14042 	      address += get_jump_table_size (table);
14043 	      insn = table;
14044 	    }
14045 	}
14046       else if (LABEL_P (insn))
14047 	/* Add the worst-case padding due to alignment.  We don't add
14048 	   the _current_ padding because the minipool insertions
14049 	   themselves might change it.  */
14050 	address += get_label_padding (insn);
14051     }
14052 
14053   fix = minipool_fix_head;
14054 
14055   /* Now scan the fixups and perform the required changes.  */
14056   while (fix)
14057     {
14058       Mfix * ftmp;
14059       Mfix * fdel;
14060       Mfix *  last_added_fix;
14061       Mfix * last_barrier = NULL;
14062       Mfix * this_fix;
14063 
14064       /* Skip any further barriers before the next fix.  */
14065       while (fix && BARRIER_P (fix->insn))
14066 	fix = fix->next;
14067 
14068       /* No more fixes.  */
14069       if (fix == NULL)
14070 	break;
14071 
14072       last_added_fix = NULL;
14073 
14074       for (ftmp = fix; ftmp; ftmp = ftmp->next)
14075 	{
14076 	  if (BARRIER_P (ftmp->insn))
14077 	    {
14078 	      if (ftmp->address >= minipool_vector_head->max_address)
14079 		break;
14080 
14081 	      last_barrier = ftmp;
14082 	    }
14083 	  else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
14084 	    break;
14085 
14086 	  last_added_fix = ftmp;  /* Keep track of the last fix added.  */
14087 	}
14088 
14089       /* If we found a barrier, drop back to that; any fixes that we
14090 	 could have reached but come after the barrier will now go in
14091 	 the next mini-pool.  */
14092       if (last_barrier != NULL)
14093 	{
14094 	  /* Reduce the refcount for those fixes that won't go into this
14095 	     pool after all.  */
14096 	  for (fdel = last_barrier->next;
14097 	       fdel && fdel != ftmp;
14098 	       fdel = fdel->next)
14099 	    {
14100 	      fdel->minipool->refcount--;
14101 	      fdel->minipool = NULL;
14102 	    }
14103 
14104 	  ftmp = last_barrier;
14105 	}
14106       else
14107         {
14108 	  /* ftmp is the first fix that we can't fit into this pool, and
14109 	     there are no natural barriers that we could use.  Insert a
14110 	     new barrier in the code somewhere between the previous
14111 	     fix and this one, and arrange to jump around it.  */
14112 	  HOST_WIDE_INT max_address;
14113 
14114 	  /* The last item on the list of fixes must be a barrier, so
14115 	     we can never run off the end of the list of fixes without
14116 	     last_barrier being set.  */
14117 	  gcc_assert (ftmp);
14118 
14119 	  max_address = minipool_vector_head->max_address;
14120 	  /* Check that there isn't another fix that is in range that
14121 	     we couldn't fit into this pool because the pool was
14122 	     already too large: we need to put the pool before such an
14123 	     instruction.  The pool itself may come just after the
14124 	     fix because create_fix_barrier also allows space for a
14125 	     jump instruction.  */
14126 	  if (ftmp->address < max_address)
14127 	    max_address = ftmp->address + 1;
14128 
14129 	  last_barrier = create_fix_barrier (last_added_fix, max_address);
14130 	}
14131 
14132       assign_minipool_offsets (last_barrier);
14133 
14134       while (ftmp)
14135 	{
14136 	  if (!BARRIER_P (ftmp->insn)
14137 	      && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
14138 		  == NULL))
14139 	    break;
14140 
14141 	  ftmp = ftmp->next;
14142 	}
14143 
14144       /* Scan over the fixes we have identified for this pool, fixing them
14145 	 up and adding the constants to the pool itself.  */
14146       for (this_fix = fix; this_fix && ftmp != this_fix;
14147 	   this_fix = this_fix->next)
14148 	if (!BARRIER_P (this_fix->insn))
14149 	  {
14150 	    rtx addr
14151 	      = plus_constant (Pmode,
14152 			       gen_rtx_LABEL_REF (VOIDmode,
14153 						  minipool_vector_label),
14154 			       this_fix->minipool->offset);
14155 	    *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
14156 	  }
14157 
14158       dump_minipool (last_barrier->insn);
14159       fix = ftmp;
14160     }
14161 
14162   /* From now on we must synthesize any constants that we can't handle
14163      directly.  This can happen if the RTL gets split during final
14164      instruction generation.  */
14165   after_arm_reorg = 1;
14166 
14167   /* Free the minipool memory.  */
14168   obstack_free (&minipool_obstack, minipool_startobj);
14169 }
14170 
14171 /* Routines to output assembly language.  */
14172 
14173 /* If the rtx is the correct value then return the string of the number.
14174    In this way we can ensure that valid double constants are generated even
14175    when cross-compiling.  */
14176 const char *
14177 fp_immediate_constant (rtx x)
14178 {
14179   REAL_VALUE_TYPE r;
14180 
14181   if (!fp_consts_inited)
14182     init_fp_table ();
14183 
14184   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14185 
14186   gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
14187   return "0";
14188 }
14189 
14190 /* As for fp_immediate_constant, but value is passed directly, not in rtx.  */
14191 static const char *
14192 fp_const_from_val (REAL_VALUE_TYPE *r)
14193 {
14194   if (!fp_consts_inited)
14195     init_fp_table ();
14196 
14197   gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
14198   return "0";
14199 }
14200 
14201 /* OPERANDS[0] is the entire list of insns that constitute the pop,
14202    OPERANDS[1] is the base register, RETURN_PC is true iff the return
14203    insn is in the list, and UPDATE is true iff the list contains an
14204    explicit update of the base register.  */
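/* For example, with SP as the base register and UPDATE set this emits
   something like "pop\t{r4, r5, pc}"; with another base register an ldm
   form such as "ldmia\tr4, {r0, r1}" is produced instead (illustrative
   examples only).  */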
14205 void
14206 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
14207                          bool update)
14208 {
14209   int i;
14210   char pattern[100];
14211   int offset;
14212   const char *conditional;
14213   int num_saves = XVECLEN (operands[0], 0);
14214   unsigned int regno;
14215   unsigned int regno_base = REGNO (operands[1]);
14216 
14217   offset = 0;
14218   offset += update ? 1 : 0;
14219   offset += return_pc ? 1 : 0;
14220 
14221   /* Is the base register in the list?  */
14222   for (i = offset; i < num_saves; i++)
14223     {
14224       regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
14225       /* If SP is in the list, then the base register must be SP.  */
14226       gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
14227       /* If base register is in the list, there must be no explicit update.  */
14228       if (regno == regno_base)
14229         gcc_assert (!update);
14230     }
14231 
14232   conditional = reverse ? "%?%D0" : "%?%d0";
14233   if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
14234     {
14235       /* Output pop (not ldmfd) because it has a shorter encoding.  */
14236       gcc_assert (update);
14237       sprintf (pattern, "pop%s\t{", conditional);
14238     }
14239   else
14240     {
14241       /* Output ldmfd when the base register is SP, otherwise output ldmia.
14242          It's just a convention; their semantics are identical.  */
14243       if (regno_base == SP_REGNUM)
14244         sprintf (pattern, "ldm%sfd\t", conditional);
14245       else if (TARGET_UNIFIED_ASM)
14246         sprintf (pattern, "ldmia%s\t", conditional);
14247       else
14248         sprintf (pattern, "ldm%sia\t", conditional);
14249 
14250       strcat (pattern, reg_names[regno_base]);
14251       if (update)
14252         strcat (pattern, "!, {");
14253       else
14254         strcat (pattern, ", {");
14255     }
14256 
14257   /* Output the first destination register.  */
14258   strcat (pattern,
14259           reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
14260 
14261   /* Output the rest of the destination registers.  */
14262   for (i = offset + 1; i < num_saves; i++)
14263     {
14264       strcat (pattern, ", ");
14265       strcat (pattern,
14266               reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
14267     }
14268 
14269   strcat (pattern, "}");
14270 
14271   if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
14272     strcat (pattern, "^");
14273 
14274   output_asm_insn (pattern, &cond);
14275 }
14276 
14277 
14278 /* Output the assembly for a store multiple.  */
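/* The emitted instruction looks like, e.g., "fstmfdd\tsp!, {d8, d9, d10}"
   (an illustrative example only).  */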
14279 
14280 const char *
14281 vfp_output_fstmd (rtx * operands)
14282 {
14283   char pattern[100];
14284   int p;
14285   int base;
14286   int i;
14287 
14288   strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
14289   p = strlen (pattern);
14290 
14291   gcc_assert (REG_P (operands[1]));
14292 
14293   base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
14294   for (i = 1; i < XVECLEN (operands[2], 0); i++)
14295     {
14296       p += sprintf (&pattern[p], ", d%d", base + i);
14297     }
14298   strcpy (&pattern[p], "}");
14299 
14300   output_asm_insn (pattern, operands);
14301   return "";
14302 }
14303 
14304 
14305 /* Emit RTL to save block of VFP register pairs to the stack.  Returns the
14306    number of bytes pushed.  */
14307 
14308 static int
14309 vfp_emit_fstmd (int base_reg, int count)
14310 {
14311   rtx par;
14312   rtx dwarf;
14313   rtx tmp, reg;
14314   int i;
14315 
14316   /* Work around the ARM10 VFPr1 bug.  Data corruption can occur when exactly two
14317      register pairs are stored by a store multiple insn.  We avoid this
14318      by pushing an extra pair.  */
14319   if (count == 2 && !arm_arch6)
14320     {
14321       if (base_reg == LAST_VFP_REGNUM - 3)
14322 	base_reg -= 2;
14323       count++;
14324     }
14325 
14326   /* FSTMD may not store more than 16 doubleword registers at once.  Split
14327      larger stores into multiple parts (up to a maximum of two, in
14328      practice).  */
14329   if (count > 16)
14330     {
14331       int saved;
14332       /* NOTE: base_reg is an internal register number, so each D register
14333          counts as 2.  */
14334       saved = vfp_emit_fstmd (base_reg + 32, count - 16);
14335       saved += vfp_emit_fstmd (base_reg, 16);
14336       return saved;
14337     }
14338 
14339   par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
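  /* Build both the instruction pattern (PAR) and the parallel unwind
     description (DWARF) that is attached below via REG_FRAME_RELATED_EXPR.  */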
14340   dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
14341 
14342   reg = gen_rtx_REG (DFmode, base_reg);
14343   base_reg += 2;
14344 
14345   XVECEXP (par, 0, 0)
14346     = gen_rtx_SET (VOIDmode,
14347 		   gen_frame_mem
14348 		   (BLKmode,
14349 		    gen_rtx_PRE_MODIFY (Pmode,
14350 					stack_pointer_rtx,
14351 					plus_constant
14352 					(Pmode, stack_pointer_rtx,
14353 					 - (count * 8)))
14354 		    ),
14355 		   gen_rtx_UNSPEC (BLKmode,
14356 				   gen_rtvec (1, reg),
14357 				   UNSPEC_PUSH_MULT));
14358 
14359   tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14360 		     plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
14361   RTX_FRAME_RELATED_P (tmp) = 1;
14362   XVECEXP (dwarf, 0, 0) = tmp;
14363 
14364   tmp = gen_rtx_SET (VOIDmode,
14365 		     gen_frame_mem (DFmode, stack_pointer_rtx),
14366 		     reg);
14367   RTX_FRAME_RELATED_P (tmp) = 1;
14368   XVECEXP (dwarf, 0, 1) = tmp;
14369 
14370   for (i = 1; i < count; i++)
14371     {
14372       reg = gen_rtx_REG (DFmode, base_reg);
14373       base_reg += 2;
14374       XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
14375 
14376       tmp = gen_rtx_SET (VOIDmode,
14377 			 gen_frame_mem (DFmode,
14378 					plus_constant (Pmode,
14379 						       stack_pointer_rtx,
14380 						       i * 8)),
14381 			 reg);
14382       RTX_FRAME_RELATED_P (tmp) = 1;
14383       XVECEXP (dwarf, 0, i + 1) = tmp;
14384     }
14385 
14386   par = emit_insn (par);
14387   add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14388   RTX_FRAME_RELATED_P (par) = 1;
14389 
14390   return count * 8;
14391 }
14392 
14393 /* Emit a call instruction with pattern PAT.  ADDR is the address of
14394    the call target.  */
14395 
14396 void
14397 arm_emit_call_insn (rtx pat, rtx addr)
14398 {
14399   rtx insn;
14400 
14401   insn = emit_call_insn (pat);
14402 
14403   /* The PIC register is live on entry to VxWorks PIC PLT entries.
14404      If the call might use such an entry, add a use of the PIC register
14405      to the instruction's CALL_INSN_FUNCTION_USAGE.  */
14406   if (TARGET_VXWORKS_RTP
14407       && flag_pic
14408       && GET_CODE (addr) == SYMBOL_REF
14409       && (SYMBOL_REF_DECL (addr)
14410 	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
14411 	  : !SYMBOL_REF_LOCAL_P (addr)))
14412     {
14413       require_pic_register ();
14414       use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
14415     }
14416 }
14417 
14418 /* Output a 'call' insn.  */
14419 const char *
14420 output_call (rtx *operands)
14421 {
14422   gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly.  */
14423 
14424   /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
14425   if (REGNO (operands[0]) == LR_REGNUM)
14426     {
14427       operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
14428       output_asm_insn ("mov%?\t%0, %|lr", operands);
14429     }
14430 
14431   output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14432 
14433   if (TARGET_INTERWORK || arm_arch4t)
14434     output_asm_insn ("bx%?\t%0", operands);
14435   else
14436     output_asm_insn ("mov%?\t%|pc, %0", operands);
14437 
14438   return "";
14439 }
14440 
14441 /* Output a 'call' insn that is a reference in memory.  This is
14442    disabled for ARMv5, where we prefer to use blx instead, because
14443    otherwise there's a significant performance overhead.  */
14444 const char *
14445 output_call_mem (rtx *operands)
14446 {
14447   gcc_assert (!arm_arch5);
14448   if (TARGET_INTERWORK)
14449     {
14450       output_asm_insn ("ldr%?\t%|ip, %0", operands);
14451       output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14452       output_asm_insn ("bx%?\t%|ip", operands);
14453     }
14454   else if (regno_use_in (LR_REGNUM, operands[0]))
14455     {
14456       /* LR is used in the memory address.  We load the address in the
14457 	 first instruction.  It's safe to use IP as the target of the
14458 	 load since the call will kill it anyway.  */
14459       output_asm_insn ("ldr%?\t%|ip, %0", operands);
14460       output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14461       if (arm_arch4t)
14462 	output_asm_insn ("bx%?\t%|ip", operands);
14463       else
14464 	output_asm_insn ("mov%?\t%|pc, %|ip", operands);
14465     }
14466   else
14467     {
14468       output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14469       output_asm_insn ("ldr%?\t%|pc, %0", operands);
14470     }
14471 
14472   return "";
14473 }
14474 
14475 
14476 /* Output a move from arm registers to arm registers of a long double
14477    OPERANDS[0] is the destination.
14478    OPERANDS[1] is the source.  */
14479 const char *
14480 output_mov_long_double_arm_from_arm (rtx *operands)
14481 {
14482   /* We have to be careful here because the two might overlap.  */
14483   int dest_start = REGNO (operands[0]);
14484   int src_start = REGNO (operands[1]);
14485   rtx ops[2];
14486   int i;
14487 
14488   if (dest_start < src_start)
14489     {
14490       for (i = 0; i < 3; i++)
14491 	{
14492 	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
14493 	  ops[1] = gen_rtx_REG (SImode, src_start + i);
14494 	  output_asm_insn ("mov%?\t%0, %1", ops);
14495 	}
14496     }
14497   else
14498     {
14499       for (i = 2; i >= 0; i--)
14500 	{
14501 	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
14502 	  ops[1] = gen_rtx_REG (SImode, src_start + i);
14503 	  output_asm_insn ("mov%?\t%0, %1", ops);
14504 	}
14505     }
14506 
14507   return "";
14508 }
14509 
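/* Emit DEST := SRC as a pair of SImode sets.  For an immediate, the low 16
   bits are set first and then, if non-zero, the high 16 bits via a
   ZERO_EXTRACT (in practice the movw/movt sequence); otherwise the value is
   built with HIGH and LO_SUM.  */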
14510 void
14511 arm_emit_movpair (rtx dest, rtx src)
14512 {
14513   /* If the src is an immediate, simplify it.  */
14514   if (CONST_INT_P (src))
14515     {
14516       HOST_WIDE_INT val = INTVAL (src);
14517       emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
14518       if ((val >> 16) & 0x0000ffff)
14519         emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
14520                                              GEN_INT (16)),
14521                        GEN_INT ((val >> 16) & 0x0000ffff));
14522       return;
14523     }
14524   emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
14525   emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
14526 }
14527 
14528 /* Output a move between double words.  It must be REG<-MEM
14529    or MEM<-REG.  */
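/* When COUNT is non-null, *COUNT is set to the number of instructions
   used; when EMIT is false nothing is actually output.  */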
14530 const char *
14531 output_move_double (rtx *operands, bool emit, int *count)
14532 {
14533   enum rtx_code code0 = GET_CODE (operands[0]);
14534   enum rtx_code code1 = GET_CODE (operands[1]);
14535   rtx otherops[3];
14536   if (count)
14537     *count = 1;
14538 
14539   /* The only case when this might happen is when
14540      you are looking at the length of a DImode instruction
14541      that has an invalid constant in it.  */
14542   if (code0 == REG && code1 != MEM)
14543     {
14544       gcc_assert (!emit);
14545       *count = 2;
14546       return "";
14547     }
14548 
14549   if (code0 == REG)
14550     {
14551       unsigned int reg0 = REGNO (operands[0]);
14552 
14553       otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
14554 
14555       gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */
14556 
14557       switch (GET_CODE (XEXP (operands[1], 0)))
14558 	{
14559 	case REG:
14560 
14561 	  if (emit)
14562 	    {
14563 	      if (TARGET_LDRD
14564 		  && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
14565 		output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
14566 	      else
14567 		output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
14568 	    }
14569 	  break;
14570 
14571 	case PRE_INC:
14572 	  gcc_assert (TARGET_LDRD);
14573 	  if (emit)
14574 	    output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
14575 	  break;
14576 
14577 	case PRE_DEC:
14578 	  if (emit)
14579 	    {
14580 	      if (TARGET_LDRD)
14581 		output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
14582 	      else
14583 		output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
14584 	    }
14585 	  break;
14586 
14587 	case POST_INC:
14588 	  if (emit)
14589 	    {
14590 	      if (TARGET_LDRD)
14591 		output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
14592 	      else
14593 		output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
14594 	    }
14595 	  break;
14596 
14597 	case POST_DEC:
14598 	  gcc_assert (TARGET_LDRD);
14599 	  if (emit)
14600 	    output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
14601 	  break;
14602 
14603 	case PRE_MODIFY:
14604 	case POST_MODIFY:
14605 	  /* Autoincrement addressing modes should never have overlapping
14606 	     base and destination registers, and overlapping index registers
14607 	     are already prohibited, so this doesn't need to worry about
14608 	     fix_cm3_ldrd.  */
14609 	  otherops[0] = operands[0];
14610 	  otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
14611 	  otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
14612 
14613 	  if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
14614 	    {
14615 	      if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
14616 		{
14617 		  /* Registers overlap so split out the increment.  */
14618 		  if (emit)
14619 		    {
14620 		      output_asm_insn ("add%?\t%1, %1, %2", otherops);
14621 		      output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
14622 		    }
14623 		  if (count)
14624 		    *count = 2;
14625 		}
14626 	      else
14627 		{
14628 		  /* Use a single insn if we can.
14629 		     FIXME: IWMMXT allows offsets larger than ldrd can
14630 		     handle, fix these up with a pair of ldr.  */
14631 		  if (TARGET_THUMB2
14632 		      || !CONST_INT_P (otherops[2])
14633 		      || (INTVAL (otherops[2]) > -256
14634 			  && INTVAL (otherops[2]) < 256))
14635 		    {
14636 		      if (emit)
14637 			output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
14638 		    }
14639 		  else
14640 		    {
14641 		      if (emit)
14642 			{
14643 			  output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
14644 			  output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14645 			}
14646 		      if (count)
14647 			*count = 2;
14648 
14649 		    }
14650 		}
14651 	    }
14652 	  else
14653 	    {
14654 	      /* Use a single insn if we can.
14655 		 FIXME: IWMMXT allows offsets larger than ldrd can handle,
14656 		 fix these up with a pair of ldr.  */
14657 	      if (TARGET_THUMB2
14658 		  || !CONST_INT_P (otherops[2])
14659 		  || (INTVAL (otherops[2]) > -256
14660 		      && INTVAL (otherops[2]) < 256))
14661 		{
14662 		  if (emit)
14663 		    output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
14664 		}
14665 	      else
14666 		{
14667 		  if (emit)
14668 		    {
14669 		      output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14670 		      output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
14671 		    }
14672 		  if (count)
14673 		    *count = 2;
14674 		}
14675 	    }
14676 	  break;
14677 
14678 	case LABEL_REF:
14679 	case CONST:
14680 	  /* We might be able to use ldrd %0, %1 here.  However, the range is
14681 	     different from that of ldr/adr, and it is broken on some ARMv7-M
14682 	     implementations.  */
14683 	  /* Use the second register of the pair to avoid problematic
14684 	     overlap.  */
14685 	  otherops[1] = operands[1];
14686 	  if (emit)
14687 	    output_asm_insn ("adr%?\t%0, %1", otherops);
14688 	  operands[1] = otherops[0];
14689 	  if (emit)
14690 	    {
14691 	      if (TARGET_LDRD)
14692 		output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14693 	      else
14694 		output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
14695 	    }
14696 
14697 	  if (count)
14698 	    *count = 2;
14699 	  break;
14700 
14701 	  /* ??? This needs checking for thumb2.  */
14702 	default:
14703 	  if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
14704 			       GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
14705 	    {
14706 	      otherops[0] = operands[0];
14707 	      otherops[1] = XEXP (XEXP (operands[1], 0), 0);
14708 	      otherops[2] = XEXP (XEXP (operands[1], 0), 1);
14709 
14710 	      if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
14711 		{
14712 		  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
14713 		    {
14714 		      switch ((int) INTVAL (otherops[2]))
14715 			{
14716 			case -8:
14717 			  if (emit)
14718 			    output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
14719 			  return "";
14720 			case -4:
14721 			  if (TARGET_THUMB2)
14722 			    break;
14723 			  if (emit)
14724 			    output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
14725 			  return "";
14726 			case 4:
14727 			  if (TARGET_THUMB2)
14728 			    break;
14729 			  if (emit)
14730 			    output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
14731 			  return "";
14732 			}
14733 		    }
14734 		  otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
14735 		  operands[1] = otherops[0];
14736 		  if (TARGET_LDRD
14737 		      && (REG_P (otherops[2])
14738 			  || TARGET_THUMB2
14739 			  || (CONST_INT_P (otherops[2])
14740 			      && INTVAL (otherops[2]) > -256
14741 			      && INTVAL (otherops[2]) < 256)))
14742 		    {
14743 		      if (reg_overlap_mentioned_p (operands[0],
14744 						   otherops[2]))
14745 			{
14746 			  rtx tmp;
14747 			  /* Swap base and index registers over to
14748 			     avoid a conflict.  */
14749 			  tmp = otherops[1];
14750 			  otherops[1] = otherops[2];
14751 			  otherops[2] = tmp;
14752 			}
14753 		      /* If both registers conflict, it will usually
14754 			 have been fixed by a splitter.  */
14755 		      if (reg_overlap_mentioned_p (operands[0], otherops[2])
14756 			  || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
14757 			{
14758 			  if (emit)
14759 			    {
14760 			      output_asm_insn ("add%?\t%0, %1, %2", otherops);
14761 			      output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14762 			    }
14763 			  if (count)
14764 			    *count = 2;
14765 			}
14766 		      else
14767 			{
14768 			  otherops[0] = operands[0];
14769 			  if (emit)
14770 			    output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
14771 			}
14772 		      return "";
14773 		    }
14774 
14775 		  if (CONST_INT_P (otherops[2]))
14776 		    {
14777 		      if (emit)
14778 			{
14779 			  if (!(const_ok_for_arm (INTVAL (otherops[2]))))
14780 			    output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
14781 			  else
14782 			    output_asm_insn ("add%?\t%0, %1, %2", otherops);
14783 			}
14784 		    }
14785 		  else
14786 		    {
14787 		      if (emit)
14788 			output_asm_insn ("add%?\t%0, %1, %2", otherops);
14789 		    }
14790 		}
14791 	      else
14792 		{
14793 		  if (emit)
14794 		    output_asm_insn ("sub%?\t%0, %1, %2", otherops);
14795 		}
14796 
14797 	      if (count)
14798 		*count = 2;
14799 
14800 	      if (TARGET_LDRD)
14801 		return "ldr%(d%)\t%0, [%1]";
14802 
14803 	      return "ldm%(ia%)\t%1, %M0";
14804 	    }
14805 	  else
14806 	    {
14807 	      otherops[1] = adjust_address (operands[1], SImode, 4);
14808 	      /* Take care of overlapping base/data reg.  */
14809 	      if (reg_mentioned_p (operands[0], operands[1]))
14810 		{
14811 		  if (emit)
14812 		    {
14813 		      output_asm_insn ("ldr%?\t%0, %1", otherops);
14814 		      output_asm_insn ("ldr%?\t%0, %1", operands);
14815 		    }
14816 		  if (count)
14817 		    *count = 2;
14818 
14819 		}
14820 	      else
14821 		{
14822 		  if (emit)
14823 		    {
14824 		      output_asm_insn ("ldr%?\t%0, %1", operands);
14825 		      output_asm_insn ("ldr%?\t%0, %1", otherops);
14826 		    }
14827 		  if (count)
14828 		    *count = 2;
14829 		}
14830 	    }
14831 	}
14832     }
14833   else
14834     {
14835       /* Constraints should ensure this.  */
14836       gcc_assert (code0 == MEM && code1 == REG);
14837       gcc_assert (REGNO (operands[1]) != IP_REGNUM);
14838 
14839       switch (GET_CODE (XEXP (operands[0], 0)))
14840         {
14841 	case REG:
14842 	  if (emit)
14843 	    {
14844 	      if (TARGET_LDRD)
14845 		output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
14846 	      else
14847 		output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14848 	    }
14849 	  break;
14850 
14851         case PRE_INC:
14852 	  gcc_assert (TARGET_LDRD);
14853 	  if (emit)
14854 	    output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
14855 	  break;
14856 
14857         case PRE_DEC:
14858 	  if (emit)
14859 	    {
14860 	      if (TARGET_LDRD)
14861 		output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
14862 	      else
14863 		output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
14864 	    }
14865 	  break;
14866 
14867         case POST_INC:
14868 	  if (emit)
14869 	    {
14870 	      if (TARGET_LDRD)
14871 		output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
14872 	      else
14873 		output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
14874 	    }
14875 	  break;
14876 
14877         case POST_DEC:
14878 	  gcc_assert (TARGET_LDRD);
14879 	  if (emit)
14880 	    output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
14881 	  break;
14882 
14883 	case PRE_MODIFY:
14884 	case POST_MODIFY:
14885 	  otherops[0] = operands[1];
14886 	  otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
14887 	  otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
14888 
14889 	  /* IWMMXT allows offsets larger than strd can handle,
14890 	     fix these up with a pair of str.  */
14891 	  if (!TARGET_THUMB2
14892 	      && CONST_INT_P (otherops[2])
14893 	      && (INTVAL (otherops[2]) <= -256
14894 		  || INTVAL (otherops[2]) >= 256))
14895 	    {
14896 	      if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14897 		{
14898 		  if (emit)
14899 		    {
14900 		      output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
14901 		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14902 		    }
14903 		  if (count)
14904 		    *count = 2;
14905 		}
14906 	      else
14907 		{
14908 		  if (emit)
14909 		    {
14910 		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14911 		      output_asm_insn ("str%?\t%0, [%1], %2", otherops);
14912 		    }
14913 		  if (count)
14914 		    *count = 2;
14915 		}
14916 	    }
14917 	  else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14918 	    {
14919 	      if (emit)
14920 		output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
14921 	    }
14922 	  else
14923 	    {
14924 	      if (emit)
14925 		output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
14926 	    }
14927 	  break;
14928 
14929 	case PLUS:
14930 	  otherops[2] = XEXP (XEXP (operands[0], 0), 1);
14931 	  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
14932 	    {
14933 	      switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
14934 		{
14935 		case -8:
14936 		  if (emit)
14937 		    output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
14938 		  return "";
14939 
14940 		case -4:
14941 		  if (TARGET_THUMB2)
14942 		    break;
14943 		  if (emit)
14944 		    output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
14945 		  return "";
14946 
14947 		case 4:
14948 		  if (TARGET_THUMB2)
14949 		    break;
14950 		  if (emit)
14951 		    output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
14952 		  return "";
14953 		}
14954 	    }
14955 	  if (TARGET_LDRD
14956 	      && (REG_P (otherops[2])
14957 		  || TARGET_THUMB2
14958 		  || (CONST_INT_P (otherops[2])
14959 		      && INTVAL (otherops[2]) > -256
14960 		      && INTVAL (otherops[2]) < 256)))
14961 	    {
14962 	      otherops[0] = operands[1];
14963 	      otherops[1] = XEXP (XEXP (operands[0], 0), 0);
14964 	      if (emit)
14965 		output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
14966 	      return "";
14967 	    }
14968 	  /* Fall through */
14969 
14970         default:
14971 	  otherops[0] = adjust_address (operands[0], SImode, 4);
14972 	  otherops[1] = operands[1];
14973 	  if (emit)
14974 	    {
14975 	      output_asm_insn ("str%?\t%1, %0", operands);
14976 	      output_asm_insn ("str%?\t%H1, %0", otherops);
14977 	    }
14978 	  if (count)
14979 	    *count = 2;
14980 	}
14981     }
14982 
14983   return "";
14984 }
14985 
14986 /* Output a move, load or store for quad-word vectors in ARM registers.  Only
14987    handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
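/* For example (illustrative): loading a quad-word value from memory addressed
   by r4 into the register quartet starting at r0 takes the REG case below and
   emits a single "ldmia" of r4 into the register list r0-r3.  */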
14988 
14989 const char *
14990 output_move_quad (rtx *operands)
14991 {
14992   if (REG_P (operands[0]))
14993     {
14994       /* Load, or reg->reg move.  */
14995 
14996       if (MEM_P (operands[1]))
14997         {
14998           switch (GET_CODE (XEXP (operands[1], 0)))
14999             {
15000             case REG:
15001               output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
15002               break;
15003 
15004             case LABEL_REF:
15005             case CONST:
15006               output_asm_insn ("adr%?\t%0, %1", operands);
15007               output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
15008               break;
15009 
15010             default:
15011               gcc_unreachable ();
15012             }
15013         }
15014       else
15015         {
15016           rtx ops[2];
15017           int dest, src, i;
15018 
15019           gcc_assert (REG_P (operands[1]));
15020 
15021           dest = REGNO (operands[0]);
15022           src = REGNO (operands[1]);
15023 
15024           /* This seems pretty dumb, but hopefully GCC won't try to do it
15025              very often.  */
15026           if (dest < src)
15027             for (i = 0; i < 4; i++)
15028               {
15029                 ops[0] = gen_rtx_REG (SImode, dest + i);
15030                 ops[1] = gen_rtx_REG (SImode, src + i);
15031                 output_asm_insn ("mov%?\t%0, %1", ops);
15032               }
15033           else
15034             for (i = 3; i >= 0; i--)
15035               {
15036                 ops[0] = gen_rtx_REG (SImode, dest + i);
15037                 ops[1] = gen_rtx_REG (SImode, src + i);
15038                 output_asm_insn ("mov%?\t%0, %1", ops);
15039               }
15040         }
15041     }
15042   else
15043     {
15044       gcc_assert (MEM_P (operands[0]));
15045       gcc_assert (REG_P (operands[1]));
15046       gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
15047 
15048       switch (GET_CODE (XEXP (operands[0], 0)))
15049         {
15050         case REG:
15051           output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
15052           break;
15053 
15054         default:
15055           gcc_unreachable ();
15056         }
15057     }
15058 
15059   return "";
15060 }
15061 
15062 /* Output a VFP load or store instruction.  */
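/* For example (illustrative): with the default (non-writeback) addressing
   below, a DFmode load of d7 from [r0] prints as "fldd d7, [r0]", while an
   SFmode store of s2 through a POST_INC address prints as
   "fstmias r0!, {s2}".  */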
15063 
15064 const char *
15065 output_move_vfp (rtx *operands)
15066 {
15067   rtx reg, mem, addr, ops[2];
15068   int load = REG_P (operands[0]);
15069   int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
15070   int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
15071   const char *templ;
15072   char buff[50];
15073   enum machine_mode mode;
15074 
15075   reg = operands[!load];
15076   mem = operands[load];
15077 
15078   mode = GET_MODE (reg);
15079 
15080   gcc_assert (REG_P (reg));
15081   gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
15082   gcc_assert (mode == SFmode
15083 	      || mode == DFmode
15084 	      || mode == SImode
15085 	      || mode == DImode
15086               || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
15087   gcc_assert (MEM_P (mem));
15088 
15089   addr = XEXP (mem, 0);
15090 
15091   switch (GET_CODE (addr))
15092     {
15093     case PRE_DEC:
15094       templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
15095       ops[0] = XEXP (addr, 0);
15096       ops[1] = reg;
15097       break;
15098 
15099     case POST_INC:
15100       templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
15101       ops[0] = XEXP (addr, 0);
15102       ops[1] = reg;
15103       break;
15104 
15105     default:
15106       templ = "f%s%c%%?\t%%%s0, %%1%s";
15107       ops[0] = reg;
15108       ops[1] = mem;
15109       break;
15110     }
15111 
15112   sprintf (buff, templ,
15113 	   load ? "ld" : "st",
15114 	   dp ? 'd' : 's',
15115 	   dp ? "P" : "",
15116 	   integer_p ? "\t%@ int" : "");
15117   output_asm_insn (buff, ops);
15118 
15119   return "";
15120 }
15121 
15122 /* Output a Neon double-word or quad-word load or store, or a load
15123    or store for larger structure modes.
15124 
15125    WARNING: The ordering of elements is weird in big-endian mode,
15126    because the EABI requires that vectors stored in memory appear
15127    as though they were stored by a VSTM instruction.
15128    GCC RTL defines element ordering based on in-memory order.
15129    This can be different from the architectural ordering of elements
15130    within a NEON register. The intrinsics defined in arm_neon.h use the
15131    NEON register element ordering, not the GCC RTL element ordering.
15132 
15133    For example, the in-memory ordering of a big-endian quadword
15134    vector with 16-bit elements when stored from register pair {d0,d1}
15135    will be (lowest address first, d0[N] is NEON register element N):
15136 
15137      [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
15138 
15139    When necessary, quadword registers (dN, dN+1) are moved to ARM
15140    registers from rN in the order:
15141 
15142      dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
15143 
15144    So that STM/LDM can be used on vectors in ARM registers, and the
15145    same memory layout will result as if VSTM/VLDM were used.
15146 
15147    Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
15148    possible, which allows use of appropriate alignment tags.
15149    Note that the choice of "64" is independent of the actual vector
15150    element size; this size simply ensures that the behavior is
15151    equivalent to VSTM/VLDM in both little-endian and big-endian mode.
15152 
15153    Due to limitations of those instructions, use of VST1.64/VLD1.64
15154    is not possible if:
15155     - the address contains PRE_DEC, or
15156     - the mode refers to more than 4 double-word registers
15157 
15158    In those cases, it would be possible to replace VSTM/VLDM by a
15159    sequence of instructions; this is not currently implemented since
15160    this is not certain to actually improve performance.  */
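/* For example (illustrative): a quad-word load whose address is (plus r0 16)
   is handled by the PLUS case below as two double-word loads, typically
   "vldr d0, [r0, #16]" followed by "vldr d1, [r0, #24]" when the destination
   is q0 and the base register does not overlap the destination.  */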
15161 
15162 const char *
15163 output_move_neon (rtx *operands)
15164 {
15165   rtx reg, mem, addr, ops[2];
15166   int regno, nregs, load = REG_P (operands[0]);
15167   const char *templ;
15168   char buff[50];
15169   enum machine_mode mode;
15170 
15171   reg = operands[!load];
15172   mem = operands[load];
15173 
15174   mode = GET_MODE (reg);
15175 
15176   gcc_assert (REG_P (reg));
15177   regno = REGNO (reg);
15178   nregs = HARD_REGNO_NREGS (regno, mode) / 2;
15179   gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
15180 	      || NEON_REGNO_OK_FOR_QUAD (regno));
15181   gcc_assert (VALID_NEON_DREG_MODE (mode)
15182 	      || VALID_NEON_QREG_MODE (mode)
15183 	      || VALID_NEON_STRUCT_MODE (mode));
15184   gcc_assert (MEM_P (mem));
15185 
15186   addr = XEXP (mem, 0);
15187 
15188   /* Strip off const from addresses like (const (plus (...))).  */
15189   if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
15190     addr = XEXP (addr, 0);
15191 
15192   switch (GET_CODE (addr))
15193     {
15194     case POST_INC:
15195       /* We have to use vldm / vstm for too-large modes.  */
15196       if (nregs > 4)
15197 	{
15198 	  templ = "v%smia%%?\t%%0!, %%h1";
15199 	  ops[0] = XEXP (addr, 0);
15200 	}
15201       else
15202 	{
15203 	  templ = "v%s1.64\t%%h1, %%A0";
15204 	  ops[0] = mem;
15205 	}
15206       ops[1] = reg;
15207       break;
15208 
15209     case PRE_DEC:
15210       /* We have to use vldm / vstm in this case, since there is no
15211 	 pre-decrement form of the vld1 / vst1 instructions.  */
15212       templ = "v%smdb%%?\t%%0!, %%h1";
15213       ops[0] = XEXP (addr, 0);
15214       ops[1] = reg;
15215       break;
15216 
15217     case POST_MODIFY:
15218       /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
15219       gcc_unreachable ();
15220 
15221     case LABEL_REF:
15222     case PLUS:
15223       {
15224 	int i;
15225 	int overlap = -1;
15226 	for (i = 0; i < nregs; i++)
15227 	  {
15228 	    /* We're only using DImode here because it's a convenient size.  */
15229 	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
15230 	    ops[1] = adjust_address (mem, DImode, 8 * i);
15231 	    if (reg_overlap_mentioned_p (ops[0], mem))
15232 	      {
15233 		gcc_assert (overlap == -1);
15234 		overlap = i;
15235 	      }
15236 	    else
15237 	      {
15238 		sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
15239 		output_asm_insn (buff, ops);
15240 	      }
15241 	  }
15242 	if (overlap != -1)
15243 	  {
15244 	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
15245 	    ops[1] = adjust_address (mem, SImode, 8 * overlap);
15246 	    sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
15247 	    output_asm_insn (buff, ops);
15248 	  }
15249 
15250         return "";
15251       }
15252 
15253     default:
15254       /* We have to use vldm / vstm for too-large modes.  */
15255       if (nregs > 4)
15256 	templ = "v%smia%%?\t%%m0, %%h1";
15257       else
15258 	templ = "v%s1.64\t%%h1, %%A0";
15259 
15260       ops[0] = mem;
15261       ops[1] = reg;
15262     }
15263 
15264   sprintf (buff, templ, load ? "ld" : "st");
15265   output_asm_insn (buff, ops);
15266 
15267   return "";
15268 }
15269 
15270 /* Compute and return the length of neon_mov<mode>, where <mode> is
15271    one of VSTRUCT modes: EI, OI, CI or XI.  */
15272 int
15273 arm_attr_length_move_neon (rtx insn)
15274 {
15275   rtx reg, mem, addr;
15276   int load;
15277   enum machine_mode mode;
15278 
15279   extract_insn_cached (insn);
15280 
15281   if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
15282     {
15283       mode = GET_MODE (recog_data.operand[0]);
15284       switch (mode)
15285 	{
15286 	case EImode:
15287 	case OImode:
15288 	  return 8;
15289 	case CImode:
15290 	  return 12;
15291 	case XImode:
15292 	  return 16;
15293 	default:
15294 	  gcc_unreachable ();
15295 	}
15296     }
15297 
15298   load = REG_P (recog_data.operand[0]);
15299   reg = recog_data.operand[!load];
15300   mem = recog_data.operand[load];
15301 
15302   gcc_assert (MEM_P (mem));
15303 
15304   mode = GET_MODE (reg);
15305   addr = XEXP (mem, 0);
15306 
15307   /* Strip off const from addresses like (const (plus (...))).  */
15308   if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
15309     addr = XEXP (addr, 0);
15310 
15311   if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
15312     {
15313       int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
15314       return insns * 4;
15315     }
15316   else
15317     return 4;
15318 }
15319 
15320 /* Return nonzero if the offset in the address is an immediate.  Otherwise,
15321    return zero.  */
15322 
15323 int
15324 arm_address_offset_is_imm (rtx insn)
15325 {
15326   rtx mem, addr;
15327 
15328   extract_insn_cached (insn);
15329 
15330   if (REG_P (recog_data.operand[0]))
15331     return 0;
15332 
15333   mem = recog_data.operand[0];
15334 
15335   gcc_assert (MEM_P (mem));
15336 
15337   addr = XEXP (mem, 0);
15338 
15339   if (REG_P (addr)
15340       || (GET_CODE (addr) == PLUS
15341 	  && REG_P (XEXP (addr, 0))
15342 	  && CONST_INT_P (XEXP (addr, 1))))
15343     return 1;
15344   else
15345     return 0;
15346 }
15347 
15348 /* Output an ADD r, s, #n where n may be too big for one instruction.
15349    If n is zero and the source and destination registers are the same, output nothing.  */
15350 const char *
15351 output_add_immediate (rtx *operands)
15352 {
15353   HOST_WIDE_INT n = INTVAL (operands[2]);
15354 
15355   if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
15356     {
15357       if (n < 0)
15358 	output_multi_immediate (operands,
15359 				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
15360 				-n);
15361       else
15362 	output_multi_immediate (operands,
15363 				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
15364 				n);
15365     }
15366 
15367   return "";
15368 }
15369 
15370 /* Output a multiple immediate operation.
15371    OPERANDS is the vector of operands referred to in the output patterns.
15372    INSTR1 is the output pattern to use for the first constant.
15373    INSTR2 is the output pattern to use for subsequent constants.
15374    IMMED_OP is the index of the constant slot in OPERANDS.
15375    N is the constant value.  */
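/* Worked example (illustrative): with N = 0x10004 the loop below finds set
   bits at positions 2 and 16, so it emits INSTR1 with the operand #4 and then
   INSTR2 with the operand #65536; each chunk is an 8-bit field starting at an
   even bit position and is therefore a valid ARM immediate.  */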
15376 static const char *
15377 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
15378 			int immed_op, HOST_WIDE_INT n)
15379 {
15380 #if HOST_BITS_PER_WIDE_INT > 32
15381   n &= 0xffffffff;
15382 #endif
15383 
15384   if (n == 0)
15385     {
15386       /* Quick and easy output.  */
15387       operands[immed_op] = const0_rtx;
15388       output_asm_insn (instr1, operands);
15389     }
15390   else
15391     {
15392       int i;
15393       const char * instr = instr1;
15394 
15395       /* Note that n is never zero here (which would give no output).  */
15396       for (i = 0; i < 32; i += 2)
15397 	{
15398 	  if (n & (3 << i))
15399 	    {
15400 	      operands[immed_op] = GEN_INT (n & (255 << i));
15401 	      output_asm_insn (instr, operands);
15402 	      instr = instr2;
15403 	      i += 6;
15404 	    }
15405 	}
15406     }
15407 
15408   return "";
15409 }
15410 
15411 /* Return the name of a shifter operation.  */
15412 static const char *
15413 arm_shift_nmem (enum rtx_code code)
15414 {
15415   switch (code)
15416     {
15417     case ASHIFT:
15418       return ARM_LSL_NAME;
15419 
15420     case ASHIFTRT:
15421       return "asr";
15422 
15423     case LSHIFTRT:
15424       return "lsr";
15425 
15426     case ROTATERT:
15427       return "ror";
15428 
15429     default:
15430       abort ();
15431     }
15432 }
15433 
15434 /* Return the appropriate ARM instruction for the operation code.
15435    The returned result should not be overwritten.  OP is the rtx of the
15436    operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
15437    was shifted.  */
15438 const char *
15439 arithmetic_instr (rtx op, int shift_first_arg)
15440 {
15441   switch (GET_CODE (op))
15442     {
15443     case PLUS:
15444       return "add";
15445 
15446     case MINUS:
15447       return shift_first_arg ? "rsb" : "sub";
15448 
15449     case IOR:
15450       return "orr";
15451 
15452     case XOR:
15453       return "eor";
15454 
15455     case AND:
15456       return "and";
15457 
15458     case ASHIFT:
15459     case ASHIFTRT:
15460     case LSHIFTRT:
15461     case ROTATERT:
15462       return arm_shift_nmem (GET_CODE (op));
15463 
15464     default:
15465       gcc_unreachable ();
15466     }
15467 }
15468 
15469 /* Ensure valid constant shifts and return the appropriate shift mnemonic
15470    for the operation code.  The returned result should not be overwritten.
15471    OP is the rtx of the shift.
15472    On exit, *AMOUNTP will be -1 if the shift is by a register, otherwise it
15473    holds the constant shift amount.  */
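/* For example (illustrative): (rotate X (const_int 27)) is canonicalized
   below into a rotate right by 32 - 27 = 5 and returns "ror", while
   (mult X (const_int 8)) is treated as a left shift and returns ARM_LSL_NAME
   with *AMOUNTP set to int_log2 (8) == 3.  */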
15474 static const char *
15475 shift_op (rtx op, HOST_WIDE_INT *amountp)
15476 {
15477   const char * mnem;
15478   enum rtx_code code = GET_CODE (op);
15479 
15480   switch (code)
15481     {
15482     case ROTATE:
15483       if (!CONST_INT_P (XEXP (op, 1)))
15484 	{
15485 	  output_operand_lossage ("invalid shift operand");
15486 	  return NULL;
15487 	}
15488 
15489       code = ROTATERT;
15490       *amountp = 32 - INTVAL (XEXP (op, 1));
15491       mnem = "ror";
15492       break;
15493 
15494     case ASHIFT:
15495     case ASHIFTRT:
15496     case LSHIFTRT:
15497     case ROTATERT:
15498       mnem = arm_shift_nmem (code);
15499       if (CONST_INT_P (XEXP (op, 1)))
15500 	{
15501 	  *amountp = INTVAL (XEXP (op, 1));
15502 	}
15503       else if (REG_P (XEXP (op, 1)))
15504 	{
15505 	  *amountp = -1;
15506 	  return mnem;
15507 	}
15508       else
15509 	{
15510 	  output_operand_lossage ("invalid shift operand");
15511 	  return NULL;
15512 	}
15513       break;
15514 
15515     case MULT:
15516       /* We never have to worry about the amount being other than a
15517 	 power of 2, since this case can never be reloaded from a reg.  */
15518       if (!CONST_INT_P (XEXP (op, 1)))
15519 	{
15520 	  output_operand_lossage ("invalid shift operand");
15521 	  return NULL;
15522 	}
15523 
15524       *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
15525 
15526       /* Amount must be a power of two.  */
15527       if (*amountp & (*amountp - 1))
15528 	{
15529 	  output_operand_lossage ("invalid shift operand");
15530 	  return NULL;
15531 	}
15532 
15533       *amountp = int_log2 (*amountp);
15534       return ARM_LSL_NAME;
15535 
15536     default:
15537       output_operand_lossage ("invalid shift operand");
15538       return NULL;
15539     }
15540 
15541   /* This is not 100% correct, but follows from the desire to merge
15542      multiplication by a power of 2 with the recognizer for a
15543      shift.  >=32 is not a valid shift for "lsl", so we must try and
15544      output a shift that produces the correct arithmetical result.
15545      Using lsr #32 is identical except for the fact that the carry bit
15546      is not set correctly if we set the flags; but we never use the
15547      carry bit from such an operation, so we can ignore that.  */
15548   if (code == ROTATERT)
15549     /* Rotate is just modulo 32.  */
15550     *amountp &= 31;
15551   else if (*amountp != (*amountp & 31))
15552     {
15553       if (code == ASHIFT)
15554 	mnem = "lsr";
15555       *amountp = 32;
15556     }
15557 
15558   /* Shifts of 0 are no-ops.  */
15559   if (*amountp == 0)
15560     return NULL;
15561 
15562   return mnem;
15563 }
15564 
15565 /* Return the shift count (base-2 logarithm) of POWER, which must be a power of two.  */
15566 
15567 static HOST_WIDE_INT
15568 int_log2 (HOST_WIDE_INT power)
15569 {
15570   HOST_WIDE_INT shift = 0;
15571 
15572   while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
15573     {
15574       gcc_assert (shift <= 31);
15575       shift++;
15576     }
15577 
15578   return shift;
15579 }
15580 
15581 /* Output a .ascii pseudo-op, keeping track of lengths.  This is
15582    because /bin/as is horribly restrictive.  The judgement about
15583    whether or not each character is 'printable' (and can be output as
15584    is) or not (and must be printed with an octal escape) must be made
15585    with reference to the *host* character set -- the situation is
15586    similar to that discussed in the comments above pp_c_char in
15587    c-pretty-print.c.  */
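/* For example (illustrative): the three input bytes 'a', '"', 0x01 are
   emitted as
	.ascii "a\"\001"
   with the quote escaped and the unprintable byte written as an octal
   escape.  */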
15588 
15589 #define MAX_ASCII_LEN 51
15590 
15591 void
15592 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
15593 {
15594   int i;
15595   int len_so_far = 0;
15596 
15597   fputs ("\t.ascii\t\"", stream);
15598 
15599   for (i = 0; i < len; i++)
15600     {
15601       int c = p[i];
15602 
15603       if (len_so_far >= MAX_ASCII_LEN)
15604 	{
15605 	  fputs ("\"\n\t.ascii\t\"", stream);
15606 	  len_so_far = 0;
15607 	}
15608 
15609       if (ISPRINT (c))
15610 	{
15611 	  if (c == '\\' || c == '\"')
15612 	    {
15613 	      putc ('\\', stream);
15614 	      len_so_far++;
15615 	    }
15616 	  putc (c, stream);
15617 	  len_so_far++;
15618 	}
15619       else
15620 	{
15621 	  fprintf (stream, "\\%03o", c);
15622 	  len_so_far += 4;
15623 	}
15624     }
15625 
15626   fputs ("\"\n", stream);
15627 }
15628 
15629 /* Whether a register is callee saved or not.  This is necessary because high
15630    registers are marked as caller saved when optimizing for size on Thumb-1
15631    targets, despite actually being callee saved, in order to avoid using them.  */
15632 #define callee_saved_reg_p(reg) \
15633   (!call_used_regs[reg] \
15634    || (TARGET_THUMB1 && optimize_size \
15635        && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
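/* For example (illustrative): when compiling Thumb-1 code with -Os the high
   registers are marked call-used to discourage the register allocator from
   using them, yet this predicate still reports them as callee saved, so any
   that are used anyway get saved by the prologue.  */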
15636 
15637 /* Compute the register save mask for registers 0 through 12
15638    inclusive.  This code is used by arm_compute_save_reg_mask.  */
15639 
15640 static unsigned long
15641 arm_compute_save_reg0_reg12_mask (void)
15642 {
15643   unsigned long func_type = arm_current_func_type ();
15644   unsigned long save_reg_mask = 0;
15645   unsigned int reg;
15646 
15647   if (IS_INTERRUPT (func_type))
15648     {
15649       unsigned int max_reg;
15650       /* Interrupt functions must not corrupt any registers,
15651 	 even call clobbered ones.  If this is a leaf function
15652 	 we can just examine the registers used by the RTL, but
15653 	 otherwise we have to assume that whatever function is
15654 	 called might clobber anything, and so we have to save
15655 	 all the call-clobbered registers as well.  */
15656       if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
15657 	/* FIQ handlers have registers r8 - r12 banked, so
15658 	   we only need to check r0 - r7, Normal ISRs only
15659 	   we only need to check r0 - r7.  Normal ISRs only
15660 	   r13 is the stack pointer which is always preserved,
15661 	   so we do not need to consider it here.  */
15662 	max_reg = 7;
15663       else
15664 	max_reg = 12;
15665 
15666       for (reg = 0; reg <= max_reg; reg++)
15667 	if (df_regs_ever_live_p (reg)
15668 	    || (! crtl->is_leaf && call_used_regs[reg]))
15669 	  save_reg_mask |= (1 << reg);
15670 
15671       /* Also save the pic base register if necessary.  */
15672       if (flag_pic
15673 	  && !TARGET_SINGLE_PIC_BASE
15674 	  && arm_pic_register != INVALID_REGNUM
15675 	  && crtl->uses_pic_offset_table)
15676 	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15677     }
15678   else if (IS_VOLATILE (func_type))
15679     {
15680       /* For noreturn functions we historically omitted register saves
15681 	 altogether.  However this really messes up debugging.  As a
15682 	 compromise save just the frame pointers.  Combined with the link
15683 	 register saved elsewhere this should be sufficient to get
15684 	 a backtrace.  */
15685       if (frame_pointer_needed)
15686 	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
15687       if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
15688 	save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15689       if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
15690 	save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
15691     }
15692   else
15693     {
15694       /* In the normal case we only need to save those registers
15695 	 which are call saved and which are used by this function.  */
15696       for (reg = 0; reg <= 11; reg++)
15697 	if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
15698 	  save_reg_mask |= (1 << reg);
15699 
15700       /* Handle the frame pointer as a special case.  */
15701       if (frame_pointer_needed)
15702 	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
15703 
15704       /* If we aren't loading the PIC register,
15705 	 don't stack it even though it may be live.  */
15706       if (flag_pic
15707 	  && !TARGET_SINGLE_PIC_BASE
15708 	  && arm_pic_register != INVALID_REGNUM
15709 	  && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
15710 	      || crtl->uses_pic_offset_table))
15711 	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15712 
15713       /* The prologue will copy SP into R0, so save it.  */
15714       if (IS_STACKALIGN (func_type))
15715 	save_reg_mask |= 1;
15716     }
15717 
15718   /* Save registers so the exception handler can modify them.  */
15719   if (crtl->calls_eh_return)
15720     {
15721       unsigned int i;
15722 
15723       for (i = 0; ; i++)
15724 	{
15725 	  reg = EH_RETURN_DATA_REGNO (i);
15726 	  if (reg == INVALID_REGNUM)
15727 	    break;
15728 	  save_reg_mask |= 1 << reg;
15729 	}
15730     }
15731 
15732   return save_reg_mask;
15733 }
15734 
15735 
15736 /* Compute the number of bytes used to store the static chain register on the
15737    stack, above the stack frame. We need to know this accurately to get the
15738    alignment of the rest of the stack frame correct. */
15739 
15740 static int arm_compute_static_chain_stack_bytes (void)
15741 {
15742   unsigned long func_type = arm_current_func_type ();
15743   int static_chain_stack_bytes = 0;
15744 
15745   if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
15746       && IS_NESTED (func_type)
15747       && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
15748     static_chain_stack_bytes = 4;
15749 
15750   return static_chain_stack_bytes;
15751 }
15752 
15753 
15754 /* Compute a bit mask of which registers need to be
15755    saved on the stack for the current function.
15756    This is used by arm_get_frame_offsets, which may add extra registers.  */
15757 
15758 static unsigned long
15759 arm_compute_save_reg_mask (void)
15760 {
15761   unsigned int save_reg_mask = 0;
15762   unsigned long func_type = arm_current_func_type ();
15763   unsigned int reg;
15764 
15765   if (IS_NAKED (func_type))
15766     /* This should never really happen.  */
15767     return 0;
15768 
15769   /* If we are creating a stack frame, then we must save the frame pointer,
15770      IP (which will hold the old stack pointer), LR and the PC.  */
15771   if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15772     save_reg_mask |=
15773       (1 << ARM_HARD_FRAME_POINTER_REGNUM)
15774       | (1 << IP_REGNUM)
15775       | (1 << LR_REGNUM)
15776       | (1 << PC_REGNUM);
15777 
15778   save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
15779 
15780   /* Decide if we need to save the link register.
15781      Interrupt routines have their own banked link register,
15782      so they never need to save it.
15783      Otherwise if we do not use the link register we do not need to save
15784      it.  If we are pushing other registers onto the stack however, we
15785      can save an instruction in the epilogue by pushing the link register
15786      now and then popping it back into the PC.  This incurs extra memory
15787      accesses though, so we only do it when optimizing for size, and only
15788      if we know that we will not need a fancy return sequence.  */
15789   if (df_regs_ever_live_p (LR_REGNUM)
15790       || (save_reg_mask
15791 	  && optimize_size
15792 	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
15793 	  && !crtl->calls_eh_return))
15794     save_reg_mask |= 1 << LR_REGNUM;
15795 
15796   if (cfun->machine->lr_save_eliminated)
15797     save_reg_mask &= ~ (1 << LR_REGNUM);
15798 
15799   if (TARGET_REALLY_IWMMXT
15800       && ((bit_count (save_reg_mask)
15801 	   + ARM_NUM_INTS (crtl->args.pretend_args_size
15802 			   + arm_compute_static_chain_stack_bytes ())
15803 	   ) % 2) != 0)
15804     {
15805       /* The total number of registers that are going to be pushed
15806 	 onto the stack is odd.  We need to ensure that the stack
15807 	 is 64-bit aligned before we start to save iWMMXt registers,
15808 	 and also before we start to create locals.  (A local variable
15809 	 might be a double or long long which we will load/store using
15810 	 an iWMMXt instruction).  Therefore we need to push another
15811 	 ARM register, so that the stack will be 64-bit aligned.  We
15812 	 try to avoid using the arg registers (r0 - r3) as they might be
15813 	 used to pass values in a tail call.  */
15814       for (reg = 4; reg <= 12; reg++)
15815 	if ((save_reg_mask & (1 << reg)) == 0)
15816 	  break;
15817 
15818       if (reg <= 12)
15819 	save_reg_mask |= (1 << reg);
15820       else
15821 	{
15822 	  cfun->machine->sibcall_blocked = 1;
15823 	  save_reg_mask |= (1 << 3);
15824 	}
15825     }
15826 
15827   /* We may need to push an additional register for use in initializing the
15828      PIC base register.  */
15829   if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
15830       && (save_reg_mask & THUMB2_WORK_REGS) == 0)
15831     {
15832       reg = thumb_find_work_register (1 << 4);
15833       if (!call_used_regs[reg])
15834 	save_reg_mask |= (1 << reg);
15835     }
15836 
15837   return save_reg_mask;
15838 }
15839 
15840 
15841 /* Compute a bit mask of which registers need to be
15842    saved on the stack for the current function.  */
15843 static unsigned long
15844 thumb1_compute_save_reg_mask (void)
15845 {
15846   unsigned long mask;
15847   unsigned reg;
15848 
15849   mask = 0;
15850   for (reg = 0; reg < 12; reg ++)
15851     if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
15852       mask |= 1 << reg;
15853 
15854   if (flag_pic
15855       && !TARGET_SINGLE_PIC_BASE
15856       && arm_pic_register != INVALID_REGNUM
15857       && crtl->uses_pic_offset_table)
15858     mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15859 
15860   /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
15861   if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
15862     mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15863 
15864   /* LR will also be pushed if any lo regs are pushed.  */
15865   if (mask & 0xff || thumb_force_lr_save ())
15866     mask |= (1 << LR_REGNUM);
15867 
15868   /* Make sure we have a low work register if we need one.
15869      We will need one if we are going to push a high register,
15870      but we are not currently intending to push a low register.  */
15871   if ((mask & 0xff) == 0
15872       && ((mask & 0x0f00) || TARGET_BACKTRACE))
15873     {
15874       /* Use thumb_find_work_register to choose which register
15875 	 we will use.  If the register is live then we will
15876 	 have to push it.  Use LAST_LO_REGNUM as our fallback
15877 	 choice for the register to select.  */
15878       reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
15879       /* Make sure the register returned by thumb_find_work_register is
15880 	 not part of the return value.  */
15881       if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
15882 	reg = LAST_LO_REGNUM;
15883 
15884       if (callee_saved_reg_p (reg))
15885 	mask |= 1 << reg;
15886     }
15887 
15888   /* The 504 below is 8 bytes less than 512 because there are two possible
15889      alignment words.  We can't tell here if they will be present or not so we
15890      have to play it safe and assume that they are. */
15891   if ((CALLER_INTERWORKING_SLOT_SIZE
15892        + ROUND_UP_WORD (get_frame_size ())
15893        + crtl->outgoing_args_size) >= 504)
15894     {
15895       /* This is the same as the code in thumb1_expand_prologue() which
15896 	 determines which register to use for stack decrement. */
15897       for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
15898 	if (mask & (1 << reg))
15899 	  break;
15900 
15901       if (reg > LAST_LO_REGNUM)
15902 	{
15903 	  /* Make sure we have a register available for stack decrement. */
15904 	  mask |= 1 << LAST_LO_REGNUM;
15905 	}
15906     }
15907 
15908   return mask;
15909 }
15910 
15911 
15912 /* Return the number of bytes required to save VFP registers.  */
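/* For example (illustrative): if the live VFP registers form a single run of
   two consecutive D registers and !arm_arch6, the ARM10 VFPr1 workaround
   below bumps the run length from 2 to 3, so 24 rather than 16 bytes are
   reserved for the save area.  */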
15913 static int
15914 arm_get_vfp_saved_size (void)
15915 {
15916   unsigned int regno;
15917   int count;
15918   int saved;
15919 
15920   saved = 0;
15921   /* Space for saved VFP registers.  */
15922   if (TARGET_HARD_FLOAT && TARGET_VFP)
15923     {
15924       count = 0;
15925       for (regno = FIRST_VFP_REGNUM;
15926 	   regno < LAST_VFP_REGNUM;
15927 	   regno += 2)
15928 	{
15929 	  if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
15930 	      && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
15931 	    {
15932 	      if (count > 0)
15933 		{
15934 		  /* Workaround ARM10 VFPr1 bug.  */
15935 		  if (count == 2 && !arm_arch6)
15936 		    count++;
15937 		  saved += count * 8;
15938 		}
15939 	      count = 0;
15940 	    }
15941 	  else
15942 	    count++;
15943 	}
15944       if (count > 0)
15945 	{
15946 	  if (count == 2 && !arm_arch6)
15947 	    count++;
15948 	  saved += count * 8;
15949 	}
15950     }
15951   return saved;
15952 }
15953 
15954 
15955 /* Generate a function exit sequence.  If REALLY_RETURN is false, then do
15956    everything bar the final return instruction.  If SIMPLE_RETURN is true,
15957    then do not output the epilogue, because it has already been emitted in RTL.  */
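/* For example (illustrative): for a normal, non-interworking ARM function
   whose only saved core registers are r4 and lr, a real return loads the
   saved lr value directly into the pc, producing "ldmfd sp!, {r4, pc}"
   ("pop {r4, pc}" with unified syntax), so no separate branch is needed.  */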
15958 const char *
15959 output_return_instruction (rtx operand, bool really_return, bool reverse,
15960                            bool simple_return)
15961 {
15962   char conditional[10];
15963   char instr[100];
15964   unsigned reg;
15965   unsigned long live_regs_mask;
15966   unsigned long func_type;
15967   arm_stack_offsets *offsets;
15968 
15969   func_type = arm_current_func_type ();
15970 
15971   if (IS_NAKED (func_type))
15972     return "";
15973 
15974   if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
15975     {
15976       /* If this function was declared non-returning, and we have
15977 	 found a tail call, then we have to trust that the called
15978 	 function won't return.  */
15979       if (really_return)
15980 	{
15981 	  rtx ops[2];
15982 
15983 	  /* Otherwise, trap an attempted return by aborting.  */
15984 	  ops[0] = operand;
15985 	  ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
15986 				       : "abort");
15987 	  assemble_external_libcall (ops[1]);
15988 	  output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
15989 	}
15990 
15991       return "";
15992     }
15993 
15994   gcc_assert (!cfun->calls_alloca || really_return);
15995 
15996   sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
15997 
15998   cfun->machine->return_used_this_function = 1;
15999 
16000   offsets = arm_get_frame_offsets ();
16001   live_regs_mask = offsets->saved_regs_mask;
16002 
16003   if (!simple_return && live_regs_mask)
16004     {
16005       const char * return_reg;
16006 
16007       /* If we do not have any special requirements for function exit
16008 	 (e.g. interworking) then we can load the return address
16009 	 directly into the PC.  Otherwise we must load it into LR.  */
16010       if (really_return
16011 	  && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
16012 	return_reg = reg_names[PC_REGNUM];
16013       else
16014 	return_reg = reg_names[LR_REGNUM];
16015 
16016       if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
16017 	{
16018 	  /* There are three possible reasons for the IP register
16019 	     being saved: 1) a stack frame was created, in which case
16020 	     IP contains the old stack pointer, or 2) an ISR routine
16021 	     corrupted it, or 3) it was saved to align the stack on
16022 	     iWMMXt.  In case 1, restore IP into SP, otherwise just
16023 	     restore IP.  */
16024 	  if (frame_pointer_needed)
16025 	    {
16026 	      live_regs_mask &= ~ (1 << IP_REGNUM);
16027 	      live_regs_mask |=   (1 << SP_REGNUM);
16028 	    }
16029 	  else
16030 	    gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
16031 	}
16032 
16033       /* On some ARM architectures it is faster to use LDR rather than
16034 	 LDM to load a single register.  On other architectures, the
16035 	 cost is the same.  In 26 bit mode, or for exception handlers,
16036 	 we have to use LDM to load the PC so that the CPSR is also
16037 	 restored.  */
16038       for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
16039 	if (live_regs_mask == (1U << reg))
16040 	  break;
16041 
16042       if (reg <= LAST_ARM_REGNUM
16043 	  && (reg != LR_REGNUM
16044 	      || ! really_return
16045 	      || ! IS_INTERRUPT (func_type)))
16046 	{
16047 	  sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
16048 		   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
16049 	}
16050       else
16051 	{
16052 	  char *p;
16053 	  int first = 1;
16054 
16055 	  /* Generate the load multiple instruction to restore the
16056 	     registers.  Note we can get here, even if
16057 	     frame_pointer_needed is true, but only if sp already
16058 	     points to the base of the saved core registers.  */
16059 	  if (live_regs_mask & (1 << SP_REGNUM))
16060 	    {
16061 	      unsigned HOST_WIDE_INT stack_adjust;
16062 
16063 	      stack_adjust = offsets->outgoing_args - offsets->saved_regs;
16064 	      gcc_assert (stack_adjust == 0 || stack_adjust == 4);
16065 
16066 	      if (stack_adjust && arm_arch5 && TARGET_ARM)
16067 		if (TARGET_UNIFIED_ASM)
16068 		  sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
16069 		else
16070 		  sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
16071 	      else
16072 		{
16073 		  /* If we can't use ldmib (SA110 bug),
16074 		     then try to pop r3 instead.  */
16075 		  if (stack_adjust)
16076 		    live_regs_mask |= 1 << 3;
16077 
16078 		  if (TARGET_UNIFIED_ASM)
16079 		    sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
16080 		  else
16081 		    sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
16082 		}
16083 	    }
16084 	  else
16085 	    if (TARGET_UNIFIED_ASM)
16086 	      sprintf (instr, "pop%s\t{", conditional);
16087 	    else
16088 	      sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
16089 
16090 	  p = instr + strlen (instr);
16091 
16092 	  for (reg = 0; reg <= SP_REGNUM; reg++)
16093 	    if (live_regs_mask & (1 << reg))
16094 	      {
16095 		int l = strlen (reg_names[reg]);
16096 
16097 		if (first)
16098 		  first = 0;
16099 		else
16100 		  {
16101 		    memcpy (p, ", ", 2);
16102 		    p += 2;
16103 		  }
16104 
16105 		memcpy (p, "%|", 2);
16106 		memcpy (p + 2, reg_names[reg], l);
16107 		p += l + 2;
16108 	      }
16109 
16110 	  if (live_regs_mask & (1 << LR_REGNUM))
16111 	    {
16112 	      sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
16113 	      /* If returning from an interrupt, restore the CPSR.  */
16114 	      if (IS_INTERRUPT (func_type))
16115 		strcat (p, "^");
16116 	    }
16117 	  else
16118 	    strcpy (p, "}");
16119 	}
16120 
16121       output_asm_insn (instr, & operand);
16122 
16123       /* See if we need to generate an extra instruction to
16124 	 perform the actual function return.  */
16125       if (really_return
16126 	  && func_type != ARM_FT_INTERWORKED
16127 	  && (live_regs_mask & (1 << LR_REGNUM)) != 0)
16128 	{
16129 	  /* The return has already been handled
16130 	     by loading the LR into the PC.  */
16131           return "";
16132 	}
16133     }
16134 
16135   if (really_return)
16136     {
16137       switch ((int) ARM_FUNC_TYPE (func_type))
16138 	{
16139 	case ARM_FT_ISR:
16140 	case ARM_FT_FIQ:
16141 	  /* ??? This is wrong for unified assembly syntax.  */
16142 	  sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
16143 	  break;
16144 
16145 	case ARM_FT_INTERWORKED:
16146 	  sprintf (instr, "bx%s\t%%|lr", conditional);
16147 	  break;
16148 
16149 	case ARM_FT_EXCEPTION:
16150 	  /* ??? This is wrong for unified assembly syntax.  */
16151 	  sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
16152 	  break;
16153 
16154 	default:
16155 	  /* Use bx if it's available.  */
16156 	  if (arm_arch5 || arm_arch4t)
16157 	    sprintf (instr, "bx%s\t%%|lr", conditional);
16158 	  else
16159 	    sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
16160 	  break;
16161 	}
16162 
16163       output_asm_insn (instr, & operand);
16164     }
16165 
16166   return "";
16167 }
16168 
16169 /* Write the function name into the code section, directly preceding
16170    the function prologue.
16171 
16172    Code will be output similar to this:
16173      t0
16174 	 .ascii "arm_poke_function_name", 0
16175 	 .align
16176      t1
16177 	 .word 0xff000000 + (t1 - t0)
16178      arm_poke_function_name
16179 	 mov     ip, sp
16180 	 stmfd   sp!, {fp, ip, lr, pc}
16181 	 sub     fp, ip, #4
16182 
16183    When performing a stack backtrace, code can inspect the value
16184    of 'pc' stored at 'fp' + 0.  If the trace function then looks
16185    at location pc - 12 and the top 8 bits are set, then we know
16186    that there is a function name embedded immediately preceding this
16187    location, whose length is ((pc[-3]) & ~0xff000000).
16188 
16189    We assume that pc is declared as a pointer to an unsigned long.
16190 
16191    It is of no benefit to output the function name if we are assembling
16192    a leaf function.  These function types will not contain a stack
16193    backtrace structure, therefore it is not possible to determine the
16194    function name.  */
16195 void
16196 arm_poke_function_name (FILE *stream, const char *name)
16197 {
16198   unsigned long alignlength;
16199   unsigned long length;
16200   rtx           x;
16201 
16202   length      = strlen (name) + 1;
16203   alignlength = ROUND_UP_WORD (length);
16204 
16205   ASM_OUTPUT_ASCII (stream, name, length);
16206   ASM_OUTPUT_ALIGN (stream, 2);
16207   x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
16208   assemble_aligned_integer (UNITS_PER_WORD, x);
16209 }
16210 
16211 /* Place some comments into the assembler stream
16212    describing the current function.  */
16213 static void
16214 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
16215 {
16216   unsigned long func_type;
16217 
16218   /* ??? Do we want to print some of the below anyway?  */
16219   if (TARGET_THUMB1)
16220     return;
16221 
16222   /* Sanity check.  */
16223   gcc_assert (!arm_ccfsm_state && !arm_target_insn);
16224 
16225   func_type = arm_current_func_type ();
16226 
16227   switch ((int) ARM_FUNC_TYPE (func_type))
16228     {
16229     default:
16230     case ARM_FT_NORMAL:
16231       break;
16232     case ARM_FT_INTERWORKED:
16233       asm_fprintf (f, "\t%@ Function supports interworking.\n");
16234       break;
16235     case ARM_FT_ISR:
16236       asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
16237       break;
16238     case ARM_FT_FIQ:
16239       asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
16240       break;
16241     case ARM_FT_EXCEPTION:
16242       asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
16243       break;
16244     }
16245 
16246   if (IS_NAKED (func_type))
16247     asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
16248 
16249   if (IS_VOLATILE (func_type))
16250     asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
16251 
16252   if (IS_NESTED (func_type))
16253     asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
16254   if (IS_STACKALIGN (func_type))
16255     asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
16256 
16257   asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
16258 	       crtl->args.size,
16259 	       crtl->args.pretend_args_size, frame_size);
16260 
16261   asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
16262 	       frame_pointer_needed,
16263 	       cfun->machine->uses_anonymous_args);
16264 
16265   if (cfun->machine->lr_save_eliminated)
16266     asm_fprintf (f, "\t%@ link register save eliminated.\n");
16267 
16268   if (crtl->calls_eh_return)
16269     asm_fprintf (f, "\t%@ Calls __builtin_eh_return.\n");
16270 
16271 }
16272 
16273 static void
16274 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
16275 			      HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
16276 {
16277   arm_stack_offsets *offsets;
16278 
16279   if (TARGET_THUMB1)
16280     {
16281       int regno;
16282 
16283       /* Emit any call-via-reg trampolines that are needed for v4t support
16284 	 of call_reg and call_value_reg type insns.  */
16285       for (regno = 0; regno < LR_REGNUM; regno++)
16286 	{
16287 	  rtx label = cfun->machine->call_via[regno];
16288 
16289 	  if (label != NULL)
16290 	    {
16291 	      switch_to_section (function_section (current_function_decl));
16292 	      targetm.asm_out.internal_label (asm_out_file, "L",
16293 					      CODE_LABEL_NUMBER (label));
16294 	      asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
16295 	    }
16296 	}
16297 
16298       /* ??? Probably not safe to set this here, since it assumes that a
16299 	 function will be emitted as assembly immediately after we generate
16300 	 RTL for it.  This does not happen for inline functions.  */
16301       cfun->machine->return_used_this_function = 0;
16302     }
16303   else /* TARGET_32BIT */
16304     {
16305       /* We need to take into account any stack-frame rounding.  */
16306       offsets = arm_get_frame_offsets ();
16307 
16308       gcc_assert (!use_return_insn (FALSE, NULL)
16309 		  || (cfun->machine->return_used_this_function != 0)
16310 		  || offsets->saved_regs == offsets->outgoing_args
16311 		  || frame_pointer_needed);
16312 
16313       /* Reset the ARM-specific per-function variables.  */
16314       after_arm_reorg = 0;
16315     }
16316 }
16317 
16318 /* Generate and emit a pattern that will be recognized as an STRD pattern.  If
16319    an even number of registers is being pushed, multiple STRD patterns are
16320    created for all register pairs.  If an odd number of registers is pushed,
16321    emit a combination of STRDs and a single STR for the prologue saves.  */
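/* Worked example (illustrative): pushing {r4, r5, r6} first drops the stack
   pointer by 12, then the pairing loop below emits one STRD that stores r5 at
   [sp, #4] and r6 at [sp, #8], and the lone-register tail stores r4 at [sp]
   with a plain STR.  */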
16322 static void
16323 thumb2_emit_strd_push (unsigned long saved_regs_mask)
16324 {
16325   int num_regs = 0;
16326   int i, j;
16327   rtx par = NULL_RTX;
16328   rtx insn = NULL_RTX;
16329   rtx dwarf = NULL_RTX;
16330   rtx tmp, reg, tmp1;
16331 
16332   for (i = 0; i <= LAST_ARM_REGNUM; i++)
16333     if (saved_regs_mask & (1 << i))
16334       num_regs++;
16335 
16336   gcc_assert (num_regs && num_regs <= 16);
16337 
16338   /* Pre-decrement the stack pointer to make room for the num_regs 4-byte
16339      registers that are about to be pushed.  */
16340   tmp = gen_rtx_SET (VOIDmode,
16341                      stack_pointer_rtx,
16342                      plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
16343   RTX_FRAME_RELATED_P (tmp) = 1;
16344   insn = emit_insn (tmp);
16345 
16346   /* Create sequence for DWARF info.  */
16347   dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
16348 
16349   /* RTLs cannot be shared, hence create a new copy for the dwarf note.  */
16350   tmp1 = gen_rtx_SET (VOIDmode,
16351                      stack_pointer_rtx,
16352                      plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
16353   RTX_FRAME_RELATED_P (tmp1) = 1;
16354   XVECEXP (dwarf, 0, 0) = tmp1;
16355 
16356   gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
16357   gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
16358 
16359   /* Var j iterates over all the registers to gather the registers set in
16360      saved_regs_mask.  Var i gives the index of register R_j in the stack frame.
16361      A PARALLEL RTX of register-pair is created here, so that pattern for
16362      STRD can be matched.  If num_regs is odd, 1st register will be pushed
16363      using STR and remaining registers will be pushed with STRD in pairs.
16364      If num_regs is even, all registers are pushed with STRD in pairs.
16365      Hence, skip first element for odd num_regs.  */
16366   for (i = num_regs - 1, j = LAST_ARM_REGNUM; i >= (num_regs % 2); j--)
16367     if (saved_regs_mask & (1 << j))
16368       {
16369         /* Create RTX for store.  New RTX is created for dwarf as
16370            they are not sharable.  */
16371         reg = gen_rtx_REG (SImode, j);
16372         tmp = gen_rtx_SET (SImode,
16373                            gen_frame_mem
16374                            (SImode,
16375                             plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
16376                            reg);
16377 
16378         tmp1 = gen_rtx_SET (SImode,
16379                            gen_frame_mem
16380                            (SImode,
16381                             plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
16382                            reg);
16383         RTX_FRAME_RELATED_P (tmp) = 1;
16384         RTX_FRAME_RELATED_P (tmp1) = 1;
16385 
16386         if (((i - (num_regs % 2)) % 2) == 1)
16387           /* When (i - (num_regs % 2)) is odd, the RTX to be emitted is yet to
16388              be created.  Hence create it first.  The STRD pattern we are
16389              generating is :
16390              [ (SET (MEM (PLUS (SP) (NUM))) (reg_t1))
16391                (SET (MEM (PLUS (SP) (NUM + 4))) (reg_t2)) ]
16392              where the target registers need not be consecutive.  */
16393           par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
16394 
16395         /* Register R_j is added to the PARALLEL RTX.  If (i - (num_regs % 2))
16396            is even, reg_j is added as the 0th element; if it is odd, it is added
16397            as the 1st element of the STRD pattern shown above.  */
16398         XVECEXP (par, 0, ((i - (num_regs % 2)) % 2)) = tmp;
16399         XVECEXP (dwarf, 0, (i + 1)) = tmp1;
16400 
16401         if (((i - (num_regs % 2)) % 2) == 0)
16402           /* When (i - (num_regs % 2)) is even, RTXs for both the registers
16403              to be stored have been generated in the STRD pattern given above,
16404              and the pattern can be emitted now.  */
16405           emit_insn (par);
16406 
16407         i--;
16408       }
16409 
16410   if ((num_regs % 2) == 1)
16411     {
16412       /* If an odd number of registers is pushed, generate an STR pattern to
16413          store the lone register.  */
16414       for (; (saved_regs_mask & (1 << j)) == 0; j--);
16415 
16416       tmp1 = gen_frame_mem (SImode, plus_constant (Pmode,
16417                                                    stack_pointer_rtx, 4 * i));
16418       reg = gen_rtx_REG (SImode, j);
16419       tmp = gen_rtx_SET (SImode, tmp1, reg);
16420       RTX_FRAME_RELATED_P (tmp) = 1;
16421 
16422       emit_insn (tmp);
16423 
16424       tmp1 = gen_rtx_SET (SImode,
16425                          gen_frame_mem
16426                          (SImode,
16427                           plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
16428                           reg);
16429       RTX_FRAME_RELATED_P (tmp1) = 1;
16430       XVECEXP (dwarf, 0, (i + 1)) = tmp1;
16431     }
16432 
16433   add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16434   RTX_FRAME_RELATED_P (insn) = 1;
16435   return;
16436 }
16437 
16438 /* Generate and emit an insn that we will recognize as a push_multi.
16439    Unfortunately, since this insn does not reflect very well the actual
16440    semantics of the operation, we need to annotate the insn for the benefit
16441    of DWARF2 frame unwind information.  */
16442 static rtx
16443 emit_multi_reg_push (unsigned long mask)
16444 {
16445   int num_regs = 0;
16446   int num_dwarf_regs;
16447   int i, j;
16448   rtx par;
16449   rtx dwarf;
16450   int dwarf_par_index;
16451   rtx tmp, reg;
16452 
16453   for (i = 0; i <= LAST_ARM_REGNUM; i++)
16454     if (mask & (1 << i))
16455       num_regs++;
16456 
16457   gcc_assert (num_regs && num_regs <= 16);
16458 
16459   /* We don't record the PC in the dwarf frame information.  */
16460   num_dwarf_regs = num_regs;
16461   if (mask & (1 << PC_REGNUM))
16462     num_dwarf_regs--;
16463 
16464   /* For the body of the insn we are going to generate an UNSPEC in
16465      parallel with several USEs.  This allows the insn to be recognized
16466      by the push_multi pattern in the arm.md file.
16467 
16468      The body of the insn looks something like this:
16469 
16470        (parallel [
16471            (set (mem:BLK (pre_modify:SI (reg:SI sp)
16472 	                                (const_int:SI <num>)))
16473 	        (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
16474            (use (reg:SI XX))
16475            (use (reg:SI YY))
16476 	   ...
16477         ])
16478 
16479      For the frame note however, we try to be more explicit and actually
16480      show each register being stored into the stack frame, plus a (single)
16481      decrement of the stack pointer.  We do it this way in order to be
16482      friendly to the stack unwinding code, which only wants to see a single
16483      stack decrement per instruction.  The RTL we generate for the note looks
16484      something like this:
16485 
16486       (sequence [
16487            (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
16488            (set (mem:SI (reg:SI sp)) (reg:SI r4))
16489            (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
16490            (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
16491 	   ...
16492         ])
16493 
16494      FIXME: In an ideal world the PRE_MODIFY would not exist and
16495      instead we'd have a parallel expression detailing all
16496      the stores to the various memory addresses so that debug
16497      information is more up-to-date. Remember however while writing
16498      this to take care of the constraints with the push instruction.
16499 
16500      Note also that this has to be taken care of for the VFP registers.
16501 
16502      For more see PR43399.  */
16503 
16504   par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
16505   dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
16506   dwarf_par_index = 1;
16507 
16508   for (i = 0; i <= LAST_ARM_REGNUM; i++)
16509     {
16510       if (mask & (1 << i))
16511 	{
16512 	  reg = gen_rtx_REG (SImode, i);
16513 
16514 	  XVECEXP (par, 0, 0)
16515 	    = gen_rtx_SET (VOIDmode,
16516 			   gen_frame_mem
16517 			   (BLKmode,
16518 			    gen_rtx_PRE_MODIFY (Pmode,
16519 						stack_pointer_rtx,
16520 						plus_constant
16521 						(Pmode, stack_pointer_rtx,
16522 						 -4 * num_regs))
16523 			    ),
16524 			   gen_rtx_UNSPEC (BLKmode,
16525 					   gen_rtvec (1, reg),
16526 					   UNSPEC_PUSH_MULT));
16527 
16528 	  if (i != PC_REGNUM)
16529 	    {
16530 	      tmp = gen_rtx_SET (VOIDmode,
16531 				 gen_frame_mem (SImode, stack_pointer_rtx),
16532 				 reg);
16533 	      RTX_FRAME_RELATED_P (tmp) = 1;
16534 	      XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
16535 	      dwarf_par_index++;
16536 	    }
16537 
16538 	  break;
16539 	}
16540     }
16541 
16542   for (j = 1, i++; j < num_regs; i++)
16543     {
16544       if (mask & (1 << i))
16545 	{
16546 	  reg = gen_rtx_REG (SImode, i);
16547 
16548 	  XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
16549 
16550 	  if (i != PC_REGNUM)
16551 	    {
16552 	      tmp
16553 		= gen_rtx_SET (VOIDmode,
16554 			       gen_frame_mem
16555 			       (SImode,
16556 				plus_constant (Pmode, stack_pointer_rtx,
16557 					       4 * j)),
16558 			       reg);
16559 	      RTX_FRAME_RELATED_P (tmp) = 1;
16560 	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
16561 	    }
16562 
16563 	  j++;
16564 	}
16565     }
16566 
16567   par = emit_insn (par);
16568 
16569   tmp = gen_rtx_SET (VOIDmode,
16570 		     stack_pointer_rtx,
16571 		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
16572   RTX_FRAME_RELATED_P (tmp) = 1;
16573   XVECEXP (dwarf, 0, 0) = tmp;
16574 
16575   add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
16576 
16577   return par;
16578 }
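
/* Illustrative sketch (not part of the generated code): for a call such as
   emit_multi_reg_push ((1 << 4) | (1 << 5) | (1 << LR_REGNUM)), the insn
   emitted above would typically be matched by the push_multi pattern and
   assemble to something like

       push    {r4, r5, lr}

   while the attached REG_FRAME_RELATED_EXPR note describes the same effect
   as a single SP decrement of 12 followed by stores of r4, r5 and lr at
   offsets 0, 4 and 8 from the new SP.  */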
16579 
16580 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
16581    SAVED_REGS_MASK shows which registers need to be restored.
16582 
16583    Unfortunately, since this insn does not reflect very well the actual
16584    semantics of the operation, we need to annotate the insn for the benefit
16585    of DWARF2 frame unwind information.  */
16586 static void
16587 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
16588 {
16589   int num_regs = 0;
16590   int i, j;
16591   rtx par;
16592   rtx dwarf = NULL_RTX;
16593   rtx tmp, reg;
16594   bool return_in_pc;
16595   int offset_adj;
16596   int emit_update;
16597 
16598   return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
16599   offset_adj = return_in_pc ? 1 : 0;
16600   for (i = 0; i <= LAST_ARM_REGNUM; i++)
16601     if (saved_regs_mask & (1 << i))
16602       num_regs++;
16603 
16604   gcc_assert (num_regs && num_regs <= 16);
16605 
16606   /* If SP is in the register list, then we don't emit the SP update insn.  */
16607   emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
16608 
16609   /* The parallel needs to hold num_regs SETs
16610      and one SET for the stack update.  */
16611   par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
16612 
16613   if (return_in_pc)
16614     {
16615       tmp = ret_rtx;
16616       XVECEXP (par, 0, 0) = tmp;
16617     }
16618 
16619   if (emit_update)
16620     {
16621       /* Increment the stack pointer, based on there being
16622          num_regs 4-byte registers to restore.  */
16623       tmp = gen_rtx_SET (VOIDmode,
16624                          stack_pointer_rtx,
16625                          plus_constant (Pmode,
16626                                         stack_pointer_rtx,
16627                                         4 * num_regs));
16628       RTX_FRAME_RELATED_P (tmp) = 1;
16629       XVECEXP (par, 0, offset_adj) = tmp;
16630     }
16631 
16632   /* Now restore every reg, which may include PC.  */
16633   for (j = 0, i = 0; j < num_regs; i++)
16634     if (saved_regs_mask & (1 << i))
16635       {
16636         reg = gen_rtx_REG (SImode, i);
16637         tmp = gen_rtx_SET (VOIDmode,
16638                            reg,
16639                            gen_frame_mem
16640                            (SImode,
16641                             plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
16642         RTX_FRAME_RELATED_P (tmp) = 1;
16643         XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
16644 
16645         /* We need to maintain a sequence for the DWARF info too.  As the
16646            DWARF info should not include PC, skip PC.  */
16647         if (i != PC_REGNUM)
16648           dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
16649 
16650         j++;
16651       }
16652 
16653   if (return_in_pc)
16654     par = emit_jump_insn (par);
16655   else
16656     par = emit_insn (par);
16657 
16658   REG_NOTES (par) = dwarf;
16659 }
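
/* Illustrative sketch: a call such as
   arm_emit_multi_reg_pop ((1 << 4) | (1 << 5) | (1 << PC_REGNUM)) builds a
   PARALLEL containing a return, an SP increment of 12, and three SImode
   loads from SP, SP+4 and SP+8, which would normally assemble to something
   like

       pop     {r4, r5, pc}

   The REG_CFA_RESTORE notes record that r4 and r5 (but not PC) have been
   restored, for the benefit of the unwinder.  */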
16660 
16661 /* Generate and emit an insn pattern that we will recognize as a pop_multi
16662    of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
16663 
16664    Unfortunately, since this insn does not reflect very well the actual
16665    semantics of the operation, we need to annotate the insn for the benefit
16666    of DWARF2 frame unwind information.  */
16667 static void
16668 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
16669 {
16670   int i, j;
16671   rtx par;
16672   rtx dwarf = NULL_RTX;
16673   rtx tmp, reg;
16674 
16675   gcc_assert (num_regs && num_regs <= 32);
16676 
16677   /* Work around the ARM10 VFPr1 bug.  */
16678   if (num_regs == 2 && !arm_arch6)
16679     {
16680       if (first_reg == 15)
16681         first_reg--;
16682 
16683       num_regs++;
16684     }
16685 
16686   /* We can emit at most 16 D-registers in a single pop_multi instruction, and
16687      there could be up to 32 D-registers to restore.
16688      If there are more than 16 D-registers, make two recursive calls,
16689      each of which emits one pop_multi instruction.  */
16690   if (num_regs > 16)
16691     {
16692       arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
16693       arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
16694       return;
16695     }
16696 
16697   /* The parallel needs to hold num_regs SETs
16698      and one SET for the stack update.  */
16699   par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
16700 
16701   /* Increment the stack pointer, based on there being
16702      num_regs 8-byte registers to restore.  */
16703   tmp = gen_rtx_SET (VOIDmode,
16704                      base_reg,
16705                      plus_constant (Pmode, base_reg, 8 * num_regs));
16706   RTX_FRAME_RELATED_P (tmp) = 1;
16707   XVECEXP (par, 0, 0) = tmp;
16708 
16709   /* Now show every reg that will be restored, using a SET for each.  */
16710   for (j = 0, i=first_reg; j < num_regs; i += 2)
16711     {
16712       reg = gen_rtx_REG (DFmode, i);
16713 
16714       tmp = gen_rtx_SET (VOIDmode,
16715                          reg,
16716                          gen_frame_mem
16717                          (DFmode,
16718                           plus_constant (Pmode, base_reg, 8 * j)));
16719       RTX_FRAME_RELATED_P (tmp) = 1;
16720       XVECEXP (par, 0, j + 1) = tmp;
16721 
16722       dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
16723 
16724       j++;
16725     }
16726 
16727   par = emit_insn (par);
16728   REG_NOTES (par) = dwarf;
16729 }
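
/* Illustrative sketch: restoring four D registers (say d8-d11) with
   BASE_REG == SP builds a PARALLEL with a base-register increment of 32 and
   four DFmode loads at offsets 0, 8, 16 and 24 from the base, which would
   normally be matched as a VFP pop and assemble to something like

       vldmia  sp!, {d8-d11}

   with a REG_CFA_RESTORE note recorded for each D register restored.  */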
16730 
16731 /* Generate and emit a pattern that will be recognized as an LDRD pattern.  If
16732    an even number of registers is being popped, multiple LDRD patterns are
16733    created for all register pairs.  If an odd number of registers is popped,
16734    the last register is loaded using an LDR pattern.  */
16735 static void
16736 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
16737 {
16738   int num_regs = 0;
16739   int i, j;
16740   rtx par = NULL_RTX;
16741   rtx dwarf = NULL_RTX;
16742   rtx tmp, reg, tmp1;
16743   bool return_in_pc;
16744 
16745   return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
16746   for (i = 0; i <= LAST_ARM_REGNUM; i++)
16747     if (saved_regs_mask & (1 << i))
16748       num_regs++;
16749 
16750   gcc_assert (num_regs && num_regs <= 16);
16751 
16752   /* We cannot generate an ldrd for PC, so reduce the count if PC is
16753      to be popped.  If num_regs was even it now becomes odd, and we can
16754      generate the pop with PC.  If num_regs was odd it is now even, and
16755      an ldr with return can be generated for PC.  */
16756   if (return_in_pc)
16757     num_regs--;
16758 
16759   gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
16760 
16761   /* Var j iterates over all the registers to find those set in
16762      saved_regs_mask.  Var i gives the index of a saved register in the
16763      stack frame.  A PARALLEL RTX holding a register pair is created here,
16764      so that the pattern for LDRD can be matched.  As PC is always the last
16765      register to be popped, and we have already decremented num_regs if PC
16766      is present, we don't have to worry about PC in this loop.  */
16767   for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
16768     if (saved_regs_mask & (1 << j))
16769       {
16770         /* Create RTX for memory load.  */
16771         reg = gen_rtx_REG (SImode, j);
16772         tmp = gen_rtx_SET (SImode,
16773                            reg,
16774                            gen_frame_mem (SImode,
16775                                plus_constant (Pmode,
16776                                               stack_pointer_rtx, 4 * i)));
16777         RTX_FRAME_RELATED_P (tmp) = 1;
16778 
16779         if (i % 2 == 0)
16780           {
16781             /* When the saved-register index (i) is even, the RTX to be emitted
16782                has not been created yet, so create it first.  The LDRD pattern
16783                we are generating is:
16784                [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
16785                  (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
16786                where the target registers need not be consecutive.  */
16787             par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
16788             dwarf = NULL_RTX;
16789           }
16790 
16791         /* The ith register is added to the PARALLEL RTX.  If i is even, reg_i
16792            is added as the 0th element; if i is odd, reg_i is added as the 1st
16793            element of the LDRD pattern shown above.  */
16794         XVECEXP (par, 0, (i % 2)) = tmp;
16795         dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
16796 
16797         if ((i % 2) == 1)
16798           {
16799             /* When the saved-register index (i) is odd, RTXs for both registers
16800                of the LDRD pattern shown above have been generated, and the
16801                pattern can be emitted now.  */
16802             par = emit_insn (par);
16803             REG_NOTES (par) = dwarf;
16804           }
16805 
16806         i++;
16807       }
16808 
16809   /* If the number of registers pushed is odd and return_in_pc is false, or
16810      the number of registers is even and return_in_pc is true, the last
16811      register is popped using LDR.  It can be PC as well.  Hence, adjust the
16812      stack first and then use LDR with post-increment.  */
16813 
16814   /* Increment the stack pointer, based on there being
16815      num_regs 4-byte registers to restore.  */
16816   tmp = gen_rtx_SET (VOIDmode,
16817                      stack_pointer_rtx,
16818                      plus_constant (Pmode, stack_pointer_rtx, 4 * i));
16819   RTX_FRAME_RELATED_P (tmp) = 1;
16820   emit_insn (tmp);
16821 
16822   dwarf = NULL_RTX;
16823 
16824   if (((num_regs % 2) == 1 && !return_in_pc)
16825       || ((num_regs % 2) == 0 && return_in_pc))
16826     {
16827       /* Scan for the single register to be popped.  Skip until the saved
16828          register is found.  */
16829       for (; (saved_regs_mask & (1 << j)) == 0; j++);
16830 
16831       /* Gen LDR with post increment here.  */
16832       tmp1 = gen_rtx_MEM (SImode,
16833                           gen_rtx_POST_INC (SImode,
16834                                             stack_pointer_rtx));
16835       set_mem_alias_set (tmp1, get_frame_alias_set ());
16836 
16837       reg = gen_rtx_REG (SImode, j);
16838       tmp = gen_rtx_SET (SImode, reg, tmp1);
16839       RTX_FRAME_RELATED_P (tmp) = 1;
16840       dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
16841 
16842       if (return_in_pc)
16843         {
16844           /* If return_in_pc, j must be PC_REGNUM.  */
16845           gcc_assert (j == PC_REGNUM);
16846           par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
16847           XVECEXP (par, 0, 0) = ret_rtx;
16848           XVECEXP (par, 0, 1) = tmp;
16849           par = emit_jump_insn (par);
16850         }
16851       else
16852         {
16853           par = emit_insn (tmp);
16854         }
16855 
16856       REG_NOTES (par) = dwarf;
16857     }
16858   else if ((num_regs % 2) == 1 && return_in_pc)
16859     {
16860       /* There are two registers left to be popped.  So, generate the pattern
16861          pop_multiple_with_stack_update_and_return to pop into PC.  */
16862       arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
16863     }
16864 
16865   return;
16866 }
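
/* Illustrative sketch: popping {r4, r5, r6} with this function would emit
   roughly

       ldrd    r4, r5, [sp]
       add     sp, sp, #8
       ldr     r6, [sp], #4

   i.e. LDRD patterns for each register pair loading from SP-relative slots,
   a single SP adjustment covering the paired registers, and a final LDR with
   post-increment for the odd register (which may be PC when returning).  */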
16867 
16868 /* Calculate the size of the return value that is passed in registers.  */
16869 static unsigned
16870 arm_size_return_regs (void)
16871 {
16872   enum machine_mode mode;
16873 
16874   if (crtl->return_rtx != 0)
16875     mode = GET_MODE (crtl->return_rtx);
16876   else
16877     mode = DECL_MODE (DECL_RESULT (current_function_decl));
16878 
16879   return GET_MODE_SIZE (mode);
16880 }
16881 
16882 /* Return true if the current function needs to save/restore LR.  */
16883 static bool
16884 thumb_force_lr_save (void)
16885 {
16886   return !cfun->machine->lr_save_eliminated
16887 	 && (!leaf_function_p ()
16888 	     || thumb_far_jump_used_p ()
16889 	     || df_regs_ever_live_p (LR_REGNUM));
16890 }
16891 
16892 
16893 /* Return true if r3 is used by any of the tail call insns in the
16894    current function.  */
16895 static bool
16896 any_sibcall_uses_r3 (void)
16897 {
16898   edge_iterator ei;
16899   edge e;
16900 
16901   if (!crtl->tail_call_emit)
16902     return false;
16903   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
16904     if (e->flags & EDGE_SIBCALL)
16905       {
16906 	rtx call = BB_END (e->src);
16907 	if (!CALL_P (call))
16908 	  call = prev_nonnote_nondebug_insn (call);
16909 	gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
16910 	if (find_regno_fusage (call, USE, 3))
16911 	  return true;
16912       }
16913   return false;
16914 }
16915 
16916 
16917 /* Compute the distance from register FROM to register TO.
16918    These can be the arg pointer (26), the soft frame pointer (25),
16919    the stack pointer (13) or the hard frame pointer (11).
16920    In thumb mode r7 is used as the soft frame pointer, if needed.
16921    Typical stack layout looks like this:
16922 
16923        old stack pointer -> |    |
16924                              ----
16925                             |    | \
16926                             |    |   saved arguments for
16927                             |    |   vararg functions
16928 			    |    | /
16929                               --
16930    hard FP & arg pointer -> |    | \
16931                             |    |   stack
16932                             |    |   frame
16933                             |    | /
16934                               --
16935                             |    | \
16936                             |    |   call saved
16937                             |    |   registers
16938       soft frame pointer -> |    | /
16939                               --
16940                             |    | \
16941                             |    |   local
16942                             |    |   variables
16943      locals base pointer -> |    | /
16944                               --
16945                             |    | \
16946                             |    |   outgoing
16947                             |    |   arguments
16948    current stack pointer -> |    | /
16949                               --
16950 
16951   For a given function some or all of these stack components
16952   may not be needed, giving rise to the possibility of
16953   eliminating some of the registers.
16954 
16955   The values returned by this function must reflect the behavior
16956   of arm_expand_prologue() and arm_compute_save_reg_mask().
16957 
16958   The sign of the number returned reflects the direction of stack
16959   growth, so the values are positive for all eliminations except
16960   from the soft frame pointer to the hard frame pointer.
16961 
16962   SFP may point just inside the local variables block to ensure correct
16963   alignment.  */
16964 
16965 
16966 /* Calculate stack offsets.  These are used to calculate register elimination
16967    offsets and in prologue/epilogue code.  Also calculates which registers
16968    should be saved.  */
16969 
16970 static arm_stack_offsets *
16971 arm_get_frame_offsets (void)
16972 {
16973   struct arm_stack_offsets *offsets;
16974   unsigned long func_type;
16975   int leaf;
16976   int saved;
16977   int core_saved;
16978   HOST_WIDE_INT frame_size;
16979   int i;
16980 
16981   offsets = &cfun->machine->stack_offsets;
16982 
16983   /* We need to know if we are a leaf function.  Unfortunately, it
16984      is possible to be called after start_sequence has been called,
16985      which causes get_insns to return the insns for the sequence,
16986      not the function, which will cause leaf_function_p to return
16987      the incorrect result.
16988 
16989      There is no need to know about leaf functions once reload has
16990      completed, and the frame size cannot be changed after that time,
16991      so we can safely use the cached value.  */
16992 
16993   if (reload_completed)
16994     return offsets;
16995 
16996   /* Initially this is the size of the local variables.  It will be translated
16997      into an offset once we have determined the size of preceding data.  */
16998   frame_size = ROUND_UP_WORD (get_frame_size ());
16999 
17000   leaf = leaf_function_p ();
17001 
17002   /* Space for variadic functions.  */
17003   offsets->saved_args = crtl->args.pretend_args_size;
17004 
17005   /* In Thumb mode this is incorrect, but never used.  */
17006   offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
17007                    arm_compute_static_chain_stack_bytes();
17008 
17009   if (TARGET_32BIT)
17010     {
17011       unsigned int regno;
17012 
17013       offsets->saved_regs_mask = arm_compute_save_reg_mask ();
17014       core_saved = bit_count (offsets->saved_regs_mask) * 4;
17015       saved = core_saved;
17016 
17017       /* We know that SP will be doubleword aligned on entry, and we must
17018 	 preserve that condition at any subroutine call.  We also require the
17019 	 soft frame pointer to be doubleword aligned.  */
17020 
17021       if (TARGET_REALLY_IWMMXT)
17022 	{
17023 	  /* Check for the call-saved iWMMXt registers.  */
17024 	  for (regno = FIRST_IWMMXT_REGNUM;
17025 	       regno <= LAST_IWMMXT_REGNUM;
17026 	       regno++)
17027 	    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
17028 	      saved += 8;
17029 	}
17030 
17031       func_type = arm_current_func_type ();
17032       /* Space for saved VFP registers.  */
17033       if (! IS_VOLATILE (func_type)
17034 	  && TARGET_HARD_FLOAT && TARGET_VFP)
17035 	saved += arm_get_vfp_saved_size ();
17036     }
17037   else /* TARGET_THUMB1 */
17038     {
17039       offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
17040       core_saved = bit_count (offsets->saved_regs_mask) * 4;
17041       saved = core_saved;
17042       if (TARGET_BACKTRACE)
17043 	saved += 16;
17044     }
17045 
17046   /* Saved registers include the stack frame.  */
17047   offsets->saved_regs = offsets->saved_args + saved +
17048                         arm_compute_static_chain_stack_bytes();
17049   offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
17050   /* A leaf function does not need any stack alignment if it has nothing
17051      on the stack.  */
17052   if (leaf && frame_size == 0
17053       /* However if it calls alloca(), we have a dynamically allocated
17054 	 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
17055       && ! cfun->calls_alloca)
17056     {
17057       offsets->outgoing_args = offsets->soft_frame;
17058       offsets->locals_base = offsets->soft_frame;
17059       return offsets;
17060     }
17061 
17062   /* Ensure SFP has the correct alignment.  */
17063   if (ARM_DOUBLEWORD_ALIGN
17064       && (offsets->soft_frame & 7))
17065     {
17066       offsets->soft_frame += 4;
17067       /* Try to align stack by pushing an extra reg.  Don't bother doing this
17068          when there is a stack frame as the alignment will be rolled into
17069 	 the normal stack adjustment.  */
17070       if (frame_size + crtl->outgoing_args_size == 0)
17071 	{
17072 	  int reg = -1;
17073 
17074 	  /* If it is safe to use r3, then do so.  This sometimes
17075 	     generates better code on Thumb-2 by avoiding the need to
17076 	     use 32-bit push/pop instructions.  */
17077  	  if (! any_sibcall_uses_r3 ()
17078 	      && arm_size_return_regs () <= 12
17079 	      && (offsets->saved_regs_mask & (1 << 3)) == 0)
17080 	    {
17081 	      reg = 3;
17082 	    }
17083 	  else
17084 	    for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
17085 	      {
17086 		/* Avoid fixed registers; they may be changed at
17087 		   arbitrary times so it's unsafe to restore them
17088 		   during the epilogue.  */
17089 		if (!fixed_regs[i]
17090 		    && (offsets->saved_regs_mask & (1 << i)) == 0)
17091 		  {
17092 		    reg = i;
17093 		    break;
17094 		  }
17095 	      }
17096 
17097 	  if (reg != -1)
17098 	    {
17099 	      offsets->saved_regs += 4;
17100 	      offsets->saved_regs_mask |= (1 << reg);
17101 	    }
17102 	}
17103     }
17104 
17105   offsets->locals_base = offsets->soft_frame + frame_size;
17106   offsets->outgoing_args = (offsets->locals_base
17107 			    + crtl->outgoing_args_size);
17108 
17109   if (ARM_DOUBLEWORD_ALIGN)
17110     {
17111       /* Ensure SP remains doubleword aligned.  */
17112       if (offsets->outgoing_args & 7)
17113 	offsets->outgoing_args += 4;
17114       gcc_assert (!(offsets->outgoing_args & 7));
17115     }
17116 
17117   return offsets;
17118 }
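
/* Illustrative sketch of the resulting layout (hypothetical values): for an
   ARM function with no pretend args, {r4, r5, fp, lr} saved, 16 bytes of
   locals and 8 bytes of outgoing arguments, the offsets would come out as
   saved_args = 0, saved_regs = 16, soft_frame = 16, locals_base = 32 and
   outgoing_args = 40, with outgoing_args then rounded up further if
   doubleword alignment of SP requires it.  */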
17119 
17120 
17121 /* Calculate the relative offsets for the different stack pointers.  Positive
17122    offsets are in the direction of stack growth.  */
17123 
17124 HOST_WIDE_INT
17125 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
17126 {
17127   arm_stack_offsets *offsets;
17128 
17129   offsets = arm_get_frame_offsets ();
17130 
17131   /* OK, now we have enough information to compute the distances.
17132      There must be an entry in these switch tables for each pair
17133      of registers in ELIMINABLE_REGS, even if some of the entries
17134      seem to be redundant or useless.  */
17135   switch (from)
17136     {
17137     case ARG_POINTER_REGNUM:
17138       switch (to)
17139 	{
17140 	case THUMB_HARD_FRAME_POINTER_REGNUM:
17141 	  return 0;
17142 
17143 	case FRAME_POINTER_REGNUM:
17144 	  /* This is the reverse of the soft frame pointer
17145 	     to hard frame pointer elimination below.  */
17146 	  return offsets->soft_frame - offsets->saved_args;
17147 
17148 	case ARM_HARD_FRAME_POINTER_REGNUM:
17149 	  /* This is only non-zero in the case where the static chain register
17150 	     is stored above the frame.  */
17151 	  return offsets->frame - offsets->saved_args - 4;
17152 
17153 	case STACK_POINTER_REGNUM:
17154 	  /* If nothing has been pushed on the stack at all
17155 	     then this will return -4.  This *is* correct!  */
17156 	  return offsets->outgoing_args - (offsets->saved_args + 4);
17157 
17158 	default:
17159 	  gcc_unreachable ();
17160 	}
17161       gcc_unreachable ();
17162 
17163     case FRAME_POINTER_REGNUM:
17164       switch (to)
17165 	{
17166 	case THUMB_HARD_FRAME_POINTER_REGNUM:
17167 	  return 0;
17168 
17169 	case ARM_HARD_FRAME_POINTER_REGNUM:
17170 	  /* The hard frame pointer points to the top entry in the
17171 	     stack frame.  The soft frame pointer to the bottom entry
17172 	     in the stack frame.  If there is no stack frame at all,
17173 	     then they are identical.  */
17174 
17175 	  return offsets->frame - offsets->soft_frame;
17176 
17177 	case STACK_POINTER_REGNUM:
17178 	  return offsets->outgoing_args - offsets->soft_frame;
17179 
17180 	default:
17181 	  gcc_unreachable ();
17182 	}
17183       gcc_unreachable ();
17184 
17185     default:
17186       /* You cannot eliminate from the stack pointer.
17187 	 In theory you could eliminate from the hard frame
17188 	 pointer to the stack pointer, but this will never
17189 	 happen, since if a stack frame is not needed the
17190 	 hard frame pointer will never be used.  */
17191       gcc_unreachable ();
17192     }
17193 }
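
/* Illustrative sketch: with the hypothetical offsets above (saved_args = 0,
   soft_frame = 16, outgoing_args = 40), eliminating ARG_POINTER_REGNUM into
   STACK_POINTER_REGNUM would return 40 - (0 + 4) = 36, and eliminating
   FRAME_POINTER_REGNUM into STACK_POINTER_REGNUM would return 40 - 16 = 24.  */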
17194 
17195 /* Given FROM and TO register numbers, say whether this elimination is
17196    allowed.  Frame pointer elimination is automatically handled.
17197 
17198    All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
17199    HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
17200    pointer, we must eliminate FRAME_POINTER_REGNUM into
17201    HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
17202    ARG_POINTER_REGNUM.  */
17203 
17204 bool
17205 arm_can_eliminate (const int from, const int to)
17206 {
17207   return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
17208           (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
17209           (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
17210           (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
17211            true);
17212 }
17213 
17214 /* Emit RTL to save coprocessor registers on function entry.  Returns the
17215    number of bytes pushed.  */
17216 
17217 static int
17218 arm_save_coproc_regs(void)
17219 {
17220   int saved_size = 0;
17221   unsigned reg;
17222   unsigned start_reg;
17223   rtx insn;
17224 
17225   for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
17226     if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
17227       {
17228 	insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
17229 	insn = gen_rtx_MEM (V2SImode, insn);
17230 	insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
17231 	RTX_FRAME_RELATED_P (insn) = 1;
17232 	saved_size += 8;
17233       }
17234 
17235   if (TARGET_HARD_FLOAT && TARGET_VFP)
17236     {
17237       start_reg = FIRST_VFP_REGNUM;
17238 
17239       for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
17240 	{
17241 	  if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
17242 	      && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
17243 	    {
17244 	      if (start_reg != reg)
17245 		saved_size += vfp_emit_fstmd (start_reg,
17246 					      (reg - start_reg) / 2);
17247 	      start_reg = reg + 2;
17248 	    }
17249 	}
17250       if (start_reg != reg)
17251 	saved_size += vfp_emit_fstmd (start_reg,
17252 				      (reg - start_reg) / 2);
17253     }
17254   return saved_size;
17255 }
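
/* Illustrative sketch: if d8-d11 are the only live call-saved VFP registers,
   the loop above would end up making a single vfp_emit_fstmd call covering
   four D registers, which would normally assemble to something like

       vpush   {d8-d11}

   and the function would report 32 bytes pushed.  */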
17256 
17257 
17258 /* Set the Thumb frame pointer from the stack pointer.  */
17259 
17260 static void
17261 thumb_set_frame_pointer (arm_stack_offsets *offsets)
17262 {
17263   HOST_WIDE_INT amount;
17264   rtx insn, dwarf;
17265 
17266   amount = offsets->outgoing_args - offsets->locals_base;
17267   if (amount < 1024)
17268     insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
17269 				  stack_pointer_rtx, GEN_INT (amount)));
17270   else
17271     {
17272       emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
17273       /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
17274          expects the first two operands to be the same.  */
17275       if (TARGET_THUMB2)
17276 	{
17277 	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
17278 					stack_pointer_rtx,
17279 					hard_frame_pointer_rtx));
17280 	}
17281       else
17282 	{
17283 	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
17284 					hard_frame_pointer_rtx,
17285 					stack_pointer_rtx));
17286 	}
17287       dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
17288 			   plus_constant (Pmode, stack_pointer_rtx, amount));
17289       RTX_FRAME_RELATED_P (dwarf) = 1;
17290       add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
17291     }
17292 
17293   RTX_FRAME_RELATED_P (insn) = 1;
17294 }
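
/* Illustrative sketch: for a small frame (say AMOUNT == 16) this emits a
   single "add r7, sp, #16".  For a large frame on Thumb-2 (say AMOUNT ==
   2048) it instead emits roughly

       mov     r7, #2048
       add     r7, sp, r7

   together with a REG_FRAME_RELATED_EXPR note describing r7 = sp + 2048.  */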
17295 
17296 /* Generate the prologue instructions for entry into an ARM or Thumb-2
17297    function.  */
17298 void
17299 arm_expand_prologue (void)
17300 {
17301   rtx amount;
17302   rtx insn;
17303   rtx ip_rtx;
17304   unsigned long live_regs_mask;
17305   unsigned long func_type;
17306   int fp_offset = 0;
17307   int saved_pretend_args = 0;
17308   int saved_regs = 0;
17309   unsigned HOST_WIDE_INT args_to_push;
17310   arm_stack_offsets *offsets;
17311 
17312   func_type = arm_current_func_type ();
17313 
17314   /* Naked functions don't have prologues.  */
17315   if (IS_NAKED (func_type))
17316     return;
17317 
17318   /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
17319   args_to_push = crtl->args.pretend_args_size;
17320 
17321   /* Compute which register we will have to save onto the stack.  */
17322   offsets = arm_get_frame_offsets ();
17323   live_regs_mask = offsets->saved_regs_mask;
17324 
17325   ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
17326 
17327   if (IS_STACKALIGN (func_type))
17328     {
17329       rtx r0, r1;
17330 
17331       /* Handle a word-aligned stack pointer.  We generate the following:
17332 
17333 	  mov r0, sp
17334 	  bic r1, r0, #7
17335 	  mov sp, r1
17336 	  <save and restore r0 in normal prologue/epilogue>
17337 	  mov sp, r0
17338 	  bx lr
17339 
17340 	 The unwinder doesn't need to know about the stack realignment.
17341 	 Just tell it we saved SP in r0.  */
17342       gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
17343 
17344       r0 = gen_rtx_REG (SImode, 0);
17345       r1 = gen_rtx_REG (SImode, 1);
17346 
17347       insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
17348       RTX_FRAME_RELATED_P (insn) = 1;
17349       add_reg_note (insn, REG_CFA_REGISTER, NULL);
17350 
17351       emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
17352 
17353       /* ??? The CFA changes here, which may cause GDB to conclude that it
17354 	 has entered a different function.  That said, the unwind info is
17355 	 correct, individually, before and after this instruction because
17356 	 we've described the save of SP, which will override the default
17357 	 handling of SP as restoring from the CFA.  */
17358       emit_insn (gen_movsi (stack_pointer_rtx, r1));
17359     }
17360 
17361   /* For APCS frames, if the IP register is clobbered
17362      when creating the frame, save that register in a special
17363      way.  */
17364   if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
17365     {
17366       if (IS_INTERRUPT (func_type))
17367 	{
17368 	  /* Interrupt functions must not corrupt any registers.
17369 	     Creating a frame pointer however, corrupts the IP
17370 	     register, so we must push it first.  */
17371 	  emit_multi_reg_push (1 << IP_REGNUM);
17372 
17373 	  /* Do not set RTX_FRAME_RELATED_P on this insn.
17374 	     The dwarf stack unwinding code only wants to see one
17375 	     stack decrement per function, and this is not it.  If
17376 	     this instruction is labeled as being part of the frame
17377 	     creation sequence then dwarf2out_frame_debug_expr will
17378 	     die when it encounters the assignment of IP to FP
17379 	     later on, since the use of SP here establishes SP as
17380 	     the CFA register and not IP.
17381 
17382 	     Anyway this instruction is not really part of the stack
17383 	     frame creation although it is part of the prologue.  */
17384 	}
17385       else if (IS_NESTED (func_type))
17386 	{
17387 	  /* The static chain register is the same as the IP register,
17388 	     which is used as a scratch register during stack frame creation.
17389 	     To get around this we need to find somewhere to store IP
17390 	     whilst the frame is being created.  We try the following
17391 	     places in order:
17392 
17393 	       1. The last argument register.
17394 	       2. A slot on the stack above the frame.  (This only
17395 	          works if the function is not a varargs function).
17396 	       3. Register r3, after pushing the argument registers
17397 	          onto the stack.
17398 
17399 	     Note - we only need to tell the dwarf2 backend about the SP
17400 	     adjustment in the second variant; the static chain register
17401 	     doesn't need to be unwound, as it doesn't contain a value
17402 	     inherited from the caller.  */
17403 
17404 	  if (df_regs_ever_live_p (3) == false)
17405 	    insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
17406 	  else if (args_to_push == 0)
17407 	    {
17408 	      rtx dwarf;
17409 
17410 	      gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
17411 	      saved_regs += 4;
17412 
17413 	      insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
17414 	      insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
17415 	      fp_offset = 4;
17416 
17417 	      /* Just tell the dwarf backend that we adjusted SP.  */
17418 	      dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17419 				   plus_constant (Pmode, stack_pointer_rtx,
17420 						  -fp_offset));
17421 	      RTX_FRAME_RELATED_P (insn) = 1;
17422 	      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
17423 	    }
17424 	  else
17425 	    {
17426 	      /* Store the args on the stack.  */
17427 	      if (cfun->machine->uses_anonymous_args)
17428 		insn = emit_multi_reg_push
17429 		  ((0xf0 >> (args_to_push / 4)) & 0xf);
17430 	      else
17431 		insn = emit_insn
17432 		  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17433 			       GEN_INT (- args_to_push)));
17434 
17435 	      RTX_FRAME_RELATED_P (insn) = 1;
17436 
17437 	      saved_pretend_args = 1;
17438 	      fp_offset = args_to_push;
17439 	      args_to_push = 0;
17440 
17441 	      /* Now reuse r3 to preserve IP.  */
17442 	      emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
17443 	    }
17444 	}
17445 
17446       insn = emit_set_insn (ip_rtx,
17447 			    plus_constant (Pmode, stack_pointer_rtx,
17448 					   fp_offset));
17449       RTX_FRAME_RELATED_P (insn) = 1;
17450     }
17451 
17452   if (args_to_push)
17453     {
17454       /* Push the argument registers, or reserve space for them.  */
17455       if (cfun->machine->uses_anonymous_args)
17456 	insn = emit_multi_reg_push
17457 	  ((0xf0 >> (args_to_push / 4)) & 0xf);
17458       else
17459 	insn = emit_insn
17460 	  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17461 		       GEN_INT (- args_to_push)));
17462       RTX_FRAME_RELATED_P (insn) = 1;
17463     }
17464 
17465   /* If this is an interrupt service routine, and the link register
17466      is going to be pushed, and we're not generating the extra
17467      push of IP (needed when a frame is needed and the frame layout is APCS),
17468      subtracting four from LR now will mean that the function return
17469      can be done with a single instruction.  */
17470   if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
17471       && (live_regs_mask & (1 << LR_REGNUM)) != 0
17472       && !(frame_pointer_needed && TARGET_APCS_FRAME)
17473       && TARGET_ARM)
17474     {
17475       rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
17476 
17477       emit_set_insn (lr, plus_constant (SImode, lr, -4));
17478     }
17479 
17480   if (live_regs_mask)
17481     {
17482       saved_regs += bit_count (live_regs_mask) * 4;
17483       if (optimize_size && !frame_pointer_needed
17484 	  && saved_regs == offsets->saved_regs - offsets->saved_args)
17485 	{
17486 	  /* If no coprocessor registers are being pushed and we don't have
17487 	     to worry about a frame pointer then push extra registers to
17488 	     create the stack frame.  This is done in a way that does not
17489 	     alter the frame layout, so it is independent of the epilogue.  */
17490 	  int n;
17491 	  int frame;
17492 	  n = 0;
17493 	  while (n < 8 && (live_regs_mask & (1 << n)) == 0)
17494 	    n++;
17495 	  frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
17496 	  if (frame && n * 4 >= frame)
17497 	    {
17498 	      n = frame / 4;
17499 	      live_regs_mask |= (1 << n) - 1;
17500 	      saved_regs += frame;
17501 	    }
17502 	}
17503 
17504       if (TARGET_LDRD
17505 	  && current_tune->prefer_ldrd_strd
17506           && !optimize_function_for_size_p (cfun))
17507         {
17508           if (TARGET_THUMB2)
17509             {
17510               thumb2_emit_strd_push (live_regs_mask);
17511             }
17512           else
17513             {
17514               insn = emit_multi_reg_push (live_regs_mask);
17515               RTX_FRAME_RELATED_P (insn) = 1;
17516             }
17517         }
17518       else
17519         {
17520           insn = emit_multi_reg_push (live_regs_mask);
17521           RTX_FRAME_RELATED_P (insn) = 1;
17522         }
17523     }
17524 
17525   if (! IS_VOLATILE (func_type))
17526     saved_regs += arm_save_coproc_regs ();
17527 
17528   if (frame_pointer_needed && TARGET_ARM)
17529     {
17530       /* Create the new frame pointer.  */
17531       if (TARGET_APCS_FRAME)
17532 	{
17533 	  insn = GEN_INT (-(4 + args_to_push + fp_offset));
17534 	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
17535 	  RTX_FRAME_RELATED_P (insn) = 1;
17536 
17537 	  if (IS_NESTED (func_type))
17538 	    {
17539 	      /* Recover the static chain register.  */
17540 	      if (!df_regs_ever_live_p (3)
17541 		  || saved_pretend_args)
17542 		insn = gen_rtx_REG (SImode, 3);
17543 	      else /* if (crtl->args.pretend_args_size == 0) */
17544 		{
17545 		  insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
17546 		  insn = gen_frame_mem (SImode, insn);
17547 		}
17548 	      emit_set_insn (ip_rtx, insn);
17549 	      /* Add a USE to stop propagate_one_insn() from barfing.  */
17550 	      emit_insn (gen_force_register_use (ip_rtx));
17551 	    }
17552 	}
17553       else
17554 	{
17555 	  insn = GEN_INT (saved_regs - 4);
17556 	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
17557 					stack_pointer_rtx, insn));
17558 	  RTX_FRAME_RELATED_P (insn) = 1;
17559 	}
17560     }
17561 
17562   if (flag_stack_usage_info)
17563     current_function_static_stack_size
17564       = offsets->outgoing_args - offsets->saved_args;
17565 
17566   if (offsets->outgoing_args != offsets->saved_args + saved_regs)
17567     {
17568       /* This add can produce multiple insns for a large constant, so we
17569 	 need to get tricky.  */
17570       rtx last = get_last_insn ();
17571 
17572       amount = GEN_INT (offsets->saved_args + saved_regs
17573 			- offsets->outgoing_args);
17574 
17575       insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17576 				    amount));
17577       do
17578 	{
17579 	  last = last ? NEXT_INSN (last) : get_insns ();
17580 	  RTX_FRAME_RELATED_P (last) = 1;
17581 	}
17582       while (last != insn);
17583 
17584       /* If the frame pointer is needed, emit a special barrier that
17585 	 will prevent the scheduler from moving stores to the frame
17586 	 before the stack adjustment.  */
17587       if (frame_pointer_needed)
17588 	insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
17589 					 hard_frame_pointer_rtx));
17590     }
17591 
17592 
17593   if (frame_pointer_needed && TARGET_THUMB2)
17594     thumb_set_frame_pointer (offsets);
17595 
17596   if (flag_pic && arm_pic_register != INVALID_REGNUM)
17597     {
17598       unsigned long mask;
17599 
17600       mask = live_regs_mask;
17601       mask &= THUMB2_WORK_REGS;
17602       if (!IS_NESTED (func_type))
17603 	mask |= (1 << IP_REGNUM);
17604       arm_load_pic_register (mask);
17605     }
17606 
17607   /* If we are profiling, make sure no instructions are scheduled before
17608      the call to mcount.  Similarly if the user has requested no
17609      scheduling in the prolog.  Similarly if we want non-call exceptions
17610      using the EABI unwinder, to prevent faulting instructions from being
17611      swapped with a stack adjustment.  */
17612   if (crtl->profile || !TARGET_SCHED_PROLOG
17613       || (arm_except_unwind_info (&global_options) == UI_TARGET
17614 	  && cfun->can_throw_non_call_exceptions))
17615     emit_insn (gen_blockage ());
17616 
17617   /* If the link register is being kept alive, with the return address in it,
17618      then make sure that it does not get reused by the ce2 pass.  */
17619   if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
17620     cfun->machine->lr_save_eliminated = 1;
17621 }
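
/* Illustrative sketch: for a simple ARM function using an APCS frame (and no
   static-chain complications), the code above would typically produce a
   prologue of the shape

       mov     ip, sp
       push    {fp, ip, lr, pc}
       sub     fp, ip, #4
       sub     sp, sp, #<locals + outgoing args>

   with the IP copy providing the old SP value that gets saved, and each
   frame-related insn annotated for the DWARF unwinder as described in the
   comments above.  */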
17622 
17623 /* Print condition code to STREAM.  Helper function for arm_print_operand.  */
17624 static void
17625 arm_print_condition (FILE *stream)
17626 {
17627   if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
17628     {
17629       /* Branch conversion is not implemented for Thumb-2.  */
17630       if (TARGET_THUMB)
17631 	{
17632 	  output_operand_lossage ("predicated Thumb instruction");
17633 	  return;
17634 	}
17635       if (current_insn_predicate != NULL)
17636 	{
17637 	  output_operand_lossage
17638 	    ("predicated instruction in conditional sequence");
17639 	  return;
17640 	}
17641 
17642       fputs (arm_condition_codes[arm_current_cc], stream);
17643     }
17644   else if (current_insn_predicate)
17645     {
17646       enum arm_cond_code code;
17647 
17648       if (TARGET_THUMB1)
17649 	{
17650 	  output_operand_lossage ("predicated Thumb instruction");
17651 	  return;
17652 	}
17653 
17654       code = get_arm_condition_code (current_insn_predicate);
17655       fputs (arm_condition_codes[code], stream);
17656     }
17657 }
17658 
17659 
17660 /* If CODE is 'd', then X is a condition operand and the instruction
17661    should only be executed if the condition is true.
17662    If CODE is 'D', then X is a condition operand and the instruction
17663    should only be executed if the condition is false: however, if the mode
17664    of the comparison is CCFPEmode, then always execute the instruction -- we
17665    do this because in these circumstances !GE does not necessarily imply LT;
17666    in these cases the instruction pattern will take care to make sure that
17667    an instruction containing %d will follow, thereby undoing the effects of
17668    doing this instruction unconditionally.
17669    If CODE is 'N' then X is a floating point operand that must be negated
17670    before output.
17671    If CODE is 'B' then output a bitwise inverted value of X (a const int).
17672    If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
17673 static void
17674 arm_print_operand (FILE *stream, rtx x, int code)
17675 {
17676   switch (code)
17677     {
17678     case '@':
17679       fputs (ASM_COMMENT_START, stream);
17680       return;
17681 
17682     case '_':
17683       fputs (user_label_prefix, stream);
17684       return;
17685 
17686     case '|':
17687       fputs (REGISTER_PREFIX, stream);
17688       return;
17689 
17690     case '?':
17691       arm_print_condition (stream);
17692       return;
17693 
17694     case '(':
17695       /* Nothing in unified syntax, otherwise the current condition code.  */
17696       if (!TARGET_UNIFIED_ASM)
17697 	arm_print_condition (stream);
17698       break;
17699 
17700     case ')':
17701       /* The current condition code in unified syntax, otherwise nothing.  */
17702       if (TARGET_UNIFIED_ASM)
17703 	arm_print_condition (stream);
17704       break;
17705 
17706     case '.':
17707       /* The current condition code for a condition code setting instruction.
17708 	 Preceded by 's' in unified syntax, otherwise followed by 's'.  */
17709       if (TARGET_UNIFIED_ASM)
17710 	{
17711 	  fputc('s', stream);
17712 	  arm_print_condition (stream);
17713 	}
17714       else
17715 	{
17716 	  arm_print_condition (stream);
17717 	  fputc('s', stream);
17718 	}
17719       return;
17720 
17721     case '!':
17722       /* If the instruction is conditionally executed then print
17723 	 the current condition code, otherwise print 's'.  */
17724       gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
17725       if (current_insn_predicate)
17726 	arm_print_condition (stream);
17727       else
17728 	fputc('s', stream);
17729       break;
17730 
17731     /* %# is a "break" sequence. It doesn't output anything, but is used to
17732        separate e.g. operand numbers from following text, if that text consists
17733        of further digits which we don't want to be part of the operand
17734        number.  */
17735     case '#':
17736       return;
17737 
17738     case 'N':
17739       {
17740 	REAL_VALUE_TYPE r;
17741 	REAL_VALUE_FROM_CONST_DOUBLE (r, x);
17742 	r = real_value_negate (&r);
17743 	fprintf (stream, "%s", fp_const_from_val (&r));
17744       }
17745       return;
17746 
17747     /* An integer or symbol address without a preceding # sign.  */
17748     case 'c':
17749       switch (GET_CODE (x))
17750 	{
17751 	case CONST_INT:
17752 	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
17753 	  break;
17754 
17755 	case SYMBOL_REF:
17756 	  output_addr_const (stream, x);
17757 	  break;
17758 
17759 	case CONST:
17760 	  if (GET_CODE (XEXP (x, 0)) == PLUS
17761 	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
17762 	    {
17763 	      output_addr_const (stream, x);
17764 	      break;
17765 	    }
17766 	  /* Fall through.  */
17767 
17768 	default:
17769 	  output_operand_lossage ("Unsupported operand for code '%c'", code);
17770 	}
17771       return;
17772 
17773     /* An integer that we want to print in HEX.  */
17774     case 'x':
17775       switch (GET_CODE (x))
17776 	{
17777 	case CONST_INT:
17778 	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
17779 	  break;
17780 
17781 	default:
17782 	  output_operand_lossage ("Unsupported operand for code '%c'", code);
17783 	}
17784       return;
17785 
17786     case 'B':
17787       if (CONST_INT_P (x))
17788 	{
17789 	  HOST_WIDE_INT val;
17790 	  val = ARM_SIGN_EXTEND (~INTVAL (x));
17791 	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
17792 	}
17793       else
17794 	{
17795 	  putc ('~', stream);
17796 	  output_addr_const (stream, x);
17797 	}
17798       return;
17799 
17800     case 'L':
17801       /* The low 16 bits of an immediate constant.  */
17802       fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
17803       return;
17804 
17805     case 'i':
17806       fprintf (stream, "%s", arithmetic_instr (x, 1));
17807       return;
17808 
17809     case 'I':
17810       fprintf (stream, "%s", arithmetic_instr (x, 0));
17811       return;
17812 
17813     case 'S':
17814       {
17815 	HOST_WIDE_INT val;
17816 	const char *shift;
17817 
17818 	shift = shift_op (x, &val);
17819 
17820 	if (shift)
17821 	  {
17822 	    fprintf (stream, ", %s ", shift);
17823 	    if (val == -1)
17824 	      arm_print_operand (stream, XEXP (x, 1), 0);
17825 	    else
17826 	      fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
17827 	  }
17828       }
17829       return;
17830 
17831       /* An explanation of the 'Q', 'R' and 'H' register operands:
17832 
17833 	 In a pair of registers containing a DI or DF value the 'Q'
17834 	 operand returns the register number of the register containing
17835 	 the least significant part of the value.  The 'R' operand returns
17836 	 the register number of the register containing the most
17837 	 significant part of the value.
17838 
17839 	 The 'H' operand returns the higher of the two register numbers.
17840 	 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
17841 	 same as the 'Q' operand, since the most significant part of the
17842 	 value is held in the lower number register.  The reverse is true
17843 	 on systems where WORDS_BIG_ENDIAN is false.
17844 
17845 	 The purpose of these operands is to distinguish between cases
17846 	 where the endian-ness of the values is important (for example
17847 	 when they are added together), and cases where the endian-ness
17848 	 is irrelevant, but the order of register operations is important.
17849 	 For example when loading a value from memory into a register
17850 	 pair, the endian-ness does not matter.  Provided that the value
17851 	 from the lower memory address is put into the lower numbered
17852 	 register, and the value from the higher address is put into the
17853 	 higher numbered register, the load will work regardless of whether
17854 	 the value being loaded is big-wordian or little-wordian.  The
17855 	 order of the two register loads can matter however, if the address
17856 	 of the memory location is actually held in one of the registers
17857 	 being overwritten by the load.
17858 
17859 	 The 'Q' and 'R' constraints are also available for 64-bit
17860 	 constants.  */
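      /* Illustrative sketch: for a little-endian DImode value held in the
         register pair r0/r1, "%Q0" prints r0 (the least significant half),
         "%R0" prints r1 (the most significant half) and "%H0" prints r1 (the
         higher-numbered register); with WORDS_BIG_ENDIAN the roles of %Q and
         %R swap while %H is unchanged.  */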
17861     case 'Q':
17862       if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
17863 	{
17864 	  rtx part = gen_lowpart (SImode, x);
17865 	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
17866 	  return;
17867 	}
17868 
17869       if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17870 	{
17871 	  output_operand_lossage ("invalid operand for code '%c'", code);
17872 	  return;
17873 	}
17874 
17875       asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
17876       return;
17877 
17878     case 'R':
17879       if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
17880 	{
17881 	  enum machine_mode mode = GET_MODE (x);
17882 	  rtx part;
17883 
17884 	  if (mode == VOIDmode)
17885 	    mode = DImode;
17886 	  part = gen_highpart_mode (SImode, mode, x);
17887 	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
17888 	  return;
17889 	}
17890 
17891       if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17892 	{
17893 	  output_operand_lossage ("invalid operand for code '%c'", code);
17894 	  return;
17895 	}
17896 
17897       asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
17898       return;
17899 
17900     case 'H':
17901       if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17902 	{
17903 	  output_operand_lossage ("invalid operand for code '%c'", code);
17904 	  return;
17905 	}
17906 
17907       asm_fprintf (stream, "%r", REGNO (x) + 1);
17908       return;
17909 
17910     case 'J':
17911       if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17912 	{
17913 	  output_operand_lossage ("invalid operand for code '%c'", code);
17914 	  return;
17915 	}
17916 
17917       asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
17918       return;
17919 
17920     case 'K':
17921       if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17922 	{
17923 	  output_operand_lossage ("invalid operand for code '%c'", code);
17924 	  return;
17925 	}
17926 
17927       asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
17928       return;
17929 
17930     case 'm':
17931       asm_fprintf (stream, "%r",
17932 		   REG_P (XEXP (x, 0))
17933 		   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
17934       return;
17935 
17936     case 'M':
17937       asm_fprintf (stream, "{%r-%r}",
17938 		   REGNO (x),
17939 		   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
17940       return;
17941 
17942     /* Like 'M', but writing doubleword vector registers, for use by Neon
17943        insns.  */
17944     case 'h':
17945       {
17946         int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
17947         int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
17948         if (numregs == 1)
17949           asm_fprintf (stream, "{d%d}", regno);
17950         else
17951           asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
17952       }
17953       return;
17954 
17955     case 'd':
17956       /* CONST_TRUE_RTX means always -- that's the default.  */
17957       if (x == const_true_rtx)
17958 	return;
17959 
17960       if (!COMPARISON_P (x))
17961 	{
17962 	  output_operand_lossage ("invalid operand for code '%c'", code);
17963 	  return;
17964 	}
17965 
17966       fputs (arm_condition_codes[get_arm_condition_code (x)],
17967 	     stream);
17968       return;
17969 
17970     case 'D':
17971       /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
17972 	 want to do that.  */
17973       if (x == const_true_rtx)
17974 	{
17975 	  output_operand_lossage ("instruction never executed");
17976 	  return;
17977 	}
17978       if (!COMPARISON_P (x))
17979 	{
17980 	  output_operand_lossage ("invalid operand for code '%c'", code);
17981 	  return;
17982 	}
17983 
17984       fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
17985 				 (get_arm_condition_code (x))],
17986 	     stream);
17987       return;
17988 
17989     case 's':
17990     case 'V':
17991     case 'W':
17992     case 'X':
17993     case 'Y':
17994     case 'Z':
17995       /* Former Maverick support, removed after GCC-4.7.  */
17996       output_operand_lossage ("obsolete Maverick format code '%c'", code);
17997       return;
17998 
17999     case 'U':
18000       if (!REG_P (x)
18001 	  || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
18002 	  || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
18003 	/* Bad value for wCG register number.  */
18004 	{
18005 	  output_operand_lossage ("invalid operand for code '%c'", code);
18006 	  return;
18007 	}
18008 
18009       else
18010 	fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
18011       return;
18012 
18013       /* Print an iWMMXt control register name.  */
18014     case 'w':
18015       if (!CONST_INT_P (x)
18016 	  || INTVAL (x) < 0
18017 	  || INTVAL (x) >= 16)
18018 	/* Bad value for wC register number.  */
18019 	{
18020 	  output_operand_lossage ("invalid operand for code '%c'", code);
18021 	  return;
18022 	}
18023 
18024       else
18025 	{
18026 	  static const char * wc_reg_names [16] =
18027 	    {
18028 	      "wCID",  "wCon",  "wCSSF", "wCASF",
18029 	      "wC4",   "wC5",   "wC6",   "wC7",
18030 	      "wCGR0", "wCGR1", "wCGR2", "wCGR3",
18031 	      "wC12",  "wC13",  "wC14",  "wC15"
18032 	    };
18033 
18034 	  fputs (wc_reg_names [INTVAL (x)], stream);
18035 	}
18036       return;
18037 
18038     /* Print the high single-precision register of a VFP double-precision
18039        register.  */
18040     case 'p':
18041       {
18042         int mode = GET_MODE (x);
18043         int regno;
18044 
18045         if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
18046           {
18047 	    output_operand_lossage ("invalid operand for code '%c'", code);
18048 	    return;
18049           }
18050 
18051         regno = REGNO (x);
18052         if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
18053           {
18054 	    output_operand_lossage ("invalid operand for code '%c'", code);
18055 	    return;
18056           }
18057 
18058 	fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
18059       }
18060       return;
18061 
18062     /* Print a VFP/Neon double precision or quad precision register name.  */
18063     case 'P':
18064     case 'q':
18065       {
18066 	int mode = GET_MODE (x);
18067 	int is_quad = (code == 'q');
18068 	int regno;
18069 
18070 	if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
18071 	  {
18072 	    output_operand_lossage ("invalid operand for code '%c'", code);
18073 	    return;
18074 	  }
18075 
18076 	if (!REG_P (x)
18077 	    || !IS_VFP_REGNUM (REGNO (x)))
18078 	  {
18079 	    output_operand_lossage ("invalid operand for code '%c'", code);
18080 	    return;
18081 	  }
18082 
18083 	regno = REGNO (x);
18084 	if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
18085             || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
18086 	  {
18087 	    output_operand_lossage ("invalid operand for code '%c'", code);
18088 	    return;
18089 	  }
18090 
18091 	fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
18092 	  (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
18093       }
18094       return;
18095 
18096     /* These two codes print the low/high doubleword register of a Neon quad
18097        register, respectively.  For pair-structure types, can also print
18098        low/high quadword registers.  */
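    /* As an illustration: a 16-byte mode value held in q2, i.e. VFP register
       number FIRST_VFP_REGNUM + 8, prints as "d4" for 'e' and "d5" for 'f'.  */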
18099     case 'e':
18100     case 'f':
18101       {
18102         int mode = GET_MODE (x);
18103         int regno;
18104 
18105         if ((GET_MODE_SIZE (mode) != 16
18106 	     && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
18107           {
18108 	    output_operand_lossage ("invalid operand for code '%c'", code);
18109 	    return;
18110           }
18111 
18112         regno = REGNO (x);
18113         if (!NEON_REGNO_OK_FOR_QUAD (regno))
18114           {
18115 	    output_operand_lossage ("invalid operand for code '%c'", code);
18116 	    return;
18117           }
18118 
18119         if (GET_MODE_SIZE (mode) == 16)
18120           fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
18121 				  + (code == 'f' ? 1 : 0));
18122         else
18123           fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
18124 				  + (code == 'f' ? 1 : 0));
18125       }
18126       return;
18127 
18128     /* Print a VFPv3 floating-point constant, represented as an integer
18129        index.  */
18130     case 'G':
18131       {
18132         int index = vfp3_const_double_index (x);
18133 	gcc_assert (index != -1);
18134 	fprintf (stream, "%d", index);
18135       }
18136       return;
18137 
18138     /* Print bits representing opcode features for Neon.
18139 
18140        Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
18141        and polynomials as unsigned.
18142 
18143        Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
18144 
18145        Bit 2 is 1 for rounding functions, 0 otherwise.  */
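    /* As an illustration: an operand value of 2 (a polynomial type, no
       rounding) makes 'T' print 'p', 'F' print 'p', 't' print 'u' and
       'O' print nothing.  */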
18146 
18147     /* Identify the type as 's', 'u', 'p' or 'f'.  */
18148     case 'T':
18149       {
18150         HOST_WIDE_INT bits = INTVAL (x);
18151         fputc ("uspf"[bits & 3], stream);
18152       }
18153       return;
18154 
18155     /* Likewise, but signed and unsigned integers are both 'i'.  */
18156     case 'F':
18157       {
18158         HOST_WIDE_INT bits = INTVAL (x);
18159         fputc ("iipf"[bits & 3], stream);
18160       }
18161       return;
18162 
18163     /* As for 'T', but emit 'u' instead of 'p'.  */
18164     case 't':
18165       {
18166         HOST_WIDE_INT bits = INTVAL (x);
18167         fputc ("usuf"[bits & 3], stream);
18168       }
18169       return;
18170 
18171     /* Bit 2: rounding (vs none).  */
18172     case 'O':
18173       {
18174         HOST_WIDE_INT bits = INTVAL (x);
18175         fputs ((bits & 4) != 0 ? "r" : "", stream);
18176       }
18177       return;
18178 
18179     /* Memory operand for vld1/vst1 instruction.  */
18180     case 'A':
18181       {
18182 	rtx addr;
18183 	bool postinc = FALSE;
18184 	unsigned align, memsize, align_bits;
18185 
18186 	gcc_assert (MEM_P (x));
18187 	addr = XEXP (x, 0);
18188 	if (GET_CODE (addr) == POST_INC)
18189 	  {
18190 	    postinc = TRUE;
18191 	    addr = XEXP (addr, 0);
18192 	  }
18193 	asm_fprintf (stream, "[%r", REGNO (addr));
18194 
18195 	/* We know the alignment of this access, so we can emit a hint in the
18196 	   instruction (for some alignments) as an aid to the memory subsystem
18197 	   of the target.  */
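	/* E.g. (illustrative): a 16-byte access known to be 16-byte aligned
	   comes out as "[r0:128]", with "!" appended below for a
	   post-increment address.  */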
18198 	align = MEM_ALIGN (x) >> 3;
18199 	memsize = MEM_SIZE (x);
18200 
18201 	/* Only certain alignment specifiers are supported by the hardware.  */
18202 	if (memsize == 32 && (align % 32) == 0)
18203 	  align_bits = 256;
18204 	else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
18205 	  align_bits = 128;
18206 	else if (memsize >= 8 && (align % 8) == 0)
18207 	  align_bits = 64;
18208 	else
18209 	  align_bits = 0;
18210 
18211 	if (align_bits != 0)
18212 	  asm_fprintf (stream, ":%d", align_bits);
18213 
18214 	asm_fprintf (stream, "]");
18215 
18216 	if (postinc)
18217 	  fputs("!", stream);
18218       }
18219       return;
18220 
18221     case 'C':
18222       {
18223 	rtx addr;
18224 
18225 	gcc_assert (MEM_P (x));
18226 	addr = XEXP (x, 0);
18227 	gcc_assert (REG_P (addr));
18228 	asm_fprintf (stream, "[%r]", REGNO (addr));
18229       }
18230       return;
18231 
18232     /* Translate an S register number into a D register number and element index.  */
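    /* For example (illustrative): s7, i.e. VFP register number
       FIRST_VFP_REGNUM + 7, prints as "d3[1]".  */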
18233     case 'y':
18234       {
18235         int mode = GET_MODE (x);
18236         int regno;
18237 
18238         if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
18239           {
18240 	    output_operand_lossage ("invalid operand for code '%c'", code);
18241 	    return;
18242           }
18243 
18244         regno = REGNO (x);
18245         if (!VFP_REGNO_OK_FOR_SINGLE (regno))
18246           {
18247 	    output_operand_lossage ("invalid operand for code '%c'", code);
18248 	    return;
18249           }
18250 
18251 	regno = regno - FIRST_VFP_REGNUM;
18252 	fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
18253       }
18254       return;
18255 
18256     case 'v':
18257 	gcc_assert (CONST_DOUBLE_P (x));
18258 	fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x));
18259 	return;
18260 
18261     /* Register specifier for vld1.16/vst1.16.  Translate the S register
18262        number into a D register number and element index.  */
18263     case 'z':
18264       {
18265         int mode = GET_MODE (x);
18266         int regno;
18267 
18268         if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
18269           {
18270 	    output_operand_lossage ("invalid operand for code '%c'", code);
18271 	    return;
18272           }
18273 
18274         regno = REGNO (x);
18275         if (!VFP_REGNO_OK_FOR_SINGLE (regno))
18276           {
18277 	    output_operand_lossage ("invalid operand for code '%c'", code);
18278 	    return;
18279           }
18280 
18281 	regno = regno - FIRST_VFP_REGNUM;
18282 	fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
18283       }
18284       return;
18285 
18286     default:
18287       if (x == 0)
18288 	{
18289 	  output_operand_lossage ("missing operand");
18290 	  return;
18291 	}
18292 
18293       switch (GET_CODE (x))
18294 	{
18295 	case REG:
18296 	  asm_fprintf (stream, "%r", REGNO (x));
18297 	  break;
18298 
18299 	case MEM:
18300 	  output_memory_reference_mode = GET_MODE (x);
18301 	  output_address (XEXP (x, 0));
18302 	  break;
18303 
18304 	case CONST_DOUBLE:
18305           if (TARGET_NEON)
18306             {
18307               char fpstr[20];
18308               real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
18309 			       sizeof (fpstr), 0, 1);
18310               fprintf (stream, "#%s", fpstr);
18311             }
18312           else
18313 	    fprintf (stream, "#%s", fp_immediate_constant (x));
18314 	  break;
18315 
18316 	default:
18317 	  gcc_assert (GET_CODE (x) != NEG);
18318 	  fputc ('#', stream);
18319 	  if (GET_CODE (x) == HIGH)
18320 	    {
18321 	      fputs (":lower16:", stream);
18322 	      x = XEXP (x, 0);
18323 	    }
18324 
18325 	  output_addr_const (stream, x);
18326 	  break;
18327 	}
18328     }
18329 }
18330 
18331 /* Target hook for printing a memory address.  */
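/* Some illustrative 32-bit outputs, assuming r3 as the base register and an
   SImode access: a plain register prints as "[r3]", (plus (reg) (const_int 8))
   as "[r3, #8]", (pre_dec (reg)) as "[r3, #-4]!" and (post_inc (reg)) as
   "[r3], #4".  */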
18332 static void
18333 arm_print_operand_address (FILE *stream, rtx x)
18334 {
18335   if (TARGET_32BIT)
18336     {
18337       int is_minus = GET_CODE (x) == MINUS;
18338 
18339       if (REG_P (x))
18340 	asm_fprintf (stream, "[%r]", REGNO (x));
18341       else if (GET_CODE (x) == PLUS || is_minus)
18342 	{
18343 	  rtx base = XEXP (x, 0);
18344 	  rtx index = XEXP (x, 1);
18345 	  HOST_WIDE_INT offset = 0;
18346 	  if (!REG_P (base)
18347 	      || (REG_P (index) && REGNO (index) == SP_REGNUM))
18348 	    {
18349 	      /* Ensure that BASE is a register.  */
18350 	      /* (one of them must be).  */
18351 	      /* Also ensure the SP is not used as an index register.  */
18352 	      rtx temp = base;
18353 	      base = index;
18354 	      index = temp;
18355 	    }
18356 	  switch (GET_CODE (index))
18357 	    {
18358 	    case CONST_INT:
18359 	      offset = INTVAL (index);
18360 	      if (is_minus)
18361 		offset = -offset;
18362 	      asm_fprintf (stream, "[%r, #%wd]",
18363 			   REGNO (base), offset);
18364 	      break;
18365 
18366 	    case REG:
18367 	      asm_fprintf (stream, "[%r, %s%r]",
18368 			   REGNO (base), is_minus ? "-" : "",
18369 			   REGNO (index));
18370 	      break;
18371 
18372 	    case MULT:
18373 	    case ASHIFTRT:
18374 	    case LSHIFTRT:
18375 	    case ASHIFT:
18376 	    case ROTATERT:
18377 	      {
18378 		asm_fprintf (stream, "[%r, %s%r",
18379 			     REGNO (base), is_minus ? "-" : "",
18380 			     REGNO (XEXP (index, 0)));
18381 		arm_print_operand (stream, index, 'S');
18382 		fputs ("]", stream);
18383 		break;
18384 	      }
18385 
18386 	    default:
18387 	      gcc_unreachable ();
18388 	    }
18389 	}
18390       else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
18391 	       || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
18392 	{
18393 	  extern enum machine_mode output_memory_reference_mode;
18394 
18395 	  gcc_assert (REG_P (XEXP (x, 0)));
18396 
18397 	  if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
18398 	    asm_fprintf (stream, "[%r, #%s%d]!",
18399 			 REGNO (XEXP (x, 0)),
18400 			 GET_CODE (x) == PRE_DEC ? "-" : "",
18401 			 GET_MODE_SIZE (output_memory_reference_mode));
18402 	  else
18403 	    asm_fprintf (stream, "[%r], #%s%d",
18404 			 REGNO (XEXP (x, 0)),
18405 			 GET_CODE (x) == POST_DEC ? "-" : "",
18406 			 GET_MODE_SIZE (output_memory_reference_mode));
18407 	}
18408       else if (GET_CODE (x) == PRE_MODIFY)
18409 	{
18410 	  asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
18411 	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
18412 	    asm_fprintf (stream, "#%wd]!",
18413 			 INTVAL (XEXP (XEXP (x, 1), 1)));
18414 	  else
18415 	    asm_fprintf (stream, "%r]!",
18416 			 REGNO (XEXP (XEXP (x, 1), 1)));
18417 	}
18418       else if (GET_CODE (x) == POST_MODIFY)
18419 	{
18420 	  asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
18421 	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
18422 	    asm_fprintf (stream, "#%wd",
18423 			 INTVAL (XEXP (XEXP (x, 1), 1)));
18424 	  else
18425 	    asm_fprintf (stream, "%r",
18426 			 REGNO (XEXP (XEXP (x, 1), 1)));
18427 	}
18428       else output_addr_const (stream, x);
18429     }
18430   else
18431     {
18432       if (REG_P (x))
18433 	asm_fprintf (stream, "[%r]", REGNO (x));
18434       else if (GET_CODE (x) == POST_INC)
18435 	asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
18436       else if (GET_CODE (x) == PLUS)
18437 	{
18438 	  gcc_assert (REG_P (XEXP (x, 0)));
18439 	  if (CONST_INT_P (XEXP (x, 1)))
18440 	    asm_fprintf (stream, "[%r, #%wd]",
18441 			 REGNO (XEXP (x, 0)),
18442 			 INTVAL (XEXP (x, 1)));
18443 	  else
18444 	    asm_fprintf (stream, "[%r, %r]",
18445 			 REGNO (XEXP (x, 0)),
18446 			 REGNO (XEXP (x, 1)));
18447 	}
18448       else
18449 	output_addr_const (stream, x);
18450     }
18451 }
18452 
18453 /* Target hook for indicating whether a punctuation character for
18454    TARGET_PRINT_OPERAND is valid.  */
18455 static bool
18456 arm_print_operand_punct_valid_p (unsigned char code)
18457 {
18458   return (code == '@' || code == '|' || code == '.'
18459 	  || code == '(' || code == ')' || code == '#'
18460 	  || (TARGET_32BIT && (code == '?'))
18461 	  || (TARGET_THUMB2 && (code == '!'))
18462 	  || (TARGET_THUMB && (code == '_')));
18463 }
18464 
18465 /* Target hook for assembling integer objects.  The ARM version needs to
18466    handle word-sized values specially.  */
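/* For instance (illustrative, assuming GOT relocations are needed): a
   word-sized SYMBOL_REF emitted into a PIC constant table comes out as
   "	.word	sym(GOTOFF)", or with "(GOT)" for non-local symbols and
   VxWorks RTP.  */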
18467 static bool
18468 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
18469 {
18470   enum machine_mode mode;
18471 
18472   if (size == UNITS_PER_WORD && aligned_p)
18473     {
18474       fputs ("\t.word\t", asm_out_file);
18475       output_addr_const (asm_out_file, x);
18476 
18477       /* Mark symbols as position independent.  We only do this in the
18478 	 .text segment, not in the .data segment.  */
18479       if (NEED_GOT_RELOC && flag_pic && making_const_table &&
18480 	  (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
18481 	{
18482 	  /* See legitimize_pic_address for an explanation of the
18483 	     TARGET_VXWORKS_RTP check.  */
18484 	  if (TARGET_VXWORKS_RTP
18485 	      || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
18486 	    fputs ("(GOT)", asm_out_file);
18487 	  else
18488 	    fputs ("(GOTOFF)", asm_out_file);
18489 	}
18490       fputc ('\n', asm_out_file);
18491       return true;
18492     }
18493 
18494   mode = GET_MODE (x);
18495 
18496   if (arm_vector_mode_supported_p (mode))
18497     {
18498       int i, units;
18499 
18500       gcc_assert (GET_CODE (x) == CONST_VECTOR);
18501 
18502       units = CONST_VECTOR_NUNITS (x);
18503       size = GET_MODE_SIZE (GET_MODE_INNER (mode));
18504 
18505       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
18506         for (i = 0; i < units; i++)
18507 	  {
18508 	    rtx elt = CONST_VECTOR_ELT (x, i);
18509 	    assemble_integer
18510 	      (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
18511 	  }
18512       else
18513         for (i = 0; i < units; i++)
18514           {
18515             rtx elt = CONST_VECTOR_ELT (x, i);
18516             REAL_VALUE_TYPE rval;
18517 
18518             REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
18519 
18520             assemble_real
18521               (rval, GET_MODE_INNER (mode),
18522               i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
18523           }
18524 
18525       return true;
18526     }
18527 
18528   return default_assemble_integer (x, size, aligned_p);
18529 }
18530 
18531 static void
18532 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
18533 {
18534   section *s;
18535 
18536   if (!TARGET_AAPCS_BASED)
18537     {
18538       (is_ctor ?
18539        default_named_section_asm_out_constructor
18540        : default_named_section_asm_out_destructor) (symbol, priority);
18541       return;
18542     }
18543 
18544   /* Put these in the .init_array section, using a special relocation.  */
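  /* E.g. (illustrative): a constructor with priority 123 goes into section
     ".init_array.00123" and is emitted as "	.word	symbol(target1)".  */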
18545   if (priority != DEFAULT_INIT_PRIORITY)
18546     {
18547       char buf[18];
18548       sprintf (buf, "%s.%.5u",
18549 	       is_ctor ? ".init_array" : ".fini_array",
18550 	       priority);
18551       s = get_section (buf, SECTION_WRITE, NULL_TREE);
18552     }
18553   else if (is_ctor)
18554     s = ctors_section;
18555   else
18556     s = dtors_section;
18557 
18558   switch_to_section (s);
18559   assemble_align (POINTER_SIZE);
18560   fputs ("\t.word\t", asm_out_file);
18561   output_addr_const (asm_out_file, symbol);
18562   fputs ("(target1)\n", asm_out_file);
18563 }
18564 
18565 /* Add a function to the list of static constructors.  */
18566 
18567 static void
18568 arm_elf_asm_constructor (rtx symbol, int priority)
18569 {
18570   arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
18571 }
18572 
18573 /* Add a function to the list of static destructors.  */
18574 
18575 static void
18576 arm_elf_asm_destructor (rtx symbol, int priority)
18577 {
18578   arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
18579 }
18580 
18581 /* A finite state machine takes care of noticing whether or not instructions
18582    can be conditionally executed, and thus decrease execution time and code
18583    size by deleting branch instructions.  The fsm is controlled by
18584    final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */
18585 
18586 /* The states of the fsm controlling condition codes are:
18587    0: normal, do nothing special
18588    1: make ASM_OUTPUT_OPCODE not output this instruction
18589    2: make ASM_OUTPUT_OPCODE not output this instruction
18590    3: make instructions conditional
18591    4: make instructions conditional
18592 
18593    State transitions (state->state by whom under condition):
18594    0 -> 1 final_prescan_insn if the `target' is a label
18595    0 -> 2 final_prescan_insn if the `target' is an unconditional branch
18596    1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
18597    2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
18598    3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
18599           (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
18600    4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
18601           (the target insn is arm_target_insn).
18602 
18603    If the jump clobbers the conditions then we use states 2 and 4.
18604 
18605    A similar thing can be done with conditional return insns.
18606 
18607    XXX In case the `target' is an unconditional branch, this conditionalising
18608    of the instructions always reduces code size, but not always execution
18609    time.  But then, I want to reduce the code size to somewhere near what
18610    /bin/cc produces.  */
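/* A typical transformation this enables (illustrative):

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   becomes

	cmp	r0, #0
	addne	r1, r1, #1  */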
18611 
18612 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
18613    instructions.  When a COND_EXEC instruction is seen the subsequent
18614    instructions are scanned so that multiple conditional instructions can be
18615    combined into a single IT block.  arm_condexec_count and arm_condexec_mask
18616    specify the length and true/false mask for the IT block.  These will be
18617    decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
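/* For example (illustrative), on Thumb-2 three conditional instructions
   sharing one predicate and its inverse might be emitted as a single IT
   block:

	itte	eq
	moveq	r0, #1
	moveq	r1, #0
	movne	r0, #0  */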
18618 
18619 /* Returns the index of the ARM condition code string in
18620    `arm_condition_codes', or ARM_NV if the comparison is invalid.
18621    COMPARISON should be an rtx like `(eq (...) (...))'.  */
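/* For example (illustrative): (eq (reg:CC CC_REGNUM) (const_int 0)) yields
   ARM_EQ, while an UNEQ comparison in CCFPmode yields ARM_NV.  */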
18622 
18623 enum arm_cond_code
18624 maybe_get_arm_condition_code (rtx comparison)
18625 {
18626   enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
18627   enum arm_cond_code code;
18628   enum rtx_code comp_code = GET_CODE (comparison);
18629 
18630   if (GET_MODE_CLASS (mode) != MODE_CC)
18631     mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
18632 			   XEXP (comparison, 1));
18633 
18634   switch (mode)
18635     {
18636     case CC_DNEmode: code = ARM_NE; goto dominance;
18637     case CC_DEQmode: code = ARM_EQ; goto dominance;
18638     case CC_DGEmode: code = ARM_GE; goto dominance;
18639     case CC_DGTmode: code = ARM_GT; goto dominance;
18640     case CC_DLEmode: code = ARM_LE; goto dominance;
18641     case CC_DLTmode: code = ARM_LT; goto dominance;
18642     case CC_DGEUmode: code = ARM_CS; goto dominance;
18643     case CC_DGTUmode: code = ARM_HI; goto dominance;
18644     case CC_DLEUmode: code = ARM_LS; goto dominance;
18645     case CC_DLTUmode: code = ARM_CC;
18646 
18647     dominance:
18648       if (comp_code == EQ)
18649 	return ARM_INVERSE_CONDITION_CODE (code);
18650       if (comp_code == NE)
18651 	return code;
18652       return ARM_NV;
18653 
18654     case CC_NOOVmode:
18655       switch (comp_code)
18656 	{
18657 	case NE: return ARM_NE;
18658 	case EQ: return ARM_EQ;
18659 	case GE: return ARM_PL;
18660 	case LT: return ARM_MI;
18661 	default: return ARM_NV;
18662 	}
18663 
18664     case CC_Zmode:
18665       switch (comp_code)
18666 	{
18667 	case NE: return ARM_NE;
18668 	case EQ: return ARM_EQ;
18669 	default: return ARM_NV;
18670 	}
18671 
18672     case CC_Nmode:
18673       switch (comp_code)
18674 	{
18675 	case NE: return ARM_MI;
18676 	case EQ: return ARM_PL;
18677 	default: return ARM_NV;
18678 	}
18679 
18680     case CCFPEmode:
18681     case CCFPmode:
18682       /* We can handle all cases except UNEQ and LTGT.  */
18683       switch (comp_code)
18684 	{
18685 	case GE: return ARM_GE;
18686 	case GT: return ARM_GT;
18687 	case LE: return ARM_LS;
18688 	case LT: return ARM_MI;
18689 	case NE: return ARM_NE;
18690 	case EQ: return ARM_EQ;
18691 	case ORDERED: return ARM_VC;
18692 	case UNORDERED: return ARM_VS;
18693 	case UNLT: return ARM_LT;
18694 	case UNLE: return ARM_LE;
18695 	case UNGT: return ARM_HI;
18696 	case UNGE: return ARM_PL;
18697 	  /* UNEQ and LTGT do not have a representation.  */
18698 	case UNEQ: /* Fall through.  */
18699 	case LTGT: /* Fall through.  */
18700 	default: return ARM_NV;
18701 	}
18702 
18703     case CC_SWPmode:
18704       switch (comp_code)
18705 	{
18706 	case NE: return ARM_NE;
18707 	case EQ: return ARM_EQ;
18708 	case GE: return ARM_LE;
18709 	case GT: return ARM_LT;
18710 	case LE: return ARM_GE;
18711 	case LT: return ARM_GT;
18712 	case GEU: return ARM_LS;
18713 	case GTU: return ARM_CC;
18714 	case LEU: return ARM_CS;
18715 	case LTU: return ARM_HI;
18716 	default: return ARM_NV;
18717 	}
18718 
18719     case CC_Cmode:
18720       switch (comp_code)
18721 	{
18722 	case LTU: return ARM_CS;
18723 	case GEU: return ARM_CC;
18724 	default: return ARM_NV;
18725 	}
18726 
18727     case CC_CZmode:
18728       switch (comp_code)
18729 	{
18730 	case NE: return ARM_NE;
18731 	case EQ: return ARM_EQ;
18732 	case GEU: return ARM_CS;
18733 	case GTU: return ARM_HI;
18734 	case LEU: return ARM_LS;
18735 	case LTU: return ARM_CC;
18736 	default: return ARM_NV;
18737 	}
18738 
18739     case CC_NCVmode:
18740       switch (comp_code)
18741 	{
18742 	case GE: return ARM_GE;
18743 	case LT: return ARM_LT;
18744 	case GEU: return ARM_CS;
18745 	case LTU: return ARM_CC;
18746 	default: return ARM_NV;
18747 	}
18748 
18749     case CCmode:
18750       switch (comp_code)
18751 	{
18752 	case NE: return ARM_NE;
18753 	case EQ: return ARM_EQ;
18754 	case GE: return ARM_GE;
18755 	case GT: return ARM_GT;
18756 	case LE: return ARM_LE;
18757 	case LT: return ARM_LT;
18758 	case GEU: return ARM_CS;
18759 	case GTU: return ARM_HI;
18760 	case LEU: return ARM_LS;
18761 	case LTU: return ARM_CC;
18762 	default: return ARM_NV;
18763 	}
18764 
18765     default: gcc_unreachable ();
18766     }
18767 }
18768 
18769 /* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
18770 static enum arm_cond_code
18771 get_arm_condition_code (rtx comparison)
18772 {
18773   enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
18774   gcc_assert (code != ARM_NV);
18775   return code;
18776 }
18777 
18778 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
18779    instructions.  */
18780 void
18781 thumb2_final_prescan_insn (rtx insn)
18782 {
18783   rtx first_insn = insn;
18784   rtx body = PATTERN (insn);
18785   rtx predicate;
18786   enum arm_cond_code code;
18787   int n;
18788   int mask;
18789 
18790   /* Remove the previous insn from the count of insns to be output.  */
18791   if (arm_condexec_count)
18792       arm_condexec_count--;
18793 
18794   /* Nothing to do if we are already inside a conditional block.  */
18795   if (arm_condexec_count)
18796     return;
18797 
18798   if (GET_CODE (body) != COND_EXEC)
18799     return;
18800 
18801   /* Conditional jumps are implemented directly.  */
18802   if (JUMP_P (insn))
18803     return;
18804 
18805   predicate = COND_EXEC_TEST (body);
18806   arm_current_cc = get_arm_condition_code (predicate);
18807 
18808   n = get_attr_ce_count (insn);
18809   arm_condexec_count = 1;
18810   arm_condexec_mask = (1 << n) - 1;
18811   arm_condexec_masklen = n;
18812   /* See if subsequent instructions can be combined into the same block.  */
18813   for (;;)
18814     {
18815       insn = next_nonnote_insn (insn);
18816 
18817       /* Jumping into the middle of an IT block is illegal, so a label or
18818          barrier terminates the block.  */
18819       if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
18820 	break;
18821 
18822       body = PATTERN (insn);
18823       /* USE and CLOBBER aren't really insns, so just skip them.  */
18824       if (GET_CODE (body) == USE
18825 	  || GET_CODE (body) == CLOBBER)
18826 	continue;
18827 
18828       /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
18829       if (GET_CODE (body) != COND_EXEC)
18830 	break;
18831       /* Allow up to 4 conditionally executed instructions in a block.  */
18832       n = get_attr_ce_count (insn);
18833       if (arm_condexec_masklen + n > 4)
18834 	break;
18835 
18836       predicate = COND_EXEC_TEST (body);
18837       code = get_arm_condition_code (predicate);
18838       mask = (1 << n) - 1;
18839       if (arm_current_cc == code)
18840 	arm_condexec_mask |= (mask << arm_condexec_masklen);
18841       else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
18842 	break;
18843 
18844       arm_condexec_count++;
18845       arm_condexec_masklen += n;
18846 
18847       /* A jump must be the last instruction in a conditional block.  */
18848       if (JUMP_P (insn))
18849 	break;
18850     }
18851   /* Restore recog_data (getting the attributes of other insns can
18852      destroy this array, but final.c assumes that it remains intact
18853      across this call).  */
18854   extract_constrain_insn_cached (first_insn);
18855 }
18856 
18857 void
18858 arm_final_prescan_insn (rtx insn)
18859 {
18860   /* BODY will hold the body of INSN.  */
18861   rtx body = PATTERN (insn);
18862 
18863   /* This will be 1 if trying to repeat the trick, and things need to be
18864      reversed if it appears to fail.  */
18865   int reverse = 0;
18866 
18867   /* If we start with a return insn, we only succeed if we find another one.  */
18868   int seeking_return = 0;
18869   enum rtx_code return_code = UNKNOWN;
18870 
18871   /* START_INSN will hold the insn from where we start looking.  This is the
18872      first insn after the following code_label if REVERSE is true.  */
18873   rtx start_insn = insn;
18874 
18875   /* If in state 4, check if the target branch is reached, in order to
18876      change back to state 0.  */
18877   if (arm_ccfsm_state == 4)
18878     {
18879       if (insn == arm_target_insn)
18880 	{
18881 	  arm_target_insn = NULL;
18882 	  arm_ccfsm_state = 0;
18883 	}
18884       return;
18885     }
18886 
18887   /* If in state 3, it is possible to repeat the trick, if this insn is an
18888      unconditional branch to a label, and immediately following this branch
18889      is the previous target label which is only used once, and the label this
18890      branch jumps to is not too far off.  */
18891   if (arm_ccfsm_state == 3)
18892     {
18893       if (simplejump_p (insn))
18894 	{
18895 	  start_insn = next_nonnote_insn (start_insn);
18896 	  if (BARRIER_P (start_insn))
18897 	    {
18898 	      /* XXX Isn't this always a barrier?  */
18899 	      start_insn = next_nonnote_insn (start_insn);
18900 	    }
18901 	  if (LABEL_P (start_insn)
18902 	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18903 	      && LABEL_NUSES (start_insn) == 1)
18904 	    reverse = TRUE;
18905 	  else
18906 	    return;
18907 	}
18908       else if (ANY_RETURN_P (body))
18909         {
18910 	  start_insn = next_nonnote_insn (start_insn);
18911 	  if (BARRIER_P (start_insn))
18912 	    start_insn = next_nonnote_insn (start_insn);
18913 	  if (LABEL_P (start_insn)
18914 	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18915 	      && LABEL_NUSES (start_insn) == 1)
18916 	    {
18917 	      reverse = TRUE;
18918 	      seeking_return = 1;
18919 	      return_code = GET_CODE (body);
18920 	    }
18921 	  else
18922 	    return;
18923         }
18924       else
18925 	return;
18926     }
18927 
18928   gcc_assert (!arm_ccfsm_state || reverse);
18929   if (!JUMP_P (insn))
18930     return;
18931 
18932   /* This jump might be paralleled with a clobber of the condition codes;
18933      the jump should always come first.  */
18934   if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
18935     body = XVECEXP (body, 0, 0);
18936 
18937   if (reverse
18938       || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
18939 	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
18940     {
18941       int insns_skipped;
18942       int fail = FALSE, succeed = FALSE;
18943       /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
18944       int then_not_else = TRUE;
18945       rtx this_insn = start_insn, label = 0;
18946 
18947       /* Register the insn jumped to.  */
18948       if (reverse)
18949         {
18950 	  if (!seeking_return)
18951 	    label = XEXP (SET_SRC (body), 0);
18952         }
18953       else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
18954 	label = XEXP (XEXP (SET_SRC (body), 1), 0);
18955       else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
18956 	{
18957 	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
18958 	  then_not_else = FALSE;
18959 	}
18960       else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
18961 	{
18962 	  seeking_return = 1;
18963 	  return_code = GET_CODE (XEXP (SET_SRC (body), 1));
18964 	}
18965       else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
18966         {
18967 	  seeking_return = 1;
18968 	  return_code = GET_CODE (XEXP (SET_SRC (body), 2));
18969 	  then_not_else = FALSE;
18970         }
18971       else
18972 	gcc_unreachable ();
18973 
18974       /* See how many insns this branch skips, and what kind of insns.  If all
18975 	 insns are okay, and the label or unconditional branch to the same
18976 	 label is not too far away, succeed.  */
18977       for (insns_skipped = 0;
18978 	   !fail && !succeed && insns_skipped++ < max_insns_skipped;)
18979 	{
18980 	  rtx scanbody;
18981 
18982 	  this_insn = next_nonnote_insn (this_insn);
18983 	  if (!this_insn)
18984 	    break;
18985 
18986 	  switch (GET_CODE (this_insn))
18987 	    {
18988 	    case CODE_LABEL:
18989 	      /* Succeed if it is the target label, otherwise fail since
18990 		 control falls in from somewhere else.  */
18991 	      if (this_insn == label)
18992 		{
18993 		  arm_ccfsm_state = 1;
18994 		  succeed = TRUE;
18995 		}
18996 	      else
18997 		fail = TRUE;
18998 	      break;
18999 
19000 	    case BARRIER:
19001 	      /* Succeed if the following insn is the target label.
19002 		 Otherwise fail.
19003 		 If return insns are used then the last insn in a function
19004 		 will be a barrier.  */
19005 	      this_insn = next_nonnote_insn (this_insn);
19006 	      if (this_insn && this_insn == label)
19007 		{
19008 		  arm_ccfsm_state = 1;
19009 		  succeed = TRUE;
19010 		}
19011 	      else
19012 		fail = TRUE;
19013 	      break;
19014 
19015 	    case CALL_INSN:
19016 	      /* The AAPCS says that conditional calls should not be
19017 		 used since they make interworking inefficient (the
19018 		 linker can't transform BL<cond> into BLX).  That's
19019 		 only a problem if the machine has BLX.  */
19020 	      if (arm_arch5)
19021 		{
19022 		  fail = TRUE;
19023 		  break;
19024 		}
19025 
19026 	      /* Succeed if the following insn is the target label, or
19027 		 if the following two insns are a barrier and the
19028 		 target label.  */
19029 	      this_insn = next_nonnote_insn (this_insn);
19030 	      if (this_insn && BARRIER_P (this_insn))
19031 		this_insn = next_nonnote_insn (this_insn);
19032 
19033 	      if (this_insn && this_insn == label
19034 		  && insns_skipped < max_insns_skipped)
19035 		{
19036 		  arm_ccfsm_state = 1;
19037 		  succeed = TRUE;
19038 		}
19039 	      else
19040 		fail = TRUE;
19041 	      break;
19042 
19043 	    case JUMP_INSN:
19044 	      /* If this is an unconditional branch to the same label, succeed.
19045 		 If it is to another label, do nothing.  If it is conditional,
19046 		 fail.  */
19047 	      /* XXX Probably, the tests for SET and the PC are
19048 		 unnecessary.  */
19049 
19050 	      scanbody = PATTERN (this_insn);
19051 	      if (GET_CODE (scanbody) == SET
19052 		  && GET_CODE (SET_DEST (scanbody)) == PC)
19053 		{
19054 		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
19055 		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
19056 		    {
19057 		      arm_ccfsm_state = 2;
19058 		      succeed = TRUE;
19059 		    }
19060 		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
19061 		    fail = TRUE;
19062 		}
19063 	      /* Fail if a conditional return is undesirable (e.g. on a
19064 		 StrongARM), but still allow this if optimizing for size.  */
19065 	      else if (GET_CODE (scanbody) == return_code
19066 		       && !use_return_insn (TRUE, NULL)
19067 		       && !optimize_size)
19068 		fail = TRUE;
19069 	      else if (GET_CODE (scanbody) == return_code)
19070 	        {
19071 		  arm_ccfsm_state = 2;
19072 		  succeed = TRUE;
19073 	        }
19074 	      else if (GET_CODE (scanbody) == PARALLEL)
19075 	        {
19076 		  switch (get_attr_conds (this_insn))
19077 		    {
19078 		    case CONDS_NOCOND:
19079 		      break;
19080 		    default:
19081 		      fail = TRUE;
19082 		      break;
19083 		    }
19084 		}
19085 	      else
19086 		fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */
19087 
19088 	      break;
19089 
19090 	    case INSN:
19091 	      /* Instructions using or affecting the condition codes make it
19092 		 fail.  */
19093 	      scanbody = PATTERN (this_insn);
19094 	      if (!(GET_CODE (scanbody) == SET
19095 		    || GET_CODE (scanbody) == PARALLEL)
19096 		  || get_attr_conds (this_insn) != CONDS_NOCOND)
19097 		fail = TRUE;
19098 	      break;
19099 
19100 	    default:
19101 	      break;
19102 	    }
19103 	}
19104       if (succeed)
19105 	{
19106 	  if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
19107 	    arm_target_label = CODE_LABEL_NUMBER (label);
19108 	  else
19109 	    {
19110 	      gcc_assert (seeking_return || arm_ccfsm_state == 2);
19111 
19112 	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
19113 	        {
19114 		  this_insn = next_nonnote_insn (this_insn);
19115 		  gcc_assert (!this_insn
19116 			      || (!BARRIER_P (this_insn)
19117 				  && !LABEL_P (this_insn)));
19118 	        }
19119 	      if (!this_insn)
19120 	        {
19121 		  /* Oh dear!  We ran off the end; give up.  */
19122 		  extract_constrain_insn_cached (insn);
19123 		  arm_ccfsm_state = 0;
19124 		  arm_target_insn = NULL;
19125 		  return;
19126 	        }
19127 	      arm_target_insn = this_insn;
19128 	    }
19129 
19130 	  /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
19131 	     what it was.  */
19132 	  if (!reverse)
19133 	    arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
19134 
19135 	  if (reverse || then_not_else)
19136 	    arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
19137 	}
19138 
19139       /* Restore recog_data (getting the attributes of other insns can
19140 	 destroy this array, but final.c assumes that it remains intact
19141 	 across this call).  */
19142       extract_constrain_insn_cached (insn);
19143     }
19144 }
19145 
19146 /* Output IT instructions.  */
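/* For instance (illustrative): arm_condexec_masklen == 3 with
   arm_condexec_mask == 0x3 gives the prefix "itte", printed as
   "itte	eq" when arm_current_cc is ARM_EQ.  */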
19147 void
19148 thumb2_asm_output_opcode (FILE * stream)
19149 {
19150   char buff[5];
19151   int n;
19152 
19153   if (arm_condexec_mask)
19154     {
19155       for (n = 0; n < arm_condexec_masklen; n++)
19156 	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
19157       buff[n] = 0;
19158       asm_fprintf(stream, "i%s\t%s\n\t", buff,
19159 		  arm_condition_codes[arm_current_cc]);
19160       arm_condexec_mask = 0;
19161     }
19162 }
19163 
19164 /* Returns true if REGNO is a valid register
19165    for holding a quantity of type MODE.  */
19166 int
19167 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
19168 {
19169   if (GET_MODE_CLASS (mode) == MODE_CC)
19170     return (regno == CC_REGNUM
19171 	    || (TARGET_HARD_FLOAT && TARGET_VFP
19172 		&& regno == VFPCC_REGNUM));
19173 
19174   if (TARGET_THUMB1)
19175     /* For the Thumb we only allow values bigger than SImode in
19176        registers 0 - 6, so that there is always a second low
19177        register available to hold the upper part of the value.
19178        We probably ought to ensure that the register is the
19179        start of an even numbered register pair.  */
19180     return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
19181 
19182   if (TARGET_HARD_FLOAT && TARGET_VFP
19183       && IS_VFP_REGNUM (regno))
19184     {
19185       if (mode == SFmode || mode == SImode)
19186 	return VFP_REGNO_OK_FOR_SINGLE (regno);
19187 
19188       if (mode == DFmode)
19189 	return VFP_REGNO_OK_FOR_DOUBLE (regno);
19190 
19191       /* VFP registers can hold HFmode values, but there is no point in
19192 	 putting them there unless we have hardware conversion insns. */
19193       if (mode == HFmode)
19194 	return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
19195 
19196       if (TARGET_NEON)
19197         return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
19198                || (VALID_NEON_QREG_MODE (mode)
19199                    && NEON_REGNO_OK_FOR_QUAD (regno))
19200 	       || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
19201 	       || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
19202 	       || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
19203 	       || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
19204 	       || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
19205 
19206       return FALSE;
19207     }
19208 
19209   if (TARGET_REALLY_IWMMXT)
19210     {
19211       if (IS_IWMMXT_GR_REGNUM (regno))
19212 	return mode == SImode;
19213 
19214       if (IS_IWMMXT_REGNUM (regno))
19215 	return VALID_IWMMXT_REG_MODE (mode);
19216     }
19217 
19218   /* We allow almost any value to be stored in the general registers.
19219      Restrict doubleword quantities to even register pairs so that we can
19220      use ldrd.  Do not allow very large Neon structure opaque modes in
19221      general registers; they would use too many.  */
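  /* E.g. (illustrative): when TARGET_LDRD, a DImode value may start at r0
     or r2 but not at r1, so that ldrd/strd can operate on the pair.  */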
19222   if (regno <= LAST_ARM_REGNUM)
19223     return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
19224       && ARM_NUM_REGS (mode) <= 4;
19225 
19226   if (regno == FRAME_POINTER_REGNUM
19227       || regno == ARG_POINTER_REGNUM)
19228     /* We only allow integers in the fake hard registers.  */
19229     return GET_MODE_CLASS (mode) == MODE_INT;
19230 
19231   return FALSE;
19232 }
19233 
19234 /* Implement MODES_TIEABLE_P.  */
19235 
19236 bool
19237 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
19238 {
19239   if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
19240     return true;
19241 
19242   /* We specifically want to allow elements of "structure" modes to
19243      be tieable to the structure.  This more general condition allows
19244      other rarer situations too.  */
19245   if (TARGET_NEON
19246       && (VALID_NEON_DREG_MODE (mode1)
19247 	  || VALID_NEON_QREG_MODE (mode1)
19248 	  || VALID_NEON_STRUCT_MODE (mode1))
19249       && (VALID_NEON_DREG_MODE (mode2)
19250 	  || VALID_NEON_QREG_MODE (mode2)
19251 	  || VALID_NEON_STRUCT_MODE (mode2)))
19252     return true;
19253 
19254   return false;
19255 }
19256 
19257 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
19258    not used in arm mode.  */
19259 
19260 enum reg_class
19261 arm_regno_class (int regno)
19262 {
19263   if (TARGET_THUMB1)
19264     {
19265       if (regno == STACK_POINTER_REGNUM)
19266 	return STACK_REG;
19267       if (regno == CC_REGNUM)
19268 	return CC_REG;
19269       if (regno < 8)
19270 	return LO_REGS;
19271       return HI_REGS;
19272     }
19273 
19274   if (TARGET_THUMB2 && regno < 8)
19275     return LO_REGS;
19276 
19277   if (   regno <= LAST_ARM_REGNUM
19278       || regno == FRAME_POINTER_REGNUM
19279       || regno == ARG_POINTER_REGNUM)
19280     return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
19281 
19282   if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
19283     return TARGET_THUMB2 ? CC_REG : NO_REGS;
19284 
19285   if (IS_VFP_REGNUM (regno))
19286     {
19287       if (regno <= D7_VFP_REGNUM)
19288 	return VFP_D0_D7_REGS;
19289       else if (regno <= LAST_LO_VFP_REGNUM)
19290         return VFP_LO_REGS;
19291       else
19292         return VFP_HI_REGS;
19293     }
19294 
19295   if (IS_IWMMXT_REGNUM (regno))
19296     return IWMMXT_REGS;
19297 
19298   if (IS_IWMMXT_GR_REGNUM (regno))
19299     return IWMMXT_GR_REGS;
19300 
19301   return NO_REGS;
19302 }
19303 
19304 /* Handle a special case when computing the offset
19305    of an argument from the frame pointer.  */
19306 int
19307 arm_debugger_arg_offset (int value, rtx addr)
19308 {
19309   rtx insn;
19310 
19311   /* We are only interested if dbxout_parms() failed to compute the offset.  */
19312   if (value != 0)
19313     return 0;
19314 
19315   /* We can only cope with the case where the address is held in a register.  */
19316   if (!REG_P (addr))
19317     return 0;
19318 
19319   /* If we are using the frame pointer to point at the argument, then
19320      an offset of 0 is correct.  */
19321   if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
19322     return 0;
19323 
19324   /* If we are using the stack pointer to point at the
19325      argument, then an offset of 0 is correct.  */
19326   /* ??? Check this is consistent with thumb2 frame layout.  */
19327   if ((TARGET_THUMB || !frame_pointer_needed)
19328       && REGNO (addr) == SP_REGNUM)
19329     return 0;
19330 
19331   /* Oh dear.  The argument is pointed to by a register rather
19332      than being held in a register, or being stored at a known
19333      offset from the frame pointer.  Since GDB only understands
19334      those two kinds of argument we must translate the address
19335      held in the register into an offset from the frame pointer.
19336      We do this by searching through the insns for the function
19337      looking to see where this register gets its value.  If the
19338      register is initialized from the frame pointer plus an offset
19339      then we are in luck and we can continue, otherwise we give up.
19340 
19341      This code is exercised by producing debugging information
19342      for a function with arguments like this:
19343 
19344            double func (double a, double b, int c, double d) {return d;}
19345 
19346      Without this code the stab for parameter 'd' will be set to
19347      an offset of 0 from the frame pointer, rather than 8.  */
19348 
19349   /* The if() statement says:
19350 
19351      If the insn is a normal instruction
19352      and if the insn is setting the value in a register
19353      and if the register being set is the register holding the address of the argument
19354      and if the address is computed by an addition
19355      that involves adding to a register
19356      which is the frame pointer
19357      a constant integer
19358 
19359      then...  */
19360 
19361   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19362     {
19363       if (   NONJUMP_INSN_P (insn)
19364 	  && GET_CODE (PATTERN (insn)) == SET
19365 	  && REGNO    (XEXP (PATTERN (insn), 0)) == REGNO (addr)
19366 	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
19367 	  && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
19368 	  && REGNO    (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
19369 	  && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
19370 	     )
19371 	{
19372 	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
19373 
19374 	  break;
19375 	}
19376     }
19377 
19378   if (value == 0)
19379     {
19380       debug_rtx (addr);
19381       warning (0, "unable to compute real location of stacked parameter");
19382       value = 8; /* XXX magic hack */
19383     }
19384 
19385   return value;
19386 }
19387 
19388 typedef enum {
19389   T_V8QI,
19390   T_V4HI,
19391   T_V2SI,
19392   T_V2SF,
19393   T_DI,
19394   T_V16QI,
19395   T_V8HI,
19396   T_V4SI,
19397   T_V4SF,
19398   T_V2DI,
19399   T_TI,
19400   T_EI,
19401   T_OI,
19402   T_MAX		/* Size of enum.  Keep last.  */
19403 } neon_builtin_type_mode;
19404 
19405 #define TYPE_MODE_BIT(X) (1 << (X))
19406 
19407 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI)	\
19408 		 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF)	\
19409 		 | TYPE_MODE_BIT (T_DI))
19410 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI)	\
19411 		 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF)	\
19412 		 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
19413 
19414 #define v8qi_UP  T_V8QI
19415 #define v4hi_UP  T_V4HI
19416 #define v2si_UP  T_V2SI
19417 #define v2sf_UP  T_V2SF
19418 #define di_UP    T_DI
19419 #define v16qi_UP T_V16QI
19420 #define v8hi_UP  T_V8HI
19421 #define v4si_UP  T_V4SI
19422 #define v4sf_UP  T_V4SF
19423 #define v2di_UP  T_V2DI
19424 #define ti_UP	 T_TI
19425 #define ei_UP	 T_EI
19426 #define oi_UP	 T_OI
19427 
19428 #define UP(X) X##_UP
19429 
19430 typedef enum {
19431   NEON_BINOP,
19432   NEON_TERNOP,
19433   NEON_UNOP,
19434   NEON_GETLANE,
19435   NEON_SETLANE,
19436   NEON_CREATE,
19437   NEON_RINT,
19438   NEON_DUP,
19439   NEON_DUPLANE,
19440   NEON_COMBINE,
19441   NEON_SPLIT,
19442   NEON_LANEMUL,
19443   NEON_LANEMULL,
19444   NEON_LANEMULH,
19445   NEON_LANEMAC,
19446   NEON_SCALARMUL,
19447   NEON_SCALARMULL,
19448   NEON_SCALARMULH,
19449   NEON_SCALARMAC,
19450   NEON_CONVERT,
19451   NEON_FIXCONV,
19452   NEON_SELECT,
19453   NEON_RESULTPAIR,
19454   NEON_REINTERP,
19455   NEON_VTBL,
19456   NEON_VTBX,
19457   NEON_LOAD1,
19458   NEON_LOAD1LANE,
19459   NEON_STORE1,
19460   NEON_STORE1LANE,
19461   NEON_LOADSTRUCT,
19462   NEON_LOADSTRUCTLANE,
19463   NEON_STORESTRUCT,
19464   NEON_STORESTRUCTLANE,
19465   NEON_LOGICBINOP,
19466   NEON_SHIFTINSERT,
19467   NEON_SHIFTIMM,
19468   NEON_SHIFTACC
19469 } neon_itype;
19470 
19471 typedef struct {
19472   const char *name;
19473   const neon_itype itype;
19474   const neon_builtin_type_mode mode;
19475   const enum insn_code code;
19476   unsigned int fcode;
19477 } neon_builtin_datum;
19478 
19479 #define CF(N,X) CODE_FOR_neon_##N##X
19480 
19481 #define VAR1(T, N, A) \
19482   {#N, NEON_##T, UP (A), CF (N, A), 0}
19483 #define VAR2(T, N, A, B) \
19484   VAR1 (T, N, A), \
19485   {#N, NEON_##T, UP (B), CF (N, B), 0}
19486 #define VAR3(T, N, A, B, C) \
19487   VAR2 (T, N, A, B), \
19488   {#N, NEON_##T, UP (C), CF (N, C), 0}
19489 #define VAR4(T, N, A, B, C, D) \
19490   VAR3 (T, N, A, B, C), \
19491   {#N, NEON_##T, UP (D), CF (N, D), 0}
19492 #define VAR5(T, N, A, B, C, D, E) \
19493   VAR4 (T, N, A, B, C, D), \
19494   {#N, NEON_##T, UP (E), CF (N, E), 0}
19495 #define VAR6(T, N, A, B, C, D, E, F) \
19496   VAR5 (T, N, A, B, C, D, E), \
19497   {#N, NEON_##T, UP (F), CF (N, F), 0}
19498 #define VAR7(T, N, A, B, C, D, E, F, G) \
19499   VAR6 (T, N, A, B, C, D, E, F), \
19500   {#N, NEON_##T, UP (G), CF (N, G), 0}
19501 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
19502   VAR7 (T, N, A, B, C, D, E, F, G), \
19503   {#N, NEON_##T, UP (H), CF (N, H), 0}
19504 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
19505   VAR8 (T, N, A, B, C, D, E, F, G, H), \
19506   {#N, NEON_##T, UP (I), CF (N, I), 0}
19507 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
19508   VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
19509   {#N, NEON_##T, UP (J), CF (N, J), 0}
19510 
19511 /* The mode entries in the following table correspond to the "key" type of the
19512    instruction variant, i.e. equivalent to that which would be specified after
19513    the assembler mnemonic, which usually refers to the last vector operand.
19514    (Signed/unsigned/polynomial types are not differentiated between though, and
19515    are all mapped onto the same mode for a given element size.) The modes
19516    listed per instruction should be the same as those defined for that
19517    instruction's pattern in neon.md.  */
19518 
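/* As an illustration, VAR2 (BINOP, vadd, v2sf, v4sf) expands to the two
   entries {"vadd", NEON_BINOP, T_V2SF, CODE_FOR_neon_vaddv2sf, 0} and
   {"vadd", NEON_BINOP, T_V4SF, CODE_FOR_neon_vaddv4sf, 0}.  */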
19519 static neon_builtin_datum neon_builtin_data[] =
19520 {
19521   VAR10 (BINOP, vadd,
19522 	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19523   VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
19524   VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
19525   VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19526   VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19527   VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
19528   VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19529   VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19530   VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
19531   VAR2 (TERNOP, vfma, v2sf, v4sf),
19532   VAR2 (TERNOP, vfms, v2sf, v4sf),
19533   VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19534   VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
19535   VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
19536   VAR2 (TERNOP, vqdmlal, v4hi, v2si),
19537   VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
19538   VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
19539   VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
19540   VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
19541   VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
19542   VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
19543   VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
19544   VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
19545   VAR2 (BINOP, vqdmull, v4hi, v2si),
19546   VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19547   VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19548   VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19549   VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
19550   VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
19551   VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
19552   VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19553   VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19554   VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19555   VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
19556   VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19557   VAR10 (BINOP, vsub,
19558 	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19559   VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
19560   VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
19561   VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19562   VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19563   VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
19564   VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19565   VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19566   VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19567   VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19568   VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19569   VAR2 (BINOP, vcage, v2sf, v4sf),
19570   VAR2 (BINOP, vcagt, v2sf, v4sf),
19571   VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19572   VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19573   VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
19574   VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19575   VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
19576   VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19577   VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19578   VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
19579   VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19580   VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19581   VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
19582   VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
19583   VAR2 (BINOP, vrecps, v2sf, v4sf),
19584   VAR2 (BINOP, vrsqrts, v2sf, v4sf),
19585   VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19586   VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19587   VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19588   VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19589   VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19590   VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19591   VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19592   VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19593   VAR2 (UNOP, vcnt, v8qi, v16qi),
19594   VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
19595   VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
19596   VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19597   /* FIXME: vget_lane supports more variants than this!  */
19598   VAR10 (GETLANE, vget_lane,
19599 	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19600   VAR10 (SETLANE, vset_lane,
19601 	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19602   VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
19603   VAR10 (DUP, vdup_n,
19604 	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19605   VAR10 (DUPLANE, vdup_lane,
19606 	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19607   VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
19608   VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
19609   VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
19610   VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
19611   VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
19612   VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
19613   VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
19614   VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19615   VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19616   VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
19617   VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
19618   VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19619   VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
19620   VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
19621   VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19622   VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19623   VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
19624   VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
19625   VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19626   VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
19627   VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
19628   VAR10 (BINOP, vext,
19629 	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19630   VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19631   VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
19632   VAR2 (UNOP, vrev16, v8qi, v16qi),
19633   VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
19634   VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
19635   VAR10 (SELECT, vbsl,
19636 	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19637   VAR2 (RINT, vrintn, v2sf, v4sf),
19638   VAR2 (RINT, vrinta, v2sf, v4sf),
19639   VAR2 (RINT, vrintp, v2sf, v4sf),
19640   VAR2 (RINT, vrintm, v2sf, v4sf),
19641   VAR2 (RINT, vrintz, v2sf, v4sf),
19642   VAR2 (RINT, vrintx, v2sf, v4sf),
19643   VAR1 (VTBL, vtbl1, v8qi),
19644   VAR1 (VTBL, vtbl2, v8qi),
19645   VAR1 (VTBL, vtbl3, v8qi),
19646   VAR1 (VTBL, vtbl4, v8qi),
19647   VAR1 (VTBX, vtbx1, v8qi),
19648   VAR1 (VTBX, vtbx2, v8qi),
19649   VAR1 (VTBX, vtbx3, v8qi),
19650   VAR1 (VTBX, vtbx4, v8qi),
19651   VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19652   VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19653   VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19654   VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
19655   VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
19656   VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
19657   VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
19658   VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
19659   VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
19660   VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
19661   VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
19662   VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
19663   VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
19664   VAR10 (LOAD1, vld1,
19665          v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19666   VAR10 (LOAD1LANE, vld1_lane,
19667 	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19668   VAR10 (LOAD1, vld1_dup,
19669 	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19670   VAR10 (STORE1, vst1,
19671 	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19672   VAR10 (STORE1LANE, vst1_lane,
19673 	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19674   VAR9 (LOADSTRUCT,
19675 	vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19676   VAR7 (LOADSTRUCTLANE, vld2_lane,
19677 	v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19678   VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
19679   VAR9 (STORESTRUCT, vst2,
19680 	v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19681   VAR7 (STORESTRUCTLANE, vst2_lane,
19682 	v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19683   VAR9 (LOADSTRUCT,
19684 	vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19685   VAR7 (LOADSTRUCTLANE, vld3_lane,
19686 	v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19687   VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
19688   VAR9 (STORESTRUCT, vst3,
19689 	v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19690   VAR7 (STORESTRUCTLANE, vst3_lane,
19691 	v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19692   VAR9 (LOADSTRUCT, vld4,
19693 	v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19694   VAR7 (LOADSTRUCTLANE, vld4_lane,
19695 	v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19696   VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
19697   VAR9 (STORESTRUCT, vst4,
19698 	v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19699   VAR7 (STORESTRUCTLANE, vst4_lane,
19700 	v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19701   VAR10 (LOGICBINOP, vand,
19702 	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19703   VAR10 (LOGICBINOP, vorr,
19704 	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19705   VAR10 (BINOP, veor,
19706 	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19707   VAR10 (LOGICBINOP, vbic,
19708 	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19709   VAR10 (LOGICBINOP, vorn,
19710 	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
19711 };
19712 
19713 #undef CF
19714 #undef VAR1
19715 #undef VAR2
19716 #undef VAR3
19717 #undef VAR4
19718 #undef VAR5
19719 #undef VAR6
19720 #undef VAR7
19721 #undef VAR8
19722 #undef VAR9
19723 #undef VAR10
19724 
19725 /* Neon defines builtins from ARM_BUILTIN_NEON_BASE upwards, though they
19726    don't have symbolic names defined here (which would require too much
19727    duplication).  FIXME?  */
19728 enum arm_builtins
19729 {
19730   ARM_BUILTIN_GETWCGR0,
19731   ARM_BUILTIN_GETWCGR1,
19732   ARM_BUILTIN_GETWCGR2,
19733   ARM_BUILTIN_GETWCGR3,
19734 
19735   ARM_BUILTIN_SETWCGR0,
19736   ARM_BUILTIN_SETWCGR1,
19737   ARM_BUILTIN_SETWCGR2,
19738   ARM_BUILTIN_SETWCGR3,
19739 
19740   ARM_BUILTIN_WZERO,
19741 
19742   ARM_BUILTIN_WAVG2BR,
19743   ARM_BUILTIN_WAVG2HR,
19744   ARM_BUILTIN_WAVG2B,
19745   ARM_BUILTIN_WAVG2H,
19746 
19747   ARM_BUILTIN_WACCB,
19748   ARM_BUILTIN_WACCH,
19749   ARM_BUILTIN_WACCW,
19750 
19751   ARM_BUILTIN_WMACS,
19752   ARM_BUILTIN_WMACSZ,
19753   ARM_BUILTIN_WMACU,
19754   ARM_BUILTIN_WMACUZ,
19755 
19756   ARM_BUILTIN_WSADB,
19757   ARM_BUILTIN_WSADBZ,
19758   ARM_BUILTIN_WSADH,
19759   ARM_BUILTIN_WSADHZ,
19760 
19761   ARM_BUILTIN_WALIGNI,
19762   ARM_BUILTIN_WALIGNR0,
19763   ARM_BUILTIN_WALIGNR1,
19764   ARM_BUILTIN_WALIGNR2,
19765   ARM_BUILTIN_WALIGNR3,
19766 
19767   ARM_BUILTIN_TMIA,
19768   ARM_BUILTIN_TMIAPH,
19769   ARM_BUILTIN_TMIABB,
19770   ARM_BUILTIN_TMIABT,
19771   ARM_BUILTIN_TMIATB,
19772   ARM_BUILTIN_TMIATT,
19773 
19774   ARM_BUILTIN_TMOVMSKB,
19775   ARM_BUILTIN_TMOVMSKH,
19776   ARM_BUILTIN_TMOVMSKW,
19777 
19778   ARM_BUILTIN_TBCSTB,
19779   ARM_BUILTIN_TBCSTH,
19780   ARM_BUILTIN_TBCSTW,
19781 
19782   ARM_BUILTIN_WMADDS,
19783   ARM_BUILTIN_WMADDU,
19784 
19785   ARM_BUILTIN_WPACKHSS,
19786   ARM_BUILTIN_WPACKWSS,
19787   ARM_BUILTIN_WPACKDSS,
19788   ARM_BUILTIN_WPACKHUS,
19789   ARM_BUILTIN_WPACKWUS,
19790   ARM_BUILTIN_WPACKDUS,
19791 
19792   ARM_BUILTIN_WADDB,
19793   ARM_BUILTIN_WADDH,
19794   ARM_BUILTIN_WADDW,
19795   ARM_BUILTIN_WADDSSB,
19796   ARM_BUILTIN_WADDSSH,
19797   ARM_BUILTIN_WADDSSW,
19798   ARM_BUILTIN_WADDUSB,
19799   ARM_BUILTIN_WADDUSH,
19800   ARM_BUILTIN_WADDUSW,
19801   ARM_BUILTIN_WSUBB,
19802   ARM_BUILTIN_WSUBH,
19803   ARM_BUILTIN_WSUBW,
19804   ARM_BUILTIN_WSUBSSB,
19805   ARM_BUILTIN_WSUBSSH,
19806   ARM_BUILTIN_WSUBSSW,
19807   ARM_BUILTIN_WSUBUSB,
19808   ARM_BUILTIN_WSUBUSH,
19809   ARM_BUILTIN_WSUBUSW,
19810 
19811   ARM_BUILTIN_WAND,
19812   ARM_BUILTIN_WANDN,
19813   ARM_BUILTIN_WOR,
19814   ARM_BUILTIN_WXOR,
19815 
19816   ARM_BUILTIN_WCMPEQB,
19817   ARM_BUILTIN_WCMPEQH,
19818   ARM_BUILTIN_WCMPEQW,
19819   ARM_BUILTIN_WCMPGTUB,
19820   ARM_BUILTIN_WCMPGTUH,
19821   ARM_BUILTIN_WCMPGTUW,
19822   ARM_BUILTIN_WCMPGTSB,
19823   ARM_BUILTIN_WCMPGTSH,
19824   ARM_BUILTIN_WCMPGTSW,
19825 
19826   ARM_BUILTIN_TEXTRMSB,
19827   ARM_BUILTIN_TEXTRMSH,
19828   ARM_BUILTIN_TEXTRMSW,
19829   ARM_BUILTIN_TEXTRMUB,
19830   ARM_BUILTIN_TEXTRMUH,
19831   ARM_BUILTIN_TEXTRMUW,
19832   ARM_BUILTIN_TINSRB,
19833   ARM_BUILTIN_TINSRH,
19834   ARM_BUILTIN_TINSRW,
19835 
19836   ARM_BUILTIN_WMAXSW,
19837   ARM_BUILTIN_WMAXSH,
19838   ARM_BUILTIN_WMAXSB,
19839   ARM_BUILTIN_WMAXUW,
19840   ARM_BUILTIN_WMAXUH,
19841   ARM_BUILTIN_WMAXUB,
19842   ARM_BUILTIN_WMINSW,
19843   ARM_BUILTIN_WMINSH,
19844   ARM_BUILTIN_WMINSB,
19845   ARM_BUILTIN_WMINUW,
19846   ARM_BUILTIN_WMINUH,
19847   ARM_BUILTIN_WMINUB,
19848 
19849   ARM_BUILTIN_WMULUM,
19850   ARM_BUILTIN_WMULSM,
19851   ARM_BUILTIN_WMULUL,
19852 
19853   ARM_BUILTIN_PSADBH,
19854   ARM_BUILTIN_WSHUFH,
19855 
19856   ARM_BUILTIN_WSLLH,
19857   ARM_BUILTIN_WSLLW,
19858   ARM_BUILTIN_WSLLD,
19859   ARM_BUILTIN_WSRAH,
19860   ARM_BUILTIN_WSRAW,
19861   ARM_BUILTIN_WSRAD,
19862   ARM_BUILTIN_WSRLH,
19863   ARM_BUILTIN_WSRLW,
19864   ARM_BUILTIN_WSRLD,
19865   ARM_BUILTIN_WRORH,
19866   ARM_BUILTIN_WRORW,
19867   ARM_BUILTIN_WRORD,
19868   ARM_BUILTIN_WSLLHI,
19869   ARM_BUILTIN_WSLLWI,
19870   ARM_BUILTIN_WSLLDI,
19871   ARM_BUILTIN_WSRAHI,
19872   ARM_BUILTIN_WSRAWI,
19873   ARM_BUILTIN_WSRADI,
19874   ARM_BUILTIN_WSRLHI,
19875   ARM_BUILTIN_WSRLWI,
19876   ARM_BUILTIN_WSRLDI,
19877   ARM_BUILTIN_WRORHI,
19878   ARM_BUILTIN_WRORWI,
19879   ARM_BUILTIN_WRORDI,
19880 
19881   ARM_BUILTIN_WUNPCKIHB,
19882   ARM_BUILTIN_WUNPCKIHH,
19883   ARM_BUILTIN_WUNPCKIHW,
19884   ARM_BUILTIN_WUNPCKILB,
19885   ARM_BUILTIN_WUNPCKILH,
19886   ARM_BUILTIN_WUNPCKILW,
19887 
19888   ARM_BUILTIN_WUNPCKEHSB,
19889   ARM_BUILTIN_WUNPCKEHSH,
19890   ARM_BUILTIN_WUNPCKEHSW,
19891   ARM_BUILTIN_WUNPCKEHUB,
19892   ARM_BUILTIN_WUNPCKEHUH,
19893   ARM_BUILTIN_WUNPCKEHUW,
19894   ARM_BUILTIN_WUNPCKELSB,
19895   ARM_BUILTIN_WUNPCKELSH,
19896   ARM_BUILTIN_WUNPCKELSW,
19897   ARM_BUILTIN_WUNPCKELUB,
19898   ARM_BUILTIN_WUNPCKELUH,
19899   ARM_BUILTIN_WUNPCKELUW,
19900 
19901   ARM_BUILTIN_WABSB,
19902   ARM_BUILTIN_WABSH,
19903   ARM_BUILTIN_WABSW,
19904 
19905   ARM_BUILTIN_WADDSUBHX,
19906   ARM_BUILTIN_WSUBADDHX,
19907 
19908   ARM_BUILTIN_WABSDIFFB,
19909   ARM_BUILTIN_WABSDIFFH,
19910   ARM_BUILTIN_WABSDIFFW,
19911 
19912   ARM_BUILTIN_WADDCH,
19913   ARM_BUILTIN_WADDCW,
19914 
19915   ARM_BUILTIN_WAVG4,
19916   ARM_BUILTIN_WAVG4R,
19917 
19918   ARM_BUILTIN_WMADDSX,
19919   ARM_BUILTIN_WMADDUX,
19920 
19921   ARM_BUILTIN_WMADDSN,
19922   ARM_BUILTIN_WMADDUN,
19923 
19924   ARM_BUILTIN_WMULWSM,
19925   ARM_BUILTIN_WMULWUM,
19926 
19927   ARM_BUILTIN_WMULWSMR,
19928   ARM_BUILTIN_WMULWUMR,
19929 
19930   ARM_BUILTIN_WMULWL,
19931 
19932   ARM_BUILTIN_WMULSMR,
19933   ARM_BUILTIN_WMULUMR,
19934 
19935   ARM_BUILTIN_WQMULM,
19936   ARM_BUILTIN_WQMULMR,
19937 
19938   ARM_BUILTIN_WQMULWM,
19939   ARM_BUILTIN_WQMULWMR,
19940 
19941   ARM_BUILTIN_WADDBHUSM,
19942   ARM_BUILTIN_WADDBHUSL,
19943 
19944   ARM_BUILTIN_WQMIABB,
19945   ARM_BUILTIN_WQMIABT,
19946   ARM_BUILTIN_WQMIATB,
19947   ARM_BUILTIN_WQMIATT,
19948 
19949   ARM_BUILTIN_WQMIABBN,
19950   ARM_BUILTIN_WQMIABTN,
19951   ARM_BUILTIN_WQMIATBN,
19952   ARM_BUILTIN_WQMIATTN,
19953 
19954   ARM_BUILTIN_WMIABB,
19955   ARM_BUILTIN_WMIABT,
19956   ARM_BUILTIN_WMIATB,
19957   ARM_BUILTIN_WMIATT,
19958 
19959   ARM_BUILTIN_WMIABBN,
19960   ARM_BUILTIN_WMIABTN,
19961   ARM_BUILTIN_WMIATBN,
19962   ARM_BUILTIN_WMIATTN,
19963 
19964   ARM_BUILTIN_WMIAWBB,
19965   ARM_BUILTIN_WMIAWBT,
19966   ARM_BUILTIN_WMIAWTB,
19967   ARM_BUILTIN_WMIAWTT,
19968 
19969   ARM_BUILTIN_WMIAWBBN,
19970   ARM_BUILTIN_WMIAWBTN,
19971   ARM_BUILTIN_WMIAWTBN,
19972   ARM_BUILTIN_WMIAWTTN,
19973 
19974   ARM_BUILTIN_WMERGE,
19975 
19976   ARM_BUILTIN_NEON_BASE,
19977 
19978   ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
19979 };
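
/* The Neon builtins are therefore numbered contiguously from
   ARM_BUILTIN_NEON_BASE: the builtin created for neon_builtin_data[i]
   receives function code ARM_BUILTIN_NEON_BASE + i (see the loop in
   arm_init_neon_builtins below), so a Neon fcode maps back to its table
   entry as, for instance,

     neon_builtin_datum *d
       = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];

   This is only an illustration of the numbering scheme, not a helper
   defined here.  */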
19980 
19981 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
19982 
19983 static void
19984 arm_init_neon_builtins (void)
19985 {
19986   unsigned int i, fcode;
19987   tree decl;
19988 
19989   tree neon_intQI_type_node;
19990   tree neon_intHI_type_node;
19991   tree neon_polyQI_type_node;
19992   tree neon_polyHI_type_node;
19993   tree neon_intSI_type_node;
19994   tree neon_intDI_type_node;
19995   tree neon_float_type_node;
19996 
19997   tree intQI_pointer_node;
19998   tree intHI_pointer_node;
19999   tree intSI_pointer_node;
20000   tree intDI_pointer_node;
20001   tree float_pointer_node;
20002 
20003   tree const_intQI_node;
20004   tree const_intHI_node;
20005   tree const_intSI_node;
20006   tree const_intDI_node;
20007   tree const_float_node;
20008 
20009   tree const_intQI_pointer_node;
20010   tree const_intHI_pointer_node;
20011   tree const_intSI_pointer_node;
20012   tree const_intDI_pointer_node;
20013   tree const_float_pointer_node;
20014 
20015   tree V8QI_type_node;
20016   tree V4HI_type_node;
20017   tree V2SI_type_node;
20018   tree V2SF_type_node;
20019   tree V16QI_type_node;
20020   tree V8HI_type_node;
20021   tree V4SI_type_node;
20022   tree V4SF_type_node;
20023   tree V2DI_type_node;
20024 
20025   tree intUQI_type_node;
20026   tree intUHI_type_node;
20027   tree intUSI_type_node;
20028   tree intUDI_type_node;
20029 
20030   tree intEI_type_node;
20031   tree intOI_type_node;
20032   tree intCI_type_node;
20033   tree intXI_type_node;
20034 
20035   tree V8QI_pointer_node;
20036   tree V4HI_pointer_node;
20037   tree V2SI_pointer_node;
20038   tree V2SF_pointer_node;
20039   tree V16QI_pointer_node;
20040   tree V8HI_pointer_node;
20041   tree V4SI_pointer_node;
20042   tree V4SF_pointer_node;
20043   tree V2DI_pointer_node;
20044 
20045   tree void_ftype_pv8qi_v8qi_v8qi;
20046   tree void_ftype_pv4hi_v4hi_v4hi;
20047   tree void_ftype_pv2si_v2si_v2si;
20048   tree void_ftype_pv2sf_v2sf_v2sf;
20049   tree void_ftype_pdi_di_di;
20050   tree void_ftype_pv16qi_v16qi_v16qi;
20051   tree void_ftype_pv8hi_v8hi_v8hi;
20052   tree void_ftype_pv4si_v4si_v4si;
20053   tree void_ftype_pv4sf_v4sf_v4sf;
20054   tree void_ftype_pv2di_v2di_v2di;
20055 
20056   tree reinterp_ftype_dreg[5][5];
20057   tree reinterp_ftype_qreg[5][5];
20058   tree dreg_types[5], qreg_types[5];
20059 
20060   /* Create distinguished type nodes for NEON vector element types,
20061      and pointers to values of such types, so we can detect them later.  */
20062   neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
20063   neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
20064   neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
20065   neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
20066   neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
20067   neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
20068   neon_float_type_node = make_node (REAL_TYPE);
20069   TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
20070   layout_type (neon_float_type_node);
20071 
20072   /* Define typedefs which exactly correspond to the modes we are basing vector
20073      types on.  If you change these names you'll need to change
20074      the table used by arm_mangle_type too.  */
20075   (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
20076 					     "__builtin_neon_qi");
20077   (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
20078 					     "__builtin_neon_hi");
20079   (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
20080 					     "__builtin_neon_si");
20081   (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
20082 					     "__builtin_neon_sf");
20083   (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
20084 					     "__builtin_neon_di");
20085   (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
20086 					     "__builtin_neon_poly8");
20087   (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
20088 					     "__builtin_neon_poly16");
20089 
20090   intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
20091   intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
20092   intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
20093   intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
20094   float_pointer_node = build_pointer_type (neon_float_type_node);
20095 
20096   /* Next create constant-qualified versions of the above types.  */
20097   const_intQI_node = build_qualified_type (neon_intQI_type_node,
20098 					   TYPE_QUAL_CONST);
20099   const_intHI_node = build_qualified_type (neon_intHI_type_node,
20100 					   TYPE_QUAL_CONST);
20101   const_intSI_node = build_qualified_type (neon_intSI_type_node,
20102 					   TYPE_QUAL_CONST);
20103   const_intDI_node = build_qualified_type (neon_intDI_type_node,
20104 					   TYPE_QUAL_CONST);
20105   const_float_node = build_qualified_type (neon_float_type_node,
20106 					   TYPE_QUAL_CONST);
20107 
20108   const_intQI_pointer_node = build_pointer_type (const_intQI_node);
20109   const_intHI_pointer_node = build_pointer_type (const_intHI_node);
20110   const_intSI_pointer_node = build_pointer_type (const_intSI_node);
20111   const_intDI_pointer_node = build_pointer_type (const_intDI_node);
20112   const_float_pointer_node = build_pointer_type (const_float_node);
20113 
20114   /* Now create vector types based on our NEON element types.  */
20115   /* 64-bit vectors.  */
20116   V8QI_type_node =
20117     build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
20118   V4HI_type_node =
20119     build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
20120   V2SI_type_node =
20121     build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
20122   V2SF_type_node =
20123     build_vector_type_for_mode (neon_float_type_node, V2SFmode);
20124   /* 128-bit vectors.  */
20125   V16QI_type_node =
20126     build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
20127   V8HI_type_node =
20128     build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
20129   V4SI_type_node =
20130     build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
20131   V4SF_type_node =
20132     build_vector_type_for_mode (neon_float_type_node, V4SFmode);
20133   V2DI_type_node =
20134     build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
20135 
20136   /* Unsigned integer types for various mode sizes.  */
20137   intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
20138   intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
20139   intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
20140   intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
20141 
20142   (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
20143 					     "__builtin_neon_uqi");
20144   (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
20145 					     "__builtin_neon_uhi");
20146   (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
20147 					     "__builtin_neon_usi");
20148   (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
20149 					     "__builtin_neon_udi");
20150 
20151   /* Opaque integer types for structures of vectors.  */
20152   intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
20153   intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
20154   intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
20155   intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
20156 
20157   (*lang_hooks.types.register_builtin_type) (intTI_type_node,
20158 					     "__builtin_neon_ti");
20159   (*lang_hooks.types.register_builtin_type) (intEI_type_node,
20160 					     "__builtin_neon_ei");
20161   (*lang_hooks.types.register_builtin_type) (intOI_type_node,
20162 					     "__builtin_neon_oi");
20163   (*lang_hooks.types.register_builtin_type) (intCI_type_node,
20164 					     "__builtin_neon_ci");
20165   (*lang_hooks.types.register_builtin_type) (intXI_type_node,
20166 					     "__builtin_neon_xi");
20167 
20168   /* Pointers to vector types.  */
20169   V8QI_pointer_node = build_pointer_type (V8QI_type_node);
20170   V4HI_pointer_node = build_pointer_type (V4HI_type_node);
20171   V2SI_pointer_node = build_pointer_type (V2SI_type_node);
20172   V2SF_pointer_node = build_pointer_type (V2SF_type_node);
20173   V16QI_pointer_node = build_pointer_type (V16QI_type_node);
20174   V8HI_pointer_node = build_pointer_type (V8HI_type_node);
20175   V4SI_pointer_node = build_pointer_type (V4SI_type_node);
20176   V4SF_pointer_node = build_pointer_type (V4SF_type_node);
20177   V2DI_pointer_node = build_pointer_type (V2DI_type_node);
20178 
20179   /* Operations which return results as pairs.  */
20180   void_ftype_pv8qi_v8qi_v8qi =
20181     build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
20182   			      V8QI_type_node, NULL);
20183   void_ftype_pv4hi_v4hi_v4hi =
20184     build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
20185   			      V4HI_type_node, NULL);
20186   void_ftype_pv2si_v2si_v2si =
20187     build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
20188   			      V2SI_type_node, NULL);
20189   void_ftype_pv2sf_v2sf_v2sf =
20190     build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
20191   			      V2SF_type_node, NULL);
20192   void_ftype_pdi_di_di =
20193     build_function_type_list (void_type_node, intDI_pointer_node,
20194 			      neon_intDI_type_node, neon_intDI_type_node, NULL);
20195   void_ftype_pv16qi_v16qi_v16qi =
20196     build_function_type_list (void_type_node, V16QI_pointer_node,
20197 			      V16QI_type_node, V16QI_type_node, NULL);
20198   void_ftype_pv8hi_v8hi_v8hi =
20199     build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
20200   			      V8HI_type_node, NULL);
20201   void_ftype_pv4si_v4si_v4si =
20202     build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
20203   			      V4SI_type_node, NULL);
20204   void_ftype_pv4sf_v4sf_v4sf =
20205     build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
20206   			      V4SF_type_node, NULL);
20207   void_ftype_pv2di_v2di_v2di =
20208     build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
20209 			      V2DI_type_node, NULL);
20210 
20211   dreg_types[0] = V8QI_type_node;
20212   dreg_types[1] = V4HI_type_node;
20213   dreg_types[2] = V2SI_type_node;
20214   dreg_types[3] = V2SF_type_node;
20215   dreg_types[4] = neon_intDI_type_node;
20216 
20217   qreg_types[0] = V16QI_type_node;
20218   qreg_types[1] = V8HI_type_node;
20219   qreg_types[2] = V4SI_type_node;
20220   qreg_types[3] = V4SF_type_node;
20221   qreg_types[4] = V2DI_type_node;
20222 
20223   for (i = 0; i < 5; i++)
20224     {
20225       int j;
20226       for (j = 0; j < 5; j++)
20227         {
20228           reinterp_ftype_dreg[i][j]
20229             = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
20230           reinterp_ftype_qreg[i][j]
20231             = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
20232         }
20233     }
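
  /* As an example of what the loop above builds: reinterp_ftype_dreg[0][1]
     is the type "V8QI function taking a V4HI", which the NEON_REINTERP
     case below selects for the vreinterpretv8qi builtin operating on v4hi
     (an illustration only; the indices come from the result mode and
     d->mode % 5).  */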
20234 
20235   for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
20236        i < ARRAY_SIZE (neon_builtin_data);
20237        i++, fcode++)
20238     {
20239       neon_builtin_datum *d = &neon_builtin_data[i];
20240 
20241       const char* const modenames[] = {
20242 	"v8qi", "v4hi", "v2si", "v2sf", "di",
20243 	"v16qi", "v8hi", "v4si", "v4sf", "v2di",
20244 	"ti", "ei", "oi"
20245       };
20246       char namebuf[60];
20247       tree ftype = NULL;
20248       int is_load = 0, is_store = 0;
20249 
20250       gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
20251 
20252       d->fcode = fcode;
20253 
20254       switch (d->itype)
20255 	{
20256 	case NEON_LOAD1:
20257 	case NEON_LOAD1LANE:
20258 	case NEON_LOADSTRUCT:
20259 	case NEON_LOADSTRUCTLANE:
20260 	  is_load = 1;
20261 	  /* Fall through.  */
20262 	case NEON_STORE1:
20263 	case NEON_STORE1LANE:
20264 	case NEON_STORESTRUCT:
20265 	case NEON_STORESTRUCTLANE:
20266 	  if (!is_load)
20267 	    is_store = 1;
20268 	  /* Fall through.  */
20269 	case NEON_UNOP:
20270 	case NEON_RINT:
20271 	case NEON_BINOP:
20272 	case NEON_LOGICBINOP:
20273 	case NEON_SHIFTINSERT:
20274 	case NEON_TERNOP:
20275 	case NEON_GETLANE:
20276 	case NEON_SETLANE:
20277 	case NEON_CREATE:
20278 	case NEON_DUP:
20279 	case NEON_DUPLANE:
20280 	case NEON_SHIFTIMM:
20281 	case NEON_SHIFTACC:
20282 	case NEON_COMBINE:
20283 	case NEON_SPLIT:
20284 	case NEON_CONVERT:
20285 	case NEON_FIXCONV:
20286 	case NEON_LANEMUL:
20287 	case NEON_LANEMULL:
20288 	case NEON_LANEMULH:
20289 	case NEON_LANEMAC:
20290 	case NEON_SCALARMUL:
20291 	case NEON_SCALARMULL:
20292 	case NEON_SCALARMULH:
20293 	case NEON_SCALARMAC:
20294 	case NEON_SELECT:
20295 	case NEON_VTBL:
20296 	case NEON_VTBX:
20297 	  {
20298 	    int k;
20299 	    tree return_type = void_type_node, args = void_list_node;
20300 
20301 	    /* Build a function type directly from the insn_data for
20302 	       this builtin.  The build_function_type() function takes
20303 	       care of removing duplicates for us.  */
20304 	    for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
20305 	      {
20306 		tree eltype;
20307 
20308 		if (is_load && k == 1)
20309 		  {
20310 		    /* Neon load patterns always have the memory
20311 		       operand in the operand 1 position.  */
20312 		    gcc_assert (insn_data[d->code].operand[k].predicate
20313 				== neon_struct_operand);
20314 
20315 		    switch (d->mode)
20316 		      {
20317 		      case T_V8QI:
20318 		      case T_V16QI:
20319 			eltype = const_intQI_pointer_node;
20320 			break;
20321 
20322 		      case T_V4HI:
20323 		      case T_V8HI:
20324 			eltype = const_intHI_pointer_node;
20325 			break;
20326 
20327 		      case T_V2SI:
20328 		      case T_V4SI:
20329 			eltype = const_intSI_pointer_node;
20330 			break;
20331 
20332 		      case T_V2SF:
20333 		      case T_V4SF:
20334 			eltype = const_float_pointer_node;
20335 			break;
20336 
20337 		      case T_DI:
20338 		      case T_V2DI:
20339 			eltype = const_intDI_pointer_node;
20340 			break;
20341 
20342 		      default: gcc_unreachable ();
20343 		      }
20344 		  }
20345 		else if (is_store && k == 0)
20346 		  {
20347 		    /* Similarly, Neon store patterns use operand 0 as
20348 		       the memory location to store to.  */
20349 		    gcc_assert (insn_data[d->code].operand[k].predicate
20350 				== neon_struct_operand);
20351 
20352 		    switch (d->mode)
20353 		      {
20354 		      case T_V8QI:
20355 		      case T_V16QI:
20356 			eltype = intQI_pointer_node;
20357 			break;
20358 
20359 		      case T_V4HI:
20360 		      case T_V8HI:
20361 			eltype = intHI_pointer_node;
20362 			break;
20363 
20364 		      case T_V2SI:
20365 		      case T_V4SI:
20366 			eltype = intSI_pointer_node;
20367 			break;
20368 
20369 		      case T_V2SF:
20370 		      case T_V4SF:
20371 			eltype = float_pointer_node;
20372 			break;
20373 
20374 		      case T_DI:
20375 		      case T_V2DI:
20376 			eltype = intDI_pointer_node;
20377 			break;
20378 
20379 		      default: gcc_unreachable ();
20380 		      }
20381 		  }
20382 		else
20383 		  {
20384 		    switch (insn_data[d->code].operand[k].mode)
20385 		      {
20386 		      case VOIDmode: eltype = void_type_node; break;
20387 			/* Scalars.  */
20388 		      case QImode: eltype = neon_intQI_type_node; break;
20389 		      case HImode: eltype = neon_intHI_type_node; break;
20390 		      case SImode: eltype = neon_intSI_type_node; break;
20391 		      case SFmode: eltype = neon_float_type_node; break;
20392 		      case DImode: eltype = neon_intDI_type_node; break;
20393 		      case TImode: eltype = intTI_type_node; break;
20394 		      case EImode: eltype = intEI_type_node; break;
20395 		      case OImode: eltype = intOI_type_node; break;
20396 		      case CImode: eltype = intCI_type_node; break;
20397 		      case XImode: eltype = intXI_type_node; break;
20398 			/* 64-bit vectors.  */
20399 		      case V8QImode: eltype = V8QI_type_node; break;
20400 		      case V4HImode: eltype = V4HI_type_node; break;
20401 		      case V2SImode: eltype = V2SI_type_node; break;
20402 		      case V2SFmode: eltype = V2SF_type_node; break;
20403 			/* 128-bit vectors.  */
20404 		      case V16QImode: eltype = V16QI_type_node; break;
20405 		      case V8HImode: eltype = V8HI_type_node; break;
20406 		      case V4SImode: eltype = V4SI_type_node; break;
20407 		      case V4SFmode: eltype = V4SF_type_node; break;
20408 		      case V2DImode: eltype = V2DI_type_node; break;
20409 		      default: gcc_unreachable ();
20410 		      }
20411 		  }
20412 
20413 		if (k == 0 && !is_store)
20414 		  return_type = eltype;
20415 		else
20416 		  args = tree_cons (NULL_TREE, eltype, args);
20417 	      }
20418 
20419 	    ftype = build_function_type (return_type, args);
20420 	  }
20421 	  break;
20422 
20423 	case NEON_RESULTPAIR:
20424 	  {
20425 	    switch (insn_data[d->code].operand[1].mode)
20426 	      {
20427 	      case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
20428 	      case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
20429 	      case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
20430 	      case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
20431 	      case DImode: ftype = void_ftype_pdi_di_di; break;
20432 	      case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
20433 	      case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
20434 	      case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
20435 	      case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
20436 	      case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
20437 	      default: gcc_unreachable ();
20438 	      }
20439 	  }
20440 	  break;
20441 
20442 	case NEON_REINTERP:
20443 	  {
20444 	    /* We iterate over 5 doubleword types, then 5 quadword
20445 	       types.  */
20446 	    int rhs = d->mode % 5;
20447 	    switch (insn_data[d->code].operand[0].mode)
20448 	      {
20449 	      case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
20450 	      case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
20451 	      case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
20452 	      case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
20453 	      case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
20454 	      case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
20455 	      case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
20456 	      case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
20457 	      case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
20458 	      case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
20459 	      default: gcc_unreachable ();
20460 	      }
20461 	  }
20462 	  break;
20463 
20464 	default:
20465 	  gcc_unreachable ();
20466 	}
20467 
20468       gcc_assert (ftype != NULL);
20469 
20470       sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
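      /* So, for example, the VAR10 (BINOP, vext, ...) entry in the table
         above produces builtins named __builtin_neon_vextv8qi,
         __builtin_neon_vextv4hi and so on, one per mode listed for that
         entry.  */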
20471 
20472       decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
20473 				   NULL_TREE);
20474       arm_builtin_decls[fcode] = decl;
20475     }
20476 }
20477 
20478 #define def_mbuiltin(MASK, NAME, TYPE, CODE)				\
20479   do									\
20480     {									\
20481       if ((MASK) & insn_flags)						\
20482 	{								\
20483 	  tree bdecl;							\
20484 	  bdecl = add_builtin_function ((NAME), (TYPE), (CODE),		\
20485 					BUILT_IN_MD, NULL, NULL_TREE);	\
20486 	  arm_builtin_decls[CODE] = bdecl;				\
20487 	}								\
20488     }									\
20489   while (0)
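
/* As an illustration (the actual registrations are made through the
   iwmmx_mbuiltin and iwmmx2_mbuiltin wrappers further down), a call such
   as

     def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
                   ARM_BUILTIN_WZERO);

   only registers the builtin when the FL_IWMMXT bit is set in insn_flags
   for the selected CPU.  */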
20490 
20491 struct builtin_description
20492 {
20493   const unsigned int       mask;
20494   const enum insn_code     icode;
20495   const char * const       name;
20496   const enum arm_builtins  code;
20497   const enum rtx_code      comparison;
20498   const unsigned int       flag;
20499 };
20500 
20501 static const struct builtin_description bdesc_2arg[] =
20502 {
20503 #define IWMMXT_BUILTIN(code, string, builtin) \
20504   { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
20505     ARM_BUILTIN_##builtin, UNKNOWN, 0 },
20506 
20507 #define IWMMXT2_BUILTIN(code, string, builtin) \
20508   { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
20509     ARM_BUILTIN_##builtin, UNKNOWN, 0 },
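
/* Each line below expands to one table initializer; for instance,
   IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB) becomes
     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },
   so each entry describes one two-operand builtin.  */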
20510 
20511   IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
20512   IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
20513   IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
20514   IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
20515   IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
20516   IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
20517   IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
20518   IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
20519   IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
20520   IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
20521   IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
20522   IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
20523   IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
20524   IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
20525   IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
20526   IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
20527   IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
20528   IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
20529   IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
20530   IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
20531   IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
20532   IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
20533   IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
20534   IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
20535   IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
20536   IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
20537   IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
20538   IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
20539   IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
20540   IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
20541   IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
20542   IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
20543   IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
20544   IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
20545   IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
20546   IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
20547   IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
20548   IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
20549   IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
20550   IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
20551   IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
20552   IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
20553   IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
20554   IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
20555   IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
20556   IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
20557   IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
20558   IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
20559   IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
20560   IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
20561   IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
20562   IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
20563   IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
20564   IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
20565   IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
20566   IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
20567   IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
20568   IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
20569   IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
20570   IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
20571   IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
20572   IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
20573   IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
20574   IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
20575   IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
20576   IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
20577   IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
20578   IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
20579   IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
20580   IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
20581   IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
20582   IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
20583   IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
20584   IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
20585   IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
20586   IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
20587   IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
20588   IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
20589 
20590 #define IWMMXT_BUILTIN2(code, builtin) \
20591   { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
20592 
20593 #define IWMMXT2_BUILTIN2(code, builtin) \
20594   { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
20595 
20596   IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
20597   IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
20598   IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
20599   IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
20600   IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
20601   IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
20602   IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
20603   IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
20604   IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
20605   IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
20606 };
20607 
20608 static const struct builtin_description bdesc_1arg[] =
20609 {
20610   IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
20611   IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
20612   IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
20613   IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
20614   IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
20615   IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
20616   IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
20617   IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
20618   IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
20619   IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
20620   IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
20621   IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
20622   IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
20623   IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
20624   IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
20625   IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
20626   IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
20627   IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
20628   IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
20629   IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
20630   IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
20631   IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
20632   IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
20633   IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
20634 };
20635 
20636 /* Set up all the iWMMXt builtins.  This is not called if
20637    TARGET_IWMMXT is zero.  */
20638 
20639 static void
20640 arm_init_iwmmxt_builtins (void)
20641 {
20642   const struct builtin_description * d;
20643   size_t i;
20644 
20645   tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
20646   tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
20647   tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
20648 
20649   tree v8qi_ftype_v8qi_v8qi_int
20650     = build_function_type_list (V8QI_type_node,
20651 				V8QI_type_node, V8QI_type_node,
20652 				integer_type_node, NULL_TREE);
20653   tree v4hi_ftype_v4hi_int
20654     = build_function_type_list (V4HI_type_node,
20655 				V4HI_type_node, integer_type_node, NULL_TREE);
20656   tree v2si_ftype_v2si_int
20657     = build_function_type_list (V2SI_type_node,
20658 				V2SI_type_node, integer_type_node, NULL_TREE);
20659   tree v2si_ftype_di_di
20660     = build_function_type_list (V2SI_type_node,
20661 				long_long_integer_type_node,
20662 				long_long_integer_type_node,
20663 				NULL_TREE);
20664   tree di_ftype_di_int
20665     = build_function_type_list (long_long_integer_type_node,
20666 				long_long_integer_type_node,
20667 				integer_type_node, NULL_TREE);
20668   tree di_ftype_di_int_int
20669     = build_function_type_list (long_long_integer_type_node,
20670 				long_long_integer_type_node,
20671 				integer_type_node,
20672 				integer_type_node, NULL_TREE);
20673   tree int_ftype_v8qi
20674     = build_function_type_list (integer_type_node,
20675 				V8QI_type_node, NULL_TREE);
20676   tree int_ftype_v4hi
20677     = build_function_type_list (integer_type_node,
20678 				V4HI_type_node, NULL_TREE);
20679   tree int_ftype_v2si
20680     = build_function_type_list (integer_type_node,
20681 				V2SI_type_node, NULL_TREE);
20682   tree int_ftype_v8qi_int
20683     = build_function_type_list (integer_type_node,
20684 				V8QI_type_node, integer_type_node, NULL_TREE);
20685   tree int_ftype_v4hi_int
20686     = build_function_type_list (integer_type_node,
20687 				V4HI_type_node, integer_type_node, NULL_TREE);
20688   tree int_ftype_v2si_int
20689     = build_function_type_list (integer_type_node,
20690 				V2SI_type_node, integer_type_node, NULL_TREE);
20691   tree v8qi_ftype_v8qi_int_int
20692     = build_function_type_list (V8QI_type_node,
20693 				V8QI_type_node, integer_type_node,
20694 				integer_type_node, NULL_TREE);
20695   tree v4hi_ftype_v4hi_int_int
20696     = build_function_type_list (V4HI_type_node,
20697 				V4HI_type_node, integer_type_node,
20698 				integer_type_node, NULL_TREE);
20699   tree v2si_ftype_v2si_int_int
20700     = build_function_type_list (V2SI_type_node,
20701 				V2SI_type_node, integer_type_node,
20702 				integer_type_node, NULL_TREE);
20703   /* Miscellaneous.  */
20704   tree v8qi_ftype_v4hi_v4hi
20705     = build_function_type_list (V8QI_type_node,
20706 				V4HI_type_node, V4HI_type_node, NULL_TREE);
20707   tree v4hi_ftype_v2si_v2si
20708     = build_function_type_list (V4HI_type_node,
20709 				V2SI_type_node, V2SI_type_node, NULL_TREE);
20710   tree v8qi_ftype_v4hi_v8qi
20711     = build_function_type_list (V8QI_type_node,
20712 	                        V4HI_type_node, V8QI_type_node, NULL_TREE);
20713   tree v2si_ftype_v4hi_v4hi
20714     = build_function_type_list (V2SI_type_node,
20715 				V4HI_type_node, V4HI_type_node, NULL_TREE);
20716   tree v2si_ftype_v8qi_v8qi
20717     = build_function_type_list (V2SI_type_node,
20718 				V8QI_type_node, V8QI_type_node, NULL_TREE);
20719   tree v4hi_ftype_v4hi_di
20720     = build_function_type_list (V4HI_type_node,
20721 				V4HI_type_node, long_long_integer_type_node,
20722 				NULL_TREE);
20723   tree v2si_ftype_v2si_di
20724     = build_function_type_list (V2SI_type_node,
20725 				V2SI_type_node, long_long_integer_type_node,
20726 				NULL_TREE);
20727   tree di_ftype_void
20728     = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
20729   tree int_ftype_void
20730     = build_function_type_list (integer_type_node, NULL_TREE);
20731   tree di_ftype_v8qi
20732     = build_function_type_list (long_long_integer_type_node,
20733 				V8QI_type_node, NULL_TREE);
20734   tree di_ftype_v4hi
20735     = build_function_type_list (long_long_integer_type_node,
20736 				V4HI_type_node, NULL_TREE);
20737   tree di_ftype_v2si
20738     = build_function_type_list (long_long_integer_type_node,
20739 				V2SI_type_node, NULL_TREE);
20740   tree v2si_ftype_v4hi
20741     = build_function_type_list (V2SI_type_node,
20742 				V4HI_type_node, NULL_TREE);
20743   tree v4hi_ftype_v8qi
20744     = build_function_type_list (V4HI_type_node,
20745 				V8QI_type_node, NULL_TREE);
20746   tree v8qi_ftype_v8qi
20747     = build_function_type_list (V8QI_type_node,
20748 	                        V8QI_type_node, NULL_TREE);
20749   tree v4hi_ftype_v4hi
20750     = build_function_type_list (V4HI_type_node,
20751 	                        V4HI_type_node, NULL_TREE);
20752   tree v2si_ftype_v2si
20753     = build_function_type_list (V2SI_type_node,
20754 	                        V2SI_type_node, NULL_TREE);
20755 
20756   tree di_ftype_di_v4hi_v4hi
20757     = build_function_type_list (long_long_unsigned_type_node,
20758 				long_long_unsigned_type_node,
20759 				V4HI_type_node, V4HI_type_node,
20760 				NULL_TREE);
20761 
20762   tree di_ftype_v4hi_v4hi
20763     = build_function_type_list (long_long_unsigned_type_node,
20764 				V4HI_type_node, V4HI_type_node,
20765 				NULL_TREE);
20766 
20767   tree v2si_ftype_v2si_v4hi_v4hi
20768     = build_function_type_list (V2SI_type_node,
20769                                 V2SI_type_node, V4HI_type_node,
20770                                 V4HI_type_node, NULL_TREE);
20771 
20772   tree v2si_ftype_v2si_v8qi_v8qi
20773     = build_function_type_list (V2SI_type_node,
20774                                 V2SI_type_node, V8QI_type_node,
20775                                 V8QI_type_node, NULL_TREE);
20776 
20777   tree di_ftype_di_v2si_v2si
20778      = build_function_type_list (long_long_unsigned_type_node,
20779                                  long_long_unsigned_type_node,
20780                                  V2SI_type_node, V2SI_type_node,
20781                                  NULL_TREE);
20782 
20783    tree di_ftype_di_di_int
20784      = build_function_type_list (long_long_unsigned_type_node,
20785                                  long_long_unsigned_type_node,
20786                                  long_long_unsigned_type_node,
20787                                  integer_type_node, NULL_TREE);
20788 
20789    tree void_ftype_int
20790      = build_function_type_list (void_type_node,
20791                                  integer_type_node, NULL_TREE);
20792 
20793    tree v8qi_ftype_char
20794      = build_function_type_list (V8QI_type_node,
20795                                  signed_char_type_node, NULL_TREE);
20796 
20797    tree v4hi_ftype_short
20798      = build_function_type_list (V4HI_type_node,
20799                                  short_integer_type_node, NULL_TREE);
20800 
20801    tree v2si_ftype_int
20802      = build_function_type_list (V2SI_type_node,
20803                                  integer_type_node, NULL_TREE);
20804 
20805   /* Normal vector binops.  */
20806   tree v8qi_ftype_v8qi_v8qi
20807     = build_function_type_list (V8QI_type_node,
20808 				V8QI_type_node, V8QI_type_node, NULL_TREE);
20809   tree v4hi_ftype_v4hi_v4hi
20810     = build_function_type_list (V4HI_type_node,
20811 				V4HI_type_node, V4HI_type_node, NULL_TREE);
20812   tree v2si_ftype_v2si_v2si
20813     = build_function_type_list (V2SI_type_node,
20814 				V2SI_type_node, V2SI_type_node, NULL_TREE);
20815   tree di_ftype_di_di
20816     = build_function_type_list (long_long_unsigned_type_node,
20817 				long_long_unsigned_type_node,
20818 				long_long_unsigned_type_node,
20819 				NULL_TREE);
20820 
20821   /* Add all builtins that are more or less simple operations on two
20822      operands.  */
20823   for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
20824     {
20825       /* Use one of the operands; the target can have a different mode for
20826 	 mask-generating compares.  */
20827       enum machine_mode mode;
20828       tree type;
20829 
20830       if (d->name == 0)
20831 	continue;
20832 
20833       mode = insn_data[d->icode].operand[1].mode;
20834 
20835       switch (mode)
20836 	{
20837 	case V8QImode:
20838 	  type = v8qi_ftype_v8qi_v8qi;
20839 	  break;
20840 	case V4HImode:
20841 	  type = v4hi_ftype_v4hi_v4hi;
20842 	  break;
20843 	case V2SImode:
20844 	  type = v2si_ftype_v2si_v2si;
20845 	  break;
20846 	case DImode:
20847 	  type = di_ftype_di_di;
20848 	  break;
20849 
20850 	default:
20851 	  gcc_unreachable ();
20852 	}
20853 
20854       def_mbuiltin (d->mask, d->name, type, d->code);
20855     }
20856 
20857   /* Add the remaining iWMMXt insns with somewhat more complicated types.  */
20858 #define iwmmx_mbuiltin(NAME, TYPE, CODE)			\
20859   def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE),	\
20860 		ARM_BUILTIN_ ## CODE)
20861 
20862 #define iwmmx2_mbuiltin(NAME, TYPE, CODE)                      \
20863   def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE),     \
20864                ARM_BUILTIN_ ## CODE)
20865 
20866   iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
20867   iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
20868   iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
20869   iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
20870   iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
20871   iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
20872   iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
20873   iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
20874   iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
20875 
20876   iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
20877   iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
20878   iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
20879   iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
20880   iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
20881   iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
20882 
20883   iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
20884   iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
20885   iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
20886   iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
20887   iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
20888   iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
20889 
20890   iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
20891   iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
20892   iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
20893   iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
20894   iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
20895   iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
20896 
20897   iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
20898   iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
20899   iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
20900   iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
20901   iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
20902   iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
20903 
20904   iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
20905 
20906   iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
20907   iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
20908   iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
20909   iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
20910   iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
20911   iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
20912   iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
20913   iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
20914   iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
20915   iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
20916 
20917   iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
20918   iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
20919   iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
20920   iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
20921   iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
20922   iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
20923   iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
20924   iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
20925   iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
20926 
20927   iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
20928   iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
20929   iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
20930 
20931   iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
20932   iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
20933   iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
20934 
20935   iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
20936   iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
20937 
20938   iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
20939   iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
20940   iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
20941   iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
20942   iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
20943   iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
20944 
20945   iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
20946   iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
20947   iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
20948   iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
20949   iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
20950   iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
20951   iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
20952   iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
20953   iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
20954   iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
20955   iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
20956   iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
20957 
20958   iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
20959   iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
20960   iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
20961   iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
20962 
20963   iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
20964   iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
20965   iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
20966   iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
20967   iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
20968   iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
20969   iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
20970 
20971   iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
20972   iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
20973   iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
20974 
20975   iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
20976   iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
20977   iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
20978   iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
20979 
20980   iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
20981   iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
20982   iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
20983   iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
20984 
20985   iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
20986   iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
20987   iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
20988   iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
20989 
20990   iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
20991   iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
20992   iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
20993   iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
20994 
20995   iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
20996   iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
20997   iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
20998   iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
20999 
21000   iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
21001   iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
21002   iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
21003   iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
21004 
21005   iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
21006 
21007   iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
21008   iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
21009   iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
21010 
21011 #undef iwmmx_mbuiltin
21012 #undef iwmmx2_mbuiltin
21013 }
21014 
21015 static void
21016 arm_init_fp16_builtins (void)
21017 {
21018   tree fp16_type = make_node (REAL_TYPE);
21019   TYPE_PRECISION (fp16_type) = 16;
21020   layout_type (fp16_type);
21021   (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
21022 }
21023 
21024 static void
21025 arm_init_builtins (void)
21026 {
21027   if (TARGET_REALLY_IWMMXT)
21028     arm_init_iwmmxt_builtins ();
21029 
21030   if (TARGET_NEON)
21031     arm_init_neon_builtins ();
21032 
21033   if (arm_fp16_format)
21034     arm_init_fp16_builtins ();
21035 }
21036 
21037 /* Implement TARGET_BUILTIN_DECL.  Return the ARM builtin decl for CODE.  */
21038 
21039 static tree
21040 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
21041 {
21042   if (code >= ARM_BUILTIN_MAX)
21043     return error_mark_node;
21044 
21045   return arm_builtin_decls[code];
21046 }
21047 
21048 /* Implement TARGET_INVALID_PARAMETER_TYPE.  */
21049 
21050 static const char *
21051 arm_invalid_parameter_type (const_tree t)
21052 {
21053   if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
21054     return N_("function parameters cannot have __fp16 type");
21055   return NULL;
21056 }
21057 
21058 /* Implement TARGET_INVALID_RETURN_TYPE.  */
21059 
21060 static const char *
21061 arm_invalid_return_type (const_tree t)
21062 {
21063   if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
21064     return N_("functions cannot return __fp16 type");
21065   return NULL;
21066 }
21067 
21068 /* Implement TARGET_PROMOTED_TYPE.  */
21069 
21070 static tree
21071 arm_promoted_type (const_tree t)
21072 {
21073   if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
21074     return float_type_node;
21075   return NULL_TREE;
21076 }
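
/* The effect is that arithmetic on __fp16 operands is carried out in
   float; for example, with

     __fp16 a, b;
     float r = a + b;

   both operands are promoted to float before the addition.  (This is a
   sketch of the semantics the hook above provides, not code from this
   file.)  */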
21077 
21078 /* Implement TARGET_CONVERT_TO_TYPE.
21079    Specifically, this hook implements the peculiarity of the ARM
21080    half-precision floating-point C semantics that requires conversions between
21081    __fp16 and double to go through an intermediate conversion to float.  */
21082 
21083 static tree
21084 arm_convert_to_type (tree type, tree expr)
21085 {
21086   tree fromtype = TREE_TYPE (expr);
21087   if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
21088     return NULL_TREE;
21089   if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
21090       || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
21091     return convert (type, convert (float_type_node, expr));
21092   return NULL_TREE;
21093 }
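
/* Concretely, for

     __fp16 h;
     double d = h;

   the hook above makes the conversion happen as (double)(float)h, and the
   assignment h = d likewise goes through (__fp16)(float)d; conversions
   between __fp16 and float themselves return NULL_TREE and are left to
   the generic code.  (Illustration only.)  */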
21094 
21095 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
21096    This simply adds HFmode as a supported mode; even though we don't
21097    implement arithmetic on this type directly, it's supported by
21098    optabs conversions, much the way the double-word arithmetic is
21099    special-cased in the default hook.  */
21100 
21101 static bool
21102 arm_scalar_mode_supported_p (enum machine_mode mode)
21103 {
21104   if (mode == HFmode)
21105     return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
21106   else if (ALL_FIXED_POINT_MODE_P (mode))
21107     return true;
21108   else
21109     return default_scalar_mode_supported_p (mode);
21110 }
21111 
21112 /* Errors in the source file can cause expand_expr to return const0_rtx
21113    where we expect a vector.  To avoid crashing, use one of the vector
21114    clear instructions.  */
21115 
21116 static rtx
21117 safe_vector_operand (rtx x, enum machine_mode mode)
21118 {
21119   if (x != const0_rtx)
21120     return x;
21121   x = gen_reg_rtx (mode);
21122 
21123   emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
21124 			       : gen_rtx_SUBREG (DImode, x, 0)));
21125   return x;
21126 }
21127 
21128 /* Subroutine of arm_expand_builtin to take care of binop insns.  */
21129 
21130 static rtx
21131 arm_expand_binop_builtin (enum insn_code icode,
21132 			  tree exp, rtx target)
21133 {
21134   rtx pat;
21135   tree arg0 = CALL_EXPR_ARG (exp, 0);
21136   tree arg1 = CALL_EXPR_ARG (exp, 1);
21137   rtx op0 = expand_normal (arg0);
21138   rtx op1 = expand_normal (arg1);
21139   enum machine_mode tmode = insn_data[icode].operand[0].mode;
21140   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
21141   enum machine_mode mode1 = insn_data[icode].operand[2].mode;
21142 
21143   if (VECTOR_MODE_P (mode0))
21144     op0 = safe_vector_operand (op0, mode0);
21145   if (VECTOR_MODE_P (mode1))
21146     op1 = safe_vector_operand (op1, mode1);
21147 
21148   if (! target
21149       || GET_MODE (target) != tmode
21150       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21151     target = gen_reg_rtx (tmode);
21152 
21153   gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
21154 	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
21155 
21156   if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21157     op0 = copy_to_mode_reg (mode0, op0);
21158   if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21159     op1 = copy_to_mode_reg (mode1, op1);
21160 
21161   pat = GEN_FCN (icode) (target, op0, op1);
21162   if (! pat)
21163     return 0;
21164   emit_insn (pat);
21165   return target;
21166 }
21167 
21168 /* Subroutine of arm_expand_builtin to take care of unop insns.  */
21169 
21170 static rtx
21171 arm_expand_unop_builtin (enum insn_code icode,
21172 			 tree exp, rtx target, int do_load)
21173 {
21174   rtx pat;
21175   tree arg0 = CALL_EXPR_ARG (exp, 0);
21176   rtx op0 = expand_normal (arg0);
21177   enum machine_mode tmode = insn_data[icode].operand[0].mode;
21178   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
21179 
21180   if (! target
21181       || GET_MODE (target) != tmode
21182       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21183     target = gen_reg_rtx (tmode);
21184   if (do_load)
21185     op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
21186   else
21187     {
21188       if (VECTOR_MODE_P (mode0))
21189 	op0 = safe_vector_operand (op0, mode0);
21190 
21191       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21192 	op0 = copy_to_mode_reg (mode0, op0);
21193     }
21194 
21195   pat = GEN_FCN (icode) (target, op0);
21196   if (! pat)
21197     return 0;
21198   emit_insn (pat);
21199   return target;
21200 }
21201 
21202 typedef enum {
21203   NEON_ARG_COPY_TO_REG,
21204   NEON_ARG_CONSTANT,
21205   NEON_ARG_MEMORY,
21206   NEON_ARG_STOP
21207 } builtin_arg;
21208 
21209 #define NEON_MAX_BUILTIN_ARGS 5
21210 
21211 /* EXP is a pointer argument to a Neon load or store intrinsic.  Derive
21212    and return an expression for the accessed memory.
21213 
21214    The intrinsic function operates on a block of registers that has
21215    mode REG_MODE.  This block contains vectors of type TYPE_MODE.  The
21216    function references the memory at EXP of type TYPE and in mode
21217    MEM_MODE; this mode may be BLKmode if no more suitable mode is
21218    available.  */
21219 
21220 static tree
21221 neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
21222 			  enum machine_mode reg_mode,
21223 			  neon_builtin_type_mode type_mode)
21224 {
21225   HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
21226   tree elem_type, upper_bound, array_type;
21227 
21228   /* Work out the size of the register block in bytes.  */
21229   reg_size = GET_MODE_SIZE (reg_mode);
21230 
21231   /* Work out the size of each vector in bytes.  */
21232   gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
21233   vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
21234 
21235   /* Work out how many vectors there are.  */
21236   gcc_assert (reg_size % vector_size == 0);
21237   nvectors = reg_size / vector_size;
21238 
21239   /* Work out the type of each element.  */
21240   gcc_assert (POINTER_TYPE_P (type));
21241   elem_type = TREE_TYPE (type);
21242 
21243   /* Work out how many elements are being loaded or stored.
21244      MEM_MODE == REG_MODE implies a one-to-one mapping between register
21245      and memory elements; anything else implies a lane load or store.  */
21246   if (mem_mode == reg_mode)
21247     nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
21248   else
21249     nelems = nvectors;
21250 
21251   /* Create a type that describes the full access.  */
21252   upper_bound = build_int_cst (size_type_node, nelems - 1);
21253   array_type = build_array_type (elem_type, build_index_type (upper_bound));
21254 
21255   /* Dereference EXP using that type.  */
21256   return fold_build2 (MEM_REF, array_type, exp,
21257 		      build_int_cst (build_pointer_type (array_type), 0));
21258 }
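
/* A sketch of the mapping implemented above, using hypothetical intrinsic
   calls: for a full-width load such as vld1q_f32, REG_MODE and MEM_MODE
   are the same 16-byte mode, the element type is float, and the access is
   described as float[4].  For a lane load such as vld2_lane_f32, MEM_MODE
   differs from REG_MODE, so one element is accessed per vector and the
   access is described as float[2].  */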
21259 
21260 /* Expand the operands of a Neon builtin and emit the instruction.  */
21261 static rtx
21262 arm_expand_neon_args (rtx target, int icode, int have_retval,
21263 		      neon_builtin_type_mode type_mode,
21264 		      tree exp, int fcode, ...)
21265 {
21266   va_list ap;
21267   rtx pat;
21268   tree arg[NEON_MAX_BUILTIN_ARGS];
21269   rtx op[NEON_MAX_BUILTIN_ARGS];
21270   tree arg_type;
21271   tree formals;
21272   enum machine_mode tmode = insn_data[icode].operand[0].mode;
21273   enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
21274   enum machine_mode other_mode;
21275   int argc = 0;
21276   int opno;
21277 
21278   if (have_retval
21279       && (!target
21280 	  || GET_MODE (target) != tmode
21281 	  || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
21282     target = gen_reg_rtx (tmode);
21283 
21284   va_start (ap, fcode);
21285 
21286   formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
21287 
21288   for (;;)
21289     {
21290       builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
21291 
21292       if (thisarg == NEON_ARG_STOP)
21293         break;
21294       else
21295         {
21296           opno = argc + have_retval;
21297           mode[argc] = insn_data[icode].operand[opno].mode;
21298           arg[argc] = CALL_EXPR_ARG (exp, argc);
21299 	  arg_type = TREE_VALUE (formals);
21300           if (thisarg == NEON_ARG_MEMORY)
21301             {
21302               other_mode = insn_data[icode].operand[1 - opno].mode;
21303               arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
21304 						    mode[argc], other_mode,
21305 						    type_mode);
21306             }
21307 
21308 	  /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure that a MEM rtx
21309 	     is returned.  */
21310 	  op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode,
21311 				  (thisarg == NEON_ARG_MEMORY
21312 				   ? EXPAND_MEMORY : EXPAND_NORMAL));
21313 
21314           switch (thisarg)
21315             {
21316             case NEON_ARG_COPY_TO_REG:
21317               /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
21318               if (!(*insn_data[icode].operand[opno].predicate)
21319                      (op[argc], mode[argc]))
21320                 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
21321               break;
21322 
21323             case NEON_ARG_CONSTANT:
21324               /* FIXME: This error message is somewhat unhelpful.  */
21325               if (!(*insn_data[icode].operand[opno].predicate)
21326                     (op[argc], mode[argc]))
21327 		error ("argument must be a constant");
21328               break;
21329 
21330             case NEON_ARG_MEMORY:
21331 	      /* Check if expand failed.  */
21332 	      if (op[argc] == const0_rtx)
21333 		return 0;
21334 	      gcc_assert (MEM_P (op[argc]));
21335 	      PUT_MODE (op[argc], mode[argc]);
21336 	      /* ??? arm_neon.h uses the same built-in functions for signed
21337 		 and unsigned accesses, casting where necessary.  This isn't
21338 		 alias safe.  */
21339 	      set_mem_alias_set (op[argc], 0);
21340 	      if (!(*insn_data[icode].operand[opno].predicate)
21341                     (op[argc], mode[argc]))
21342 		op[argc] = (replace_equiv_address
21343 			    (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
21344               break;
21345 
21346             case NEON_ARG_STOP:
21347               gcc_unreachable ();
21348             }
21349 
21350           argc++;
21351 	  formals = TREE_CHAIN (formals);
21352         }
21353     }
21354 
21355   va_end (ap);
21356 
21357   if (have_retval)
21358     switch (argc)
21359       {
21360       case 1:
21361 	pat = GEN_FCN (icode) (target, op[0]);
21362 	break;
21363 
21364       case 2:
21365 	pat = GEN_FCN (icode) (target, op[0], op[1]);
21366 	break;
21367 
21368       case 3:
21369 	pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
21370 	break;
21371 
21372       case 4:
21373 	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
21374 	break;
21375 
21376       case 5:
21377 	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
21378 	break;
21379 
21380       default:
21381 	gcc_unreachable ();
21382       }
21383   else
21384     switch (argc)
21385       {
21386       case 1:
21387 	pat = GEN_FCN (icode) (op[0]);
21388 	break;
21389 
21390       case 2:
21391 	pat = GEN_FCN (icode) (op[0], op[1]);
21392 	break;
21393 
21394       case 3:
21395 	pat = GEN_FCN (icode) (op[0], op[1], op[2]);
21396 	break;
21397 
21398       case 4:
21399 	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
21400 	break;
21401 
21402       case 5:
21403 	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
21404         break;
21405 
21406       default:
21407 	gcc_unreachable ();
21408       }
21409 
21410   if (!pat)
21411     return 0;
21412 
21413   emit_insn (pat);
21414 
21415   return target;
21416 }
21417 
21418 /* Expand a Neon builtin. These are "special" because they don't have symbolic
21419    constants defined per-instruction or per instruction-variant. Instead, the
21420    required info is looked up in the table neon_builtin_data.  */
21421 static rtx
21422 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
21423 {
21424   neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
21425   neon_itype itype = d->itype;
21426   enum insn_code icode = d->code;
21427   neon_builtin_type_mode type_mode = d->mode;
21428 
21429   switch (itype)
21430     {
21431     case NEON_UNOP:
21432     case NEON_CONVERT:
21433     case NEON_DUPLANE:
21434       return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21435         NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
21436 
21437     case NEON_BINOP:
21438     case NEON_SETLANE:
21439     case NEON_SCALARMUL:
21440     case NEON_SCALARMULL:
21441     case NEON_SCALARMULH:
21442     case NEON_SHIFTINSERT:
21443     case NEON_LOGICBINOP:
21444       return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21445         NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
21446         NEON_ARG_STOP);
21447 
21448     case NEON_TERNOP:
21449       return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21450         NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
21451         NEON_ARG_CONSTANT, NEON_ARG_STOP);
21452 
21453     case NEON_GETLANE:
21454     case NEON_FIXCONV:
21455     case NEON_SHIFTIMM:
21456       return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21457         NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
21458         NEON_ARG_STOP);
21459 
21460     case NEON_CREATE:
21461       return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21462         NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
21463 
21464     case NEON_DUP:
21465     case NEON_RINT:
21466     case NEON_SPLIT:
21467     case NEON_REINTERP:
21468       return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21469         NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
21470 
21471     case NEON_COMBINE:
21472     case NEON_VTBL:
21473       return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21474         NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
21475 
21476     case NEON_RESULTPAIR:
21477       return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
21478         NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
21479         NEON_ARG_STOP);
21480 
21481     case NEON_LANEMUL:
21482     case NEON_LANEMULL:
21483     case NEON_LANEMULH:
21484       return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21485         NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
21486         NEON_ARG_CONSTANT, NEON_ARG_STOP);
21487 
21488     case NEON_LANEMAC:
21489       return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21490         NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
21491         NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
21492 
21493     case NEON_SHIFTACC:
21494       return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21495         NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
21496         NEON_ARG_CONSTANT, NEON_ARG_STOP);
21497 
21498     case NEON_SCALARMAC:
21499       return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21500 	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
21501         NEON_ARG_CONSTANT, NEON_ARG_STOP);
21502 
21503     case NEON_SELECT:
21504     case NEON_VTBX:
21505       return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21506 	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
21507         NEON_ARG_STOP);
21508 
21509     case NEON_LOAD1:
21510     case NEON_LOADSTRUCT:
21511       return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21512 	NEON_ARG_MEMORY, NEON_ARG_STOP);
21513 
21514     case NEON_LOAD1LANE:
21515     case NEON_LOADSTRUCTLANE:
21516       return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21517 	NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
21518 	NEON_ARG_STOP);
21519 
21520     case NEON_STORE1:
21521     case NEON_STORESTRUCT:
21522       return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
21523 	NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
21524 
21525     case NEON_STORE1LANE:
21526     case NEON_STORESTRUCTLANE:
21527       return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
21528 	NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
21529 	NEON_ARG_STOP);
21530     }
21531 
21532   gcc_unreachable ();
21533 }
21534 
21535 /* Emit code to reinterpret one Neon type as another, without altering bits.  */
21536 void
21537 neon_reinterpret (rtx dest, rtx src)
21538 {
21539   emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
21540 }
21541 
21542 /* Emit code to place a Neon pair result in memory locations (with equal
21543    registers).  */
21544 void
21545 neon_emit_pair_result_insn (enum machine_mode mode,
21546 			    rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
21547                             rtx op1, rtx op2)
21548 {
21549   rtx mem = gen_rtx_MEM (mode, destaddr);
21550   rtx tmp1 = gen_reg_rtx (mode);
21551   rtx tmp2 = gen_reg_rtx (mode);
21552 
21553   emit_insn (intfn (tmp1, op1, op2, tmp2));
21554 
21555   emit_move_insn (mem, tmp1);
21556   mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
21557   emit_move_insn (mem, tmp2);
21558 }
21559 
21560 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
21561    not to early-clobber SRC registers in the process.
21562 
21563    We assume that the operands described by SRC and DEST represent a
21564    decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
21565    number of components into which the copy has been decomposed.  */
21566 void
21567 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
21568 {
21569   unsigned int i;
21570 
21571   if (!reg_overlap_mentioned_p (operands[0], operands[1])
21572       || REGNO (operands[0]) < REGNO (operands[1]))
21573     {
21574       for (i = 0; i < count; i++)
21575 	{
21576 	  operands[2 * i] = dest[i];
21577 	  operands[2 * i + 1] = src[i];
21578 	}
21579     }
21580   else
21581     {
21582       for (i = 0; i < count; i++)
21583 	{
21584 	  operands[2 * i] = dest[count - i - 1];
21585 	  operands[2 * i + 1] = src[count - i - 1];
21586 	}
21587     }
21588 }
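
/* An illustrative case of the ordering rule above: for a decomposed copy
   of the overlapping pair {d1,d2} into {d2,d3}, the components are
   emitted in reverse order,

     operands[0] = d3, operands[1] = d2   (d2 is read first)
     operands[2] = d2, operands[3] = d1

   so no source register is overwritten before it has been read.  When the
   destination starts at a lower register number, or the operands do not
   overlap, the natural order is already safe.  */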
21589 
21590 /* Split operands into moves from op[1] + op[2] into op[0].  */
21591 
21592 void
21593 neon_split_vcombine (rtx operands[3])
21594 {
21595   unsigned int dest = REGNO (operands[0]);
21596   unsigned int src1 = REGNO (operands[1]);
21597   unsigned int src2 = REGNO (operands[2]);
21598   enum machine_mode halfmode = GET_MODE (operands[1]);
21599   unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
21600   rtx destlo, desthi;
21601 
21602   if (src1 == dest && src2 == dest + halfregs)
21603     {
21604       /* No-op move.  Can't split to nothing; emit something.  */
21605       emit_note (NOTE_INSN_DELETED);
21606       return;
21607     }
21608 
21609   /* Preserve register attributes for variable tracking.  */
21610   destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
21611   desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
21612 			       GET_MODE_SIZE (halfmode));
21613 
21614   /* Special case of reversed high/low parts.  Use VSWP.  */
21615   if (src2 == dest && src1 == dest + halfregs)
21616     {
21617       rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
21618       rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
21619       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
21620       return;
21621     }
21622 
21623   if (!reg_overlap_mentioned_p (operands[2], destlo))
21624     {
21625       /* Try to avoid unnecessary moves if part of the result
21626 	 is in the right place already.  */
21627       if (src1 != dest)
21628 	emit_move_insn (destlo, operands[1]);
21629       if (src2 != dest + halfregs)
21630 	emit_move_insn (desthi, operands[2]);
21631     }
21632   else
21633     {
21634       if (src2 != dest + halfregs)
21635 	emit_move_insn (desthi, operands[2]);
21636       if (src1 != dest)
21637 	emit_move_insn (destlo, operands[1]);
21638     }
21639 }
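
/* A sketch of the cases handled above, writing a Q register as its two
   D halves {destlo,desthi}:

     vcombine (d4, d5) -> {d4,d5}   both halves already in place; only a
                                    deleted-insn note is emitted
     vcombine (d5, d4) -> {d4,d5}   exact swap of the halves; a single
                                    VSWP-style parallel set is emitted
     any other case                 two ordinary moves, ordered so that a
                                    source half living in the destination
                                    is read before it is overwritten.  */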
21640 
21641 /* Expand an expression EXP that calls a built-in function,
21642    with result going to TARGET if that's convenient
21643    (and in mode MODE if that's convenient).
21644    SUBTARGET may be used as the target for computing one of EXP's operands.
21645    IGNORE is nonzero if the value is to be ignored.  */
21646 
21647 static rtx
21648 arm_expand_builtin (tree exp,
21649 		    rtx target,
21650 		    rtx subtarget ATTRIBUTE_UNUSED,
21651 		    enum machine_mode mode ATTRIBUTE_UNUSED,
21652 		    int ignore ATTRIBUTE_UNUSED)
21653 {
21654   const struct builtin_description * d;
21655   enum insn_code    icode;
21656   tree              fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
21657   tree              arg0;
21658   tree              arg1;
21659   tree              arg2;
21660   rtx               op0;
21661   rtx               op1;
21662   rtx               op2;
21663   rtx               pat;
21664   int               fcode = DECL_FUNCTION_CODE (fndecl);
21665   size_t            i;
21666   enum machine_mode tmode;
21667   enum machine_mode mode0;
21668   enum machine_mode mode1;
21669   enum machine_mode mode2;
21670   int opint;
21671   int selector;
21672   int mask;
21673   int imm;
21674 
21675   if (fcode >= ARM_BUILTIN_NEON_BASE)
21676     return arm_expand_neon_builtin (fcode, exp, target);
21677 
21678   switch (fcode)
21679     {
21680     case ARM_BUILTIN_TEXTRMSB:
21681     case ARM_BUILTIN_TEXTRMUB:
21682     case ARM_BUILTIN_TEXTRMSH:
21683     case ARM_BUILTIN_TEXTRMUH:
21684     case ARM_BUILTIN_TEXTRMSW:
21685     case ARM_BUILTIN_TEXTRMUW:
21686       icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
21687 	       : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
21688 	       : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
21689 	       : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
21690 	       : CODE_FOR_iwmmxt_textrmw);
21691 
21692       arg0 = CALL_EXPR_ARG (exp, 0);
21693       arg1 = CALL_EXPR_ARG (exp, 1);
21694       op0 = expand_normal (arg0);
21695       op1 = expand_normal (arg1);
21696       tmode = insn_data[icode].operand[0].mode;
21697       mode0 = insn_data[icode].operand[1].mode;
21698       mode1 = insn_data[icode].operand[2].mode;
21699 
21700       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21701 	op0 = copy_to_mode_reg (mode0, op0);
21702       if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21703 	{
21704 	  /* @@@ better error message */
21705 	  error ("selector must be an immediate");
21706 	  return gen_reg_rtx (tmode);
21707 	}
21708 
21709       opint = INTVAL (op1);
21710       if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
21711 	{
21712 	  if (opint > 7 || opint < 0)
21713 	    error ("the range of selector should be in 0 to 7");
21714 	}
21715       else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
21716 	{
21717 	  if (opint > 3 || opint < 0)
21718 	    error ("the range of selector should be in 0 to 3");
21719 	}
21720       else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW.  */
21721 	{
21722 	  if (opint > 1 || opint < 0)
21723 	    error ("the range of selector should be in 0 to 1");
21724 	}
21725 
21726       if (target == 0
21727 	  || GET_MODE (target) != tmode
21728 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21729 	target = gen_reg_rtx (tmode);
21730       pat = GEN_FCN (icode) (target, op0, op1);
21731       if (! pat)
21732 	return 0;
21733       emit_insn (pat);
21734       return target;
21735 
21736     case ARM_BUILTIN_WALIGNI:
21737       /* If op2 is an immediate, call waligni, else call walignr.  */
21738       arg0 = CALL_EXPR_ARG (exp, 0);
21739       arg1 = CALL_EXPR_ARG (exp, 1);
21740       arg2 = CALL_EXPR_ARG (exp, 2);
21741       op0 = expand_normal (arg0);
21742       op1 = expand_normal (arg1);
21743       op2 = expand_normal (arg2);
21744       if (CONST_INT_P (op2))
21745         {
21746 	  icode = CODE_FOR_iwmmxt_waligni;
21747           tmode = insn_data[icode].operand[0].mode;
21748 	  mode0 = insn_data[icode].operand[1].mode;
21749 	  mode1 = insn_data[icode].operand[2].mode;
21750 	  mode2 = insn_data[icode].operand[3].mode;
21751           if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
21752 	    op0 = copy_to_mode_reg (mode0, op0);
21753           if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
21754 	    op1 = copy_to_mode_reg (mode1, op1);
21755           gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
21756 	  selector = INTVAL (op2);
21757 	  if (selector > 7 || selector < 0)
21758 	    error ("the range of selector should be in 0 to 7");
21759 	}
21760       else
21761         {
21762 	  icode = CODE_FOR_iwmmxt_walignr;
21763           tmode = insn_data[icode].operand[0].mode;
21764 	  mode0 = insn_data[icode].operand[1].mode;
21765 	  mode1 = insn_data[icode].operand[2].mode;
21766 	  mode2 = insn_data[icode].operand[3].mode;
21767           if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
21768 	    op0 = copy_to_mode_reg (mode0, op0);
21769           if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
21770 	    op1 = copy_to_mode_reg (mode1, op1);
21771           if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
21772 	    op2 = copy_to_mode_reg (mode2, op2);
21773 	}
21774       if (target == 0
21775 	  || GET_MODE (target) != tmode
21776 	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
21777 	target = gen_reg_rtx (tmode);
21778       pat = GEN_FCN (icode) (target, op0, op1, op2);
21779       if (!pat)
21780 	return 0;
21781       emit_insn (pat);
21782       return target;
21783 
21784     case ARM_BUILTIN_TINSRB:
21785     case ARM_BUILTIN_TINSRH:
21786     case ARM_BUILTIN_TINSRW:
21787     case ARM_BUILTIN_WMERGE:
21788       icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
21789 	       : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
21790 	       : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
21791 	       : CODE_FOR_iwmmxt_tinsrw);
21792       arg0 = CALL_EXPR_ARG (exp, 0);
21793       arg1 = CALL_EXPR_ARG (exp, 1);
21794       arg2 = CALL_EXPR_ARG (exp, 2);
21795       op0 = expand_normal (arg0);
21796       op1 = expand_normal (arg1);
21797       op2 = expand_normal (arg2);
21798       tmode = insn_data[icode].operand[0].mode;
21799       mode0 = insn_data[icode].operand[1].mode;
21800       mode1 = insn_data[icode].operand[2].mode;
21801       mode2 = insn_data[icode].operand[3].mode;
21802 
21803       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21804 	op0 = copy_to_mode_reg (mode0, op0);
21805       if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21806 	op1 = copy_to_mode_reg (mode1, op1);
21807       if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
21808 	{
21809 	  error ("selector must be an immediate");
21810 	  return const0_rtx;
21811 	}
21812       if (icode == CODE_FOR_iwmmxt_wmerge)
21813 	{
21814 	  selector = INTVAL (op2);
21815 	  if (selector > 7 || selector < 0)
21816 	    error ("the range of selector should be in 0 to 7");
21817 	}
21818       if ((icode == CODE_FOR_iwmmxt_tinsrb)
21819 	  || (icode == CODE_FOR_iwmmxt_tinsrh)
21820 	  || (icode == CODE_FOR_iwmmxt_tinsrw))
21821         {
21822 	  mask = 0x01;
21823 	  selector= INTVAL (op2);
21824 	  if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
21825 	    error ("the range of selector should be in 0 to 7");
21826 	  else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 ||selector > 3))
21827 	    error ("the range of selector should be in 0 to 3");
21828 	  else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 ||selector > 1))
21829 	    error ("the range of selector should be in 0 to 1");
21830 	  mask <<= selector;
21831 	  op2 = GEN_INT (mask);
21832 	}
21833       if (target == 0
21834 	  || GET_MODE (target) != tmode
21835 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21836 	target = gen_reg_rtx (tmode);
21837       pat = GEN_FCN (icode) (target, op0, op1, op2);
21838       if (! pat)
21839 	return 0;
21840       emit_insn (pat);
21841       return target;
21842 
21843     case ARM_BUILTIN_SETWCGR0:
21844     case ARM_BUILTIN_SETWCGR1:
21845     case ARM_BUILTIN_SETWCGR2:
21846     case ARM_BUILTIN_SETWCGR3:
21847       icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
21848 	       : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
21849 	       : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
21850 	       : CODE_FOR_iwmmxt_setwcgr3);
21851       arg0 = CALL_EXPR_ARG (exp, 0);
21852       op0 = expand_normal (arg0);
21853       mode0 = insn_data[icode].operand[0].mode;
21854       if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
21855         op0 = copy_to_mode_reg (mode0, op0);
21856       pat = GEN_FCN (icode) (op0);
21857       if (!pat)
21858 	return 0;
21859       emit_insn (pat);
21860       return 0;
21861 
21862     case ARM_BUILTIN_GETWCGR0:
21863     case ARM_BUILTIN_GETWCGR1:
21864     case ARM_BUILTIN_GETWCGR2:
21865     case ARM_BUILTIN_GETWCGR3:
21866       icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
21867 	       : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
21868 	       : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
21869 	       : CODE_FOR_iwmmxt_getwcgr3);
21870       tmode = insn_data[icode].operand[0].mode;
21871       if (target == 0
21872 	  || GET_MODE (target) != tmode
21873 	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
21874         target = gen_reg_rtx (tmode);
21875       pat = GEN_FCN (icode) (target);
21876       if (!pat)
21877         return 0;
21878       emit_insn (pat);
21879       return target;
21880 
21881     case ARM_BUILTIN_WSHUFH:
21882       icode = CODE_FOR_iwmmxt_wshufh;
21883       arg0 = CALL_EXPR_ARG (exp, 0);
21884       arg1 = CALL_EXPR_ARG (exp, 1);
21885       op0 = expand_normal (arg0);
21886       op1 = expand_normal (arg1);
21887       tmode = insn_data[icode].operand[0].mode;
21888       mode1 = insn_data[icode].operand[1].mode;
21889       mode2 = insn_data[icode].operand[2].mode;
21890 
21891       if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21892 	op0 = copy_to_mode_reg (mode1, op0);
21893       if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21894 	{
21895 	  error ("mask must be an immediate");
21896 	  return const0_rtx;
21897 	}
21898       selector = INTVAL (op1);
21899       if (selector < 0 || selector > 255)
21900 	error ("the range of mask should be in 0 to 255");
21901       if (target == 0
21902 	  || GET_MODE (target) != tmode
21903 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21904 	target = gen_reg_rtx (tmode);
21905       pat = GEN_FCN (icode) (target, op0, op1);
21906       if (! pat)
21907 	return 0;
21908       emit_insn (pat);
21909       return target;
21910 
21911     case ARM_BUILTIN_WMADDS:
21912       return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
21913     case ARM_BUILTIN_WMADDSX:
21914       return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
21915     case ARM_BUILTIN_WMADDSN:
21916       return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
21917     case ARM_BUILTIN_WMADDU:
21918       return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
21919     case ARM_BUILTIN_WMADDUX:
21920       return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
21921     case ARM_BUILTIN_WMADDUN:
21922       return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
21923     case ARM_BUILTIN_WSADBZ:
21924       return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
21925     case ARM_BUILTIN_WSADHZ:
21926       return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
21927 
21928       /* Several three-argument builtins.  */
21929     case ARM_BUILTIN_WMACS:
21930     case ARM_BUILTIN_WMACU:
21931     case ARM_BUILTIN_TMIA:
21932     case ARM_BUILTIN_TMIAPH:
21933     case ARM_BUILTIN_TMIATT:
21934     case ARM_BUILTIN_TMIATB:
21935     case ARM_BUILTIN_TMIABT:
21936     case ARM_BUILTIN_TMIABB:
21937     case ARM_BUILTIN_WQMIABB:
21938     case ARM_BUILTIN_WQMIABT:
21939     case ARM_BUILTIN_WQMIATB:
21940     case ARM_BUILTIN_WQMIATT:
21941     case ARM_BUILTIN_WQMIABBN:
21942     case ARM_BUILTIN_WQMIABTN:
21943     case ARM_BUILTIN_WQMIATBN:
21944     case ARM_BUILTIN_WQMIATTN:
21945     case ARM_BUILTIN_WMIABB:
21946     case ARM_BUILTIN_WMIABT:
21947     case ARM_BUILTIN_WMIATB:
21948     case ARM_BUILTIN_WMIATT:
21949     case ARM_BUILTIN_WMIABBN:
21950     case ARM_BUILTIN_WMIABTN:
21951     case ARM_BUILTIN_WMIATBN:
21952     case ARM_BUILTIN_WMIATTN:
21953     case ARM_BUILTIN_WMIAWBB:
21954     case ARM_BUILTIN_WMIAWBT:
21955     case ARM_BUILTIN_WMIAWTB:
21956     case ARM_BUILTIN_WMIAWTT:
21957     case ARM_BUILTIN_WMIAWBBN:
21958     case ARM_BUILTIN_WMIAWBTN:
21959     case ARM_BUILTIN_WMIAWTBN:
21960     case ARM_BUILTIN_WMIAWTTN:
21961     case ARM_BUILTIN_WSADB:
21962     case ARM_BUILTIN_WSADH:
21963       icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
21964 	       : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
21965 	       : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
21966 	       : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
21967 	       : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
21968 	       : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
21969 	       : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
21970 	       : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
21971 	       : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
21972 	       : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
21973 	       : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
21974 	       : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
21975 	       : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
21976 	       : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
21977 	       : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
21978 	       : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
21979 	       : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
21980 	       : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
21981 	       : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
21982 	       : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
21983 	       : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
21984 	       : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
21985 	       : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
21986 	       : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
21987 	       : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
21988 	       : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
21989 	       : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
21990 	       : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
21991 	       : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
21992 	       : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
21993 	       : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
21994 	       : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
21995 	       : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
21996 	       : CODE_FOR_iwmmxt_wsadh);
21997       arg0 = CALL_EXPR_ARG (exp, 0);
21998       arg1 = CALL_EXPR_ARG (exp, 1);
21999       arg2 = CALL_EXPR_ARG (exp, 2);
22000       op0 = expand_normal (arg0);
22001       op1 = expand_normal (arg1);
22002       op2 = expand_normal (arg2);
22003       tmode = insn_data[icode].operand[0].mode;
22004       mode0 = insn_data[icode].operand[1].mode;
22005       mode1 = insn_data[icode].operand[2].mode;
22006       mode2 = insn_data[icode].operand[3].mode;
22007 
22008       if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
22009 	op0 = copy_to_mode_reg (mode0, op0);
22010       if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
22011 	op1 = copy_to_mode_reg (mode1, op1);
22012       if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
22013 	op2 = copy_to_mode_reg (mode2, op2);
22014       if (target == 0
22015 	  || GET_MODE (target) != tmode
22016 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22017 	target = gen_reg_rtx (tmode);
22018       pat = GEN_FCN (icode) (target, op0, op1, op2);
22019       if (! pat)
22020 	return 0;
22021       emit_insn (pat);
22022       return target;
22023 
22024     case ARM_BUILTIN_WZERO:
22025       target = gen_reg_rtx (DImode);
22026       emit_insn (gen_iwmmxt_clrdi (target));
22027       return target;
22028 
22029     case ARM_BUILTIN_WSRLHI:
22030     case ARM_BUILTIN_WSRLWI:
22031     case ARM_BUILTIN_WSRLDI:
22032     case ARM_BUILTIN_WSLLHI:
22033     case ARM_BUILTIN_WSLLWI:
22034     case ARM_BUILTIN_WSLLDI:
22035     case ARM_BUILTIN_WSRAHI:
22036     case ARM_BUILTIN_WSRAWI:
22037     case ARM_BUILTIN_WSRADI:
22038     case ARM_BUILTIN_WRORHI:
22039     case ARM_BUILTIN_WRORWI:
22040     case ARM_BUILTIN_WRORDI:
22041     case ARM_BUILTIN_WSRLH:
22042     case ARM_BUILTIN_WSRLW:
22043     case ARM_BUILTIN_WSRLD:
22044     case ARM_BUILTIN_WSLLH:
22045     case ARM_BUILTIN_WSLLW:
22046     case ARM_BUILTIN_WSLLD:
22047     case ARM_BUILTIN_WSRAH:
22048     case ARM_BUILTIN_WSRAW:
22049     case ARM_BUILTIN_WSRAD:
22050     case ARM_BUILTIN_WRORH:
22051     case ARM_BUILTIN_WRORW:
22052     case ARM_BUILTIN_WRORD:
22053       icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
22054 	       : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
22055 	       : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
22056 	       : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
22057 	       : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
22058 	       : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
22059 	       : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
22060 	       : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
22061 	       : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
22062 	       : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
22063 	       : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
22064 	       : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
22065 	       : fcode == ARM_BUILTIN_WSRLH  ? CODE_FOR_lshrv4hi3_di
22066 	       : fcode == ARM_BUILTIN_WSRLW  ? CODE_FOR_lshrv2si3_di
22067 	       : fcode == ARM_BUILTIN_WSRLD  ? CODE_FOR_lshrdi3_di
22068 	       : fcode == ARM_BUILTIN_WSLLH  ? CODE_FOR_ashlv4hi3_di
22069 	       : fcode == ARM_BUILTIN_WSLLW  ? CODE_FOR_ashlv2si3_di
22070 	       : fcode == ARM_BUILTIN_WSLLD  ? CODE_FOR_ashldi3_di
22071 	       : fcode == ARM_BUILTIN_WSRAH  ? CODE_FOR_ashrv4hi3_di
22072 	       : fcode == ARM_BUILTIN_WSRAW  ? CODE_FOR_ashrv2si3_di
22073 	       : fcode == ARM_BUILTIN_WSRAD  ? CODE_FOR_ashrdi3_di
22074 	       : fcode == ARM_BUILTIN_WRORH  ? CODE_FOR_rorv4hi3_di
22075 	       : fcode == ARM_BUILTIN_WRORW  ? CODE_FOR_rorv2si3_di
22076 	       : fcode == ARM_BUILTIN_WRORD  ? CODE_FOR_rordi3_di
22077 	       : CODE_FOR_nothing);
22078       arg1 = CALL_EXPR_ARG (exp, 1);
22079       op1 = expand_normal (arg1);
22080       if (GET_MODE (op1) == VOIDmode)
22081 	{
22082 	  imm = INTVAL (op1);
22083 	  if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
22084 	       || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
22085 	      && (imm < 0 || imm > 32))
22086 	    {
22087 	      if (fcode == ARM_BUILTIN_WRORHI)
22088 		error ("the range of count should be in 0 to 32.  please check the intrinsic _mm_rori_pi16 in code.");
22089 	      else if (fcode == ARM_BUILTIN_WRORWI)
22090 		error ("the range of count should be in 0 to 32.  please check the intrinsic _mm_rori_pi32 in code.");
22091 	      else if (fcode == ARM_BUILTIN_WRORH)
22092 		error ("the range of count should be in 0 to 32.  please check the intrinsic _mm_ror_pi16 in code.");
22093 	      else
22094 		error ("the range of count should be in 0 to 32.  please check the intrinsic _mm_ror_pi32 in code.");
22095 	    }
22096 	  else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
22097 		   && (imm < 0 || imm > 64))
22098 	    {
22099 	      if (fcode == ARM_BUILTIN_WRORDI)
22100 		error ("the range of count should be in 0 to 64.  please check the intrinsic _mm_rori_si64 in code.");
22101 	      else
22102 		error ("the range of count should be in 0 to 64.  please check the intrinsic _mm_ror_si64 in code.");
22103 	    }
22104 	  else if (imm < 0)
22105 	    {
22106 	      if (fcode == ARM_BUILTIN_WSRLHI)
22107 		error ("the count should be no less than 0.  please check the intrinsic _mm_srli_pi16 in code.");
22108 	      else if (fcode == ARM_BUILTIN_WSRLWI)
22109 		error ("the count should be no less than 0.  please check the intrinsic _mm_srli_pi32 in code.");
22110 	      else if (fcode == ARM_BUILTIN_WSRLDI)
22111 		error ("the count should be no less than 0.  please check the intrinsic _mm_srli_si64 in code.");
22112 	      else if (fcode == ARM_BUILTIN_WSLLHI)
22113 		error ("the count should be no less than 0.  please check the intrinsic _mm_slli_pi16 in code.");
22114 	      else if (fcode == ARM_BUILTIN_WSLLWI)
22115 		error ("the count should be no less than 0.  please check the intrinsic _mm_slli_pi32 in code.");
22116 	      else if (fcode == ARM_BUILTIN_WSLLDI)
22117 		error ("the count should be no less than 0.  please check the intrinsic _mm_slli_si64 in code.");
22118 	      else if (fcode == ARM_BUILTIN_WSRAHI)
22119 		error ("the count should be no less than 0.  please check the intrinsic _mm_srai_pi16 in code.");
22120 	      else if (fcode == ARM_BUILTIN_WSRAWI)
22121 		error ("the count should be no less than 0.  please check the intrinsic _mm_srai_pi32 in code.");
22122 	      else if (fcode == ARM_BUILTIN_WSRADI)
22123 		error ("the count should be no less than 0.  please check the intrinsic _mm_srai_si64 in code.");
22124 	      else if (fcode == ARM_BUILTIN_WSRLH)
22125 		error ("the count should be no less than 0.  please check the intrinsic _mm_srl_pi16 in code.");
22126 	      else if (fcode == ARM_BUILTIN_WSRLW)
22127 		error ("the count should be no less than 0.  please check the intrinsic _mm_srl_pi32 in code.");
22128 	      else if (fcode == ARM_BUILTIN_WSRLD)
22129 		error ("the count should be no less than 0.  please check the intrinsic _mm_srl_si64 in code.");
22130 	      else if (fcode == ARM_BUILTIN_WSLLH)
22131 		error ("the count should be no less than 0.  please check the intrinsic _mm_sll_pi16 in code.");
22132 	      else if (fcode == ARM_BUILTIN_WSLLW)
22133 		error ("the count should be no less than 0.  please check the intrinsic _mm_sll_pi32 in code.");
22134 	      else if (fcode == ARM_BUILTIN_WSLLD)
22135 		error ("the count should be no less than 0.  please check the intrinsic _mm_sll_si64 in code.");
22136 	      else if (fcode == ARM_BUILTIN_WSRAH)
22137 		error ("the count should be no less than 0.  please check the intrinsic _mm_sra_pi16 in code.");
22138 	      else if (fcode == ARM_BUILTIN_WSRAW)
22139 		error ("the count should be no less than 0.  please check the intrinsic _mm_sra_pi32 in code.");
22140 	      else
22141 		error ("the count should be no less than 0.  please check the intrinsic _mm_sra_si64 in code.");
22142 	    }
22143 	}
22144       return arm_expand_binop_builtin (icode, exp, target);
22145 
22146     default:
22147       break;
22148     }
22149 
22150   for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
22151     if (d->code == (const enum arm_builtins) fcode)
22152       return arm_expand_binop_builtin (d->icode, exp, target);
22153 
22154   for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
22155     if (d->code == (const enum arm_builtins) fcode)
22156       return arm_expand_unop_builtin (d->icode, exp, target, 0);
22157 
22158   /* @@@ Should really do something sensible here.  */
22159   return NULL_RTX;
22160 }
22161 
22162 /* Return the number (counting from 0) of
22163    the least significant set bit in MASK.  */
22164 
22165 inline static int
22166 number_of_first_bit_set (unsigned mask)
22167 {
22168   return ctz_hwi (mask);
22169 }
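
/* For example, number_of_first_bit_set (0x0c) is 2 and
   number_of_first_bit_set (0x01) is 0.  Callers pass non-empty register
   masks, so the zero case does not arise here.  */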
22170 
22171 /* Like emit_multi_reg_push, but allowing for a different set of
22172    registers to be described as saved.  MASK is the set of registers
22173    to be saved; REAL_REGS is the set of registers to be described as
22174    saved.  If REAL_REGS is 0, only describe the stack adjustment.  */
22175 
22176 static rtx
22177 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
22178 {
22179   unsigned long regno;
22180   rtx par[10], tmp, reg, insn;
22181   int i, j;
22182 
22183   /* Build the parallel of the registers actually being stored.  */
22184   for (i = 0; mask; ++i, mask &= mask - 1)
22185     {
22186       regno = ctz_hwi (mask);
22187       reg = gen_rtx_REG (SImode, regno);
22188 
22189       if (i == 0)
22190 	tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
22191       else
22192 	tmp = gen_rtx_USE (VOIDmode, reg);
22193 
22194       par[i] = tmp;
22195     }
22196 
22197   tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
22198   tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22199   tmp = gen_frame_mem (BLKmode, tmp);
22200   tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
22201   par[0] = tmp;
22202 
22203   tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
22204   insn = emit_insn (tmp);
22205 
22206   /* Always build the stack adjustment note for unwind info.  */
22207   tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
22208   tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
22209   par[0] = tmp;
22210 
22211   /* Build the parallel of the registers recorded as saved for unwind.  */
22212   for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
22213     {
22214       regno = ctz_hwi (real_regs);
22215       reg = gen_rtx_REG (SImode, regno);
22216 
22217       tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
22218       tmp = gen_frame_mem (SImode, tmp);
22219       tmp = gen_rtx_SET (VOIDmode, tmp, reg);
22220       RTX_FRAME_RELATED_P (tmp) = 1;
22221       par[j + 1] = tmp;
22222     }
22223 
22224   if (j == 0)
22225     tmp = par[0];
22226   else
22227     {
22228       RTX_FRAME_RELATED_P (par[0]) = 1;
22229       tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
22230     }
22231 
22232   add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
22233 
22234   return insn;
22235 }
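
/* A hypothetical use of the MASK / REAL_REGS distinction above:

     thumb1_emit_multi_reg_push (1 << 1, 1 << LR_REGNUM);

   physically pushes r1 (assumed here to hold a copy of the return
   address) but describes the saved slot as LR in the unwind information;
   passing REAL_REGS as 0 instead records only the stack-pointer
   adjustment.  */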
22236 
22237 /* Emit code to pop registers from the stack.  F is the
22238    assembly file.  MASK is the registers to pop.  */
22239 static void
22240 thumb_pop (FILE *f, unsigned long mask)
22241 {
22242   int regno;
22243   int lo_mask = mask & 0xFF;
22244   int pushed_words = 0;
22245 
22246   gcc_assert (mask);
22247 
22248   if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
22249     {
22250       /* Special case.  Do not generate a POP PC statement here; do it in
22251 	 thumb_exit ().  */
22252       thumb_exit (f, -1);
22253       return;
22254     }
22255 
22256   fprintf (f, "\tpop\t{");
22257 
22258   /* Look at the low registers first.  */
22259   for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
22260     {
22261       if (lo_mask & 1)
22262 	{
22263 	  asm_fprintf (f, "%r", regno);
22264 
22265 	  if ((lo_mask & ~1) != 0)
22266 	    fprintf (f, ", ");
22267 
22268 	  pushed_words++;
22269 	}
22270     }
22271 
22272   if (mask & (1 << PC_REGNUM))
22273     {
22274       /* Catch popping the PC.  */
22275       if (TARGET_INTERWORK || TARGET_BACKTRACE
22276 	  || crtl->calls_eh_return)
22277 	{
22278 	  /* The PC is never popped directly; instead
22279 	     it is popped into r3 and then BX is used.  */
22280 	  fprintf (f, "}\n");
22281 
22282 	  thumb_exit (f, -1);
22283 
22284 	  return;
22285 	}
22286       else
22287 	{
22288 	  if (mask & 0xFF)
22289 	    fprintf (f, ", ");
22290 
22291 	  asm_fprintf (f, "%r", PC_REGNUM);
22292 	}
22293     }
22294 
22295   fprintf (f, "}\n");
22296 }
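
/* As an example of the output produced above (a sketch, assuming no
   interworking, backtrace or EH return is involved), a MASK containing
   r0, r1 and the PC results in

	pop	{r0, r1, pc}

   whereas a mask consisting only of the PC is redirected to
   thumb_exit () instead of emitting a POP.  */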
22297 
22298 /* Generate code to return from a thumb function.
22299    If 'reg_containing_return_addr' is -1, then the return address is
22300    actually on the stack, at the stack pointer.  */
22301 static void
22302 thumb_exit (FILE *f, int reg_containing_return_addr)
22303 {
22304   unsigned regs_available_for_popping;
22305   unsigned regs_to_pop;
22306   int pops_needed;
22307   unsigned available;
22308   unsigned required;
22309   int mode;
22310   int size;
22311   int restore_a4 = FALSE;
22312 
22313   /* Compute the registers we need to pop.  */
22314   regs_to_pop = 0;
22315   pops_needed = 0;
22316 
22317   if (reg_containing_return_addr == -1)
22318     {
22319       regs_to_pop |= 1 << LR_REGNUM;
22320       ++pops_needed;
22321     }
22322 
22323   if (TARGET_BACKTRACE)
22324     {
22325       /* Restore the (ARM) frame pointer and stack pointer.  */
22326       regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
22327       pops_needed += 2;
22328     }
22329 
22330   /* If there is nothing to pop then just emit the BX instruction and
22331      return.  */
22332   if (pops_needed == 0)
22333     {
22334       if (crtl->calls_eh_return)
22335 	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
22336 
22337       asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
22338       return;
22339     }
22340   /* Otherwise if we are not supporting interworking and we have not created
22341      a backtrace structure and the function was not entered in ARM mode then
22342      just pop the return address straight into the PC.  */
22343   else if (!TARGET_INTERWORK
22344 	   && !TARGET_BACKTRACE
22345 	   && !is_called_in_ARM_mode (current_function_decl)
22346 	   && !crtl->calls_eh_return)
22347     {
22348       asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
22349       return;
22350     }
22351 
22352   /* Find out how many of the (return) argument registers we can corrupt.  */
22353   regs_available_for_popping = 0;
22354 
22355   /* If returning via __builtin_eh_return, the bottom three registers
22356      all contain information needed for the return.  */
22357   if (crtl->calls_eh_return)
22358     size = 12;
22359   else
22360     {
22361       /* Deduce the registers used from the function's return value.
22362 	 This is more reliable than examining df_regs_ever_live_p ()
22363 	 because that will be set if the register is ever used in the
22364 	 function, not just if the register is used to hold a return
22365 	 value.  */
22366 
22367       if (crtl->return_rtx != 0)
22368 	mode = GET_MODE (crtl->return_rtx);
22369       else
22370 	mode = DECL_MODE (DECL_RESULT (current_function_decl));
22371 
22372       size = GET_MODE_SIZE (mode);
22373 
22374       if (size == 0)
22375 	{
22376 	  /* In a void function we can use any argument register.
22377 	     In a function that returns a structure on the stack
22378 	     we can use the second and third argument registers.  */
22379 	  if (mode == VOIDmode)
22380 	    regs_available_for_popping =
22381 	      (1 << ARG_REGISTER (1))
22382 	      | (1 << ARG_REGISTER (2))
22383 	      | (1 << ARG_REGISTER (3));
22384 	  else
22385 	    regs_available_for_popping =
22386 	      (1 << ARG_REGISTER (2))
22387 	      | (1 << ARG_REGISTER (3));
22388 	}
22389       else if (size <= 4)
22390 	regs_available_for_popping =
22391 	  (1 << ARG_REGISTER (2))
22392 	  | (1 << ARG_REGISTER (3));
22393       else if (size <= 8)
22394 	regs_available_for_popping =
22395 	  (1 << ARG_REGISTER (3));
22396     }
22397 
22398   /* Match registers to be popped with registers into which we pop them.  */
22399   for (available = regs_available_for_popping,
22400        required  = regs_to_pop;
22401        required != 0 && available != 0;
22402        available &= ~(available & - available),
22403        required  &= ~(required  & - required))
22404     -- pops_needed;
22405 
22406   /* If we have any popping registers left over, remove them.  */
22407   if (available > 0)
22408     regs_available_for_popping &= ~available;
22409 
22410   /* Otherwise if we need another popping register we can use
22411      the fourth argument register.  */
22412   else if (pops_needed)
22413     {
22414       /* If we have not found any free argument registers and
22415 	 reg a4 contains the return address, we must move it.  */
22416       if (regs_available_for_popping == 0
22417 	  && reg_containing_return_addr == LAST_ARG_REGNUM)
22418 	{
22419 	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
22420 	  reg_containing_return_addr = LR_REGNUM;
22421 	}
22422       else if (size > 12)
22423 	{
22424 	  /* Register a4 is being used to hold part of the return value,
22425 	     but we have dire need of a free, low register.  */
22426 	  restore_a4 = TRUE;
22427 
22428 	  asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
22429 	}
22430 
22431       if (reg_containing_return_addr != LAST_ARG_REGNUM)
22432 	{
22433 	  /* The fourth argument register is available.  */
22434 	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
22435 
22436 	  --pops_needed;
22437 	}
22438     }
22439 
22440   /* Pop as many registers as we can.  */
22441   thumb_pop (f, regs_available_for_popping);
22442 
22443   /* Process the registers we popped.  */
22444   if (reg_containing_return_addr == -1)
22445     {
22446       /* The return address was popped into the lowest numbered register.  */
22447       regs_to_pop &= ~(1 << LR_REGNUM);
22448 
22449       reg_containing_return_addr =
22450 	number_of_first_bit_set (regs_available_for_popping);
22451 
22452       /* Remove this register from the mask of available registers, so that
22453          the return address will not be corrupted by further pops.  */
22454       regs_available_for_popping &= ~(1 << reg_containing_return_addr);
22455     }
22456 
22457   /* If we popped other registers then handle them here.  */
22458   if (regs_available_for_popping)
22459     {
22460       int frame_pointer;
22461 
22462       /* Work out which register currently contains the frame pointer.  */
22463       frame_pointer = number_of_first_bit_set (regs_available_for_popping);
22464 
22465       /* Move it into the correct place.  */
22466       asm_fprintf (f, "\tmov\t%r, %r\n",
22467 		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
22468 
22469       /* (Temporarily) remove it from the mask of popped registers.  */
22470       regs_available_for_popping &= ~(1 << frame_pointer);
22471       regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
22472 
22473       if (regs_available_for_popping)
22474 	{
22475 	  int stack_pointer;
22476 
22477 	  /* We popped the stack pointer as well,
22478 	     find the register that contains it.  */
22479 	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);
22480 
22481 	  /* Move it into the stack register.  */
22482 	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
22483 
22484 	  /* At this point we have popped all necessary registers, so
22485 	     do not worry about restoring regs_available_for_popping
22486 	     to its correct value:
22487 
22488 	     assert (pops_needed == 0)
22489 	     assert (regs_available_for_popping == (1 << frame_pointer))
22490 	     assert (regs_to_pop == (1 << STACK_POINTER))  */
22491 	}
22492       else
22493 	{
22494 	  /* Since we have just moved the popped value into the frame
22495 	     pointer, the popping register is available for reuse, and
22496 	     we know that we still have the stack pointer left to pop.  */
22497 	  regs_available_for_popping |= (1 << frame_pointer);
22498 	}
22499     }
22500 
22501   /* If we still have registers left on the stack, but we no longer have
22502      any registers into which we can pop them, then we must move the return
22503      address into the link register and make available the register that
22504      contained it.  */
22505   if (regs_available_for_popping == 0 && pops_needed > 0)
22506     {
22507       regs_available_for_popping |= 1 << reg_containing_return_addr;
22508 
22509       asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
22510 		   reg_containing_return_addr);
22511 
22512       reg_containing_return_addr = LR_REGNUM;
22513     }
22514 
22515   /* If we have registers left on the stack then pop some more.
22516      We know that at most we will want to pop FP and SP.  */
22517   if (pops_needed > 0)
22518     {
22519       int  popped_into;
22520       int  move_to;
22521 
22522       thumb_pop (f, regs_available_for_popping);
22523 
22524       /* We have popped either FP or SP.
22525 	 Move whichever one it is into the correct register.  */
22526       popped_into = number_of_first_bit_set (regs_available_for_popping);
22527       move_to     = number_of_first_bit_set (regs_to_pop);
22528 
22529       asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
22530 
22531       regs_to_pop &= ~(1 << move_to);
22532 
22533       --pops_needed;
22534     }
22535 
22536   /* If we still have not popped everything then we must have only
22537      had one register available to us and we are now popping the SP.  */
22538   if (pops_needed > 0)
22539     {
22540       int  popped_into;
22541 
22542       thumb_pop (f, regs_available_for_popping);
22543 
22544       popped_into = number_of_first_bit_set (regs_available_for_popping);
22545 
22546       asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
22547       /*
22548 	assert (regs_to_pop == (1 << STACK_POINTER))
22549 	assert (pops_needed == 1)
22550       */
22551     }
22552 
22553   /* If necessary restore the a4 register.  */
22554   if (restore_a4)
22555     {
22556       if (reg_containing_return_addr != LR_REGNUM)
22557 	{
22558 	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
22559 	  reg_containing_return_addr = LR_REGNUM;
22560 	}
22561 
22562       asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
22563     }
22564 
22565   if (crtl->calls_eh_return)
22566     asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
22567 
22568   /* Return to caller.  */
22569   asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
22570 }
22571 
22572 /* Scan INSN just before assembler is output for it.
22573    For Thumb-1, we track the status of the condition codes; this
22574    information is used in the cbranchsi4_insn pattern.  */
22575 void
22576 thumb1_final_prescan_insn (rtx insn)
22577 {
22578   if (flag_print_asm_name)
22579     asm_fprintf (asm_out_file, "%@ 0x%04x\n",
22580 		 INSN_ADDRESSES (INSN_UID (insn)));
22581   /* Don't overwrite the previous setter when we get to a cbranch.  */
22582   if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
22583     {
22584       enum attr_conds conds;
22585 
22586       if (cfun->machine->thumb1_cc_insn)
22587 	{
22588 	  if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
22589 	      || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
22590 	    CC_STATUS_INIT;
22591 	}
22592       conds = get_attr_conds (insn);
22593       if (conds == CONDS_SET)
22594 	{
22595 	  rtx set = single_set (insn);
22596 	  cfun->machine->thumb1_cc_insn = insn;
22597 	  cfun->machine->thumb1_cc_op0 = SET_DEST (set);
22598 	  cfun->machine->thumb1_cc_op1 = const0_rtx;
22599 	  cfun->machine->thumb1_cc_mode = CC_NOOVmode;
22600 	  if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
22601 	    {
22602 	      rtx src1 = XEXP (SET_SRC (set), 1);
22603 	      if (src1 == const0_rtx)
22604 		cfun->machine->thumb1_cc_mode = CCmode;
22605 	    }
22606 	  else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
22607 	    {
22608 	      /* Record the src register operand instead of dest because
22609 		 cprop_hardreg pass propagates src.  */
22610 	      cfun->machine->thumb1_cc_op0 = SET_SRC (set);
22611 	    }
22612 	}
22613       else if (conds != CONDS_NOCOND)
22614 	cfun->machine->thumb1_cc_insn = NULL_RTX;
22615     }
22616 }
22617 
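      /* Return nonzero if VAL (truncated to 32 bits) is an 8-bit value
         shifted left by between 0 and 24 bits; such a constant can
         typically be built from an 8-bit immediate followed by a left
         shift.  */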
22618 int
22619 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
22620 {
22621   unsigned HOST_WIDE_INT mask = 0xff;
22622   int i;
22623 
22624   val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
22625   if (val == 0) /* XXX */
22626     return 0;
22627 
22628   for (i = 0; i < 25; i++)
22629     if ((val & (mask << i)) == val)
22630       return 1;
22631 
22632   return 0;
22633 }
22634 
22635 /* Returns nonzero if the current function contains,
22636    or might contain, a far jump.  */
22637 static int
22638 thumb_far_jump_used_p (void)
22639 {
22640   rtx insn;
22641 
22642   /* This test is only important for leaf functions.  */
22643   /* assert (!leaf_function_p ()); */
22644 
22645   /* If we have already decided that far jumps may be used,
22646      do not bother checking again, and always return true even if
22647      it turns out that they are not being used.  Once we have made
22648      the decision that far jumps are present (and that hence the link
22649      register will be pushed onto the stack) we cannot go back on it.  */
22650   if (cfun->machine->far_jump_used)
22651     return 1;
22652 
22653   /* If this function is not being called from the prologue/epilogue
22654      generation code then it must be being called from the
22655      INITIAL_ELIMINATION_OFFSET macro.  */
22656   if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
22657     {
22658       /* In this case we know that we are being asked about the elimination
22659 	 of the arg pointer register.  If that register is not being used,
22660 	 then there are no arguments on the stack, and we do not have to
22661 	 worry that a far jump might force the prologue to push the link
22662 	 register, changing the stack offsets.  In this case we can just
22663 	 return false, since the presence of far jumps in the function will
22664 	 not affect stack offsets.
22665 
22666 	 If the arg pointer is live (or if it was live, but has now been
22667 	 eliminated and so set to dead) then we do have to test to see if
22668 	 the function might contain a far jump.  This test can lead to some
22669 	 false negatives, since before reload is completed, the length of
22670 	 branch instructions is not known, so gcc defaults to returning their
22671 	 longest length, which in turn sets the far jump attribute to true.
22672 
22673 	 A false negative will not result in bad code being generated, but it
22674 	 will result in a needless push and pop of the link register.  We
22675 	 hope that this does not occur too often.
22676 
22677 	 If we need doubleword stack alignment this could affect the other
22678 	 elimination offsets so we can't risk getting it wrong.  */
22679       if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
22680 	cfun->machine->arg_pointer_live = 1;
22681       else if (!cfun->machine->arg_pointer_live)
22682 	return 0;
22683     }
22684 
22685   /* Check to see if the function contains a branch
22686      insn with the far jump attribute set.  */
22687   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22688     {
22689       if (JUMP_P (insn)
22690 	  /* Ignore tablejump patterns.  */
22691 	  && GET_CODE (PATTERN (insn)) != ADDR_VEC
22692 	  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
22693 	  && get_attr_far_jump (insn) == FAR_JUMP_YES
22694 	  )
22695 	{
22696 	  /* Record the fact that we have decided that
22697 	     the function does use far jumps.  */
22698 	  cfun->machine->far_jump_used = 1;
22699 	  return 1;
22700 	}
22701     }
22702 
22703   return 0;
22704 }
22705 
22706 /* Return nonzero if FUNC must be entered in ARM mode.  */
22707 int
22708 is_called_in_ARM_mode (tree func)
22709 {
22710   gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
22711 
22712   /* Ignore the problem about functions whose address is taken.  */
22713   if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
22714     return TRUE;
22715 
22716 #ifdef ARM_PE
22717   return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
22718 #else
22719   return FALSE;
22720 #endif
22721 }
22722 
22723 /* Given the stack offsets and register mask in OFFSETS, decide how
22724    many additional registers to push instead of subtracting a constant
22725    from SP.  For epilogues the principle is the same except we use pop.
22726    FOR_PROLOGUE indicates which we're generating.  */
22727 static int
22728 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
22729 {
22730   HOST_WIDE_INT amount;
22731   unsigned long live_regs_mask = offsets->saved_regs_mask;
22732   /* Extract a mask of the ones we can give to the Thumb's push/pop
22733      instruction.  */
22734   unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
22735   /* Then count how many other high registers will need to be pushed.  */
22736   unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
22737   int n_free, reg_base, size;
22738 
22739   if (!for_prologue && frame_pointer_needed)
22740     amount = offsets->locals_base - offsets->saved_regs;
22741   else
22742     amount = offsets->outgoing_args - offsets->saved_regs;
22743 
22744   /* If the stack frame size is 512 exactly, we can save one load
22745      instruction, which should make this a win even when optimizing
22746      for speed.  */
22747   if (!optimize_size && amount != 512)
22748     return 0;
22749 
22750   /* Can't do this if there are high registers to push.  */
22751   if (high_regs_pushed != 0)
22752     return 0;
22753 
22754   /* Shouldn't do it in the prologue if no registers would normally
22755      be pushed at all.  In the epilogue, also allow it if we'll have
22756      a pop insn for the PC.  */
22757   if  (l_mask == 0
22758        && (for_prologue
22759 	   || TARGET_BACKTRACE
22760 	   || (live_regs_mask & 1 << LR_REGNUM) == 0
22761 	   || TARGET_INTERWORK
22762 	   || crtl->args.pretend_args_size != 0))
22763     return 0;
22764 
22765   /* Don't do this if thumb_expand_prologue wants to emit instructions
22766      between the push and the stack frame allocation.  */
22767   if (for_prologue
22768       && ((flag_pic && arm_pic_register != INVALID_REGNUM)
22769 	  || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
22770     return 0;
22771 
22772   reg_base = 0;
22773   n_free = 0;
22774   if (!for_prologue)
22775     {
22776       size = arm_size_return_regs ();
22777       reg_base = ARM_NUM_INTS (size);
22778       live_regs_mask >>= reg_base;
22779     }
22780 
22781   while (reg_base + n_free < 8 && !(live_regs_mask & 1)
22782 	 && (for_prologue || call_used_regs[reg_base + n_free]))
22783     {
22784       live_regs_mask >>= 1;
22785       n_free++;
22786     }
22787 
22788   if (n_free == 0)
22789     return 0;
22790   gcc_assert (amount / 4 * 4 == amount);
22791 
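        /* Each extra register pushed or popped accounts for 4 bytes of
           frame, so pushing extra registers can shrink the remaining
           stack adjustment below 512 bytes, the limit used elsewhere in
           this file for a single stack-adjusting instruction, or remove
           the adjustment entirely.  */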
22792   if (amount >= 512 && (amount - n_free * 4) < 512)
22793     return (amount - 508) / 4;
22794   if (amount <= n_free * 4)
22795     return amount / 4;
22796   return 0;
22797 }
22798 
22799 /* The bits which aren't usefully expanded as rtl.  */
22800 const char *
22801 thumb1_unexpanded_epilogue (void)
22802 {
22803   arm_stack_offsets *offsets;
22804   int regno;
22805   unsigned long live_regs_mask = 0;
22806   int high_regs_pushed = 0;
22807   int extra_pop;
22808   int had_to_push_lr;
22809   int size;
22810 
22811   if (cfun->machine->return_used_this_function != 0)
22812     return "";
22813 
22814   if (IS_NAKED (arm_current_func_type ()))
22815     return "";
22816 
22817   offsets = arm_get_frame_offsets ();
22818   live_regs_mask = offsets->saved_regs_mask;
22819   high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
22820 
22821   /* We can deduce the registers used from the function's return value.
22822      This is more reliable than examining df_regs_ever_live_p () because that
22823      will be set if the register is ever used in the function, not just if
22824      the register is used to hold a return value.  */
22825   size = arm_size_return_regs ();
22826 
22827   extra_pop = thumb1_extra_regs_pushed (offsets, false);
22828   if (extra_pop > 0)
22829     {
22830       unsigned long extra_mask = (1 << extra_pop) - 1;
22831       live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
22832     }
22833 
22834   /* The prolog may have pushed some high registers to use as
22835      work registers.  e.g. the testsuite file:
22836      gcc/testsuite/gcc.c-torture/execute/complex-2.c
22837      compiles to produce:
22838 	push	{r4, r5, r6, r7, lr}
22839 	mov	r7, r9
22840 	mov	r6, r8
22841 	push	{r6, r7}
22842      as part of the prolog.  We have to undo that pushing here.  */
22843 
22844   if (high_regs_pushed)
22845     {
22846       unsigned long mask = live_regs_mask & 0xff;
22847       int next_hi_reg;
22848 
22849       /* The available low registers depend on the size of the value we are
22850          returning.  */
22851       if (size <= 12)
22852 	mask |=  1 << 3;
22853       if (size <= 8)
22854 	mask |= 1 << 2;
22855 
22856       if (mask == 0)
22857 	/* Oh dear!  We have no low registers into which we can pop
22858            high registers!  */
22859 	internal_error
22860 	  ("no low registers available for popping high registers");
22861 
22862       for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
22863 	if (live_regs_mask & (1 << next_hi_reg))
22864 	  break;
22865 
22866       while (high_regs_pushed)
22867 	{
22868 	  /* Find lo register(s) into which the high register(s) can
22869              be popped.  */
22870 	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
22871 	    {
22872 	      if (mask & (1 << regno))
22873 		high_regs_pushed--;
22874 	      if (high_regs_pushed == 0)
22875 		break;
22876 	    }
22877 
22878 	  mask &= (2 << regno) - 1;	/* A noop if regno == 8 */
22879 
22880 	  /* Pop the values into the low register(s).  */
22881 	  thumb_pop (asm_out_file, mask);
22882 
22883 	  /* Move the value(s) into the high registers.  */
22884 	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
22885 	    {
22886 	      if (mask & (1 << regno))
22887 		{
22888 		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
22889 			       regno);
22890 
22891 		  for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
22892 		    if (live_regs_mask & (1 << next_hi_reg))
22893 		      break;
22894 		}
22895 	    }
22896 	}
22897       live_regs_mask &= ~0x0f00;
22898     }
22899 
22900   had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
22901   live_regs_mask &= 0xff;
22902 
22903   if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
22904     {
22905       /* Pop the return address into the PC.  */
22906       if (had_to_push_lr)
22907 	live_regs_mask |= 1 << PC_REGNUM;
22908 
22909       /* Either no argument registers were pushed or a backtrace
22910 	 structure was created which includes an adjusted stack
22911 	 pointer, so just pop everything.  */
22912       if (live_regs_mask)
22913 	thumb_pop (asm_out_file, live_regs_mask);
22914 
22915       /* We have either just popped the return address into the
22916 	 PC or it was kept in LR for the entire function.
22917 	 Note that thumb_pop has already called thumb_exit if the
22918 	 PC was in the list.  */
22919       if (!had_to_push_lr)
22920 	thumb_exit (asm_out_file, LR_REGNUM);
22921     }
22922   else
22923     {
22924       /* Pop everything but the return address.  */
22925       if (live_regs_mask)
22926 	thumb_pop (asm_out_file, live_regs_mask);
22927 
22928       if (had_to_push_lr)
22929 	{
22930 	  if (size > 12)
22931 	    {
22932 	      /* We have no free low regs, so save one.  */
22933 	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
22934 			   LAST_ARG_REGNUM);
22935 	    }
22936 
22937 	  /* Get the return address into a temporary register.  */
22938 	  thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
22939 
22940 	  if (size > 12)
22941 	    {
22942 	      /* Move the return address to lr.  */
22943 	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
22944 			   LAST_ARG_REGNUM);
22945 	      /* Restore the low register.  */
22946 	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
22947 			   IP_REGNUM);
22948 	      regno = LR_REGNUM;
22949 	    }
22950 	  else
22951 	    regno = LAST_ARG_REGNUM;
22952 	}
22953       else
22954 	regno = LR_REGNUM;
22955 
22956       /* Remove the argument registers that were pushed onto the stack.  */
22957       asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
22958 		   SP_REGNUM, SP_REGNUM,
22959 		   crtl->args.pretend_args_size);
22960 
22961       thumb_exit (asm_out_file, regno);
22962     }
22963 
22964   return "";
22965 }
22966 
22967 /* Functions to save and restore machine-specific function data.  */
22968 static struct machine_function *
22969 arm_init_machine_status (void)
22970 {
22971   struct machine_function *machine;
22972   machine = ggc_alloc_cleared_machine_function ();
22973 
22974 #if ARM_FT_UNKNOWN != 0
22975   machine->func_type = ARM_FT_UNKNOWN;
22976 #endif
22977   return machine;
22978 }
22979 
22980 /* Return an RTX indicating where the return address to the
22981    calling function can be found.  */
22982 rtx
22983 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
22984 {
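        /* Only the return address of the current frame (COUNT == 0) is
           available; it is the value LR held on entry to this function.  */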
22985   if (count != 0)
22986     return NULL_RTX;
22987 
22988   return get_hard_reg_initial_val (Pmode, LR_REGNUM);
22989 }
22990 
22991 /* Do anything needed before RTL is emitted for each function.  */
22992 void
22993 arm_init_expanders (void)
22994 {
22995   /* Arrange to initialize and mark the machine per-function status.  */
22996   init_machine_status = arm_init_machine_status;
22997 
22998   /* This is to stop the combine pass optimizing away the alignment
22999      adjustment of va_arg.  */
23000   /* ??? It is claimed that this should not be necessary.  */
23001   if (cfun)
23002     mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
23003 }
23004 
23005 
23006 /* Like arm_compute_initial_elimination_offset.  Simpler because there
23007    isn't an ABI specified frame pointer for Thumb.  Instead, we set it
23008    to point at the base of the local variables after static stack
23009    space for a function has been allocated.  */
23010 
23011 HOST_WIDE_INT
23012 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
23013 {
23014   arm_stack_offsets *offsets;
23015 
23016   offsets = arm_get_frame_offsets ();
23017 
23018   switch (from)
23019     {
23020     case ARG_POINTER_REGNUM:
23021       switch (to)
23022 	{
23023 	case STACK_POINTER_REGNUM:
23024 	  return offsets->outgoing_args - offsets->saved_args;
23025 
23026 	case FRAME_POINTER_REGNUM:
23027 	  return offsets->soft_frame - offsets->saved_args;
23028 
23029 	case ARM_HARD_FRAME_POINTER_REGNUM:
23030 	  return offsets->saved_regs - offsets->saved_args;
23031 
23032 	case THUMB_HARD_FRAME_POINTER_REGNUM:
23033 	  return offsets->locals_base - offsets->saved_args;
23034 
23035 	default:
23036 	  gcc_unreachable ();
23037 	}
23038       break;
23039 
23040     case FRAME_POINTER_REGNUM:
23041       switch (to)
23042 	{
23043 	case STACK_POINTER_REGNUM:
23044 	  return offsets->outgoing_args - offsets->soft_frame;
23045 
23046 	case ARM_HARD_FRAME_POINTER_REGNUM:
23047 	  return offsets->saved_regs - offsets->soft_frame;
23048 
23049 	case THUMB_HARD_FRAME_POINTER_REGNUM:
23050 	  return offsets->locals_base - offsets->soft_frame;
23051 
23052 	default:
23053 	  gcc_unreachable ();
23054 	}
23055       break;
23056 
23057     default:
23058       gcc_unreachable ();
23059     }
23060 }
23061 
23062 /* Generate the function's prologue.  */
23063 
23064 void
23065 thumb1_expand_prologue (void)
23066 {
23067   rtx insn;
23068 
23069   HOST_WIDE_INT amount;
23070   arm_stack_offsets *offsets;
23071   unsigned long func_type;
23072   int regno;
23073   unsigned long live_regs_mask;
23074   unsigned long l_mask;
23075   unsigned high_regs_pushed = 0;
23076 
23077   func_type = arm_current_func_type ();
23078 
23079   /* Naked functions don't have prologues.  */
23080   if (IS_NAKED (func_type))
23081     return;
23082 
23083   if (IS_INTERRUPT (func_type))
23084     {
23085       error ("interrupt Service Routines cannot be coded in Thumb mode");
23086       return;
23087     }
23088 
23089   if (is_called_in_ARM_mode (current_function_decl))
23090     emit_insn (gen_prologue_thumb1_interwork ());
23091 
23092   offsets = arm_get_frame_offsets ();
23093   live_regs_mask = offsets->saved_regs_mask;
23094 
23095   /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
23096   l_mask = live_regs_mask & 0x40ff;
23097   /* Then count how many other high registers will need to be pushed.  */
23098   high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23099 
23100   if (crtl->args.pretend_args_size)
23101     {
23102       rtx x = GEN_INT (-crtl->args.pretend_args_size);
23103 
23104       if (cfun->machine->uses_anonymous_args)
23105 	{
23106 	  int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
23107 	  unsigned long mask;
23108 
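      	  /* Push the topmost NUM_PUSHES argument registers (those ending at
      	     LAST_ARG_REGNUM) instead of just decrementing the stack pointer;
      	     this both allocates the pretend-args area and saves the anonymous
      	     argument registers into it.  */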
23109 	  mask = 1ul << (LAST_ARG_REGNUM + 1);
23110 	  mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
23111 
23112 	  insn = thumb1_emit_multi_reg_push (mask, 0);
23113 	}
23114       else
23115 	{
23116 	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
23117 					stack_pointer_rtx, x));
23118 	}
23119       RTX_FRAME_RELATED_P (insn) = 1;
23120     }
23121 
23122   if (TARGET_BACKTRACE)
23123     {
23124       HOST_WIDE_INT offset = 0;
23125       unsigned work_register;
23126       rtx work_reg, x, arm_hfp_rtx;
23127 
23128       /* We have been asked to create a stack backtrace structure.
23129          The code looks like this:
23130 
23131 	 0   .align 2
23132 	 0   func:
23133          0     sub   SP, #16         Reserve space for 4 registers.
23134 	 2     push  {R7}            Push low registers.
23135          4     add   R7, SP, #20     Get the stack pointer before the push.
23136          6     str   R7, [SP, #8]    Store the stack pointer
23137 					(before reserving the space).
23138          8     mov   R7, PC          Get hold of the start of this code + 12.
23139         10     str   R7, [SP, #16]   Store it.
23140         12     mov   R7, FP          Get hold of the current frame pointer.
23141         14     str   R7, [SP, #4]    Store it.
23142         16     mov   R7, LR          Get hold of the current return address.
23143         18     str   R7, [SP, #12]   Store it.
23144         20     add   R7, SP, #16     Point at the start of the
23145 					backtrace structure.
23146         22     mov   FP, R7          Put this value into the frame pointer.  */
23147 
23148       work_register = thumb_find_work_register (live_regs_mask);
23149       work_reg = gen_rtx_REG (SImode, work_register);
23150       arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
23151 
23152       insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
23153 				    stack_pointer_rtx, GEN_INT (-16)));
23154       RTX_FRAME_RELATED_P (insn) = 1;
23155 
23156       if (l_mask)
23157 	{
23158 	  insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
23159 	  RTX_FRAME_RELATED_P (insn) = 1;
23160 
23161 	  offset = bit_count (l_mask) * UNITS_PER_WORD;
23162 	}
23163 
23164       x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
23165       emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
23166 
23167       x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
23168       x = gen_frame_mem (SImode, x);
23169       emit_move_insn (x, work_reg);
23170 
23171       /* Make sure that the instruction fetching the PC is in the right place
23172 	 to calculate "start of backtrace creation code + 12".  */
23173       /* ??? The stores using the common WORK_REG ought to be enough to
23174 	 prevent the scheduler from doing anything weird.  Failing that
23175 	 we could always move all of the following into an UNSPEC_VOLATILE.  */
23176       if (l_mask)
23177 	{
23178 	  x = gen_rtx_REG (SImode, PC_REGNUM);
23179 	  emit_move_insn (work_reg, x);
23180 
23181 	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
23182 	  x = gen_frame_mem (SImode, x);
23183 	  emit_move_insn (x, work_reg);
23184 
23185 	  emit_move_insn (work_reg, arm_hfp_rtx);
23186 
23187 	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
23188 	  x = gen_frame_mem (SImode, x);
23189 	  emit_move_insn (x, work_reg);
23190 	}
23191       else
23192 	{
23193 	  emit_move_insn (work_reg, arm_hfp_rtx);
23194 
23195 	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
23196 	  x = gen_frame_mem (SImode, x);
23197 	  emit_move_insn (x, work_reg);
23198 
23199 	  x = gen_rtx_REG (SImode, PC_REGNUM);
23200 	  emit_move_insn (work_reg, x);
23201 
23202 	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
23203 	  x = gen_frame_mem (SImode, x);
23204 	  emit_move_insn (x, work_reg);
23205 	}
23206 
23207       x = gen_rtx_REG (SImode, LR_REGNUM);
23208       emit_move_insn (work_reg, x);
23209 
23210       x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
23211       x = gen_frame_mem (SImode, x);
23212       emit_move_insn (x, work_reg);
23213 
23214       x = GEN_INT (offset + 12);
23215       emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
23216 
23217       emit_move_insn (arm_hfp_rtx, work_reg);
23218     }
23219   /* Optimization:  If we are not pushing any low registers but we are going
23220      to push some high registers then delay our first push.  This will just
23221      be a push of LR and we can combine it with the push of the first high
23222      register.  */
23223   else if ((l_mask & 0xff) != 0
23224 	   || (high_regs_pushed == 0 && l_mask))
23225     {
23226       unsigned long mask = l_mask;
23227       mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
23228       insn = thumb1_emit_multi_reg_push (mask, mask);
23229       RTX_FRAME_RELATED_P (insn) = 1;
23230     }
23231 
23232   if (high_regs_pushed)
23233     {
23234       unsigned pushable_regs;
23235       unsigned next_hi_reg;
23236       unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
23237 						 : crtl->args.info.nregs;
23238       unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
23239 
23240       for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
23241 	if (live_regs_mask & (1 << next_hi_reg))
23242 	  break;
23243 
23244       /* Here we need to mask out registers used for passing arguments, even
23245 	 if they could otherwise be pushed.  This avoids using them to stash
23246 	 the high registers, since that could clobber the argument values.  */
23247       pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
23248 
23249       if (pushable_regs == 0)
23250 	pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
23251 
23252       while (high_regs_pushed > 0)
23253 	{
23254 	  unsigned long real_regs_mask = 0;
23255 
23256 	  for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
23257 	    {
23258 	      if (pushable_regs & (1 << regno))
23259 		{
23260 		  emit_move_insn (gen_rtx_REG (SImode, regno),
23261 				  gen_rtx_REG (SImode, next_hi_reg));
23262 
23263 		  high_regs_pushed --;
23264 		  real_regs_mask |= (1 << next_hi_reg);
23265 
23266 		  if (high_regs_pushed)
23267 		    {
23268 		      for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
23269 			   next_hi_reg --)
23270 			if (live_regs_mask & (1 << next_hi_reg))
23271 			  break;
23272 		    }
23273 		  else
23274 		    {
23275 		      pushable_regs &= ~((1 << regno) - 1);
23276 		      break;
23277 		    }
23278 		}
23279 	    }
23280 
23281 	  /* If we had to find a work register and we have not yet
23282 	     saved the LR then add it to the list of regs to push.  */
23283 	  if (l_mask == (1 << LR_REGNUM))
23284 	    {
23285 	      pushable_regs |= l_mask;
23286 	      real_regs_mask |= l_mask;
23287 	      l_mask = 0;
23288 	    }
23289 
23290 	  insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
23291 	  RTX_FRAME_RELATED_P (insn) = 1;
23292 	}
23293     }
23294 
23295   /* Load the pic register before setting the frame pointer,
23296      so we can use r7 as a temporary work register.  */
23297   if (flag_pic && arm_pic_register != INVALID_REGNUM)
23298     arm_load_pic_register (live_regs_mask);
23299 
23300   if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
23301     emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
23302 		    stack_pointer_rtx);
23303 
23304   if (flag_stack_usage_info)
23305     current_function_static_stack_size
23306       = offsets->outgoing_args - offsets->saved_args;
23307 
23308   amount = offsets->outgoing_args - offsets->saved_regs;
23309   amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
23310   if (amount)
23311     {
23312       if (amount < 512)
23313 	{
23314 	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23315 					GEN_INT (- amount)));
23316 	  RTX_FRAME_RELATED_P (insn) = 1;
23317 	}
23318       else
23319 	{
23320 	  rtx reg, dwarf;
23321 
23322 	  /* The stack decrement is too big for an immediate value in a single
23323 	     insn.  In theory we could issue multiple subtracts, but after
23324 	     three of them it becomes more space efficient to place the full
23325 	     value in the constant pool and load into a register.  (Also the
23326 	     ARM debugger really likes to see only one stack decrement per
23327 	     function).  So instead we look for a scratch register into which
23328 	     we can load the decrement, and then we subtract this from the
23329 	     stack pointer.  Unfortunately on the thumb the only available
23330 	     scratch registers are the argument registers, and we cannot use
23331 	     these as they may hold arguments to the function.  Instead we
23332 	     attempt to locate a call preserved register which is used by this
23333 	     function.  If we can find one, then we know that it will have
23334 	     been pushed at the start of the prologue and so we can corrupt
23335 	     it now.  */
23336 	  for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
23337 	    if (live_regs_mask & (1 << regno))
23338 	      break;
23339 
23340 	  gcc_assert (regno <= LAST_LO_REGNUM);
23341 
23342 	  reg = gen_rtx_REG (SImode, regno);
23343 
23344 	  emit_insn (gen_movsi (reg, GEN_INT (- amount)));
23345 
23346 	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
23347 					stack_pointer_rtx, reg));
23348 
23349 	  dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
23350 			       plus_constant (Pmode, stack_pointer_rtx,
23351 					      -amount));
23352 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23353 	  RTX_FRAME_RELATED_P (insn) = 1;
23354 	}
23355     }
23356 
23357   if (frame_pointer_needed)
23358     thumb_set_frame_pointer (offsets);
23359 
23360   /* If we are profiling, make sure no instructions are scheduled before
23361      the call to mcount.  Similarly if the user has requested no
23362      scheduling in the prolog.  Similarly if we want non-call exceptions
23363      using the EABI unwinder, to prevent faulting instructions from being
23364      swapped with a stack adjustment.  */
23365   if (crtl->profile || !TARGET_SCHED_PROLOG
23366       || (arm_except_unwind_info (&global_options) == UI_TARGET
23367 	  && cfun->can_throw_non_call_exceptions))
23368     emit_insn (gen_blockage ());
23369 
23370   cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
23371   if (live_regs_mask & 0xff)
23372     cfun->machine->lr_save_eliminated = 0;
23373 }
23374 
23375 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a single
23376    POP instruction can be generated.  LR should be replaced by PC.  All
23377    the required checks have already been done by USE_RETURN_INSN ().  Hence,
23378    all we really need to check here is whether a single register or
23379    multiple registers are to be returned.  */
23380 void
23381 thumb2_expand_return (void)
23382 {
23383   int i, num_regs;
23384   unsigned long saved_regs_mask;
23385   arm_stack_offsets *offsets;
23386 
23387   offsets = arm_get_frame_offsets ();
23388   saved_regs_mask = offsets->saved_regs_mask;
23389 
23390   for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
23391     if (saved_regs_mask & (1 << i))
23392       num_regs++;
23393 
23394   if (saved_regs_mask)
23395     {
23396       if (num_regs == 1)
23397         {
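                /* Pop the single saved register (the return address, per the
                   comment above) straight into the PC: a PARALLEL of a return
                   and a load from the post-incremented stack pointer, matching
                   the *pop_multiple_with_stack_update_and_return pattern.  */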
23398           rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
23399           rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
23400           rtx addr = gen_rtx_MEM (SImode,
23401                                   gen_rtx_POST_INC (SImode,
23402                                                     stack_pointer_rtx));
23403           set_mem_alias_set (addr, get_frame_alias_set ());
23404           XVECEXP (par, 0, 0) = ret_rtx;
23405           XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
23406           RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
23407           emit_jump_insn (par);
23408         }
23409       else
23410         {
23411           saved_regs_mask &= ~ (1 << LR_REGNUM);
23412           saved_regs_mask |=   (1 << PC_REGNUM);
23413           arm_emit_multi_reg_pop (saved_regs_mask);
23414         }
23415     }
23416   else
23417     {
23418       emit_jump_insn (simple_return_rtx);
23419     }
23420 }
23421 
23422 void
23423 thumb1_expand_epilogue (void)
23424 {
23425   HOST_WIDE_INT amount;
23426   arm_stack_offsets *offsets;
23427   int regno;
23428 
23429   /* Naked functions don't have epilogues.  */
23430   if (IS_NAKED (arm_current_func_type ()))
23431     return;
23432 
23433   offsets = arm_get_frame_offsets ();
23434   amount = offsets->outgoing_args - offsets->saved_regs;
23435 
23436   if (frame_pointer_needed)
23437     {
23438       emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
23439       amount = offsets->locals_base - offsets->saved_regs;
23440     }
23441   amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
23442 
23443   gcc_assert (amount >= 0);
23444   if (amount)
23445     {
23446       emit_insn (gen_blockage ());
23447 
23448       if (amount < 512)
23449 	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23450 			       GEN_INT (amount)));
23451       else
23452 	{
23453 	  /* r3 is always free in the epilogue.  */
23454 	  rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
23455 
23456 	  emit_insn (gen_movsi (reg, GEN_INT (amount)));
23457 	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
23458 	}
23459     }
23460 
23461   /* Emit a USE (stack_pointer_rtx), so that
23462      the stack adjustment will not be deleted.  */
23463   emit_insn (gen_force_register_use (stack_pointer_rtx));
23464 
23465   if (crtl->profile || !TARGET_SCHED_PROLOG)
23466     emit_insn (gen_blockage ());
23467 
23468   /* Emit a clobber for each insn that will be restored in the epilogue,
23469      so that flow2 will get register lifetimes correct.  */
23470   for (regno = 0; regno < 13; regno++)
23471     if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
23472       emit_clobber (gen_rtx_REG (SImode, regno));
23473 
23474   if (! df_regs_ever_live_p (LR_REGNUM))
23475     emit_use (gen_rtx_REG (SImode, LR_REGNUM));
23476 }
23477 
23478 /* Epilogue code for APCS frame.  */
23479 static void
23480 arm_expand_epilogue_apcs_frame (bool really_return)
23481 {
23482   unsigned long func_type;
23483   unsigned long saved_regs_mask;
23484   int num_regs = 0;
23485   int i;
23486   int floats_from_frame = 0;
23487   arm_stack_offsets *offsets;
23488 
23489   gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
23490   func_type = arm_current_func_type ();
23491 
23492   /* Get frame offsets for ARM.  */
23493   offsets = arm_get_frame_offsets ();
23494   saved_regs_mask = offsets->saved_regs_mask;
23495 
23496   /* Find the offset of the floating-point save area in the frame.  */
23497   floats_from_frame = offsets->saved_args - offsets->frame;
23498 
23499   /* Compute how many core registers are saved and how far away the floats are.  */
23500   for (i = 0; i <= LAST_ARM_REGNUM; i++)
23501     if (saved_regs_mask & (1 << i))
23502       {
23503         num_regs++;
23504         floats_from_frame += 4;
23505       }
23506 
23507   if (TARGET_HARD_FLOAT && TARGET_VFP)
23508     {
23509       int start_reg;
23510 
23511       /* The offset is from IP_REGNUM.  */
23512       int saved_size = arm_get_vfp_saved_size ();
23513       if (saved_size > 0)
23514         {
23515           floats_from_frame += saved_size;
23516           emit_insn (gen_addsi3 (gen_rtx_REG (SImode, IP_REGNUM),
23517                                  hard_frame_pointer_rtx,
23518                                  GEN_INT (-floats_from_frame)));
23519         }
23520 
23521       /* Generate VFP register multi-pop.  */
23522       start_reg = FIRST_VFP_REGNUM;
23523 
23524       for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
23525         /* Look for a case where a reg does not need restoring.  */
23526         if ((!df_regs_ever_live_p (i) || call_used_regs[i])
23527             && (!df_regs_ever_live_p (i + 1)
23528                 || call_used_regs[i + 1]))
23529           {
23530             if (start_reg != i)
23531               arm_emit_vfp_multi_reg_pop (start_reg,
23532                                           (i - start_reg) / 2,
23533                                           gen_rtx_REG (SImode,
23534                                                        IP_REGNUM));
23535             start_reg = i + 2;
23536           }
23537 
23538       /* Restore the remaining regs that we have discovered (or possibly
23539          even all of them, if the conditional in the for loop never
23540          fired).  */
23541       if (start_reg != i)
23542         arm_emit_vfp_multi_reg_pop (start_reg,
23543                                     (i - start_reg) / 2,
23544                                     gen_rtx_REG (SImode, IP_REGNUM));
23545     }
23546 
23547   if (TARGET_IWMMXT)
23548     {
23549       /* The frame pointer is guaranteed to be non-double-word aligned, as
23550          it is set to double-word-aligned old_stack_pointer - 4.  */
23551       rtx insn;
23552       int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
23553 
23554       for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
23555         if (df_regs_ever_live_p (i) && !call_used_regs[i])
23556           {
23557             rtx addr = gen_frame_mem (V2SImode,
23558                                  plus_constant (Pmode, hard_frame_pointer_rtx,
23559                                                 - lrm_count * 4));
23560             insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
23561             REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
23562                                                gen_rtx_REG (V2SImode, i),
23563                                                NULL_RTX);
23564             lrm_count += 2;
23565           }
23566     }
23567 
23568   /* saved_regs_mask should contain IP, which holds the old stack pointer
23569      from the time the activation record was created.  Since SP and IP are adjacent registers,
23570      we can restore the value directly into SP.  */
23571   gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
23572   saved_regs_mask &= ~(1 << IP_REGNUM);
23573   saved_regs_mask |= (1 << SP_REGNUM);
23574 
23575   /* There are two registers left in saved_regs_mask - LR and PC.  We
23576      only need to restore LR (the return address), but to
23577      save time we can load it directly into PC, unless we need a
23578      special function exit sequence, or we are not really returning.  */
23579   if (really_return
23580       && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
23581       && !crtl->calls_eh_return)
23582     /* Delete LR from the register mask, so that LR on
23583        the stack is loaded into the PC in the register mask.  */
23584     saved_regs_mask &= ~(1 << LR_REGNUM);
23585   else
23586     saved_regs_mask &= ~(1 << PC_REGNUM);
23587 
23588   num_regs = bit_count (saved_regs_mask);
23589   if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
23590     {
23591       emit_insn (gen_blockage ());
23592       /* Unwind the stack to just below the saved registers.  */
23593       emit_insn (gen_addsi3 (stack_pointer_rtx,
23594                              hard_frame_pointer_rtx,
23595                              GEN_INT (- 4 * num_regs)));
23596     }
23597 
23598   arm_emit_multi_reg_pop (saved_regs_mask);
23599 
23600   if (IS_INTERRUPT (func_type))
23601     {
23602       /* Interrupt handlers will have pushed the
23603          IP onto the stack, so restore it now.  */
23604       rtx insn;
23605       rtx addr = gen_rtx_MEM (SImode,
23606                               gen_rtx_POST_INC (SImode,
23607                               stack_pointer_rtx));
23608       set_mem_alias_set (addr, get_frame_alias_set ());
23609       insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
23610       REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
23611                                          gen_rtx_REG (SImode, IP_REGNUM),
23612                                          NULL_RTX);
23613     }
23614 
23615   if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
23616     return;
23617 
23618   if (crtl->calls_eh_return)
23619     emit_insn (gen_addsi3 (stack_pointer_rtx,
23620 			   stack_pointer_rtx,
23621 			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
23622 
23623   if (IS_STACKALIGN (func_type))
23624     /* Restore the original stack pointer.  Before prologue, the stack was
23625        realigned and the original stack pointer saved in r0.  For details,
23626        see comment in arm_expand_prologue.  */
23627     emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
23628 
23629   emit_jump_insn (simple_return_rtx);
23630 }
23631 
23632 /* Generate RTL to represent an ARM epilogue.  REALLY_RETURN is true if the
23633    function is not a sibcall.  */
23634 void
23635 arm_expand_epilogue (bool really_return)
23636 {
23637   unsigned long func_type;
23638   unsigned long saved_regs_mask;
23639   int num_regs = 0;
23640   int i;
23641   int amount;
23642   arm_stack_offsets *offsets;
23643 
23644   func_type = arm_current_func_type ();
23645 
23646   /* Naked functions don't have an epilogue.  Hence, generate a return pattern,
23647      and let output_return_instruction take care of instruction emission, if any.  */
23648   if (IS_NAKED (func_type)
23649       || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
23650     {
23651       if (really_return)
23652         emit_jump_insn (simple_return_rtx);
23653       return;
23654     }
23655 
23656   /* If we are throwing an exception, then we really must be doing a
23657      return, so we can't tail-call.  */
23658   gcc_assert (!crtl->calls_eh_return || really_return);
23659 
23660   if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23661     {
23662       arm_expand_epilogue_apcs_frame (really_return);
23663       return;
23664     }
23665 
23666   /* Get frame offsets for ARM.  */
23667   offsets = arm_get_frame_offsets ();
23668   saved_regs_mask = offsets->saved_regs_mask;
23669   num_regs = bit_count (saved_regs_mask);
23670 
23671   if (frame_pointer_needed)
23672     {
23673       /* Restore stack pointer if necessary.  */
23674       if (TARGET_ARM)
23675         {
23676           /* In ARM mode, frame pointer points to first saved register.
23677              Restore stack pointer to last saved register.  */
23678           amount = offsets->frame - offsets->saved_regs;
23679 
23680           /* Force out any pending memory operations that reference stacked data
23681              before stack de-allocation occurs.  */
23682           emit_insn (gen_blockage ());
23683           emit_insn (gen_addsi3 (stack_pointer_rtx,
23684                                  hard_frame_pointer_rtx,
23685                                  GEN_INT (amount)));
23686 
23687           /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
23688              deleted.  */
23689           emit_insn (gen_force_register_use (stack_pointer_rtx));
23690         }
23691       else
23692         {
23693           /* In Thumb-2 mode, the frame pointer points to the last saved
23694              register.  */
23695           amount = offsets->locals_base - offsets->saved_regs;
23696           if (amount)
23697             emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23698                                    hard_frame_pointer_rtx,
23699                                    GEN_INT (amount)));
23700 
23701           /* Force out any pending memory operations that reference stacked data
23702              before stack de-allocation occurs.  */
23703           emit_insn (gen_blockage ());
23704           emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
23705           /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
23706              deleted.  */
23707           emit_insn (gen_force_register_use (stack_pointer_rtx));
23708         }
23709     }
23710   else
23711     {
23712       /* Pop off outgoing args and local frame to adjust stack pointer to
23713          last saved register.  */
23714       amount = offsets->outgoing_args - offsets->saved_regs;
23715       if (amount)
23716         {
23717           /* Force out any pending memory operations that reference stacked data
23718              before stack de-allocation occurs.  */
23719           emit_insn (gen_blockage ());
23720           emit_insn (gen_addsi3 (stack_pointer_rtx,
23721                                  stack_pointer_rtx,
23722                                  GEN_INT (amount)));
23723           /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
23724              not deleted.  */
23725           emit_insn (gen_force_register_use (stack_pointer_rtx));
23726         }
23727     }
23728 
23729   if (TARGET_HARD_FLOAT && TARGET_VFP)
23730     {
23731       /* Generate VFP register multi-pop.  */
23732       int end_reg = LAST_VFP_REGNUM + 1;
23733 
23734       /* Scan the registers in reverse order.  We need to match
23735          any groupings made in the prologue and generate matching
23736          vldm operations.  The need to match groups is because,
23737          unlike pop, vldm can only do consecutive regs.  */
23738       for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
23739         /* Look for a case where a reg does not need restoring.  */
23740         if ((!df_regs_ever_live_p (i) || call_used_regs[i])
23741             && (!df_regs_ever_live_p (i + 1)
23742                 || call_used_regs[i + 1]))
23743           {
23744             /* Restore the regs discovered so far (from reg+2 to
23745                end_reg).  */
23746             if (end_reg > i + 2)
23747               arm_emit_vfp_multi_reg_pop (i + 2,
23748                                           (end_reg - (i + 2)) / 2,
23749                                           stack_pointer_rtx);
23750             end_reg = i;
23751           }
23752 
23753       /* Restore the remaining regs that we have discovered (or possibly
23754          even all of them, if the conditional in the for loop never
23755          fired).  */
23756       if (end_reg > i + 2)
23757         arm_emit_vfp_multi_reg_pop (i + 2,
23758                                     (end_reg - (i + 2)) / 2,
23759                                     stack_pointer_rtx);
23760     }
23761 
23762   if (TARGET_IWMMXT)
23763     for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
23764       if (df_regs_ever_live_p (i) && !call_used_regs[i])
23765         {
23766           rtx insn;
23767           rtx addr = gen_rtx_MEM (V2SImode,
23768                                   gen_rtx_POST_INC (SImode,
23769                                                     stack_pointer_rtx));
23770           set_mem_alias_set (addr, get_frame_alias_set ());
23771           insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
23772           REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
23773                                              gen_rtx_REG (V2SImode, i),
23774                                              NULL_RTX);
23775         }
23776 
23777   if (saved_regs_mask)
23778     {
23779       rtx insn;
23780       bool return_in_pc = false;
23781 
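            /* When this is a plain return from a normal function with LR
               saved on the stack and nothing left to do afterwards, pop the
               saved return address straight into the PC instead of LR.  */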
23782       if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
23783           && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
23784           && !IS_STACKALIGN (func_type)
23785           && really_return
23786           && crtl->args.pretend_args_size == 0
23787           && saved_regs_mask & (1 << LR_REGNUM)
23788           && !crtl->calls_eh_return)
23789         {
23790           saved_regs_mask &= ~(1 << LR_REGNUM);
23791           saved_regs_mask |= (1 << PC_REGNUM);
23792           return_in_pc = true;
23793         }
23794 
23795       if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
23796         {
23797           for (i = 0; i <= LAST_ARM_REGNUM; i++)
23798             if (saved_regs_mask & (1 << i))
23799               {
23800                 rtx addr = gen_rtx_MEM (SImode,
23801                                         gen_rtx_POST_INC (SImode,
23802                                                           stack_pointer_rtx));
23803                 set_mem_alias_set (addr, get_frame_alias_set ());
23804 
23805                 if (i == PC_REGNUM)
23806                   {
23807                     insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
23808                     XVECEXP (insn, 0, 0) = ret_rtx;
23809                     XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
23810                                                         gen_rtx_REG (SImode, i),
23811                                                         addr);
23812                     RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
23813                     insn = emit_jump_insn (insn);
23814                   }
23815                 else
23816                   {
23817                     insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
23818                                                  addr));
23819                     REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
23820                                                        gen_rtx_REG (SImode, i),
23821                                                        NULL_RTX);
23822                   }
23823               }
23824         }
23825       else
23826         {
23827           if (TARGET_LDRD
23828 	      && current_tune->prefer_ldrd_strd
23829               && !optimize_function_for_size_p (cfun))
23830             {
23831               if (TARGET_THUMB2)
23832                 thumb2_emit_ldrd_pop (saved_regs_mask);
23833               else
23834                 arm_emit_multi_reg_pop (saved_regs_mask);
23835             }
23836           else
23837             arm_emit_multi_reg_pop (saved_regs_mask);
23838         }
23839 
23840       if (return_in_pc == true)
23841         return;
23842     }
23843 
23844   if (crtl->args.pretend_args_size)
23845     emit_insn (gen_addsi3 (stack_pointer_rtx,
23846                            stack_pointer_rtx,
23847                            GEN_INT (crtl->args.pretend_args_size)));
23848 
23849   if (!really_return)
23850     return;
23851 
23852   if (crtl->calls_eh_return)
23853     emit_insn (gen_addsi3 (stack_pointer_rtx,
23854                            stack_pointer_rtx,
23855                            gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
23856 
23857   if (IS_STACKALIGN (func_type))
23858     /* Restore the original stack pointer.  Before prologue, the stack was
23859        realigned and the original stack pointer saved in r0.  For details,
23860        see comment in arm_expand_prologue.  */
23861     emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
23862 
23863   emit_jump_insn (simple_return_rtx);
23864 }
23865 
23866 /* Implementation of insn prologue_thumb1_interwork.  This is the first
23867    "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
23868 
23869 const char *
23870 thumb1_output_interwork (void)
23871 {
23872   const char * name;
23873   FILE *f = asm_out_file;
23874 
23875   gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
23876   gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
23877 	      == SYMBOL_REF);
23878   name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
23879 
23880   /* Generate code sequence to switch us into Thumb mode.  */
23881   /* The .code 32 directive has already been emitted by
23882      ASM_DECLARE_FUNCTION_NAME.  */
23883   asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
23884   asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
23885 
23886   /* Generate a label, so that the debugger will notice the
23887      change in instruction sets.  This label is also used by
23888      the assembler to bypass the ARM code when this function
23889      is called from a Thumb encoded function elsewhere in the
23890      same file.  Hence the definition of STUB_NAME here must
23891      agree with the definition in gas/config/tc-arm.c.  */
23892 
23893 #define STUB_NAME ".real_start_of"
23894 
23895   fprintf (f, "\t.code\t16\n");
23896 #ifdef ARM_PE
23897   if (arm_dllexport_name_p (name))
23898     name = arm_strip_name_encoding (name);
23899 #endif
23900   asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
23901   fprintf (f, "\t.thumb_func\n");
23902   asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
23903 
23904   return "";
23905 }
23906 
23907 /* Handle the case of a double word load into a low register from
23908    a computed memory address.  The computed address may involve a
23909    register which is overwritten by the load.  */
23910 const char *
23911 thumb_load_double_from_address (rtx *operands)
23912 {
23913   rtx addr;
23914   rtx base;
23915   rtx offset;
23916   rtx arg1;
23917   rtx arg2;
23918 
23919   gcc_assert (REG_P (operands[0]));
23920   gcc_assert (MEM_P (operands[1]));
23921 
23922   /* Get the memory address.  */
23923   addr = XEXP (operands[1], 0);
23924 
23925   /* Work out how the memory address is computed.  */
23926   switch (GET_CODE (addr))
23927     {
23928     case REG:
23929       operands[2] = adjust_address (operands[1], SImode, 4);
23930 
23931       if (REGNO (operands[0]) == REGNO (addr))
23932 	{
23933 	  output_asm_insn ("ldr\t%H0, %2", operands);
23934 	  output_asm_insn ("ldr\t%0, %1", operands);
23935 	}
23936       else
23937 	{
23938 	  output_asm_insn ("ldr\t%0, %1", operands);
23939 	  output_asm_insn ("ldr\t%H0, %2", operands);
23940 	}
23941       break;
23942 
23943     case CONST:
23944       /* Compute <address> + 4 for the high order load.  */
23945       operands[2] = adjust_address (operands[1], SImode, 4);
23946 
23947       output_asm_insn ("ldr\t%0, %1", operands);
23948       output_asm_insn ("ldr\t%H0, %2", operands);
23949       break;
23950 
23951     case PLUS:
23952       arg1   = XEXP (addr, 0);
23953       arg2   = XEXP (addr, 1);
23954 
23955       if (CONSTANT_P (arg1))
23956 	base = arg2, offset = arg1;
23957       else
23958 	base = arg1, offset = arg2;
23959 
23960       gcc_assert (REG_P (base));
23961 
23962       /* Catch the case of <address> = <reg> + <reg> */
23963       if (REG_P (offset))
23964 	{
23965 	  int reg_offset = REGNO (offset);
23966 	  int reg_base   = REGNO (base);
23967 	  int reg_dest   = REGNO (operands[0]);
23968 
23969 	  /* Add the base and offset registers together into the
23970              higher destination register.  */
23971 	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
23972 		       reg_dest + 1, reg_base, reg_offset);
23973 
23974 	  /* Load the lower destination register from the address in
23975              the higher destination register.  */
23976 	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
23977 		       reg_dest, reg_dest + 1);
23978 
23979 	  /* Load the higher destination register from its own address
23980              plus 4.  */
23981 	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
23982 		       reg_dest + 1, reg_dest + 1);
23983 	}
23984       else
23985 	{
23986 	  /* Compute <address> + 4 for the high order load.  */
23987 	  operands[2] = adjust_address (operands[1], SImode, 4);
23988 
23989 	  /* If the computed address is held in the low order register
23990 	     then load the high order register first, otherwise always
23991 	     load the low order register first.  */
23992 	  if (REGNO (operands[0]) == REGNO (base))
23993 	    {
23994 	      output_asm_insn ("ldr\t%H0, %2", operands);
23995 	      output_asm_insn ("ldr\t%0, %1", operands);
23996 	    }
23997 	  else
23998 	    {
23999 	      output_asm_insn ("ldr\t%0, %1", operands);
24000 	      output_asm_insn ("ldr\t%H0, %2", operands);
24001 	    }
24002 	}
24003       break;
24004 
24005     case LABEL_REF:
24006       /* With no registers to worry about we can just load the value
24007          directly.  */
24008       operands[2] = adjust_address (operands[1], SImode, 4);
24009 
24010       output_asm_insn ("ldr\t%H0, %2", operands);
24011       output_asm_insn ("ldr\t%0, %1", operands);
24012       break;
24013 
24014     default:
24015       gcc_unreachable ();
24016     }
24017 
24018   return "";
24019 }
24020 
24021 const char *
24022 thumb_output_move_mem_multiple (int n, rtx *operands)
24023 {
24024   rtx tmp;
24025 
24026   switch (n)
24027     {
24028     case 2:
24029       if (REGNO (operands[4]) > REGNO (operands[5]))
24030 	{
24031 	  tmp = operands[4];
24032 	  operands[4] = operands[5];
24033 	  operands[5] = tmp;
24034 	}
24035       output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
24036       output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
24037       break;
24038 
24039     case 3:
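            /* Sort the three scratch registers into ascending order, since
               the ldmia/stmia register lists below must name them in
               ascending order.  */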
24040       if (REGNO (operands[4]) > REGNO (operands[5]))
24041 	{
24042 	  tmp = operands[4];
24043 	  operands[4] = operands[5];
24044 	  operands[5] = tmp;
24045 	}
24046       if (REGNO (operands[5]) > REGNO (operands[6]))
24047 	{
24048 	  tmp = operands[5];
24049 	  operands[5] = operands[6];
24050 	  operands[6] = tmp;
24051 	}
24052       if (REGNO (operands[4]) > REGNO (operands[5]))
24053 	{
24054 	  tmp = operands[4];
24055 	  operands[4] = operands[5];
24056 	  operands[5] = tmp;
24057 	}
24058 
24059       output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
24060       output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
24061       break;
24062 
24063     default:
24064       gcc_unreachable ();
24065     }
24066 
24067   return "";
24068 }
24069 
24070 /* Output a call-via instruction for thumb state.  */
24071 const char *
24072 thumb_call_via_reg (rtx reg)
24073 {
24074   int regno = REGNO (reg);
24075   rtx *labelp;
24076 
24077   gcc_assert (regno < LR_REGNUM);
24078 
24079   /* If we are in the normal text section we can use a single instance
24080      per compilation unit.  If we are doing function sections, then we need
24081      an entry per section, since we can't rely on reachability.  */
24082   if (in_section == text_section)
24083     {
24084       thumb_call_reg_needed = 1;
24085 
24086       if (thumb_call_via_label[regno] == NULL)
24087 	thumb_call_via_label[regno] = gen_label_rtx ();
24088       labelp = thumb_call_via_label + regno;
24089     }
24090   else
24091     {
24092       if (cfun->machine->call_via[regno] == NULL)
24093 	cfun->machine->call_via[regno] = gen_label_rtx ();
24094       labelp = cfun->machine->call_via + regno;
24095     }
24096 
24097   output_asm_insn ("bl\t%a0", labelp);
24098   return "";
24099 }
24100 
24101 /* Routines for generating rtl.  */
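/* Expand a constant-length memory copy (movmemqi).  Chunks of 12 and
   8 bytes are copied with multiple-register moves; any remaining
   bytes are copied with individual word, halfword and byte moves.  */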
24102 void
24103 thumb_expand_movmemqi (rtx *operands)
24104 {
24105   rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
24106   rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
24107   HOST_WIDE_INT len = INTVAL (operands[2]);
24108   HOST_WIDE_INT offset = 0;
24109 
24110   while (len >= 12)
24111     {
24112       emit_insn (gen_movmem12b (out, in, out, in));
24113       len -= 12;
24114     }
24115 
24116   if (len >= 8)
24117     {
24118       emit_insn (gen_movmem8b (out, in, out, in));
24119       len -= 8;
24120     }
24121 
24122   if (len >= 4)
24123     {
24124       rtx reg = gen_reg_rtx (SImode);
24125       emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
24126       emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
24127       len -= 4;
24128       offset += 4;
24129     }
24130 
24131   if (len >= 2)
24132     {
24133       rtx reg = gen_reg_rtx (HImode);
24134       emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
24135 					      plus_constant (Pmode, in,
24136 							     offset))));
24137       emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
24138 								offset)),
24139 			    reg));
24140       len -= 2;
24141       offset += 2;
24142     }
24143 
24144   if (len)
24145     {
24146       rtx reg = gen_reg_rtx (QImode);
24147       emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
24148 					      plus_constant (Pmode, in,
24149 							     offset))));
24150       emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
24151 								offset)),
24152 			    reg));
24153     }
24154 }
24155 
24156 void
24157 thumb_reload_out_hi (rtx *operands)
24158 {
24159   emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
24160 }
24161 
24162 /* Handle reading a half-word from memory during reload.  */
24163 void
24164 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
24165 {
24166   gcc_unreachable ();
24167 }
24168 
24169 /* Return the length of a function name prefix
24170     that starts with the character 'c'.  */
24171 static int
24172 arm_get_strip_length (int c)
24173 {
24174   switch (c)
24175     {
24176     ARM_NAME_ENCODING_LENGTHS
24177       default: return 0;
24178     }
24179 }
24180 
24181 /* Return a pointer to a function's name with any
24182    and all prefix encodings stripped from it.  */
24183 const char *
24184 arm_strip_name_encoding (const char *name)
24185 {
24186   int skip;
24187 
24188   while ((skip = arm_get_strip_length (* name)))
24189     name += skip;
24190 
24191   return name;
24192 }
24193 
24194 /* If there is a '*' anywhere in the name's prefix, then
24195    emit the stripped name verbatim, otherwise prepend an
24196    underscore if leading underscores are being used.  */
24197 void
24198 arm_asm_output_labelref (FILE *stream, const char *name)
24199 {
24200   int skip;
24201   int verbatim = 0;
24202 
24203   while ((skip = arm_get_strip_length (* name)))
24204     {
24205       verbatim |= (*name == '*');
24206       name += skip;
24207     }
24208 
24209   if (verbatim)
24210     fputs (name, stream);
24211   else
24212     asm_fprintf (stream, "%U%s", name);
24213 }
24214 
24215 /* This function is used to emit an EABI tag and its associated value.
24216    We emit the numerical value of the tag in case the assembler does not
24217    support textual tags (e.g. gas prior to 2.20).  If requested we include
24218    the tag name in a comment so that anyone reading the assembler output
24219    will know which tag is being set.
24220 
24221    This function is not static because arm-c.c needs it too.  */
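/* For example, arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26, 2)
   emits ".eabi_attribute 26, 2", followed under -fverbose-asm by an
   assembler comment naming the tag.  */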
24222 
24223 void
24224 arm_emit_eabi_attribute (const char *name, int num, int val)
24225 {
24226   asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
24227   if (flag_verbose_asm || flag_debug_asm)
24228     asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
24229   asm_fprintf (asm_out_file, "\n");
24230 }
24231 
24232 static void
24233 arm_file_start (void)
24234 {
24235   int val;
24236 
24237   if (TARGET_UNIFIED_ASM)
24238     asm_fprintf (asm_out_file, "\t.syntax unified\n");
24239 
24240   if (TARGET_BPABI)
24241     {
24242       const char *fpu_name;
24243       if (arm_selected_arch)
24244 	asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
24245       else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
24246 	asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
24247       else
24248 	asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
24249 
24250       if (TARGET_SOFT_FLOAT)
24251 	{
24252 	  fpu_name = "softvfp";
24253 	}
24254       else
24255 	{
24256 	  fpu_name = arm_fpu_desc->name;
24257 	  if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
24258 	    {
24259 	      if (TARGET_HARD_FLOAT)
24260 		arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
24261 	      if (TARGET_HARD_FLOAT_ABI)
24262 		arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
24263 	    }
24264 	}
24265       asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
24266 
24267       /* Some of these attributes only apply when the corresponding features
24268          are used.  However, we don't have any easy way of figuring this out.
24269 	 Conservatively record the setting that would have been used.  */
24270 
24271       if (flag_rounding_math)
24272 	arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
24273 
24274       if (!flag_unsafe_math_optimizations)
24275 	{
24276 	  arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
24277 	  arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
24278 	}
24279       if (flag_signaling_nans)
24280 	arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
24281 
24282       arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
24283 			   flag_finite_math_only ? 1 : 3);
24284 
24285       arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
24286       arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
24287       arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
24288 			       flag_short_enums ? 1 : 2);
24289 
24290       /* Tag_ABI_optimization_goals.  */
24291       if (optimize_size)
24292 	val = 4;
24293       else if (optimize >= 2)
24294 	val = 2;
24295       else if (optimize)
24296 	val = 1;
24297       else
24298 	val = 6;
24299       arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
24300 
24301       arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
24302 			       unaligned_access);
24303 
24304       if (arm_fp16_format)
24305 	arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
24306 			     (int) arm_fp16_format);
24307 
24308       if (arm_lang_output_object_attributes_hook)
24309 	arm_lang_output_object_attributes_hook();
24310     }
24311 
24312   default_file_start ();
24313 }
24314 
24315 static void
24316 arm_file_end (void)
24317 {
24318   int regno;
24319 
24320   if (NEED_INDICATE_EXEC_STACK)
24321     /* Add .note.GNU-stack.  */
24322     file_end_indicate_exec_stack ();
24323 
24324   if (! thumb_call_reg_needed)
24325     return;
24326 
24327   switch_to_section (text_section);
24328   asm_fprintf (asm_out_file, "\t.code 16\n");
24329   ASM_OUTPUT_ALIGN (asm_out_file, 1);
24330 
24331   for (regno = 0; regno < LR_REGNUM; regno++)
24332     {
24333       rtx label = thumb_call_via_label[regno];
24334 
24335       if (label != 0)
24336 	{
24337 	  targetm.asm_out.internal_label (asm_out_file, "L",
24338 					  CODE_LABEL_NUMBER (label));
24339 	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
24340 	}
24341     }
24342 }
24343 
24344 #ifndef ARM_PE
24345 /* Symbols in the text segment can be accessed without indirecting via the
24346    constant pool; it may take an extra binary operation, but this is still
24347    faster than indirecting via memory.  Don't do this when not optimizing,
24348    since we won't be calculating all of the offsets necessary to do this
24349    simplification.  */
24350 
24351 static void
24352 arm_encode_section_info (tree decl, rtx rtl, int first)
24353 {
24354   if (optimize > 0 && TREE_CONSTANT (decl))
24355     SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
24356 
24357   default_encode_section_info (decl, rtl, first);
24358 }
24359 #endif /* !ARM_PE */
24360 
24361 static void
24362 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
24363 {
24364   if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
24365       && !strcmp (prefix, "L"))
24366     {
24367       arm_ccfsm_state = 0;
24368       arm_target_insn = NULL;
24369     }
24370   default_internal_label (stream, prefix, labelno);
24371 }
24372 
24373 /* Output code to add DELTA to the first argument, and then jump
24374    to FUNCTION.  Used for C++ multiple inheritance.  */
24375 static void
24376 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
24377 		     HOST_WIDE_INT delta,
24378 		     HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
24379 		     tree function)
24380 {
24381   static int thunk_label = 0;
24382   char label[256];
24383   char labelpc[256];
24384   int mi_delta = delta;
24385   const char *const mi_op = mi_delta < 0 ? "sub" : "add";
24386   int shift = 0;
24387   int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
24388                     ? 1 : 0);
24389   if (mi_delta < 0)
24390     mi_delta = - mi_delta;
24391 
24392   final_start_function (emit_barrier (), file, 1);
24393 
24394   if (TARGET_THUMB1)
24395     {
24396       int labelno = thunk_label++;
24397       ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
24398       /* Thunks are entered in arm mode when available.  */
24399       if (TARGET_THUMB1_ONLY)
24400 	{
24401 	  /* push r3 so we can use it as a temporary.  */
24402 	  /* TODO: Omit this save if r3 is not used.  */
24403 	  fputs ("\tpush {r3}\n", file);
24404 	  fputs ("\tldr\tr3, ", file);
24405 	}
24406       else
24407 	{
24408 	  fputs ("\tldr\tr12, ", file);
24409 	}
24410       assemble_name (file, label);
24411       fputc ('\n', file);
24412       if (flag_pic)
24413 	{
24414 	  /* If we are generating PIC, the ldr instruction below loads
24415 	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
24416 	     the address of the add + 8, so we have:
24417 
24418 	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
24419 	         = target + 1.
24420 
24421 	     Note that we have "+ 1" because some versions of GNU ld
24422 	     don't set the low bit of the result for R_ARM_REL32
24423 	     relocations against thumb function symbols.
24424 	     On ARMv6M this is +4, not +8.  */
24425 	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
24426 	  assemble_name (file, labelpc);
24427 	  fputs (":\n", file);
24428 	  if (TARGET_THUMB1_ONLY)
24429 	    {
24430 	      /* This is 2 insns after the start of the thunk, so we know it
24431 	         is 4-byte aligned.  */
24432 	      fputs ("\tadd\tr3, pc, r3\n", file);
24433 	      fputs ("\tmov r12, r3\n", file);
24434 	    }
24435 	  else
24436 	    fputs ("\tadd\tr12, pc, r12\n", file);
24437 	}
24438       else if (TARGET_THUMB1_ONLY)
24439 	fputs ("\tmov r12, r3\n", file);
24440     }
24441   if (TARGET_THUMB1_ONLY)
24442     {
24443       if (mi_delta > 255)
24444 	{
24445 	  fputs ("\tldr\tr3, ", file);
24446 	  assemble_name (file, label);
24447 	  fputs ("+4\n", file);
24448 	  asm_fprintf (file, "\t%s\t%r, %r, r3\n",
24449 		       mi_op, this_regno, this_regno);
24450 	}
24451       else if (mi_delta != 0)
24452 	{
24453 	  asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
24454 		       mi_op, this_regno, this_regno,
24455 		       mi_delta);
24456 	}
24457     }
24458   else
24459     {
24460       /* TODO: Use movw/movt for large constants when available.  */
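      /* Emit the delta in chunks of up to eight bits, each aligned to an
	 even bit position so that every chunk is a valid ARM immediate.
	 For example, a delta of 0x12345 is emitted as three adds of
	 #0x45, #0x2300 and #0x10000.  */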
24461       while (mi_delta != 0)
24462 	{
24463 	  if ((mi_delta & (3 << shift)) == 0)
24464 	    shift += 2;
24465 	  else
24466 	    {
24467 	      asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
24468 			   mi_op, this_regno, this_regno,
24469 			   mi_delta & (0xff << shift));
24470 	      mi_delta &= ~(0xff << shift);
24471 	      shift += 8;
24472 	    }
24473 	}
24474     }
24475   if (TARGET_THUMB1)
24476     {
24477       if (TARGET_THUMB1_ONLY)
24478 	fputs ("\tpop\t{r3}\n", file);
24479 
24480       fprintf (file, "\tbx\tr12\n");
24481       ASM_OUTPUT_ALIGN (file, 2);
24482       assemble_name (file, label);
24483       fputs (":\n", file);
24484       if (flag_pic)
24485 	{
24486 	  /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn".  */
24487 	  rtx tem = XEXP (DECL_RTL (function), 0);
24488 	  /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
24489 	     pipeline offset is four rather than eight.  Adjust the offset
24490 	     accordingly.  */
24491 	  tem = plus_constant (GET_MODE (tem), tem,
24492 			       TARGET_THUMB1_ONLY ? -3 : -7);
24493 	  tem = gen_rtx_MINUS (GET_MODE (tem),
24494 			       tem,
24495 			       gen_rtx_SYMBOL_REF (Pmode,
24496 						   ggc_strdup (labelpc)));
24497 	  assemble_integer (tem, 4, BITS_PER_WORD, 1);
24498 	}
24499       else
24500 	/* Output ".word .LTHUNKn".  */
24501 	assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
24502 
24503       if (TARGET_THUMB1_ONLY && mi_delta > 255)
24504 	assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
24505     }
24506   else
24507     {
24508       fputs ("\tb\t", file);
24509       assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
24510       if (NEED_PLT_RELOC)
24511         fputs ("(PLT)", file);
24512       fputc ('\n', file);
24513     }
24514 
24515   final_end_function ();
24516 }
24517 
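/* Output the CONST_VECTOR X to FILE as a single hexadecimal constant,
   emitting the elements from the highest-numbered down to the lowest.
   Returns nonzero.  */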
24518 int
24519 arm_emit_vector_const (FILE *file, rtx x)
24520 {
24521   int i;
24522   const char * pattern;
24523 
24524   gcc_assert (GET_CODE (x) == CONST_VECTOR);
24525 
24526   switch (GET_MODE (x))
24527     {
24528     case V2SImode: pattern = "%08x"; break;
24529     case V4HImode: pattern = "%04x"; break;
24530     case V8QImode: pattern = "%02x"; break;
24531     default:       gcc_unreachable ();
24532     }
24533 
24534   fprintf (file, "0x");
24535   for (i = CONST_VECTOR_NUNITS (x); i--;)
24536     {
24537       rtx element;
24538 
24539       element = CONST_VECTOR_ELT (x, i);
24540       fprintf (file, pattern, INTVAL (element));
24541     }
24542 
24543   return 1;
24544 }
24545 
24546 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
24547    HFmode constant pool entries are actually loaded with ldr.  */
24548 void
24549 arm_emit_fp16_const (rtx c)
24550 {
24551   REAL_VALUE_TYPE r;
24552   long bits;
24553 
24554   REAL_VALUE_FROM_CONST_DOUBLE (r, c);
24555   bits = real_to_target (NULL, &r, HFmode);
24556   if (WORDS_BIG_ENDIAN)
24557     assemble_zeros (2);
24558   assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
24559   if (!WORDS_BIG_ENDIAN)
24560     assemble_zeros (2);
24561 }
24562 
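/* Output the assembly for a load into an iWMMXt GR register.  When the
   address offset is in range a single wldrw suffices; otherwise spill a
   core register to the stack, load the value through it and transfer it
   with tmcr.  */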
24563 const char *
24564 arm_output_load_gr (rtx *operands)
24565 {
24566   rtx reg;
24567   rtx offset;
24568   rtx wcgr;
24569   rtx sum;
24570 
24571   if (!MEM_P (operands [1])
24572       || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
24573       || !REG_P (reg = XEXP (sum, 0))
24574       || !CONST_INT_P (offset = XEXP (sum, 1))
24575       || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
24576     return "wldrw%?\t%0, %1";
24577 
24578   /* Fix up an out-of-range load of a GR register.  */
24579   output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
24580   wcgr = operands[0];
24581   operands[0] = reg;
24582   output_asm_insn ("ldr%?\t%0, %1", operands);
24583 
24584   operands[0] = wcgr;
24585   operands[1] = reg;
24586   output_asm_insn ("tmcr%?\t%0, %1", operands);
24587   output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
24588 
24589   return "";
24590 }
24591 
24592 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
24593 
24594    On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
24595    named arg and all anonymous args onto the stack.
24596    XXX I know the prologue shouldn't be pushing registers, but it is faster
24597    that way.  */
24598 
24599 static void
24600 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
24601 			    enum machine_mode mode,
24602 			    tree type,
24603 			    int *pretend_size,
24604 			    int second_time ATTRIBUTE_UNUSED)
24605 {
24606   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
24607   int nregs;
24608 
24609   cfun->machine->uses_anonymous_args = 1;
24610   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
24611     {
24612       nregs = pcum->aapcs_ncrn;
24613       if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
24614 	nregs++;
24615     }
24616   else
24617     nregs = pcum->nregs;
24618 
24619   if (nregs < NUM_ARG_REGS)
24620     *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
24621 }
24622 
24623 /* Return nonzero if the CONSUMER instruction (a store) does not need
24624    PRODUCER's value to calculate the address.  */
24625 
24626 int
24627 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
24628 {
24629   rtx value = PATTERN (producer);
24630   rtx addr = PATTERN (consumer);
24631 
24632   if (GET_CODE (value) == COND_EXEC)
24633     value = COND_EXEC_CODE (value);
24634   if (GET_CODE (value) == PARALLEL)
24635     value = XVECEXP (value, 0, 0);
24636   value = XEXP (value, 0);
24637   if (GET_CODE (addr) == COND_EXEC)
24638     addr = COND_EXEC_CODE (addr);
24639   if (GET_CODE (addr) == PARALLEL)
24640     addr = XVECEXP (addr, 0, 0);
24641   addr = XEXP (addr, 0);
24642 
24643   return !reg_overlap_mentioned_p (value, addr);
24644 }
24645 
24646 /* Return nonzero if the CONSUMER instruction (a store) does need
24647    PRODUCER's value to calculate the address.  */
24648 
24649 int
24650 arm_early_store_addr_dep (rtx producer, rtx consumer)
24651 {
24652   return !arm_no_early_store_addr_dep (producer, consumer);
24653 }
24654 
24655 /* Return nonzero if the CONSUMER instruction (a load) does need
24656    PRODUCER's value to calculate the address.  */
24657 
24658 int
24659 arm_early_load_addr_dep (rtx producer, rtx consumer)
24660 {
24661   rtx value = PATTERN (producer);
24662   rtx addr = PATTERN (consumer);
24663 
24664   if (GET_CODE (value) == COND_EXEC)
24665     value = COND_EXEC_CODE (value);
24666   if (GET_CODE (value) == PARALLEL)
24667     value = XVECEXP (value, 0, 0);
24668   value = XEXP (value, 0);
24669   if (GET_CODE (addr) == COND_EXEC)
24670     addr = COND_EXEC_CODE (addr);
24671   if (GET_CODE (addr) == PARALLEL)
24672     {
24673       if (GET_CODE (XVECEXP (addr, 0, 0)) == RETURN)
24674         addr = XVECEXP (addr, 0, 1);
24675       else
24676         addr = XVECEXP (addr, 0, 0);
24677     }
24678   addr = XEXP (addr, 1);
24679 
24680   return reg_overlap_mentioned_p (value, addr);
24681 }
24682 
24683 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
24684    have an early register shift value or amount dependency on the
24685    result of PRODUCER.  */
24686 
24687 int
24688 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
24689 {
24690   rtx value = PATTERN (producer);
24691   rtx op = PATTERN (consumer);
24692   rtx early_op;
24693 
24694   if (GET_CODE (value) == COND_EXEC)
24695     value = COND_EXEC_CODE (value);
24696   if (GET_CODE (value) == PARALLEL)
24697     value = XVECEXP (value, 0, 0);
24698   value = XEXP (value, 0);
24699   if (GET_CODE (op) == COND_EXEC)
24700     op = COND_EXEC_CODE (op);
24701   if (GET_CODE (op) == PARALLEL)
24702     op = XVECEXP (op, 0, 0);
24703   op = XEXP (op, 1);
24704 
24705   early_op = XEXP (op, 0);
24706   /* This is either an actual independent shift, or a shift applied to
24707      the first operand of another operation.  We want the whole shift
24708      operation.  */
24709   if (REG_P (early_op))
24710     early_op = op;
24711 
24712   return !reg_overlap_mentioned_p (value, early_op);
24713 }
24714 
24715 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
24716    have an early register shift value dependency on the result of
24717    PRODUCER.  */
24718 
24719 int
24720 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
24721 {
24722   rtx value = PATTERN (producer);
24723   rtx op = PATTERN (consumer);
24724   rtx early_op;
24725 
24726   if (GET_CODE (value) == COND_EXEC)
24727     value = COND_EXEC_CODE (value);
24728   if (GET_CODE (value) == PARALLEL)
24729     value = XVECEXP (value, 0, 0);
24730   value = XEXP (value, 0);
24731   if (GET_CODE (op) == COND_EXEC)
24732     op = COND_EXEC_CODE (op);
24733   if (GET_CODE (op) == PARALLEL)
24734     op = XVECEXP (op, 0, 0);
24735   op = XEXP (op, 1);
24736 
24737   early_op = XEXP (op, 0);
24738 
24739   /* This is either an actual independent shift, or a shift applied to
24740      the first operand of another operation.  We want the value being
24741      shifted, in either case.  */
24742   if (!REG_P (early_op))
24743     early_op = XEXP (early_op, 0);
24744 
24745   return !reg_overlap_mentioned_p (value, early_op);
24746 }
24747 
24748 /* Return nonzero if the CONSUMER (a mul or mac op) does not
24749    have an early register mult dependency on the result of
24750    PRODUCER.  */
24751 
24752 int
24753 arm_no_early_mul_dep (rtx producer, rtx consumer)
24754 {
24755   rtx value = PATTERN (producer);
24756   rtx op = PATTERN (consumer);
24757 
24758   if (GET_CODE (value) == COND_EXEC)
24759     value = COND_EXEC_CODE (value);
24760   if (GET_CODE (value) == PARALLEL)
24761     value = XVECEXP (value, 0, 0);
24762   value = XEXP (value, 0);
24763   if (GET_CODE (op) == COND_EXEC)
24764     op = COND_EXEC_CODE (op);
24765   if (GET_CODE (op) == PARALLEL)
24766     op = XVECEXP (op, 0, 0);
24767   op = XEXP (op, 1);
24768 
24769   if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
24770     {
24771       if (GET_CODE (XEXP (op, 0)) == MULT)
24772 	return !reg_overlap_mentioned_p (value, XEXP (op, 0));
24773       else
24774 	return !reg_overlap_mentioned_p (value, XEXP (op, 1));
24775     }
24776 
24777   return 0;
24778 }
24779 
24780 /* We can't rely on the caller doing the proper promotion when
24781    using APCS or ATPCS.  */
24782 
24783 static bool
24784 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
24785 {
24786     return !TARGET_AAPCS_BASED;
24787 }
24788 
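/* Worker for the promote_function_mode hook: integer arguments and
   return values narrower than a word are widened to SImode.  */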
24789 static enum machine_mode
24790 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
24791                            enum machine_mode mode,
24792                            int *punsignedp ATTRIBUTE_UNUSED,
24793                            const_tree fntype ATTRIBUTE_UNUSED,
24794                            int for_return ATTRIBUTE_UNUSED)
24795 {
24796   if (GET_MODE_CLASS (mode) == MODE_INT
24797       && GET_MODE_SIZE (mode) < 4)
24798     return SImode;
24799 
24800   return mode;
24801 }
24802 
24803 /* AAPCS based ABIs use short enums by default.  */
24804 
24805 static bool
24806 arm_default_short_enums (void)
24807 {
24808   return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
24809 }
24810 
24811 
24812 /* AAPCS requires that anonymous bitfields affect structure alignment.  */
24813 
24814 static bool
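  /* The mask selects exactly one element; find its index.  */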
24815 arm_align_anon_bitfield (void)
24816 {
24817   return TARGET_AAPCS_BASED;
24818 }
24819 
24820 
24821 /* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */
24822 
24823 static tree
24824 arm_cxx_guard_type (void)
24825 {
24826   return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
24827 }
24828 
24829 /* Return non-zero iff the consumer (a multiply-accumulate or a
24830    multiple-subtract instruction) has an accumulator dependency on the
24831    result of the producer and no other dependency on that result.  It
24832    does not check if the producer is a multiply-accumulate instruction.  */
24833 int
24834 arm_mac_accumulator_is_result (rtx producer, rtx consumer)
24835 {
24836   rtx result;
24837   rtx op0, op1, acc;
24838 
24839   producer = PATTERN (producer);
24840   consumer = PATTERN (consumer);
24841 
24842   if (GET_CODE (producer) == COND_EXEC)
24843     producer = COND_EXEC_CODE (producer);
24844   if (GET_CODE (consumer) == COND_EXEC)
24845     consumer = COND_EXEC_CODE (consumer);
24846 
24847   if (GET_CODE (producer) != SET)
24848     return 0;
24849 
24850   result = XEXP (producer, 0);
24851 
24852   if (GET_CODE (consumer) != SET)
24853     return 0;
24854 
24855   /* Check that the consumer is of the form
24856      (set (...) (plus (mult ...) (...)))
24857      or
24858      (set (...) (minus (...) (mult ...))).  */
24859   if (GET_CODE (XEXP (consumer, 1)) == PLUS)
24860     {
24861       if (GET_CODE (XEXP (XEXP (consumer, 1), 0)) != MULT)
24862         return 0;
24863 
24864       op0 = XEXP (XEXP (XEXP (consumer, 1), 0), 0);
24865       op1 = XEXP (XEXP (XEXP (consumer, 1), 0), 1);
24866       acc = XEXP (XEXP (consumer, 1), 1);
24867     }
24868   else if (GET_CODE (XEXP (consumer, 1)) == MINUS)
24869     {
24870       if (GET_CODE (XEXP (XEXP (consumer, 1), 1)) != MULT)
24871         return 0;
24872 
24873       op0 = XEXP (XEXP (XEXP (consumer, 1), 1), 0);
24874       op1 = XEXP (XEXP (XEXP (consumer, 1), 1), 1);
24875       acc = XEXP (XEXP (consumer, 1), 0);
24876     }
24877   else
24878     return 0;
24879 
24880   return (reg_overlap_mentioned_p (result, acc)
24881           && !reg_overlap_mentioned_p (result, op0)
24882           && !reg_overlap_mentioned_p (result, op1));
24883 }
24884 
24885 /* Return non-zero if the consumer (a multiply-accumulate instruction)
24886    has an accumulator dependency on the result of the producer (a
24887    multiplication instruction) and no other dependency on that result.  */
24888 int
24889 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
24890 {
24891   rtx mul = PATTERN (producer);
24892   rtx mac = PATTERN (consumer);
24893   rtx mul_result;
24894   rtx mac_op0, mac_op1, mac_acc;
24895 
24896   if (GET_CODE (mul) == COND_EXEC)
24897     mul = COND_EXEC_CODE (mul);
24898   if (GET_CODE (mac) == COND_EXEC)
24899     mac = COND_EXEC_CODE (mac);
24900 
24901   /* Check that mul is of the form (set (...) (mult ...))
24902      and mla is of the form (set (...) (plus (mult ...) (...))).  */
24903   if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
24904       || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
24905           || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
24906     return 0;
24907 
24908   mul_result = XEXP (mul, 0);
24909   mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
24910   mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
24911   mac_acc = XEXP (XEXP (mac, 1), 1);
24912 
24913   return (reg_overlap_mentioned_p (mul_result, mac_acc)
24914           && !reg_overlap_mentioned_p (mul_result, mac_op0)
24915           && !reg_overlap_mentioned_p (mul_result, mac_op1));
24916 }
24917 
24918 
24919 /* The EABI says test the least significant bit of a guard variable.  */
24920 
24921 static bool
24922 arm_cxx_guard_mask_bit (void)
24923 {
24924   return TARGET_AAPCS_BASED;
24925 }
24926 
24927 
24928 /* The EABI specifies that all array cookies are 8 bytes long.  */
24929 
24930 static tree
24931 arm_get_cookie_size (tree type)
24932 {
24933   tree size;
24934 
24935   if (!TARGET_AAPCS_BASED)
24936     return default_cxx_get_cookie_size (type);
24937 
24938   size = build_int_cst (sizetype, 8);
24939   return size;
24940 }
24941 
24942 
24943 /* The EABI says that array cookies should also contain the element size.  */
24944 
24945 static bool
24946 arm_cookie_has_size (void)
24947 {
24948   return TARGET_AAPCS_BASED;
24949 }
24950 
24951 
24952 /* The EABI says constructors and destructors should return a pointer to
24953    the object constructed/destroyed.  */
24954 
24955 static bool
24956 arm_cxx_cdtor_returns_this (void)
24957 {
24958   return TARGET_AAPCS_BASED;
24959 }
24960 
24961 /* The EABI says that an inline function may never be the key
24962    method.  */
24963 
24964 static bool
24965 arm_cxx_key_method_may_be_inline (void)
24966 {
24967   return !TARGET_AAPCS_BASED;
24968 }
24969 
24970 static void
24971 arm_cxx_determine_class_data_visibility (tree decl)
24972 {
24973   if (!TARGET_AAPCS_BASED
24974       || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
24975     return;
24976 
24977   /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
24978      is exported.  However, on systems without dynamic vague linkage,
24979      \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
24980   if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
24981     DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
24982   else
24983     DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
24984   DECL_VISIBILITY_SPECIFIED (decl) = 1;
24985 }
24986 
24987 static bool
24988 arm_cxx_class_data_always_comdat (void)
24989 {
24990   /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
24991      vague linkage if the class has no key function.  */
24992   return !TARGET_AAPCS_BASED;
24993 }
24994 
24995 
24996 /* The EABI says __aeabi_atexit should be used to register static
24997    destructors.  */
24998 
24999 static bool
25000 arm_cxx_use_aeabi_atexit (void)
25001 {
25002   return TARGET_AAPCS_BASED;
25003 }
25004 
25005 
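/* Store SOURCE into the location from which the return address of the
   current function will be restored.  If LR is not saved on the stack,
   simply move SOURCE into LR; otherwise compute the address of the
   saved LR slot, using SCRATCH if the offset is too large for a single
   add, and store SOURCE there.  */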
25006 void
25007 arm_set_return_address (rtx source, rtx scratch)
25008 {
25009   arm_stack_offsets *offsets;
25010   HOST_WIDE_INT delta;
25011   rtx addr;
25012   unsigned long saved_regs;
25013 
25014   offsets = arm_get_frame_offsets ();
25015   saved_regs = offsets->saved_regs_mask;
25016 
25017   if ((saved_regs & (1 << LR_REGNUM)) == 0)
25018     emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
25019   else
25020     {
25021       if (frame_pointer_needed)
25022 	addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
25023       else
25024 	{
25025 	  /* LR will be the first saved register.  */
25026 	  delta = offsets->outgoing_args - (offsets->frame + 4);
25027 
25028 
25029 	  if (delta >= 4096)
25030 	    {
25031 	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
25032 				     GEN_INT (delta & ~4095)));
25033 	      addr = scratch;
25034 	      delta &= 4095;
25035 	    }
25036 	  else
25037 	    addr = stack_pointer_rtx;
25038 
25039 	  addr = plus_constant (Pmode, addr, delta);
25040 	}
25041       emit_move_insn (gen_frame_mem (Pmode, addr), source);
25042     }
25043 }
25044 
25045 
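/* As arm_set_return_address, but for Thumb targets, where the
   reachable offset range is smaller and the frame layout differs.  */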
25046 void
25047 thumb_set_return_address (rtx source, rtx scratch)
25048 {
25049   arm_stack_offsets *offsets;
25050   HOST_WIDE_INT delta;
25051   HOST_WIDE_INT limit;
25052   int reg;
25053   rtx addr;
25054   unsigned long mask;
25055 
25056   emit_use (source);
25057 
25058   offsets = arm_get_frame_offsets ();
25059   mask = offsets->saved_regs_mask;
25060   if (mask & (1 << LR_REGNUM))
25061     {
25062       limit = 1024;
25063       /* Find the saved regs.  */
25064       if (frame_pointer_needed)
25065 	{
25066 	  delta = offsets->soft_frame - offsets->saved_args;
25067 	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
25068 	  if (TARGET_THUMB1)
25069 	    limit = 128;
25070 	}
25071       else
25072 	{
25073 	  delta = offsets->outgoing_args - offsets->saved_args;
25074 	  reg = SP_REGNUM;
25075 	}
25076       /* Allow for the stack frame.  */
25077       if (TARGET_THUMB1 && TARGET_BACKTRACE)
25078 	delta -= 16;
25079       /* The link register is always the first saved register.  */
25080       delta -= 4;
25081 
25082       /* Construct the address.  */
25083       addr = gen_rtx_REG (SImode, reg);
25084       if (delta > limit)
25085 	{
25086 	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
25087 	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
25088 	  addr = scratch;
25089 	}
25090       else
25091 	addr = plus_constant (Pmode, addr, delta);
25092 
25093       emit_move_insn (gen_frame_mem (Pmode, addr), source);
25094     }
25095   else
25096     emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
25097 }
25098 
25099 /* Implements target hook vector_mode_supported_p.  */
25100 bool
25101 arm_vector_mode_supported_p (enum machine_mode mode)
25102 {
25103   /* Neon also supports V2SImode, etc. listed in the clause below.  */
25104   if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
25105       || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
25106     return true;
25107 
25108   if ((TARGET_NEON || TARGET_IWMMXT)
25109       && ((mode == V2SImode)
25110 	  || (mode == V4HImode)
25111 	  || (mode == V8QImode)))
25112     return true;
25113 
25114   if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
25115       || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
25116       || mode == V2HAmode))
25117     return true;
25118 
25119   return false;
25120 }
25121 
25122 /* Implements target hook array_mode_supported_p.  */
25123 
25124 static bool
25125 arm_array_mode_supported_p (enum machine_mode mode,
25126 			    unsigned HOST_WIDE_INT nelems)
25127 {
25128   if (TARGET_NEON
25129       && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
25130       && (nelems >= 2 && nelems <= 4))
25131     return true;
25132 
25133   return false;
25134 }
25135 
25136 /* Use the option -mvectorize-with-neon-double to override the use of quadword
25137    registers when autovectorizing for Neon, at least until multiple vector
25138    widths are supported properly by the middle-end.  */
25139 
25140 static enum machine_mode
25141 arm_preferred_simd_mode (enum machine_mode mode)
25142 {
25143   if (TARGET_NEON)
25144     switch (mode)
25145       {
25146       case SFmode:
25147 	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
25148       case SImode:
25149 	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
25150       case HImode:
25151 	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
25152       case QImode:
25153 	return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
25154       case DImode:
25155 	if (!TARGET_NEON_VECTORIZE_DOUBLE)
25156 	  return V2DImode;
25157 	break;
25158 
25159       default:;
25160       }
25161 
25162   if (TARGET_REALLY_IWMMXT)
25163     switch (mode)
25164       {
25165       case SImode:
25166 	return V2SImode;
25167       case HImode:
25168 	return V4HImode;
25169       case QImode:
25170 	return V8QImode;
25171 
25172       default:;
25173       }
25174 
25175   return word_mode;
25176 }
25177 
25178 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
25179 
25180    We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
25181    using r0-r4 for function arguments, r7 for the stack frame and not have
25182    enough left over to do doubleword arithmetic.  For Thumb-2 all the
25183    potentially problematic instructions accept high registers so this is not
25184    necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
25185    that require many low registers.  */
25186 static bool
25187 arm_class_likely_spilled_p (reg_class_t rclass)
25188 {
25189   if ((TARGET_THUMB1 && rclass == LO_REGS)
25190       || rclass  == CC_REG)
25191     return true;
25192 
25193   return false;
25194 }
25195 
25196 /* Implements target hook small_register_classes_for_mode_p.  */
25197 bool
25198 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
25199 {
25200   return TARGET_THUMB1;
25201 }
25202 
25203 /* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
25204    ARM insns and therefore guarantee that the shift count is modulo 256.
25205    DImode shifts (those implemented by lib1funcs.S or by optabs.c)
25206    guarantee no particular behavior for out-of-range counts.  */
25207 
25208 static unsigned HOST_WIDE_INT
25209 arm_shift_truncation_mask (enum machine_mode mode)
25210 {
25211   return mode == SImode ? 255 : 0;
25212 }
25213 
25214 
25215 /* Map internal gcc register numbers to DWARF2 register numbers.  */
25216 
25217 unsigned int
25218 arm_dbx_register_number (unsigned int regno)
25219 {
25220   if (regno < 16)
25221     return regno;
25222 
25223   if (IS_VFP_REGNUM (regno))
25224     {
25225       /* See comment in arm_dwarf_register_span.  */
25226       if (VFP_REGNO_OK_FOR_SINGLE (regno))
25227 	return 64 + regno - FIRST_VFP_REGNUM;
25228       else
25229 	return 256 + (regno - FIRST_VFP_REGNUM) / 2;
25230     }
25231 
25232   if (IS_IWMMXT_GR_REGNUM (regno))
25233     return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
25234 
25235   if (IS_IWMMXT_REGNUM (regno))
25236     return 112 + regno - FIRST_IWMMXT_REGNUM;
25237 
25238   gcc_unreachable ();
25239 }
25240 
25241 /* Dwarf models VFPv3 registers as 32 64-bit registers.
25242    GCC models them as 64 32-bit registers, so we need to describe this to
25243    the DWARF generation code.  Other registers can use the default.  */
25244 static rtx
25245 arm_dwarf_register_span (rtx rtl)
25246 {
25247   unsigned regno;
25248   int nregs;
25249   int i;
25250   rtx p;
25251 
25252   regno = REGNO (rtl);
25253   if (!IS_VFP_REGNUM (regno))
25254     return NULL_RTX;
25255 
25256   /* XXX FIXME: The EABI defines two VFP register ranges:
25257 	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
25258 	256-287: D0-D31
25259      The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
25260      corresponding D register.  Until GDB supports this, we shall use the
25261      legacy encodings.  We also use these encodings for D0-D15 for
25262      compatibility with older debuggers.  */
25263   if (VFP_REGNO_OK_FOR_SINGLE (regno))
25264     return NULL_RTX;
25265 
25266   nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
25267   p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
25268   regno = (regno - FIRST_VFP_REGNUM) / 2;
25269   for (i = 0; i < nregs; i++)
25270     XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
25271 
25272   return p;
25273 }
25274 
25275 #if ARM_UNWIND_INFO
25276 /* Emit unwind directives for a store-multiple instruction or stack pointer
25277    push during alignment.
25278    These should only ever be generated by the function prologue code, so
25279    expect them to have a particular form.  */
25280 
25281 static void
25282 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
25283 {
25284   int i;
25285   HOST_WIDE_INT offset;
25286   HOST_WIDE_INT nregs;
25287   int reg_size;
25288   unsigned reg;
25289   unsigned lastreg;
25290   rtx e;
25291 
25292   e = XVECEXP (p, 0, 0);
25293   if (GET_CODE (e) != SET)
25294     abort ();
25295 
25296   /* First insn will adjust the stack pointer.  */
25297   if (GET_CODE (e) != SET
25298       || !REG_P (XEXP (e, 0))
25299       || REGNO (XEXP (e, 0)) != SP_REGNUM
25300       || GET_CODE (XEXP (e, 1)) != PLUS)
25301     abort ();
25302 
25303   offset = -INTVAL (XEXP (XEXP (e, 1), 1));
25304   nregs = XVECLEN (p, 0) - 1;
25305 
25306   reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
25307   if (reg < 16)
25308     {
25309       /* The function prologue may also push pc, but does not annotate it, as it is
25310 	 never restored.  We turn this into a stack pointer adjustment.  */
25311       if (nregs * 4 == offset - 4)
25312 	{
25313 	  fprintf (asm_out_file, "\t.pad #4\n");
25314 	  offset -= 4;
25315 	}
25316       reg_size = 4;
25317       fprintf (asm_out_file, "\t.save {");
25318     }
25319   else if (IS_VFP_REGNUM (reg))
25320     {
25321       reg_size = 8;
25322       fprintf (asm_out_file, "\t.vsave {");
25323     }
25324   else
25325     /* Unknown register type.  */
25326     abort ();
25327 
25328   /* If the stack increment doesn't match the size of the saved registers,
25329      something has gone horribly wrong.  */
25330   if (offset != nregs * reg_size)
25331     abort ();
25332 
25333   offset = 0;
25334   lastreg = 0;
25335   /* The remaining insns will describe the stores.  */
25336   for (i = 1; i <= nregs; i++)
25337     {
25338       /* Expect (set (mem <addr>) (reg)).
25339          Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
25340       e = XVECEXP (p, 0, i);
25341       if (GET_CODE (e) != SET
25342 	  || !MEM_P (XEXP (e, 0))
25343 	  || !REG_P (XEXP (e, 1)))
25344 	abort ();
25345 
25346       reg = REGNO (XEXP (e, 1));
25347       if (reg < lastreg)
25348 	abort ();
25349 
25350       if (i != 1)
25351 	fprintf (asm_out_file, ", ");
25352       /* We can't use %r for vfp because we need to use the
25353 	 double precision register names.  */
25354       if (IS_VFP_REGNUM (reg))
25355 	asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
25356       else
25357 	asm_fprintf (asm_out_file, "%r", reg);
25358 
25359 #ifdef ENABLE_CHECKING
25360       /* Check that the addresses are consecutive.  */
25361       e = XEXP (XEXP (e, 0), 0);
25362       if (GET_CODE (e) == PLUS)
25363 	{
25364 	  offset += reg_size;
25365 	  if (!REG_P (XEXP (e, 0))
25366 	      || REGNO (XEXP (e, 0)) != SP_REGNUM
25367 	      || !CONST_INT_P (XEXP (e, 1))
25368 	      || offset != INTVAL (XEXP (e, 1)))
25369 	    abort ();
25370 	}
25371       else if (i != 1
25372 	       || !REG_P (e)
25373 	       || REGNO (e) != SP_REGNUM)
25374 	abort ();
25375 #endif
25376     }
25377   fprintf (asm_out_file, "}\n");
25378 }
25379 
25380 /*  Emit unwind directives for a SET.  */
25381 
25382 static void
25383 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
25384 {
25385   rtx e0;
25386   rtx e1;
25387   unsigned reg;
25388 
25389   e0 = XEXP (p, 0);
25390   e1 = XEXP (p, 1);
25391   switch (GET_CODE (e0))
25392     {
25393     case MEM:
25394       /* Pushing a single register.  */
25395       if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
25396 	  || !REG_P (XEXP (XEXP (e0, 0), 0))
25397 	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
25398 	abort ();
25399 
25400       asm_fprintf (asm_out_file, "\t.save ");
25401       if (IS_VFP_REGNUM (REGNO (e1)))
25402 	asm_fprintf(asm_out_file, "{d%d}\n",
25403 		    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
25404       else
25405 	asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
25406       break;
25407 
25408     case REG:
25409       if (REGNO (e0) == SP_REGNUM)
25410 	{
25411 	  /* A stack increment.  */
25412 	  if (GET_CODE (e1) != PLUS
25413 	      || !REG_P (XEXP (e1, 0))
25414 	      || REGNO (XEXP (e1, 0)) != SP_REGNUM
25415 	      || !CONST_INT_P (XEXP (e1, 1)))
25416 	    abort ();
25417 
25418 	  asm_fprintf (asm_out_file, "\t.pad #%wd\n",
25419 		       -INTVAL (XEXP (e1, 1)));
25420 	}
25421       else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
25422 	{
25423 	  HOST_WIDE_INT offset;
25424 
25425 	  if (GET_CODE (e1) == PLUS)
25426 	    {
25427 	      if (!REG_P (XEXP (e1, 0))
25428 		  || !CONST_INT_P (XEXP (e1, 1)))
25429 		abort ();
25430 	      reg = REGNO (XEXP (e1, 0));
25431 	      offset = INTVAL (XEXP (e1, 1));
25432 	      asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
25433 			   HARD_FRAME_POINTER_REGNUM, reg,
25434 			   offset);
25435 	    }
25436 	  else if (REG_P (e1))
25437 	    {
25438 	      reg = REGNO (e1);
25439 	      asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
25440 			   HARD_FRAME_POINTER_REGNUM, reg);
25441 	    }
25442 	  else
25443 	    abort ();
25444 	}
25445       else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
25446 	{
25447 	  /* Move from sp to reg.  */
25448 	  asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
25449 	}
25450      else if (GET_CODE (e1) == PLUS
25451 	      && REG_P (XEXP (e1, 0))
25452 	      && REGNO (XEXP (e1, 0)) == SP_REGNUM
25453 	      && CONST_INT_P (XEXP (e1, 1)))
25454 	{
25455 	  /* Set reg to offset from sp.  */
25456 	  asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
25457 		       REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
25458 	}
25459       else
25460 	abort ();
25461       break;
25462 
25463     default:
25464       abort ();
25465     }
25466 }
25467 
25468 
25469 /* Emit unwind directives for the given insn.  */
25470 
25471 static void
25472 arm_unwind_emit (FILE * asm_out_file, rtx insn)
25473 {
25474   rtx note, pat;
25475   bool handled_one = false;
25476 
25477   if (arm_except_unwind_info (&global_options) != UI_TARGET)
25478     return;
25479 
25480   if (!(flag_unwind_tables || crtl->uses_eh_lsda)
25481       && (TREE_NOTHROW (current_function_decl)
25482 	  || crtl->all_throwers_are_sibcalls))
25483     return;
25484 
25485   if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
25486     return;
25487 
25488   for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
25489     {
25490       pat = XEXP (note, 0);
25491       switch (REG_NOTE_KIND (note))
25492 	{
25493 	case REG_FRAME_RELATED_EXPR:
25494 	  goto found;
25495 
25496 	case REG_CFA_REGISTER:
25497 	  if (pat == NULL)
25498 	    {
25499 	      pat = PATTERN (insn);
25500 	      if (GET_CODE (pat) == PARALLEL)
25501 		pat = XVECEXP (pat, 0, 0);
25502 	    }
25503 
25504 	  /* Only emitted for IS_STACKALIGN re-alignment.  */
25505 	  {
25506 	    rtx dest, src;
25507 	    unsigned reg;
25508 
25509 	    src = SET_SRC (pat);
25510 	    dest = SET_DEST (pat);
25511 
25512 	    gcc_assert (src == stack_pointer_rtx);
25513 	    reg = REGNO (dest);
25514 	    asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
25515 			 reg + 0x90, reg);
25516 	  }
25517 	  handled_one = true;
25518 	  break;
25519 
25520 	case REG_CFA_DEF_CFA:
25521 	case REG_CFA_EXPRESSION:
25522 	case REG_CFA_ADJUST_CFA:
25523 	case REG_CFA_OFFSET:
25524 	  /* ??? Only handling here what we actually emit.  */
25525 	  gcc_unreachable ();
25526 
25527 	default:
25528 	  break;
25529 	}
25530     }
25531   if (handled_one)
25532     return;
25533   pat = PATTERN (insn);
25534  found:
25535 
25536   switch (GET_CODE (pat))
25537     {
25538     case SET:
25539       arm_unwind_emit_set (asm_out_file, pat);
25540       break;
25541 
25542     case SEQUENCE:
25543       /* Store multiple.  */
25544       arm_unwind_emit_sequence (asm_out_file, pat);
25545       break;
25546 
25547     default:
25548       abort();
25549     }
25550 }
25551 
25552 
25553 /* Output a reference from a function exception table to the type_info
25554    object X.  The EABI specifies that the symbol should be relocated by
25555    an R_ARM_TARGET2 relocation.  */
25556 
25557 static bool
25558 arm_output_ttype (rtx x)
25559 {
25560   fputs ("\t.word\t", asm_out_file);
25561   output_addr_const (asm_out_file, x);
25562   /* Use special relocations for symbol references.  */
25563   if (!CONST_INT_P (x))
25564     fputs ("(TARGET2)", asm_out_file);
25565   fputc ('\n', asm_out_file);
25566 
25567   return TRUE;
25568 }
25569 
25570 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */
25571 
25572 static void
25573 arm_asm_emit_except_personality (rtx personality)
25574 {
25575   fputs ("\t.personality\t", asm_out_file);
25576   output_addr_const (asm_out_file, personality);
25577   fputc ('\n', asm_out_file);
25578 }
25579 
25580 /* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */
25581 
25582 static void
25583 arm_asm_init_sections (void)
25584 {
25585   exception_section = get_unnamed_section (0, output_section_asm_op,
25586 					   "\t.handlerdata");
25587 }
25588 #endif /* ARM_UNWIND_INFO */
25589 
25590 /* Output unwind directives for the start/end of a function.  */
25591 
25592 void
25593 arm_output_fn_unwind (FILE * f, bool prologue)
25594 {
25595   if (arm_except_unwind_info (&global_options) != UI_TARGET)
25596     return;
25597 
25598   if (prologue)
25599     fputs ("\t.fnstart\n", f);
25600   else
25601     {
25602       /* If this function will never be unwound, then mark it as such.
25603          The same condition is used in arm_unwind_emit to suppress
25604 	 the frame annotations.  */
25605       if (!(flag_unwind_tables || crtl->uses_eh_lsda)
25606 	  && (TREE_NOTHROW (current_function_decl)
25607 	      || crtl->all_throwers_are_sibcalls))
25608 	fputs("\t.cantunwind\n", f);
25609 
25610       fputs ("\t.fnend\n", f);
25611     }
25612 }
25613 
25614 static bool
25615 arm_emit_tls_decoration (FILE *fp, rtx x)
25616 {
25617   enum tls_reloc reloc;
25618   rtx val;
25619 
25620   val = XVECEXP (x, 0, 0);
25621   reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
25622 
25623   output_addr_const (fp, val);
25624 
25625   switch (reloc)
25626     {
25627     case TLS_GD32:
25628       fputs ("(tlsgd)", fp);
25629       break;
25630     case TLS_LDM32:
25631       fputs ("(tlsldm)", fp);
25632       break;
25633     case TLS_LDO32:
25634       fputs ("(tlsldo)", fp);
25635       break;
25636     case TLS_IE32:
25637       fputs ("(gottpoff)", fp);
25638       break;
25639     case TLS_LE32:
25640       fputs ("(tpoff)", fp);
25641       break;
25642     case TLS_DESCSEQ:
25643       fputs ("(tlsdesc)", fp);
25644       break;
25645     default:
25646       gcc_unreachable ();
25647     }
25648 
25649   switch (reloc)
25650     {
25651     case TLS_GD32:
25652     case TLS_LDM32:
25653     case TLS_IE32:
25654     case TLS_DESCSEQ:
25655       fputs (" + (. - ", fp);
25656       output_addr_const (fp, XVECEXP (x, 0, 2));
25657       /* For DESCSEQ the 3rd operand encodes thumbness, and is added.  */
25658       fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
25659       output_addr_const (fp, XVECEXP (x, 0, 3));
25660       fputc (')', fp);
25661       break;
25662     default:
25663       break;
25664     }
25665 
25666   return TRUE;
25667 }
25668 
25669 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */
25670 
25671 static void
25672 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
25673 {
25674   gcc_assert (size == 4);
25675   fputs ("\t.word\t", file);
25676   output_addr_const (file, x);
25677   fputs ("(tlsldo)", file);
25678 }
25679 
25680 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */
25681 
25682 static bool
25683 arm_output_addr_const_extra (FILE *fp, rtx x)
25684 {
25685   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
25686     return arm_emit_tls_decoration (fp, x);
25687   else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
25688     {
25689       char label[256];
25690       int labelno = INTVAL (XVECEXP (x, 0, 0));
25691 
25692       ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
25693       assemble_name_raw (fp, label);
25694 
25695       return TRUE;
25696     }
25697   else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
25698     {
25699       assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
25700       if (GOT_PCREL)
25701 	fputs ("+.", fp);
25702       fputs ("-(", fp);
25703       output_addr_const (fp, XVECEXP (x, 0, 0));
25704       fputc (')', fp);
25705       return TRUE;
25706     }
25707   else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
25708     {
25709       output_addr_const (fp, XVECEXP (x, 0, 0));
25710       if (GOT_PCREL)
25711         fputs ("+.", fp);
25712       fputs ("-(", fp);
25713       output_addr_const (fp, XVECEXP (x, 0, 1));
25714       fputc (')', fp);
25715       return TRUE;
25716     }
25717   else if (GET_CODE (x) == CONST_VECTOR)
25718     return arm_emit_vector_const (fp, x);
25719 
25720   return FALSE;
25721 }
25722 
25723 /* Output assembly for a shift instruction.
25724    SET_FLAGS determines how the instruction modifies the condition codes.
25725    0 - Do not set condition codes.
25726    1 - Set condition codes.
25727    2 - Use smallest instruction.  */
25728 const char *
25729 arm_output_shift(rtx * operands, int set_flags)
25730 {
25731   char pattern[100];
25732   static const char flag_chars[3] = {'?', '.', '!'};
25733   const char *shift;
25734   HOST_WIDE_INT val;
25735   char c;
25736 
25737   c = flag_chars[set_flags];
25738   if (TARGET_UNIFIED_ASM)
25739     {
25740       shift = shift_op(operands[3], &val);
25741       if (shift)
25742 	{
25743 	  if (val != -1)
25744 	    operands[2] = GEN_INT(val);
25745 	  sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
25746 	}
25747       else
25748 	sprintf (pattern, "mov%%%c\t%%0, %%1", c);
25749     }
25750   else
25751     sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
25752   output_asm_insn (pattern, operands);
25753   return "";
25754 }
25755 
25756 /* Output assembly for a WMMX immediate shift instruction.  */
25757 const char *
25758 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
25759 {
25760   int shift = INTVAL (operands[2]);
25761   char templ[50];
25762   enum machine_mode opmode = GET_MODE (operands[0]);
25763 
25764   gcc_assert (shift >= 0);
25765 
25766   /* Handle a shift value larger than the maximum for the mode: > 63 (for the
25767      D qualifier), 31 (for the W qualifier) or 15 (for the H qualifier).  */
25768   if (((opmode == V4HImode) && (shift > 15))
25769 	|| ((opmode == V2SImode) && (shift > 31))
25770 	|| ((opmode == DImode) && (shift > 63)))
25771   {
25772     if (wror_or_wsra)
25773       {
25774         sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
25775         output_asm_insn (templ, operands);
25776         if (opmode == DImode)
25777           {
25778 	    sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
25779 	    output_asm_insn (templ, operands);
25780           }
25781       }
25782     else
25783       {
25784         /* The destination register will contain all zeros.  */
25785         sprintf (templ, "wzero\t%%0");
25786         output_asm_insn (templ, operands);
25787       }
25788     return "";
25789   }
25790 
25791   if ((opmode == DImode) && (shift > 32))
25792     {
25793       sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
25794       output_asm_insn (templ, operands);
25795       sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
25796       output_asm_insn (templ, operands);
25797     }
25798   else
25799     {
25800       sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
25801       output_asm_insn (templ, operands);
25802     }
25803   return "";
25804 }
25805 
25806 /* Output assembly for a WMMX tinsr instruction.  */
25807 const char *
25808 arm_output_iwmmxt_tinsr (rtx *operands)
25809 {
25810   int mask = INTVAL (operands[3]);
25811   int i;
25812   char templ[50];
25813   int units = mode_nunits[GET_MODE (operands[0])];
25814   gcc_assert ((mask & (mask - 1)) == 0);
25815   for (i = 0; i < units; ++i)
25816     {
25817       if ((mask & 0x01) == 1)
25818         {
25819           break;
25820         }
25821       mask >>= 1;
25822     }
25823   gcc_assert (i < units);
25824   {
25825     switch (GET_MODE (operands[0]))
25826       {
25827       case V8QImode:
25828 	sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
25829 	break;
25830       case V4HImode:
25831 	sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
25832 	break;
25833       case V2SImode:
25834 	sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
25835 	break;
25836       default:
25837 	gcc_unreachable ();
25838 	break;
25839       }
25840     output_asm_insn (templ, operands);
25841   }
25842   return "";
25843 }
25844 
25845 /* Output a Thumb-1 casesi dispatch sequence.  */
25846 const char *
25847 thumb1_output_casesi (rtx *operands)
25848 {
25849   rtx diff_vec = PATTERN (next_real_insn (operands[0]));
25850 
25851   gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
25852 
25853   switch (GET_MODE(diff_vec))
25854     {
25855     case QImode:
25856       return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
25857 	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
25858     case HImode:
25859       return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
25860 	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
25861     case SImode:
25862       return "bl\t%___gnu_thumb1_case_si";
25863     default:
25864       gcc_unreachable ();
25865     }
25866 }
25867 
25868 /* Output a Thumb-2 casesi instruction.  */
25869 const char *
25870 thumb2_output_casesi (rtx *operands)
25871 {
25872   rtx diff_vec = PATTERN (next_real_insn (operands[2]));
25873 
25874   gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
25875 
25876   output_asm_insn ("cmp\t%0, %1", operands);
25877   output_asm_insn ("bhi\t%l3", operands);
25878   switch (GET_MODE(diff_vec))
25879     {
25880     case QImode:
25881       return "tbb\t[%|pc, %0]";
25882     case HImode:
25883       return "tbh\t[%|pc, %0, lsl #1]";
25884     case SImode:
25885       if (flag_pic)
25886 	{
25887 	  output_asm_insn ("adr\t%4, %l2", operands);
25888 	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
25889 	  output_asm_insn ("add\t%4, %4, %5", operands);
25890 	  return "bx\t%4";
25891 	}
25892       else
25893 	{
25894 	  output_asm_insn ("adr\t%4, %l2", operands);
25895 	  return "ldr\t%|pc, [%4, %0, lsl #2]";
25896 	}
25897     default:
25898       gcc_unreachable ();
25899     }
25900 }
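
/* Schematically, for a HImode dispatch table the code above emits:

       cmp     %0, %1
       bhi     %l3
       tbh     [pc, %0, lsl #1]

   with tbb used for QImode tables, and an adr/ldr sequence (plus add/bx
   when PIC) for SImode tables.  */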
25901 
25902 /* Most ARM cores are single issue, but some newer ones can dual issue.
25903    The scheduler descriptions rely on this being correct.  */
25904 static int
25905 arm_issue_rate (void)
25906 {
25907   switch (arm_tune)
25908     {
25909     case cortexa15:
25910       return 3;
25911 
25912     case cortexr4:
25913     case cortexr4f:
25914     case cortexr5:
25915     case genericv7a:
25916     case cortexa5:
25917     case cortexa7:
25918     case cortexa8:
25919     case cortexa9:
25920     case fa726te:
25921     case marvell_pj4:
25922       return 2;
25923 
25924     default:
25925       return 1;
25926     }
25927 }
25928 
25929 /* A table and a function to perform ARM-specific name mangling for
25930    NEON vector types in order to conform to the AAPCS (see "Procedure
25931    Call Standard for the ARM Architecture", Appendix A).  To qualify
25932    for emission with the mangled names defined in that document, a
25933    vector type must not only be of the correct mode but also be
25934    composed of NEON vector element types (e.g. __builtin_neon_qi).  */
25935 typedef struct
25936 {
25937   enum machine_mode mode;
25938   const char *element_type_name;
25939   const char *aapcs_name;
25940 } arm_mangle_map_entry;
25941 
25942 static arm_mangle_map_entry arm_mangle_map[] = {
25943   /* 64-bit containerized types.  */
25944   { V8QImode,  "__builtin_neon_qi",     "15__simd64_int8_t" },
25945   { V8QImode,  "__builtin_neon_uqi",    "16__simd64_uint8_t" },
25946   { V4HImode,  "__builtin_neon_hi",     "16__simd64_int16_t" },
25947   { V4HImode,  "__builtin_neon_uhi",    "17__simd64_uint16_t" },
25948   { V2SImode,  "__builtin_neon_si",     "16__simd64_int32_t" },
25949   { V2SImode,  "__builtin_neon_usi",    "17__simd64_uint32_t" },
25950   { V2SFmode,  "__builtin_neon_sf",     "18__simd64_float32_t" },
25951   { V8QImode,  "__builtin_neon_poly8",  "16__simd64_poly8_t" },
25952   { V4HImode,  "__builtin_neon_poly16", "17__simd64_poly16_t" },
25953   /* 128-bit containerized types.  */
25954   { V16QImode, "__builtin_neon_qi",     "16__simd128_int8_t" },
25955   { V16QImode, "__builtin_neon_uqi",    "17__simd128_uint8_t" },
25956   { V8HImode,  "__builtin_neon_hi",     "17__simd128_int16_t" },
25957   { V8HImode,  "__builtin_neon_uhi",    "18__simd128_uint16_t" },
25958   { V4SImode,  "__builtin_neon_si",     "17__simd128_int32_t" },
25959   { V4SImode,  "__builtin_neon_usi",    "18__simd128_uint32_t" },
25960   { V4SFmode,  "__builtin_neon_sf",     "19__simd128_float32_t" },
25961   { V16QImode, "__builtin_neon_poly8",  "17__simd128_poly8_t" },
25962   { V8HImode,  "__builtin_neon_poly16", "18__simd128_poly16_t" },
25963   { VOIDmode, NULL, NULL }
25964 };
25965 
25966 const char *
25967 arm_mangle_type (const_tree type)
25968 {
25969   arm_mangle_map_entry *pos = arm_mangle_map;
25970 
25971   /* The ARM ABI documents (10th October 2008) say that "__va_list"
25972      has to be mangled as if it is in the "std" namespace.  */
25973   if (TARGET_AAPCS_BASED
25974       && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
25975     return "St9__va_list";
25976 
25977   /* Half-precision float.  */
25978   if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
25979     return "Dh";
25980 
25981   if (TREE_CODE (type) != VECTOR_TYPE)
25982     return NULL;
25983 
25984   /* Check the mode of the vector type, and the name of the vector
25985      element type, against the table.  */
25986   while (pos->mode != VOIDmode)
25987     {
25988       tree elt_type = TREE_TYPE (type);
25989 
25990       if (pos->mode == TYPE_MODE (type)
25991 	  && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
25992 	  && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
25993 		      pos->element_type_name))
25994         return pos->aapcs_name;
25995 
25996       pos++;
25997     }
25998 
25999   /* Use the default mangling for unrecognized (possibly user-defined)
26000      vector types.  */
26001   return NULL;
26002 }
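
/* For example, a V4SFmode vector whose element type is __builtin_neon_sf
   is mangled as "19__simd128_float32_t" per the table above, a 16-bit
   REAL_TYPE (__fp16) becomes "Dh", and the AAPCS __va_list becomes
   "St9__va_list".  */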
26003 
26004 /* Order of allocation of core registers for Thumb: this allocation is
26005    written over the corresponding initial entries of the array
26006    initialized with REG_ALLOC_ORDER.  We allocate all low registers
26007    first.  Saving and restoring a low register is usually cheaper than
26008    using a call-clobbered high register.  */
26009 
26010 static const int thumb_core_reg_alloc_order[] =
26011 {
26012    3,  2,  1,  0,  4,  5,  6,  7,
26013   14, 12,  8,  9, 10, 11
26014 };
26015 
26016 /* Adjust register allocation order when compiling for Thumb.  */
26017 
26018 void
26019 arm_order_regs_for_local_alloc (void)
26020 {
26021   const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
26022   memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
26023   if (TARGET_THUMB)
26024     memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
26025             sizeof (thumb_core_reg_alloc_order));
26026 }
26027 
26028 /* Implement TARGET_FRAME_POINTER_REQUIRED.  */
26029 
26030 bool
26031 arm_frame_pointer_required (void)
26032 {
26033   return (cfun->has_nonlocal_label
26034           || SUBTARGET_FRAME_POINTER_REQUIRED
26035           || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
26036 }
26037 
26038 /* Only thumb1 can't support conditional execution, so return true if
26039    the target is not thumb1.  */
26040 static bool
26041 arm_have_conditional_execution (void)
26042 {
26043   return !TARGET_THUMB1;
26044 }
26045 
26046 /* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
26047 static HOST_WIDE_INT
26048 arm_vector_alignment (const_tree type)
26049 {
26050   HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
26051 
26052   if (TARGET_AAPCS_BASED)
26053     align = MIN (align, 64);
26054 
26055   return align;
26056 }
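
/* For example, TYPE_SIZE is in bits, so a 128-bit NEON vector type would
   report an alignment of MIN (128, 64) == 64 bits when targeting an
   AAPCS-based ABI, and its full 128 bits otherwise.  */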
26057 
26058 static unsigned int
26059 arm_autovectorize_vector_sizes (void)
26060 {
26061   return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
26062 }
26063 
26064 static bool
26065 arm_vector_alignment_reachable (const_tree type, bool is_packed)
26066 {
26067   /* Vectors which aren't in packed structures will not be less aligned than
26068      the natural alignment of their element type, so this is safe.  */
26069   if (TARGET_NEON && !BYTES_BIG_ENDIAN)
26070     return !is_packed;
26071 
26072   return default_builtin_vector_alignment_reachable (type, is_packed);
26073 }
26074 
26075 static bool
26076 arm_builtin_support_vector_misalignment (enum machine_mode mode,
26077 					 const_tree type, int misalignment,
26078 					 bool is_packed)
26079 {
26080   if (TARGET_NEON && !BYTES_BIG_ENDIAN)
26081     {
26082       HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
26083 
26084       if (is_packed)
26085         return align == 1;
26086 
26087       /* If the misalignment is unknown, we should be able to handle the access
26088 	 so long as it is not to a member of a packed data structure.  */
26089       if (misalignment == -1)
26090         return true;
26091 
26092       /* Return true if the misalignment is a multiple of the natural alignment
26093          of the vector's element type.  This is probably always going to be
26094 	 true in practice, since we've already established that this isn't a
26095 	 packed access.  */
26096       return ((misalignment % align) == 0);
26097     }
26098 
26099   return default_builtin_support_vector_misalignment (mode, type, misalignment,
26100 						      is_packed);
26101 }
26102 
26103 static void
26104 arm_conditional_register_usage (void)
26105 {
26106   int regno;
26107 
26108   if (TARGET_THUMB1 && optimize_size)
26109     {
26110       /* When optimizing for size on Thumb-1, it's better not
26111         to use the HI regs, because of the overhead of
26112         stacking them.  */
26113       for (regno = FIRST_HI_REGNUM;
26114 	   regno <= LAST_HI_REGNUM; ++regno)
26115 	fixed_regs[regno] = call_used_regs[regno] = 1;
26116     }
26117 
26118   /* The link register can be clobbered by any branch insn,
26119      but we have no way to track that at present, so mark
26120      it as unavailable.  */
26121   if (TARGET_THUMB1)
26122     fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
26123 
26124   if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
26125     {
26126       /* VFPv3 registers are disabled when earlier VFP
26127 	 versions are selected due to the definition of
26128 	 LAST_VFP_REGNUM.  */
26129       for (regno = FIRST_VFP_REGNUM;
26130 	   regno <= LAST_VFP_REGNUM; ++ regno)
26131 	{
26132 	  fixed_regs[regno] = 0;
26133 	  call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
26134 	    || regno >= FIRST_VFP_REGNUM + 32;
26135 	}
26136     }
26137 
26138   if (TARGET_REALLY_IWMMXT)
26139     {
26140       regno = FIRST_IWMMXT_GR_REGNUM;
26141       /* The 2002/10/09 revision of the XScale ABI has wCG0
26142          and wCG1 as call-preserved registers.  The 2002/11/21
26143          revision changed this so that all wCG registers are
26144          scratch registers.  */
26145       for (regno = FIRST_IWMMXT_GR_REGNUM;
26146 	   regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
26147 	fixed_regs[regno] = 0;
26148       /* The XScale ABI has wR0 - wR9 as scratch registers,
26149 	 the rest as call-preserved registers.  */
26150       for (regno = FIRST_IWMMXT_REGNUM;
26151 	   regno <= LAST_IWMMXT_REGNUM; ++ regno)
26152 	{
26153 	  fixed_regs[regno] = 0;
26154 	  call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
26155 	}
26156     }
26157 
26158   if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
26159     {
26160       fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
26161       call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
26162     }
26163   else if (TARGET_APCS_STACK)
26164     {
26165       fixed_regs[10]     = 1;
26166       call_used_regs[10] = 1;
26167     }
26168   /* -mcaller-super-interworking reserves r11 for calls to
26169      _interwork_r11_call_via_rN().  Making the register global
26170      is an easy way of ensuring that it remains valid for all
26171      calls.  */
26172   if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
26173       || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
26174     {
26175       fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
26176       call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
26177       if (TARGET_CALLER_INTERWORKING)
26178 	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
26179     }
26180   SUBTARGET_CONDITIONAL_REGISTER_USAGE
26181 }
26182 
26183 static reg_class_t
26184 arm_preferred_rename_class (reg_class_t rclass)
26185 {
26186   /* Thumb-2 instructions using LO_REGS may be smaller than instructions
26187      using GENERAL_REGS, so preferring LO_REGS during the register-rename
26188      pass can reduce code size.  */
26189   if (TARGET_THUMB2 && rclass == GENERAL_REGS)
26190     return LO_REGS;
26191   else
26192     return NO_REGS;
26193 }
26194 
26195 /* Compute the attribute "length" of insn "*push_multi".
26196    This function MUST be kept in sync with that insn pattern.  */
26197 int
26198 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
26199 {
26200   int i, regno, hi_reg;
26201   int num_saves = XVECLEN (parallel_op, 0);
26202 
26203   /* ARM mode.  */
26204   if (TARGET_ARM)
26205     return 4;
26206   /* Thumb1 mode.  */
26207   if (TARGET_THUMB1)
26208     return 2;
26209 
26210   /* Thumb2 mode.  */
26211   regno = REGNO (first_op);
26212   hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
26213   for (i = 1; i < num_saves && !hi_reg; i++)
26214     {
26215       regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
26216       hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
26217     }
26218 
26219   if (!hi_reg)
26220     return 2;
26221   return 4;
26222 }
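
/* For example, a Thumb-2 "push {r4, r5, lr}" involves no high register
   other than LR, so the function above returns 2 (a 16-bit encoding),
   whereas "push {r4, r8}" involves r8 and returns 4.  */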
26223 
26224 /* Compute the number of instructions emitted by output_move_double.  */
26225 int
26226 arm_count_output_move_double_insns (rtx *operands)
26227 {
26228   int count;
26229   rtx ops[2];
26230   /* output_move_double may modify the operands array, so call it
26231      here on a copy of the array.  */
26232   ops[0] = operands[0];
26233   ops[1] = operands[1];
26234   output_move_double (ops, false, &count);
26235   return count;
26236 }
26237 
26238 int
26239 vfp3_const_double_for_fract_bits (rtx operand)
26240 {
26241   REAL_VALUE_TYPE r0;
26242 
26243   if (!CONST_DOUBLE_P (operand))
26244     return 0;
26245 
26246   REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
26247   if (exact_real_inverse (DFmode, &r0))
26248     {
26249       if (exact_real_truncate (DFmode, &r0))
26250 	{
26251 	  HOST_WIDE_INT value = real_to_integer (&r0);
26252 	  value = value & 0xffffffff;
26253 	  if ((value != 0) && ( (value & (value - 1)) == 0))
26254 	    return int_log2 (value);
26255 	}
26256     }
26257   return 0;
26258 }
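
/* A minimal host-side sketch of the test above (the function and names
   below are illustrative only and are not part of this file): a constant C
   describes N fractional bits when 1/C is an exact power of two, in which
   case N = log2 (1/C), e.g. 0.125 -> 3.  */
#if 0   /* illustration only, not compiled */
static int
fract_bits_sketch (double c)
{
  if (c <= 0.0)
    return 0;
  double inv = 1.0 / c;                 /* e.g. c == 0.125 -> inv == 8.0 */
  unsigned long v = (unsigned long) inv;
  if ((double) v != inv)                /* 1/c must be an exact integer...  */
    return 0;
  if (v == 0 || (v & (v - 1)) != 0)     /* ... and a power of two */
    return 0;
  int bits = 0;
  while ((1ul << bits) < v)             /* log2 without libm */
    bits++;
  return bits;                          /* 0.125 -> 3 fractional bits */
}
#endif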
26259 
26260 /* Emit a memory barrier around an atomic sequence according to MODEL.  */
26261 
26262 static void
26263 arm_pre_atomic_barrier (enum memmodel model)
26264 {
26265   if (need_atomic_barrier_p (model, true))
26266     emit_insn (gen_memory_barrier ());
26267 }
26268 
26269 static void
26270 arm_post_atomic_barrier (enum memmodel model)
26271 {
26272   if (need_atomic_barrier_p (model, false))
26273     emit_insn (gen_memory_barrier ());
26274 }
26275 
26276 /* Emit the load-exclusive and store-exclusive instructions.  */
26277 
26278 static void
26279 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem)
26280 {
26281   rtx (*gen) (rtx, rtx);
26282 
26283   switch (mode)
26284     {
26285     case QImode: gen = gen_arm_load_exclusiveqi; break;
26286     case HImode: gen = gen_arm_load_exclusivehi; break;
26287     case SImode: gen = gen_arm_load_exclusivesi; break;
26288     case DImode: gen = gen_arm_load_exclusivedi; break;
26289     default:
26290       gcc_unreachable ();
26291     }
26292 
26293   emit_insn (gen (rval, mem));
26294 }
26295 
26296 static void
26297 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval, rtx mem)
26298 {
26299   rtx (*gen) (rtx, rtx, rtx);
26300 
26301   switch (mode)
26302     {
26303     case QImode: gen = gen_arm_store_exclusiveqi; break;
26304     case HImode: gen = gen_arm_store_exclusivehi; break;
26305     case SImode: gen = gen_arm_store_exclusivesi; break;
26306     case DImode: gen = gen_arm_store_exclusivedi; break;
26307     default:
26308       gcc_unreachable ();
26309     }
26310 
26311   emit_insn (gen (bval, rval, mem));
26312 }
26313 
26314 /* Mark the previous jump instruction as unlikely.  */
26315 
26316 static void
26317 emit_unlikely_jump (rtx insn)
26318 {
26319   rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
26320 
26321   insn = emit_jump_insn (insn);
26322   add_reg_note (insn, REG_BR_PROB, very_unlikely);
26323 }
26324 
26325 /* Expand a compare and swap pattern.  */
26326 
26327 void
26328 arm_expand_compare_and_swap (rtx operands[])
26329 {
26330   rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
26331   enum machine_mode mode;
26332   rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
26333 
26334   bval = operands[0];
26335   rval = operands[1];
26336   mem = operands[2];
26337   oldval = operands[3];
26338   newval = operands[4];
26339   is_weak = operands[5];
26340   mod_s = operands[6];
26341   mod_f = operands[7];
26342   mode = GET_MODE (mem);
26343 
26344   switch (mode)
26345     {
26346     case QImode:
26347     case HImode:
26348       /* For narrow modes, we're going to perform the comparison in SImode,
26349 	 so do the zero-extension now.  */
26350       rval = gen_reg_rtx (SImode);
26351       oldval = convert_modes (SImode, mode, oldval, true);
26352       /* FALLTHRU */
26353 
26354     case SImode:
26355       /* Force the value into a register if needed.  We waited until after
26356 	 the zero-extension above to do this properly.  */
26357       if (!arm_add_operand (oldval, SImode))
26358 	oldval = force_reg (SImode, oldval);
26359       break;
26360 
26361     case DImode:
26362       if (!cmpdi_operand (oldval, mode))
26363 	oldval = force_reg (mode, oldval);
26364       break;
26365 
26366     default:
26367       gcc_unreachable ();
26368     }
26369 
26370   switch (mode)
26371     {
26372     case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
26373     case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
26374     case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
26375     case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
26376     default:
26377       gcc_unreachable ();
26378     }
26379 
26380   emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
26381 
26382   if (mode == QImode || mode == HImode)
26383     emit_move_insn (operands[1], gen_lowpart (mode, rval));
26384 
26385   /* In all cases, we arrange for success to be signaled by Z set.
26386      This arrangement allows for the boolean result to be used directly
26387      in a subsequent branch, post optimization.  */
26388   x = gen_rtx_REG (CCmode, CC_REGNUM);
26389   x = gen_rtx_EQ (SImode, x, const0_rtx);
26390   emit_insn (gen_rtx_SET (VOIDmode, bval, x));
26391 }
26392 
26393 /* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
26394    another memory store between the load-exclusive and store-exclusive can
26395    reset the monitor from Exclusive to Open state.  This means we must wait
26396    until after reload to split the pattern, lest we get a register spill in
26397    the middle of the atomic sequence.  */
26398 
26399 void
26400 arm_split_compare_and_swap (rtx operands[])
26401 {
26402   rtx rval, mem, oldval, newval, scratch;
26403   enum machine_mode mode;
26404   enum memmodel mod_s, mod_f;
26405   bool is_weak;
26406   rtx label1, label2, x, cond;
26407 
26408   rval = operands[0];
26409   mem = operands[1];
26410   oldval = operands[2];
26411   newval = operands[3];
26412   is_weak = (operands[4] != const0_rtx);
26413   mod_s = (enum memmodel) INTVAL (operands[5]);
26414   mod_f = (enum memmodel) INTVAL (operands[6]);
26415   scratch = operands[7];
26416   mode = GET_MODE (mem);
26417 
26418   arm_pre_atomic_barrier (mod_s);
26419 
26420   label1 = NULL_RTX;
26421   if (!is_weak)
26422     {
26423       label1 = gen_label_rtx ();
26424       emit_label (label1);
26425     }
26426   label2 = gen_label_rtx ();
26427 
26428   arm_emit_load_exclusive (mode, rval, mem);
26429 
26430   cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
26431   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
26432   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
26433 			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
26434   emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
26435 
26436   arm_emit_store_exclusive (mode, scratch, mem, newval);
26437 
26438   /* Weak or strong, we want EQ to be true for success, so that we
26439      match the flags that we got from the compare above.  */
26440   cond = gen_rtx_REG (CCmode, CC_REGNUM);
26441   x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
26442   emit_insn (gen_rtx_SET (VOIDmode, cond, x));
26443 
26444   if (!is_weak)
26445     {
26446       x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
26447       x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
26448 				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
26449       emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
26450     }
26451 
26452   if (mod_f != MEMMODEL_RELAXED)
26453     emit_label (label2);
26454 
26455   arm_post_atomic_barrier (mod_s);
26456 
26457   if (mod_f == MEMMODEL_RELAXED)
26458     emit_label (label2);
26459 }
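
/* Roughly, for a strong SImode compare-and-swap the splitter above
   produces a sequence along these lines (register names illustrative,
   barriers emitted as MOD_S/MOD_F require):

     1:  ldrex   rval, [mem]
         cmp     rval, oldval
         bne     2f                 @ marked unlikely
         strex   scratch, newval, [mem]
         cmp     scratch, #0
         bne     1b                 @ strong form: retry if strex failed
     2:

   Success is signalled by the Z flag, as arranged by the expander.  */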
26460 
26461 void
26462 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
26463 		     rtx value, rtx model_rtx, rtx cond)
26464 {
26465   enum memmodel model = (enum memmodel) INTVAL (model_rtx);
26466   enum machine_mode mode = GET_MODE (mem);
26467   enum machine_mode wmode = (mode == DImode ? DImode : SImode);
26468   rtx label, x;
26469 
26470   arm_pre_atomic_barrier (model);
26471 
26472   label = gen_label_rtx ();
26473   emit_label (label);
26474 
26475   if (new_out)
26476     new_out = gen_lowpart (wmode, new_out);
26477   if (old_out)
26478     old_out = gen_lowpart (wmode, old_out);
26479   else
26480     old_out = new_out;
26481   value = simplify_gen_subreg (wmode, value, mode, 0);
26482 
26483   arm_emit_load_exclusive (mode, old_out, mem);
26484 
26485   switch (code)
26486     {
26487     case SET:
26488       new_out = value;
26489       break;
26490 
26491     case NOT:
26492       x = gen_rtx_AND (wmode, old_out, value);
26493       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
26494       x = gen_rtx_NOT (wmode, new_out);
26495       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
26496       break;
26497 
26498     case MINUS:
26499       if (CONST_INT_P (value))
26500 	{
26501 	  value = GEN_INT (-INTVAL (value));
26502 	  code = PLUS;
26503 	}
26504       /* FALLTHRU */
26505 
26506     case PLUS:
26507       if (mode == DImode)
26508 	{
26509 	  /* DImode plus/minus need to clobber flags.  */
26510 	  /* The adddi3 and subdi3 patterns are incorrectly written so that
26511 	     they require matching operands, even when we could easily support
26512 	     three operands.  Thankfully, this can be fixed up post-splitting,
26513 	     as the individual add+adc patterns do accept three operands and
26514 	     post-reload cprop can make these moves go away.  */
26515 	  emit_move_insn (new_out, old_out);
26516 	  if (code == PLUS)
26517 	    x = gen_adddi3 (new_out, new_out, value);
26518 	  else
26519 	    x = gen_subdi3 (new_out, new_out, value);
26520 	  emit_insn (x);
26521 	  break;
26522 	}
26523       /* FALLTHRU */
26524 
26525     default:
26526       x = gen_rtx_fmt_ee (code, wmode, old_out, value);
26527       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
26528       break;
26529     }
26530 
26531   arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out));
26532 
26533   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
26534   emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
26535 
26536   arm_post_atomic_barrier (model);
26537 }
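
/* Roughly, for an SImode atomic add the function above emits a loop of
   this shape (register names illustrative):

     1:  ldrex   old, [mem]
         add     new, old, value
         strex   tmp, new, [mem]
         cmp     tmp, #0
         bne     1b

   with barriers before and/or after according to MODEL.  */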
26538 
26539 #define MAX_VECT_LEN 16
26540 
26541 struct expand_vec_perm_d
26542 {
26543   rtx target, op0, op1;
26544   unsigned char perm[MAX_VECT_LEN];
26545   enum machine_mode vmode;
26546   unsigned char nelt;
26547   bool one_vector_p;
26548   bool testing_p;
26549 };
26550 
26551 /* Generate a variable permutation.  */
26552 
26553 static void
26554 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
26555 {
26556   enum machine_mode vmode = GET_MODE (target);
26557   bool one_vector_p = rtx_equal_p (op0, op1);
26558 
26559   gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
26560   gcc_checking_assert (GET_MODE (op0) == vmode);
26561   gcc_checking_assert (GET_MODE (op1) == vmode);
26562   gcc_checking_assert (GET_MODE (sel) == vmode);
26563   gcc_checking_assert (TARGET_NEON);
26564 
26565   if (one_vector_p)
26566     {
26567       if (vmode == V8QImode)
26568 	emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
26569       else
26570 	emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
26571     }
26572   else
26573     {
26574       rtx pair;
26575 
26576       if (vmode == V8QImode)
26577 	{
26578 	  pair = gen_reg_rtx (V16QImode);
26579 	  emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
26580 	  pair = gen_lowpart (TImode, pair);
26581 	  emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
26582 	}
26583       else
26584 	{
26585 	  pair = gen_reg_rtx (OImode);
26586 	  emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
26587 	  emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
26588 	}
26589     }
26590 }
26591 
26592 void
26593 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
26594 {
26595   enum machine_mode vmode = GET_MODE (target);
26596   unsigned int i, nelt = GET_MODE_NUNITS (vmode);
26597   bool one_vector_p = rtx_equal_p (op0, op1);
26598   rtx rmask[MAX_VECT_LEN], mask;
26599 
26600   /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
26601      numbering of elements for big-endian, we must reverse the order.  */
26602   gcc_checking_assert (!BYTES_BIG_ENDIAN);
26603 
26604   /* The VTBL instruction does not use a modulo index, so we must take care
26605      of that ourselves.  */
26606   mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
26607   for (i = 0; i < nelt; ++i)
26608     rmask[i] = mask;
26609   mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
26610   sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
26611 
26612   arm_expand_vec_perm_1 (target, op0, op1, sel);
26613 }
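
/* A small sketch of the masking performed above (illustrative only, not
   part of this file): VTBL has no modulo behaviour, so every selector
   element is ANDed with nelt-1 (one input vector) or 2*nelt-1 (two
   input vectors) before the table lookup.  */
#if 0   /* illustration only, not compiled */
static void
mask_selector_sketch (unsigned char *sel, unsigned int nelt, int one_vector_p)
{
  unsigned int mask = one_vector_p ? nelt - 1 : 2 * nelt - 1;
  unsigned int i;
  for (i = 0; i < nelt; i++)
    sel[i] &= mask;     /* e.g. 9 & 7 == 1 for a single V8QImode input */
}
#endif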
26614 
26615 /* Generate or test for an insn that supports a constant permutation.  */
26616 
26617 /* Recognize patterns for the VUZP insns.  */
26618 
26619 static bool
26620 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
26621 {
26622   unsigned int i, odd, mask, nelt = d->nelt;
26623   rtx out0, out1, in0, in1, x;
26624   rtx (*gen)(rtx, rtx, rtx, rtx);
26625 
26626   if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
26627     return false;
26628 
26629   /* Note that these are little-endian tests.  Adjust for big-endian later.  */
26630   if (d->perm[0] == 0)
26631     odd = 0;
26632   else if (d->perm[0] == 1)
26633     odd = 1;
26634   else
26635     return false;
26636   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
26637 
26638   for (i = 0; i < nelt; i++)
26639     {
26640       unsigned elt = (i * 2 + odd) & mask;
26641       if (d->perm[i] != elt)
26642 	return false;
26643     }
26644 
26645   /* Success!  */
26646   if (d->testing_p)
26647     return true;
26648 
26649   switch (d->vmode)
26650     {
26651     case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
26652     case V8QImode:  gen = gen_neon_vuzpv8qi_internal;  break;
26653     case V8HImode:  gen = gen_neon_vuzpv8hi_internal;  break;
26654     case V4HImode:  gen = gen_neon_vuzpv4hi_internal;  break;
26655     case V4SImode:  gen = gen_neon_vuzpv4si_internal;  break;
26656     case V2SImode:  gen = gen_neon_vuzpv2si_internal;  break;
26657     case V2SFmode:  gen = gen_neon_vuzpv2sf_internal;  break;
26658     case V4SFmode:  gen = gen_neon_vuzpv4sf_internal;  break;
26659     default:
26660       gcc_unreachable ();
26661     }
26662 
26663   in0 = d->op0;
26664   in1 = d->op1;
26665   if (BYTES_BIG_ENDIAN)
26666     {
26667       x = in0, in0 = in1, in1 = x;
26668       odd = !odd;
26669     }
26670 
26671   out0 = d->target;
26672   out1 = gen_reg_rtx (d->vmode);
26673   if (odd)
26674     x = out0, out0 = out1, out1 = x;
26675 
26676   emit_insn (gen (out0, in0, in1, out1));
26677   return true;
26678 }
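
/* For example, with two V8QImode operands the selector
   { 0, 2, 4, 6, 8, 10, 12, 14 } passes the test above with odd == 0 and
   is implemented with a single VUZP, the target receiving the
   even-numbered elements of the op0:op1 concatenation.  */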
26679 
26680 /* Recognize patterns for the VZIP insns.  */
26681 
26682 static bool
26683 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
26684 {
26685   unsigned int i, high, mask, nelt = d->nelt;
26686   rtx out0, out1, in0, in1, x;
26687   rtx (*gen)(rtx, rtx, rtx, rtx);
26688 
26689   if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
26690     return false;
26691 
26692   /* Note that these are little-endian tests.  Adjust for big-endian later.  */
26693   high = nelt / 2;
26694   if (d->perm[0] == high)
26695     ;
26696   else if (d->perm[0] == 0)
26697     high = 0;
26698   else
26699     return false;
26700   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
26701 
26702   for (i = 0; i < nelt / 2; i++)
26703     {
26704       unsigned elt = (i + high) & mask;
26705       if (d->perm[i * 2] != elt)
26706 	return false;
26707       elt = (elt + nelt) & mask;
26708       if (d->perm[i * 2 + 1] != elt)
26709 	return false;
26710     }
26711 
26712   /* Success!  */
26713   if (d->testing_p)
26714     return true;
26715 
26716   switch (d->vmode)
26717     {
26718     case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
26719     case V8QImode:  gen = gen_neon_vzipv8qi_internal;  break;
26720     case V8HImode:  gen = gen_neon_vzipv8hi_internal;  break;
26721     case V4HImode:  gen = gen_neon_vzipv4hi_internal;  break;
26722     case V4SImode:  gen = gen_neon_vzipv4si_internal;  break;
26723     case V2SImode:  gen = gen_neon_vzipv2si_internal;  break;
26724     case V2SFmode:  gen = gen_neon_vzipv2sf_internal;  break;
26725     case V4SFmode:  gen = gen_neon_vzipv4sf_internal;  break;
26726     default:
26727       gcc_unreachable ();
26728     }
26729 
26730   in0 = d->op0;
26731   in1 = d->op1;
26732   if (BYTES_BIG_ENDIAN)
26733     {
26734       x = in0, in0 = in1, in1 = x;
26735       high = !high;
26736     }
26737 
26738   out0 = d->target;
26739   out1 = gen_reg_rtx (d->vmode);
26740   if (high)
26741     x = out0, out0 = out1, out1 = x;
26742 
26743   emit_insn (gen (out0, in0, in1, out1));
26744   return true;
26745 }
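
/* For example, with two V8QImode operands the selector
   { 0, 8, 1, 9, 2, 10, 3, 11 } passes the test above with high == 0 and
   is implemented with a single VZIP, interleaving the low halves of the
   two inputs into the target.  */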
26746 
26747 /* Recognize patterns for the VREV insns.  */
26748 
26749 static bool
26750 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
26751 {
26752   unsigned int i, j, diff, nelt = d->nelt;
26753   rtx (*gen)(rtx, rtx, rtx);
26754 
26755   if (!d->one_vector_p)
26756     return false;
26757 
26758   diff = d->perm[0];
26759   switch (diff)
26760     {
26761     case 7:
26762       switch (d->vmode)
26763 	{
26764 	case V16QImode: gen = gen_neon_vrev64v16qi; break;
26765 	case V8QImode:  gen = gen_neon_vrev64v8qi;  break;
26766 	default:
26767 	  return false;
26768 	}
26769       break;
26770     case 3:
26771       switch (d->vmode)
26772 	{
26773 	case V16QImode: gen = gen_neon_vrev32v16qi; break;
26774 	case V8QImode:  gen = gen_neon_vrev32v8qi;  break;
26775 	case V8HImode:  gen = gen_neon_vrev64v8hi;  break;
26776 	case V4HImode:  gen = gen_neon_vrev64v4hi;  break;
26777 	default:
26778 	  return false;
26779 	}
26780       break;
26781     case 1:
26782       switch (d->vmode)
26783 	{
26784 	case V16QImode: gen = gen_neon_vrev16v16qi; break;
26785 	case V8QImode:  gen = gen_neon_vrev16v8qi;  break;
26786 	case V8HImode:  gen = gen_neon_vrev32v8hi;  break;
26787 	case V4HImode:  gen = gen_neon_vrev32v4hi;  break;
26788 	case V4SImode:  gen = gen_neon_vrev64v4si;  break;
26789 	case V2SImode:  gen = gen_neon_vrev64v2si;  break;
26790 	case V4SFmode:  gen = gen_neon_vrev64v4sf;  break;
26791 	case V2SFmode:  gen = gen_neon_vrev64v2sf;  break;
26792 	default:
26793 	  return false;
26794 	}
26795       break;
26796     default:
26797       return false;
26798     }
26799 
26800   for (i = 0; i < nelt ; i += diff + 1)
26801     for (j = 0; j <= diff; j += 1)
26802       {
26803	/* This is guaranteed to be true, as diff is 7, 3 or 1 and we
26804	   have enough elements in the queue to generate the access.  A
26805	   selector whose leading element yields any other value of diff
26806	   implies that something has gone wrong by the time we get
26807	   here.  */
26808 	gcc_assert (i + j < nelt);
26809 	if (d->perm[i + j] != i + diff - j)
26810 	  return false;
26811       }
26812 
26813   /* Success! */
26814   if (d->testing_p)
26815     return true;
26816 
26817   /* ??? The third operand is an artifact of the builtin infrastructure
26818      and is ignored by the actual instruction.  */
26819   emit_insn (gen (d->target, d->op0, const0_rtx));
26820   return true;
26821 }
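
/* For example, a single V8QImode operand with the selector
   { 1, 0, 3, 2, 5, 4, 7, 6 } gives diff == 1 and is implemented with
   VREV16, swapping the two bytes within each 16-bit halfword.  */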
26822 
26823 /* Recognize patterns for the VTRN insns.  */
26824 
26825 static bool
26826 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
26827 {
26828   unsigned int i, odd, mask, nelt = d->nelt;
26829   rtx out0, out1, in0, in1, x;
26830   rtx (*gen)(rtx, rtx, rtx, rtx);
26831 
26832   if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
26833     return false;
26834 
26835   /* Note that these are little-endian tests.  Adjust for big-endian later.  */
26836   if (d->perm[0] == 0)
26837     odd = 0;
26838   else if (d->perm[0] == 1)
26839     odd = 1;
26840   else
26841     return false;
26842   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
26843 
26844   for (i = 0; i < nelt; i += 2)
26845     {
26846       if (d->perm[i] != i + odd)
26847 	return false;
26848       if (d->perm[i + 1] != ((i + nelt + odd) & mask))
26849 	return false;
26850     }
26851 
26852   /* Success!  */
26853   if (d->testing_p)
26854     return true;
26855 
26856   switch (d->vmode)
26857     {
26858     case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
26859     case V8QImode:  gen = gen_neon_vtrnv8qi_internal;  break;
26860     case V8HImode:  gen = gen_neon_vtrnv8hi_internal;  break;
26861     case V4HImode:  gen = gen_neon_vtrnv4hi_internal;  break;
26862     case V4SImode:  gen = gen_neon_vtrnv4si_internal;  break;
26863     case V2SImode:  gen = gen_neon_vtrnv2si_internal;  break;
26864     case V2SFmode:  gen = gen_neon_vtrnv2sf_internal;  break;
26865     case V4SFmode:  gen = gen_neon_vtrnv4sf_internal;  break;
26866     default:
26867       gcc_unreachable ();
26868     }
26869 
26870   in0 = d->op0;
26871   in1 = d->op1;
26872   if (BYTES_BIG_ENDIAN)
26873     {
26874       x = in0, in0 = in1, in1 = x;
26875       odd = !odd;
26876     }
26877 
26878   out0 = d->target;
26879   out1 = gen_reg_rtx (d->vmode);
26880   if (odd)
26881     x = out0, out0 = out1, out1 = x;
26882 
26883   emit_insn (gen (out0, in0, in1, out1));
26884   return true;
26885 }
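
/* For example, with two V8QImode operands the selector
   { 0, 8, 2, 10, 4, 12, 6, 14 } passes the test above with odd == 0 and
   is implemented with a single VTRN.  */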
26886 
26887 /* Recognize patterns for the VEXT insns.  */
26888 
26889 static bool
26890 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
26891 {
26892   unsigned int i, nelt = d->nelt;
26893   rtx (*gen) (rtx, rtx, rtx, rtx);
26894   rtx offset;
26895 
26896   unsigned int location;
26897 
26898   unsigned int next  = d->perm[0] + 1;
26899 
26900   /* TODO: Handle GCC's numbering of elements for big-endian.  */
26901   if (BYTES_BIG_ENDIAN)
26902     return false;
26903 
26904   /* Check if the extracted indexes are increasing by one.  */
26905   for (i = 1; i < nelt; next++, i++)
26906     {
26907       /* If we hit the most significant element of the 2nd vector in
26908 	 the previous iteration, no need to test further.  */
26909       if (next == 2 * nelt)
26910 	return false;
26911 
26912       /* If we are operating on only one vector: it could be a
26913 	 rotation.  If there are only two elements of size < 64, let
26914 	 arm_evpc_neon_vrev catch it.  */
26915       if (d->one_vector_p && (next == nelt))
26916 	{
26917 	  if ((nelt == 2) && (d->vmode != V2DImode))
26918 	    return false;
26919 	  else
26920 	    next = 0;
26921 	}
26922 
26923       if (d->perm[i] != next)
26924 	return false;
26925     }
26926 
26927   location = d->perm[0];
26928 
26929   switch (d->vmode)
26930     {
26931     case V16QImode: gen = gen_neon_vextv16qi; break;
26932     case V8QImode: gen = gen_neon_vextv8qi; break;
26933     case V4HImode: gen = gen_neon_vextv4hi; break;
26934     case V8HImode: gen = gen_neon_vextv8hi; break;
26935     case V2SImode: gen = gen_neon_vextv2si; break;
26936     case V4SImode: gen = gen_neon_vextv4si; break;
26937     case V2SFmode: gen = gen_neon_vextv2sf; break;
26938     case V4SFmode: gen = gen_neon_vextv4sf; break;
26939     case V2DImode: gen = gen_neon_vextv2di; break;
26940     default:
26941       return false;
26942     }
26943 
26944   /* Success! */
26945   if (d->testing_p)
26946     return true;
26947 
26948   offset = GEN_INT (location);
26949   emit_insn (gen (d->target, d->op0, d->op1, offset));
26950   return true;
26951 }
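
/* For example, with two V8QImode operands the selector
   { 3, 4, 5, 6, 7, 8, 9, 10 } consists of consecutive indexes starting
   at 3, so location == 3 and a single VEXT with offset #3 is emitted.  */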
26952 
26953 /* The NEON VTBL instruction is a fully variable permutation that's even
26954    stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
26955    is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
26956    can do slightly better by expanding this as a constant where we don't
26957    have to apply a mask.  */
26958 
26959 static bool
26960 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
26961 {
26962   rtx rperm[MAX_VECT_LEN], sel;
26963   enum machine_mode vmode = d->vmode;
26964   unsigned int i, nelt = d->nelt;
26965 
26966   /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
26967      numbering of elements for big-endian, we must reverse the order.  */
26968   if (BYTES_BIG_ENDIAN)
26969     return false;
26970 
26971   if (d->testing_p)
26972     return true;
26973 
26974   /* Generic code will try constant permutation twice: once with the
26975      original mode and again with the elements lowered to QImode.
26976      So wait, and don't do the selector expansion ourselves here.  */
26977   if (vmode != V8QImode && vmode != V16QImode)
26978     return false;
26979 
26980   for (i = 0; i < nelt; ++i)
26981     rperm[i] = GEN_INT (d->perm[i]);
26982   sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
26983   sel = force_reg (vmode, sel);
26984 
26985   arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
26986   return true;
26987 }
26988 
26989 static bool
26990 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
26991 {
26992   /* Check if the input mask matches vext before reordering the
26993      operands.  */
26994   if (TARGET_NEON)
26995     if (arm_evpc_neon_vext (d))
26996       return true;
26997 
26998   /* The pattern matching functions above are written to look for a small
26999      number to begin the sequence (0, 1, N/2).  If we begin with an index
27000      from the second operand, we can swap the operands.  */
27001   if (d->perm[0] >= d->nelt)
27002     {
27003       unsigned i, nelt = d->nelt;
27004       rtx x;
27005 
27006       for (i = 0; i < nelt; ++i)
27007 	d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
27008 
27009       x = d->op0;
27010       d->op0 = d->op1;
27011       d->op1 = x;
27012     }
27013 
27014   if (TARGET_NEON)
27015     {
27016       if (arm_evpc_neon_vuzp (d))
27017 	return true;
27018       if (arm_evpc_neon_vzip (d))
27019 	return true;
27020       if (arm_evpc_neon_vrev (d))
27021 	return true;
27022       if (arm_evpc_neon_vtrn (d))
27023 	return true;
27024       return arm_evpc_neon_vtbl (d);
27025     }
27026   return false;
27027 }
27028 
27029 /* Expand a vec_perm_const pattern.  */
27030 
27031 bool
27032 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
27033 {
27034   struct expand_vec_perm_d d;
27035   int i, nelt, which;
27036 
27037   d.target = target;
27038   d.op0 = op0;
27039   d.op1 = op1;
27040 
27041   d.vmode = GET_MODE (target);
27042   gcc_assert (VECTOR_MODE_P (d.vmode));
27043   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
27044   d.testing_p = false;
27045 
27046   for (i = which = 0; i < nelt; ++i)
27047     {
27048       rtx e = XVECEXP (sel, 0, i);
27049       int ei = INTVAL (e) & (2 * nelt - 1);
27050       which |= (ei < nelt ? 1 : 2);
27051       d.perm[i] = ei;
27052     }
27053 
27054   switch (which)
27055     {
27056     default:
27057       gcc_unreachable();
27058 
27059     case 3:
27060       d.one_vector_p = false;
27061       if (!rtx_equal_p (op0, op1))
27062 	break;
27063 
27064       /* The elements of PERM do not suggest that only the first operand
27065 	 is used, but both operands are identical.  Allow easier matching
27066 	 of the permutation by folding the permutation into the single
27067 	 input vector.  */
27068       /* FALLTHRU */
27069     case 2:
27070       for (i = 0; i < nelt; ++i)
27071         d.perm[i] &= nelt - 1;
27072       d.op0 = op1;
27073       d.one_vector_p = true;
27074       break;
27075 
27076     case 1:
27077       d.op1 = op0;
27078       d.one_vector_p = true;
27079       break;
27080     }
27081 
27082   return arm_expand_vec_perm_const_1 (&d);
27083 }
27084 
27085 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.  */
27086 
27087 static bool
27088 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
27089 				 const unsigned char *sel)
27090 {
27091   struct expand_vec_perm_d d;
27092   unsigned int i, nelt, which;
27093   bool ret;
27094 
27095   d.vmode = vmode;
27096   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
27097   d.testing_p = true;
27098   memcpy (d.perm, sel, nelt);
27099 
27100   /* Categorize the set of elements in the selector.  */
27101   for (i = which = 0; i < nelt; ++i)
27102     {
27103       unsigned char e = d.perm[i];
27104       gcc_assert (e < 2 * nelt);
27105       which |= (e < nelt ? 1 : 2);
27106     }
27107 
27108   /* For all elements from second vector, fold the elements to first.  */
27109   if (which == 2)
27110     for (i = 0; i < nelt; ++i)
27111       d.perm[i] -= nelt;
27112 
27113   /* Check whether the mask can be applied to the vector type.  */
27114   d.one_vector_p = (which != 3);
27115 
27116   d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
27117   d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
27118   if (!d.one_vector_p)
27119     d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
27120 
27121   start_sequence ();
27122   ret = arm_expand_vec_perm_const_1 (&d);
27123   end_sequence ();
27124 
27125   return ret;
27126 }
27127 
27128 bool
27129 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
27130 {
27131   /* If we are soft float, all auto-increment forms are ok as long as we
27132      have ldrd or the mode fits in a single word.  */
27133   if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
27134     return true;
27135 
27136   switch (code)
27137     {
27138       /* Post increment and Pre Decrement are supported for all
27139 	 instruction forms except for vector forms.  */
27140     case ARM_POST_INC:
27141     case ARM_PRE_DEC:
27142       if (VECTOR_MODE_P (mode))
27143 	{
27144 	  if (code != ARM_PRE_DEC)
27145 	    return true;
27146 	  else
27147 	    return false;
27148 	}
27149 
27150       return true;
27151 
27152     case ARM_POST_DEC:
27153     case ARM_PRE_INC:
27154       /* Without LDRD, if the mode size is greater than the word size
27155	 there is no point in auto-incrementing, because ldm and stm do
27156         not have these forms.  */
27157       if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
27158 	return false;
27159 
27160       /* Vector and floating point modes do not support
27161 	 these auto increment forms.  */
27162       if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
27163 	return false;
27164 
27165       return true;
27166 
27167     default:
27168       return false;
27169 
27170     }
27171 
27172   return false;
27173 }
27174 
27175 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
27176    on ARM, since we know that shifts by negative amounts are no-ops.
27177    Additionally, the default expansion code is not available or suitable
27178    for post-reload insn splits (this can occur when the register allocator
27179    chooses not to do a shift in NEON).
27180 
27181    This function is used in both initial expand and post-reload splits, and
27182    handles all kinds of 64-bit shifts.
27183 
27184    Input requirements:
27185     - It is safe for the input and output to be the same register, but
27186       early-clobber rules apply for the shift amount and scratch registers.
27187     - Shift by register requires both scratch registers.  In all other cases
27188       the scratch registers may be NULL.
27189     - Ashiftrt by a register also clobbers the CC register.  */
27190 void
27191 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
27192 			       rtx amount, rtx scratch1, rtx scratch2)
27193 {
27194   rtx out_high = gen_highpart (SImode, out);
27195   rtx out_low = gen_lowpart (SImode, out);
27196   rtx in_high = gen_highpart (SImode, in);
27197   rtx in_low = gen_lowpart (SImode, in);
27198 
27199   /* Terminology:
27200 	in = the register pair containing the input value.
27201 	out = the destination register pair.
27202 	up = the high- or low-part of each pair.
27203 	down = the opposite part to "up".
27204      In a shift, we can consider bits to shift from "up"-stream to
27205      "down"-stream, so in a left-shift "up" is the low-part and "down"
27206      is the high-part of each register pair.  */
27207 
27208   rtx out_up   = code == ASHIFT ? out_low : out_high;
27209   rtx out_down = code == ASHIFT ? out_high : out_low;
27210   rtx in_up   = code == ASHIFT ? in_low : in_high;
27211   rtx in_down = code == ASHIFT ? in_high : in_low;
27212 
27213   gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
27214   gcc_assert (out
27215 	      && (REG_P (out) || GET_CODE (out) == SUBREG)
27216 	      && GET_MODE (out) == DImode);
27217   gcc_assert (in
27218 	      && (REG_P (in) || GET_CODE (in) == SUBREG)
27219 	      && GET_MODE (in) == DImode);
27220   gcc_assert (amount
27221 	      && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
27222 		   && GET_MODE (amount) == SImode)
27223 		  || CONST_INT_P (amount)));
27224   gcc_assert (scratch1 == NULL
27225 	      || (GET_CODE (scratch1) == SCRATCH)
27226 	      || (GET_MODE (scratch1) == SImode
27227 		  && REG_P (scratch1)));
27228   gcc_assert (scratch2 == NULL
27229 	      || (GET_CODE (scratch2) == SCRATCH)
27230 	      || (GET_MODE (scratch2) == SImode
27231 		  && REG_P (scratch2)));
27232   gcc_assert (!REG_P (out) || !REG_P (amount)
27233 	      || !HARD_REGISTER_P (out)
27234 	      || (REGNO (out) != REGNO (amount)
27235 		  && REGNO (out) + 1 != REGNO (amount)));
27236 
27237   /* Macros to make following code more readable.  */
27238   #define SUB_32(DEST,SRC) \
27239 	    gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
27240   #define RSB_32(DEST,SRC) \
27241 	    gen_subsi3 ((DEST), GEN_INT (32), (SRC))
27242   #define SUB_S_32(DEST,SRC) \
27243 	    gen_addsi3_compare0 ((DEST), (SRC), \
27244 				 GEN_INT (-32))
27245   #define SET(DEST,SRC) \
27246 	    gen_rtx_SET (SImode, (DEST), (SRC))
27247   #define SHIFT(CODE,SRC,AMOUNT) \
27248 	    gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
27249   #define LSHIFT(CODE,SRC,AMOUNT) \
27250 	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
27251 			    SImode, (SRC), (AMOUNT))
27252   #define REV_LSHIFT(CODE,SRC,AMOUNT) \
27253 	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
27254 			    SImode, (SRC), (AMOUNT))
27255   #define ORR(A,B) \
27256 	    gen_rtx_IOR (SImode, (A), (B))
27257   #define BRANCH(COND,LABEL) \
27258 	    gen_arm_cond_branch ((LABEL), \
27259 				 gen_rtx_ ## COND (CCmode, cc_reg, \
27260 						   const0_rtx), \
27261 				 cc_reg)
27262 
27263   /* Shifts by register and shifts by constant are handled separately.  */
27264   if (CONST_INT_P (amount))
27265     {
27266       /* We have a shift-by-constant.  */
27267 
27268       /* First, handle out-of-range shift amounts.
27269	 In both cases we try to match the result that an ARM shift-by-register
27270	 instruction would give.  This helps reduce execution
27271	 differences between optimization levels, but it won't stop other
27272         parts of the compiler doing different things.  This is "undefined
27273         behaviour", in any case.  */
27274       if (INTVAL (amount) <= 0)
27275 	emit_insn (gen_movdi (out, in));
27276       else if (INTVAL (amount) >= 64)
27277 	{
27278 	  if (code == ASHIFTRT)
27279 	    {
27280 	      rtx const31_rtx = GEN_INT (31);
27281 	      emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
27282 	      emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
27283 	    }
27284 	  else
27285 	    emit_insn (gen_movdi (out, const0_rtx));
27286 	}
27287 
27288       /* Now handle valid shifts. */
27289       else if (INTVAL (amount) < 32)
27290 	{
27291 	  /* Shifts by a constant less than 32.  */
27292 	  rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
27293 
27294 	  emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
27295 	  emit_insn (SET (out_down,
27296 			  ORR (REV_LSHIFT (code, in_up, reverse_amount),
27297 			       out_down)));
27298 	  emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
27299 	}
27300       else
27301 	{
27302 	  /* Shifts by a constant greater than 31.  */
27303 	  rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
27304 
27305 	  emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
27306 	  if (code == ASHIFTRT)
27307 	    emit_insn (gen_ashrsi3 (out_up, in_up,
27308 				    GEN_INT (31)));
27309 	  else
27310 	    emit_insn (SET (out_up, const0_rtx));
27311 	}
27312     }
27313   else
27314     {
27315       /* We have a shift-by-register.  */
27316       rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
27317 
27318       /* This alternative requires the scratch registers.  */
27319       gcc_assert (scratch1 && REG_P (scratch1));
27320       gcc_assert (scratch2 && REG_P (scratch2));
27321 
27322       /* We will need the values "amount-32" and "32-amount" later.
27323          Swapping them around now allows the later code to be more general. */
27324       switch (code)
27325 	{
27326 	case ASHIFT:
27327 	  emit_insn (SUB_32 (scratch1, amount));
27328 	  emit_insn (RSB_32 (scratch2, amount));
27329 	  break;
27330 	case ASHIFTRT:
27331 	  emit_insn (RSB_32 (scratch1, amount));
27332 	  /* Also set CC = amount > 32.  */
27333 	  emit_insn (SUB_S_32 (scratch2, amount));
27334 	  break;
27335 	case LSHIFTRT:
27336 	  emit_insn (RSB_32 (scratch1, amount));
27337 	  emit_insn (SUB_32 (scratch2, amount));
27338 	  break;
27339 	default:
27340 	  gcc_unreachable ();
27341 	}
27342 
27343       /* Emit code like this:
27344 
27345 	 arithmetic-left:
27346 	    out_down = in_down << amount;
27347 	    out_down = (in_up << (amount - 32)) | out_down;
27348 	    out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
27349 	    out_up = in_up << amount;
27350 
27351 	 arithmetic-right:
27352 	    out_down = in_down >> amount;
27353 	    out_down = (in_up << (32 - amount)) | out_down;
27354 	    if (amount < 32)
27355 	      out_down = ((signed)in_up >> (amount - 32)) | out_down;
27356	    out_up = in_up >> amount;
27357 
27358 	 logical-right:
27359 	    out_down = in_down >> amount;
27360 	    out_down = (in_up << (32 - amount)) | out_down;
27361 	    if (amount < 32)
27362 	      out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
27363	    out_up = in_up >> amount;
27364 
27365 	  The ARM and Thumb2 variants are the same but implemented slightly
27366 	  differently.  If this were only called during expand we could just
27367 	  use the Thumb2 case and let combine do the right thing, but this
27368 	  can also be called from post-reload splitters.  */
27369 
27370       emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
27371 
27372       if (!TARGET_THUMB2)
27373 	{
27374 	  /* Emit code for ARM mode.  */
27375 	  emit_insn (SET (out_down,
27376 			  ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
27377 	  if (code == ASHIFTRT)
27378 	    {
27379 	      rtx done_label = gen_label_rtx ();
27380 	      emit_jump_insn (BRANCH (LT, done_label));
27381 	      emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
27382 					     out_down)));
27383 	      emit_label (done_label);
27384 	    }
27385 	  else
27386 	    emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
27387 					   out_down)));
27388 	}
27389       else
27390 	{
27391 	  /* Emit code for Thumb2 mode.
27392 	     Thumb2 can't do shift and or in one insn.  */
27393 	  emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
27394 	  emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
27395 
27396 	  if (code == ASHIFTRT)
27397 	    {
27398 	      rtx done_label = gen_label_rtx ();
27399 	      emit_jump_insn (BRANCH (LT, done_label));
27400 	      emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
27401 	      emit_insn (SET (out_down, ORR (out_down, scratch2)));
27402 	      emit_label (done_label);
27403 	    }
27404 	  else
27405 	    {
27406 	      emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
27407 	      emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
27408 	    }
27409 	}
27410 
27411       emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
27412     }
27413 
27414   #undef SUB_32
27415   #undef RSB_32
27416   #undef SUB_S_32
27417   #undef SET
27418   #undef SHIFT
27419   #undef LSHIFT
27420   #undef REV_LSHIFT
27421   #undef ORR
27422   #undef BRANCH
27423 }
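
/* A host-side sketch of the constant-shift decomposition used above for
   a 64-bit logical right shift by 0 < n < 32 (illustrative only, not
   part of this file).  */
#if 0   /* illustration only, not compiled */
static unsigned long long
lshr64_sketch (unsigned int in_low, unsigned int in_high, unsigned int n)
{
  unsigned int out_low = in_low >> n;            /* out_down = in_down >> n */
  out_low |= in_high << (32 - n);                /*   |= in_up << (32 - n)  */
  unsigned int out_high = in_high >> n;          /* out_up = in_up >> n     */
  return ((unsigned long long) out_high << 32) | out_low;
}
#endif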
27424 
27425 
27426 /* Returns true if a valid comparison operation and makes
27427    the operands in a form that is valid.  */
27428 bool
27429 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
27430 {
27431   enum rtx_code code = GET_CODE (*comparison);
27432   int code_int;
27433   enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
27434     ? GET_MODE (*op2) : GET_MODE (*op1);
27435 
27436   gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
27437 
27438   if (code == UNEQ || code == LTGT)
27439     return false;
27440 
27441   code_int = (int)code;
27442   arm_canonicalize_comparison (&code_int, op1, op2, 0);
27443   PUT_CODE (*comparison, (enum rtx_code)code_int);
27444 
27445   switch (mode)
27446     {
27447     case SImode:
27448       if (!arm_add_operand (*op1, mode))
27449 	*op1 = force_reg (mode, *op1);
27450       if (!arm_add_operand (*op2, mode))
27451 	*op2 = force_reg (mode, *op2);
27452       return true;
27453 
27454     case DImode:
27455       if (!cmpdi_operand (*op1, mode))
27456 	*op1 = force_reg (mode, *op1);
27457       if (!cmpdi_operand (*op2, mode))
27458 	*op2 = force_reg (mode, *op2);
27459       return true;
27460 
27461     case SFmode:
27462     case DFmode:
27463       if (!arm_float_compare_operand (*op1, mode))
27464 	*op1 = force_reg (mode, *op1);
27465       if (!arm_float_compare_operand (*op2, mode))
27466 	*op2 = force_reg (mode, *op2);
27467       return true;
27468     default:
27469       break;
27470     }
27471 
27472   return false;
27473 
27474 }
27475 
27476 /* return TRUE if x is a reference to a value in a constant pool */
27477 extern bool
27478 arm_is_constant_pool_ref (rtx x)
27479 {
27480   return (MEM_P (x)
27481 	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
27482 	  && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
27483 }
27484 
27485 #include "gt-arm.h"
27486