xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/config/arm/arm.c (revision 8ecbf5f02b752fcb7debe1a8fab1dc82602bc760)
1 /* Output routines for GCC for ARM.
2    Copyright (C) 1991-2018 Free Software Foundation, Inc.
3    Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4    and Martin Simmons (@harleqn.co.uk).
5    More major hacks by Richard Earnshaw (rearnsha@arm.com).
6 
7    This file is part of GCC.
8 
9    GCC is free software; you can redistribute it and/or modify it
10    under the terms of the GNU General Public License as published
11    by the Free Software Foundation; either version 3, or (at your
12    option) any later version.
13 
14    GCC is distributed in the hope that it will be useful, but WITHOUT
15    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
17    License for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with GCC; see the file COPYING3.  If not see
21    <http://www.gnu.org/licenses/>.  */
22 
23 #define IN_TARGET_CODE 1
24 
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "df.h"
36 #include "tm_p.h"
37 #include "stringpool.h"
38 #include "attribs.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "cgraph.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "output.h"
51 #include "insn-attr.h"
52 #include "flags.h"
53 #include "reload.h"
54 #include "explow.h"
55 #include "expr.h"
56 #include "cfgrtl.h"
57 #include "sched-int.h"
58 #include "common/common-target.h"
59 #include "langhooks.h"
60 #include "intl.h"
61 #include "libfuncs.h"
62 #include "params.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "selftest.h"
73 
74 /* This file should be included last.  */
75 #include "target-def.h"
76 
77 /* Forward definitions of types.  */
78 typedef struct minipool_node    Mnode;
79 typedef struct minipool_fixup   Mfix;
80 
81 /* The last .arch and .fpu assembly strings that we printed.  */
82 static std::string arm_last_printed_arch_string;
83 static std::string arm_last_printed_fpu_string;
84 
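/* Hook that a language front end may set to emit extra, language-specific
   object attributes in the assembly output; NULL when no front end has
   installed one.  (Descriptive comment added here; the hook itself is
   defined by the front ends.)  */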
85 void (*arm_lang_output_object_attributes_hook)(void);
86 
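/* A group of up to four integer immediates, used by
   optimal_immediate_sequence and optimal_immediate_sequence_1 below to
   return the constants of a synthesized immediate sequence.  */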
87 struct four_ints
88 {
89   int i[4];
90 };
91 
92 /* Forward function declarations.  */
93 static bool arm_const_not_ok_for_debug_p (rtx);
94 static int arm_needs_doubleword_align (machine_mode, const_tree);
95 static int arm_compute_static_chain_stack_bytes (void);
96 static arm_stack_offsets *arm_get_frame_offsets (void);
97 static void arm_compute_frame_layout (void);
98 static void arm_add_gc_roots (void);
99 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
100 			     unsigned HOST_WIDE_INT, rtx, rtx, int, int);
101 static unsigned bit_count (unsigned long);
102 static unsigned bitmap_popcount (const sbitmap);
103 static int arm_address_register_rtx_p (rtx, int);
104 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
105 static bool is_called_in_ARM_mode (tree);
106 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
107 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
108 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
109 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
110 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
111 inline static int thumb1_index_register_rtx_p (rtx, int);
112 static int thumb_far_jump_used_p (void);
113 static bool thumb_force_lr_save (void);
114 static unsigned arm_size_return_regs (void);
115 static bool arm_assemble_integer (rtx, unsigned int, int);
116 static void arm_print_operand (FILE *, rtx, int);
117 static void arm_print_operand_address (FILE *, machine_mode, rtx);
118 static bool arm_print_operand_punct_valid_p (unsigned char code);
119 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
120 static arm_cc get_arm_condition_code (rtx);
121 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
122 static const char *output_multi_immediate (rtx *, const char *, const char *,
123 					   int, HOST_WIDE_INT);
124 static const char *shift_op (rtx, HOST_WIDE_INT *);
125 static struct machine_function *arm_init_machine_status (void);
126 static void thumb_exit (FILE *, int);
127 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
128 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
129 static Mnode *add_minipool_forward_ref (Mfix *);
130 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
131 static Mnode *add_minipool_backward_ref (Mfix *);
132 static void assign_minipool_offsets (Mfix *);
133 static void arm_print_value (FILE *, rtx);
134 static void dump_minipool (rtx_insn *);
135 static int arm_barrier_cost (rtx_insn *);
136 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
137 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
138 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
139 			       machine_mode, rtx);
140 static void arm_reorg (void);
141 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
142 static unsigned long arm_compute_save_reg0_reg12_mask (void);
143 static unsigned long arm_compute_save_core_reg_mask (void);
144 static unsigned long arm_isr_value (tree);
145 static unsigned long arm_compute_func_type (void);
146 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
147 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
148 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
149 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
150 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
151 #endif
152 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
153 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
154 static void arm_output_function_epilogue (FILE *);
155 static void arm_output_function_prologue (FILE *);
156 static int arm_comp_type_attributes (const_tree, const_tree);
157 static void arm_set_default_type_attributes (tree);
158 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
159 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
160 static int optimal_immediate_sequence (enum rtx_code code,
161 				       unsigned HOST_WIDE_INT val,
162 				       struct four_ints *return_sequence);
163 static int optimal_immediate_sequence_1 (enum rtx_code code,
164 					 unsigned HOST_WIDE_INT val,
165 					 struct four_ints *return_sequence,
166 					 int i);
167 static int arm_get_strip_length (int);
168 static bool arm_function_ok_for_sibcall (tree, tree);
169 static machine_mode arm_promote_function_mode (const_tree,
170 						    machine_mode, int *,
171 						    const_tree, int);
172 static bool arm_return_in_memory (const_tree, const_tree);
173 static rtx arm_function_value (const_tree, const_tree, bool);
174 static rtx arm_libcall_value_1 (machine_mode);
175 static rtx arm_libcall_value (machine_mode, const_rtx);
176 static bool arm_function_value_regno_p (const unsigned int);
177 static void arm_internal_label (FILE *, const char *, unsigned long);
178 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
179 				 tree);
180 static bool arm_have_conditional_execution (void);
181 static bool arm_cannot_force_const_mem (machine_mode, rtx);
182 static bool arm_legitimate_constant_p (machine_mode, rtx);
183 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
184 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
185 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
186 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
187 static void emit_constant_insn (rtx cond, rtx pattern);
188 static rtx_insn *emit_set_insn (rtx, rtx);
189 static rtx emit_multi_reg_push (unsigned long, unsigned long);
190 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
191 				  tree, bool);
192 static rtx arm_function_arg (cumulative_args_t, machine_mode,
193 			     const_tree, bool);
194 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
195 				      const_tree, bool);
196 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
197 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
198 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
199 				      const_tree);
200 static rtx aapcs_libcall_value (machine_mode);
201 static int aapcs_select_return_coproc (const_tree, const_tree);
202 
203 #ifdef OBJECT_FORMAT_ELF
204 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
205 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
206 #endif
207 #ifndef ARM_PE
208 static void arm_encode_section_info (tree, rtx, int);
209 #endif
210 
211 static void arm_file_end (void);
212 static void arm_file_start (void);
213 static void arm_insert_attributes (tree, tree *);
214 
215 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
216 					tree, int *, int);
217 static bool arm_pass_by_reference (cumulative_args_t,
218 				   machine_mode, const_tree, bool);
219 static bool arm_promote_prototypes (const_tree);
220 static bool arm_default_short_enums (void);
221 static bool arm_align_anon_bitfield (void);
222 static bool arm_return_in_msb (const_tree);
223 static bool arm_must_pass_in_stack (machine_mode, const_tree);
224 static bool arm_return_in_memory (const_tree, const_tree);
225 #if ARM_UNWIND_INFO
226 static void arm_unwind_emit (FILE *, rtx_insn *);
227 static bool arm_output_ttype (rtx);
228 static void arm_asm_emit_except_personality (rtx);
229 #endif
230 static void arm_asm_init_sections (void);
231 static rtx arm_dwarf_register_span (rtx);
232 
233 static tree arm_cxx_guard_type (void);
234 static bool arm_cxx_guard_mask_bit (void);
235 static tree arm_get_cookie_size (tree);
236 static bool arm_cookie_has_size (void);
237 static bool arm_cxx_cdtor_returns_this (void);
238 static bool arm_cxx_key_method_may_be_inline (void);
239 static void arm_cxx_determine_class_data_visibility (tree);
240 static bool arm_cxx_class_data_always_comdat (void);
241 static bool arm_cxx_use_aeabi_atexit (void);
242 static void arm_init_libfuncs (void);
243 static tree arm_build_builtin_va_list (void);
244 static void arm_expand_builtin_va_start (tree, rtx);
245 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
246 static void arm_option_override (void);
247 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
248 static void arm_option_restore (struct gcc_options *,
249 				struct cl_target_option *);
250 static void arm_override_options_after_change (void);
251 static void arm_option_print (FILE *, int, struct cl_target_option *);
252 static void arm_set_current_function (tree);
253 static bool arm_can_inline_p (tree, tree);
254 static void arm_relayout_function (tree);
255 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
256 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
257 static bool arm_sched_can_speculate_insn (rtx_insn *);
258 static bool arm_macro_fusion_p (void);
259 static bool arm_cannot_copy_insn_p (rtx_insn *);
260 static int arm_issue_rate (void);
261 static int arm_first_cycle_multipass_dfa_lookahead (void);
262 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
263 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
264 static bool arm_output_addr_const_extra (FILE *, rtx);
265 static bool arm_allocate_stack_slots_for_args (void);
266 static bool arm_warn_func_return (tree);
267 static tree arm_promoted_type (const_tree t);
268 static bool arm_scalar_mode_supported_p (scalar_mode);
269 static bool arm_frame_pointer_required (void);
270 static bool arm_can_eliminate (const int, const int);
271 static void arm_asm_trampoline_template (FILE *);
272 static void arm_trampoline_init (rtx, tree, rtx);
273 static rtx arm_trampoline_adjust_address (rtx);
274 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
275 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
276 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
277 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
278 static bool arm_array_mode_supported_p (machine_mode,
279 					unsigned HOST_WIDE_INT);
280 static machine_mode arm_preferred_simd_mode (scalar_mode);
281 static bool arm_class_likely_spilled_p (reg_class_t);
282 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
283 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
284 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
285 						     const_tree type,
286 						     int misalignment,
287 						     bool is_packed);
288 static void arm_conditional_register_usage (void);
289 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
290 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
291 static void arm_autovectorize_vector_sizes (vector_sizes *);
292 static int arm_default_branch_cost (bool, bool);
293 static int arm_cortex_a5_branch_cost (bool, bool);
294 static int arm_cortex_m_branch_cost (bool, bool);
295 static int arm_cortex_m7_branch_cost (bool, bool);
296 
297 static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
298 					  const vec_perm_indices &);
299 
300 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
301 
302 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
303 					   tree vectype,
304 					   int misalign ATTRIBUTE_UNUSED);
305 static unsigned arm_add_stmt_cost (void *data, int count,
306 				   enum vect_cost_for_stmt kind,
307 				   struct _stmt_vec_info *stmt_info,
308 				   int misalign,
309 				   enum vect_cost_model_location where);
310 
311 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
312 					 bool op0_preserve_value);
313 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
314 
315 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
316 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
317 				     const_tree);
318 static section *arm_function_section (tree, enum node_frequency, bool, bool);
319 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
320 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
321 						int reloc);
322 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
323 static opt_scalar_float_mode arm_floatn_mode (int, bool);
324 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
325 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
326 static bool arm_modes_tieable_p (machine_mode, machine_mode);
327 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
328 
329 /* Table of machine attributes.  */
330 static const struct attribute_spec arm_attribute_table[] =
331 {
332   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
333        affects_type_identity, handler, exclude } */
334   /* Function calls made to this symbol must be done indirectly, because
335      it may lie outside of the 26 bit addressing range of a normal function
336      call.  */
337   { "long_call",    0, 0, false, true,  true,  false, NULL, NULL },
338   /* Whereas these functions are always known to reside within the 26 bit
339      addressing range.  */
340   { "short_call",   0, 0, false, true,  true,  false, NULL, NULL },
341   /* Specify the procedure call conventions for a function.  */
342   { "pcs",          1, 1, false, true,  true,  false, arm_handle_pcs_attribute,
343     NULL },
344   /* Interrupt Service Routines have special prologue and epilogue requirements.  */
345   { "isr",          0, 1, false, false, false, false, arm_handle_isr_attribute,
346     NULL },
347   { "interrupt",    0, 1, false, false, false, false, arm_handle_isr_attribute,
348     NULL },
349   { "naked",        0, 0, true,  false, false, false,
350     arm_handle_fndecl_attribute, NULL },
351 #ifdef ARM_PE
352   /* ARM/PE has three new attributes:
353      interfacearm - ?
354      dllexport - for exporting a function/variable that will live in a dll
355      dllimport - for importing a function/variable from a dll
356 
357      Microsoft allows multiple declspecs in one __declspec, separating
358      them with spaces.  We do NOT support this.  Instead, use __declspec
359      multiple times.
360   */
361   { "dllimport",    0, 0, true,  false, false, false, NULL, NULL },
362   { "dllexport",    0, 0, true,  false, false, false, NULL, NULL },
363   { "interfacearm", 0, 0, true,  false, false, false,
364     arm_handle_fndecl_attribute, NULL },
365 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
366   { "dllimport",    0, 0, false, false, false, false, handle_dll_attribute,
367     NULL },
368   { "dllexport",    0, 0, false, false, false, false, handle_dll_attribute,
369     NULL },
370   { "notshared",    0, 0, false, true, false, false,
371     arm_handle_notshared_attribute, NULL },
372 #endif
373   /* ARMv8-M Security Extensions support.  */
374   { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
375     arm_handle_cmse_nonsecure_entry, NULL },
376   { "cmse_nonsecure_call", 0, 0, true, false, false, true,
377     arm_handle_cmse_nonsecure_call, NULL },
378   { NULL, 0, 0, false, false, false, false, NULL, NULL }
379 };
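
/* For reference, the attributes above are applied from user code, e.g.

     void far_func (void) __attribute__ ((long_call));
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));

   (illustrative declarations only, not part of this file).  */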
380 
381 /* Initialize the GCC target structure.  */
382 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
383 #undef  TARGET_MERGE_DECL_ATTRIBUTES
384 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
385 #endif
386 
387 #undef TARGET_LEGITIMIZE_ADDRESS
388 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
389 
390 #undef  TARGET_ATTRIBUTE_TABLE
391 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
392 
393 #undef  TARGET_INSERT_ATTRIBUTES
394 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
395 
396 #undef TARGET_ASM_FILE_START
397 #define TARGET_ASM_FILE_START arm_file_start
398 #undef TARGET_ASM_FILE_END
399 #define TARGET_ASM_FILE_END arm_file_end
400 
401 #undef  TARGET_ASM_ALIGNED_SI_OP
402 #define TARGET_ASM_ALIGNED_SI_OP NULL
403 #undef  TARGET_ASM_INTEGER
404 #define TARGET_ASM_INTEGER arm_assemble_integer
405 
406 #undef TARGET_PRINT_OPERAND
407 #define TARGET_PRINT_OPERAND arm_print_operand
408 #undef TARGET_PRINT_OPERAND_ADDRESS
409 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
410 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
411 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
412 
413 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
414 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
415 
416 #undef  TARGET_ASM_FUNCTION_PROLOGUE
417 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
418 
419 #undef  TARGET_ASM_FUNCTION_EPILOGUE
420 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
421 
422 #undef TARGET_CAN_INLINE_P
423 #define TARGET_CAN_INLINE_P arm_can_inline_p
424 
425 #undef TARGET_RELAYOUT_FUNCTION
426 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
427 
428 #undef  TARGET_OPTION_OVERRIDE
429 #define TARGET_OPTION_OVERRIDE arm_option_override
430 
431 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
432 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
433 
434 #undef TARGET_OPTION_SAVE
435 #define TARGET_OPTION_SAVE arm_option_save
436 
437 #undef TARGET_OPTION_RESTORE
438 #define TARGET_OPTION_RESTORE arm_option_restore
439 
440 #undef TARGET_OPTION_PRINT
441 #define TARGET_OPTION_PRINT arm_option_print
442 
443 #undef  TARGET_COMP_TYPE_ATTRIBUTES
444 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
445 
446 #undef TARGET_SCHED_CAN_SPECULATE_INSN
447 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
448 
449 #undef TARGET_SCHED_MACRO_FUSION_P
450 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
451 
452 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
453 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
454 
455 #undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
456 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
457 
458 #undef  TARGET_SCHED_ADJUST_COST
459 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
460 
461 #undef TARGET_SET_CURRENT_FUNCTION
462 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
463 
464 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
465 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
466 
467 #undef TARGET_SCHED_REORDER
468 #define TARGET_SCHED_REORDER arm_sched_reorder
469 
470 #undef TARGET_REGISTER_MOVE_COST
471 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
472 
473 #undef TARGET_MEMORY_MOVE_COST
474 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
475 
476 #undef TARGET_ENCODE_SECTION_INFO
477 #ifdef ARM_PE
478 #define TARGET_ENCODE_SECTION_INFO  arm_pe_encode_section_info
479 #else
480 #define TARGET_ENCODE_SECTION_INFO  arm_encode_section_info
481 #endif
482 
483 #undef  TARGET_STRIP_NAME_ENCODING
484 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
485 
486 #undef  TARGET_ASM_INTERNAL_LABEL
487 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
488 
489 #undef TARGET_FLOATN_MODE
490 #define TARGET_FLOATN_MODE arm_floatn_mode
491 
492 #undef  TARGET_FUNCTION_OK_FOR_SIBCALL
493 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
494 
495 #undef  TARGET_FUNCTION_VALUE
496 #define TARGET_FUNCTION_VALUE arm_function_value
497 
498 #undef  TARGET_LIBCALL_VALUE
499 #define TARGET_LIBCALL_VALUE arm_libcall_value
500 
501 #undef TARGET_FUNCTION_VALUE_REGNO_P
502 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
503 
504 #undef  TARGET_ASM_OUTPUT_MI_THUNK
505 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
506 #undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
507 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
508 
509 #undef  TARGET_RTX_COSTS
510 #define TARGET_RTX_COSTS arm_rtx_costs
511 #undef  TARGET_ADDRESS_COST
512 #define TARGET_ADDRESS_COST arm_address_cost
513 
514 #undef TARGET_SHIFT_TRUNCATION_MASK
515 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
516 #undef TARGET_VECTOR_MODE_SUPPORTED_P
517 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
518 #undef TARGET_ARRAY_MODE_SUPPORTED_P
519 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
520 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
521 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
522 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
523 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
524   arm_autovectorize_vector_sizes
525 
526 #undef  TARGET_MACHINE_DEPENDENT_REORG
527 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
528 
529 #undef  TARGET_INIT_BUILTINS
530 #define TARGET_INIT_BUILTINS  arm_init_builtins
531 #undef  TARGET_EXPAND_BUILTIN
532 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
533 #undef  TARGET_BUILTIN_DECL
534 #define TARGET_BUILTIN_DECL arm_builtin_decl
535 
536 #undef TARGET_INIT_LIBFUNCS
537 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
538 
539 #undef TARGET_PROMOTE_FUNCTION_MODE
540 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
541 #undef TARGET_PROMOTE_PROTOTYPES
542 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
543 #undef TARGET_PASS_BY_REFERENCE
544 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
545 #undef TARGET_ARG_PARTIAL_BYTES
546 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
547 #undef TARGET_FUNCTION_ARG
548 #define TARGET_FUNCTION_ARG arm_function_arg
549 #undef TARGET_FUNCTION_ARG_ADVANCE
550 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
551 #undef TARGET_FUNCTION_ARG_PADDING
552 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
553 #undef TARGET_FUNCTION_ARG_BOUNDARY
554 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
555 
556 #undef  TARGET_SETUP_INCOMING_VARARGS
557 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
558 
559 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
560 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
561 
562 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
563 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
564 #undef TARGET_TRAMPOLINE_INIT
565 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
566 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
567 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
568 
569 #undef TARGET_WARN_FUNC_RETURN
570 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
571 
572 #undef TARGET_DEFAULT_SHORT_ENUMS
573 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
574 
575 #undef TARGET_ALIGN_ANON_BITFIELD
576 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
577 
578 #undef TARGET_NARROW_VOLATILE_BITFIELD
579 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
580 
581 #undef TARGET_CXX_GUARD_TYPE
582 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
583 
584 #undef TARGET_CXX_GUARD_MASK_BIT
585 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
586 
587 #undef TARGET_CXX_GET_COOKIE_SIZE
588 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
589 
590 #undef TARGET_CXX_COOKIE_HAS_SIZE
591 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
592 
593 #undef TARGET_CXX_CDTOR_RETURNS_THIS
594 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
595 
596 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
597 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
598 
599 #undef TARGET_CXX_USE_AEABI_ATEXIT
600 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
601 
602 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
603 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
604   arm_cxx_determine_class_data_visibility
605 
606 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
607 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
608 
609 #undef TARGET_RETURN_IN_MSB
610 #define TARGET_RETURN_IN_MSB arm_return_in_msb
611 
612 #undef TARGET_RETURN_IN_MEMORY
613 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
614 
615 #undef TARGET_MUST_PASS_IN_STACK
616 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
617 
618 #if ARM_UNWIND_INFO
619 #undef TARGET_ASM_UNWIND_EMIT
620 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
621 
622 /* EABI unwinding tables use a different format for the typeinfo tables.  */
623 #undef TARGET_ASM_TTYPE
624 #define TARGET_ASM_TTYPE arm_output_ttype
625 
626 #undef TARGET_ARM_EABI_UNWINDER
627 #define TARGET_ARM_EABI_UNWINDER true
628 
629 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
630 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
631 
632 #endif /* ARM_UNWIND_INFO */
633 
634 #undef TARGET_ASM_INIT_SECTIONS
635 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
636 
637 #undef TARGET_DWARF_REGISTER_SPAN
638 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
639 
640 #undef  TARGET_CANNOT_COPY_INSN_P
641 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
642 
643 #ifdef HAVE_AS_TLS
644 #undef TARGET_HAVE_TLS
645 #define TARGET_HAVE_TLS true
646 #endif
647 
648 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
649 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
650 
651 #undef TARGET_LEGITIMATE_CONSTANT_P
652 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
653 
654 #undef TARGET_CANNOT_FORCE_CONST_MEM
655 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
656 
657 #undef TARGET_MAX_ANCHOR_OFFSET
658 #define TARGET_MAX_ANCHOR_OFFSET 4095
659 
660 /* The minimum is set such that the total size of the block
661    for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
662    divisible by eight, ensuring natural spacing of anchors.  */
663 #undef TARGET_MIN_ANCHOR_OFFSET
664 #define TARGET_MIN_ANCHOR_OFFSET -4088
665 
666 #undef TARGET_SCHED_ISSUE_RATE
667 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
668 
669 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
670 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
671   arm_first_cycle_multipass_dfa_lookahead
672 
673 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
674 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
675   arm_first_cycle_multipass_dfa_lookahead_guard
676 
677 #undef TARGET_MANGLE_TYPE
678 #define TARGET_MANGLE_TYPE arm_mangle_type
679 
680 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
681 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
682 
683 #undef TARGET_BUILD_BUILTIN_VA_LIST
684 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
685 #undef TARGET_EXPAND_BUILTIN_VA_START
686 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
687 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
688 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
689 
690 #ifdef HAVE_AS_TLS
691 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
692 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
693 #endif
694 
695 #undef TARGET_LEGITIMATE_ADDRESS_P
696 #define TARGET_LEGITIMATE_ADDRESS_P	arm_legitimate_address_p
697 
698 #undef TARGET_PREFERRED_RELOAD_CLASS
699 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
700 
701 #undef TARGET_PROMOTED_TYPE
702 #define TARGET_PROMOTED_TYPE arm_promoted_type
703 
704 #undef TARGET_SCALAR_MODE_SUPPORTED_P
705 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
706 
707 #undef TARGET_COMPUTE_FRAME_LAYOUT
708 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
709 
710 #undef TARGET_FRAME_POINTER_REQUIRED
711 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
712 
713 #undef TARGET_CAN_ELIMINATE
714 #define TARGET_CAN_ELIMINATE arm_can_eliminate
715 
716 #undef TARGET_CONDITIONAL_REGISTER_USAGE
717 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
718 
719 #undef TARGET_CLASS_LIKELY_SPILLED_P
720 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
721 
722 #undef TARGET_VECTORIZE_BUILTINS
723 #define TARGET_VECTORIZE_BUILTINS
724 
725 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
726 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
727   arm_builtin_vectorized_function
728 
729 #undef TARGET_VECTOR_ALIGNMENT
730 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
731 
732 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
733 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
734   arm_vector_alignment_reachable
735 
736 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
737 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
738   arm_builtin_support_vector_misalignment
739 
740 #undef TARGET_PREFERRED_RENAME_CLASS
741 #define TARGET_PREFERRED_RENAME_CLASS \
742   arm_preferred_rename_class
743 
744 #undef TARGET_VECTORIZE_VEC_PERM_CONST
745 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
746 
747 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
748 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
749   arm_builtin_vectorization_cost
750 #undef TARGET_VECTORIZE_ADD_STMT_COST
751 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
752 
753 #undef TARGET_CANONICALIZE_COMPARISON
754 #define TARGET_CANONICALIZE_COMPARISON \
755   arm_canonicalize_comparison
756 
757 #undef TARGET_ASAN_SHADOW_OFFSET
758 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
759 
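/* The maximum number of conditional instructions allowed in an IT block:
   one when arm_restrict_it (-mrestrict-it) is in effect, otherwise the
   architectural maximum of four.  */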
760 #undef MAX_INSN_PER_IT_BLOCK
761 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
762 
763 #undef TARGET_CAN_USE_DOLOOP_P
764 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
765 
766 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
767 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
768 
769 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
770 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
771 
772 #undef TARGET_SCHED_FUSION_PRIORITY
773 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
774 
775 #undef  TARGET_ASM_FUNCTION_SECTION
776 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
777 
778 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
779 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
780 
781 #undef TARGET_SECTION_TYPE_FLAGS
782 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
783 
784 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
785 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
786 
787 #undef TARGET_C_EXCESS_PRECISION
788 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
789 
790 /* Although the architecture reserves bits 0 and 1, only the former is
791    used for ARM/Thumb ISA selection in v7 and earlier versions.  */
792 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
793 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
794 
795 #undef TARGET_FIXED_CONDITION_CODE_REGS
796 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
797 
798 #undef TARGET_HARD_REGNO_NREGS
799 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
800 #undef TARGET_HARD_REGNO_MODE_OK
801 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
802 
803 #undef TARGET_MODES_TIEABLE_P
804 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
805 
806 #undef TARGET_CAN_CHANGE_MODE_CLASS
807 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
808 
809 #undef TARGET_CONSTANT_ALIGNMENT
810 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
811 
812 /* Obstack for minipool constant handling.  */
813 static struct obstack minipool_obstack;
814 static char *         minipool_startobj;
815 
816 /* The maximum number of insns skipped which
817    will be conditionalised if possible.  */
818 static int max_insns_skipped = 5;
819 
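/* The stream to which assembler output is written.  */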
820 extern FILE * asm_out_file;
821 
822 /* True if we are currently building a constant table.  */
823 int making_const_table;
824 
825 /* The processor for which instructions should be scheduled.  */
826 enum processor_type arm_tune = TARGET_CPU_arm_none;
827 
828 /* The current tuning set.  */
829 const struct tune_params *current_tune;
830 
831 /* Which floating point hardware to schedule for.  */
832 int arm_fpu_attr;
833 
834 /* Used for Thumb call_via trampolines.  */
835 rtx thumb_call_via_label[14];
836 static int thumb_call_reg_needed;
837 
838 /* The bits in this mask specify which instruction scheduling options should
839    be used.  */
840 unsigned int tune_flags = 0;
841 
842 /* The highest ARM architecture version supported by the
843    target.  */
844 enum base_architecture arm_base_arch = BASE_ARCH_0;
845 
846 /* Active target architecture and tuning.  */
847 
848 struct arm_build_target arm_active_target;
849 
850 /* The following are used in the arm.md file as equivalents to bits
851    in the flag variables above.  */
852 
853 /* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
854 int arm_arch3m = 0;
855 
856 /* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
857 int arm_arch4 = 0;
858 
859 /* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
860 int arm_arch4t = 0;
861 
862 /* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
863 int arm_arch5 = 0;
864 
865 /* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
866 int arm_arch5e = 0;
867 
868 /* Nonzero if this chip supports the ARM Architecture 5TE extensions.  */
869 int arm_arch5te = 0;
870 
871 /* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
872 int arm_arch6 = 0;
873 
874 /* Nonzero if this chip supports the ARM 6K extensions.  */
875 int arm_arch6k = 0;
876 
877 /* Nonzero if this chip supports the ARM 6KZ extensions.  */
878 int arm_arch6kz = 0;
879 
880 /* Nonzero if instructions present in ARMv6-M can be used.  */
881 int arm_arch6m = 0;
882 
883 /* Nonzero if this chip supports the ARM 7 extensions.  */
884 int arm_arch7 = 0;
885 
886 /* Nonzero if this chip supports the Large Physical Address Extension.  */
887 int arm_arch_lpae = 0;
888 
889 /* Nonzero if instructions not present in the 'M' profile can be used.  */
890 int arm_arch_notm = 0;
891 
892 /* Nonzero if instructions present in ARMv7E-M can be used.  */
893 int arm_arch7em = 0;
894 
895 /* Nonzero if instructions present in ARMv8 can be used.  */
896 int arm_arch8 = 0;
897 
898 /* Nonzero if this chip supports the ARMv8.1 extensions.  */
899 int arm_arch8_1 = 0;
900 
901 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions.  */
902 int arm_arch8_2 = 0;
903 
904 /* Nonzero if this chip supports the FP16 instructions extension of ARM
905    Architecture 8.2.  */
906 int arm_fp16_inst = 0;
907 
908 /* Nonzero if this chip can benefit from load scheduling.  */
909 int arm_ld_sched = 0;
910 
911 /* Nonzero if this chip is a StrongARM.  */
912 int arm_tune_strongarm = 0;
913 
914 /* Nonzero if this chip supports Intel Wireless MMX technology.  */
915 int arm_arch_iwmmxt = 0;
916 
917 /* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
918 int arm_arch_iwmmxt2 = 0;
919 
920 /* Nonzero if this chip is an XScale.  */
921 int arm_arch_xscale = 0;
922 
923 /* Nonzero if tuning for XScale.  */
924 int arm_tune_xscale = 0;
925 
926 /* Nonzero if we want to tune for stores that access the write-buffer.
927    This typically means an ARM6 or ARM7 with MMU or MPU.  */
928 int arm_tune_wbuf = 0;
929 
930 /* Nonzero if tuning for Cortex-A9.  */
931 int arm_tune_cortex_a9 = 0;
932 
933 /* Nonzero if we should define __THUMB_INTERWORK__ in the
934    preprocessor.
935    XXX This is a bit of a hack; it is intended to help work around
936    problems in GLD, which doesn't understand that armv5t code is
937    interworking clean.  */
938 int arm_cpp_interwork = 0;
939 
940 /* Nonzero if chip supports Thumb 1.  */
941 int arm_arch_thumb1;
942 
943 /* Nonzero if chip supports Thumb 2.  */
944 int arm_arch_thumb2;
945 
946 /* Nonzero if chip supports integer division instruction.  */
947 int arm_arch_arm_hwdiv;
948 int arm_arch_thumb_hwdiv;
949 
950 /* Nonzero if chip disallows volatile memory access in IT block.  */
951 int arm_arch_no_volatile_ce;
952 
953 /* Nonzero if we should use Neon to handle 64-bit operations rather
954    than core registers.  */
955 int prefer_neon_for_64bits = 0;
956 
957 /* Nonzero if we shouldn't use literal pools.  */
958 bool arm_disable_literal_pool = false;
959 
960 /* The register number to be used for the PIC offset register.  */
961 unsigned arm_pic_register = INVALID_REGNUM;
962 
963 enum arm_pcs arm_pcs_default;
964 
965 /* For an explanation of these variables, see final_prescan_insn below.  */
966 int arm_ccfsm_state;
967 /* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
968 enum arm_cond_code arm_current_cc;
969 
970 rtx arm_target_insn;
971 int arm_target_label;
972 /* The number of conditionally executed insns, including the current insn.  */
973 int arm_condexec_count = 0;
974 /* A bitmask specifying the patterns for the IT block.
975    Zero means do not output an IT block before this insn. */
976 int arm_condexec_mask = 0;
977 /* The number of bits used in arm_condexec_mask.  */
978 int arm_condexec_masklen = 0;
979 
980 /* Nonzero if chip supports the ARMv8 CRC instructions.  */
981 int arm_arch_crc = 0;
982 
983 /* Nonzero if chip supports the AdvSIMD Dot Product instructions.  */
984 int arm_arch_dotprod = 0;
985 
986 /* Nonzero if chip supports the ARMv8-M security extensions.  */
987 int arm_arch_cmse = 0;
988 
989 /* Nonzero if the core has a very small, high-latency, multiply unit.  */
990 int arm_m_profile_small_mul = 0;
991 
992 /* The condition codes of the ARM, and the inverse function.  */
993 static const char * const arm_condition_codes[] =
994 {
995   "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
996   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
997 };
998 
999 /* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
1000 int arm_regs_in_sequence[] =
1001 {
1002   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1003 };
1004 
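/* Name used for the logical-shift-left operation in assembly output, and a
   shorthand for string equality tests.  */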
1005 #define ARM_LSL_NAME "lsl"
1006 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1007 
1008 #define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1009 				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
1010 				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
1011 
1012 /* Initialization code.  */
1013 
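/* A per-CPU tuning entry: the scheduling model to use for the CPU together
   with its tuning flags and detailed tuning parameters.  */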
1014 struct cpu_tune
1015 {
1016   enum processor_type scheduler;
1017   unsigned int tune_flags;
1018   const struct tune_params *tune;
1019 };
1020 
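/* Convenience initializers for the prefetch-related tuning fields: the
   number of outstanding prefetch slots, the L1 cache size and the L1 cache
   line size, with -1 meaning unknown or not applicable, e.g.
   ARM_PREFETCH_BENEFICIAL (4, 32, 32).  */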
1021 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1022 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1023   {								\
1024     num_slots,							\
1025     l1_size,							\
1026     l1_line_size						\
1027   }
1028 
1029 /* arm generic vectorizer costs.  */
1030 static const
1031 struct cpu_vec_costs arm_default_vec_cost = {
1032   1,					/* scalar_stmt_cost.  */
1033   1,					/* scalar load_cost.  */
1034   1,					/* scalar_store_cost.  */
1035   1,					/* vec_stmt_cost.  */
1036   1,					/* vec_to_scalar_cost.  */
1037   1,					/* scalar_to_vec_cost.  */
1038   1,					/* vec_align_load_cost.  */
1039   1,					/* vec_unalign_load_cost.  */
1040   1,					/* vec_unalign_store_cost.  */
1041   1,					/* vec_store_cost.  */
1042   3,					/* cond_taken_branch_cost.  */
1043   1,					/* cond_not_taken_branch_cost.  */
1044 };
1045 
1046 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
1047 #include "aarch-cost-tables.h"
1048 
1049 
1050 
1051 const struct cpu_cost_table cortexa9_extra_costs =
1052 {
1053   /* ALU */
1054   {
1055     0,			/* arith.  */
1056     0,			/* logical.  */
1057     0,			/* shift.  */
1058     COSTS_N_INSNS (1),	/* shift_reg.  */
1059     COSTS_N_INSNS (1),	/* arith_shift.  */
1060     COSTS_N_INSNS (2),	/* arith_shift_reg.  */
1061     0,			/* log_shift.  */
1062     COSTS_N_INSNS (1),	/* log_shift_reg.  */
1063     COSTS_N_INSNS (1),	/* extend.  */
1064     COSTS_N_INSNS (2),	/* extend_arith.  */
1065     COSTS_N_INSNS (1),	/* bfi.  */
1066     COSTS_N_INSNS (1),	/* bfx.  */
1067     0,			/* clz.  */
1068     0,			/* rev.  */
1069     0,			/* non_exec.  */
1070     true		/* non_exec_costs_exec.  */
1071   },
1072   {
1073     /* MULT SImode */
1074     {
1075       COSTS_N_INSNS (3),	/* simple.  */
1076       COSTS_N_INSNS (3),	/* flag_setting.  */
1077       COSTS_N_INSNS (2),	/* extend.  */
1078       COSTS_N_INSNS (3),	/* add.  */
1079       COSTS_N_INSNS (2),	/* extend_add.  */
1080       COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A9.  */
1081     },
1082     /* MULT DImode */
1083     {
1084       0,			/* simple (N/A).  */
1085       0,			/* flag_setting (N/A).  */
1086       COSTS_N_INSNS (4),	/* extend.  */
1087       0,			/* add (N/A).  */
1088       COSTS_N_INSNS (4),	/* extend_add.  */
1089       0				/* idiv (N/A).  */
1090     }
1091   },
1092   /* LD/ST */
1093   {
1094     COSTS_N_INSNS (2),	/* load.  */
1095     COSTS_N_INSNS (2),	/* load_sign_extend.  */
1096     COSTS_N_INSNS (2),	/* ldrd.  */
1097     COSTS_N_INSNS (2),	/* ldm_1st.  */
1098     1,			/* ldm_regs_per_insn_1st.  */
1099     2,			/* ldm_regs_per_insn_subsequent.  */
1100     COSTS_N_INSNS (5),	/* loadf.  */
1101     COSTS_N_INSNS (5),	/* loadd.  */
1102     COSTS_N_INSNS (1),  /* load_unaligned.  */
1103     COSTS_N_INSNS (2),	/* store.  */
1104     COSTS_N_INSNS (2),	/* strd.  */
1105     COSTS_N_INSNS (2),	/* stm_1st.  */
1106     1,			/* stm_regs_per_insn_1st.  */
1107     2,			/* stm_regs_per_insn_subsequent.  */
1108     COSTS_N_INSNS (1),	/* storef.  */
1109     COSTS_N_INSNS (1),	/* stored.  */
1110     COSTS_N_INSNS (1),	/* store_unaligned.  */
1111     COSTS_N_INSNS (1),	/* loadv.  */
1112     COSTS_N_INSNS (1)	/* storev.  */
1113   },
1114   {
1115     /* FP SFmode */
1116     {
1117       COSTS_N_INSNS (14),	/* div.  */
1118       COSTS_N_INSNS (4),	/* mult.  */
1119       COSTS_N_INSNS (7),	/* mult_addsub. */
1120       COSTS_N_INSNS (30),	/* fma.  */
1121       COSTS_N_INSNS (3),	/* addsub.  */
1122       COSTS_N_INSNS (1),	/* fpconst.  */
1123       COSTS_N_INSNS (1),	/* neg.  */
1124       COSTS_N_INSNS (3),	/* compare.  */
1125       COSTS_N_INSNS (3),	/* widen.  */
1126       COSTS_N_INSNS (3),	/* narrow.  */
1127       COSTS_N_INSNS (3),	/* toint.  */
1128       COSTS_N_INSNS (3),	/* fromint.  */
1129       COSTS_N_INSNS (3)		/* roundint.  */
1130     },
1131     /* FP DFmode */
1132     {
1133       COSTS_N_INSNS (24),	/* div.  */
1134       COSTS_N_INSNS (5),	/* mult.  */
1135       COSTS_N_INSNS (8),	/* mult_addsub.  */
1136       COSTS_N_INSNS (30),	/* fma.  */
1137       COSTS_N_INSNS (3),	/* addsub.  */
1138       COSTS_N_INSNS (1),	/* fpconst.  */
1139       COSTS_N_INSNS (1),	/* neg.  */
1140       COSTS_N_INSNS (3),	/* compare.  */
1141       COSTS_N_INSNS (3),	/* widen.  */
1142       COSTS_N_INSNS (3),	/* narrow.  */
1143       COSTS_N_INSNS (3),	/* toint.  */
1144       COSTS_N_INSNS (3),	/* fromint.  */
1145       COSTS_N_INSNS (3)		/* roundint.  */
1146     }
1147   },
1148   /* Vector */
1149   {
1150     COSTS_N_INSNS (1)	/* alu.  */
1151   }
1152 };
1153 
1154 const struct cpu_cost_table cortexa8_extra_costs =
1155 {
1156   /* ALU */
1157   {
1158     0,			/* arith.  */
1159     0,			/* logical.  */
1160     COSTS_N_INSNS (1),	/* shift.  */
1161     0,			/* shift_reg.  */
1162     COSTS_N_INSNS (1),	/* arith_shift.  */
1163     0,			/* arith_shift_reg.  */
1164     COSTS_N_INSNS (1),	/* log_shift.  */
1165     0,			/* log_shift_reg.  */
1166     0,			/* extend.  */
1167     0,			/* extend_arith.  */
1168     0,			/* bfi.  */
1169     0,			/* bfx.  */
1170     0,			/* clz.  */
1171     0,			/* rev.  */
1172     0,			/* non_exec.  */
1173     true		/* non_exec_costs_exec.  */
1174   },
1175   {
1176     /* MULT SImode */
1177     {
1178       COSTS_N_INSNS (1),	/* simple.  */
1179       COSTS_N_INSNS (1),	/* flag_setting.  */
1180       COSTS_N_INSNS (1),	/* extend.  */
1181       COSTS_N_INSNS (1),	/* add.  */
1182       COSTS_N_INSNS (1),	/* extend_add.  */
1183       COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A8.  */
1184     },
1185     /* MULT DImode */
1186     {
1187       0,			/* simple (N/A).  */
1188       0,			/* flag_setting (N/A).  */
1189       COSTS_N_INSNS (2),	/* extend.  */
1190       0,			/* add (N/A).  */
1191       COSTS_N_INSNS (2),	/* extend_add.  */
1192       0				/* idiv (N/A).  */
1193     }
1194   },
1195   /* LD/ST */
1196   {
1197     COSTS_N_INSNS (1),	/* load.  */
1198     COSTS_N_INSNS (1),	/* load_sign_extend.  */
1199     COSTS_N_INSNS (1),	/* ldrd.  */
1200     COSTS_N_INSNS (1),	/* ldm_1st.  */
1201     1,			/* ldm_regs_per_insn_1st.  */
1202     2,			/* ldm_regs_per_insn_subsequent.  */
1203     COSTS_N_INSNS (1),	/* loadf.  */
1204     COSTS_N_INSNS (1),	/* loadd.  */
1205     COSTS_N_INSNS (1),  /* load_unaligned.  */
1206     COSTS_N_INSNS (1),	/* store.  */
1207     COSTS_N_INSNS (1),	/* strd.  */
1208     COSTS_N_INSNS (1),	/* stm_1st.  */
1209     1,			/* stm_regs_per_insn_1st.  */
1210     2,			/* stm_regs_per_insn_subsequent.  */
1211     COSTS_N_INSNS (1),	/* storef.  */
1212     COSTS_N_INSNS (1),	/* stored.  */
1213     COSTS_N_INSNS (1),	/* store_unaligned.  */
1214     COSTS_N_INSNS (1),	/* loadv.  */
1215     COSTS_N_INSNS (1)	/* storev.  */
1216   },
1217   {
1218     /* FP SFmode */
1219     {
1220       COSTS_N_INSNS (36),	/* div.  */
1221       COSTS_N_INSNS (11),	/* mult.  */
1222       COSTS_N_INSNS (20),	/* mult_addsub. */
1223       COSTS_N_INSNS (30),	/* fma.  */
1224       COSTS_N_INSNS (9),	/* addsub.  */
1225       COSTS_N_INSNS (3),	/* fpconst.  */
1226       COSTS_N_INSNS (3),	/* neg.  */
1227       COSTS_N_INSNS (6),	/* compare.  */
1228       COSTS_N_INSNS (4),	/* widen.  */
1229       COSTS_N_INSNS (4),	/* narrow.  */
1230       COSTS_N_INSNS (8),	/* toint.  */
1231       COSTS_N_INSNS (8),	/* fromint.  */
1232       COSTS_N_INSNS (8)		/* roundint.  */
1233     },
1234     /* FP DFmode */
1235     {
1236       COSTS_N_INSNS (64),	/* div.  */
1237       COSTS_N_INSNS (16),	/* mult.  */
1238       COSTS_N_INSNS (25),	/* mult_addsub.  */
1239       COSTS_N_INSNS (30),	/* fma.  */
1240       COSTS_N_INSNS (9),	/* addsub.  */
1241       COSTS_N_INSNS (3),	/* fpconst.  */
1242       COSTS_N_INSNS (3),	/* neg.  */
1243       COSTS_N_INSNS (6),	/* compare.  */
1244       COSTS_N_INSNS (6),	/* widen.  */
1245       COSTS_N_INSNS (6),	/* narrow.  */
1246       COSTS_N_INSNS (8),	/* toint.  */
1247       COSTS_N_INSNS (8),	/* fromint.  */
1248       COSTS_N_INSNS (8)		/* roundint.  */
1249     }
1250   },
1251   /* Vector */
1252   {
1253     COSTS_N_INSNS (1)	/* alu.  */
1254   }
1255 };
1256 
1257 const struct cpu_cost_table cortexa5_extra_costs =
1258 {
1259   /* ALU */
1260   {
1261     0,			/* arith.  */
1262     0,			/* logical.  */
1263     COSTS_N_INSNS (1),	/* shift.  */
1264     COSTS_N_INSNS (1),	/* shift_reg.  */
1265     COSTS_N_INSNS (1),	/* arith_shift.  */
1266     COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1267     COSTS_N_INSNS (1),	/* log_shift.  */
1268     COSTS_N_INSNS (1),	/* log_shift_reg.  */
1269     COSTS_N_INSNS (1),	/* extend.  */
1270     COSTS_N_INSNS (1),	/* extend_arith.  */
1271     COSTS_N_INSNS (1),	/* bfi.  */
1272     COSTS_N_INSNS (1),	/* bfx.  */
1273     COSTS_N_INSNS (1),	/* clz.  */
1274     COSTS_N_INSNS (1),	/* rev.  */
1275     0,			/* non_exec.  */
1276     true		/* non_exec_costs_exec.  */
1277   },
1278 
1279   {
1280     /* MULT SImode */
1281     {
1282       0,			/* simple.  */
1283       COSTS_N_INSNS (1),	/* flag_setting.  */
1284       COSTS_N_INSNS (1),	/* extend.  */
1285       COSTS_N_INSNS (1),	/* add.  */
1286       COSTS_N_INSNS (1),	/* extend_add.  */
1287       COSTS_N_INSNS (7)		/* idiv.  */
1288     },
1289     /* MULT DImode */
1290     {
1291       0,			/* simple (N/A).  */
1292       0,			/* flag_setting (N/A).  */
1293       COSTS_N_INSNS (1),	/* extend.  */
1294       0,			/* add.  */
1295       COSTS_N_INSNS (2),	/* extend_add.  */
1296       0				/* idiv (N/A).  */
1297     }
1298   },
1299   /* LD/ST */
1300   {
1301     COSTS_N_INSNS (1),	/* load.  */
1302     COSTS_N_INSNS (1),	/* load_sign_extend.  */
1303     COSTS_N_INSNS (6),	/* ldrd.  */
1304     COSTS_N_INSNS (1),	/* ldm_1st.  */
1305     1,			/* ldm_regs_per_insn_1st.  */
1306     2,			/* ldm_regs_per_insn_subsequent.  */
1307     COSTS_N_INSNS (2),	/* loadf.  */
1308     COSTS_N_INSNS (4),	/* loadd.  */
1309     COSTS_N_INSNS (1),	/* load_unaligned.  */
1310     COSTS_N_INSNS (1),	/* store.  */
1311     COSTS_N_INSNS (3),	/* strd.  */
1312     COSTS_N_INSNS (1),	/* stm_1st.  */
1313     1,			/* stm_regs_per_insn_1st.  */
1314     2,			/* stm_regs_per_insn_subsequent.  */
1315     COSTS_N_INSNS (2),	/* storef.  */
1316     COSTS_N_INSNS (2),	/* stored.  */
1317     COSTS_N_INSNS (1),	/* store_unaligned.  */
1318     COSTS_N_INSNS (1),	/* loadv.  */
1319     COSTS_N_INSNS (1)	/* storev.  */
1320   },
1321   {
1322     /* FP SFmode */
1323     {
1324       COSTS_N_INSNS (15),	/* div.  */
1325       COSTS_N_INSNS (3),	/* mult.  */
1326       COSTS_N_INSNS (7),	/* mult_addsub. */
1327       COSTS_N_INSNS (7),	/* fma.  */
1328       COSTS_N_INSNS (3),	/* addsub.  */
1329       COSTS_N_INSNS (3),	/* fpconst.  */
1330       COSTS_N_INSNS (3),	/* neg.  */
1331       COSTS_N_INSNS (3),	/* compare.  */
1332       COSTS_N_INSNS (3),	/* widen.  */
1333       COSTS_N_INSNS (3),	/* narrow.  */
1334       COSTS_N_INSNS (3),	/* toint.  */
1335       COSTS_N_INSNS (3),	/* fromint.  */
1336       COSTS_N_INSNS (3)		/* roundint.  */
1337     },
1338     /* FP DFmode */
1339     {
1340       COSTS_N_INSNS (30),	/* div.  */
1341       COSTS_N_INSNS (6),	/* mult.  */
1342       COSTS_N_INSNS (10),	/* mult_addsub.  */
1343       COSTS_N_INSNS (7),	/* fma.  */
1344       COSTS_N_INSNS (3),	/* addsub.  */
1345       COSTS_N_INSNS (3),	/* fpconst.  */
1346       COSTS_N_INSNS (3),	/* neg.  */
1347       COSTS_N_INSNS (3),	/* compare.  */
1348       COSTS_N_INSNS (3),	/* widen.  */
1349       COSTS_N_INSNS (3),	/* narrow.  */
1350       COSTS_N_INSNS (3),	/* toint.  */
1351       COSTS_N_INSNS (3),	/* fromint.  */
1352       COSTS_N_INSNS (3)		/* roundint.  */
1353     }
1354   },
1355   /* Vector */
1356   {
1357     COSTS_N_INSNS (1)	/* alu.  */
1358   }
1359 };
1360 
1361 
1362 const struct cpu_cost_table cortexa7_extra_costs =
1363 {
1364   /* ALU */
1365   {
1366     0,			/* arith.  */
1367     0,			/* logical.  */
1368     COSTS_N_INSNS (1),	/* shift.  */
1369     COSTS_N_INSNS (1),	/* shift_reg.  */
1370     COSTS_N_INSNS (1),	/* arith_shift.  */
1371     COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1372     COSTS_N_INSNS (1),	/* log_shift.  */
1373     COSTS_N_INSNS (1),	/* log_shift_reg.  */
1374     COSTS_N_INSNS (1),	/* extend.  */
1375     COSTS_N_INSNS (1),	/* extend_arith.  */
1376     COSTS_N_INSNS (1),	/* bfi.  */
1377     COSTS_N_INSNS (1),	/* bfx.  */
1378     COSTS_N_INSNS (1),	/* clz.  */
1379     COSTS_N_INSNS (1),	/* rev.  */
1380     0,			/* non_exec.  */
1381     true		/* non_exec_costs_exec.  */
1382   },
1383 
1384   {
1385     /* MULT SImode */
1386     {
1387       0,			/* simple.  */
1388       COSTS_N_INSNS (1),	/* flag_setting.  */
1389       COSTS_N_INSNS (1),	/* extend.  */
1390       COSTS_N_INSNS (1),	/* add.  */
1391       COSTS_N_INSNS (1),	/* extend_add.  */
1392       COSTS_N_INSNS (7)		/* idiv.  */
1393     },
1394     /* MULT DImode */
1395     {
1396       0,			/* simple (N/A).  */
1397       0,			/* flag_setting (N/A).  */
1398       COSTS_N_INSNS (1),	/* extend.  */
1399       0,			/* add.  */
1400       COSTS_N_INSNS (2),	/* extend_add.  */
1401       0				/* idiv (N/A).  */
1402     }
1403   },
1404   /* LD/ST */
1405   {
1406     COSTS_N_INSNS (1),	/* load.  */
1407     COSTS_N_INSNS (1),	/* load_sign_extend.  */
1408     COSTS_N_INSNS (3),	/* ldrd.  */
1409     COSTS_N_INSNS (1),	/* ldm_1st.  */
1410     1,			/* ldm_regs_per_insn_1st.  */
1411     2,			/* ldm_regs_per_insn_subsequent.  */
1412     COSTS_N_INSNS (2),	/* loadf.  */
1413     COSTS_N_INSNS (2),	/* loadd.  */
1414     COSTS_N_INSNS (1),	/* load_unaligned.  */
1415     COSTS_N_INSNS (1),	/* store.  */
1416     COSTS_N_INSNS (3),	/* strd.  */
1417     COSTS_N_INSNS (1),	/* stm_1st.  */
1418     1,			/* stm_regs_per_insn_1st.  */
1419     2,			/* stm_regs_per_insn_subsequent.  */
1420     COSTS_N_INSNS (2),	/* storef.  */
1421     COSTS_N_INSNS (2),	/* stored.  */
1422     COSTS_N_INSNS (1),	/* store_unaligned.  */
1423     COSTS_N_INSNS (1),	/* loadv.  */
1424     COSTS_N_INSNS (1)	/* storev.  */
1425   },
1426   {
1427     /* FP SFmode */
1428     {
1429       COSTS_N_INSNS (15),	/* div.  */
1430       COSTS_N_INSNS (3),	/* mult.  */
1431       COSTS_N_INSNS (7),	/* mult_addsub. */
1432       COSTS_N_INSNS (7),	/* fma.  */
1433       COSTS_N_INSNS (3),	/* addsub.  */
1434       COSTS_N_INSNS (3),	/* fpconst.  */
1435       COSTS_N_INSNS (3),	/* neg.  */
1436       COSTS_N_INSNS (3),	/* compare.  */
1437       COSTS_N_INSNS (3),	/* widen.  */
1438       COSTS_N_INSNS (3),	/* narrow.  */
1439       COSTS_N_INSNS (3),	/* toint.  */
1440       COSTS_N_INSNS (3),	/* fromint.  */
1441       COSTS_N_INSNS (3)		/* roundint.  */
1442     },
1443     /* FP DFmode */
1444     {
1445       COSTS_N_INSNS (30),	/* div.  */
1446       COSTS_N_INSNS (6),	/* mult.  */
1447       COSTS_N_INSNS (10),	/* mult_addsub.  */
1448       COSTS_N_INSNS (7),	/* fma.  */
1449       COSTS_N_INSNS (3),	/* addsub.  */
1450       COSTS_N_INSNS (3),	/* fpconst.  */
1451       COSTS_N_INSNS (3),	/* neg.  */
1452       COSTS_N_INSNS (3),	/* compare.  */
1453       COSTS_N_INSNS (3),	/* widen.  */
1454       COSTS_N_INSNS (3),	/* narrow.  */
1455       COSTS_N_INSNS (3),	/* toint.  */
1456       COSTS_N_INSNS (3),	/* fromint.  */
1457       COSTS_N_INSNS (3)		/* roundint.  */
1458     }
1459   },
1460   /* Vector */
1461   {
1462     COSTS_N_INSNS (1)	/* alu.  */
1463   }
1464 };
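
/* Roughly, COSTS_N_INSNS (n) expresses a cost of n instructions, and the
   tables in this file record the extra cost of an operation on top of a
   single-instruction baseline, so an entry of 0 means "no more expensive
   than a simple ALU instruction".  */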
1465 
1466 const struct cpu_cost_table cortexa12_extra_costs =
1467 {
1468   /* ALU */
1469   {
1470     0,			/* arith.  */
1471     0,			/* logical.  */
1472     0,			/* shift.  */
1473     COSTS_N_INSNS (1),	/* shift_reg.  */
1474     COSTS_N_INSNS (1),	/* arith_shift.  */
1475     COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1476     COSTS_N_INSNS (1),	/* log_shift.  */
1477     COSTS_N_INSNS (1),	/* log_shift_reg.  */
1478     0,			/* extend.  */
1479     COSTS_N_INSNS (1),	/* extend_arith.  */
1480     0,			/* bfi.  */
1481     COSTS_N_INSNS (1),	/* bfx.  */
1482     COSTS_N_INSNS (1),	/* clz.  */
1483     COSTS_N_INSNS (1),	/* rev.  */
1484     0,			/* non_exec.  */
1485     true		/* non_exec_costs_exec.  */
1486   },
1487   /* MULT SImode */
1488   {
1489     {
1490       COSTS_N_INSNS (2),	/* simple.  */
1491       COSTS_N_INSNS (3),	/* flag_setting.  */
1492       COSTS_N_INSNS (2),	/* extend.  */
1493       COSTS_N_INSNS (3),	/* add.  */
1494       COSTS_N_INSNS (2),	/* extend_add.  */
1495       COSTS_N_INSNS (18)	/* idiv.  */
1496     },
1497     /* MULT DImode */
1498     {
1499       0,			/* simple (N/A).  */
1500       0,			/* flag_setting (N/A).  */
1501       COSTS_N_INSNS (3),	/* extend.  */
1502       0,			/* add (N/A).  */
1503       COSTS_N_INSNS (3),	/* extend_add.  */
1504       0				/* idiv (N/A).  */
1505     }
1506   },
1507   /* LD/ST */
1508   {
1509     COSTS_N_INSNS (3),	/* load.  */
1510     COSTS_N_INSNS (3),	/* load_sign_extend.  */
1511     COSTS_N_INSNS (3),	/* ldrd.  */
1512     COSTS_N_INSNS (3),	/* ldm_1st.  */
1513     1,			/* ldm_regs_per_insn_1st.  */
1514     2,			/* ldm_regs_per_insn_subsequent.  */
1515     COSTS_N_INSNS (3),	/* loadf.  */
1516     COSTS_N_INSNS (3),	/* loadd.  */
1517     0,			/* load_unaligned.  */
1518     0,			/* store.  */
1519     0,			/* strd.  */
1520     0,			/* stm_1st.  */
1521     1,			/* stm_regs_per_insn_1st.  */
1522     2,			/* stm_regs_per_insn_subsequent.  */
1523     COSTS_N_INSNS (2),	/* storef.  */
1524     COSTS_N_INSNS (2),	/* stored.  */
1525     0,			/* store_unaligned.  */
1526     COSTS_N_INSNS (1),	/* loadv.  */
1527     COSTS_N_INSNS (1)	/* storev.  */
1528   },
1529   {
1530     /* FP SFmode */
1531     {
1532       COSTS_N_INSNS (17),	/* div.  */
1533       COSTS_N_INSNS (4),	/* mult.  */
1534       COSTS_N_INSNS (8),	/* mult_addsub.  */
1535       COSTS_N_INSNS (8),	/* fma.  */
1536       COSTS_N_INSNS (4),	/* addsub.  */
1537       COSTS_N_INSNS (2),	/* fpconst.  */
1538       COSTS_N_INSNS (2),	/* neg.  */
1539       COSTS_N_INSNS (2),	/* compare.  */
1540       COSTS_N_INSNS (4),	/* widen.  */
1541       COSTS_N_INSNS (4),	/* narrow.  */
1542       COSTS_N_INSNS (4),	/* toint.  */
1543       COSTS_N_INSNS (4),	/* fromint.  */
1544       COSTS_N_INSNS (4)		/* roundint.  */
1545     },
1546     /* FP DFmode */
1547     {
1548       COSTS_N_INSNS (31),	/* div.  */
1549       COSTS_N_INSNS (4),	/* mult.  */
1550       COSTS_N_INSNS (8),	/* mult_addsub.  */
1551       COSTS_N_INSNS (8),	/* fma.  */
1552       COSTS_N_INSNS (4),	/* addsub.  */
1553       COSTS_N_INSNS (2),	/* fpconst.  */
1554       COSTS_N_INSNS (2),	/* neg.  */
1555       COSTS_N_INSNS (2),	/* compare.  */
1556       COSTS_N_INSNS (4),	/* widen.  */
1557       COSTS_N_INSNS (4),	/* narrow.  */
1558       COSTS_N_INSNS (4),	/* toint.  */
1559       COSTS_N_INSNS (4),	/* fromint.  */
1560       COSTS_N_INSNS (4)		/* roundint.  */
1561     }
1562   },
1563   /* Vector */
1564   {
1565     COSTS_N_INSNS (1)	/* alu.  */
1566   }
1567 };
1568 
1569 const struct cpu_cost_table cortexa15_extra_costs =
1570 {
1571   /* ALU */
1572   {
1573     0,			/* arith.  */
1574     0,			/* logical.  */
1575     0,			/* shift.  */
1576     0,			/* shift_reg.  */
1577     COSTS_N_INSNS (1),	/* arith_shift.  */
1578     COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1579     COSTS_N_INSNS (1),	/* log_shift.  */
1580     COSTS_N_INSNS (1),	/* log_shift_reg.  */
1581     0,			/* extend.  */
1582     COSTS_N_INSNS (1),	/* extend_arith.  */
1583     COSTS_N_INSNS (1),	/* bfi.  */
1584     0,			/* bfx.  */
1585     0,			/* clz.  */
1586     0,			/* rev.  */
1587     0,			/* non_exec.  */
1588     true		/* non_exec_costs_exec.  */
1589   },
1590   /* MULT SImode */
1591   {
1592     {
1593       COSTS_N_INSNS (2),	/* simple.  */
1594       COSTS_N_INSNS (3),	/* flag_setting.  */
1595       COSTS_N_INSNS (2),	/* extend.  */
1596       COSTS_N_INSNS (2),	/* add.  */
1597       COSTS_N_INSNS (2),	/* extend_add.  */
1598       COSTS_N_INSNS (18)	/* idiv.  */
1599     },
1600     /* MULT DImode */
1601     {
1602       0,			/* simple (N/A).  */
1603       0,			/* flag_setting (N/A).  */
1604       COSTS_N_INSNS (3),	/* extend.  */
1605       0,			/* add (N/A).  */
1606       COSTS_N_INSNS (3),	/* extend_add.  */
1607       0				/* idiv (N/A).  */
1608     }
1609   },
1610   /* LD/ST */
1611   {
1612     COSTS_N_INSNS (3),	/* load.  */
1613     COSTS_N_INSNS (3),	/* load_sign_extend.  */
1614     COSTS_N_INSNS (3),	/* ldrd.  */
1615     COSTS_N_INSNS (4),	/* ldm_1st.  */
1616     1,			/* ldm_regs_per_insn_1st.  */
1617     2,			/* ldm_regs_per_insn_subsequent.  */
1618     COSTS_N_INSNS (4),	/* loadf.  */
1619     COSTS_N_INSNS (4),	/* loadd.  */
1620     0,			/* load_unaligned.  */
1621     0,			/* store.  */
1622     0,			/* strd.  */
1623     COSTS_N_INSNS (1),	/* stm_1st.  */
1624     1,			/* stm_regs_per_insn_1st.  */
1625     2,			/* stm_regs_per_insn_subsequent.  */
1626     0,			/* storef.  */
1627     0,			/* stored.  */
1628     0,			/* store_unaligned.  */
1629     COSTS_N_INSNS (1),	/* loadv.  */
1630     COSTS_N_INSNS (1)	/* storev.  */
1631   },
1632   {
1633     /* FP SFmode */
1634     {
1635       COSTS_N_INSNS (17),	/* div.  */
1636       COSTS_N_INSNS (4),	/* mult.  */
1637       COSTS_N_INSNS (8),	/* mult_addsub.  */
1638       COSTS_N_INSNS (8),	/* fma.  */
1639       COSTS_N_INSNS (4),	/* addsub.  */
1640       COSTS_N_INSNS (2),	/* fpconst.  */
1641       COSTS_N_INSNS (2),	/* neg.  */
1642       COSTS_N_INSNS (5),	/* compare.  */
1643       COSTS_N_INSNS (4),	/* widen.  */
1644       COSTS_N_INSNS (4),	/* narrow.  */
1645       COSTS_N_INSNS (4),	/* toint.  */
1646       COSTS_N_INSNS (4),	/* fromint.  */
1647       COSTS_N_INSNS (4)		/* roundint.  */
1648     },
1649     /* FP DFmode */
1650     {
1651       COSTS_N_INSNS (31),	/* div.  */
1652       COSTS_N_INSNS (4),	/* mult.  */
1653       COSTS_N_INSNS (8),	/* mult_addsub.  */
1654       COSTS_N_INSNS (8),	/* fma.  */
1655       COSTS_N_INSNS (4),	/* addsub.  */
1656       COSTS_N_INSNS (2),	/* fpconst.  */
1657       COSTS_N_INSNS (2),	/* neg.  */
1658       COSTS_N_INSNS (2),	/* compare.  */
1659       COSTS_N_INSNS (4),	/* widen.  */
1660       COSTS_N_INSNS (4),	/* narrow.  */
1661       COSTS_N_INSNS (4),	/* toint.  */
1662       COSTS_N_INSNS (4),	/* fromint.  */
1663       COSTS_N_INSNS (4)		/* roundint.  */
1664     }
1665   },
1666   /* Vector */
1667   {
1668     COSTS_N_INSNS (1)	/* alu.  */
1669   }
1670 };
1671 
1672 const struct cpu_cost_table v7m_extra_costs =
1673 {
1674   /* ALU */
1675   {
1676     0,			/* arith.  */
1677     0,			/* logical.  */
1678     0,			/* shift.  */
1679     0,			/* shift_reg.  */
1680     0,			/* arith_shift.  */
1681     COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1682     0,			/* log_shift.  */
1683     COSTS_N_INSNS (1),	/* log_shift_reg.  */
1684     0,			/* extend.  */
1685     COSTS_N_INSNS (1),	/* extend_arith.  */
1686     0,			/* bfi.  */
1687     0,			/* bfx.  */
1688     0,			/* clz.  */
1689     0,			/* rev.  */
1690     COSTS_N_INSNS (1),	/* non_exec.  */
1691     false		/* non_exec_costs_exec.  */
1692   },
1693   {
1694     /* MULT SImode */
1695     {
1696       COSTS_N_INSNS (1),	/* simple.  */
1697       COSTS_N_INSNS (1),	/* flag_setting.  */
1698       COSTS_N_INSNS (2),	/* extend.  */
1699       COSTS_N_INSNS (1),	/* add.  */
1700       COSTS_N_INSNS (3),	/* extend_add.  */
1701       COSTS_N_INSNS (8)		/* idiv.  */
1702     },
1703     /* MULT DImode */
1704     {
1705       0,			/* simple (N/A).  */
1706       0,			/* flag_setting (N/A).  */
1707       COSTS_N_INSNS (2),	/* extend.  */
1708       0,			/* add (N/A).  */
1709       COSTS_N_INSNS (3),	/* extend_add.  */
1710       0				/* idiv (N/A).  */
1711     }
1712   },
1713   /* LD/ST */
1714   {
1715     COSTS_N_INSNS (2),	/* load.  */
1716     0,			/* load_sign_extend.  */
1717     COSTS_N_INSNS (3),	/* ldrd.  */
1718     COSTS_N_INSNS (2),	/* ldm_1st.  */
1719     1,			/* ldm_regs_per_insn_1st.  */
1720     1,			/* ldm_regs_per_insn_subsequent.  */
1721     COSTS_N_INSNS (2),	/* loadf.  */
1722     COSTS_N_INSNS (3),	/* loadd.  */
1723     COSTS_N_INSNS (1),  /* load_unaligned.  */
1724     COSTS_N_INSNS (2),	/* store.  */
1725     COSTS_N_INSNS (3),	/* strd.  */
1726     COSTS_N_INSNS (2),	/* stm_1st.  */
1727     1,			/* stm_regs_per_insn_1st.  */
1728     1,			/* stm_regs_per_insn_subsequent.  */
1729     COSTS_N_INSNS (2),	/* storef.  */
1730     COSTS_N_INSNS (3),	/* stored.  */
1731     COSTS_N_INSNS (1),	/* store_unaligned.  */
1732     COSTS_N_INSNS (1),	/* loadv.  */
1733     COSTS_N_INSNS (1)	/* storev.  */
1734   },
1735   {
1736     /* FP SFmode */
1737     {
1738       COSTS_N_INSNS (7),	/* div.  */
1739       COSTS_N_INSNS (2),	/* mult.  */
1740       COSTS_N_INSNS (5),	/* mult_addsub.  */
1741       COSTS_N_INSNS (3),	/* fma.  */
1742       COSTS_N_INSNS (1),	/* addsub.  */
1743       0,			/* fpconst.  */
1744       0,			/* neg.  */
1745       0,			/* compare.  */
1746       0,			/* widen.  */
1747       0,			/* narrow.  */
1748       0,			/* toint.  */
1749       0,			/* fromint.  */
1750       0				/* roundint.  */
1751     },
1752     /* FP DFmode */
1753     {
1754       COSTS_N_INSNS (15),	/* div.  */
1755       COSTS_N_INSNS (5),	/* mult.  */
1756       COSTS_N_INSNS (7),	/* mult_addsub.  */
1757       COSTS_N_INSNS (7),	/* fma.  */
1758       COSTS_N_INSNS (3),	/* addsub.  */
1759       0,			/* fpconst.  */
1760       0,			/* neg.  */
1761       0,			/* compare.  */
1762       0,			/* widen.  */
1763       0,			/* narrow.  */
1764       0,			/* toint.  */
1765       0,			/* fromint.  */
1766       0				/* roundint.  */
1767     }
1768   },
1769   /* Vector */
1770   {
1771     COSTS_N_INSNS (1)	/* alu.  */
1772   }
1773 };
1774 
1775 const struct addr_mode_cost_table generic_addr_mode_costs =
1776 {
1777   /* int.  */
1778   {
1779     COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
1780     COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
1781     COSTS_N_INSNS (0)	/* AMO_WB.  */
1782   },
1783   /* float.  */
1784   {
1785     COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
1786     COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
1787     COSTS_N_INSNS (0)	/* AMO_WB.  */
1788   },
1789   /* vector.  */
1790   {
1791     COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
1792     COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
1793     COSTS_N_INSNS (0)	/* AMO_WB.  */
1794   }
1795 };
1796 
1797 const struct tune_params arm_slowmul_tune =
1798 {
1799   &generic_extra_costs,			/* Insn extra costs.  */
1800   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1801   NULL,					/* Sched adj cost.  */
1802   arm_default_branch_cost,
1803   &arm_default_vec_cost,
1804   3,						/* Constant limit.  */
1805   5,						/* Max cond insns.  */
1806   8,						/* Memset max inline.  */
1807   1,						/* Issue rate.  */
1808   ARM_PREFETCH_NOT_BENEFICIAL,
1809   tune_params::PREF_CONST_POOL_TRUE,
1810   tune_params::PREF_LDRD_FALSE,
1811   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1812   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1813   tune_params::DISPARAGE_FLAGS_NEITHER,
1814   tune_params::PREF_NEON_64_FALSE,
1815   tune_params::PREF_NEON_STRINGOPS_FALSE,
1816   tune_params::FUSE_NOTHING,
1817   tune_params::SCHED_AUTOPREF_OFF
1818 };
1819 
1820 const struct tune_params arm_fastmul_tune =
1821 {
1822   &generic_extra_costs,			/* Insn extra costs.  */
1823   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1824   NULL,					/* Sched adj cost.  */
1825   arm_default_branch_cost,
1826   &arm_default_vec_cost,
1827   1,						/* Constant limit.  */
1828   5,						/* Max cond insns.  */
1829   8,						/* Memset max inline.  */
1830   1,						/* Issue rate.  */
1831   ARM_PREFETCH_NOT_BENEFICIAL,
1832   tune_params::PREF_CONST_POOL_TRUE,
1833   tune_params::PREF_LDRD_FALSE,
1834   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1835   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1836   tune_params::DISPARAGE_FLAGS_NEITHER,
1837   tune_params::PREF_NEON_64_FALSE,
1838   tune_params::PREF_NEON_STRINGOPS_FALSE,
1839   tune_params::FUSE_NOTHING,
1840   tune_params::SCHED_AUTOPREF_OFF
1841 };
1842 
1843 /* StrongARM has early execution of branches, so a sequence that is worth
1844    skipping is shorter.  Set max_insns_skipped to a lower value.  */
1845 
1846 const struct tune_params arm_strongarm_tune =
1847 {
1848   &generic_extra_costs,			/* Insn extra costs.  */
1849   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1850   NULL,					/* Sched adj cost.  */
1851   arm_default_branch_cost,
1852   &arm_default_vec_cost,
1853   1,						/* Constant limit.  */
1854   3,						/* Max cond insns.  */
1855   8,						/* Memset max inline.  */
1856   1,						/* Issue rate.  */
1857   ARM_PREFETCH_NOT_BENEFICIAL,
1858   tune_params::PREF_CONST_POOL_TRUE,
1859   tune_params::PREF_LDRD_FALSE,
1860   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1861   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1862   tune_params::DISPARAGE_FLAGS_NEITHER,
1863   tune_params::PREF_NEON_64_FALSE,
1864   tune_params::PREF_NEON_STRINGOPS_FALSE,
1865   tune_params::FUSE_NOTHING,
1866   tune_params::SCHED_AUTOPREF_OFF
1867 };
1868 
1869 const struct tune_params arm_xscale_tune =
1870 {
1871   &generic_extra_costs,			/* Insn extra costs.  */
1872   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1873   xscale_sched_adjust_cost,
1874   arm_default_branch_cost,
1875   &arm_default_vec_cost,
1876   2,						/* Constant limit.  */
1877   3,						/* Max cond insns.  */
1878   8,						/* Memset max inline.  */
1879   1,						/* Issue rate.  */
1880   ARM_PREFETCH_NOT_BENEFICIAL,
1881   tune_params::PREF_CONST_POOL_TRUE,
1882   tune_params::PREF_LDRD_FALSE,
1883   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1884   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1885   tune_params::DISPARAGE_FLAGS_NEITHER,
1886   tune_params::PREF_NEON_64_FALSE,
1887   tune_params::PREF_NEON_STRINGOPS_FALSE,
1888   tune_params::FUSE_NOTHING,
1889   tune_params::SCHED_AUTOPREF_OFF
1890 };
1891 
1892 const struct tune_params arm_9e_tune =
1893 {
1894   &generic_extra_costs,			/* Insn extra costs.  */
1895   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1896   NULL,					/* Sched adj cost.  */
1897   arm_default_branch_cost,
1898   &arm_default_vec_cost,
1899   1,						/* Constant limit.  */
1900   5,						/* Max cond insns.  */
1901   8,						/* Memset max inline.  */
1902   1,						/* Issue rate.  */
1903   ARM_PREFETCH_NOT_BENEFICIAL,
1904   tune_params::PREF_CONST_POOL_TRUE,
1905   tune_params::PREF_LDRD_FALSE,
1906   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1907   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1908   tune_params::DISPARAGE_FLAGS_NEITHER,
1909   tune_params::PREF_NEON_64_FALSE,
1910   tune_params::PREF_NEON_STRINGOPS_FALSE,
1911   tune_params::FUSE_NOTHING,
1912   tune_params::SCHED_AUTOPREF_OFF
1913 };
1914 
1915 const struct tune_params arm_marvell_pj4_tune =
1916 {
1917   &generic_extra_costs,			/* Insn extra costs.  */
1918   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1919   NULL,					/* Sched adj cost.  */
1920   arm_default_branch_cost,
1921   &arm_default_vec_cost,
1922   1,						/* Constant limit.  */
1923   5,						/* Max cond insns.  */
1924   8,						/* Memset max inline.  */
1925   2,						/* Issue rate.  */
1926   ARM_PREFETCH_NOT_BENEFICIAL,
1927   tune_params::PREF_CONST_POOL_TRUE,
1928   tune_params::PREF_LDRD_FALSE,
1929   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1930   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1931   tune_params::DISPARAGE_FLAGS_NEITHER,
1932   tune_params::PREF_NEON_64_FALSE,
1933   tune_params::PREF_NEON_STRINGOPS_FALSE,
1934   tune_params::FUSE_NOTHING,
1935   tune_params::SCHED_AUTOPREF_OFF
1936 };
1937 
1938 const struct tune_params arm_v6t2_tune =
1939 {
1940   &generic_extra_costs,			/* Insn extra costs.  */
1941   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1942   NULL,					/* Sched adj cost.  */
1943   arm_default_branch_cost,
1944   &arm_default_vec_cost,
1945   1,						/* Constant limit.  */
1946   5,						/* Max cond insns.  */
1947   8,						/* Memset max inline.  */
1948   1,						/* Issue rate.  */
1949   ARM_PREFETCH_NOT_BENEFICIAL,
1950   tune_params::PREF_CONST_POOL_FALSE,
1951   tune_params::PREF_LDRD_FALSE,
1952   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1953   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1954   tune_params::DISPARAGE_FLAGS_NEITHER,
1955   tune_params::PREF_NEON_64_FALSE,
1956   tune_params::PREF_NEON_STRINGOPS_FALSE,
1957   tune_params::FUSE_NOTHING,
1958   tune_params::SCHED_AUTOPREF_OFF
1959 };
1960 
1961 
1962 /* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
1963 const struct tune_params arm_cortex_tune =
1964 {
1965   &generic_extra_costs,
1966   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1967   NULL,					/* Sched adj cost.  */
1968   arm_default_branch_cost,
1969   &arm_default_vec_cost,
1970   1,						/* Constant limit.  */
1971   5,						/* Max cond insns.  */
1972   8,						/* Memset max inline.  */
1973   2,						/* Issue rate.  */
1974   ARM_PREFETCH_NOT_BENEFICIAL,
1975   tune_params::PREF_CONST_POOL_FALSE,
1976   tune_params::PREF_LDRD_FALSE,
1977   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1978   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1979   tune_params::DISPARAGE_FLAGS_NEITHER,
1980   tune_params::PREF_NEON_64_FALSE,
1981   tune_params::PREF_NEON_STRINGOPS_FALSE,
1982   tune_params::FUSE_NOTHING,
1983   tune_params::SCHED_AUTOPREF_OFF
1984 };
1985 
1986 const struct tune_params arm_cortex_a8_tune =
1987 {
1988   &cortexa8_extra_costs,
1989   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1990   NULL,					/* Sched adj cost.  */
1991   arm_default_branch_cost,
1992   &arm_default_vec_cost,
1993   1,						/* Constant limit.  */
1994   5,						/* Max cond insns.  */
1995   8,						/* Memset max inline.  */
1996   2,						/* Issue rate.  */
1997   ARM_PREFETCH_NOT_BENEFICIAL,
1998   tune_params::PREF_CONST_POOL_FALSE,
1999   tune_params::PREF_LDRD_FALSE,
2000   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2001   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2002   tune_params::DISPARAGE_FLAGS_NEITHER,
2003   tune_params::PREF_NEON_64_FALSE,
2004   tune_params::PREF_NEON_STRINGOPS_TRUE,
2005   tune_params::FUSE_NOTHING,
2006   tune_params::SCHED_AUTOPREF_OFF
2007 };
2008 
2009 const struct tune_params arm_cortex_a7_tune =
2010 {
2011   &cortexa7_extra_costs,
2012   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2013   NULL,					/* Sched adj cost.  */
2014   arm_default_branch_cost,
2015   &arm_default_vec_cost,
2016   1,						/* Constant limit.  */
2017   5,						/* Max cond insns.  */
2018   8,						/* Memset max inline.  */
2019   2,						/* Issue rate.  */
2020   ARM_PREFETCH_NOT_BENEFICIAL,
2021   tune_params::PREF_CONST_POOL_FALSE,
2022   tune_params::PREF_LDRD_FALSE,
2023   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2024   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2025   tune_params::DISPARAGE_FLAGS_NEITHER,
2026   tune_params::PREF_NEON_64_FALSE,
2027   tune_params::PREF_NEON_STRINGOPS_TRUE,
2028   tune_params::FUSE_NOTHING,
2029   tune_params::SCHED_AUTOPREF_OFF
2030 };
2031 
2032 const struct tune_params arm_cortex_a15_tune =
2033 {
2034   &cortexa15_extra_costs,
2035   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2036   NULL,					/* Sched adj cost.  */
2037   arm_default_branch_cost,
2038   &arm_default_vec_cost,
2039   1,						/* Constant limit.  */
2040   2,						/* Max cond insns.  */
2041   8,						/* Memset max inline.  */
2042   3,						/* Issue rate.  */
2043   ARM_PREFETCH_NOT_BENEFICIAL,
2044   tune_params::PREF_CONST_POOL_FALSE,
2045   tune_params::PREF_LDRD_TRUE,
2046   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2047   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2048   tune_params::DISPARAGE_FLAGS_ALL,
2049   tune_params::PREF_NEON_64_FALSE,
2050   tune_params::PREF_NEON_STRINGOPS_TRUE,
2051   tune_params::FUSE_NOTHING,
2052   tune_params::SCHED_AUTOPREF_FULL
2053 };
2054 
2055 const struct tune_params arm_cortex_a35_tune =
2056 {
2057   &cortexa53_extra_costs,
2058   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2059   NULL,					/* Sched adj cost.  */
2060   arm_default_branch_cost,
2061   &arm_default_vec_cost,
2062   1,						/* Constant limit.  */
2063   5,						/* Max cond insns.  */
2064   8,						/* Memset max inline.  */
2065   1,						/* Issue rate.  */
2066   ARM_PREFETCH_NOT_BENEFICIAL,
2067   tune_params::PREF_CONST_POOL_FALSE,
2068   tune_params::PREF_LDRD_FALSE,
2069   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2070   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2071   tune_params::DISPARAGE_FLAGS_NEITHER,
2072   tune_params::PREF_NEON_64_FALSE,
2073   tune_params::PREF_NEON_STRINGOPS_TRUE,
2074   FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2075   tune_params::SCHED_AUTOPREF_OFF
2076 };
2077 
2078 const struct tune_params arm_cortex_a53_tune =
2079 {
2080   &cortexa53_extra_costs,
2081   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2082   NULL,					/* Sched adj cost.  */
2083   arm_default_branch_cost,
2084   &arm_default_vec_cost,
2085   1,						/* Constant limit.  */
2086   5,						/* Max cond insns.  */
2087   8,						/* Memset max inline.  */
2088   2,						/* Issue rate.  */
2089   ARM_PREFETCH_NOT_BENEFICIAL,
2090   tune_params::PREF_CONST_POOL_FALSE,
2091   tune_params::PREF_LDRD_FALSE,
2092   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2093   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2094   tune_params::DISPARAGE_FLAGS_NEITHER,
2095   tune_params::PREF_NEON_64_FALSE,
2096   tune_params::PREF_NEON_STRINGOPS_TRUE,
2097   FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2098   tune_params::SCHED_AUTOPREF_OFF
2099 };
2100 
2101 const struct tune_params arm_cortex_a57_tune =
2102 {
2103   &cortexa57_extra_costs,
2104   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2105   NULL,					/* Sched adj cost.  */
2106   arm_default_branch_cost,
2107   &arm_default_vec_cost,
2108   1,						/* Constant limit.  */
2109   2,						/* Max cond insns.  */
2110   8,						/* Memset max inline.  */
2111   3,						/* Issue rate.  */
2112   ARM_PREFETCH_NOT_BENEFICIAL,
2113   tune_params::PREF_CONST_POOL_FALSE,
2114   tune_params::PREF_LDRD_TRUE,
2115   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2116   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2117   tune_params::DISPARAGE_FLAGS_ALL,
2118   tune_params::PREF_NEON_64_FALSE,
2119   tune_params::PREF_NEON_STRINGOPS_TRUE,
2120   FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2121   tune_params::SCHED_AUTOPREF_FULL
2122 };
2123 
2124 const struct tune_params arm_exynosm1_tune =
2125 {
2126   &exynosm1_extra_costs,
2127   &generic_addr_mode_costs,			/* Addressing mode costs.  */
2128   NULL,						/* Sched adj cost.  */
2129   arm_default_branch_cost,
2130   &arm_default_vec_cost,
2131   1,						/* Constant limit.  */
2132   2,						/* Max cond insns.  */
2133   8,						/* Memset max inline.  */
2134   3,						/* Issue rate.  */
2135   ARM_PREFETCH_NOT_BENEFICIAL,
2136   tune_params::PREF_CONST_POOL_FALSE,
2137   tune_params::PREF_LDRD_TRUE,
2138   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
2139   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
2140   tune_params::DISPARAGE_FLAGS_ALL,
2141   tune_params::PREF_NEON_64_FALSE,
2142   tune_params::PREF_NEON_STRINGOPS_TRUE,
2143   tune_params::FUSE_NOTHING,
2144   tune_params::SCHED_AUTOPREF_OFF
2145 };
2146 
2147 const struct tune_params arm_xgene1_tune =
2148 {
2149   &xgene1_extra_costs,
2150   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2151   NULL,					/* Sched adj cost.  */
2152   arm_default_branch_cost,
2153   &arm_default_vec_cost,
2154   1,						/* Constant limit.  */
2155   2,						/* Max cond insns.  */
2156   32,						/* Memset max inline.  */
2157   4,						/* Issue rate.  */
2158   ARM_PREFETCH_NOT_BENEFICIAL,
2159   tune_params::PREF_CONST_POOL_FALSE,
2160   tune_params::PREF_LDRD_TRUE,
2161   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2162   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2163   tune_params::DISPARAGE_FLAGS_ALL,
2164   tune_params::PREF_NEON_64_FALSE,
2165   tune_params::PREF_NEON_STRINGOPS_FALSE,
2166   tune_params::FUSE_NOTHING,
2167   tune_params::SCHED_AUTOPREF_OFF
2168 };
2169 
2170 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2171    less appealing.  Set max_insns_skipped to a low value.  */
2172 
2173 const struct tune_params arm_cortex_a5_tune =
2174 {
2175   &cortexa5_extra_costs,
2176   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2177   NULL,					/* Sched adj cost.  */
2178   arm_cortex_a5_branch_cost,
2179   &arm_default_vec_cost,
2180   1,						/* Constant limit.  */
2181   1,						/* Max cond insns.  */
2182   8,						/* Memset max inline.  */
2183   2,						/* Issue rate.  */
2184   ARM_PREFETCH_NOT_BENEFICIAL,
2185   tune_params::PREF_CONST_POOL_FALSE,
2186   tune_params::PREF_LDRD_FALSE,
2187   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* Thumb.  */
2188   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* ARM.  */
2189   tune_params::DISPARAGE_FLAGS_NEITHER,
2190   tune_params::PREF_NEON_64_FALSE,
2191   tune_params::PREF_NEON_STRINGOPS_TRUE,
2192   tune_params::FUSE_NOTHING,
2193   tune_params::SCHED_AUTOPREF_OFF
2194 };
2195 
2196 const struct tune_params arm_cortex_a9_tune =
2197 {
2198   &cortexa9_extra_costs,
2199   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2200   cortex_a9_sched_adjust_cost,
2201   arm_default_branch_cost,
2202   &arm_default_vec_cost,
2203   1,						/* Constant limit.  */
2204   5,						/* Max cond insns.  */
2205   8,						/* Memset max inline.  */
2206   2,						/* Issue rate.  */
2207   ARM_PREFETCH_BENEFICIAL(4,32,32),
2208   tune_params::PREF_CONST_POOL_FALSE,
2209   tune_params::PREF_LDRD_FALSE,
2210   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2211   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2212   tune_params::DISPARAGE_FLAGS_NEITHER,
2213   tune_params::PREF_NEON_64_FALSE,
2214   tune_params::PREF_NEON_STRINGOPS_FALSE,
2215   tune_params::FUSE_NOTHING,
2216   tune_params::SCHED_AUTOPREF_OFF
2217 };
2218 
2219 const struct tune_params arm_cortex_a12_tune =
2220 {
2221   &cortexa12_extra_costs,
2222   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2223   NULL,					/* Sched adj cost.  */
2224   arm_default_branch_cost,
2225   &arm_default_vec_cost,                        /* Vectorizer costs.  */
2226   1,						/* Constant limit.  */
2227   2,						/* Max cond insns.  */
2228   8,						/* Memset max inline.  */
2229   2,						/* Issue rate.  */
2230   ARM_PREFETCH_NOT_BENEFICIAL,
2231   tune_params::PREF_CONST_POOL_FALSE,
2232   tune_params::PREF_LDRD_TRUE,
2233   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2234   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2235   tune_params::DISPARAGE_FLAGS_ALL,
2236   tune_params::PREF_NEON_64_FALSE,
2237   tune_params::PREF_NEON_STRINGOPS_TRUE,
2238   FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2239   tune_params::SCHED_AUTOPREF_OFF
2240 };
2241 
2242 const struct tune_params arm_cortex_a73_tune =
2243 {
2244   &cortexa57_extra_costs,
2245   &generic_addr_mode_costs,			/* Addressing mode costs.  */
2246   NULL,						/* Sched adj cost.  */
2247   arm_default_branch_cost,
2248   &arm_default_vec_cost,			/* Vectorizer costs.  */
2249   1,						/* Constant limit.  */
2250   2,						/* Max cond insns.  */
2251   8,						/* Memset max inline.  */
2252   2,						/* Issue rate.  */
2253   ARM_PREFETCH_NOT_BENEFICIAL,
2254   tune_params::PREF_CONST_POOL_FALSE,
2255   tune_params::PREF_LDRD_TRUE,
2256   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2257   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2258   tune_params::DISPARAGE_FLAGS_ALL,
2259   tune_params::PREF_NEON_64_FALSE,
2260   tune_params::PREF_NEON_STRINGOPS_TRUE,
2261   FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2262   tune_params::SCHED_AUTOPREF_FULL
2263 };
2264 
2265 /* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT each take one
2266    cycle, so a full 32-bit constant costs two cycles.  An LDR from the constant
2267    pool also takes two cycles to execute, but mildly increases pipelining
2268    opportunity (consecutive loads/stores can be pipelined together, saving one
2269    cycle), and may also improve icache utilisation.  Hence we prefer the
2270    constant pool for such processors.  */
2271 
2272 const struct tune_params arm_v7m_tune =
2273 {
2274   &v7m_extra_costs,
2275   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2276   NULL,					/* Sched adj cost.  */
2277   arm_cortex_m_branch_cost,
2278   &arm_default_vec_cost,
2279   1,						/* Constant limit.  */
2280   2,						/* Max cond insns.  */
2281   8,						/* Memset max inline.  */
2282   1,						/* Issue rate.  */
2283   ARM_PREFETCH_NOT_BENEFICIAL,
2284   tune_params::PREF_CONST_POOL_TRUE,
2285   tune_params::PREF_LDRD_FALSE,
2286   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* Thumb.  */
2287   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* ARM.  */
2288   tune_params::DISPARAGE_FLAGS_NEITHER,
2289   tune_params::PREF_NEON_64_FALSE,
2290   tune_params::PREF_NEON_STRINGOPS_FALSE,
2291   tune_params::FUSE_NOTHING,
2292   tune_params::SCHED_AUTOPREF_OFF
2293 };
2294 
2295 /* Cortex-M7 tuning.  */
2296 
2297 const struct tune_params arm_cortex_m7_tune =
2298 {
2299   &v7m_extra_costs,
2300   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2301   NULL,					/* Sched adj cost.  */
2302   arm_cortex_m7_branch_cost,
2303   &arm_default_vec_cost,
2304   0,						/* Constant limit.  */
2305   1,						/* Max cond insns.  */
2306   8,						/* Memset max inline.  */
2307   2,						/* Issue rate.  */
2308   ARM_PREFETCH_NOT_BENEFICIAL,
2309   tune_params::PREF_CONST_POOL_TRUE,
2310   tune_params::PREF_LDRD_FALSE,
2311   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2312   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2313   tune_params::DISPARAGE_FLAGS_NEITHER,
2314   tune_params::PREF_NEON_64_FALSE,
2315   tune_params::PREF_NEON_STRINGOPS_FALSE,
2316   tune_params::FUSE_NOTHING,
2317   tune_params::SCHED_AUTOPREF_OFF
2318 };
2319 
2320 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2321    arm_v6t2_tune.  It is used for cortex-m0, cortex-m1, cortex-m0plus and
2322    cortex-m23.  */
2323 const struct tune_params arm_v6m_tune =
2324 {
2325   &generic_extra_costs,			/* Insn extra costs.  */
2326   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2327   NULL,					/* Sched adj cost.  */
2328   arm_default_branch_cost,
2329   &arm_default_vec_cost,                        /* Vectorizer costs.  */
2330   1,						/* Constant limit.  */
2331   5,						/* Max cond insns.  */
2332   8,						/* Memset max inline.  */
2333   1,						/* Issue rate.  */
2334   ARM_PREFETCH_NOT_BENEFICIAL,
2335   tune_params::PREF_CONST_POOL_FALSE,
2336   tune_params::PREF_LDRD_FALSE,
2337   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* Thumb.  */
2338   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* ARM.  */
2339   tune_params::DISPARAGE_FLAGS_NEITHER,
2340   tune_params::PREF_NEON_64_FALSE,
2341   tune_params::PREF_NEON_STRINGOPS_FALSE,
2342   tune_params::FUSE_NOTHING,
2343   tune_params::SCHED_AUTOPREF_OFF
2344 };
2345 
2346 const struct tune_params arm_fa726te_tune =
2347 {
2348   &generic_extra_costs,				/* Insn extra costs.  */
2349   &generic_addr_mode_costs,			/* Addressing mode costs.  */
2350   fa726te_sched_adjust_cost,
2351   arm_default_branch_cost,
2352   &arm_default_vec_cost,
2353   1,						/* Constant limit.  */
2354   5,						/* Max cond insns.  */
2355   8,						/* Memset max inline.  */
2356   2,						/* Issue rate.  */
2357   ARM_PREFETCH_NOT_BENEFICIAL,
2358   tune_params::PREF_CONST_POOL_TRUE,
2359   tune_params::PREF_LDRD_FALSE,
2360   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2361   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2362   tune_params::DISPARAGE_FLAGS_NEITHER,
2363   tune_params::PREF_NEON_64_FALSE,
2364   tune_params::PREF_NEON_STRINGOPS_FALSE,
2365   tune_params::FUSE_NOTHING,
2366   tune_params::SCHED_AUTOPREF_OFF
2367 };
2368 
2369 /* Auto-generated CPU, FPU and architecture tables.  */
2370 #include "arm-cpu-data.h"
2371 
2372 /* The name of the preprocessor macro to define for this architecture.  PROFILE
2373    is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2374    is thus chosen to be big enough to hold the longest architecture name.  */
2375 
2376 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
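
/* E.g. for -march=armv8-a the "PROFILE" placeholder above is overwritten so
   that the macro actually defined is __ARM_ARCH_8A__.  */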
2377 
2378 /* Supported TLS relocations.  */
2379 
2380 enum tls_reloc {
2381   TLS_GD32,
2382   TLS_LDM32,
2383   TLS_LDO32,
2384   TLS_IE32,
2385   TLS_LE32,
2386   TLS_DESCSEQ	/* GNU scheme */
2387 };
2388 
2389 /* The maximum number of insns to be used when loading a constant.  */
2390 inline static int
2391 arm_constant_limit (bool size_p)
2392 {
2393   return size_p ? 1 : current_tune->constant_limit;
2394 }
2395 
2396 /* Emit an insn that's a simple single-set.  Both the operands must be known
2397    to be valid.  */
2398 inline static rtx_insn *
2399 emit_set_insn (rtx x, rtx y)
2400 {
2401   return emit_insn (gen_rtx_SET (x, y));
2402 }
2403 
2404 /* Return the number of bits set in VALUE.  */
2405 static unsigned
2406 bit_count (unsigned long value)
2407 {
2408   unsigned long count = 0;
2409 
2410   while (value)
2411     {
2412       count++;
2413       value &= value - 1;  /* Clear the least-significant set bit.  */
2414     }
2415 
2416   return count;
2417 }
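
/* The loop above relies on the classic value &= value - 1 step, which clears
   exactly one set bit per iteration, so the iteration count is the popcount.
   E.g. value = 0b1011: 11 & 10 = 10, 10 & 9 = 8, 8 & 7 = 0, giving 3.  */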
2418 
2419 /* Return the number of bits set in BMAP.  */
2420 static unsigned
2421 bitmap_popcount (const sbitmap bmap)
2422 {
2423   unsigned int count = 0;
2424   unsigned int n = 0;
2425   sbitmap_iterator sbi;
2426 
2427   EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2428     count++;
2429   return count;
2430 }
2431 
2432 typedef struct
2433 {
2434   machine_mode mode;
2435   const char *name;
2436 } arm_fixed_mode_set;
2437 
2438 /* A small helper for setting fixed-point libfuncs.  */
2439 
2440 static void
2441 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2442 			     const char *funcname, const char *modename,
2443 			     int num_suffix)
2444 {
2445   char buffer[50];
2446 
2447   if (num_suffix == 0)
2448     sprintf (buffer, "__gnu_%s%s", funcname, modename);
2449   else
2450     sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2451 
2452   set_optab_libfunc (optable, mode, buffer);
2453 }
2454 
2455 static void
2456 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2457 			    machine_mode from, const char *funcname,
2458 			    const char *toname, const char *fromname)
2459 {
2460   char buffer[50];
2461   const char *maybe_suffix_2 = "";
2462 
2463   /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
2464   if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2465       && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2466       && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2467     maybe_suffix_2 = "2";
2468 
2469   sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2470 	   maybe_suffix_2);
2471 
2472   set_conv_libfunc (optable, to, from, buffer);
2473 }
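
/* For illustration (assuming the standard fixed-bit naming that these helpers
   follow): arm_set_fixed_optab_libfunc above builds names such as
   "__gnu_addqq3" (funcname "add", mode suffix "qq", num_suffix 3), while
   arm_set_fixed_conv_libfunc builds e.g. "__gnu_fractsqdq2" for a signed
   SQ -> DQ conversion (both modes are fract with matching signedness, so the
   "2" suffix applies) but "__gnu_fractsisq" for SImode -> SQmode.  */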
2474 
2475 /* Set up library functions unique to ARM.  */
2476 
2477 static void
2478 arm_init_libfuncs (void)
2479 {
2480   /* For Linux, we have access to kernel support for atomic operations.  */
2481   if (arm_abi == ARM_ABI_AAPCS_LINUX)
2482     init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2483 
2484   /* There are no special library functions unless we are using the
2485      ARM BPABI.  */
2486   if (!TARGET_BPABI)
2487     return;
2488 
2489   /* The functions below are described in Section 4 of the "Run-Time
2490      ABI for the ARM architecture", Version 1.0.  */
2491 
2492   /* Double-precision floating-point arithmetic.  Table 2.  */
2493   set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2494   set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2495   set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2496   set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2497   set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2498 
2499   /* Double-precision comparisons.  Table 3.  */
2500   set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2501   set_optab_libfunc (ne_optab, DFmode, NULL);
2502   set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2503   set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2504   set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2505   set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2506   set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2507 
2508   /* Single-precision floating-point arithmetic.  Table 4.  */
2509   set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2510   set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2511   set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2512   set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2513   set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2514 
2515   /* Single-precision comparisons.  Table 5.  */
2516   set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2517   set_optab_libfunc (ne_optab, SFmode, NULL);
2518   set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2519   set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2520   set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2521   set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2522   set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2523 
2524   /* Floating-point to integer conversions.  Table 6.  */
2525   set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2526   set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2527   set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2528   set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2529   set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2530   set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2531   set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2532   set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2533 
2534   /* Conversions between floating types.  Table 7.  */
2535   set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2536   set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2537 
2538   /* Integer to floating-point conversions.  Table 8.  */
2539   set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2540   set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2541   set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2542   set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2543   set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2544   set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2545   set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2546   set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2547 
2548   /* Long long.  Table 9.  */
2549   set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2550   set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2551   set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2552   set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2553   set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2554   set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2555   set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2556   set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2557 
2558   /* Integer (32/32->32) division.  \S 4.3.1.  */
2559   set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2560   set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2561 
2562   /* The divmod functions are designed so that they can be used for
2563      plain division, even though they return both the quotient and the
2564      remainder.  The quotient is returned in the usual location (i.e.,
2565      r0 for SImode, {r0, r1} for DImode), just as would be expected
2566      for an ordinary division routine.  Because the AAPCS calling
2567      conventions specify that all of { r0, r1, r2, r3 } are
2568      call-clobbered registers, there is no need to tell the compiler
2569      explicitly that those registers are clobbered by these
2570      routines.  */
2571   set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2572   set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
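  /* For example, __aeabi_idivmod returns the quotient in r0 and the remainder
     in r1, and __aeabi_uldivmod returns them in {r0, r1} and {r2, r3}; code
     that only wants the quotient simply ignores the remainder registers.  */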
2573 
2574   /* For SImode division the ABI provides div-without-mod routines,
2575      which are faster.  */
2576   set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2577   set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2578 
2579   /* We don't have mod libcalls.  Fortunately gcc knows how to use the
2580      divmod libcalls instead.  */
2581   set_optab_libfunc (smod_optab, DImode, NULL);
2582   set_optab_libfunc (umod_optab, DImode, NULL);
2583   set_optab_libfunc (smod_optab, SImode, NULL);
2584   set_optab_libfunc (umod_optab, SImode, NULL);
2585 
2586   /* Half-precision float operations.  The compiler handles all operations
2587      with NULL libfuncs by converting to SFmode.  */
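  /* In effect, an HFmode operation that is left with a NULL libfunc below is
     carried out by widening its operands to SFmode (via the __gnu_h2f_*
     routine registered just below), operating in SFmode, and truncating the
     result back with the matching __gnu_f2h_* routine.  */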
2588   switch (arm_fp16_format)
2589     {
2590     case ARM_FP16_FORMAT_IEEE:
2591     case ARM_FP16_FORMAT_ALTERNATIVE:
2592 
2593       /* Conversions.  */
2594       set_conv_libfunc (trunc_optab, HFmode, SFmode,
2595 			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
2596 			 ? "__gnu_f2h_ieee"
2597 			 : "__gnu_f2h_alternative"));
2598       set_conv_libfunc (sext_optab, SFmode, HFmode,
2599 			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
2600 			 ? "__gnu_h2f_ieee"
2601 			 : "__gnu_h2f_alternative"));
2602 
2603       set_conv_libfunc (trunc_optab, HFmode, DFmode,
2604 			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
2605 			 ? "__gnu_d2h_ieee"
2606 			 : "__gnu_d2h_alternative"));
2607 
2608       /* Arithmetic.  */
2609       set_optab_libfunc (add_optab, HFmode, NULL);
2610       set_optab_libfunc (sdiv_optab, HFmode, NULL);
2611       set_optab_libfunc (smul_optab, HFmode, NULL);
2612       set_optab_libfunc (neg_optab, HFmode, NULL);
2613       set_optab_libfunc (sub_optab, HFmode, NULL);
2614 
2615       /* Comparisons.  */
2616       set_optab_libfunc (eq_optab, HFmode, NULL);
2617       set_optab_libfunc (ne_optab, HFmode, NULL);
2618       set_optab_libfunc (lt_optab, HFmode, NULL);
2619       set_optab_libfunc (le_optab, HFmode, NULL);
2620       set_optab_libfunc (ge_optab, HFmode, NULL);
2621       set_optab_libfunc (gt_optab, HFmode, NULL);
2622       set_optab_libfunc (unord_optab, HFmode, NULL);
2623       break;
2624 
2625     default:
2626       break;
2627     }
2628 
2629   /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
2630   {
2631     const arm_fixed_mode_set fixed_arith_modes[] =
2632       {
2633 	{ E_QQmode, "qq" },
2634 	{ E_UQQmode, "uqq" },
2635 	{ E_HQmode, "hq" },
2636 	{ E_UHQmode, "uhq" },
2637 	{ E_SQmode, "sq" },
2638 	{ E_USQmode, "usq" },
2639 	{ E_DQmode, "dq" },
2640 	{ E_UDQmode, "udq" },
2641 	{ E_TQmode, "tq" },
2642 	{ E_UTQmode, "utq" },
2643 	{ E_HAmode, "ha" },
2644 	{ E_UHAmode, "uha" },
2645 	{ E_SAmode, "sa" },
2646 	{ E_USAmode, "usa" },
2647 	{ E_DAmode, "da" },
2648 	{ E_UDAmode, "uda" },
2649 	{ E_TAmode, "ta" },
2650 	{ E_UTAmode, "uta" }
2651       };
2652     const arm_fixed_mode_set fixed_conv_modes[] =
2653       {
2654 	{ E_QQmode, "qq" },
2655 	{ E_UQQmode, "uqq" },
2656 	{ E_HQmode, "hq" },
2657 	{ E_UHQmode, "uhq" },
2658 	{ E_SQmode, "sq" },
2659 	{ E_USQmode, "usq" },
2660 	{ E_DQmode, "dq" },
2661 	{ E_UDQmode, "udq" },
2662 	{ E_TQmode, "tq" },
2663 	{ E_UTQmode, "utq" },
2664 	{ E_HAmode, "ha" },
2665 	{ E_UHAmode, "uha" },
2666 	{ E_SAmode, "sa" },
2667 	{ E_USAmode, "usa" },
2668 	{ E_DAmode, "da" },
2669 	{ E_UDAmode, "uda" },
2670 	{ E_TAmode, "ta" },
2671 	{ E_UTAmode, "uta" },
2672 	{ E_QImode, "qi" },
2673 	{ E_HImode, "hi" },
2674 	{ E_SImode, "si" },
2675 	{ E_DImode, "di" },
2676 	{ E_TImode, "ti" },
2677 	{ E_SFmode, "sf" },
2678 	{ E_DFmode, "df" }
2679       };
2680     unsigned int i, j;
2681 
2682     for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2683       {
2684 	arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2685 				     "add", fixed_arith_modes[i].name, 3);
2686 	arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2687 				     "ssadd", fixed_arith_modes[i].name, 3);
2688 	arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2689 				     "usadd", fixed_arith_modes[i].name, 3);
2690 	arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2691 				     "sub", fixed_arith_modes[i].name, 3);
2692 	arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2693 				     "sssub", fixed_arith_modes[i].name, 3);
2694 	arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2695 				     "ussub", fixed_arith_modes[i].name, 3);
2696 	arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2697 				     "mul", fixed_arith_modes[i].name, 3);
2698 	arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2699 				     "ssmul", fixed_arith_modes[i].name, 3);
2700 	arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2701 				     "usmul", fixed_arith_modes[i].name, 3);
2702 	arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2703 				     "div", fixed_arith_modes[i].name, 3);
2704 	arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2705 				     "udiv", fixed_arith_modes[i].name, 3);
2706 	arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2707 				     "ssdiv", fixed_arith_modes[i].name, 3);
2708 	arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2709 				     "usdiv", fixed_arith_modes[i].name, 3);
2710 	arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2711 				     "neg", fixed_arith_modes[i].name, 2);
2712 	arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2713 				     "ssneg", fixed_arith_modes[i].name, 2);
2714 	arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2715 				     "usneg", fixed_arith_modes[i].name, 2);
2716 	arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2717 				     "ashl", fixed_arith_modes[i].name, 3);
2718 	arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2719 				     "ashr", fixed_arith_modes[i].name, 3);
2720 	arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2721 				     "lshr", fixed_arith_modes[i].name, 3);
2722 	arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2723 				     "ssashl", fixed_arith_modes[i].name, 3);
2724 	arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2725 				     "usashl", fixed_arith_modes[i].name, 3);
2726 	arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2727 				     "cmp", fixed_arith_modes[i].name, 2);
2728       }
2729 
2730     for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2731       for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2732 	{
2733 	  if (i == j
2734 	      || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2735 		  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2736 	    continue;
2737 
2738 	  arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2739 				      fixed_conv_modes[j].mode, "fract",
2740 				      fixed_conv_modes[i].name,
2741 				      fixed_conv_modes[j].name);
2742 	  arm_set_fixed_conv_libfunc (satfract_optab,
2743 				      fixed_conv_modes[i].mode,
2744 				      fixed_conv_modes[j].mode, "satfract",
2745 				      fixed_conv_modes[i].name,
2746 				      fixed_conv_modes[j].name);
2747 	  arm_set_fixed_conv_libfunc (fractuns_optab,
2748 				      fixed_conv_modes[i].mode,
2749 				      fixed_conv_modes[j].mode, "fractuns",
2750 				      fixed_conv_modes[i].name,
2751 				      fixed_conv_modes[j].name);
2752 	  arm_set_fixed_conv_libfunc (satfractuns_optab,
2753 				      fixed_conv_modes[i].mode,
2754 				      fixed_conv_modes[j].mode, "satfractuns",
2755 				      fixed_conv_modes[i].name,
2756 				      fixed_conv_modes[j].name);
2757 	}
2758   }
2759 
2760   if (TARGET_AAPCS_BASED)
2761     synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2762 }
2763 
2764 /* On AAPCS systems, this is the "struct __va_list".  */
2765 static GTY(()) tree va_list_type;
2766 
2767 /* Return the type to use as __builtin_va_list.  */
2768 static tree
2769 arm_build_builtin_va_list (void)
2770 {
2771   tree va_list_name;
2772   tree ap_field;
2773 
2774   if (!TARGET_AAPCS_BASED)
2775     return std_build_builtin_va_list ();
2776 
2777   /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2778      defined as:
2779 
2780        struct __va_list
2781        {
2782 	 void *__ap;
2783        };
2784 
2785      The C Library ABI further reinforces this definition in \S
2786      4.1.
2787 
2788      We must follow this definition exactly.  The structure tag
2789      name is visible in C++ mangled names, and thus forms a part
2790      of the ABI.  The field name may be used by people who
2791      #include <stdarg.h>.  */
2792   /* Create the type.  */
2793   va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2794   /* Give it the required name.  */
2795   va_list_name = build_decl (BUILTINS_LOCATION,
2796 			     TYPE_DECL,
2797 			     get_identifier ("__va_list"),
2798 			     va_list_type);
2799   DECL_ARTIFICIAL (va_list_name) = 1;
2800   TYPE_NAME (va_list_type) = va_list_name;
2801   TYPE_STUB_DECL (va_list_type) = va_list_name;
2802   /* Create the __ap field.  */
2803   ap_field = build_decl (BUILTINS_LOCATION,
2804 			 FIELD_DECL,
2805 			 get_identifier ("__ap"),
2806 			 ptr_type_node);
2807   DECL_ARTIFICIAL (ap_field) = 1;
2808   DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2809   TYPE_FIELDS (va_list_type) = ap_field;
2810   /* Compute its layout.  */
2811   layout_type (va_list_type);
2812 
2813   return va_list_type;
2814 }
2815 
2816 /* Return an expression of type "void *" pointing to the next
2817    available argument in a variable-argument list.  VALIST is the
2818    user-level va_list object, of type __builtin_va_list.  */
2819 static tree
2820 arm_extract_valist_ptr (tree valist)
2821 {
2822   if (TREE_TYPE (valist) == error_mark_node)
2823     return error_mark_node;
2824 
2825   /* On an AAPCS target, the pointer is stored within "struct
2826      va_list".  */
2827   if (TARGET_AAPCS_BASED)
2828     {
2829       tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2830       valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2831 		       valist, ap_field, NULL_TREE);
2832     }
2833 
2834   return valist;
2835 }
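
/* For an AAPCS "va_list ap", the COMPONENT_REF built above is effectively
   ap.__ap, i.e. the single pointer field of the structure created in
   arm_build_builtin_va_list.  */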
2836 
2837 /* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
2838 static void
2839 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2840 {
2841   valist = arm_extract_valist_ptr (valist);
2842   std_expand_builtin_va_start (valist, nextarg);
2843 }
2844 
2845 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
2846 static tree
2847 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2848 			  gimple_seq *post_p)
2849 {
2850   valist = arm_extract_valist_ptr (valist);
2851   return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2852 }
2853 
2854 /* Check any incompatible options that the user has specified.  */
2855 static void
2856 arm_option_check_internal (struct gcc_options *opts)
2857 {
2858   int flags = opts->x_target_flags;
2859 
2860   /* iWMMXt and NEON are incompatible.  */
2861   if (TARGET_IWMMXT
2862       && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2863     error ("iWMMXt and NEON are incompatible");
2864 
2865   /* Make sure that the processor choice does not conflict with any of the
2866      other command line choices.  */
2867   if (TARGET_ARM_P (flags)
2868       && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2869     error ("target CPU does not support ARM mode");
2870 
2871   /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet.  */
2872   if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2873     warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2874 
2875   if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2876     warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2877 
2878   /* If this target is normally configured to use APCS frames, warn if they
2879      are turned off and debugging is turned on.  */
2880   if (TARGET_ARM_P (flags)
2881       && write_symbols != NO_DEBUG
2882       && !TARGET_APCS_FRAME
2883       && (TARGET_DEFAULT & MASK_APCS_FRAME))
2884     warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2885 
2886   /* iWMMXt unsupported under Thumb mode.  */
2887   if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2888     error ("iWMMXt unsupported under Thumb mode");
2889 
2890   if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2891     error ("can not use -mtp=cp15 with 16-bit Thumb");
2892 
2893   if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2894     {
2895       error ("RTP PIC is incompatible with Thumb");
2896       flag_pic = 0;
2897     }
2898 
2899   /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2900      with MOVT.  */
2901   if ((target_pure_code || target_slow_flash_data)
2902       && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2903     {
2904       const char *flag = (target_pure_code ? "-mpure-code" :
2905 					     "-mslow-flash-data");
2906       error ("%s only supports non-pic code on M-profile targets with the "
2907 	     "MOVT instruction", flag);
2908     }
2909 
2910 }
2911 
2912 /* Recompute the global settings depending on target attribute options.  */
2913 
2914 static void
2915 arm_option_params_internal (void)
2916 {
2917   /* If we are not using the default (ARM mode) section anchor offset
2918      ranges, then set the correct ranges now.  */
2919   if (TARGET_THUMB1)
2920     {
2921       /* Thumb-1 LDR instructions cannot have negative offsets.
2922          Permissible positive offset ranges are 5-bit (for byte loads),
2923          6-bit (for halfword loads), or 7-bit (for word loads).
2924          Empirical results suggest a 7-bit anchor range gives the best
2925          overall code size.  */
2926       targetm.min_anchor_offset = 0;
2927       targetm.max_anchor_offset = 127;
2928     }
2929   else if (TARGET_THUMB2)
2930     {
2931       /* The minimum is set such that the total size of the block
2932          for a particular anchor is 248 + 1 + 4095 bytes, which is
2933          divisible by eight, ensuring natural spacing of anchors.  */
2934       targetm.min_anchor_offset = -248;
2935       targetm.max_anchor_offset = 4095;
2936     }
2937   else
2938     {
2939       targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2940       targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2941     }
2942 
2943   /* Increase the number of conditional instructions with -Os.  */
2944   max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2945 
2946   /* For THUMB2, we limit the conditional sequence to one IT block.  */
2947   if (TARGET_THUMB2)
2948     max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
2949 }
2950 
2951 /* True if -mflip-thumb should next add an attribute for the default
2952    mode, false if it should next add an attribute for the opposite mode.  */
2953 static GTY(()) bool thumb_flipper;
2954 
2955 /* Options after initial target override.  */
2956 static GTY(()) tree init_optimize;
2957 
2958 static void
2959 arm_override_options_after_change_1 (struct gcc_options *opts)
2960 {
2961   if (opts->x_align_functions <= 0)
2962     opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2963       && opts->x_optimize_size ? 2 : 4;
2964 }
2965 
2966 /* Implement targetm.override_options_after_change.  */
2967 
2968 static void
2969 arm_override_options_after_change (void)
2970 {
2971   arm_configure_build_target (&arm_active_target,
2972 			      TREE_TARGET_OPTION (target_option_default_node),
2973 			      &global_options_set, false);
2974 
2975   arm_override_options_after_change_1 (&global_options);
2976 }
2977 
2978 /* Implement TARGET_OPTION_SAVE.  */
2979 static void
2980 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2981 {
2982   ptr->x_arm_arch_string = opts->x_arm_arch_string;
2983   ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2984   ptr->x_arm_tune_string = opts->x_arm_tune_string;
2985 }
2986 
2987 /* Implement TARGET_OPTION_RESTORE.  */
2988 static void
2989 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2990 {
2991   opts->x_arm_arch_string = ptr->x_arm_arch_string;
2992   opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2993   opts->x_arm_tune_string = ptr->x_arm_tune_string;
2994   arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2995 			      false);
2996 }
2997 
2998 /* Reset options between modes that the user has specified.  */
2999 static void
3000 arm_option_override_internal (struct gcc_options *opts,
3001 			      struct gcc_options *opts_set)
3002 {
3003   arm_override_options_after_change_1 (opts);
3004 
3005   if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3006     {
3007       /* The default is to enable interworking, so this warning message would
3008 	 be confusing to users who have just compiled with, eg, -march=armv3.  */
3009       /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3010       opts->x_target_flags &= ~MASK_INTERWORK;
3011     }
3012 
3013   if (TARGET_THUMB_P (opts->x_target_flags)
3014       && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3015     {
3016       warning (0, "target CPU does not support THUMB instructions");
3017       opts->x_target_flags &= ~MASK_THUMB;
3018     }
3019 
3020   if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3021     {
3022       /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3023       opts->x_target_flags &= ~MASK_APCS_FRAME;
3024     }
3025 
3026   /* Callee super interworking implies thumb interworking.  Adding
3027      this to the flags here simplifies the logic elsewhere.  */
3028   if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3029     opts->x_target_flags |= MASK_INTERWORK;
3030 
3031   /* Need to remember initial values so combinations of options like
3032      -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
3033   cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3034 
3035   if (! opts_set->x_arm_restrict_it)
3036     opts->x_arm_restrict_it = arm_arch8;
3037 
3038   /* ARM execution state and M profile don't have [restrict] IT.  */
3039   if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3040     opts->x_arm_restrict_it = 0;
3041 
3042   /* Enable -munaligned-access by default for
3043      - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3044      i.e. Thumb2 and ARM state only.
3045      - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3046      - ARMv8 architecture-based processors.
3047 
3048      Disable -munaligned-access by default for
3049      - all pre-ARMv6 architecture-based processors
3050      - ARMv6-M architecture-based processors
3051      - ARMv8-M Baseline processors.  */
3052 
3053   if (! opts_set->x_unaligned_access)
3054     {
3055       opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3056 			  && arm_arch6 && (arm_arch_notm || arm_arch7));
3057     }
3058   else if (opts->x_unaligned_access == 1
3059 	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3060     {
3061       warning (0, "target CPU does not support unaligned accesses");
3062       opts->x_unaligned_access = 0;
3063     }
3064 
3065   /* Don't warn since it's on by default in -O2.  */
3066   if (TARGET_THUMB1_P (opts->x_target_flags))
3067     opts->x_flag_schedule_insns = 0;
3068   else
3069     opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3070 
3071   /* Disable shrink-wrap when optimizing function for size, since it tends to
3072      generate additional returns.  */
3073   if (optimize_function_for_size_p (cfun)
3074       && TARGET_THUMB2_P (opts->x_target_flags))
3075     opts->x_flag_shrink_wrap = false;
3076   else
3077     opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3078 
3079   /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3080      - epilogue_insns - does not accurately model the corresponding insns
3081      emitted in the asm file.  In particular, see the comment in thumb_exit
3082      'Find out how many of the (return) argument registers we can corrupt'.
3083      As a consequence, the epilogue may clobber registers without fipa-ra
3084      finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
3085      TODO: Accurately model clobbers for epilogue_insns and reenable
3086      fipa-ra.  */
3087   if (TARGET_THUMB1_P (opts->x_target_flags))
3088     opts->x_flag_ipa_ra = 0;
3089   else
3090     opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3091 
3092   /* Thumb2 inline assembly code should always use unified syntax.
3093      This will apply to ARM and Thumb1 eventually.  */
3094   if (TARGET_THUMB2_P (opts->x_target_flags))
3095     opts->x_inline_asm_unified = true;
3096 
3097 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3098   SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3099 #endif
3100 }
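
/* [Editorial note -- illustration only, not part of arm.c]  The per-function
   option reset above is what lets one translation unit mix instruction
   states.  A minimal user-level sketch (assuming a target that supports both
   ARM and Thumb states):  */
int __attribute__ ((target ("thumb"))) f_thumb (int x) { return x + 1; }
int __attribute__ ((target ("arm")))   f_arm (int x)   { return x + 1; }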
3101 
3102 static sbitmap isa_all_fpubits;
3103 static sbitmap isa_quirkbits;
3104 
3105 /* Configure a build target TARGET from the user-specified options OPTS and
3106    OPTS_SET.  If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3107    architecture have been specified, but the two are not identical.  */
3108 void
3109 arm_configure_build_target (struct arm_build_target *target,
3110 			    struct cl_target_option *opts,
3111 			    struct gcc_options *opts_set,
3112 			    bool warn_compatible)
3113 {
3114   const cpu_option *arm_selected_tune = NULL;
3115   const arch_option *arm_selected_arch = NULL;
3116   const cpu_option *arm_selected_cpu = NULL;
3117   const arm_fpu_desc *arm_selected_fpu = NULL;
3118   const char *tune_opts = NULL;
3119   const char *arch_opts = NULL;
3120   const char *cpu_opts = NULL;
3121 
3122   bitmap_clear (target->isa);
3123   target->core_name = NULL;
3124   target->arch_name = NULL;
3125 
3126   if (opts_set->x_arm_arch_string)
3127     {
3128       arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3129 						      "-march",
3130 						      opts->x_arm_arch_string);
3131       arch_opts = strchr (opts->x_arm_arch_string, '+');
3132     }
3133 
3134   if (opts_set->x_arm_cpu_string)
3135     {
3136       arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3137 						    opts->x_arm_cpu_string);
3138       cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3139       arm_selected_tune = arm_selected_cpu;
3140       /* If taking the tuning from -mcpu, we don't need to rescan the
3141 	 options for tuning.  */
3142     }
3143 
3144   if (opts_set->x_arm_tune_string)
3145     {
3146       arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3147 						     opts->x_arm_tune_string);
3148       tune_opts = strchr (opts->x_arm_tune_string, '+');
3149     }
3150 
3151   if (arm_selected_arch)
3152     {
3153       arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3154       arm_parse_option_features (target->isa, &arm_selected_arch->common,
3155 				 arch_opts);
3156 
3157       if (arm_selected_cpu)
3158 	{
3159 	  auto_sbitmap cpu_isa (isa_num_bits);
3160 	  auto_sbitmap isa_delta (isa_num_bits);
3161 
3162 	  arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3163 	  arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3164 				     cpu_opts);
3165 	  bitmap_xor (isa_delta, cpu_isa, target->isa);
3166 	  /* Ignore any bits that are quirk bits.  */
3167 	  bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3168 	  /* Ignore (for now) any bits that might be set by -mfpu.  */
3169 	  bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3170 
3171 	  if (!bitmap_empty_p (isa_delta))
3172 	    {
3173 	      if (warn_compatible)
3174 		warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3175 			 arm_selected_cpu->common.name,
3176 			 arm_selected_arch->common.name);
3177 	      /* -march wins for code generation.
3178 		 -mcpu wins for default tuning.  */
3179 	      if (!arm_selected_tune)
3180 		arm_selected_tune = arm_selected_cpu;
3181 
3182 	      arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3183 	      target->arch_name = arm_selected_arch->common.name;
3184 	    }
3185 	  else
3186 	    {
3187 	      /* Architecture and CPU are essentially the same.
3188 		 Prefer the CPU setting.  */
3189 	      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3190 	      target->core_name = arm_selected_cpu->common.name;
3191 	      /* Copy the CPU's capabilities, so that we inherit the
3192 		 appropriate extensions and quirks.  */
3193 	      bitmap_copy (target->isa, cpu_isa);
3194 	    }
3195 	}
3196       else
3197 	{
3198 	  /* Pick a CPU based on the architecture.  */
3199 	  arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3200 	  target->arch_name = arm_selected_arch->common.name;
3201 	  /* Note: target->core_name is left unset in this path.  */
3202 	}
3203     }
3204   else if (arm_selected_cpu)
3205     {
3206       target->core_name = arm_selected_cpu->common.name;
3207       arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3208       arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3209 				 cpu_opts);
3210       arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3211     }
3212   /* If the user did not specify a processor or architecture, choose
3213      one for them.  */
3214   else
3215     {
3216       const cpu_option *sel;
3217       auto_sbitmap sought_isa (isa_num_bits);
3218       bitmap_clear (sought_isa);
3219       auto_sbitmap default_isa (isa_num_bits);
3220 
3221       arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3222 						    TARGET_CPU_DEFAULT);
3223       cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3224       gcc_assert (arm_selected_cpu->common.name);
3225 
3226       /* RWE: All of the selection logic below (to the end of this
3227 	 'if' clause) looks somewhat suspect.  It appears to be mostly
3228 	 there to support forcing thumb support when the default CPU
3229 	 does not have thumb (somewhat dubious in terms of what the
3230 	 user might be expecting).  I think it should be removed once
3231 	 support for the pre-thumb era cores is removed.  */
3232       sel = arm_selected_cpu;
3233       arm_initialize_isa (default_isa, sel->common.isa_bits);
3234       arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3235 				 cpu_opts);
3236 
3237       /* Now check to see if the user has specified any command line
3238 	 switches that require certain abilities from the cpu.  */
3239 
3240       if (TARGET_INTERWORK || TARGET_THUMB)
3241 	{
3242 	  bitmap_set_bit (sought_isa, isa_bit_thumb);
3243 	  bitmap_set_bit (sought_isa, isa_bit_mode32);
3244 
3245 	  /* There are no ARM processors that support both APCS-26 and
3246 	     interworking.  Therefore we forcibly remove MODE26 from
3247 	     the isa features here (if it was set), so that the
3248 	     search below will always be able to find a compatible
3249 	     processor.  */
3250 	  bitmap_clear_bit (default_isa, isa_bit_mode26);
3251 	}
3252 
3253       /* If there are such requirements and the default CPU does not
3254 	 satisfy them, we need to run over the complete list of
3255 	 cores looking for one that is satisfactory.  */
3256       if (!bitmap_empty_p (sought_isa)
3257 	  && !bitmap_subset_p (sought_isa, default_isa))
3258 	{
3259 	  auto_sbitmap candidate_isa (isa_num_bits);
3260 	  /* We're only interested in a CPU with at least the
3261 	     capabilities of the default CPU and the required
3262 	     additional features.  */
3263 	  bitmap_ior (default_isa, default_isa, sought_isa);
3264 
3265 	  /* Try to locate a CPU type that supports all of the abilities
3266 	     of the default CPU, plus the extra abilities requested by
3267 	     the user.  */
3268 	  for (sel = all_cores; sel->common.name != NULL; sel++)
3269 	    {
3270 	      arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3271 	      /* An exact match?  */
3272 	      if (bitmap_equal_p (default_isa, candidate_isa))
3273 		break;
3274 	    }
3275 
3276 	  if (sel->common.name == NULL)
3277 	    {
3278 	      unsigned current_bit_count = isa_num_bits;
3279 	      const cpu_option *best_fit = NULL;
3280 
3281 	      /* Ideally we would like to issue an error message here
3282 		 saying that it was not possible to find a CPU compatible
3283 		 with the default CPU, but which also supports the command
3284 		 line options specified by the programmer, and so they
3285 		 ought to use the -mcpu=<name> command line option to
3286 		 override the default CPU type.
3287 
3288 		 If we cannot find a CPU that has exactly the
3289 		 characteristics of the default CPU and the given
3290 		 command line options we scan the array again looking
3291 		 for a best match.  The best match must have at least
3292 		 the capabilities of the perfect match.  */
3293 	      for (sel = all_cores; sel->common.name != NULL; sel++)
3294 		{
3295 		  arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3296 
3297 		  if (bitmap_subset_p (default_isa, candidate_isa))
3298 		    {
3299 		      unsigned count;
3300 
3301 		      bitmap_and_compl (candidate_isa, candidate_isa,
3302 					default_isa);
3303 		      count = bitmap_popcount (candidate_isa);
3304 
3305 		      if (count < current_bit_count)
3306 			{
3307 			  best_fit = sel;
3308 			  current_bit_count = count;
3309 			}
3310 		    }
3311 
3312 		  gcc_assert (best_fit);
3313 		  sel = best_fit;
3314 		}
3315 	    }
3316 	  arm_selected_cpu = sel;
3317 	}
3318 
3319       /* Now we know the CPU, we can finally initialize the target
3320 	 structure.  */
3321       target->core_name = arm_selected_cpu->common.name;
3322       arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3323       arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3324 				 cpu_opts);
3325       arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3326     }
3327 
3328   gcc_assert (arm_selected_cpu);
3329   gcc_assert (arm_selected_arch);
3330 
3331   if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3332     {
3333       arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3334       auto_sbitmap fpu_bits (isa_num_bits);
3335 
3336       arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3337       bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3338       bitmap_ior (target->isa, target->isa, fpu_bits);
3339     }
3340 
3341   if (!arm_selected_tune)
3342     arm_selected_tune = arm_selected_cpu;
3343   else /* Validate the features passed to -mtune.  */
3344     arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3345 
3346   const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3347 
3348   /* Finish initializing the target structure.  */
3349   target->arch_pp_name = arm_selected_arch->arch;
3350   target->base_arch = arm_selected_arch->base_arch;
3351   target->profile = arm_selected_arch->profile;
3352 
3353   target->tune_flags = tune_data->tune_flags;
3354   target->tune = tune_data->tune;
3355   target->tune_core = tune_data->scheduler;
3356   arm_option_reconfigure_globals ();
3357 }
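
/* [Editorial note -- illustration only, not part of arm.c]  Example of the
   conflict handling above: with "-march=armv7-a -mcpu=cortex-m4" the ISA
   bits differ (Cortex-M4 is an ARMv7E-M core), so when WARN_COMPATIBLE is
   set a "switch -mcpu=... conflicts with -march=... switch" warning is
   issued; -march=armv7-a then wins for code generation while the -mcpu
   value is kept only as the default for tuning.  */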
3358 
3359 /* Fix up any incompatible options that the user has specified.  */
3360 static void
3361 arm_option_override (void)
3362 {
3363   static const enum isa_feature fpu_bitlist[]
3364     = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3365   static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3366   cl_target_option opts;
3367 
3368   isa_quirkbits = sbitmap_alloc (isa_num_bits);
3369   arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3370 
3371   isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3372   arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3373 
3374   arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3375 
3376   if (!global_options_set.x_arm_fpu_index)
3377     {
3378       bool ok;
3379       int fpu_index;
3380 
3381       ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3382 				  CL_TARGET);
3383       gcc_assert (ok);
3384       arm_fpu_index = (enum fpu_type) fpu_index;
3385     }
3386 
3387   cl_target_option_save (&opts, &global_options);
3388   arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3389 			      true);
3390 
3391 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3392   SUBTARGET_OVERRIDE_OPTIONS;
3393 #endif
3394 
3395   /* Initialize boolean versions of the architectural flags, for use
3396      in the arm.md file and for enabling feature flags.  */
3397   arm_option_reconfigure_globals ();
3398 
3399   arm_tune = arm_active_target.tune_core;
3400   tune_flags = arm_active_target.tune_flags;
3401   current_tune = arm_active_target.tune;
3402 
3403   /* TBD: Dwarf info for apcs frame is not handled yet.  */
3404   if (TARGET_APCS_FRAME)
3405     flag_shrink_wrap = false;
3406 
3407   if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3408     {
3409       warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3410       target_flags |= MASK_APCS_FRAME;
3411     }
3412 
3413   if (TARGET_POKE_FUNCTION_NAME)
3414     target_flags |= MASK_APCS_FRAME;
3415 
3416   if (TARGET_APCS_REENT && flag_pic)
3417     error ("-fpic and -mapcs-reent are incompatible");
3418 
3419   if (TARGET_APCS_REENT)
3420     warning (0, "APCS reentrant code not supported.  Ignored");
3421 
3422   /* Set up some tuning parameters.  */
3423   arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3424   arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3425   arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3426   arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3427   arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3428   arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3429 
3430   /* For arm2/3 there is no need to do any scheduling if we are doing
3431      software floating-point.  */
3432   if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3433     flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3434 
3435   /* Override the default structure alignment for AAPCS ABI.  */
3436   if (!global_options_set.x_arm_structure_size_boundary)
3437     {
3438       if (TARGET_AAPCS_BASED)
3439 	arm_structure_size_boundary = 8;
3440     }
3441   else
3442     {
3443       warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3444 
3445       if (arm_structure_size_boundary != 8
3446 	  && arm_structure_size_boundary != 32
3447 	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3448 	{
3449 	  if (ARM_DOUBLEWORD_ALIGN)
3450 	    warning (0,
3451 		     "structure size boundary can only be set to 8, 32 or 64");
3452 	  else
3453 	    warning (0, "structure size boundary can only be set to 8 or 32");
3454 	  arm_structure_size_boundary
3455 	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3456 	}
3457     }
3458 
3459   if (TARGET_VXWORKS_RTP)
3460     {
3461       if (!global_options_set.x_arm_pic_data_is_text_relative)
3462 	arm_pic_data_is_text_relative = 0;
3463     }
3464   else if (flag_pic
3465 	   && !arm_pic_data_is_text_relative
3466 	   && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3467     /* When text & data segments don't have a fixed displacement, the
3468        intended use is with a single, read only, pic base register.
3469        Unless the user explicitly requested not to do that, set
3470        it.  */
3471     target_flags |= MASK_SINGLE_PIC_BASE;
3472 
3473   /* If stack checking is disabled, we can use r10 as the PIC register,
3474      which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
3475   if (flag_pic && TARGET_SINGLE_PIC_BASE)
3476     {
3477       if (TARGET_VXWORKS_RTP)
3478 	warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3479       arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3480     }
3481 
3482   if (flag_pic && TARGET_VXWORKS_RTP)
3483     arm_pic_register = 9;
3484 
3485   if (arm_pic_register_string != NULL)
3486     {
3487       int pic_register = decode_reg_name (arm_pic_register_string);
3488 
3489       if (!flag_pic)
3490 	warning (0, "-mpic-register= is useless without -fpic");
3491 
3492       /* Prevent the user from choosing an obviously stupid PIC register.  */
3493       else if (pic_register < 0 || call_used_regs[pic_register]
3494 	       || pic_register == HARD_FRAME_POINTER_REGNUM
3495 	       || pic_register == STACK_POINTER_REGNUM
3496 	       || pic_register >= PC_REGNUM
3497 	       || (TARGET_VXWORKS_RTP
3498 		   && (unsigned int) pic_register != arm_pic_register))
3499 	error ("unable to use '%s' for PIC register", arm_pic_register_string);
3500       else
3501 	arm_pic_register = pic_register;
3502     }
3503 
3504   /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
3505   if (fix_cm3_ldrd == 2)
3506     {
3507       if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3508 	fix_cm3_ldrd = 1;
3509       else
3510 	fix_cm3_ldrd = 0;
3511     }
3512 
3513   /* Hot/Cold partitioning is not currently supported, since we can't
3514      handle literal pool placement in that case.  */
3515   if (flag_reorder_blocks_and_partition)
3516     {
3517       inform (input_location,
3518 	      "-freorder-blocks-and-partition not supported on this architecture");
3519       flag_reorder_blocks_and_partition = 0;
3520       flag_reorder_blocks = 1;
3521     }
3522 
3523   if (flag_pic)
3524     /* Hoisting PIC address calculations more aggressively provides a small,
3525        but measurable, size reduction for PIC code.  Therefore, we decrease
3526        the bar for unrestricted expression hoisting to the cost of PIC address
3527        calculation, which is 2 instructions.  */
3528     maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3529 			   global_options.x_param_values,
3530 			   global_options_set.x_param_values);
3531 
3532   /* ARM EABI defaults to strict volatile bitfields.  */
3533   if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3534       && abi_version_at_least(2))
3535     flag_strict_volatile_bitfields = 1;
3536 
3537   /* Enable sw prefetching at -O3 for CPUs that have prefetch and where we
3538      have deemed it beneficial (signified by setting
3539      prefetch.num_slots to 1 or more).  */
3540   if (flag_prefetch_loop_arrays < 0
3541       && HAVE_prefetch
3542       && optimize >= 3
3543       && current_tune->prefetch.num_slots > 0)
3544     flag_prefetch_loop_arrays = 1;
3545 
3546   /* Set up parameters to be used in prefetching algorithm.  Do not
3547      override the defaults unless we are tuning for a core we have
3548      researched values for.  */
3549   if (current_tune->prefetch.num_slots > 0)
3550     maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3551 			   current_tune->prefetch.num_slots,
3552 			   global_options.x_param_values,
3553 			   global_options_set.x_param_values);
3554   if (current_tune->prefetch.l1_cache_line_size >= 0)
3555     maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3556 			   current_tune->prefetch.l1_cache_line_size,
3557 			   global_options.x_param_values,
3558 			   global_options_set.x_param_values);
3559   if (current_tune->prefetch.l1_cache_size >= 0)
3560     maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3561 			   current_tune->prefetch.l1_cache_size,
3562 			   global_options.x_param_values,
3563 			   global_options_set.x_param_values);
3564 
3565   /* Use Neon to perform 64-bit operations rather than core
3566      registers.  */
3567   prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3568   if (use_neon_for_64bits == 1)
3569      prefer_neon_for_64bits = true;
3570 
3571   /* Use the alternative scheduling-pressure algorithm by default.  */
3572   maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3573 			 global_options.x_param_values,
3574 			 global_options_set.x_param_values);
3575 
3576   /* Look through ready list and all of queue for instructions
3577      relevant for L2 auto-prefetcher.  */
3578   int param_sched_autopref_queue_depth;
3579 
3580   switch (current_tune->sched_autopref)
3581     {
3582     case tune_params::SCHED_AUTOPREF_OFF:
3583       param_sched_autopref_queue_depth = -1;
3584       break;
3585 
3586     case tune_params::SCHED_AUTOPREF_RANK:
3587       param_sched_autopref_queue_depth = 0;
3588       break;
3589 
3590     case tune_params::SCHED_AUTOPREF_FULL:
3591       param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3592       break;
3593 
3594     default:
3595       gcc_unreachable ();
3596     }
3597 
3598   maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3599 			 param_sched_autopref_queue_depth,
3600 			 global_options.x_param_values,
3601 			 global_options_set.x_param_values);
3602 
3603   /* Currently, for slow flash data, we just disable literal pools.  We also
3604      disable them for pure-code.  */
3605   if (target_slow_flash_data || target_pure_code)
3606     arm_disable_literal_pool = true;
3607 
3608   /* Disable scheduling fusion by default if the target is not an ARMv7
3609      processor or does not prefer ldrd/strd.  */
3610   if (flag_schedule_fusion == 2
3611       && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3612     flag_schedule_fusion = 0;
3613 
3614   /* Need to remember initial options before they are overridden.  */
3615   init_optimize = build_optimization_node (&global_options);
3616 
3617   arm_options_perform_arch_sanity_checks ();
3618   arm_option_override_internal (&global_options, &global_options_set);
3619   arm_option_check_internal (&global_options);
3620   arm_option_params_internal ();
3621 
3622   /* Create the default target_options structure.  */
3623   target_option_default_node = target_option_current_node
3624     = build_target_option_node (&global_options);
3625 
3626   /* Register global variables with the garbage collector.  */
3627   arm_add_gc_roots ();
3628 
3629   /* Init initial mode for testing.  */
3630   thumb_flipper = TARGET_THUMB;
3631 }
3632 
3633 
3634 /* Reconfigure global status flags from the active_target.isa.  */
3635 void
3636 arm_option_reconfigure_globals (void)
3637 {
3638   sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3639   arm_base_arch = arm_active_target.base_arch;
3640 
3641   /* Initialize boolean versions of the architectural flags, for use
3642      in the arm.md file.  */
3643   arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_armv3m);
3644   arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3645   arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3646   arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5);
3647   arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5e);
3648   arm_arch5te = arm_arch5e
3649     && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3650   arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3651   arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3652   arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3653   arm_arch6m = arm_arch6 && !arm_arch_notm;
3654   arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3655   arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3656   arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3657   arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3658   arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3659   arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3660   arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3661   arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3662   arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3663   arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3664   arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3665   arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3666   arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3667   arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3668   arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3669   arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3670   if (arm_fp16_inst)
3671     {
3672       if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3673 	error ("selected fp16 options are incompatible");
3674       arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3675     }
3676 
3677   /* And finally, set up some quirks.  */
3678   arm_arch_no_volatile_ce
3679     = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3680   arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3681 					    isa_bit_quirk_armv6kz);
3682 
3683   /* Use the cp15 method if it is available.  */
3684   if (target_thread_pointer == TP_AUTO)
3685     {
3686       if (arm_arch6k && !TARGET_THUMB1)
3687 	target_thread_pointer = TP_CP15;
3688       else
3689 	target_thread_pointer = TP_SOFT;
3690     }
3691 }
3692 
3693 /* Perform some validation between the desired architecture and the rest of the
3694    options.  */
3695 void
3696 arm_options_perform_arch_sanity_checks (void)
3697 {
3698   /* V5 code we generate is completely interworking capable, so we turn off
3699      TARGET_INTERWORK here to avoid many tests later on.  */
3700 
3701   /* XXX However, we must pass the right pre-processor defines to CPP
3702      or GLD can get confused.  This is a hack.  */
3703   if (TARGET_INTERWORK)
3704     arm_cpp_interwork = 1;
3705 
3706   if (arm_arch5)
3707     target_flags &= ~MASK_INTERWORK;
3708 
3709   if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3710     error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3711 
3712   if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3713     error ("iwmmxt abi requires an iwmmxt capable cpu");
3714 
3715   /* BPABI targets use linker tricks to allow interworking on cores
3716      without thumb support.  */
3717   if (TARGET_INTERWORK
3718       && !TARGET_BPABI
3719       && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3720     {
3721       warning (0, "target CPU does not support interworking" );
3722       target_flags &= ~MASK_INTERWORK;
3723     }
3724 
3725   /* If soft-float is specified then don't use FPU.  */
3726   if (TARGET_SOFT_FLOAT)
3727     arm_fpu_attr = FPU_NONE;
3728   else
3729     arm_fpu_attr = FPU_VFP;
3730 
3731   if (TARGET_AAPCS_BASED)
3732     {
3733       if (TARGET_CALLER_INTERWORKING)
3734 	error ("AAPCS does not support -mcaller-super-interworking");
3735       else
3736 	if (TARGET_CALLEE_INTERWORKING)
3737 	  error ("AAPCS does not support -mcallee-super-interworking");
3738     }
3739 
3740   /* __fp16 support currently assumes the core has ldrh.  */
3741   if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3742     sorry ("__fp16 and no ldrh");
3743 
3744   if (use_cmse && !arm_arch_cmse)
3745     error ("target CPU does not support ARMv8-M Security Extensions");
3746 
3747   /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions,
3748      and ARMv8-M Baseline and Mainline do not allow such configuration.  */
3749   if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3750     error ("ARMv8-M Security Extensions incompatible with selected FPU");
3751 
3752 
3753   if (TARGET_AAPCS_BASED)
3754     {
3755       if (arm_abi == ARM_ABI_IWMMXT)
3756 	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3757       else if (TARGET_HARD_FLOAT_ABI)
3758 	{
3759 	  arm_pcs_default = ARM_PCS_AAPCS_VFP;
3760 	  if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3761 	    error ("-mfloat-abi=hard: selected processor lacks an FPU");
3762 	}
3763       else
3764 	arm_pcs_default = ARM_PCS_AAPCS;
3765     }
3766   else
3767     {
3768       if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3769 	sorry ("-mfloat-abi=hard and VFP");
3770 
3771       if (arm_abi == ARM_ABI_APCS)
3772 	arm_pcs_default = ARM_PCS_APCS;
3773       else
3774 	arm_pcs_default = ARM_PCS_ATPCS;
3775     }
3776 }
3777 
3778 static void
3779 arm_add_gc_roots (void)
3780 {
3781   gcc_obstack_init(&minipool_obstack);
3782   minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3783 }
3784 
3785 /* A table of known ARM exception types.
3786    For use with the interrupt function attribute.  */
3787 
3788 typedef struct
3789 {
3790   const char *const arg;
3791   const unsigned long return_value;
3792 }
3793 isr_attribute_arg;
3794 
3795 static const isr_attribute_arg isr_attribute_args [] =
3796 {
3797   { "IRQ",   ARM_FT_ISR },
3798   { "irq",   ARM_FT_ISR },
3799   { "FIQ",   ARM_FT_FIQ },
3800   { "fiq",   ARM_FT_FIQ },
3801   { "ABORT", ARM_FT_ISR },
3802   { "abort", ARM_FT_ISR },
3803   { "ABORT", ARM_FT_ISR },
3804   { "abort", ARM_FT_ISR },
3805   { "UNDEF", ARM_FT_EXCEPTION },
3806   { "undef", ARM_FT_EXCEPTION },
3807   { "SWI",   ARM_FT_EXCEPTION },
3808   { "swi",   ARM_FT_EXCEPTION },
3809   { NULL,    ARM_FT_NORMAL }
3810 };
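
/* [Editorial note -- illustration only, not part of arm.c]  The strings in
   the table above are what user code passes to the interrupt/isr function
   attribute; a minimal sketch:  */
void uart_irq_handler (void) __attribute__ ((interrupt ("IRQ")));
void fast_handler (void) __attribute__ ((interrupt ("FIQ")));
void svc_handler (void) __attribute__ ((isr ("SWI")));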
3811 
3812 /* Returns the (interrupt) function type of the current
3813    function, or ARM_FT_UNKNOWN if the type cannot be determined.  */
3814 
3815 static unsigned long
3816 arm_isr_value (tree argument)
3817 {
3818   const isr_attribute_arg * ptr;
3819   const char *              arg;
3820 
3821   if (!arm_arch_notm)
3822     return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3823 
3824   /* No argument - default to IRQ.  */
3825   if (argument == NULL_TREE)
3826     return ARM_FT_ISR;
3827 
3828   /* Get the value of the argument.  */
3829   if (TREE_VALUE (argument) == NULL_TREE
3830       || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3831     return ARM_FT_UNKNOWN;
3832 
3833   arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3834 
3835   /* Check it against the list of known arguments.  */
3836   for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3837     if (streq (arg, ptr->arg))
3838       return ptr->return_value;
3839 
3840   /* An unrecognized interrupt type.  */
3841   return ARM_FT_UNKNOWN;
3842 }
3843 
3844 /* Computes the type of the current function.  */
3845 
3846 static unsigned long
3847 arm_compute_func_type (void)
3848 {
3849   unsigned long type = ARM_FT_UNKNOWN;
3850   tree a;
3851   tree attr;
3852 
3853   gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3854 
3855   /* Decide if the current function is volatile.  Such functions
3856      never return, and many memory cycles can be saved by not storing
3857      register values that will never be needed again.  This optimization
3858      was added to speed up context switching in a kernel application.  */
3859   if (optimize > 0
3860       && (TREE_NOTHROW (current_function_decl)
3861           || !(flag_unwind_tables
3862                || (flag_exceptions
3863 		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3864       && TREE_THIS_VOLATILE (current_function_decl))
3865     type |= ARM_FT_VOLATILE;
3866 
3867   if (cfun->static_chain_decl != NULL)
3868     type |= ARM_FT_NESTED;
3869 
3870   attr = DECL_ATTRIBUTES (current_function_decl);
3871 
3872   a = lookup_attribute ("naked", attr);
3873   if (a != NULL_TREE)
3874     type |= ARM_FT_NAKED;
3875 
3876   a = lookup_attribute ("isr", attr);
3877   if (a == NULL_TREE)
3878     a = lookup_attribute ("interrupt", attr);
3879 
3880   if (a == NULL_TREE)
3881     type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3882   else
3883     type |= arm_isr_value (TREE_VALUE (a));
3884 
3885   if (lookup_attribute ("cmse_nonsecure_entry", attr))
3886     type |= ARM_FT_CMSE_ENTRY;
3887 
3888   return type;
3889 }
3890 
3891 /* Returns the type of the current function.  */
3892 
3893 unsigned long
3894 arm_current_func_type (void)
3895 {
3896   if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3897     cfun->machine->func_type = arm_compute_func_type ();
3898 
3899   return cfun->machine->func_type;
3900 }
3901 
3902 bool
3903 arm_allocate_stack_slots_for_args (void)
3904 {
3905   /* Naked functions should not allocate stack slots for arguments.  */
3906   return !IS_NAKED (arm_current_func_type ());
3907 }
3908 
3909 static bool
3910 arm_warn_func_return (tree decl)
3911 {
3912   /* Naked functions are implemented entirely in assembly, including the
3913      return sequence, so suppress warnings about this.  */
3914   return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3915 }
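
/* [Editorial note -- illustration only, not part of arm.c]  The kind of
   function the check above exempts from missing-return warnings: the body,
   including the return, is written entirely in inline assembly (ARMv4T or
   later assumed for "bx lr").  */
int __attribute__ ((naked))
add_one (int x)
{
  __asm__ ("add r0, r0, #1\n\t"
	   "bx lr");
}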
3916 
3917 
3918 /* Output assembler code for a block containing the constant parts
3919    of a trampoline, leaving space for the variable parts.
3920 
3921    On the ARM, (if r8 is the static chain regnum, and remembering that
3922    referencing pc adds an offset of 8) the trampoline looks like:
3923 	   ldr 		r8, [pc, #0]
3924 	   ldr		pc, [pc]
3925 	   .word	static chain value
3926 	   .word	function's address
3927    XXX FIXME: When the trampoline returns, r8 will be clobbered.  */
3928 
3929 static void
3930 arm_asm_trampoline_template (FILE *f)
3931 {
3932   fprintf (f, "\t.syntax unified\n");
3933 
3934   if (TARGET_ARM)
3935     {
3936       fprintf (f, "\t.arm\n");
3937       asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3938       asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3939     }
3940   else if (TARGET_THUMB2)
3941     {
3942       fprintf (f, "\t.thumb\n");
3943       /* The Thumb-2 trampoline is similar to the arm implementation.
3944 	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
3945       asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3946 		   STATIC_CHAIN_REGNUM, PC_REGNUM);
3947       asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3948     }
3949   else
3950     {
3951       ASM_OUTPUT_ALIGN (f, 2);
3952       fprintf (f, "\t.code\t16\n");
3953       fprintf (f, ".Ltrampoline_start:\n");
3954       asm_fprintf (f, "\tpush\t{r0, r1}\n");
3955       asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3956       asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3957       asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3958       asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3959       asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3960     }
3961   assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3962   assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3963 }
3964 
3965 /* Emit RTL insns to initialize the variable parts of a trampoline.  */
3966 
3967 static void
3968 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3969 {
3970   rtx fnaddr, mem, a_tramp;
3971 
3972   emit_block_move (m_tramp, assemble_trampoline_template (),
3973 		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3974 
3975   mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3976   emit_move_insn (mem, chain_value);
3977 
3978   mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3979   fnaddr = XEXP (DECL_RTL (fndecl), 0);
3980   emit_move_insn (mem, fnaddr);
3981 
3982   a_tramp = XEXP (m_tramp, 0);
3983   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3984 		     LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3985 		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3986 }
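
/* [Editorial note -- illustration only, not part of arm.c]  Putting the two
   routines above together for the 32-bit ARM case (static chain register
   shown as ip/r12, the usual choice; the block comment earlier uses r8
   purely as an example):

	offset  0:  ldr  ip, [pc, #0]   @ pc reads as offset 8: loads word at 8
	offset  4:  ldr  pc, [pc, #0]   @ pc reads as offset 12: jumps via word at 12
	offset  8:  .word <static chain value>   (written by arm_trampoline_init)
	offset 12:  .word <target function>      (written by arm_trampoline_init)

   For Thumb targets, arm_trampoline_adjust_address below additionally sets
   bit 0 of the trampoline address so that it is entered in Thumb state.  */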
3987 
3988 /* Thumb trampolines should be entered in thumb mode, so set
3989    the bottom bit of the address.  */
3990 
3991 static rtx
3992 arm_trampoline_adjust_address (rtx addr)
3993 {
3994   if (TARGET_THUMB)
3995     addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3996 				NULL, 0, OPTAB_LIB_WIDEN);
3997   return addr;
3998 }
3999 
4000 /* Return 1 if it is possible to return using a single instruction.
4001    If SIBLING is non-null, this is a test for a return before a sibling
4002    call.  SIBLING is the call insn, so we can examine its register usage.  */
4003 
4004 int
4005 use_return_insn (int iscond, rtx sibling)
4006 {
4007   int regno;
4008   unsigned int func_type;
4009   unsigned long saved_int_regs;
4010   unsigned HOST_WIDE_INT stack_adjust;
4011   arm_stack_offsets *offsets;
4012 
4013   /* Never use a return instruction before reload has run.  */
4014   if (!reload_completed)
4015     return 0;
4016 
4017   func_type = arm_current_func_type ();
4018 
4019   /* Naked, volatile and stack alignment functions need special
4020      consideration.  */
4021   if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4022     return 0;
4023 
4024   /* So do interrupt functions that use the frame pointer and Thumb
4025      interrupt functions.  */
4026   if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4027     return 0;
4028 
4029   if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4030       && !optimize_function_for_size_p (cfun))
4031     return 0;
4032 
4033   offsets = arm_get_frame_offsets ();
4034   stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4035 
4036   /* As do variadic functions.  */
4037   if (crtl->args.pretend_args_size
4038       || cfun->machine->uses_anonymous_args
4039       /* Or if the function calls __builtin_eh_return () */
4040       || crtl->calls_eh_return
4041       /* Or if the function calls alloca */
4042       || cfun->calls_alloca
4043       /* Or if there is a stack adjustment.  However, if the stack pointer
4044 	 is saved on the stack, we can use a pre-incrementing stack load.  */
4045       || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4046 				 && stack_adjust == 4))
4047       /* Or if the static chain register was saved above the frame, under the
4048 	 assumption that the stack pointer isn't saved on the stack.  */
4049       || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4050           && arm_compute_static_chain_stack_bytes() != 0))
4051     return 0;
4052 
4053   saved_int_regs = offsets->saved_regs_mask;
4054 
4055   /* Unfortunately, the insn
4056 
4057        ldmib sp, {..., sp, ...}
4058 
4059      triggers a bug on most SA-110 based devices, such that the stack
4060      pointer won't be correctly restored if the instruction takes a
4061      page fault.  We work around this problem by popping r3 along with
4062      the other registers, since that is never slower than executing
4063      another instruction.
4064 
4065      We test for !arm_arch5 here, because code for any architecture
4066      less than this could potentially be run on one of the buggy
4067      chips.  */
4068   if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
4069     {
4070       /* Validate that r3 is a call-clobbered register (always true in
4071 	 the default abi) ...  */
4072       if (!call_used_regs[3])
4073 	return 0;
4074 
4075       /* ... that it isn't being used for a return value ... */
4076       if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4077 	return 0;
4078 
4079       /* ... or for a tail-call argument ...  */
4080       if (sibling)
4081 	{
4082 	  gcc_assert (CALL_P (sibling));
4083 
4084 	  if (find_regno_fusage (sibling, USE, 3))
4085 	    return 0;
4086 	}
4087 
4088       /* ... and that there are no call-saved registers in r0-r2
4089 	 (always true in the default ABI).  */
4090       if (saved_int_regs & 0x7)
4091 	return 0;
4092     }
4093 
4094   /* Can't be done if interworking with Thumb, and any registers have been
4095      stacked.  */
4096   if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4097     return 0;
4098 
4099   /* On StrongARM, conditional returns are expensive if they aren't
4100      taken and multiple registers have been stacked.  */
4101   if (iscond && arm_tune_strongarm)
4102     {
4103       /* Conditional return when just the LR is stored is a simple
4104 	 conditional-load instruction, that's not expensive.  */
4105       if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4106 	return 0;
4107 
4108       if (flag_pic
4109 	  && arm_pic_register != INVALID_REGNUM
4110 	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4111 	return 0;
4112     }
4113 
4114   /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4115      several instructions if anything needs to be popped.  */
4116   if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4117     return 0;
4118 
4119   /* If there are saved registers but the LR isn't saved, then we need
4120      two instructions for the return.  */
4121   if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4122     return 0;
4123 
4124   /* Can't be done if any of the VFP regs are pushed,
4125      since this also requires an insn.  */
4126   if (TARGET_HARD_FLOAT)
4127     for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4128       if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4129 	return 0;
4130 
4131   if (TARGET_REALLY_IWMMXT)
4132     for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4133       if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4134 	return 0;
4135 
4136   return 1;
4137 }
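
/* [Editorial note -- illustration only, not part of arm.c]  The single
   instructions this predicate is checking for are typically one of:

	bx	lr			@ nothing was saved
	pop	{r4, r5, pc}		@ saved registers including lr were pushed
	ldmfd	sp!, {r4-r8, pc}	@ ARM-mode equivalent

   If any of the conditions above fail (e.g. alloca, __builtin_eh_return,
   or VFP/iWMMXt registers needing to be restored), the epilogue takes more
   than one instruction and use_return_insn returns 0.  */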
4138 
4139 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4140    shrink-wrapping if possible.  This is the case if we need to emit a
4141    prologue, which we can test by looking at the offsets.  */
4142 bool
4143 use_simple_return_p (void)
4144 {
4145   arm_stack_offsets *offsets;
4146 
4147   /* Note this function can be called before or after reload.  */
4148   if (!reload_completed)
4149     arm_compute_frame_layout ();
4150 
4151   offsets = arm_get_frame_offsets ();
4152   return offsets->outgoing_args != 0;
4153 }
4154 
4155 /* Return TRUE if int I is a valid immediate ARM constant.  */
4156 
4157 int
4158 const_ok_for_arm (HOST_WIDE_INT i)
4159 {
4160   int lowbit;
4161 
4162   /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4163      be all zero, or all one.  */
4164   if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4165       && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4166 	  != ((~(unsigned HOST_WIDE_INT) 0)
4167 	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4168     return FALSE;
4169 
4170   i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4171 
4172   /* Fast return for 0 and small values.  We must do this for zero, since
4173      the code below can't handle that one case.  */
4174   if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4175     return TRUE;
4176 
4177   /* Get the number of trailing zeros.  */
4178   lowbit = ffs((int) i) - 1;
4179 
4180   /* Only even shifts are allowed in ARM mode so round down to the
4181      nearest even number.  */
4182   if (TARGET_ARM)
4183     lowbit &= ~1;
4184 
4185   if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4186     return TRUE;
4187 
4188   if (TARGET_ARM)
4189     {
4190       /* Allow rotated constants in ARM mode.  */
4191       if (lowbit <= 4
4192 	   && ((i & ~0xc000003f) == 0
4193 	       || (i & ~0xf000000f) == 0
4194 	       || (i & ~0xfc000003) == 0))
4195 	return TRUE;
4196     }
4197   else if (TARGET_THUMB2)
4198     {
4199       HOST_WIDE_INT v;
4200 
4201       /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
4202       v = i & 0xff;
4203       v |= v << 16;
4204       if (i == v || i == (v | (v << 8)))
4205 	return TRUE;
4206 
4207       /* Allow repeated pattern 0xXY00XY00.  */
4208       v = i & 0xff00;
4209       v |= v << 16;
4210       if (i == v)
4211 	return TRUE;
4212     }
4213   else if (TARGET_HAVE_MOVT)
4214     {
4215       /* Thumb-1 Targets with MOVT.  */
4216       if (i > 0xffff)
4217 	return FALSE;
4218       else
4219 	return TRUE;
4220     }
4221 
4222   return FALSE;
4223 }
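
/* [Editorial note -- illustration only, not part of arm.c]  A self-contained
   sketch of the ARM-mode rule implemented above: a 32-bit value is a valid
   immediate when it is an 8-bit constant rotated right by an even amount.
   For example 0xff, 0xff000000 and 0xf000000f are encodable, while 0x101
   and 0x1fe are not.  The helper name is made up for illustration.  */
#include <stdint.h>

static int
editorial_arm_mode_immediate_p (uint32_t x)
{
  for (unsigned int rot = 0; rot < 32; rot += 2)
    {
      /* Rotate X left by ROT and see whether the result fits in 8 bits.  */
      uint32_t v = rot ? ((x << rot) | (x >> (32 - rot))) : x;
      if ((v & ~(uint32_t) 0xff) == 0)
	return 1;
    }
  return 0;
}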
4224 
4225 /* Return true if I is a valid constant for the operation CODE.  */
4226 int
4227 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4228 {
4229   if (const_ok_for_arm (i))
4230     return 1;
4231 
4232   switch (code)
4233     {
4234     case SET:
4235       /* See if we can use movw.  */
4236       if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4237 	return 1;
4238       else
4239 	/* Otherwise, try mvn.  */
4240 	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4241 
4242     case PLUS:
4243       /* See if we can use addw or subw.  */
4244       if (TARGET_THUMB2
4245 	  && ((i & 0xfffff000) == 0
4246 	      || ((-i) & 0xfffff000) == 0))
4247 	return 1;
4248       /* Fall through.  */
4249     case COMPARE:
4250     case EQ:
4251     case NE:
4252     case GT:
4253     case LE:
4254     case LT:
4255     case GE:
4256     case GEU:
4257     case LTU:
4258     case GTU:
4259     case LEU:
4260     case UNORDERED:
4261     case ORDERED:
4262     case UNEQ:
4263     case UNGE:
4264     case UNLT:
4265     case UNGT:
4266     case UNLE:
4267       return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4268 
4269     case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
4270     case XOR:
4271       return 0;
4272 
4273     case IOR:
4274       if (TARGET_THUMB2)
4275 	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4276       return 0;
4277 
4278     case AND:
4279       return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4280 
4281     default:
4282       gcc_unreachable ();
4283     }
4284 }
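
/* [Editorial note -- illustration only, not part of arm.c]  Concrete cases
   for the helper above: for CODE == SET, 0xffffff00 is not itself a valid
   ARM immediate, but its complement 0xff is, so the value can be built with
   "mvn rX, #255"; for CODE == PLUS, an addend of -4096 is accepted because
   const_ok_for_arm (4096) holds, letting the add be emitted as
   "sub rX, rY, #4096".  */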
4285 
4286 /* Return true if I is a valid di mode constant for the operation CODE.  */
4287 int
4288 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4289 {
4290   HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4291   HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4292   rtx hi = GEN_INT (hi_val);
4293   rtx lo = GEN_INT (lo_val);
4294 
4295   if (TARGET_THUMB1)
4296     return 0;
4297 
4298   switch (code)
4299     {
4300     case AND:
4301     case IOR:
4302     case XOR:
4303       return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4304               && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4305     case PLUS:
4306       return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4307 
4308     default:
4309       return 0;
4310     }
4311 }
4312 
4313 /* Emit a sequence of insns to handle a large constant.
4314    CODE is the code of the operation required, it can be any of SET, PLUS,
4315    IOR, AND, XOR, MINUS;
4316    MODE is the mode in which the operation is being performed;
4317    VAL is the integer to operate on;
4318    SOURCE is the other operand (a register, or a null-pointer for SET);
4319    SUBTARGETS means it is safe to create scratch registers if that will
4320    either produce a simpler sequence, or we will want to cse the values.
4321    Return value is the number of insns emitted.  */
4322 
4323 /* ??? Tweak this for thumb2.  */
4324 int
4325 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4326 		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4327 {
4328   rtx cond;
4329 
4330   if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4331     cond = COND_EXEC_TEST (PATTERN (insn));
4332   else
4333     cond = NULL_RTX;
4334 
4335   if (subtargets || code == SET
4336       || (REG_P (target) && REG_P (source)
4337 	  && REGNO (target) != REGNO (source)))
4338     {
4339       /* After arm_reorg has been called, we can't fix up expensive
4340 	 constants by pushing them into memory so we must synthesize
4341 	 them in-line, regardless of the cost.  This is only likely to
4342 	 be more costly on chips that have load delay slots and we are
4343 	 compiling without running the scheduler (so no splitting
4344 	 occurred before the final instruction emission).
4345 
4346 	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4347       */
4348       if (!cfun->machine->after_arm_reorg
4349 	  && !cond
4350 	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4351 				1, 0)
4352 	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
4353 		 + (code != SET))))
4354 	{
4355 	  if (code == SET)
4356 	    {
4357 	      /* Currently SET is the only monadic value for CODE, all
4358 		 the rest are dyadic.  */
4359 	      if (TARGET_USE_MOVT)
4360 		arm_emit_movpair (target, GEN_INT (val));
4361 	      else
4362 		emit_set_insn (target, GEN_INT (val));
4363 
4364 	      return 1;
4365 	    }
4366 	  else
4367 	    {
4368 	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4369 
4370 	      if (TARGET_USE_MOVT)
4371 		arm_emit_movpair (temp, GEN_INT (val));
4372 	      else
4373 		emit_set_insn (temp, GEN_INT (val));
4374 
4375 	      /* For MINUS, the value is subtracted from, since we never
4376 		 have subtraction of a constant.  */
4377 	      if (code == MINUS)
4378 		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4379 	      else
4380 		emit_set_insn (target,
4381 			       gen_rtx_fmt_ee (code, mode, source, temp));
4382 	      return 2;
4383 	    }
4384 	}
4385     }
4386 
4387   return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4388 			   1);
4389 }
4390 
4391 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4392    ARM/THUMB2 immediates, and add up to VAL.
4393    The function return value gives the number of insns required.  */
4394 static int
4395 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4396 			    struct four_ints *return_sequence)
4397 {
4398   int best_consecutive_zeros = 0;
4399   int i;
4400   int best_start = 0;
4401   int insns1, insns2;
4402   struct four_ints tmp_sequence;
4403 
4404   /* If we aren't targeting ARM, the best place to start is always at
4405      the bottom, otherwise look more closely.  */
4406   if (TARGET_ARM)
4407     {
4408       for (i = 0; i < 32; i += 2)
4409 	{
4410 	  int consecutive_zeros = 0;
4411 
4412 	  if (!(val & (3 << i)))
4413 	    {
4414 	      while ((i < 32) && !(val & (3 << i)))
4415 		{
4416 		  consecutive_zeros += 2;
4417 		  i += 2;
4418 		}
4419 	      if (consecutive_zeros > best_consecutive_zeros)
4420 		{
4421 		  best_consecutive_zeros = consecutive_zeros;
4422 		  best_start = i - consecutive_zeros;
4423 		}
4424 	      i -= 2;
4425 	    }
4426 	}
4427     }
4428 
4429   /* So long as it won't require any more insns to do so, it's
4430      desirable to emit a small constant (in bits 0...9) in the last
4431      insn.  This way there is more chance that it can be combined with
4432      a later addressing insn to form a pre-indexed load or store
4433      operation.  Consider:
4434 
4435 	   *((volatile int *)0xe0000100) = 1;
4436 	   *((volatile int *)0xe0000110) = 2;
4437 
4438      We want this to wind up as:
4439 
4440 	    mov rA, #0xe0000000
4441 	    mov rB, #1
4442 	    str rB, [rA, #0x100]
4443 	    mov rB, #2
4444 	    str rB, [rA, #0x110]
4445 
4446      rather than having to synthesize both large constants from scratch.
4447 
4448      Therefore, we calculate how many insns would be required to emit
4449      the constant starting from `best_start', and also starting from
4450      zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
4451      yield a shorter sequence, we may as well use zero.  */
4452   insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4453   if (best_start != 0
4454       && ((HOST_WIDE_INT_1U << best_start) < val))
4455     {
4456       insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4457       if (insns2 <= insns1)
4458 	{
4459 	  *return_sequence = tmp_sequence;
4460 	  insns1 = insns2;
4461 	}
4462     }
4463 
4464   return insns1;
4465 }
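
/* [Editorial note -- illustration only, not part of arm.c]  Example of the
   search above for a SET in ARM mode (without movw/movt): 0x12345678 cannot
   be covered by fewer than four 8-bit rotated chunks, so the returned
   sequence corresponds to

	mov	rX, #0x12000000
	orr	rX, rX, #0x00340000
	orr	rX, rX, #0x00005600
	orr	rX, rX, #0x00000078

   whereas 0x00ff00ff needs only two chunks (#0x00ff0000 and #0x000000ff)
   and, in Thumb-2, is a single replicated-byte immediate.  */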
4466 
4467 /* As for optimal_immediate_sequence, but starting at bit-position I.  */
4468 static int
4469 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4470 			     struct four_ints *return_sequence, int i)
4471 {
4472   int remainder = val & 0xffffffff;
4473   int insns = 0;
4474 
4475   /* Try and find a way of doing the job in either two or three
4476      instructions.
4477 
4478      In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4479      location.  We start at position I.  This may be the MSB, or
4480      optimal_immediate_sequence may have positioned it at the largest block
4481      of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4482      wrapping around to the top of the word when we drop off the bottom.
4483      In the worst case this code should produce no more than four insns.
4484 
4485      In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4486      constants, shifted to any arbitrary location.  We should always start
4487      at the MSB.  */
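  /* Illustrative examples (not exhaustive): 0x0000ff00 is a single
     rotated 8-bit ARM immediate, while 0x00ff00ff is not, but it does
     match the Thumb-2 replicated byte pattern 0x00XY00XY and so can be
     handled in one Thumb-2 insn.  */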
4488   do
4489     {
4490       int end;
4491       unsigned int b1, b2, b3, b4;
4492       unsigned HOST_WIDE_INT result;
4493       int loc;
4494 
4495       gcc_assert (insns < 4);
4496 
4497       if (i <= 0)
4498 	i += 32;
4499 
4500       /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
4501       if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4502 	{
4503 	  loc = i;
4504 	  if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4505 	    /* We can use addw/subw for the last 12 bits.  */
4506 	    result = remainder;
4507 	  else
4508 	    {
4509 	      /* Use an 8-bit shifted/rotated immediate.  */
4510 	      end = i - 8;
4511 	      if (end < 0)
4512 		end += 32;
4513 	      result = remainder & ((0x0ff << end)
4514 				   | ((i < end) ? (0xff >> (32 - end))
4515 						: 0));
4516 	      i -= 8;
4517 	    }
4518 	}
4519       else
4520 	{
4521 	  /* Arm allows rotates by a multiple of two. Thumb-2 allows
4522 	     arbitrary shifts.  */
4523 	  i -= TARGET_ARM ? 2 : 1;
4524 	  continue;
4525 	}
4526 
4527       /* Next, see if we can do a better job with a thumb2 replicated
4528 	 constant.
4529 
4530          We do it this way around to catch the cases like 0x01F001E0 where
4531 	 two 8-bit immediates would work, but a replicated constant would
4532 	 make it worse.
4533 
4534          TODO: 16-bit constants that don't clear all the bits, but still win.
4535          TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
4536       if (TARGET_THUMB2)
4537 	{
4538 	  b1 = (remainder & 0xff000000) >> 24;
4539 	  b2 = (remainder & 0x00ff0000) >> 16;
4540 	  b3 = (remainder & 0x0000ff00) >> 8;
4541 	  b4 = remainder & 0xff;
4542 
4543 	  if (loc > 24)
4544 	    {
4545 	      /* The 8-bit immediate already found clears b1 (and maybe b2),
4546 		 but must leave b3 and b4 alone.  */
4547 
4548 	      /* First try to find a 32-bit replicated constant that clears
4549 		 almost everything.  We can assume that we can't do it in one,
4550 		 or else we wouldn't be here.  */
4551 	      unsigned int tmp = b1 & b2 & b3 & b4;
4552 	      unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4553 				  + (tmp << 24);
4554 	      unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4555 					    + (tmp == b3) + (tmp == b4);
4556 	      if (tmp
4557 		  && (matching_bytes >= 3
4558 		      || (matching_bytes == 2
4559 			  && const_ok_for_op (remainder & ~tmp2, code))))
4560 		{
4561 		  /* At least 3 of the bytes match, and the fourth has at
4562 		     least as many bits set, or two of the bytes match
4563 		     and it will only require one more insn to finish.  */
4564 		  result = tmp2;
4565 		  i = tmp != b1 ? 32
4566 		      : tmp != b2 ? 24
4567 		      : tmp != b3 ? 16
4568 		      : 8;
4569 		}
4570 
4571 	      /* Second, try to find a 16-bit replicated constant that can
4572 		 leave three of the bytes clear.  If b2 or b4 is already
4573 		 zero, then we can.  If the 8-bit from above would not
4574 		 clear b2 anyway, then we still win.  */
4575 	      else if (b1 == b3 && (!b2 || !b4
4576 			       || (remainder & 0x00ff0000 & ~result)))
4577 		{
4578 		  result = remainder & 0xff00ff00;
4579 		  i = 24;
4580 		}
4581 	    }
4582 	  else if (loc > 16)
4583 	    {
4584 	      /* The 8-bit immediate already found clears b2 (and maybe b3)
4585 		 and we don't get here unless b1 is already clear, but it will
4586 		 leave b4 unchanged.  */
4587 
4588 	      /* If we can clear b2 and b4 at once, then we win, since the
4589 		 8-bits couldn't possibly reach that far.  */
4590 	      if (b2 == b4)
4591 		{
4592 		  result = remainder & 0x00ff00ff;
4593 		  i = 16;
4594 		}
4595 	    }
4596 	}
4597 
4598       return_sequence->i[insns++] = result;
4599       remainder &= ~result;
4600 
4601       if (code == SET || code == MINUS)
4602 	code = PLUS;
4603     }
4604   while (remainder);
4605 
4606   return insns;
4607 }
4608 
4609 /* Emit an instruction with the indicated PATTERN.  If COND is
4610    non-NULL, conditionalize the execution of the instruction on COND
4611    being true.  */
4612 
4613 static void
4614 emit_constant_insn (rtx cond, rtx pattern)
4615 {
4616   if (cond)
4617     pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4618   emit_insn (pattern);
4619 }
4620 
4621 /* As above, but extra parameter GENERATE which, if clear, suppresses
4622    RTL generation.  */
4623 
4624 static int
4625 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4626 		  unsigned HOST_WIDE_INT val, rtx target, rtx source,
4627 		  int subtargets, int generate)
4628 {
4629   int can_invert = 0;
4630   int can_negate = 0;
4631   int final_invert = 0;
4632   int i;
4633   int set_sign_bit_copies = 0;
4634   int clear_sign_bit_copies = 0;
4635   int clear_zero_bit_copies = 0;
4636   int set_zero_bit_copies = 0;
4637   int insns = 0, neg_insns, inv_insns;
4638   unsigned HOST_WIDE_INT temp1, temp2;
4639   unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4640   struct four_ints *immediates;
4641   struct four_ints pos_immediates, neg_immediates, inv_immediates;
4642 
4643   /* Find out which operations are safe for a given CODE.  Also do a quick
4644      check for degenerate cases; these can occur when DImode operations
4645      are split.  */
4646   switch (code)
4647     {
4648     case SET:
4649       can_invert = 1;
4650       break;
4651 
4652     case PLUS:
4653       can_negate = 1;
4654       break;
4655 
4656     case IOR:
4657       if (remainder == 0xffffffff)
4658 	{
4659 	  if (generate)
4660 	    emit_constant_insn (cond,
4661 				gen_rtx_SET (target,
4662 					     GEN_INT (ARM_SIGN_EXTEND (val))));
4663 	  return 1;
4664 	}
4665 
4666       if (remainder == 0)
4667 	{
4668 	  if (reload_completed && rtx_equal_p (target, source))
4669 	    return 0;
4670 
4671 	  if (generate)
4672 	    emit_constant_insn (cond, gen_rtx_SET (target, source));
4673 	  return 1;
4674 	}
4675       break;
4676 
4677     case AND:
4678       if (remainder == 0)
4679 	{
4680 	  if (generate)
4681 	    emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4682 	  return 1;
4683 	}
4684       if (remainder == 0xffffffff)
4685 	{
4686 	  if (reload_completed && rtx_equal_p (target, source))
4687 	    return 0;
4688 	  if (generate)
4689 	    emit_constant_insn (cond, gen_rtx_SET (target, source));
4690 	  return 1;
4691 	}
4692       can_invert = 1;
4693       break;
4694 
4695     case XOR:
4696       if (remainder == 0)
4697 	{
4698 	  if (reload_completed && rtx_equal_p (target, source))
4699 	    return 0;
4700 	  if (generate)
4701 	    emit_constant_insn (cond, gen_rtx_SET (target, source));
4702 	  return 1;
4703 	}
4704 
4705       if (remainder == 0xffffffff)
4706 	{
4707 	  if (generate)
4708 	    emit_constant_insn (cond,
4709 				gen_rtx_SET (target,
4710 					     gen_rtx_NOT (mode, source)));
4711 	  return 1;
4712 	}
4713       final_invert = 1;
4714       break;
4715 
4716     case MINUS:
4717       /* We treat MINUS as (val - source), since (source - val) is always
4718 	 passed as (source + (-val)).  */
4719       if (remainder == 0)
4720 	{
4721 	  if (generate)
4722 	    emit_constant_insn (cond,
4723 				gen_rtx_SET (target,
4724 					     gen_rtx_NEG (mode, source)));
4725 	  return 1;
4726 	}
4727       if (const_ok_for_arm (val))
4728 	{
4729 	  if (generate)
4730 	    emit_constant_insn (cond,
4731 				gen_rtx_SET (target,
4732 					     gen_rtx_MINUS (mode, GEN_INT (val),
4733 							    source)));
4734 	  return 1;
4735 	}
4736 
4737       break;
4738 
4739     default:
4740       gcc_unreachable ();
4741     }
4742 
4743   /* If we can do it in one insn get out quickly.  */
4744   if (const_ok_for_op (val, code))
4745     {
4746       if (generate)
4747 	emit_constant_insn (cond,
4748 			    gen_rtx_SET (target,
4749 					 (source
4750 					  ? gen_rtx_fmt_ee (code, mode, source,
4751 							    GEN_INT (val))
4752 					  : GEN_INT (val))));
4753       return 1;
4754     }
4755 
4756   /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4757      insn.  */
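  /* For instance (illustrative): with the mask 0x7ffff we have
     exact_log2 (0x7ffff + 1) == 19, so on Thumb-2 (x & 0x7ffff) becomes a
     single "ubfx rd, rn, #0, #19" via the extzv_t2 pattern used below.  */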
4758   if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4759       && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4760     {
4761       if (generate)
4762 	{
4763 	  if (mode == SImode && i == 16)
4764 	    /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4765 	       smaller insn.  */
4766 	    emit_constant_insn (cond,
4767 				gen_zero_extendhisi2
4768 				(target, gen_lowpart (HImode, source)));
4769 	  else
4770 	    /* Extz only supports SImode, but we can coerce the operands
4771 	       into that mode.  */
4772 	    emit_constant_insn (cond,
4773 				gen_extzv_t2 (gen_lowpart (SImode, target),
4774 					      gen_lowpart (SImode, source),
4775 					      GEN_INT (i), const0_rtx));
4776 	}
4777 
4778       return 1;
4779     }
4780 
4781   /* Calculate a few attributes that may be useful for specific
4782      optimizations.  */
4783   /* Count number of leading zeros.  */
4784   for (i = 31; i >= 0; i--)
4785     {
4786       if ((remainder & (1 << i)) == 0)
4787 	clear_sign_bit_copies++;
4788       else
4789 	break;
4790     }
4791 
4792   /* Count number of leading 1's.  */
4793   for (i = 31; i >= 0; i--)
4794     {
4795       if ((remainder & (1 << i)) != 0)
4796 	set_sign_bit_copies++;
4797       else
4798 	break;
4799     }
4800 
4801   /* Count number of trailing zeros.  */
4802   for (i = 0; i <= 31; i++)
4803     {
4804       if ((remainder & (1 << i)) == 0)
4805 	clear_zero_bit_copies++;
4806       else
4807 	break;
4808     }
4809 
4810   /* Count number of trailing 1's.  */
4811   for (i = 0; i <= 31; i++)
4812     {
4813       if ((remainder & (1 << i)) != 0)
4814 	set_zero_bit_copies++;
4815       else
4816 	break;
4817     }
4818 
4819   switch (code)
4820     {
4821     case SET:
4822       /* See if we can do this by sign_extending a constant that is known
4823 	 to be negative.  This is a good way of doing it, since the shift
4824 	 may well merge into a subsequent insn.  */
4825       if (set_sign_bit_copies > 1)
4826 	{
4827 	  if (const_ok_for_arm
4828 	      (temp1 = ARM_SIGN_EXTEND (remainder
4829 					<< (set_sign_bit_copies - 1))))
4830 	    {
4831 	      if (generate)
4832 		{
4833 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4834 		  emit_constant_insn (cond,
4835 				      gen_rtx_SET (new_src, GEN_INT (temp1)));
4836 		  emit_constant_insn (cond,
4837 				      gen_ashrsi3 (target, new_src,
4838 						   GEN_INT (set_sign_bit_copies - 1)));
4839 		}
4840 	      return 2;
4841 	    }
4842 	  /* For an inverted constant, we will need to set the low bits;
4843 	     these will be shifted out of harm's way.  */
4844 	  temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4845 	  if (const_ok_for_arm (~temp1))
4846 	    {
4847 	      if (generate)
4848 		{
4849 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4850 		  emit_constant_insn (cond,
4851 				      gen_rtx_SET (new_src, GEN_INT (temp1)));
4852 		  emit_constant_insn (cond,
4853 				      gen_ashrsi3 (target, new_src,
4854 						   GEN_INT (set_sign_bit_copies - 1)));
4855 		}
4856 	      return 2;
4857 	    }
4858 	}
4859 
4860       /* See if we can calculate the value as the difference between two
4861 	 valid immediates.  */
4862       if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4863 	{
4864 	  int topshift = clear_sign_bit_copies & ~1;
4865 
4866 	  temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4867 				   & (0xff000000 >> topshift));
4868 
4869 	  /* If temp1 is zero, then that means the 9 most significant
4870 	     bits of remainder were 1 and we've caused it to overflow.
4871 	     When topshift is 0 we don't need to do anything since we
4872 	     can borrow from 'bit 32'.  */
4873 	  if (temp1 == 0 && topshift != 0)
4874 	    temp1 = 0x80000000 >> (topshift - 1);
4875 
4876 	  temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4877 
4878 	  if (const_ok_for_arm (temp2))
4879 	    {
4880 	      if (generate)
4881 		{
4882 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4883 		  emit_constant_insn (cond,
4884 				      gen_rtx_SET (new_src, GEN_INT (temp1)));
4885 		  emit_constant_insn (cond,
4886 				      gen_addsi3 (target, new_src,
4887 						  GEN_INT (-temp2)));
4888 		}
4889 
4890 	      return 2;
4891 	    }
4892 	}
4893 
4894       /* See if we can generate this by setting the bottom (or the top)
4895 	 16 bits, and then shifting these into the other half of the
4896 	 word.  We only look for the simplest cases; to do more would cost
4897 	 too much.  Be careful, however, not to generate this when the
4898 	 alternative would take fewer insns.  */
4899       if (val & 0xffff0000)
4900 	{
4901 	  temp1 = remainder & 0xffff0000;
4902 	  temp2 = remainder & 0x0000ffff;
4903 
4904 	  /* Overlaps outside this range are best done using other methods.  */
4905 	  for (i = 9; i < 24; i++)
4906 	    {
4907 	      if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4908 		  && !const_ok_for_arm (temp2))
4909 		{
4910 		  rtx new_src = (subtargets
4911 				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4912 				 : target);
4913 		  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4914 					    source, subtargets, generate);
4915 		  source = new_src;
4916 		  if (generate)
4917 		    emit_constant_insn
4918 		      (cond,
4919 		       gen_rtx_SET
4920 		       (target,
4921 			gen_rtx_IOR (mode,
4922 				     gen_rtx_ASHIFT (mode, source,
4923 						     GEN_INT (i)),
4924 				     source)));
4925 		  return insns + 1;
4926 		}
4927 	    }
4928 
4929 	  /* Don't duplicate cases already considered.  */
4930 	  for (i = 17; i < 24; i++)
4931 	    {
4932 	      if (((temp1 | (temp1 >> i)) == remainder)
4933 		  && !const_ok_for_arm (temp1))
4934 		{
4935 		  rtx new_src = (subtargets
4936 				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4937 				 : target);
4938 		  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4939 					    source, subtargets, generate);
4940 		  source = new_src;
4941 		  if (generate)
4942 		    emit_constant_insn
4943 		      (cond,
4944 		       gen_rtx_SET (target,
4945 				    gen_rtx_IOR
4946 				    (mode,
4947 				     gen_rtx_LSHIFTRT (mode, source,
4948 						       GEN_INT (i)),
4949 				     source)));
4950 		  return insns + 1;
4951 		}
4952 	    }
4953 	}
4954       break;
4955 
4956     case IOR:
4957     case XOR:
4958       /* If we have IOR or XOR, and the constant can be loaded in a
4959 	 single instruction, and we can find a temporary to put it in,
4960 	 then this can be done in two instructions instead of 3-4.  */
4961       if (subtargets
4962 	  /* TARGET can't be NULL if SUBTARGETS is 0.  */
4963 	  || (reload_completed && !reg_mentioned_p (target, source)))
4964 	{
4965 	  if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4966 	    {
4967 	      if (generate)
4968 		{
4969 		  rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4970 
4971 		  emit_constant_insn (cond,
4972 				      gen_rtx_SET (sub, GEN_INT (val)));
4973 		  emit_constant_insn (cond,
4974 				      gen_rtx_SET (target,
4975 						   gen_rtx_fmt_ee (code, mode,
4976 								   source, sub)));
4977 		}
4978 	      return 2;
4979 	    }
4980 	}
4981 
4982       if (code == XOR)
4983 	break;
4984 
4985 	  /* Convert
4986 	     x = y | constant (which is composed of set_sign_bit_copies leading 1s
4987 	                       and the remaining bits 0, e.g. 0xfff00000)
4988 	     to x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
4989 
4990 	     This can be done in 2 instructions by using shifts with mov or mvn.
4991 	     E.g. for
4992 	     x = x | 0xfff00000;
4993 	     we generate:
4994 	     mvn	r0, r0, asl #12
4995 	     mvn	r0, r0, lsr #12  */
4996       if (set_sign_bit_copies > 8
4997 	  && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4998 	{
4999 	  if (generate)
5000 	    {
5001 	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5002 	      rtx shift = GEN_INT (set_sign_bit_copies);
5003 
5004 	      emit_constant_insn
5005 		(cond,
5006 		 gen_rtx_SET (sub,
5007 			      gen_rtx_NOT (mode,
5008 					   gen_rtx_ASHIFT (mode,
5009 							   source,
5010 							   shift))));
5011 	      emit_constant_insn
5012 		(cond,
5013 		 gen_rtx_SET (target,
5014 			      gen_rtx_NOT (mode,
5015 					   gen_rtx_LSHIFTRT (mode, sub,
5016 							     shift))));
5017 	    }
5018 	  return 2;
5019 	}
5020 
5021       /* Convert
5022 	  x = y | constant (which has set_zero_bit_copies number of trailing ones).
5023 	   to
5024 	  x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5025 
5026 	  E.g. for r0 = r0 | 0xfff we generate:
5027 	       mvn	r0, r0, lsr #12
5028 	       mvn	r0, r0, asl #12  */
5031       if (set_zero_bit_copies > 8
5032 	  && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5033 	{
5034 	  if (generate)
5035 	    {
5036 	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5037 	      rtx shift = GEN_INT (set_zero_bit_copies);
5038 
5039 	      emit_constant_insn
5040 		(cond,
5041 		 gen_rtx_SET (sub,
5042 			      gen_rtx_NOT (mode,
5043 					   gen_rtx_LSHIFTRT (mode,
5044 							     source,
5045 							     shift))));
5046 	      emit_constant_insn
5047 		(cond,
5048 		 gen_rtx_SET (target,
5049 			      gen_rtx_NOT (mode,
5050 					   gen_rtx_ASHIFT (mode, sub,
5051 							   shift))));
5052 	    }
5053 	  return 2;
5054 	}
5055 
5056       /* This will never be reached for Thumb2 because orn is a valid
5057 	 instruction. This is for Thumb1 and the ARM 32 bit cases.
5058 
5059 	 x = y | constant (such that ~constant is a valid constant)
5060 	 Transform this to
5061 	 x = ~(~y & ~constant).
5062       */
5063       if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5064 	{
5065 	  if (generate)
5066 	    {
5067 	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5068 	      emit_constant_insn (cond,
5069 				  gen_rtx_SET (sub,
5070 					       gen_rtx_NOT (mode, source)));
5071 	      source = sub;
5072 	      if (subtargets)
5073 		sub = gen_reg_rtx (mode);
5074 	      emit_constant_insn (cond,
5075 				  gen_rtx_SET (sub,
5076 					       gen_rtx_AND (mode, source,
5077 							    GEN_INT (temp1))));
5078 	      emit_constant_insn (cond,
5079 				  gen_rtx_SET (target,
5080 					       gen_rtx_NOT (mode, sub)));
5081 	    }
5082 	  return 3;
5083 	}
5084       break;
5085 
5086     case AND:
5087       /* See if two shifts will do 2 or more insn's worth of work.  */
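      /* Illustrative example: 0x12ff has 19 leading zeros, so (x & 0x12ff)
	 can be done as an AND with 0xfffff2ff (a single BIC of 0xd00)
	 followed by "lsl #19" and "lsr #19" to clear the high bits,
	 i.e. three insns in total.  */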
5088       if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5089 	{
5090 	  HOST_WIDE_INT shift_mask = ((0xffffffff
5091 				       << (32 - clear_sign_bit_copies))
5092 				      & 0xffffffff);
5093 
5094 	  if ((remainder | shift_mask) != 0xffffffff)
5095 	    {
5096 	      HOST_WIDE_INT new_val
5097 	        = ARM_SIGN_EXTEND (remainder | shift_mask);
5098 
5099 	      if (generate)
5100 		{
5101 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5102 		  insns = arm_gen_constant (AND, SImode, cond, new_val,
5103 					    new_src, source, subtargets, 1);
5104 		  source = new_src;
5105 		}
5106 	      else
5107 		{
5108 		  rtx targ = subtargets ? NULL_RTX : target;
5109 		  insns = arm_gen_constant (AND, mode, cond, new_val,
5110 					    targ, source, subtargets, 0);
5111 		}
5112 	    }
5113 
5114 	  if (generate)
5115 	    {
5116 	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5117 	      rtx shift = GEN_INT (clear_sign_bit_copies);
5118 
5119 	      emit_insn (gen_ashlsi3 (new_src, source, shift));
5120 	      emit_insn (gen_lshrsi3 (target, new_src, shift));
5121 	    }
5122 
5123 	  return insns + 2;
5124 	}
5125 
5126       if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5127 	{
5128 	  HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5129 
5130 	  if ((remainder | shift_mask) != 0xffffffff)
5131 	    {
5132 	      HOST_WIDE_INT new_val
5133 	        = ARM_SIGN_EXTEND (remainder | shift_mask);
5134 	      if (generate)
5135 		{
5136 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5137 
5138 		  insns = arm_gen_constant (AND, mode, cond, new_val,
5139 					    new_src, source, subtargets, 1);
5140 		  source = new_src;
5141 		}
5142 	      else
5143 		{
5144 		  rtx targ = subtargets ? NULL_RTX : target;
5145 
5146 		  insns = arm_gen_constant (AND, mode, cond, new_val,
5147 					    targ, source, subtargets, 0);
5148 		}
5149 	    }
5150 
5151 	  if (generate)
5152 	    {
5153 	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5154 	      rtx shift = GEN_INT (clear_zero_bit_copies);
5155 
5156 	      emit_insn (gen_lshrsi3 (new_src, source, shift));
5157 	      emit_insn (gen_ashlsi3 (target, new_src, shift));
5158 	    }
5159 
5160 	  return insns + 2;
5161 	}
5162 
5163       break;
5164 
5165     default:
5166       break;
5167     }
5168 
5169   /* Calculate what the instruction sequences would be if we generated it
5170      normally, negated, or inverted.  */
5171   if (code == AND)
5172     /* AND cannot be split into multiple insns, so invert and use BIC.  */
5173     insns = 99;
5174   else
5175     insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5176 
5177   if (can_negate)
5178     neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5179 					    &neg_immediates);
5180   else
5181     neg_insns = 99;
5182 
5183   if (can_invert || final_invert)
5184     inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5185 					    &inv_immediates);
5186   else
5187     inv_insns = 99;
5188 
5189   immediates = &pos_immediates;
5190 
5191   /* Is the negated immediate sequence more efficient?  */
5192   if (neg_insns < insns && neg_insns <= inv_insns)
5193     {
5194       insns = neg_insns;
5195       immediates = &neg_immediates;
5196     }
5197   else
5198     can_negate = 0;
5199 
5200   /* Is the inverted immediate sequence more efficient?
5201      We must allow for an extra NOT instruction for XOR operations, although
5202      there is some chance that the final 'mvn' will get optimized later.  */
5203   if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5204     {
5205       insns = inv_insns;
5206       immediates = &inv_immediates;
5207     }
5208   else
5209     {
5210       can_invert = 0;
5211       final_invert = 0;
5212     }
5213 
5214   /* Now output the chosen sequence as instructions.  */
5215   if (generate)
5216     {
5217       for (i = 0; i < insns; i++)
5218 	{
5219 	  rtx new_src, temp1_rtx;
5220 
5221 	  temp1 = immediates->i[i];
5222 
5223 	  if (code == SET || code == MINUS)
5224 	    new_src = (subtargets ? gen_reg_rtx (mode) : target);
5225 	  else if ((final_invert || i < (insns - 1)) && subtargets)
5226 	    new_src = gen_reg_rtx (mode);
5227 	  else
5228 	    new_src = target;
5229 
5230 	  if (can_invert)
5231 	    temp1 = ~temp1;
5232 	  else if (can_negate)
5233 	    temp1 = -temp1;
5234 
5235 	  temp1 = trunc_int_for_mode (temp1, mode);
5236 	  temp1_rtx = GEN_INT (temp1);
5237 
5238 	  if (code == SET)
5239 	    ;
5240 	  else if (code == MINUS)
5241 	    temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5242 	  else
5243 	    temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5244 
5245 	  emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5246 	  source = new_src;
5247 
5248 	  if (code == SET)
5249 	    {
5250 	      can_negate = can_invert;
5251 	      can_invert = 0;
5252 	      code = PLUS;
5253 	    }
5254 	  else if (code == MINUS)
5255 	    code = PLUS;
5256 	}
5257     }
5258 
5259   if (final_invert)
5260     {
5261       if (generate)
5262 	emit_constant_insn (cond, gen_rtx_SET (target,
5263 					       gen_rtx_NOT (mode, source)));
5264       insns++;
5265     }
5266 
5267   return insns;
5268 }
5269 
5270 /* Canonicalize a comparison so that we are more likely to recognize it.
5271    This can be done for a few constant compares, where we can make the
5272    immediate value easier to load.  */
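/* For example (illustrative): 0x00ffffff is not a valid ARM immediate,
   but 0x01000000 is, so the comparison (x > 0x00ffffff) is better
   handled as (x >= 0x01000000).  */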
5273 
5274 static void
5275 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5276 			     bool op0_preserve_value)
5277 {
5278   machine_mode mode;
5279   unsigned HOST_WIDE_INT i, maxval;
5280 
5281   mode = GET_MODE (*op0);
5282   if (mode == VOIDmode)
5283     mode = GET_MODE (*op1);
5284 
5285   maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5286 
5287   /* For DImode, we have GE/LT/GEU/LTU comparisons.  In ARM mode
5288      we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be either
5289      reversed or (for constant OP1) adjusted to GE/LT.  Similarly
5290      for GTU/LEU in Thumb mode.  */
5291   if (mode == DImode)
5292     {
5293 
5294       if (*code == GT || *code == LE
5295 	  || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5296 	{
5297 	  /* Missing comparison.  First try to use an available
5298 	     comparison.  */
5299 	  if (CONST_INT_P (*op1))
5300 	    {
5301 	      i = INTVAL (*op1);
5302 	      switch (*code)
5303 		{
5304 		case GT:
5305 		case LE:
5306 		  if (i != maxval
5307 		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
5308 		    {
5309 		      *op1 = GEN_INT (i + 1);
5310 		      *code = *code == GT ? GE : LT;
5311 		      return;
5312 		    }
5313 		  break;
5314 		case GTU:
5315 		case LEU:
5316 		  if (i != ~((unsigned HOST_WIDE_INT) 0)
5317 		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
5318 		    {
5319 		      *op1 = GEN_INT (i + 1);
5320 		      *code = *code == GTU ? GEU : LTU;
5321 		      return;
5322 		    }
5323 		  break;
5324 		default:
5325 		  gcc_unreachable ();
5326 		}
5327 	    }
5328 
5329 	  /* If that did not work, reverse the condition.  */
5330 	  if (!op0_preserve_value)
5331 	    {
5332 	      std::swap (*op0, *op1);
5333 	      *code = (int)swap_condition ((enum rtx_code)*code);
5334 	    }
5335 	}
5336       return;
5337     }
5338 
5339   /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5340      with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5341      to facilitate possible combining with a cmp into 'ands'.  */
5342   if (mode == SImode
5343       && GET_CODE (*op0) == ZERO_EXTEND
5344       && GET_CODE (XEXP (*op0, 0)) == SUBREG
5345       && GET_MODE (XEXP (*op0, 0)) == QImode
5346       && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5347       && subreg_lowpart_p (XEXP (*op0, 0))
5348       && *op1 == const0_rtx)
5349     *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5350 			GEN_INT (255));
5351 
5352   /* Comparisons smaller than DImode.  Only adjust comparisons against
5353      an out-of-range constant.  */
5354   if (!CONST_INT_P (*op1)
5355       || const_ok_for_arm (INTVAL (*op1))
5356       || const_ok_for_arm (- INTVAL (*op1)))
5357     return;
5358 
5359   i = INTVAL (*op1);
5360 
5361   switch (*code)
5362     {
5363     case EQ:
5364     case NE:
5365       return;
5366 
5367     case GT:
5368     case LE:
5369       if (i != maxval
5370 	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5371 	{
5372 	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5373 	  *code = *code == GT ? GE : LT;
5374 	  return;
5375 	}
5376       break;
5377 
5378     case GE:
5379     case LT:
5380       if (i != ~maxval
5381 	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5382 	{
5383 	  *op1 = GEN_INT (i - 1);
5384 	  *code = *code == GE ? GT : LE;
5385 	  return;
5386 	}
5387       break;
5388 
5389     case GTU:
5390     case LEU:
5391       if (i != ~((unsigned HOST_WIDE_INT) 0)
5392 	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5393 	{
5394 	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5395 	  *code = *code == GTU ? GEU : LTU;
5396 	  return;
5397 	}
5398       break;
5399 
5400     case GEU:
5401     case LTU:
5402       if (i != 0
5403 	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5404 	{
5405 	  *op1 = GEN_INT (i - 1);
5406 	  *code = *code == GEU ? GTU : LEU;
5407 	  return;
5408 	}
5409       break;
5410 
5411     default:
5412       gcc_unreachable ();
5413     }
5414 }
5415 
5416 
5417 /* Define how to find the value returned by a function.  */
5418 
5419 static rtx
5420 arm_function_value(const_tree type, const_tree func,
5421 		   bool outgoing ATTRIBUTE_UNUSED)
5422 {
5423   machine_mode mode;
5424   int unsignedp ATTRIBUTE_UNUSED;
5425   rtx r ATTRIBUTE_UNUSED;
5426 
5427   mode = TYPE_MODE (type);
5428 
5429   if (TARGET_AAPCS_BASED)
5430     return aapcs_allocate_return_reg (mode, type, func);
5431 
5432   /* Promote integer types.  */
5433   if (INTEGRAL_TYPE_P (type))
5434     mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5435 
5436   /* Promotes small structs returned in a register to full-word size
5437      for big-endian AAPCS.  */
5438   if (arm_return_in_msb (type))
5439     {
5440       HOST_WIDE_INT size = int_size_in_bytes (type);
5441       if (size % UNITS_PER_WORD != 0)
5442 	{
5443 	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5444 	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5445 	}
5446     }
5447 
5448   return arm_libcall_value_1 (mode);
5449 }
5450 
5451 /* libcall hashtable helpers.  */
5452 
5453 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5454 {
5455   static inline hashval_t hash (const rtx_def *);
5456   static inline bool equal (const rtx_def *, const rtx_def *);
5457   static inline void remove (rtx_def *);
5458 };
5459 
5460 inline bool
5461 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5462 {
5463   return rtx_equal_p (p1, p2);
5464 }
5465 
5466 inline hashval_t
5467 libcall_hasher::hash (const rtx_def *p1)
5468 {
5469   return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5470 }
5471 
5472 typedef hash_table<libcall_hasher> libcall_table_type;
5473 
5474 static void
5475 add_libcall (libcall_table_type *htab, rtx libcall)
5476 {
5477   *htab->find_slot (libcall, INSERT) = libcall;
5478 }
5479 
5480 static bool
5481 arm_libcall_uses_aapcs_base (const_rtx libcall)
5482 {
5483   static bool init_done = false;
5484   static libcall_table_type *libcall_htab = NULL;
5485 
5486   if (!init_done)
5487     {
5488       init_done = true;
5489 
5490       libcall_htab = new libcall_table_type (31);
5491       add_libcall (libcall_htab,
5492 		   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5493       add_libcall (libcall_htab,
5494 		   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5495       add_libcall (libcall_htab,
5496 		   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5497       add_libcall (libcall_htab,
5498 		   convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5499 
5500       add_libcall (libcall_htab,
5501 		   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5502       add_libcall (libcall_htab,
5503 		   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5504       add_libcall (libcall_htab,
5505 		   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5506       add_libcall (libcall_htab,
5507 		   convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5508 
5509       add_libcall (libcall_htab,
5510 		   convert_optab_libfunc (sext_optab, SFmode, HFmode));
5511       add_libcall (libcall_htab,
5512 		   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5513       add_libcall (libcall_htab,
5514 		   convert_optab_libfunc (sfix_optab, SImode, DFmode));
5515       add_libcall (libcall_htab,
5516 		   convert_optab_libfunc (ufix_optab, SImode, DFmode));
5517       add_libcall (libcall_htab,
5518 		   convert_optab_libfunc (sfix_optab, DImode, DFmode));
5519       add_libcall (libcall_htab,
5520 		   convert_optab_libfunc (ufix_optab, DImode, DFmode));
5521       add_libcall (libcall_htab,
5522 		   convert_optab_libfunc (sfix_optab, DImode, SFmode));
5523       add_libcall (libcall_htab,
5524 		   convert_optab_libfunc (ufix_optab, DImode, SFmode));
5525 
5526       /* Values from double-precision helper functions are returned in core
5527 	 registers if the selected core only supports single-precision
5528 	 arithmetic, even if we are using the hard-float ABI.  The same is
5529 	 true for single-precision helpers, but we will never be using the
5530 	 hard-float ABI on a CPU which doesn't support single-precision
5531 	 operations in hardware.  */
5532       add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5533       add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5534       add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5535       add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5536       add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5537       add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5538       add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5539       add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5540       add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5541       add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5542       add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5543       add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5544 							SFmode));
5545       add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5546 							DFmode));
5547       add_libcall (libcall_htab,
5548 		   convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5549     }
5550 
5551   return libcall && libcall_htab->find (libcall) != NULL;
5552 }
5553 
5554 static rtx
5555 arm_libcall_value_1 (machine_mode mode)
5556 {
5557   if (TARGET_AAPCS_BASED)
5558     return aapcs_libcall_value (mode);
5559   else if (TARGET_IWMMXT_ABI
5560 	   && arm_vector_mode_supported_p (mode))
5561     return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5562   else
5563     return gen_rtx_REG (mode, ARG_REGISTER (1));
5564 }
5565 
5566 /* Define how to find the value returned by a library function
5567    assuming the value has mode MODE.  */
5568 
5569 static rtx
5570 arm_libcall_value (machine_mode mode, const_rtx libcall)
5571 {
5572   if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5573       && GET_MODE_CLASS (mode) == MODE_FLOAT)
5574     {
5575       /* The following libcalls return their result in integer registers,
5576 	 even though they return a floating point value.  */
5577       if (arm_libcall_uses_aapcs_base (libcall))
5578 	return gen_rtx_REG (mode, ARG_REGISTER(1));
5579 
5580     }
5581 
5582   return arm_libcall_value_1 (mode);
5583 }
5584 
5585 /* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */
5586 
5587 static bool
5588 arm_function_value_regno_p (const unsigned int regno)
5589 {
5590   if (regno == ARG_REGISTER (1)
5591       || (TARGET_32BIT
5592 	  && TARGET_AAPCS_BASED
5593 	  && TARGET_HARD_FLOAT
5594 	  && regno == FIRST_VFP_REGNUM)
5595       || (TARGET_IWMMXT_ABI
5596 	  && regno == FIRST_IWMMXT_REGNUM))
5597     return true;
5598 
5599   return false;
5600 }
5601 
5602 /* Determine the amount of memory needed to store the possible return
5603    registers of an untyped call.  */
5604 int
5605 arm_apply_result_size (void)
5606 {
5607   int size = 16;
5608 
5609   if (TARGET_32BIT)
5610     {
5611       if (TARGET_HARD_FLOAT_ABI)
5612 	size += 32;
5613       if (TARGET_IWMMXT_ABI)
5614 	size += 8;
5615     }
5616 
5617   return size;
5618 }
5619 
5620 /* Decide whether TYPE should be returned in memory (true)
5621    or in a register (false).  FNTYPE is the type of the function making
5622    the call.  */
5623 static bool
5624 arm_return_in_memory (const_tree type, const_tree fntype)
5625 {
5626   HOST_WIDE_INT size;
5627 
5628   size = int_size_in_bytes (type);  /* Negative if not fixed size.  */
5629 
5630   if (TARGET_AAPCS_BASED)
5631     {
5632       /* Simple, non-aggregate types (i.e. not including vectors and
5633 	 complex) are always returned in a register (or registers).
5634 	 We don't care about which register here, so we can short-cut
5635 	 some of the detail.  */
5636       if (!AGGREGATE_TYPE_P (type)
5637 	  && TREE_CODE (type) != VECTOR_TYPE
5638 	  && TREE_CODE (type) != COMPLEX_TYPE)
5639 	return false;
5640 
5641       /* Any return value that is no larger than one word can be
5642 	 returned in r0.  */
5643       if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5644 	return false;
5645 
5646       /* Check any available co-processors to see if they accept the
5647 	 type as a register candidate (VFP, for example, can return
5648 	 some aggregates in consecutive registers).  These aren't
5649 	 available if the call is variadic.  */
5650       if (aapcs_select_return_coproc (type, fntype) >= 0)
5651 	return false;
5652 
5653       /* Vector values should be returned using ARM registers, not
5654 	 memory (unless they're over 16 bytes, which will break since
5655 	 we only have four call-clobbered registers to play with).  */
5656       if (TREE_CODE (type) == VECTOR_TYPE)
5657 	return (size < 0 || size > (4 * UNITS_PER_WORD));
5658 
5659       /* The rest go in memory.  */
5660       return true;
5661     }
5662 
5663   if (TREE_CODE (type) == VECTOR_TYPE)
5664     return (size < 0 || size > (4 * UNITS_PER_WORD));
5665 
5666   if (!AGGREGATE_TYPE_P (type) &&
5667       (TREE_CODE (type) != VECTOR_TYPE))
5668     /* All simple types are returned in registers.  */
5669     return false;
5670 
5671   if (arm_abi != ARM_ABI_APCS)
5672     {
5673       /* ATPCS and later return aggregate types in memory only if they are
5674 	 larger than a word (or are variable size).  */
5675       return (size < 0 || size > UNITS_PER_WORD);
5676     }
5677 
5678   /* For the arm-wince targets we choose to be compatible with Microsoft's
5679      ARM and Thumb compilers, which always return aggregates in memory.  */
5680 #ifndef ARM_WINCE
5681   /* All structures/unions bigger than one word are returned in memory.
5682      Also catch the case where int_size_in_bytes returns -1.  In this case
5683      the aggregate is either huge or of variable size, and in either case
5684      we will want to return it via memory and not in a register.  */
5685   if (size < 0 || size > UNITS_PER_WORD)
5686     return true;
5687 
5688   if (TREE_CODE (type) == RECORD_TYPE)
5689     {
5690       tree field;
5691 
5692       /* For a struct the APCS says that we only return in a register
5693 	 if the type is 'integer like' and every addressable element
5694 	 has an offset of zero.  For practical purposes this means
5695 	 that the structure can have at most one non bit-field element
5696 	 and that this element must be the first one in the structure.  */
5697 
5698       /* Find the first field, ignoring non FIELD_DECL things which will
5699 	 have been created by C++.  */
5700       for (field = TYPE_FIELDS (type);
5701 	   field && TREE_CODE (field) != FIELD_DECL;
5702 	   field = DECL_CHAIN (field))
5703 	continue;
5704 
5705       if (field == NULL)
5706 	return false; /* An empty structure.  Allowed by an extension to ANSI C.  */
5707 
5708       /* Check that the first field is valid for returning in a register.  */
5709 
5710       /* ... Floats are not allowed */
5711       if (FLOAT_TYPE_P (TREE_TYPE (field)))
5712 	return true;
5713 
5714       /* ... Aggregates that are not themselves valid for returning in
5715 	 a register are not allowed.  */
5716       if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5717 	return true;
5718 
5719       /* Now check the remaining fields, if any.  Only bitfields are allowed,
5720 	 since they are not addressable.  */
5721       for (field = DECL_CHAIN (field);
5722 	   field;
5723 	   field = DECL_CHAIN (field))
5724 	{
5725 	  if (TREE_CODE (field) != FIELD_DECL)
5726 	    continue;
5727 
5728 	  if (!DECL_BIT_FIELD_TYPE (field))
5729 	    return true;
5730 	}
5731 
5732       return false;
5733     }
5734 
5735   if (TREE_CODE (type) == UNION_TYPE)
5736     {
5737       tree field;
5738 
5739       /* Unions can be returned in registers if every element is
5740 	 integral, or can be returned in an integer register.  */
5741       for (field = TYPE_FIELDS (type);
5742 	   field;
5743 	   field = DECL_CHAIN (field))
5744 	{
5745 	  if (TREE_CODE (field) != FIELD_DECL)
5746 	    continue;
5747 
5748 	  if (FLOAT_TYPE_P (TREE_TYPE (field)))
5749 	    return true;
5750 
5751 	  if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5752 	    return true;
5753 	}
5754 
5755       return false;
5756     }
5757 #endif /* not ARM_WINCE */
5758 
5759   /* Return all other types in memory.  */
5760   return true;
5761 }
5762 
5763 const struct pcs_attribute_arg
5764 {
5765   const char *arg;
5766   enum arm_pcs value;
5767 } pcs_attribute_args[] =
5768   {
5769     {"aapcs", ARM_PCS_AAPCS},
5770     {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5771 #if 0
5772     /* We could recognize these, but changes would be needed elsewhere
5773      * to implement them.  */
5774     {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5775     {"atpcs", ARM_PCS_ATPCS},
5776     {"apcs", ARM_PCS_APCS},
5777 #endif
5778     {NULL, ARM_PCS_UNKNOWN}
5779   };
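/* A caller typically selects one of these variants with the "pcs"
   function attribute, e.g. (illustrative declaration):

     double my_func (double) __attribute__ ((pcs ("aapcs")));

   which arm_pcs_from_attribute below maps to ARM_PCS_AAPCS.  */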
5780 
5781 static enum arm_pcs
5782 arm_pcs_from_attribute (tree attr)
5783 {
5784   const struct pcs_attribute_arg *ptr;
5785   const char *arg;
5786 
5787   /* Get the value of the argument.  */
5788   if (TREE_VALUE (attr) == NULL_TREE
5789       || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5790     return ARM_PCS_UNKNOWN;
5791 
5792   arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5793 
5794   /* Check it against the list of known arguments.  */
5795   for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5796     if (streq (arg, ptr->arg))
5797       return ptr->value;
5798 
5799   /* An unrecognized PCS name.  */
5800   return ARM_PCS_UNKNOWN;
5801 }
5802 
5803 /* Get the PCS variant to use for this call.  TYPE is the function's type
5804    specification, DECL is the specific declaration.  DECL may be null if
5805    the call could be indirect or if this is a library call.  */
5806 static enum arm_pcs
5807 arm_get_pcs_model (const_tree type, const_tree decl)
5808 {
5809   bool user_convention = false;
5810   enum arm_pcs user_pcs = arm_pcs_default;
5811   tree attr;
5812 
5813   gcc_assert (type);
5814 
5815   attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5816   if (attr)
5817     {
5818       user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5819       user_convention = true;
5820     }
5821 
5822   if (TARGET_AAPCS_BASED)
5823     {
5824       /* Detect varargs functions.  These always use the base rules
5825 	 (no argument is ever a candidate for a co-processor
5826 	 register).  */
5827       bool base_rules = stdarg_p (type);
5828 
5829       if (user_convention)
5830 	{
5831 	  if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5832 	    sorry ("non-AAPCS derived PCS variant");
5833 	  else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5834 	    error ("variadic functions must use the base AAPCS variant");
5835 	}
5836 
5837       if (base_rules)
5838 	return ARM_PCS_AAPCS;
5839       else if (user_convention)
5840 	return user_pcs;
5841       else if (decl && flag_unit_at_a_time)
5842 	{
5843 	  /* Local functions never leak outside this compilation unit,
5844 	     so we are free to use whatever conventions are
5845 	     appropriate.  */
5846 	  /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
5847 	  cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5848 	  if (i && i->local)
5849 	    return ARM_PCS_AAPCS_LOCAL;
5850 	}
5851     }
5852   else if (user_convention && user_pcs != arm_pcs_default)
5853     sorry ("PCS variant");
5854 
5855   /* For everything else we use the target's default.  */
5856   return arm_pcs_default;
5857 }
5858 
5859 
5860 static void
5861 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
5862 		    const_tree fntype ATTRIBUTE_UNUSED,
5863 		    rtx libcall ATTRIBUTE_UNUSED,
5864 		    const_tree fndecl ATTRIBUTE_UNUSED)
5865 {
5866   /* Record the unallocated VFP registers.  */
5867   pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5868   pcum->aapcs_vfp_reg_alloc = 0;
5869 }
5870 
5871 /* Walk down the type tree of TYPE counting consecutive base elements.
5872    If *MODEP is VOIDmode, then set it to the first valid floating point
5873    type.  If a non-floating point type is found, or if a floating point
5874    type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5875    otherwise return the count in the sub-tree.  */
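/* For example (illustrative): "struct { float x, y, z; }" is a
   homogeneous aggregate and yields a count of 3 with *MODEP set to
   SFmode, whereas "struct { float f; int i; }" returns -1.  */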
5876 static int
5877 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5878 {
5879   machine_mode mode;
5880   HOST_WIDE_INT size;
5881 
5882   switch (TREE_CODE (type))
5883     {
5884     case REAL_TYPE:
5885       mode = TYPE_MODE (type);
5886       if (mode != DFmode && mode != SFmode && mode != HFmode)
5887 	return -1;
5888 
5889       if (*modep == VOIDmode)
5890 	*modep = mode;
5891 
5892       if (*modep == mode)
5893 	return 1;
5894 
5895       break;
5896 
5897     case COMPLEX_TYPE:
5898       mode = TYPE_MODE (TREE_TYPE (type));
5899       if (mode != DFmode && mode != SFmode)
5900 	return -1;
5901 
5902       if (*modep == VOIDmode)
5903 	*modep = mode;
5904 
5905       if (*modep == mode)
5906 	return 2;
5907 
5908       break;
5909 
5910     case VECTOR_TYPE:
5911       /* Use V2SImode and V4SImode as representatives of all 64-bit
5912 	 and 128-bit vector types, whether or not those modes are
5913 	 supported with the present options.  */
5914       size = int_size_in_bytes (type);
5915       switch (size)
5916 	{
5917 	case 8:
5918 	  mode = V2SImode;
5919 	  break;
5920 	case 16:
5921 	  mode = V4SImode;
5922 	  break;
5923 	default:
5924 	  return -1;
5925 	}
5926 
5927       if (*modep == VOIDmode)
5928 	*modep = mode;
5929 
5930       /* Vector modes are considered to be opaque: two vectors are
5931 	 equivalent for the purposes of being homogeneous aggregates
5932 	 if they are the same size.  */
5933       if (*modep == mode)
5934 	return 1;
5935 
5936       break;
5937 
5938     case ARRAY_TYPE:
5939       {
5940 	int count;
5941 	tree index = TYPE_DOMAIN (type);
5942 
5943 	/* Can't handle incomplete types nor sizes that are not
5944 	   fixed.  */
5945 	if (!COMPLETE_TYPE_P (type)
5946 	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5947 	  return -1;
5948 
5949 	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5950 	if (count == -1
5951 	    || !index
5952 	    || !TYPE_MAX_VALUE (index)
5953 	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5954 	    || !TYPE_MIN_VALUE (index)
5955 	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5956 	    || count < 0)
5957 	  return -1;
5958 
5959 	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5960 		      - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5961 
5962 	/* There must be no padding.  */
5963 	if (wi::to_wide (TYPE_SIZE (type))
5964 	    != count * GET_MODE_BITSIZE (*modep))
5965 	  return -1;
5966 
5967 	return count;
5968       }
5969 
5970     case RECORD_TYPE:
5971       {
5972 	int count = 0;
5973 	int sub_count;
5974 	tree field;
5975 
5976 	/* Can't handle incomplete types nor sizes that are not
5977 	   fixed.  */
5978 	if (!COMPLETE_TYPE_P (type)
5979 	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5980 	  return -1;
5981 
5982 	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5983 	  {
5984 	    if (TREE_CODE (field) != FIELD_DECL)
5985 	      continue;
5986 
5987 	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5988 	    if (sub_count < 0)
5989 	      return -1;
5990 	    count += sub_count;
5991 	  }
5992 
5993 	/* There must be no padding.  */
5994 	if (wi::to_wide (TYPE_SIZE (type))
5995 	    != count * GET_MODE_BITSIZE (*modep))
5996 	  return -1;
5997 
5998 	return count;
5999       }
6000 
6001     case UNION_TYPE:
6002     case QUAL_UNION_TYPE:
6003       {
6004 	/* These aren't very interesting except in a degenerate case.  */
6005 	int count = 0;
6006 	int sub_count;
6007 	tree field;
6008 
6009 	/* Can't handle incomplete types nor sizes that are not
6010 	   fixed.  */
6011 	if (!COMPLETE_TYPE_P (type)
6012 	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6013 	  return -1;
6014 
6015 	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6016 	  {
6017 	    if (TREE_CODE (field) != FIELD_DECL)
6018 	      continue;
6019 
6020 	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6021 	    if (sub_count < 0)
6022 	      return -1;
6023 	    count = count > sub_count ? count : sub_count;
6024 	  }
6025 
6026 	/* There must be no padding.  */
6027 	if (wi::to_wide (TYPE_SIZE (type))
6028 	    != count * GET_MODE_BITSIZE (*modep))
6029 	  return -1;
6030 
6031 	return count;
6032       }
6033 
6034     default:
6035       break;
6036     }
6037 
6038   return -1;
6039 }
6040 
6041 /* Return true if PCS_VARIANT should use VFP registers.  */
6042 static bool
6043 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6044 {
6045   if (pcs_variant == ARM_PCS_AAPCS_VFP)
6046     {
6047       static bool seen_thumb1_vfp = false;
6048 
6049       if (TARGET_THUMB1 && !seen_thumb1_vfp)
6050 	{
6051 	  sorry ("Thumb-1 hard-float VFP ABI");
6052 	  /* sorry() is not immediately fatal, so only display this once.  */
6053 	  seen_thumb1_vfp = true;
6054 	}
6055 
6056       return true;
6057     }
6058 
6059   if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6060     return false;
6061 
6062   return (TARGET_32BIT && TARGET_HARD_FLOAT &&
6063 	  (TARGET_VFP_DOUBLE || !is_double));
6064 }
6065 
6066 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6067    suitable for passing or returning in VFP registers for the PCS
6068    variant selected.  If it is, then *BASE_MODE is updated to contain
6069    a machine mode describing each element of the argument's type and
6070    *COUNT to hold the number of such elements.  */
6071 static bool
6072 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6073 				       machine_mode mode, const_tree type,
6074 				       machine_mode *base_mode, int *count)
6075 {
6076   machine_mode new_mode = VOIDmode;
6077 
6078   /* If we have the type information, prefer that to working things
6079      out from the mode.  */
6080   if (type)
6081     {
6082       int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6083 
6084       if (ag_count > 0 && ag_count <= 4)
6085 	*count = ag_count;
6086       else
6087 	return false;
6088     }
6089   else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6090 	   || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6091 	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6092     {
6093       *count = 1;
6094       new_mode = mode;
6095     }
6096   else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6097     {
6098       *count = 2;
6099       new_mode = (mode == DCmode ? DFmode : SFmode);
6100     }
6101   else
6102     return false;
6103 
6104 
6105   if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6106     return false;
6107 
6108   *base_mode = new_mode;
6109   return true;
6110 }
6111 
6112 static bool
6113 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6114 			       machine_mode mode, const_tree type)
6115 {
6116   int count ATTRIBUTE_UNUSED;
6117   machine_mode ag_mode ATTRIBUTE_UNUSED;
6118 
6119   if (!use_vfp_abi (pcs_variant, false))
6120     return false;
6121   return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6122 						&ag_mode, &count);
6123 }
6124 
6125 static bool
6126 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6127 			     const_tree type)
6128 {
6129   if (!use_vfp_abi (pcum->pcs_variant, false))
6130     return false;
6131 
6132   return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6133 						&pcum->aapcs_vfp_rmode,
6134 						&pcum->aapcs_vfp_rcount);
6135 }
6136 
6137 /* Implement the allocate field in aapcs_cp_arg_layout.  See the comment there
6138    for the behaviour of this function.  */
6139 
6140 static bool
6141 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6142 		    const_tree type  ATTRIBUTE_UNUSED)
6143 {
6144   int rmode_size
6145     = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6146   int shift = rmode_size / GET_MODE_SIZE (SFmode);
6147   unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6148   int regno;
6149 
6150   for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6151     if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6152       {
6153 	pcum->aapcs_vfp_reg_alloc = mask << regno;
6154 	if (mode == BLKmode
6155 	    || (mode == TImode && ! TARGET_NEON)
6156 	    || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6157 	  {
6158 	    int i;
6159 	    int rcount = pcum->aapcs_vfp_rcount;
6160 	    int rshift = shift;
6161 	    machine_mode rmode = pcum->aapcs_vfp_rmode;
6162 	    rtx par;
6163 	    if (!TARGET_NEON)
6164 	      {
6165 		/* Avoid using unsupported vector modes.  */
6166 		if (rmode == V2SImode)
6167 		  rmode = DImode;
6168 		else if (rmode == V4SImode)
6169 		  {
6170 		    rmode = DImode;
6171 		    rcount *= 2;
6172 		    rshift /= 2;
6173 		  }
6174 	      }
6175 	    par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6176 	    for (i = 0; i < rcount; i++)
6177 	      {
6178 		rtx tmp = gen_rtx_REG (rmode,
6179 				       FIRST_VFP_REGNUM + regno + i * rshift);
6180 		tmp = gen_rtx_EXPR_LIST
6181 		  (VOIDmode, tmp,
6182 		   GEN_INT (i * GET_MODE_SIZE (rmode)));
6183 		XVECEXP (par, 0, i) = tmp;
6184 	      }
6185 
6186 	    pcum->aapcs_reg = par;
6187 	  }
6188 	else
6189 	  pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6190 	return true;
6191       }
6192   return false;
6193 }
6194 
6195 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout.  See the
6196    comment there for the behaviour of this function.  */
6197 
6198 static rtx
6199 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6200 			       machine_mode mode,
6201 			       const_tree type ATTRIBUTE_UNUSED)
6202 {
6203   if (!use_vfp_abi (pcs_variant, false))
6204     return NULL;
6205 
6206   if (mode == BLKmode
6207       || (GET_MODE_CLASS (mode) == MODE_INT
6208 	  && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6209 	  && !TARGET_NEON))
6210     {
6211       int count;
6212       machine_mode ag_mode;
6213       int i;
6214       rtx par;
6215       int shift;
6216 
6217       aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6218 					     &ag_mode, &count);
6219 
6220       if (!TARGET_NEON)
6221 	{
6222 	  if (ag_mode == V2SImode)
6223 	    ag_mode = DImode;
6224 	  else if (ag_mode == V4SImode)
6225 	    {
6226 	      ag_mode = DImode;
6227 	      count *= 2;
6228 	    }
6229 	}
6230       shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
6231       par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6232       for (i = 0; i < count; i++)
6233 	{
6234 	  rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6235 	  tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6236 				   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6237 	  XVECEXP (par, 0, i) = tmp;
6238 	}
6239 
6240       return par;
6241     }
6242 
6243   return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6244 }
6245 
6246 static void
6247 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
6248 		   machine_mode mode  ATTRIBUTE_UNUSED,
6249 		   const_tree type  ATTRIBUTE_UNUSED)
6250 {
6251   pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6252   pcum->aapcs_vfp_reg_alloc = 0;
6253   return;
6254 }
6255 
6256 #define AAPCS_CP(X)				\
6257   {						\
6258     aapcs_ ## X ## _cum_init,			\
6259     aapcs_ ## X ## _is_call_candidate,		\
6260     aapcs_ ## X ## _allocate,			\
6261     aapcs_ ## X ## _is_return_candidate,	\
6262     aapcs_ ## X ## _allocate_return_reg,	\
6263     aapcs_ ## X ## _advance			\
6264   }
6265 
6266 /* Table of co-processors that can be used to pass arguments in
6267    registers.  Ideally no argument should be a candidate for more than
6268    one co-processor table entry, but the table is processed in order
6269    and stops after the first match.  If that entry then fails to put
6270    the argument into a co-processor register, the argument will go on
6271    the stack.  */
6272 static struct
6273 {
6274   /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
6275   void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6276 
6277   /* Return true if an argument of mode MODE (or type TYPE if MODE is
6278      BLKmode) is a candidate for this co-processor's registers; this
6279      function should ignore any position-dependent state in
6280      CUMULATIVE_ARGS and only use call-type dependent information.  */
6281   bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6282 
6283   /* Return true if the argument does get a co-processor register; if
6284      so, set aapcs_reg to an RTX for the allocated register, in the form
6285      required as a return value from FUNCTION_ARG.  */
6286   bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6287 
6288   /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6289      be returned in this co-processor's registers.  */
6290   bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6291 
6292   /* Allocate and return an RTX to hold the return value of a call.  This
6293      routine must not fail and will only be called if is_return_candidate
6294      returned true with the same parameters.  */
6295   rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6296 
6297   /* Finish processing this argument and prepare to start processing
6298      the next one.  */
6299   void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6300 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6301   {
6302     AAPCS_CP(vfp)
6303   };
6304 
6305 #undef AAPCS_CP
6306 
6307 static int
6308 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6309 			  const_tree type)
6310 {
6311   int i;
6312 
6313   for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6314     if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6315       return i;
6316 
6317   return -1;
6318 }
6319 
6320 static int
6321 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6322 {
6323   /* We aren't passed a decl, so we can't check that a call is local.
6324      However, it isn't clear that that would be a win anyway, since it
6325      might limit some tail-calling opportunities.  */
6326   enum arm_pcs pcs_variant;
6327 
6328   if (fntype)
6329     {
6330       const_tree fndecl = NULL_TREE;
6331 
6332       if (TREE_CODE (fntype) == FUNCTION_DECL)
6333 	{
6334 	  fndecl = fntype;
6335 	  fntype = TREE_TYPE (fntype);
6336 	}
6337 
6338       pcs_variant = arm_get_pcs_model (fntype, fndecl);
6339     }
6340   else
6341     pcs_variant = arm_pcs_default;
6342 
6343   if (pcs_variant != ARM_PCS_AAPCS)
6344     {
6345       int i;
6346 
6347       for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6348 	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6349 							TYPE_MODE (type),
6350 							type))
6351 	  return i;
6352     }
6353   return -1;
6354 }
6355 
6356 static rtx
6357 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6358 			   const_tree fntype)
6359 {
6360   /* We aren't passed a decl, so we can't check that a call is local.
6361      However, it isn't clear that that would be a win anyway, since it
6362      might limit some tail-calling opportunities.  */
6363   enum arm_pcs pcs_variant;
6364   int unsignedp ATTRIBUTE_UNUSED;
6365 
6366   if (fntype)
6367     {
6368       const_tree fndecl = NULL_TREE;
6369 
6370       if (TREE_CODE (fntype) == FUNCTION_DECL)
6371 	{
6372 	  fndecl = fntype;
6373 	  fntype = TREE_TYPE (fntype);
6374 	}
6375 
6376       pcs_variant = arm_get_pcs_model (fntype, fndecl);
6377     }
6378   else
6379     pcs_variant = arm_pcs_default;
6380 
6381   /* Promote integer types.  */
6382   if (type && INTEGRAL_TYPE_P (type))
6383     mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6384 
6385   if (pcs_variant != ARM_PCS_AAPCS)
6386     {
6387       int i;
6388 
6389       for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6390 	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6391 							type))
6392 	  return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6393 							     mode, type);
6394     }
6395 
6396   /* Promotes small structs returned in a register to full-word size
6397      for big-endian AAPCS.  */
6398   if (type && arm_return_in_msb (type))
6399     {
6400       HOST_WIDE_INT size = int_size_in_bytes (type);
6401       if (size % UNITS_PER_WORD != 0)
6402 	{
6403 	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6404 	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6405 	}
6406     }
6407 
6408   return gen_rtx_REG (mode, R0_REGNUM);
6409 }
6410 
6411 static rtx
6412 aapcs_libcall_value (machine_mode mode)
6413 {
6414   if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6415       && GET_MODE_SIZE (mode) <= 4)
6416     mode = SImode;
6417 
6418   return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6419 }
6420 
6421 /* Lay out a function argument using the AAPCS rules.  The rule
6422    numbers referred to here are those in the AAPCS.  */
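     /* Illustrative sketch (not from the original source): under the VFP
        variant of the AAPCS a homogeneous floating-point aggregate such as
          struct pt { double x, y; };
        is a co-processor candidate and, when registers are free, is passed
        in consecutive VFP registers (d0, d1); under the base AAPCS the same
        argument uses core registers r0-r3 and/or the stack.  */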
6423 static void
6424 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6425 		  const_tree type, bool named)
6426 {
6427   int nregs, nregs2;
6428   int ncrn;
6429 
6430   /* We only need to do this once per argument.  */
6431   if (pcum->aapcs_arg_processed)
6432     return;
6433 
6434   pcum->aapcs_arg_processed = true;
6435 
6436   /* Special case: if named is false then we are handling an incoming
6437      anonymous argument which is on the stack.  */
6438   if (!named)
6439     return;
6440 
6441   /* Is this a potential co-processor register candidate?  */
6442   if (pcum->pcs_variant != ARM_PCS_AAPCS)
6443     {
6444       int slot = aapcs_select_call_coproc (pcum, mode, type);
6445       pcum->aapcs_cprc_slot = slot;
6446 
6447       /* We don't have to apply any of the rules from part B of the
6448 	 preparation phase; these are handled elsewhere in the
6449 	 compiler.  */
6450 
6451       if (slot >= 0)
6452 	{
6453 	  /* A Co-processor register candidate goes either in its own
6454 	     class of registers or on the stack.  */
6455 	  if (!pcum->aapcs_cprc_failed[slot])
6456 	    {
6457 	      /* C1.cp - Try to allocate the argument to co-processor
6458 		 registers.  */
6459 	      if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6460 		return;
6461 
6462 	      /* C2.cp - Put the argument on the stack and note that we
6463 		 can't assign any more candidates in this slot.  We also
6464 		 need to note that we have allocated stack space, so that
6465 		 we won't later try to split a non-cprc candidate between
6466 		 core registers and the stack.  */
6467 	      pcum->aapcs_cprc_failed[slot] = true;
6468 	      pcum->can_split = false;
6469 	    }
6470 
6471 	  /* We didn't get a register, so this argument goes on the
6472 	     stack.  */
6473 	  gcc_assert (pcum->can_split == false);
6474 	  return;
6475 	}
6476     }
6477 
6478   /* C3 - For double-word aligned arguments, round the NCRN up to the
6479      next even number.  */
6480   ncrn = pcum->aapcs_ncrn;
6481   if (ncrn & 1)
6482     {
6483       int res = arm_needs_doubleword_align (mode, type);
6484       /* Only warn during RTL expansion of call stmts, otherwise we would
6485 	 warn e.g. during gimplification even on functions that will be
6486 	 always inlined, and we'd warn multiple times.  Don't warn when
6487 	 called in expand_function_start either, as we warn instead in
6488 	 arm_function_arg_boundary in that case.  */
6489       if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6490 	inform (input_location, "parameter passing for argument of type "
6491 		"%qT changed in GCC 7.1", type);
6492       else if (res > 0)
6493 	ncrn++;
6494     }
6495 
6496   nregs = ARM_NUM_REGS2 (mode, type);
6497 
6498   /* Sigh, this test should really assert that nregs > 0, but a GCC
6499      extension allows empty structs and then gives them empty size; it
6500      then allows such a structure to be passed by value.  For some of
6501      the code below we have to pretend that such an argument has
6502      non-zero size so that we 'locate' it correctly either in
6503      registers or on the stack.  */
6504   gcc_assert (nregs >= 0);
6505 
6506   nregs2 = nregs ? nregs : 1;
6507 
6508   /* C4 - Argument fits entirely in core registers.  */
6509   if (ncrn + nregs2 <= NUM_ARG_REGS)
6510     {
6511       pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6512       pcum->aapcs_next_ncrn = ncrn + nregs;
6513       return;
6514     }
6515 
6516   /* C5 - Some core registers left and there are no arguments already
6517      on the stack: split this argument between the remaining core
6518      registers and the stack.  */
6519   if (ncrn < NUM_ARG_REGS && pcum->can_split)
6520     {
6521       pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6522       pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6523       pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6524       return;
6525     }
6526 
6527   /* C6 - NCRN is set to 4.  */
6528   pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6529 
6530   /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
6531   return;
6532 }
6533 
6534 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6535    for a call to a function whose data type is FNTYPE.
6536    For a library call, FNTYPE is NULL.  */
6537 void
6538 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6539 			  rtx libname,
6540 			  tree fndecl ATTRIBUTE_UNUSED)
6541 {
6542   /* Long call handling.  */
6543   if (fntype)
6544     pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6545   else
6546     pcum->pcs_variant = arm_pcs_default;
6547 
6548   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6549     {
6550       if (arm_libcall_uses_aapcs_base (libname))
6551 	pcum->pcs_variant = ARM_PCS_AAPCS;
6552 
6553       pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6554       pcum->aapcs_reg = NULL_RTX;
6555       pcum->aapcs_partial = 0;
6556       pcum->aapcs_arg_processed = false;
6557       pcum->aapcs_cprc_slot = -1;
6558       pcum->can_split = true;
6559 
6560       if (pcum->pcs_variant != ARM_PCS_AAPCS)
6561 	{
6562 	  int i;
6563 
6564 	  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6565 	    {
6566 	      pcum->aapcs_cprc_failed[i] = false;
6567 	      aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6568 	    }
6569 	}
6570       return;
6571     }
6572 
6573   /* Legacy ABIs */
6574 
6575   /* On the ARM, the offset starts at 0.  */
6576   pcum->nregs = 0;
6577   pcum->iwmmxt_nregs = 0;
6578   pcum->can_split = true;
6579 
6580   /* Varargs vectors are treated the same as long long.
6581      named_count avoids having to change the way arm handles 'named'.  */
6582   pcum->named_count = 0;
6583   pcum->nargs = 0;
6584 
6585   if (TARGET_REALLY_IWMMXT && fntype)
6586     {
6587       tree fn_arg;
6588 
6589       for (fn_arg = TYPE_ARG_TYPES (fntype);
6590 	   fn_arg;
6591 	   fn_arg = TREE_CHAIN (fn_arg))
6592 	pcum->named_count += 1;
6593 
6594       if (! pcum->named_count)
6595 	pcum->named_count = INT_MAX;
6596     }
6597 }
6598 
6599 /* Return 1 if double word alignment is required for argument passing.
6600    Return -1 if double word alignment used to be required for argument
6601    passing before PR77728 ABI fix, but is not required anymore.
6602    Return 0 if double word alignment is not required and wasn't required
6603    before either.  */
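     /* For example (illustrative only): an argument of type long long, or a
        struct with a member declared __attribute__ ((aligned (8))), needs
        doubleword alignment and therefore starts in an even-numbered core
        register or at an 8-byte aligned stack offset.  */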
6604 static int
6605 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6606 {
6607   if (!type)
6608     return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6609 
6610   /* Scalar and vector types: Use natural alignment, i.e. of base type.  */
6611   if (!AGGREGATE_TYPE_P (type))
6612     return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6613 
6614   /* Array types: Use member alignment of element type.  */
6615   if (TREE_CODE (type) == ARRAY_TYPE)
6616     return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6617 
6618   int ret = 0;
6619   /* Record/aggregate types: Use greatest member alignment of any member.  */
6620   for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6621     if (DECL_ALIGN (field) > PARM_BOUNDARY)
6622       {
6623 	if (TREE_CODE (field) == FIELD_DECL)
6624 	  return 1;
6625 	else
6626 	  /* Before PR77728 fix, we were incorrectly considering also
6627 	     other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6628 	     Make sure we can warn about that with -Wpsabi.  */
6629 	  ret = -1;
6630       }
6631 
6632   return ret;
6633 }
6634 
6635 
6636 /* Determine where to put an argument to a function.
6637    Value is zero to push the argument on the stack,
6638    or a hard register in which to store the argument.
6639 
6640    MODE is the argument's machine mode.
6641    TYPE is the data type of the argument (as a tree).
6642     This is null for libcalls where that information may
6643     not be available.
6644    CUM is a variable of type CUMULATIVE_ARGS which gives info about
6645     the preceding args and about the function being called.
6646    NAMED is nonzero if this argument is a named parameter
6647     (otherwise it is an extra parameter matching an ellipsis).
6648 
6649    On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6650    other arguments are passed on the stack.  If (NAMED == 0) (which happens
6651    only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6652    defined), say it is passed on the stack (function_prologue will
6653    indeed arrange for it to be passed on the stack if necessary).  */
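     /* For instance (hypothetical declaration, illustration only):
          extern int f (int a, int b, int c, int d, int e);
        passes A..D in r0-r3 and E on the stack under the base AAPCS.  */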
6654 
6655 static rtx
6656 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6657 		  const_tree type, bool named)
6658 {
6659   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6660   int nregs;
6661 
6662   /* Handle the special case quickly.  Pick an arbitrary value for op2 of
6663      a call insn (op3 of a call_value insn).  */
6664   if (mode == VOIDmode)
6665     return const0_rtx;
6666 
6667   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6668     {
6669       aapcs_layout_arg (pcum, mode, type, named);
6670       return pcum->aapcs_reg;
6671     }
6672 
6673   /* Varargs vectors are treated the same as long long.
6674      named_count avoids having to change the way arm handles 'named'.  */
6675   if (TARGET_IWMMXT_ABI
6676       && arm_vector_mode_supported_p (mode)
6677       && pcum->named_count > pcum->nargs + 1)
6678     {
6679       if (pcum->iwmmxt_nregs <= 9)
6680 	return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6681       else
6682 	{
6683 	  pcum->can_split = false;
6684 	  return NULL_RTX;
6685 	}
6686     }
6687 
6688   /* Put doubleword aligned quantities in even register pairs.  */
6689   if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6690     {
6691       int res = arm_needs_doubleword_align (mode, type);
6692       if (res < 0 && warn_psabi)
6693 	inform (input_location, "parameter passing for argument of type "
6694 		"%qT changed in GCC 7.1", type);
6695       else if (res > 0)
6696 	pcum->nregs++;
6697     }
6698 
6699   /* Only allow splitting an arg between regs and memory if all preceding
6700      args were allocated to regs.  For args passed by reference we only count
6701      the reference pointer.  */
6702   if (pcum->can_split)
6703     nregs = 1;
6704   else
6705     nregs = ARM_NUM_REGS2 (mode, type);
6706 
6707   if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6708     return NULL_RTX;
6709 
6710   return gen_rtx_REG (mode, pcum->nregs);
6711 }
6712 
6713 static unsigned int
6714 arm_function_arg_boundary (machine_mode mode, const_tree type)
6715 {
6716   if (!ARM_DOUBLEWORD_ALIGN)
6717     return PARM_BOUNDARY;
6718 
6719   int res = arm_needs_doubleword_align (mode, type);
6720   if (res < 0 && warn_psabi)
6721     inform (input_location, "parameter passing for argument of type %qT "
6722 	    "changed in GCC 7.1", type);
6723 
6724   return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6725 }
6726 
6727 static int
6728 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6729 		       tree type, bool named)
6730 {
6731   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6732   int nregs = pcum->nregs;
6733 
6734   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6735     {
6736       aapcs_layout_arg (pcum, mode, type, named);
6737       return pcum->aapcs_partial;
6738     }
6739 
6740   if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6741     return 0;
6742 
6743   if (NUM_ARG_REGS > nregs
6744       && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6745       && pcum->can_split)
6746     return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6747 
6748   return 0;
6749 }
6750 
6751 /* Update the data in PCUM to advance over an argument
6752    of mode MODE and data type TYPE.
6753    (TYPE is null for libcalls where that information may not be available.)  */
6754 
6755 static void
6756 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6757 			  const_tree type, bool named)
6758 {
6759   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6760 
6761   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6762     {
6763       aapcs_layout_arg (pcum, mode, type, named);
6764 
6765       if (pcum->aapcs_cprc_slot >= 0)
6766 	{
6767 	  aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6768 							      type);
6769 	  pcum->aapcs_cprc_slot = -1;
6770 	}
6771 
6772       /* Generic stuff.  */
6773       pcum->aapcs_arg_processed = false;
6774       pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6775       pcum->aapcs_reg = NULL_RTX;
6776       pcum->aapcs_partial = 0;
6777     }
6778   else
6779     {
6780       pcum->nargs += 1;
6781       if (arm_vector_mode_supported_p (mode)
6782 	  && pcum->named_count > pcum->nargs
6783 	  && TARGET_IWMMXT_ABI)
6784 	pcum->iwmmxt_nregs += 1;
6785       else
6786 	pcum->nregs += ARM_NUM_REGS2 (mode, type);
6787     }
6788 }
6789 
6790 /* Variable sized types are passed by reference.  This is a GCC
6791    extension to the ARM ABI.  */
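     /* That is, a parameter whose TYPE_SIZE is not a compile-time constant
        (e.g. a by-value parameter of variably modified type, itself a GNU
        extension) is passed by invisible reference: the caller passes the
        address of a copy it has made.  */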
6792 
6793 static bool
6794 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6795 		       machine_mode mode ATTRIBUTE_UNUSED,
6796 		       const_tree type, bool named ATTRIBUTE_UNUSED)
6797 {
6798   return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6799 }
6800 
6801 /* Encode the current state of the #pragma [no_]long_calls.  */
6802 typedef enum
6803 {
6804   OFF,		/* No #pragma [no_]long_calls is in effect.  */
6805   LONG,		/* #pragma long_calls is in effect.  */
6806   SHORT		/* #pragma no_long_calls is in effect.  */
6807 } arm_pragma_enum;
6808 
6809 static arm_pragma_enum arm_pragma_long_calls = OFF;
6810 
6811 void
6812 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6813 {
6814   arm_pragma_long_calls = LONG;
6815 }
6816 
6817 void
6818 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6819 {
6820   arm_pragma_long_calls = SHORT;
6821 }
6822 
6823 void
6824 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6825 {
6826   arm_pragma_long_calls = OFF;
6827 }
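     /* These handlers implement the ARM-specific pragmas; for example
        (illustrative):
          #pragma long_calls        -- following functions get "long_call"
          #pragma no_long_calls     -- following functions get "short_call"
          #pragma long_calls_off    -- revert to the command-line default  */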
6828 
6829 /* Handle an attribute requiring a FUNCTION_DECL;
6830    arguments as in struct attribute_spec.handler.  */
6831 static tree
6832 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6833 			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6834 {
6835   if (TREE_CODE (*node) != FUNCTION_DECL)
6836     {
6837       warning (OPT_Wattributes, "%qE attribute only applies to functions",
6838 	       name);
6839       *no_add_attrs = true;
6840     }
6841 
6842   return NULL_TREE;
6843 }
6844 
6845 /* Handle an "interrupt" or "isr" attribute;
6846    arguments as in struct attribute_spec.handler.  */
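     /* Typical uses (illustrative):
          void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
          void fiq_handler (void) __attribute__ ((isr ("FIQ")));
        The optional string argument selects the exception type; a value
        arm_isr_value does not recognize causes the attribute to be ignored
        with a warning, as below.  */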
6847 static tree
6848 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6849 			  bool *no_add_attrs)
6850 {
6851   if (DECL_P (*node))
6852     {
6853       if (TREE_CODE (*node) != FUNCTION_DECL)
6854 	{
6855 	  warning (OPT_Wattributes, "%qE attribute only applies to functions",
6856 		   name);
6857 	  *no_add_attrs = true;
6858 	}
6859       /* FIXME: the argument if any is checked for type attributes;
6860 	 should it be checked for decl ones?  */
6861     }
6862   else
6863     {
6864       if (TREE_CODE (*node) == FUNCTION_TYPE
6865 	  || TREE_CODE (*node) == METHOD_TYPE)
6866 	{
6867 	  if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6868 	    {
6869 	      warning (OPT_Wattributes, "%qE attribute ignored",
6870 		       name);
6871 	      *no_add_attrs = true;
6872 	    }
6873 	}
6874       else if (TREE_CODE (*node) == POINTER_TYPE
6875 	       && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6876 		   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6877 	       && arm_isr_value (args) != ARM_FT_UNKNOWN)
6878 	{
6879 	  *node = build_variant_type_copy (*node);
6880 	  TREE_TYPE (*node) = build_type_attribute_variant
6881 	    (TREE_TYPE (*node),
6882 	     tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6883 	  *no_add_attrs = true;
6884 	}
6885       else
6886 	{
6887 	  /* Possibly pass this attribute on from the type to a decl.  */
6888 	  if (flags & ((int) ATTR_FLAG_DECL_NEXT
6889 		       | (int) ATTR_FLAG_FUNCTION_NEXT
6890 		       | (int) ATTR_FLAG_ARRAY_NEXT))
6891 	    {
6892 	      *no_add_attrs = true;
6893 	      return tree_cons (name, args, NULL_TREE);
6894 	    }
6895 	  else
6896 	    {
6897 	      warning (OPT_Wattributes, "%qE attribute ignored",
6898 		       name);
6899 	    }
6900 	}
6901     }
6902 
6903   return NULL_TREE;
6904 }
6905 
6906 /* Handle a "pcs" attribute; arguments as in struct
6907    attribute_spec.handler.  */
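     /* Example (illustrative): selecting the VFP calling variant for one
        function:
          double dot (const double *, const double *, int)
            __attribute__ ((pcs ("aapcs-vfp")));
        Values other than "aapcs" and "aapcs-vfp" are rejected below.  */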
6908 static tree
6909 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6910 			  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6911 {
6912   if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6913     {
6914       warning (OPT_Wattributes, "%qE attribute ignored", name);
6915       *no_add_attrs = true;
6916     }
6917   return NULL_TREE;
6918 }
6919 
6920 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6921 /* Handle the "notshared" attribute.  This attribute is another way of
6922    requesting hidden visibility.  ARM's compiler supports
6923    "__declspec(notshared)"; we support the same thing via an
6924    attribute.  */
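     /* Illustrative use (assuming a C++ class type):
          class __attribute__ ((notshared)) C { virtual void f (); };
        The handler below simply gives the type's name decl hidden
        visibility.  */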
6925 
6926 static tree
6927 arm_handle_notshared_attribute (tree *node,
6928 				tree name ATTRIBUTE_UNUSED,
6929 				tree args ATTRIBUTE_UNUSED,
6930 				int flags ATTRIBUTE_UNUSED,
6931 				bool *no_add_attrs)
6932 {
6933   tree decl = TYPE_NAME (*node);
6934 
6935   if (decl)
6936     {
6937       DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6938       DECL_VISIBILITY_SPECIFIED (decl) = 1;
6939       *no_add_attrs = false;
6940     }
6941   return NULL_TREE;
6942 }
6943 #endif
6944 
6945 /* This function returns true if a function with declaration FNDECL and type
6946    FNTYPE uses the stack to pass arguments or return variables and false
6947    otherwise.  This is used for functions with the attributes
6948    'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6949    diagnostic messages if the stack is used.  NAME is the name of the attribute
6950    used.  */
6951 
6952 static bool
6953 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6954 {
6955   function_args_iterator args_iter;
6956   CUMULATIVE_ARGS args_so_far_v;
6957   cumulative_args_t args_so_far;
6958   bool first_param = true;
6959   tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6960 
6961   /* Error out if any argument is passed on the stack.  */
6962   arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6963   args_so_far = pack_cumulative_args (&args_so_far_v);
6964   FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6965     {
6966       rtx arg_rtx;
6967       machine_mode arg_mode = TYPE_MODE (arg_type);
6968 
6969       prev_arg_type = arg_type;
6970       if (VOID_TYPE_P (arg_type))
6971 	continue;
6972 
6973       if (!first_param)
6974 	arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6975       arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6976       if (!arg_rtx
6977 	  || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6978 	{
6979 	  error ("%qE attribute not available to functions with arguments "
6980 		 "passed on the stack", name);
6981 	  return true;
6982 	}
6983       first_param = false;
6984     }
6985 
6986   /* Error out for variadic functions since we cannot control how many
6987      arguments will be passed and thus the stack could be used.  stdarg_p ()
6988      is not used for this check, to avoid walking the argument list twice.  */
6989   if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6990     {
6991       error ("%qE attribute not available to functions with variable number "
6992 	     "of arguments", name);
6993       return true;
6994     }
6995 
6996   /* Error out if return value is passed on the stack.  */
6997   ret_type = TREE_TYPE (fntype);
6998   if (arm_return_in_memory (ret_type, fntype))
6999     {
7000       error ("%qE attribute not available to functions that return value on "
7001 	     "the stack", name);
7002       return true;
7003     }
7004   return false;
7005 }
7006 
7007 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7008    function will check whether the attribute is allowed here and will add the
7009    attribute to the function declaration tree or otherwise issue a warning.  */
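     /* Illustrative use (requires -mcmse on an ARMv8-M target):
          int __attribute__ ((cmse_nonsecure_entry)) get_key (int slot);
        Arguments and the return value must fit in registers, as enforced by
        cmse_func_args_or_return_in_stack above.  */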
7010 
7011 static tree
7012 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7013 				 tree /* args */,
7014 				 int /* flags */,
7015 				 bool *no_add_attrs)
7016 {
7017   tree fndecl;
7018 
7019   if (!use_cmse)
7020     {
7021       *no_add_attrs = true;
7022       warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
7023 	       name);
7024       return NULL_TREE;
7025     }
7026 
7027   /* Ignore attribute for function types.  */
7028   if (TREE_CODE (*node) != FUNCTION_DECL)
7029     {
7030       warning (OPT_Wattributes, "%qE attribute only applies to functions",
7031 	       name);
7032       *no_add_attrs = true;
7033       return NULL_TREE;
7034     }
7035 
7036   fndecl = *node;
7037 
7038   /* Warn for static linkage functions.  */
7039   if (!TREE_PUBLIC (fndecl))
7040     {
7041       warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7042 	       "with static linkage", name);
7043       *no_add_attrs = true;
7044       return NULL_TREE;
7045     }
7046 
7047   *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7048 						TREE_TYPE (fndecl));
7049   return NULL_TREE;
7050 }
7051 
7052 
7053 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7054    function will check whether the attribute is allowed here and will add the
7055    attribute to the function type tree or otherwise issue a diagnostic.  The
7056    reason we check this at declaration time is to only allow the use of the
7057    attribute with declarations of function pointers and not function
7058    declarations.  This function checks NODE is of the expected type and issues
7059    diagnostics otherwise using NAME.  If it is not of the expected type
7060    *NO_ADD_ATTRS will be set to true.  */
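     /* Illustrative declaration of a non-secure function pointer (requires
        -mcmse):
          void (*ns_cb) (int) __attribute__ ((cmse_nonsecure_call));
        The attribute is only accepted on such pointer declarations, not on
        ordinary function declarations, as checked below.  */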
7061 
7062 static tree
7063 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7064 				 tree /* args */,
7065 				 int /* flags */,
7066 				 bool *no_add_attrs)
7067 {
7068   tree decl = NULL_TREE, fntype = NULL_TREE;
7069   tree type;
7070 
7071   if (!use_cmse)
7072     {
7073       *no_add_attrs = true;
7074       warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
7075 	       name);
7076       return NULL_TREE;
7077     }
7078 
7079   if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7080     {
7081       decl = *node;
7082       fntype = TREE_TYPE (decl);
7083     }
7084 
7085   while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7086     fntype = TREE_TYPE (fntype);
7087 
7088   if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7089     {
7090 	warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7091 		 "function pointer", name);
7092 	*no_add_attrs = true;
7093 	return NULL_TREE;
7094     }
7095 
7096   *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7097 
7098   if (*no_add_attrs)
7099     return NULL_TREE;
7100 
7101   /* Prevent trees being shared among function types with and without
7102      cmse_nonsecure_call attribute.  */
7103   type = TREE_TYPE (decl);
7104 
7105   type = build_distinct_type_copy (type);
7106   TREE_TYPE (decl) = type;
7107   fntype = type;
7108 
7109   while (TREE_CODE (fntype) != FUNCTION_TYPE)
7110     {
7111       type = fntype;
7112       fntype = TREE_TYPE (fntype);
7113       fntype = build_distinct_type_copy (fntype);
7114       TREE_TYPE (type) = fntype;
7115     }
7116 
7117   /* Construct a type attribute and add it to the function type.  */
7118   tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7119 			  TYPE_ATTRIBUTES (fntype));
7120   TYPE_ATTRIBUTES (fntype) = attrs;
7121   return NULL_TREE;
7122 }
7123 
7124 /* Return 0 if the attributes for two types are incompatible, 1 if they
7125    are compatible, and 2 if they are nearly compatible (which causes a
7126    warning to be generated).  */
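     /* For example (illustration only): a pointer to a function declared
        with __attribute__ ((long_call)) is not compatible with a pointer to
        a plain function, because only one of the two types carries a
        call-type attribute; the function returns 0 in that case.  */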
7127 static int
7128 arm_comp_type_attributes (const_tree type1, const_tree type2)
7129 {
7130   int l1, l2, s1, s2;
7131 
7132   /* Check for mismatch of non-default calling convention.  */
7133   if (TREE_CODE (type1) != FUNCTION_TYPE)
7134     return 1;
7135 
7136   /* Check for mismatched call attributes.  */
7137   l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7138   l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7139   s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7140   s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7141 
7142   /* Only bother to check if an attribute is defined.  */
7143   if (l1 | l2 | s1 | s2)
7144     {
7145       /* If one type has an attribute, the other must have the same attribute.  */
7146       if ((l1 != l2) || (s1 != s2))
7147 	return 0;
7148 
7149       /* Disallow mixed attributes.  */
7150       if ((l1 & s2) || (l2 & s1))
7151 	return 0;
7152     }
7153 
7154   /* Check for mismatched ISR attribute.  */
7155   l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7156   if (! l1)
7157     l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7158   l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7159   if (! l2)
7160     l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7161   if (l1 != l2)
7162     return 0;
7163 
7164   l1 = lookup_attribute ("cmse_nonsecure_call",
7165 			 TYPE_ATTRIBUTES (type1)) != NULL;
7166   l2 = lookup_attribute ("cmse_nonsecure_call",
7167 			 TYPE_ATTRIBUTES (type2)) != NULL;
7168 
7169   if (l1 != l2)
7170     return 0;
7171 
7172   return 1;
7173 }
7174 
7175 /* Assign default attributes to a newly defined type.  This is used to
7176    set short_call/long_call attributes for function types of
7177    functions defined inside corresponding #pragma scopes.  */
7178 static void
7179 arm_set_default_type_attributes (tree type)
7180 {
7181   /* Add __attribute__ ((long_call)) to all functions when inside
7182      #pragma long_calls, or __attribute__ ((short_call)) when inside
7183      #pragma no_long_calls.  */
7184   if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7185     {
7186       tree type_attr_list, attr_name;
7187       type_attr_list = TYPE_ATTRIBUTES (type);
7188 
7189       if (arm_pragma_long_calls == LONG)
7190  	attr_name = get_identifier ("long_call");
7191       else if (arm_pragma_long_calls == SHORT)
7192  	attr_name = get_identifier ("short_call");
7193       else
7194  	return;
7195 
7196       type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7197       TYPE_ATTRIBUTES (type) = type_attr_list;
7198     }
7199 }
7200 
7201 /* Return true if DECL is known to be linked into section SECTION.  */
7202 
7203 static bool
7204 arm_function_in_section_p (tree decl, section *section)
7205 {
7206   /* We can only be certain about the prevailing symbol definition.  */
7207   if (!decl_binds_to_current_def_p (decl))
7208     return false;
7209 
7210   /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
7211   if (!DECL_SECTION_NAME (decl))
7212     {
7213       /* Make sure that we will not create a unique section for DECL.  */
7214       if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7215 	return false;
7216     }
7217 
7218   return function_section (decl) == section;
7219 }
7220 
7221 /* Return nonzero if a 32-bit "long_call" should be generated for
7222    a call from the current function to DECL.  We generate a long_call
7223    if the function:
7224 
7225         a.  has an __attribute__ ((long_call))
7226      or b.  is within the scope of a #pragma long_calls
7227      or c.  the -mlong-calls command line switch has been specified
7228 
7229    However we do not generate a long call if the function:
7230 
7231         d.  has an __attribute__ ((short_call))
7232      or e.  is inside the scope of a #pragma no_long_calls
7233      or f.  is defined in the same section as the current function.  */
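     /* For example (illustrative declarations only):
          extern void far_func (void) __attribute__ ((long_call));
          extern void near_func (void) __attribute__ ((short_call));
        far_func is always reached via a full 32-bit call sequence, while
        near_func is always called with an ordinary BL, even under
        -mlong-calls.  */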
7234 
7235 bool
7236 arm_is_long_call_p (tree decl)
7237 {
7238   tree attrs;
7239 
7240   if (!decl)
7241     return TARGET_LONG_CALLS;
7242 
7243   attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7244   if (lookup_attribute ("short_call", attrs))
7245     return false;
7246 
7247   /* For "f", be conservative, and only cater for cases in which the
7248      whole of the current function is placed in the same section.  */
7249   if (!flag_reorder_blocks_and_partition
7250       && TREE_CODE (decl) == FUNCTION_DECL
7251       && arm_function_in_section_p (decl, current_function_section ()))
7252     return false;
7253 
7254   if (lookup_attribute ("long_call", attrs))
7255     return true;
7256 
7257   return TARGET_LONG_CALLS;
7258 }
7259 
7260 /* Return nonzero if it is ok to make a tail-call to DECL.  */
7261 static bool
7262 arm_function_ok_for_sibcall (tree decl, tree exp)
7263 {
7264   unsigned long func_type;
7265 
7266   if (cfun->machine->sibcall_blocked)
7267     return false;
7268 
7269   /* Never tailcall something if we are generating code for Thumb-1.  */
7270   if (TARGET_THUMB1)
7271     return false;
7272 
7273   /* The PIC register is live on entry to VxWorks PLT entries, so we
7274      must make the call before restoring the PIC register.  */
7275   if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7276     return false;
7277 
7278   /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7279      may be used both as target of the call and base register for restoring
7280      the VFP registers  */
7281   if (TARGET_APCS_FRAME && TARGET_ARM
7282       && TARGET_HARD_FLOAT
7283       && decl && arm_is_long_call_p (decl))
7284     return false;
7285 
7286   /* If we are interworking and the function is not declared static
7287      then we can't tail-call it unless we know that it exists in this
7288      compilation unit (since it might be a Thumb routine).  */
7289   if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7290       && !TREE_ASM_WRITTEN (decl))
7291     return false;
7292 
7293   func_type = arm_current_func_type ();
7294   /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
7295   if (IS_INTERRUPT (func_type))
7296     return false;
7297 
7298   /* ARMv8-M non-secure entry functions need to return with bxns which is only
7299      generated for entry functions themselves.  */
7300   if (IS_CMSE_ENTRY (arm_current_func_type ()))
7301     return false;
7302 
7303   /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7304      this would complicate matters for later code generation.  */
7305   if (TREE_CODE (exp) == CALL_EXPR)
7306     {
7307       tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7308       if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7309 	return false;
7310     }
7311 
7312   if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7313     {
7314       /* Check that the return value locations are the same.  For
7315 	 example that we aren't returning a value from the sibling in
7316 	 a VFP register but then need to transfer it to a core
7317 	 register.  */
7318       rtx a, b;
7319       tree decl_or_type = decl;
7320 
7321       /* If it is an indirect function pointer, get the function type.  */
7322       if (!decl)
7323 	decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7324 
7325       a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7326       b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7327 			      cfun->decl, false);
7328       if (!rtx_equal_p (a, b))
7329 	return false;
7330     }
7331 
7332   /* Never tailcall if function may be called with a misaligned SP.  */
7333   if (IS_STACKALIGN (func_type))
7334     return false;
7335 
7336   /* The AAPCS says that, on bare-metal, calls to unresolved weak
7337      references should become a NOP.  Don't convert such calls into
7338      sibling calls.  */
7339   if (TARGET_AAPCS_BASED
7340       && arm_abi == ARM_ABI_AAPCS
7341       && decl
7342       && DECL_WEAK (decl))
7343     return false;
7344 
7345   /* We cannot do a tailcall for an indirect call by descriptor if all the
7346      argument registers are used because the only register left to load the
7347      address is IP and it will already contain the static chain.  */
7348   if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7349     {
7350       tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7351       CUMULATIVE_ARGS cum;
7352       cumulative_args_t cum_v;
7353 
7354       arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7355       cum_v = pack_cumulative_args (&cum);
7356 
7357       for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7358 	{
7359 	  tree type = TREE_VALUE (t);
7360 	  if (!VOID_TYPE_P (type))
7361 	    arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7362 	}
7363 
7364       if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7365 	return false;
7366     }
7367 
7368   /* Everything else is ok.  */
7369   return true;
7370 }
7371 
7372 
7373 /* Addressing mode support functions.  */
7374 
7375 /* Return nonzero if X is a legitimate immediate operand when compiling
7376    for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
7377 int
7378 legitimate_pic_operand_p (rtx x)
7379 {
7380   if (GET_CODE (x) == SYMBOL_REF
7381       || (GET_CODE (x) == CONST
7382 	  && GET_CODE (XEXP (x, 0)) == PLUS
7383 	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7384     return 0;
7385 
7386   return 1;
7387 }
7388 
7389 /* Record that the current function needs a PIC register.  Initialize
7390    cfun->machine->pic_reg if we have not already done so.  */
7391 
7392 static void
7393 require_pic_register (void)
7394 {
7395   /* A lot of the logic here is made obscure by the fact that this
7396      routine gets called as part of the rtx cost estimation process.
7397      We don't want those calls to affect any assumptions about the real
7398      function; and further, we can't call entry_of_function() until we
7399      start the real expansion process.  */
7400   if (!crtl->uses_pic_offset_table)
7401     {
7402       gcc_assert (can_create_pseudo_p ());
7403       if (arm_pic_register != INVALID_REGNUM
7404 	  && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7405 	{
7406 	  if (!cfun->machine->pic_reg)
7407 	    cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7408 
7409 	  /* Play games to avoid marking the function as needing pic
7410 	     if we are being called as part of the cost-estimation
7411 	     process.  */
7412 	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7413 	    crtl->uses_pic_offset_table = 1;
7414 	}
7415       else
7416 	{
7417 	  rtx_insn *seq, *insn;
7418 
7419 	  if (!cfun->machine->pic_reg)
7420 	    cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7421 
7422 	  /* Play games to avoid marking the function as needing pic
7423 	     if we are being called as part of the cost-estimation
7424 	     process.  */
7425 	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7426 	    {
7427 	      crtl->uses_pic_offset_table = 1;
7428 	      start_sequence ();
7429 
7430 	      if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7431 		  && arm_pic_register > LAST_LO_REGNUM)
7432 		emit_move_insn (cfun->machine->pic_reg,
7433 				gen_rtx_REG (Pmode, arm_pic_register));
7434 	      else
7435 		arm_load_pic_register (0UL);
7436 
7437 	      seq = get_insns ();
7438 	      end_sequence ();
7439 
7440 	      for (insn = seq; insn; insn = NEXT_INSN (insn))
7441 		if (INSN_P (insn))
7442 		  INSN_LOCATION (insn) = prologue_location;
7443 
7444 	      /* We can be called during expansion of PHI nodes, where
7445 	         we can't yet emit instructions directly in the final
7446 		 insn stream.  Queue the insns on the entry edge, they will
7447 		 be committed after everything else is expanded.  */
7448 	      insert_insn_on_edge (seq,
7449 				   single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7450 	    }
7451 	}
7452     }
7453 }
7454 
7455 rtx
7456 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7457 {
7458   if (GET_CODE (orig) == SYMBOL_REF
7459       || GET_CODE (orig) == LABEL_REF)
7460     {
7461       if (reg == 0)
7462 	{
7463 	  gcc_assert (can_create_pseudo_p ());
7464 	  reg = gen_reg_rtx (Pmode);
7465 	}
7466 
7467       /* VxWorks does not impose a fixed gap between segments; the run-time
7468 	 gap can be different from the object-file gap.  We therefore can't
7469 	 use GOTOFF unless we are absolutely sure that the symbol is in the
7470 	 same segment as the GOT.  Unfortunately, the flexibility of linker
7471 	 scripts means that we can't be sure of that in general, so assume
7472 	 that GOTOFF is never valid on VxWorks.  */
7473       /* References to weak symbols cannot be resolved locally: they
7474 	 may be overridden by a non-weak definition at link time.  */
7475       rtx_insn *insn;
7476       if ((GET_CODE (orig) == LABEL_REF
7477 	   || (GET_CODE (orig) == SYMBOL_REF
7478 	       && SYMBOL_REF_LOCAL_P (orig)
7479 	       && (SYMBOL_REF_DECL (orig)
7480 		   ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7481 	  && NEED_GOT_RELOC
7482 	  && arm_pic_data_is_text_relative)
7483 	insn = arm_pic_static_addr (orig, reg);
7484       else
7485 	{
7486 	  rtx pat;
7487 	  rtx mem;
7488 
7489 	  /* If this function doesn't have a pic register, create one now.  */
7490 	  require_pic_register ();
7491 
7492 	  pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7493 
7494 	  /* Make the MEM as close to a constant as possible.  */
7495 	  mem = SET_SRC (pat);
7496 	  gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7497 	  MEM_READONLY_P (mem) = 1;
7498 	  MEM_NOTRAP_P (mem) = 1;
7499 
7500 	  insn = emit_insn (pat);
7501 	}
7502 
7503       /* Put a REG_EQUAL note on this insn, so that it can be optimized
7504 	 by loop.  */
7505       set_unique_reg_note (insn, REG_EQUAL, orig);
7506 
7507       return reg;
7508     }
7509   else if (GET_CODE (orig) == CONST)
7510     {
7511       rtx base, offset;
7512 
7513       if (GET_CODE (XEXP (orig, 0)) == PLUS
7514 	  && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7515 	return orig;
7516 
7517       /* Handle the case where we have: const (UNSPEC_TLS).  */
7518       if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7519 	  && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7520 	return orig;
7521 
7522       /* Handle the case where we have:
7523          const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
7524          CONST_INT.  */
7525       if (GET_CODE (XEXP (orig, 0)) == PLUS
7526           && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7527           && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7528         {
7529 	  gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7530 	  return orig;
7531 	}
7532 
7533       if (reg == 0)
7534 	{
7535 	  gcc_assert (can_create_pseudo_p ());
7536 	  reg = gen_reg_rtx (Pmode);
7537 	}
7538 
7539       gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7540 
7541       base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7542       offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7543 				       base == reg ? 0 : reg);
7544 
7545       if (CONST_INT_P (offset))
7546 	{
7547 	  /* The base register doesn't really matter, we only want to
7548 	     test the index for the appropriate mode.  */
7549 	  if (!arm_legitimate_index_p (mode, offset, SET, 0))
7550 	    {
7551 	      gcc_assert (can_create_pseudo_p ());
7552 	      offset = force_reg (Pmode, offset);
7553 	    }
7554 
7555 	  if (CONST_INT_P (offset))
7556 	    return plus_constant (Pmode, base, INTVAL (offset));
7557 	}
7558 
7559       if (GET_MODE_SIZE (mode) > 4
7560 	  && (GET_MODE_CLASS (mode) == MODE_INT
7561 	      || TARGET_SOFT_FLOAT))
7562 	{
7563 	  emit_insn (gen_addsi3 (reg, base, offset));
7564 	  return reg;
7565 	}
7566 
7567       return gen_rtx_PLUS (Pmode, base, offset);
7568     }
7569 
7570   return orig;
7571 }
7572 
7573 
7574 /* Find a spare register to use during the prolog of a function.  */
7575 
7576 static int
7577 thumb_find_work_register (unsigned long pushed_regs_mask)
7578 {
7579   int reg;
7580 
7581   /* Check the argument registers first as these are call-used.  The
7582      register allocation order means that sometimes r3 might be used
7583      but earlier argument registers might not, so check them all.  */
7584   for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7585     if (!df_regs_ever_live_p (reg))
7586       return reg;
7587 
7588   /* Before going on to check the call-saved registers we can try a couple
7589      more ways of deducing that r3 is available.  The first is when we are
7590      pushing anonymous arguments onto the stack and we have less than 4
7591      registers worth of fixed arguments(*).  In this case r3 will be part of
7592      the variable argument list and so we can be sure that it will be
7593      pushed right at the start of the function.  Hence it will be available
7594      for the rest of the prologue.
7595      (*): i.e. crtl->args.pretend_args_size is greater than 0.  */
7596   if (cfun->machine->uses_anonymous_args
7597       && crtl->args.pretend_args_size > 0)
7598     return LAST_ARG_REGNUM;
7599 
7600   /* The other case is when we have fixed arguments but less than 4 registers
7601      worth.  In this case r3 might be used in the body of the function, but
7602      it is not being used to convey an argument into the function.  In theory
7603      we could just check crtl->args.size to see how many bytes are
7604      being passed in argument registers, but it seems that it is unreliable.
7605      Sometimes it will have the value 0 when in fact arguments are being
7606      passed.  (See testcase execute/20021111-1.c for an example).  So we also
7607      check the args_info.nregs field as well.  The problem with this field is
7608      that it makes no allowances for arguments that are passed to the
7609      function but which are not used.  Hence we could miss an opportunity
7610      when a function has an unused argument in r3.  But it is better to be
7611      safe than to be sorry.  */
7612   if (! cfun->machine->uses_anonymous_args
7613       && crtl->args.size >= 0
7614       && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7615       && (TARGET_AAPCS_BASED
7616 	  ? crtl->args.info.aapcs_ncrn < 4
7617 	  : crtl->args.info.nregs < 4))
7618     return LAST_ARG_REGNUM;
7619 
7620   /* Otherwise look for a call-saved register that is going to be pushed.  */
7621   for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7622     if (pushed_regs_mask & (1 << reg))
7623       return reg;
7624 
7625   if (TARGET_THUMB2)
7626     {
7627       /* Thumb-2 can use high regs.  */
7628       for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7629 	if (pushed_regs_mask & (1 << reg))
7630 	  return reg;
7631     }
7632   /* Something went wrong - thumb_compute_save_reg_mask()
7633      should have arranged for a suitable register to be pushed.  */
7634   gcc_unreachable ();
7635 }
7636 
7637 static GTY(()) int pic_labelno;
7638 
7639 /* Generate code to load the PIC register.  In thumb mode SCRATCH is a
7640    low register.  */
7641 
7642 void
7643 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7644 {
7645   rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7646 
7647   if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7648     return;
7649 
7650   gcc_assert (flag_pic);
7651 
7652   pic_reg = cfun->machine->pic_reg;
7653   if (TARGET_VXWORKS_RTP)
7654     {
7655       pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7656       pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7657       emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7658 
7659       emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7660 
7661       pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7662       emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7663     }
7664   else
7665     {
7666       /* We use an UNSPEC rather than a LABEL_REF because this label
7667 	 never appears in the code stream.  */
7668 
7669       labelno = GEN_INT (pic_labelno++);
7670       l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7671       l1 = gen_rtx_CONST (VOIDmode, l1);
7672 
7673       /* On the ARM the PC register contains 'dot + 8' at the time of the
7674 	 addition, on the Thumb it is 'dot + 4'.  */
7675       pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7676       pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7677 				UNSPEC_GOTSYM_OFF);
7678       pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7679 
7680       if (TARGET_32BIT)
7681 	{
7682 	  emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7683 	}
7684       else /* TARGET_THUMB1 */
7685 	{
7686 	  if (arm_pic_register != INVALID_REGNUM
7687 	      && REGNO (pic_reg) > LAST_LO_REGNUM)
7688 	    {
7689 	      /* We will have pushed the pic register, so we should always be
7690 		 able to find a work register.  */
7691 	      pic_tmp = gen_rtx_REG (SImode,
7692 				     thumb_find_work_register (saved_regs));
7693 	      emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7694 	      emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7695 	      emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7696 	    }
7697 	  else if (arm_pic_register != INVALID_REGNUM
7698 		   && arm_pic_register > LAST_LO_REGNUM
7699 		   && REGNO (pic_reg) <= LAST_LO_REGNUM)
7700 	    {
7701 	      emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7702 	      emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7703 	      emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7704 	    }
7705 	  else
7706 	    emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7707 	}
7708     }
7709 
7710   /* Need to emit this whether or not we obey regdecls,
7711      since setjmp/longjmp can cause life info to screw up.  */
7712   emit_use (pic_reg);
7713 }
7714 
7715 /* Generate code to load the address of a static var when flag_pic is set.  */
7716 static rtx_insn *
7717 arm_pic_static_addr (rtx orig, rtx reg)
7718 {
7719   rtx l1, labelno, offset_rtx;
7720 
7721   gcc_assert (flag_pic);
7722 
7723   /* We use an UNSPEC rather than a LABEL_REF because this label
7724      never appears in the code stream.  */
7725   labelno = GEN_INT (pic_labelno++);
7726   l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7727   l1 = gen_rtx_CONST (VOIDmode, l1);
7728 
7729   /* On the ARM the PC register contains 'dot + 8' at the time of the
7730      addition, on the Thumb it is 'dot + 4'.  */
7731   offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7732   offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7733                                UNSPEC_SYMBOL_OFFSET);
7734   offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7735 
7736   return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7737 }
7738 
7739 /* Return nonzero if X is valid as an ARM state addressing register.  */
7740 static int
7741 arm_address_register_rtx_p (rtx x, int strict_p)
7742 {
7743   int regno;
7744 
7745   if (!REG_P (x))
7746     return 0;
7747 
7748   regno = REGNO (x);
7749 
7750   if (strict_p)
7751     return ARM_REGNO_OK_FOR_BASE_P (regno);
7752 
7753   return (regno <= LAST_ARM_REGNUM
7754 	  || regno >= FIRST_PSEUDO_REGISTER
7755 	  || regno == FRAME_POINTER_REGNUM
7756 	  || regno == ARG_POINTER_REGNUM);
7757 }
7758 
7759 /* Return TRUE if this rtx is the difference of a symbol and a label,
7760    and will reduce to a PC-relative relocation in the object file.
7761    Expressions like this can be left alone when generating PIC, rather
7762    than forced through the GOT.  */
7763 static int
7764 pcrel_constant_p (rtx x)
7765 {
7766   if (GET_CODE (x) == MINUS)
7767     return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7768 
7769   return FALSE;
7770 }
7771 
7772 /* Return true if X will surely end up in an index register after next
7773    splitting pass.  */
7774 static bool
7775 will_be_in_index_register (const_rtx x)
7776 {
7777   /* arm.md: calculate_pic_address will split this into a register.  */
7778   return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7779 }
7780 
7781 /* Return nonzero if X is a valid ARM state address operand.  */
7782 int
7783 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7784 			        int strict_p)
7785 {
7786   bool use_ldrd;
7787   enum rtx_code code = GET_CODE (x);
7788 
7789   if (arm_address_register_rtx_p (x, strict_p))
7790     return 1;
7791 
7792   use_ldrd = (TARGET_LDRD
7793 	      && (mode == DImode || mode == DFmode));
7794 
7795   if (code == POST_INC || code == PRE_DEC
7796       || ((code == PRE_INC || code == POST_DEC)
7797 	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7798     return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7799 
7800   else if ((code == POST_MODIFY || code == PRE_MODIFY)
7801 	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7802 	   && GET_CODE (XEXP (x, 1)) == PLUS
7803 	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7804     {
7805       rtx addend = XEXP (XEXP (x, 1), 1);
7806 
7807       /* Don't allow ldrd post increment by register because it's hard
7808 	 to fixup invalid register choices.  */
7809       if (use_ldrd
7810 	  && GET_CODE (x) == POST_MODIFY
7811 	  && REG_P (addend))
7812 	return 0;
7813 
7814       return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7815 	      && arm_legitimate_index_p (mode, addend, outer, strict_p));
7816     }
7817 
7818   /* After reload constants split into minipools will have addresses
7819      from a LABEL_REF.  */
7820   else if (reload_completed
7821 	   && (code == LABEL_REF
7822 	       || (code == CONST
7823 		   && GET_CODE (XEXP (x, 0)) == PLUS
7824 		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7825 		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7826     return 1;
7827 
7828   else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7829     return 0;
7830 
7831   else if (code == PLUS)
7832     {
7833       rtx xop0 = XEXP (x, 0);
7834       rtx xop1 = XEXP (x, 1);
7835 
7836       return ((arm_address_register_rtx_p (xop0, strict_p)
7837 	       && ((CONST_INT_P (xop1)
7838 		    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7839 		   || (!strict_p && will_be_in_index_register (xop1))))
7840 	      || (arm_address_register_rtx_p (xop1, strict_p)
7841 		  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7842     }
7843 
7844 #if 0
7845   /* Reload currently can't handle MINUS, so disable this for now */
7846   else if (GET_CODE (x) == MINUS)
7847     {
7848       rtx xop0 = XEXP (x, 0);
7849       rtx xop1 = XEXP (x, 1);
7850 
7851       return (arm_address_register_rtx_p (xop0, strict_p)
7852 	      && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7853     }
7854 #endif
7855 
7856   else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7857 	   && code == SYMBOL_REF
7858 	   && CONSTANT_POOL_ADDRESS_P (x)
7859 	   && ! (flag_pic
7860 		 && symbol_mentioned_p (get_pool_constant (x))
7861 		 && ! pcrel_constant_p (get_pool_constant (x))))
7862     return 1;
7863 
7864   return 0;
7865 }
7866 
7867 /* Return true if we can avoid creating a constant pool entry for x.  */
7868 static bool
7869 can_avoid_literal_pool_for_label_p (rtx x)
7870 {
7871   /* Normally we can assign constant values to target registers without
7872      the help of a constant pool.  But there are cases where we have to use
7873      the constant pool, such as:
7874      1) assigning a label to a register.
7875      2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7876 
7877      A constant pool access of the form:
7878      (set (reg r0) (mem (symbol_ref (".LC0"))))
7879      will cause the use of the literal pool (later, in arm_reorg).
7880      So here we mark such a form as invalid; the compiler will then
7881      adjust it into:
7882      (set (reg r0) (symbol_ref (".LC0")))
7883      (set (reg r0) (mem (reg r0))).
7884      No extra register is required, and (mem (reg r0)) won't cause the use
7885      of literal pools.  */
7886   if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7887       && CONSTANT_POOL_ADDRESS_P (x))
7888     return 1;
7889   return 0;
7890 }
7891 
7892 
7893 /* Return nonzero if X is a valid Thumb-2 address operand.  */
7894 static int
7895 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7896 {
7897   bool use_ldrd;
7898   enum rtx_code code = GET_CODE (x);
7899 
7900   if (arm_address_register_rtx_p (x, strict_p))
7901     return 1;
7902 
7903   use_ldrd = (TARGET_LDRD
7904 	      && (mode == DImode || mode == DFmode));
7905 
7906   if (code == POST_INC || code == PRE_DEC
7907       || ((code == PRE_INC || code == POST_DEC)
7908 	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7909     return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7910 
7911   else if ((code == POST_MODIFY || code == PRE_MODIFY)
7912 	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7913 	   && GET_CODE (XEXP (x, 1)) == PLUS
7914 	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7915     {
7916       /* Thumb-2 only has autoincrement by constant.  */
7917       rtx addend = XEXP (XEXP (x, 1), 1);
7918       HOST_WIDE_INT offset;
7919 
7920       if (!CONST_INT_P (addend))
7921 	return 0;
7922 
7923       offset = INTVAL(addend);
7924       if (GET_MODE_SIZE (mode) <= 4)
7925 	return (offset > -256 && offset < 256);
7926 
7927       return (use_ldrd && offset > -1024 && offset < 1024
7928 	      && (offset & 3) == 0);
7929     }
7930 
7931   /* After reload constants split into minipools will have addresses
7932      from a LABEL_REF.  */
7933   else if (reload_completed
7934 	   && (code == LABEL_REF
7935 	       || (code == CONST
7936 		   && GET_CODE (XEXP (x, 0)) == PLUS
7937 		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7938 		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7939     return 1;
7940 
7941   else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7942     return 0;
7943 
7944   else if (code == PLUS)
7945     {
7946       rtx xop0 = XEXP (x, 0);
7947       rtx xop1 = XEXP (x, 1);
7948 
7949       return ((arm_address_register_rtx_p (xop0, strict_p)
7950 	       && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7951 		   || (!strict_p && will_be_in_index_register (xop1))))
7952 	      || (arm_address_register_rtx_p (xop1, strict_p)
7953 		  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7954     }
7955 
7956   else if (can_avoid_literal_pool_for_label_p (x))
7957     return 0;
7958 
7959   else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7960 	   && code == SYMBOL_REF
7961 	   && CONSTANT_POOL_ADDRESS_P (x)
7962 	   && ! (flag_pic
7963 		 && symbol_mentioned_p (get_pool_constant (x))
7964 		 && ! pcrel_constant_p (get_pool_constant (x))))
7965     return 1;
7966 
7967   return 0;
7968 }
7969 
7970 /* Return nonzero if INDEX is valid for an address index operand in
7971    ARM state.  */
7972 static int
7973 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7974 			int strict_p)
7975 {
7976   HOST_WIDE_INT range;
7977   enum rtx_code code = GET_CODE (index);
7978 
7979   /* Standard coprocessor addressing modes.  */
7980   if (TARGET_HARD_FLOAT
7981       && (mode == SFmode || mode == DFmode))
7982     return (code == CONST_INT && INTVAL (index) < 1024
7983 	    && INTVAL (index) > -1024
7984 	    && (INTVAL (index) & 3) == 0);
7985 
7986   /* For quad modes, we restrict the constant offset to be slightly less
7987      than what the instruction format permits.  We do this because for
7988      quad mode moves, we will actually decompose them into two separate
7989      double-mode reads or writes.  INDEX must therefore be a valid
7990      (double-mode) offset and so should INDEX+8.  */
7991   if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7992     return (code == CONST_INT
7993 	    && INTVAL (index) < 1016
7994 	    && INTVAL (index) > -1024
7995 	    && (INTVAL (index) & 3) == 0);
7996 
7997   /* We have no such constraint on double mode offsets, so we permit the
7998      full range of the instruction format.  */
7999   if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8000     return (code == CONST_INT
8001 	    && INTVAL (index) < 1024
8002 	    && INTVAL (index) > -1024
8003 	    && (INTVAL (index) & 3) == 0);
8004 
8005   if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8006     return (code == CONST_INT
8007 	    && INTVAL (index) < 1024
8008 	    && INTVAL (index) > -1024
8009 	    && (INTVAL (index) & 3) == 0);
8010 
8011   if (arm_address_register_rtx_p (index, strict_p)
8012       && (GET_MODE_SIZE (mode) <= 4))
8013     return 1;
8014 
8015   if (mode == DImode || mode == DFmode)
8016     {
8017       if (code == CONST_INT)
8018 	{
8019 	  HOST_WIDE_INT val = INTVAL (index);
8020 
8021 	  /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8022 	     If vldr is selected it uses arm_coproc_mem_operand.  */
8023 	  if (TARGET_LDRD)
8024 	    return val > -256 && val < 256;
8025 	  else
8026 	    return val > -4096 && val < 4092;
8027 	}
8028 
8029       return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8030     }
8031 
8032   if (GET_MODE_SIZE (mode) <= 4
8033       && ! (arm_arch4
8034 	    && (mode == HImode
8035 		|| mode == HFmode
8036 		|| (mode == QImode && outer == SIGN_EXTEND))))
8037     {
8038       if (code == MULT)
8039 	{
8040 	  rtx xiop0 = XEXP (index, 0);
8041 	  rtx xiop1 = XEXP (index, 1);
8042 
8043 	  return ((arm_address_register_rtx_p (xiop0, strict_p)
8044 		   && power_of_two_operand (xiop1, SImode))
8045 		  || (arm_address_register_rtx_p (xiop1, strict_p)
8046 		      && power_of_two_operand (xiop0, SImode)));
8047 	}
8048       else if (code == LSHIFTRT || code == ASHIFTRT
8049 	       || code == ASHIFT || code == ROTATERT)
8050 	{
8051 	  rtx op = XEXP (index, 1);
8052 
8053 	  return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8054 		  && CONST_INT_P (op)
8055 		  && INTVAL (op) > 0
8056 		  && INTVAL (op) <= 31);
8057 	}
8058     }
8059 
8060   /* For ARM v4 we may be doing a sign-extend operation during the
8061      load.  */
8062   if (arm_arch4)
8063     {
8064       if (mode == HImode
8065 	  || mode == HFmode
8066 	  || (outer == SIGN_EXTEND && mode == QImode))
8067 	range = 256;
8068       else
8069 	range = 4096;
8070     }
8071   else
8072     range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8073 
8074   return (code == CONST_INT
8075 	  && INTVAL (index) < range
8076 	  && INTVAL (index) > -range);
8077 }
8078 
8079 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8080    index operand.  i.e. 1, 2, 4 or 8.  */
8081 static bool
8082 thumb2_index_mul_operand (rtx op)
8083 {
8084   HOST_WIDE_INT val;
8085 
8086   if (!CONST_INT_P (op))
8087     return false;
8088 
8089   val = INTVAL(op);
8090   return (val == 1 || val == 2 || val == 4 || val == 8);
8091 }
8092 
8093 /* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
8094 static int
8095 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8096 {
8097   enum rtx_code code = GET_CODE (index);
8098 
8099   /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
8100   /* Standard coprocessor addressing modes.  */
8101   if (TARGET_HARD_FLOAT
8102       && (mode == SFmode || mode == DFmode))
8103     return (code == CONST_INT && INTVAL (index) < 1024
8104 	    /* Thumb-2 allows only a > -256 index range for its core register
8105 	       load/stores.  Since we allow SF/DF in core registers, we have
8106 	       to use the intersection between -256~4096 (core) and -1024~1024
8107 	       (coprocessor).  */
8108 	    && INTVAL (index) > -256
8109 	    && (INTVAL (index) & 3) == 0);
8110 
8111   if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8112     {
8113       /* For DImode assume values will usually live in core regs
8114 	 and only allow LDRD addressing modes.  */
8115       if (!TARGET_LDRD || mode != DImode)
8116 	return (code == CONST_INT
8117 		&& INTVAL (index) < 1024
8118 		&& INTVAL (index) > -1024
8119 		&& (INTVAL (index) & 3) == 0);
8120     }
8121 
8122   /* For quad modes, we restrict the constant offset to be slightly less
8123      than what the instruction format permits.  We do this because for
8124      quad mode moves, we will actually decompose them into two separate
8125      double-mode reads or writes.  INDEX must therefore be a valid
8126      (double-mode) offset and so should INDEX+8.  */
8127   if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8128     return (code == CONST_INT
8129 	    && INTVAL (index) < 1016
8130 	    && INTVAL (index) > -1024
8131 	    && (INTVAL (index) & 3) == 0);
8132 
8133   /* We have no such constraint on double mode offsets, so we permit the
8134      full range of the instruction format.  */
8135   if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8136     return (code == CONST_INT
8137 	    && INTVAL (index) < 1024
8138 	    && INTVAL (index) > -1024
8139 	    && (INTVAL (index) & 3) == 0);
8140 
8141   if (arm_address_register_rtx_p (index, strict_p)
8142       && (GET_MODE_SIZE (mode) <= 4))
8143     return 1;
8144 
8145   if (mode == DImode || mode == DFmode)
8146     {
8147       if (code == CONST_INT)
8148 	{
8149 	  HOST_WIDE_INT val = INTVAL (index);
8150 	  /* Thumb-2 ldrd only has reg+const addressing modes.
8151 	     Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8152 	     If vldr is selected it uses arm_coproc_mem_operand.  */
8153 	  if (TARGET_LDRD)
8154 	    return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8155 	  else
8156 	    return IN_RANGE (val, -255, 4095 - 4);
8157 	}
8158       else
8159 	return 0;
8160     }
8161 
8162   if (code == MULT)
8163     {
8164       rtx xiop0 = XEXP (index, 0);
8165       rtx xiop1 = XEXP (index, 1);
8166 
8167       return ((arm_address_register_rtx_p (xiop0, strict_p)
8168 	       && thumb2_index_mul_operand (xiop1))
8169 	      || (arm_address_register_rtx_p (xiop1, strict_p)
8170 		  && thumb2_index_mul_operand (xiop0)));
8171     }
8172   else if (code == ASHIFT)
8173     {
8174       rtx op = XEXP (index, 1);
8175 
8176       return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8177 	      && CONST_INT_P (op)
8178 	      && INTVAL (op) > 0
8179 	      && INTVAL (op) <= 3);
8180     }
8181 
8182   return (code == CONST_INT
8183 	  && INTVAL (index) < 4096
8184 	  && INTVAL (index) > -256);
8185 }
8186 
8187 /* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
8188 static int
8189 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8190 {
8191   int regno;
8192 
8193   if (!REG_P (x))
8194     return 0;
8195 
8196   regno = REGNO (x);
8197 
8198   if (strict_p)
8199     return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8200 
8201   return (regno <= LAST_LO_REGNUM
8202 	  || regno > LAST_VIRTUAL_REGISTER
8203 	  || regno == FRAME_POINTER_REGNUM
8204 	  || (GET_MODE_SIZE (mode) >= 4
8205 	      && (regno == STACK_POINTER_REGNUM
8206 		  || regno >= FIRST_PSEUDO_REGISTER
8207 		  || x == hard_frame_pointer_rtx
8208 		  || x == arg_pointer_rtx)));
8209 }
8210 
8211 /* Return nonzero if x is a legitimate index register.  This is the case
8212    for any base register that can access a QImode object.  */
8213 inline static int
8214 thumb1_index_register_rtx_p (rtx x, int strict_p)
8215 {
8216   return thumb1_base_register_rtx_p (x, QImode, strict_p);
8217 }
8218 
8219 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8220 
8221    The AP may be eliminated to either the SP or the FP, so we use the
8222    least common denominator, e.g. SImode, and offsets from 0 to 64.
8223 
8224    ??? Verify whether the above is the right approach.
8225 
8226    ??? Also, the FP may be eliminated to the SP, so perhaps that
8227    needs special handling also.
8228 
8229    ??? Look at how the mips16 port solves this problem.  It probably uses
8230    better ways to solve some of these problems.
8231 
8232    Although it is not incorrect, we don't accept QImode and HImode
8233    addresses based on the frame pointer or arg pointer until the
8234    reload pass starts.  This is so that eliminating such addresses
8235    into stack based ones won't produce impossible code.  */
8236 int
8237 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8238 {
8239   if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8240     return 0;
8241 
8242   /* ??? Not clear if this is right.  Experiment.  */
8243   if (GET_MODE_SIZE (mode) < 4
8244       && !(reload_in_progress || reload_completed)
8245       && (reg_mentioned_p (frame_pointer_rtx, x)
8246 	  || reg_mentioned_p (arg_pointer_rtx, x)
8247 	  || reg_mentioned_p (virtual_incoming_args_rtx, x)
8248 	  || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8249 	  || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8250 	  || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8251     return 0;
8252 
8253   /* Accept any base register.  SP only in SImode or larger.  */
8254   else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8255     return 1;
8256 
8257   /* This is PC relative data before arm_reorg runs.  */
8258   else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8259 	   && GET_CODE (x) == SYMBOL_REF
8260            && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8261     return 1;
8262 
8263   /* This is PC relative data after arm_reorg runs.  */
8264   else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8265 	   && reload_completed
8266 	   && (GET_CODE (x) == LABEL_REF
8267 	       || (GET_CODE (x) == CONST
8268 		   && GET_CODE (XEXP (x, 0)) == PLUS
8269 		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8270 		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8271     return 1;
8272 
8273   /* Post-inc indexing only supported for SImode and larger.  */
8274   else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8275 	   && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8276     return 1;
8277 
8278   else if (GET_CODE (x) == PLUS)
8279     {
8280       /* REG+REG address can be any two index registers.  */
8281       /* We disallow FRAME+REG addressing since we know that FRAME
8282 	 will be replaced with STACK, and SP relative addressing only
8283 	 permits SP+OFFSET.  */
8284       if (GET_MODE_SIZE (mode) <= 4
8285 	  && XEXP (x, 0) != frame_pointer_rtx
8286 	  && XEXP (x, 1) != frame_pointer_rtx
8287 	  && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8288 	  && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8289 	      || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8290 	return 1;
8291 
8292       /* REG+const has a 5- to 7-bit offset for non-SP registers.  */
8293       else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8294 		|| XEXP (x, 0) == arg_pointer_rtx)
8295 	       && CONST_INT_P (XEXP (x, 1))
8296 	       && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8297 	return 1;
8298 
8299       /* REG+const has a 10-bit offset for SP, but only SImode and
8300 	 larger are supported.  */
8301       /* ??? Should probably check for DI/DFmode overflow here
8302 	 just like GO_IF_LEGITIMATE_OFFSET does.  */
8303       else if (REG_P (XEXP (x, 0))
8304 	       && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8305 	       && GET_MODE_SIZE (mode) >= 4
8306 	       && CONST_INT_P (XEXP (x, 1))
8307 	       && INTVAL (XEXP (x, 1)) >= 0
8308 	       && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8309 	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
8310 	return 1;
8311 
8312       else if (REG_P (XEXP (x, 0))
8313 	       && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8314 		   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8315 		   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8316 		       && REGNO (XEXP (x, 0))
8317 			  <= LAST_VIRTUAL_POINTER_REGISTER))
8318 	       && GET_MODE_SIZE (mode) >= 4
8319 	       && CONST_INT_P (XEXP (x, 1))
8320 	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
8321 	return 1;
8322     }
8323 
8324   else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8325 	   && GET_MODE_SIZE (mode) == 4
8326 	   && GET_CODE (x) == SYMBOL_REF
8327 	   && CONSTANT_POOL_ADDRESS_P (x)
8328 	   && ! (flag_pic
8329 		 && symbol_mentioned_p (get_pool_constant (x))
8330 		 && ! pcrel_constant_p (get_pool_constant (x))))
8331     return 1;
8332 
8333   return 0;
8334 }
8335 
8336 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8337    instruction of mode MODE.  */
8338 int
8339 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8340 {
8341   switch (GET_MODE_SIZE (mode))
8342     {
8343     case 1:
8344       return val >= 0 && val < 32;
8345 
8346     case 2:
8347       return val >= 0 && val < 64 && (val & 1) == 0;
8348 
8349     default:
8350       return (val >= 0
8351 	      && (val + GET_MODE_SIZE (mode)) <= 128
8352 	      && (val & 3) == 0);
8353     }
8354 }
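
/* A hedged reading of the ranges above (illustrative only): QImode allows
   offsets 0..31, HImode allows even offsets 0..62, SImode allows
   word-aligned offsets 0..124, and an 8-byte mode such as DImode allows
   word-aligned offsets 0..120, since VAL plus the mode size must not
   exceed 128.  */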
8355 
8356 bool
8357 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8358 {
8359   if (TARGET_ARM)
8360     return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8361   else if (TARGET_THUMB2)
8362     return thumb2_legitimate_address_p (mode, x, strict_p);
8363   else /* if (TARGET_THUMB1) */
8364     return thumb1_legitimate_address_p (mode, x, strict_p);
8365 }
8366 
8367 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8368 
8369    Given an rtx X being reloaded into a reg required to be
8370    in class CLASS, return the class of reg to actually use.
8371    In general this is just CLASS, but for the Thumb core registers and
8372    immediate constants we prefer a LO_REGS class or a subset.  */
8373 
8374 static reg_class_t
8375 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8376 {
8377   if (TARGET_32BIT)
8378     return rclass;
8379   else
8380     {
8381       if (rclass == GENERAL_REGS)
8382 	return LO_REGS;
8383       else
8384 	return rclass;
8385     }
8386 }
8387 
8388 /* Build the SYMBOL_REF for __tls_get_addr.  */
8389 
8390 static GTY(()) rtx tls_get_addr_libfunc;
8391 
8392 static rtx
8393 get_tls_get_addr (void)
8394 {
8395   if (!tls_get_addr_libfunc)
8396     tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8397   return tls_get_addr_libfunc;
8398 }
8399 
8400 rtx
8401 arm_load_tp (rtx target)
8402 {
8403   if (!target)
8404     target = gen_reg_rtx (SImode);
8405 
8406   if (TARGET_HARD_TP)
8407     {
8408       /* Can return in any reg.  */
8409       emit_insn (gen_load_tp_hard (target));
8410     }
8411   else
8412     {
8413       /* Always returned in r0.  Immediately copy the result into a pseudo,
8414 	 otherwise other uses of r0 (e.g. setting up function arguments) may
8415 	 clobber the value.  */
8416 
8417       rtx tmp;
8418 
8419       emit_insn (gen_load_tp_soft ());
8420 
8421       tmp = gen_rtx_REG (SImode, R0_REGNUM);
8422       emit_move_insn (target, tmp);
8423     }
8424   return target;
8425 }
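
/* Rough sketch of what the two branches above expand to (assumed typical
   output, not quoted from this port's machine description): with a
   hardware thread pointer (-mtp=cp15) the load_tp_hard pattern normally
   becomes a single coprocessor read along the lines of

       mrc	p15, 0, rN, c13, c0, 3

   whereas the soft variant calls the EABI helper __aeabi_read_tp, whose
   result is defined to arrive in r0 and is then copied into the target
   register as shown.  */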
8426 
8427 static rtx
8428 load_tls_operand (rtx x, rtx reg)
8429 {
8430   rtx tmp;
8431 
8432   if (reg == NULL_RTX)
8433     reg = gen_reg_rtx (SImode);
8434 
8435   tmp = gen_rtx_CONST (SImode, x);
8436 
8437   emit_move_insn (reg, tmp);
8438 
8439   return reg;
8440 }
8441 
8442 static rtx_insn *
8443 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8444 {
8445   rtx label, labelno, sum;
8446 
8447   gcc_assert (reloc != TLS_DESCSEQ);
8448   start_sequence ();
8449 
8450   labelno = GEN_INT (pic_labelno++);
8451   label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8452   label = gen_rtx_CONST (VOIDmode, label);
8453 
8454   sum = gen_rtx_UNSPEC (Pmode,
8455 			gen_rtvec (4, x, GEN_INT (reloc), label,
8456 				   GEN_INT (TARGET_ARM ? 8 : 4)),
8457 			UNSPEC_TLS);
8458   reg = load_tls_operand (sum, reg);
8459 
8460   if (TARGET_ARM)
8461     emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8462   else
8463     emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8464 
8465   *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8466 				     LCT_PURE, /* LCT_CONST?  */
8467 				     Pmode, reg, Pmode);
8468 
8469   rtx_insn *insns = get_insns ();
8470   end_sequence ();
8471 
8472   return insns;
8473 }
8474 
8475 static rtx
8476 arm_tls_descseq_addr (rtx x, rtx reg)
8477 {
8478   rtx labelno = GEN_INT (pic_labelno++);
8479   rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8480   rtx sum = gen_rtx_UNSPEC (Pmode,
8481 			    gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8482 				       gen_rtx_CONST (VOIDmode, label),
8483 				       GEN_INT (!TARGET_ARM)),
8484 			    UNSPEC_TLS);
8485   rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8486 
8487   emit_insn (gen_tlscall (x, labelno));
8488   if (!reg)
8489     reg = gen_reg_rtx (SImode);
8490   else
8491     gcc_assert (REGNO (reg) != R0_REGNUM);
8492 
8493   emit_move_insn (reg, reg0);
8494 
8495   return reg;
8496 }
8497 
8498 rtx
8499 legitimize_tls_address (rtx x, rtx reg)
8500 {
8501   rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8502   rtx_insn *insns;
8503   unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8504 
8505   switch (model)
8506     {
8507     case TLS_MODEL_GLOBAL_DYNAMIC:
8508       if (TARGET_GNU2_TLS)
8509 	{
8510 	  reg = arm_tls_descseq_addr (x, reg);
8511 
8512 	  tp = arm_load_tp (NULL_RTX);
8513 
8514 	  dest = gen_rtx_PLUS (Pmode, tp, reg);
8515 	}
8516       else
8517 	{
8518 	  /* Original scheme */
8519 	  insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8520 	  dest = gen_reg_rtx (Pmode);
8521 	  emit_libcall_block (insns, dest, ret, x);
8522 	}
8523       return dest;
8524 
8525     case TLS_MODEL_LOCAL_DYNAMIC:
8526       if (TARGET_GNU2_TLS)
8527 	{
8528 	  reg = arm_tls_descseq_addr (x, reg);
8529 
8530 	  tp = arm_load_tp (NULL_RTX);
8531 
8532 	  dest = gen_rtx_PLUS (Pmode, tp, reg);
8533 	}
8534       else
8535 	{
8536 	  insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8537 
8538 	  /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8539 	     share the LDM result with other LD model accesses.  */
8540 	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8541 				UNSPEC_TLS);
8542 	  dest = gen_reg_rtx (Pmode);
8543 	  emit_libcall_block (insns, dest, ret, eqv);
8544 
8545 	  /* Load the addend.  */
8546 	  addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8547 						     GEN_INT (TLS_LDO32)),
8548 				   UNSPEC_TLS);
8549 	  addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8550 	  dest = gen_rtx_PLUS (Pmode, dest, addend);
8551 	}
8552       return dest;
8553 
8554     case TLS_MODEL_INITIAL_EXEC:
8555       labelno = GEN_INT (pic_labelno++);
8556       label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8557       label = gen_rtx_CONST (VOIDmode, label);
8558       sum = gen_rtx_UNSPEC (Pmode,
8559 			    gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8560 				       GEN_INT (TARGET_ARM ? 8 : 4)),
8561 			    UNSPEC_TLS);
8562       reg = load_tls_operand (sum, reg);
8563 
8564       if (TARGET_ARM)
8565 	emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8566       else if (TARGET_THUMB2)
8567 	emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8568       else
8569 	{
8570 	  emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8571 	  emit_move_insn (reg, gen_const_mem (SImode, reg));
8572 	}
8573 
8574       tp = arm_load_tp (NULL_RTX);
8575 
8576       return gen_rtx_PLUS (Pmode, tp, reg);
8577 
8578     case TLS_MODEL_LOCAL_EXEC:
8579       tp = arm_load_tp (NULL_RTX);
8580 
8581       reg = gen_rtx_UNSPEC (Pmode,
8582 			    gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8583 			    UNSPEC_TLS);
8584       reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8585 
8586       return gen_rtx_PLUS (Pmode, tp, reg);
8587 
8588     default:
8589       abort ();
8590     }
8591 }
8592 
8593 /* Try machine-dependent ways of modifying an illegitimate address
8594    to be legitimate.  If we find one, return the new, valid address.  */
8595 rtx
8596 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8597 {
8598   if (arm_tls_referenced_p (x))
8599     {
8600       rtx addend = NULL;
8601 
8602       if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8603 	{
8604 	  addend = XEXP (XEXP (x, 0), 1);
8605 	  x = XEXP (XEXP (x, 0), 0);
8606 	}
8607 
8608       if (GET_CODE (x) != SYMBOL_REF)
8609 	return x;
8610 
8611       gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8612 
8613       x = legitimize_tls_address (x, NULL_RTX);
8614 
8615       if (addend)
8616 	{
8617 	  x = gen_rtx_PLUS (SImode, x, addend);
8618 	  orig_x = x;
8619 	}
8620       else
8621 	return x;
8622     }
8623 
8624   if (!TARGET_ARM)
8625     {
8626       /* TODO: legitimize_address for Thumb2.  */
8627       if (TARGET_THUMB2)
8628         return x;
8629       return thumb_legitimize_address (x, orig_x, mode);
8630     }
8631 
8632   if (GET_CODE (x) == PLUS)
8633     {
8634       rtx xop0 = XEXP (x, 0);
8635       rtx xop1 = XEXP (x, 1);
8636 
8637       if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8638 	xop0 = force_reg (SImode, xop0);
8639 
8640       if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8641 	  && !symbol_mentioned_p (xop1))
8642 	xop1 = force_reg (SImode, xop1);
8643 
8644       if (ARM_BASE_REGISTER_RTX_P (xop0)
8645 	  && CONST_INT_P (xop1))
8646 	{
8647 	  HOST_WIDE_INT n, low_n;
8648 	  rtx base_reg, val;
8649 	  n = INTVAL (xop1);
8650 
8651 	  /* VFP addressing modes actually allow greater offsets, but for
8652 	     now we just stick with the lowest common denominator.  */
8653 	  if (mode == DImode || mode == DFmode)
8654 	    {
8655 	      low_n = n & 0x0f;
8656 	      n &= ~0x0f;
8657 	      if (low_n > 4)
8658 		{
8659 		  n += 16;
8660 		  low_n -= 16;
8661 		}
8662 	    }
8663 	  else
8664 	    {
8665 	      low_n = ((mode) == TImode ? 0
8666 		       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8667 	      n -= low_n;
8668 	    }
8669 
8670 	  base_reg = gen_reg_rtx (SImode);
8671 	  val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8672 	  emit_move_insn (base_reg, val);
8673 	  x = plus_constant (Pmode, base_reg, low_n);
8674 	}
8675       else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8676 	x = gen_rtx_PLUS (SImode, xop0, xop1);
8677     }
8678 
8679   /* XXX We don't allow MINUS any more -- see comment in
8680      arm_legitimate_address_outer_p ().  */
8681   else if (GET_CODE (x) == MINUS)
8682     {
8683       rtx xop0 = XEXP (x, 0);
8684       rtx xop1 = XEXP (x, 1);
8685 
8686       if (CONSTANT_P (xop0))
8687 	xop0 = force_reg (SImode, xop0);
8688 
8689       if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8690 	xop1 = force_reg (SImode, xop1);
8691 
8692       if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8693 	x = gen_rtx_MINUS (SImode, xop0, xop1);
8694     }
8695 
8696   /* Make sure to take full advantage of the pre-indexed addressing mode
8697      with absolute addresses, which often allows the base register to be
8698      factorized across multiple adjacent memory references, and may even
8699      allow the minipool to be avoided entirely.  */
8700   else if (CONST_INT_P (x) && optimize > 0)
8701     {
8702       unsigned int bits;
8703       HOST_WIDE_INT mask, base, index;
8704       rtx base_reg;
8705 
8706       /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
8707          use an 8-bit index.  So let's use a 12-bit index for SImode only
8708          and hope that arm_gen_constant will enable ldrb to use more bits.  */
8709       bits = (mode == SImode) ? 12 : 8;
8710       mask = (1 << bits) - 1;
8711       base = INTVAL (x) & ~mask;
8712       index = INTVAL (x) & mask;
8713       if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8714         {
8715 	  /* It'll most probably be more efficient to generate the base
8716 	     with more bits set and use a negative index instead. */
8717 	  base |= mask;
8718 	  index -= mask;
8719 	}
8720       base_reg = force_reg (SImode, GEN_INT (base));
8721       x = plus_constant (Pmode, base_reg, index);
8722     }
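
  /* Worked example (illustrative values only): legitimizing the SImode
     address (const_int 0x12345) above gives bits == 12, mask == 0xfff,
     base == 0x12000 and index == 0x345; bit_count (0x12000) is 2, which
     does not exceed (32 - 12) / 2, so no negative-index adjustment is
     made and the result is (plus base_reg (const_int 0x345)).  */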
8723 
8724   if (flag_pic)
8725     {
8726       /* We need to find and carefully transform any SYMBOL and LABEL
8727 	 references; so go back to the original address expression.  */
8728       rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8729 
8730       if (new_x != orig_x)
8731 	x = new_x;
8732     }
8733 
8734   return x;
8735 }
8736 
8737 
8738 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8739    to be legitimate.  If we find one, return the new, valid address.  */
8740 rtx
8741 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8742 {
8743   if (GET_CODE (x) == PLUS
8744       && CONST_INT_P (XEXP (x, 1))
8745       && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8746 	  || INTVAL (XEXP (x, 1)) < 0))
8747     {
8748       rtx xop0 = XEXP (x, 0);
8749       rtx xop1 = XEXP (x, 1);
8750       HOST_WIDE_INT offset = INTVAL (xop1);
8751 
8752       /* Try to fold the offset into a biasing of the base register and
8753 	 then offsetting that.  Don't do this when optimizing for space
8754 	 since it can cause too many CSEs.  */
8755       if (optimize_size && offset >= 0
8756 	  && offset < 256 + 31 * GET_MODE_SIZE (mode))
8757 	{
8758 	  HOST_WIDE_INT delta;
8759 
8760 	  if (offset >= 256)
8761 	    delta = offset - (256 - GET_MODE_SIZE (mode));
8762 	  else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8763 	    delta = 31 * GET_MODE_SIZE (mode);
8764 	  else
8765 	    delta = offset & (~31 * GET_MODE_SIZE (mode));
8766 
8767 	  xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8768 				NULL_RTX);
8769 	  x = plus_constant (Pmode, xop0, delta);
8770 	}
8771       else if (offset < 0 && offset > -256)
8772 	/* Small negative offsets are best done with a subtract before the
8773 	   dereference, since forcing these into a register normally takes
8774 	   two instructions.  */
8775 	x = force_operand (x, NULL_RTX);
8776       else
8777 	{
8778 	  /* For the remaining cases, force the constant into a register.  */
8779 	  xop1 = force_reg (SImode, xop1);
8780 	  x = gen_rtx_PLUS (SImode, xop0, xop1);
8781 	}
8782     }
8783   else if (GET_CODE (x) == PLUS
8784 	   && s_register_operand (XEXP (x, 1), SImode)
8785 	   && !s_register_operand (XEXP (x, 0), SImode))
8786     {
8787       rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8788 
8789       x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8790     }
8791 
8792   if (flag_pic)
8793     {
8794       /* We need to find and carefully transform any SYMBOL and LABEL
8795 	 references; so go back to the original address expression.  */
8796       rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8797 
8798       if (new_x != orig_x)
8799 	x = new_x;
8800     }
8801 
8802   return x;
8803 }
8804 
8805 /* Return TRUE if X contains any TLS symbol references.  */
8806 
8807 bool
8808 arm_tls_referenced_p (rtx x)
8809 {
8810   if (! TARGET_HAVE_TLS)
8811     return false;
8812 
8813   subrtx_iterator::array_type array;
8814   FOR_EACH_SUBRTX (iter, array, x, ALL)
8815     {
8816       const_rtx x = *iter;
8817       if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8818 	{
8819 	  /* ARM currently does not provide relocations to encode TLS variables
8820 	     into AArch32 instructions, only data, so there is no way to
8821 	     implement these at present if the literal pool is disabled.  */
8822 	  if (arm_disable_literal_pool)
8823 	    sorry ("accessing thread-local storage is not currently supported "
8824 		   "with -mpure-code or -mslow-flash-data");
8825 
8826 	  return true;
8827 	}
8828 
8829       /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8830 	 TLS offsets, not real symbol references.  */
8831       if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8832 	iter.skip_subrtxes ();
8833     }
8834   return false;
8835 }
8836 
8837 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8838 
8839    On the ARM, allow any integer (invalid ones are removed later by insn
8840    patterns), nice doubles and symbol_refs which refer to the function's
8841    constant pool XXX.
8842 
8843    When generating PIC, allow anything.  */
8844 
8845 static bool
8846 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8847 {
8848   return flag_pic || !label_mentioned_p (x);
8849 }
8850 
8851 static bool
8852 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8853 {
8854   /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
8855      RTX.  These RTX must therefore be allowed for Thumb-1 so that when run
8856      for ARMv8-M Baseline or later the result is valid.  */
8857   if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8858     x = XEXP (x, 0);
8859 
8860   return (CONST_INT_P (x)
8861 	  || CONST_DOUBLE_P (x)
8862 	  || CONSTANT_ADDRESS_P (x)
8863 	  || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8864 	  || flag_pic);
8865 }
8866 
8867 static bool
8868 arm_legitimate_constant_p (machine_mode mode, rtx x)
8869 {
8870   return (!arm_cannot_force_const_mem (mode, x)
8871 	  && (TARGET_32BIT
8872 	      ? arm_legitimate_constant_p_1 (mode, x)
8873 	      : thumb_legitimate_constant_p (mode, x)));
8874 }
8875 
8876 /* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
8877 
8878 static bool
8879 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8880 {
8881   rtx base, offset;
8882   split_const (x, &base, &offset);
8883 
8884   if (SYMBOL_REF_P (base))
8885     {
8886       /* Function symbols cannot have an offset due to the Thumb bit.  */
8887       if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
8888 	  && INTVAL (offset) != 0)
8889 	return true;
8890 
8891       if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
8892 	  && !offset_within_block_p (base, INTVAL (offset)))
8893 	return true;
8894     }
8895   return arm_tls_referenced_p (x);
8896 }
8897 
8898 #define REG_OR_SUBREG_REG(X)						\
8899   (REG_P (X)							\
8900    || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8901 
8902 #define REG_OR_SUBREG_RTX(X)			\
8903    (REG_P (X) ? (X) : SUBREG_REG (X))
8904 
8905 static inline int
8906 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8907 {
8908   machine_mode mode = GET_MODE (x);
8909   int total, words;
8910 
8911   switch (code)
8912     {
8913     case ASHIFT:
8914     case ASHIFTRT:
8915     case LSHIFTRT:
8916     case ROTATERT:
8917       return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8918 
8919     case PLUS:
8920     case MINUS:
8921     case COMPARE:
8922     case NEG:
8923     case NOT:
8924       return COSTS_N_INSNS (1);
8925 
8926     case MULT:
8927       if (arm_arch6m && arm_m_profile_small_mul)
8928 	return COSTS_N_INSNS (32);
8929 
8930       if (CONST_INT_P (XEXP (x, 1)))
8931 	{
8932 	  int cycles = 0;
8933 	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8934 
8935 	  while (i)
8936 	    {
8937 	      i >>= 2;
8938 	      cycles++;
8939 	    }
8940 	  return COSTS_N_INSNS (2) + cycles;
8941 	}
8942       return COSTS_N_INSNS (1) + 16;
8943 
8944     case SET:
8945       /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8946 	 the mode.  */
8947       words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8948       return (COSTS_N_INSNS (words)
8949 	      + 4 * ((MEM_P (SET_SRC (x)))
8950 		     + MEM_P (SET_DEST (x))));
8951 
8952     case CONST_INT:
8953       if (outer == SET)
8954 	{
8955 	  if (UINTVAL (x) < 256
8956 	      /* 16-bit constant.  */
8957 	      || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8958 	    return 0;
8959 	  if (thumb_shiftable_const (INTVAL (x)))
8960 	    return COSTS_N_INSNS (2);
8961 	  return COSTS_N_INSNS (3);
8962 	}
8963       else if ((outer == PLUS || outer == COMPARE)
8964 	       && INTVAL (x) < 256 && INTVAL (x) > -256)
8965 	return 0;
8966       else if ((outer == IOR || outer == XOR || outer == AND)
8967 	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
8968 	return COSTS_N_INSNS (1);
8969       else if (outer == AND)
8970 	{
8971 	  int i;
8972 	  /* This duplicates the tests in the andsi3 expander.  */
8973 	  for (i = 9; i <= 31; i++)
8974 	    if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8975 		|| (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8976 	      return COSTS_N_INSNS (2);
8977 	}
8978       else if (outer == ASHIFT || outer == ASHIFTRT
8979 	       || outer == LSHIFTRT)
8980 	return 0;
8981       return COSTS_N_INSNS (2);
8982 
8983     case CONST:
8984     case CONST_DOUBLE:
8985     case LABEL_REF:
8986     case SYMBOL_REF:
8987       return COSTS_N_INSNS (3);
8988 
8989     case UDIV:
8990     case UMOD:
8991     case DIV:
8992     case MOD:
8993       return 100;
8994 
8995     case TRUNCATE:
8996       return 99;
8997 
8998     case AND:
8999     case XOR:
9000     case IOR:
9001       /* XXX guess.  */
9002       return 8;
9003 
9004     case MEM:
9005       /* XXX another guess.  */
9006       /* Memory costs quite a lot for the first word, but subsequent words
9007 	 load at the equivalent of a single insn each.  */
9008       return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9009 	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9010 		 ? 4 : 0));
9011 
9012     case IF_THEN_ELSE:
9013       /* XXX a guess.  */
9014       if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9015 	return 14;
9016       return 2;
9017 
9018     case SIGN_EXTEND:
9019     case ZERO_EXTEND:
9020       total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9021       total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9022 
9023       if (mode == SImode)
9024 	return total;
9025 
9026       if (arm_arch6)
9027 	return total + COSTS_N_INSNS (1);
9028 
9029       /* Assume a two-shift sequence.  Increase the cost slightly so
9030 	 we prefer actual shifts over an extend operation.  */
9031       return total + 1 + COSTS_N_INSNS (2);
9032 
9033     default:
9034       return 99;
9035     }
9036 }
9037 
9038 /* Estimates the size cost of Thumb-1 instructions.
9039    For now most of the code is copied from thumb1_rtx_costs.  We need
9040    finer-grained tuning when we have more related test cases.  */
9041 static inline int
9042 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9043 {
9044   machine_mode mode = GET_MODE (x);
9045   int words, cost;
9046 
9047   switch (code)
9048     {
9049     case ASHIFT:
9050     case ASHIFTRT:
9051     case LSHIFTRT:
9052     case ROTATERT:
9053       return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9054 
9055     case PLUS:
9056     case MINUS:
9057       /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
9058 	 operations generated by RTL expansion, especially for the expansion
9059 	 of multiplication.  */
9060       if ((GET_CODE (XEXP (x, 0)) == MULT
9061 	   && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
9062 	  || (GET_CODE (XEXP (x, 1)) == MULT
9063 	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9064 	return COSTS_N_INSNS (2);
9065       /* Fall through.  */
9066     case COMPARE:
9067     case NEG:
9068     case NOT:
9069       return COSTS_N_INSNS (1);
9070 
9071     case MULT:
9072       if (CONST_INT_P (XEXP (x, 1)))
9073         {
9074           /* The Thumb-1 mul instruction can't operate on a constant.  We
9075              must load it into a register first.  */
9076           int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9077 	  /* For targets that have a very small and high-latency multiply
9078 	     unit, we prefer to synthesize the mult with up to 5 instructions,
9079 	     giving a good balance between size and performance.  */
9080 	  if (arm_arch6m && arm_m_profile_small_mul)
9081 	    return COSTS_N_INSNS (5);
9082 	  else
9083 	    return COSTS_N_INSNS (1) + const_size;
9084         }
9085       return COSTS_N_INSNS (1);
9086 
9087     case SET:
9088       /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9089 	 the mode.  */
9090       words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9091       cost = COSTS_N_INSNS (words);
9092       if (satisfies_constraint_J (SET_SRC (x))
9093 	  || satisfies_constraint_K (SET_SRC (x))
9094 	     /* Too big an immediate for a 2-byte mov, using MOVT.  */
9095 	  || (CONST_INT_P (SET_SRC (x))
9096 	      && UINTVAL (SET_SRC (x)) >= 256
9097 	      && TARGET_HAVE_MOVT
9098 	      && satisfies_constraint_j (SET_SRC (x)))
9099 	     /* thumb1_movdi_insn.  */
9100 	  || ((words > 1) && MEM_P (SET_SRC (x))))
9101 	cost += COSTS_N_INSNS (1);
9102       return cost;
9103 
9104     case CONST_INT:
9105       if (outer == SET)
9106         {
9107           if (UINTVAL (x) < 256)
9108             return COSTS_N_INSNS (1);
9109 	  /* movw is 4 bytes long.  */
9110 	  if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9111 	    return COSTS_N_INSNS (2);
9112 	  /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
9113 	  if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9114             return COSTS_N_INSNS (2);
9115 	  /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
9116           if (thumb_shiftable_const (INTVAL (x)))
9117             return COSTS_N_INSNS (2);
9118           return COSTS_N_INSNS (3);
9119         }
9120       else if ((outer == PLUS || outer == COMPARE)
9121                && INTVAL (x) < 256 && INTVAL (x) > -256)
9122         return 0;
9123       else if ((outer == IOR || outer == XOR || outer == AND)
9124                && INTVAL (x) < 256 && INTVAL (x) >= -256)
9125         return COSTS_N_INSNS (1);
9126       else if (outer == AND)
9127         {
9128           int i;
9129           /* This duplicates the tests in the andsi3 expander.  */
9130           for (i = 9; i <= 31; i++)
9131             if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9132                 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9133               return COSTS_N_INSNS (2);
9134         }
9135       else if (outer == ASHIFT || outer == ASHIFTRT
9136                || outer == LSHIFTRT)
9137         return 0;
9138       return COSTS_N_INSNS (2);
9139 
9140     case CONST:
9141     case CONST_DOUBLE:
9142     case LABEL_REF:
9143     case SYMBOL_REF:
9144       return COSTS_N_INSNS (3);
9145 
9146     case UDIV:
9147     case UMOD:
9148     case DIV:
9149     case MOD:
9150       return 100;
9151 
9152     case TRUNCATE:
9153       return 99;
9154 
9155     case AND:
9156     case XOR:
9157     case IOR:
9158       return COSTS_N_INSNS (1);
9159 
9160     case MEM:
9161       return (COSTS_N_INSNS (1)
9162 	      + COSTS_N_INSNS (1)
9163 		* ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9164               + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9165                  ? COSTS_N_INSNS (1) : 0));
9166 
9167     case IF_THEN_ELSE:
9168       /* XXX a guess.  */
9169       if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9170         return 14;
9171       return 2;
9172 
9173     case ZERO_EXTEND:
9174       /* XXX still guessing.  */
9175       switch (GET_MODE (XEXP (x, 0)))
9176         {
9177           case E_QImode:
9178             return (1 + (mode == DImode ? 4 : 0)
9179                     + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9180 
9181           case E_HImode:
9182             return (4 + (mode == DImode ? 4 : 0)
9183                     + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9184 
9185           case E_SImode:
9186             return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9187 
9188           default:
9189             return 99;
9190         }
9191 
9192     default:
9193       return 99;
9194     }
9195 }
9196 
9197 /* Helper function for arm_rtx_costs.  If the operand is a valid shift
9198    operand, then return the operand that is being shifted.  If the shift
9199    is not by a constant, then set SHIFT_REG to point to the operand.
9200    Return NULL if OP is not a shifter operand.  */
9201 static rtx
9202 shifter_op_p (rtx op, rtx *shift_reg)
9203 {
9204   enum rtx_code code = GET_CODE (op);
9205 
9206   if (code == MULT && CONST_INT_P (XEXP (op, 1))
9207       && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9208     return XEXP (op, 0);
9209   else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9210     return XEXP (op, 0);
9211   else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9212 	   || code == ASHIFTRT)
9213     {
9214       if (!CONST_INT_P (XEXP (op, 1)))
9215 	*shift_reg = XEXP (op, 1);
9216       return XEXP (op, 0);
9217     }
9218 
9219   return NULL;
9220 }
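
/* Hypothetical examples of the above (for illustration only): for
   (mult (reg r1) (const_int 4)) the function returns (reg r1), since a
   multiplication by a power of two is treated as a left shift; for
   (ashift (reg r1) (reg r2)) it returns (reg r1) and sets *SHIFT_REG to
   (reg r2), because the shift amount is not a constant.  */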
9221 
9222 static bool
9223 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9224 {
9225   const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9226   rtx_code code = GET_CODE (x);
9227   gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9228 
9229   switch (XINT (x, 1))
9230     {
9231     case UNSPEC_UNALIGNED_LOAD:
9232       /* We can only do unaligned loads into the integer unit, and we can't
9233 	 use LDM or LDRD.  */
9234       *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9235       if (speed_p)
9236 	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9237 		  + extra_cost->ldst.load_unaligned);
9238 
9239 #ifdef NOT_YET
9240       *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9241 				 ADDR_SPACE_GENERIC, speed_p);
9242 #endif
9243       return true;
9244 
9245     case UNSPEC_UNALIGNED_STORE:
9246       *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9247       if (speed_p)
9248 	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9249 		  + extra_cost->ldst.store_unaligned);
9250 
9251       *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9252 #ifdef NOT_YET
9253       *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9254 				 ADDR_SPACE_GENERIC, speed_p);
9255 #endif
9256       return true;
9257 
9258     case UNSPEC_VRINTZ:
9259     case UNSPEC_VRINTP:
9260     case UNSPEC_VRINTM:
9261     case UNSPEC_VRINTR:
9262     case UNSPEC_VRINTX:
9263     case UNSPEC_VRINTA:
9264       if (speed_p)
9265         *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9266 
9267       return true;
9268     default:
9269       *cost = COSTS_N_INSNS (2);
9270       break;
9271     }
9272   return true;
9273 }
9274 
9275 /* Cost of a libcall.  We assume one insn per argument, an amount for the
9276    call (one insn for -Os) and then one for processing the result.  */
9277 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
9278 
9279 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
9280 	do								\
9281 	  {								\
9282 	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
9283 	    if (shift_op != NULL					\
9284 	        && arm_rtx_shift_left_p (XEXP (x, IDX)))		\
9285 	      {								\
9286 	        if (shift_reg)						\
9287 		  {							\
9288 		    if (speed_p)					\
9289 		      *cost += extra_cost->alu.arith_shift_reg;		\
9290 		    *cost += rtx_cost (shift_reg, GET_MODE (shift_reg),	\
9291 				       ASHIFT, 1, speed_p);		\
9292 		  }							\
9293 	        else if (speed_p)					\
9294 		  *cost += extra_cost->alu.arith_shift;			\
9295 									\
9296 		*cost += (rtx_cost (shift_op, GET_MODE (shift_op),	\
9297 				    ASHIFT, 0, speed_p)			\
9298 			  + rtx_cost (XEXP (x, 1 - IDX),		\
9299 				      GET_MODE (shift_op),		\
9300 			              OP, 1, speed_p));			\
9301 	        return true;						\
9302 	      }								\
9303 	  }								\
9304 	while (0)
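
/* Sketch of the intended use of the macro above (an assumption drawn from
   its body rather than from its callers): within the narrow-mode PLUS and
   MINUS cost cases, HANDLE_NARROW_SHIFT_ARITH (PLUS, 0) inspects operand 0
   of X; if it is a left-shift-style shifter operand such as
   (ashift (reg) (const_int 2)), the cost of a combined arithmetic-with-shift
   ALU operation is charged (arith_shift, or arith_shift_reg when the shift
   amount is in a register) and the cost calculation returns true.  */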
9305 
9306 /* Helper function for arm_rtx_costs_internal.  Calculates the cost of a MEM,
9307    considering the costs of the addressing mode and memory access
9308    separately.  */
9309 static bool
9310 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
9311 	       int *cost, bool speed_p)
9312 {
9313   machine_mode mode = GET_MODE (x);
9314 
9315   *cost = COSTS_N_INSNS (1);
9316 
9317   if (flag_pic
9318       && GET_CODE (XEXP (x, 0)) == PLUS
9319       && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9320     /* This will be split into two instructions.  Add the cost of the
9321        additional instruction here.  The cost of the memory access is computed
9322        below.  See arm.md:calculate_pic_address.  */
9323     *cost += COSTS_N_INSNS (1);
9324 
9325   /* Calculate cost of the addressing mode.  */
9326   if (speed_p)
9327     {
9328       arm_addr_mode_op op_type;
9329       switch (GET_CODE (XEXP (x, 0)))
9330 	{
9331 	default:
9332 	case REG:
9333 	  op_type = AMO_DEFAULT;
9334 	  break;
9335 	case MINUS:
9336 	  /* MINUS does not appear in RTL, but the architecture supports it,
9337 	     so handle this case defensively.  */
9338 	  /* fall through */
9339 	case PLUS:
9340 	  op_type = AMO_NO_WB;
9341 	  break;
9342 	case PRE_INC:
9343 	case PRE_DEC:
9344 	case POST_INC:
9345 	case POST_DEC:
9346 	case PRE_MODIFY:
9347 	case POST_MODIFY:
9348 	  op_type = AMO_WB;
9349 	  break;
9350 	}
9351 
9352       if (VECTOR_MODE_P (mode))
9353 	  *cost += current_tune->addr_mode_costs->vector[op_type];
9354       else if (FLOAT_MODE_P (mode))
9355 	  *cost += current_tune->addr_mode_costs->fp[op_type];
9356       else
9357 	  *cost += current_tune->addr_mode_costs->integer[op_type];
9358     }
9359 
9360   /* Calculate cost of memory access.  */
9361   if (speed_p)
9362     {
9363       if (FLOAT_MODE_P (mode))
9364 	{
9365 	  if (GET_MODE_SIZE (mode) == 8)
9366 	    *cost += extra_cost->ldst.loadd;
9367 	  else
9368 	    *cost += extra_cost->ldst.loadf;
9369 	}
9370       else if (VECTOR_MODE_P (mode))
9371 	*cost += extra_cost->ldst.loadv;
9372       else
9373 	{
9374 	  /* Integer modes */
9375 	  if (GET_MODE_SIZE (mode) == 8)
9376 	    *cost += extra_cost->ldst.ldrd;
9377 	  else
9378 	    *cost += extra_cost->ldst.load;
9379 	}
9380     }
9381 
9382   return true;
9383 }
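
/* Illustrative composition of the cost computed above (the exact numbers
   come from the current tuning tables, so this is only a sketch): for a
   speed-optimized load such as (mem:DF (plus (reg) (const_int 8))) the
   total would be COSTS_N_INSNS (1)
   + current_tune->addr_mode_costs->fp[AMO_NO_WB] + extra_cost->ldst.loadd,
   i.e. one base insn plus the addressing-mode and double-word FP load
   adjustments.  */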
9384 
9385 /* RTX costs.  Make an estimate of the cost of executing the operation
9386    X, which is contained within an operation with code OUTER_CODE.
9387    SPEED_P indicates whether the cost desired is the performance cost,
9388    or the size cost.  The estimate is stored in COST and the return
9389    value is TRUE if the cost calculation is final, or FALSE if the
9390    caller should recurse through the operands of X to add additional
9391    costs.
9392 
9393    We currently make no attempt to model the size savings of Thumb-2
9394    16-bit instructions.  At the normal points in compilation where
9395    this code is called we have no measure of whether the condition
9396    flags are live or not, and thus no realistic way to determine what
9397    the size will eventually be.  */
9398 static bool
9399 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9400 		   const struct cpu_cost_table *extra_cost,
9401 		   int *cost, bool speed_p)
9402 {
9403   machine_mode mode = GET_MODE (x);
9404 
9405   *cost = COSTS_N_INSNS (1);
9406 
9407   if (TARGET_THUMB1)
9408     {
9409       if (speed_p)
9410 	*cost = thumb1_rtx_costs (x, code, outer_code);
9411       else
9412 	*cost = thumb1_size_rtx_costs (x, code, outer_code);
9413       return true;
9414     }
9415 
9416   switch (code)
9417     {
9418     case SET:
9419       *cost = 0;
9420       /* SET RTXs don't have a mode so we get it from the destination.  */
9421       mode = GET_MODE (SET_DEST (x));
9422 
9423       if (REG_P (SET_SRC (x))
9424 	  && REG_P (SET_DEST (x)))
9425 	{
9426 	  /* Assume that most copies can be done with a single insn,
9427 	     unless we don't have HW FP, in which case everything
9428 	     larger than word mode will require two insns.  */
9429 	  *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9430 				   && GET_MODE_SIZE (mode) > 4)
9431 				  || mode == DImode)
9432 				 ? 2 : 1);
9433 	  /* Conditional register moves can be encoded
9434 	     in 16 bits in Thumb mode.  */
9435 	  if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9436 	    *cost >>= 1;
9437 
9438 	  return true;
9439 	}
9440 
9441       if (CONST_INT_P (SET_SRC (x)))
9442 	{
9443 	  /* Handle CONST_INT here, since the value doesn't have a mode
9444 	     and we would otherwise be unable to work out the true cost.  */
9445 	  *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9446 			    0, speed_p);
9447 	  outer_code = SET;
9448 	  /* Slightly lower the cost of setting a core reg to a constant.
9449 	     This helps break up chains and allows for better scheduling.  */
9450 	  if (REG_P (SET_DEST (x))
9451 	      && REGNO (SET_DEST (x)) <= LR_REGNUM)
9452 	    *cost -= 1;
9453 	  x = SET_SRC (x);
9454 	  /* Moves of an immediate in the range [0, 255] can be encoded in
9455 	     16 bits in Thumb mode.  */
9456 	  if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9457 	      && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9458 	    *cost >>= 1;
9459 	  goto const_int_cost;
9460 	}
9461 
9462       return false;
9463 
9464     case MEM:
9465       return arm_mem_costs (x, extra_cost, cost, speed_p);
9466 
9467     case PARALLEL:
9468     {
9469    /* Calculations of LDM costs are complex.  We assume an initial cost
9470    (ldm_1st) which covers loading up to ldm_regs_per_insn_1st registers;
9471    each additional group of ldm_regs_per_insn_subsequent registers then
9472    costs one more insn.  The formula for the cost of loading N regs
9473    is thus:
9474 
9475    ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9476 			     + ldm_regs_per_insn_subsequent - 1)
9477 			    / ldm_regs_per_insn_subsequent).
9478 
9479    Additional costs may also be added for addressing.  A similar
9480    formula is used for STM.  */
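
   /* For instance (illustrative numbers only): with ldm_regs_per_insn_1st
      == 2, ldm_regs_per_insn_subsequent == 2 and N == 5, the formula above
      gives ldm_1st + COSTS_N_INSNS ((MAX (5 - 2, 0) + 2 - 1) / 2)
      = ldm_1st + COSTS_N_INSNS (2).  */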
9481 
9482       bool is_ldm = load_multiple_operation (x, SImode);
9483       bool is_stm = store_multiple_operation (x, SImode);
9484 
9485       if (is_ldm || is_stm)
9486         {
9487 	  if (speed_p)
9488 	    {
9489 	      HOST_WIDE_INT nregs = XVECLEN (x, 0);
9490 	      HOST_WIDE_INT regs_per_insn_1st = is_ldm
9491 	                              ? extra_cost->ldst.ldm_regs_per_insn_1st
9492 	                              : extra_cost->ldst.stm_regs_per_insn_1st;
9493 	      HOST_WIDE_INT regs_per_insn_sub = is_ldm
9494 	                       ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9495 	                       : extra_cost->ldst.stm_regs_per_insn_subsequent;
9496 
9497 	      *cost += regs_per_insn_1st
9498 	               + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9499 					    + regs_per_insn_sub - 1)
9500 					  / regs_per_insn_sub);
9501 	      return true;
9502 	    }
9503 
9504         }
9505       return false;
9506     }
9507     case DIV:
9508     case UDIV:
9509       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9510 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
9511 	*cost += COSTS_N_INSNS (speed_p
9512 			       ? extra_cost->fp[mode != SFmode].div : 0);
9513       else if (mode == SImode && TARGET_IDIV)
9514 	*cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9515       else
9516 	*cost = LIBCALL_COST (2);
9517 
9518       /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9519 	 are possible, udiv is preferred.  */
9520       *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9521       return false;	/* All arguments must be in registers.  */
9522 
9523     case MOD:
9524       /* MOD by a power of 2 can be expanded as:
9525 	 rsbs    r1, r0, #0
9526 	 and     r0, r0, #(n - 1)
9527 	 and     r1, r1, #(n - 1)
9528 	 rsbpl   r0, r1, #0.  */
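      /* For example, for x % 8 the two AND masks above are #7.  Only three
	 further insns are counted below, on the assumption that the base
	 cost already covers one instruction.  */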
9529       if (CONST_INT_P (XEXP (x, 1))
9530 	  && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9531 	  && mode == SImode)
9532 	{
9533 	  *cost += COSTS_N_INSNS (3);
9534 
9535 	  if (speed_p)
9536 	    *cost += 2 * extra_cost->alu.logical
9537 		     + extra_cost->alu.arith;
9538 	  return true;
9539 	}
9540 
9541     /* Fall-through.  */
9542     case UMOD:
9543       /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9544 	 are possible, udiv is preferred.  */
9545       *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9546       return false;	/* All arguments must be in registers.  */
9547 
9548     case ROTATE:
9549       if (mode == SImode && REG_P (XEXP (x, 1)))
9550 	{
9551 	  *cost += (COSTS_N_INSNS (1)
9552 		   + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9553 	  if (speed_p)
9554 	    *cost += extra_cost->alu.shift_reg;
9555 	  return true;
9556 	}
9557       /* Fall through */
9558     case ROTATERT:
9559     case ASHIFT:
9560     case LSHIFTRT:
9561     case ASHIFTRT:
9562       if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9563 	{
9564 	  *cost += (COSTS_N_INSNS (2)
9565 		   + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9566 	  if (speed_p)
9567 	    *cost += 2 * extra_cost->alu.shift;
9568 	  /* Slightly disparage left shift by 1 so that we prefer adddi3.  */
9569 	  if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
9570 	    *cost += 1;
9571 	  return true;
9572 	}
9573       else if (mode == SImode)
9574 	{
9575 	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9576 	  /* Slightly disparage register shifts at -Os, but not by much.  */
9577 	  if (!CONST_INT_P (XEXP (x, 1)))
9578 	    *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9579 		      + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9580 	  return true;
9581 	}
9582       else if (GET_MODE_CLASS (mode) == MODE_INT
9583 	       && GET_MODE_SIZE (mode) < 4)
9584 	{
9585 	  if (code == ASHIFT)
9586 	    {
9587 	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9588 	      /* Slightly disparage register shifts at -Os, but not by
9589 	         much.  */
9590 	      if (!CONST_INT_P (XEXP (x, 1)))
9591 		*cost += (speed_p ? extra_cost->alu.shift_reg : 1
9592 			  + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9593 	    }
9594 	  else if (code == LSHIFTRT || code == ASHIFTRT)
9595 	    {
9596 	      if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9597 		{
9598 		  /* Can use SBFX/UBFX.  */
9599 		  if (speed_p)
9600 		    *cost += extra_cost->alu.bfx;
9601 		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9602 		}
9603 	      else
9604 		{
9605 		  *cost += COSTS_N_INSNS (1);
9606 		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9607 		  if (speed_p)
9608 		    {
9609 		      if (CONST_INT_P (XEXP (x, 1)))
9610 			*cost += 2 * extra_cost->alu.shift;
9611 		      else
9612 			*cost += (extra_cost->alu.shift
9613 				  + extra_cost->alu.shift_reg);
9614 		    }
9615 		  else
9616 		    /* Slightly disparage register shifts.  */
9617 		    *cost += !CONST_INT_P (XEXP (x, 1));
9618 		}
9619 	    }
9620 	  else /* Rotates.  */
9621 	    {
9622 	      *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9623 	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9624 	      if (speed_p)
9625 		{
9626 		  if (CONST_INT_P (XEXP (x, 1)))
9627 		    *cost += (2 * extra_cost->alu.shift
9628 			      + extra_cost->alu.log_shift);
9629 		  else
9630 		    *cost += (extra_cost->alu.shift
9631 			      + extra_cost->alu.shift_reg
9632 			      + extra_cost->alu.log_shift_reg);
9633 		}
9634 	    }
9635 	  return true;
9636 	}
9637 
9638       *cost = LIBCALL_COST (2);
9639       return false;
9640 
9641     case BSWAP:
9642       if (arm_arch6)
9643         {
9644           if (mode == SImode)
9645             {
9646               if (speed_p)
9647                 *cost += extra_cost->alu.rev;
9648 
9649               return false;
9650             }
9651         }
9652       else
9653         {
9654           /* No rev instruction available.  Look at arm_legacy_rev
9655              and thumb_legacy_rev for the form of RTL used then.  */
9656           if (TARGET_THUMB)
9657             {
9658               *cost += COSTS_N_INSNS (9);
9659 
9660               if (speed_p)
9661                 {
9662                   *cost += 6 * extra_cost->alu.shift;
9663                   *cost += 3 * extra_cost->alu.logical;
9664                 }
9665             }
9666           else
9667             {
9668               *cost += COSTS_N_INSNS (4);
9669 
9670               if (speed_p)
9671                 {
9672                   *cost += 2 * extra_cost->alu.shift;
9673                   *cost += extra_cost->alu.arith_shift;
9674                   *cost += 2 * extra_cost->alu.logical;
9675                 }
9676             }
9677           return true;
9678         }
9679       return false;
9680 
9681     case MINUS:
9682       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9683 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
9684 	{
9685 	  if (GET_CODE (XEXP (x, 0)) == MULT
9686 	      || GET_CODE (XEXP (x, 1)) == MULT)
9687 	    {
9688 	      rtx mul_op0, mul_op1, sub_op;
9689 
9690 	      if (speed_p)
9691 		*cost += extra_cost->fp[mode != SFmode].mult_addsub;
9692 
9693 	      if (GET_CODE (XEXP (x, 0)) == MULT)
9694 		{
9695 		  mul_op0 = XEXP (XEXP (x, 0), 0);
9696 		  mul_op1 = XEXP (XEXP (x, 0), 1);
9697 		  sub_op = XEXP (x, 1);
9698 		}
9699 	      else
9700 		{
9701 		  mul_op0 = XEXP (XEXP (x, 1), 0);
9702 		  mul_op1 = XEXP (XEXP (x, 1), 1);
9703 		  sub_op = XEXP (x, 0);
9704 		}
9705 
9706 	      /* The first operand of the multiply may be optionally
9707 		 negated.  */
9708 	      if (GET_CODE (mul_op0) == NEG)
9709 		mul_op0 = XEXP (mul_op0, 0);
9710 
9711 	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9712 			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
9713 			+ rtx_cost (sub_op, mode, code, 0, speed_p));
9714 
9715 	      return true;
9716 	    }
9717 
9718 	  if (speed_p)
9719 	    *cost += extra_cost->fp[mode != SFmode].addsub;
9720 	  return false;
9721 	}
9722 
9723       if (mode == SImode)
9724 	{
9725 	  rtx shift_by_reg = NULL;
9726 	  rtx shift_op;
9727 	  rtx non_shift_op;
9728 
9729 	  shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9730 	  if (shift_op == NULL)
9731 	    {
9732 	      shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9733 	      non_shift_op = XEXP (x, 0);
9734 	    }
9735 	  else
9736 	    non_shift_op = XEXP (x, 1);
9737 
9738 	  if (shift_op != NULL)
9739 	    {
9740 	      if (shift_by_reg != NULL)
9741 		{
9742 		  if (speed_p)
9743 		    *cost += extra_cost->alu.arith_shift_reg;
9744 		  *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9745 		}
9746 	      else if (speed_p)
9747 		*cost += extra_cost->alu.arith_shift;
9748 
9749 	      *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9750 	      *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9751 	      return true;
9752 	    }
9753 
9754 	  if (arm_arch_thumb2
9755 	      && GET_CODE (XEXP (x, 1)) == MULT)
9756 	    {
9757 	      /* MLS.  */
9758 	      if (speed_p)
9759 		*cost += extra_cost->mult[0].add;
9760 	      *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9761 	      *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9762 	      *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9763 	      return true;
9764 	    }
9765 
9766 	  if (CONST_INT_P (XEXP (x, 0)))
9767 	    {
9768 	      int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9769 					    INTVAL (XEXP (x, 0)), NULL_RTX,
9770 					    NULL_RTX, 1, 0);
9771 	      *cost = COSTS_N_INSNS (insns);
9772 	      if (speed_p)
9773 		*cost += insns * extra_cost->alu.arith;
9774 	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9775 	      return true;
9776 	    }
9777 	  else if (speed_p)
9778 	    *cost += extra_cost->alu.arith;
9779 
9780 	  return false;
9781 	}
9782 
9783       if (GET_MODE_CLASS (mode) == MODE_INT
9784 	  && GET_MODE_SIZE (mode) < 4)
9785 	{
9786 	  rtx shift_op, shift_reg;
9787 	  shift_reg = NULL;
9788 
9789 	  /* We check both sides of the MINUS for shifter operands since,
9790 	     unlike PLUS, it's not commutative.  */
9791 
9792 	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
9793 	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
9794 
9795 	  /* Slightly disparage, as we might need to widen the result.  */
9796 	  *cost += 1;
9797 	  if (speed_p)
9798 	    *cost += extra_cost->alu.arith;
9799 
9800 	  if (CONST_INT_P (XEXP (x, 0)))
9801 	    {
9802 	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9803 	      return true;
9804 	    }
9805 
9806 	  return false;
9807 	}
9808 
9809       if (mode == DImode)
9810 	{
9811 	  *cost += COSTS_N_INSNS (1);
9812 
9813 	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9814 	    {
9815 	      rtx op1 = XEXP (x, 1);
9816 
9817 	      if (speed_p)
9818 		*cost += 2 * extra_cost->alu.arith;
9819 
9820 	      if (GET_CODE (op1) == ZERO_EXTEND)
9821 		*cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9822 				   0, speed_p);
9823 	      else
9824 		*cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9825 	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9826 				 0, speed_p);
9827 	      return true;
9828 	    }
9829 	  else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9830 	    {
9831 	      if (speed_p)
9832 		*cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9833 	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9834 				  0, speed_p)
9835 			+ rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9836 	      return true;
9837 	    }
9838 	  else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9839 		   || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9840 	    {
9841 	      if (speed_p)
9842 		*cost += (extra_cost->alu.arith
9843 			  + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9844 			     ? extra_cost->alu.arith
9845 			     : extra_cost->alu.arith_shift));
9846 	      *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9847 			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9848 				    GET_CODE (XEXP (x, 1)), 0, speed_p));
9849 	      return true;
9850 	    }
9851 
9852 	  if (speed_p)
9853 	    *cost += 2 * extra_cost->alu.arith;
9854 	  return false;
9855 	}
9856 
9857       /* Vector mode?  */
9858 
9859       *cost = LIBCALL_COST (2);
9860       return false;
9861 
9862     case PLUS:
9863       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9864 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
9865 	{
9866 	  if (GET_CODE (XEXP (x, 0)) == MULT)
9867 	    {
9868 	      rtx mul_op0, mul_op1, add_op;
9869 
9870 	      if (speed_p)
9871 		*cost += extra_cost->fp[mode != SFmode].mult_addsub;
9872 
9873 	      mul_op0 = XEXP (XEXP (x, 0), 0);
9874 	      mul_op1 = XEXP (XEXP (x, 0), 1);
9875 	      add_op = XEXP (x, 1);
9876 
9877 	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9878 			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
9879 			+ rtx_cost (add_op, mode, code, 0, speed_p));
9880 
9881 	      return true;
9882 	    }
9883 
9884 	  if (speed_p)
9885 	    *cost += extra_cost->fp[mode != SFmode].addsub;
9886 	  return false;
9887 	}
9888       else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9889 	{
9890 	  *cost = LIBCALL_COST (2);
9891 	  return false;
9892 	}
9893 
9894       /* Narrow modes can be synthesized in SImode, but the range
9895 	 of useful sub-operations is limited.  Check for shift operations
9896 	 on one of the operands.  Only left shifts can be used in the
9897 	 narrow modes.  */
9898       if (GET_MODE_CLASS (mode) == MODE_INT
9899 	  && GET_MODE_SIZE (mode) < 4)
9900 	{
9901 	  rtx shift_op, shift_reg;
9902 	  shift_reg = NULL;
9903 
9904 	  HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
9905 
9906 	  if (CONST_INT_P (XEXP (x, 1)))
9907 	    {
9908 	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9909 					    INTVAL (XEXP (x, 1)), NULL_RTX,
9910 					    NULL_RTX, 1, 0);
9911 	      *cost = COSTS_N_INSNS (insns);
9912 	      if (speed_p)
9913 		*cost += insns * extra_cost->alu.arith;
9914 	      /* Slightly penalize a narrow operation as the result may
9915 		 need widening.  */
9916 	      *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9917 	      return true;
9918 	    }
9919 
9920 	  /* Slightly penalize a narrow operation as the result may
9921 	     need widening.  */
9922 	  *cost += 1;
9923 	  if (speed_p)
9924 	    *cost += extra_cost->alu.arith;
9925 
9926 	  return false;
9927 	}
9928 
9929       if (mode == SImode)
9930 	{
9931 	  rtx shift_op, shift_reg;
9932 
9933 	  if (TARGET_INT_SIMD
9934 	      && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9935 		  || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9936 	    {
9937 	      /* UXTA[BH] or SXTA[BH].  */
9938 	      if (speed_p)
9939 		*cost += extra_cost->alu.extend_arith;
9940 	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9941 				  0, speed_p)
9942 			+ rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9943 	      return true;
9944 	    }
9945 
9946 	  shift_reg = NULL;
9947 	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9948 	  if (shift_op != NULL)
9949 	    {
9950 	      if (shift_reg)
9951 		{
9952 		  if (speed_p)
9953 		    *cost += extra_cost->alu.arith_shift_reg;
9954 		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9955 		}
9956 	      else if (speed_p)
9957 		*cost += extra_cost->alu.arith_shift;
9958 
9959 	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9960 			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9961 	      return true;
9962 	    }
9963 	  if (GET_CODE (XEXP (x, 0)) == MULT)
9964 	    {
9965 	      rtx mul_op = XEXP (x, 0);
9966 
9967 	      if (TARGET_DSP_MULTIPLY
9968 		  && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9969 		       && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9970 			   || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9971 			       && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9972 			       && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9973 		      || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9974 			  && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9975 			  && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9976 			  && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9977 			      || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9978 				  && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9979 				  && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9980 				      == 16))))))
9981 		{
9982 		  /* SMLA[BT][BT].  */
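		  /* i.e. a multiply of two 16-bit operands, each either a
		     sign-extension or the top half of a register (an
		     arithmetic shift right by 16), accumulated into the
		     other addend.  */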
9983 		  if (speed_p)
9984 		    *cost += extra_cost->mult[0].extend_add;
9985 		  *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9986 				      SIGN_EXTEND, 0, speed_p)
9987 			    + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9988 					SIGN_EXTEND, 0, speed_p)
9989 			    + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9990 		  return true;
9991 		}
9992 
9993 	      if (speed_p)
9994 		*cost += extra_cost->mult[0].add;
9995 	      *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9996 			+ rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9997 			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9998 	      return true;
9999 	    }
10000 	  if (CONST_INT_P (XEXP (x, 1)))
10001 	    {
10002 	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10003 					    INTVAL (XEXP (x, 1)), NULL_RTX,
10004 					    NULL_RTX, 1, 0);
10005 	      *cost = COSTS_N_INSNS (insns);
10006 	      if (speed_p)
10007 		*cost += insns * extra_cost->alu.arith;
10008 	      *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10009 	      return true;
10010 	    }
10011 	  else if (speed_p)
10012 	    *cost += extra_cost->alu.arith;
10013 
10014 	  return false;
10015 	}
10016 
10017       if (mode == DImode)
10018 	{
10019 	  if (arm_arch3m
10020 	      && GET_CODE (XEXP (x, 0)) == MULT
10021 	      && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10022 		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10023 		  || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10024 		      && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10025 	    {
10026 	      if (speed_p)
10027 		*cost += extra_cost->mult[1].extend_add;
10028 	      *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10029 				  ZERO_EXTEND, 0, speed_p)
10030 			+ rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10031 				    ZERO_EXTEND, 0, speed_p)
10032 			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10033 	      return true;
10034 	    }
10035 
10036 	  *cost += COSTS_N_INSNS (1);
10037 
10038 	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10039 	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10040 	    {
10041 	      if (speed_p)
10042 		*cost += (extra_cost->alu.arith
10043 			  + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10044 			     ? extra_cost->alu.arith
10045 			     : extra_cost->alu.arith_shift));
10046 
10047 	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10048 				  0, speed_p)
10049 			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10050 	      return true;
10051 	    }
10052 
10053 	  if (speed_p)
10054 	    *cost += 2 * extra_cost->alu.arith;
10055 	  return false;
10056 	}
10057 
10058       /* Vector mode?  */
10059       *cost = LIBCALL_COST (2);
10060       return false;
10061     case IOR:
10062       if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10063         {
10064           if (speed_p)
10065             *cost += extra_cost->alu.rev;
10066 
10067           return true;
10068         }
10069     /* Fall through.  */
10070     case AND: case XOR:
10071       if (mode == SImode)
10072 	{
10073 	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10074 	  rtx op0 = XEXP (x, 0);
10075 	  rtx shift_op, shift_reg;
10076 
10077 	  if (subcode == NOT
10078 	      && (code == AND
10079 		  || (code == IOR && TARGET_THUMB2)))
10080 	    op0 = XEXP (op0, 0);
10081 
10082 	  shift_reg = NULL;
10083 	  shift_op = shifter_op_p (op0, &shift_reg);
10084 	  if (shift_op != NULL)
10085 	    {
10086 	      if (shift_reg)
10087 		{
10088 		  if (speed_p)
10089 		    *cost += extra_cost->alu.log_shift_reg;
10090 		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10091 		}
10092 	      else if (speed_p)
10093 		*cost += extra_cost->alu.log_shift;
10094 
10095 	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10096 			+ rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10097 	      return true;
10098 	    }
10099 
10100 	  if (CONST_INT_P (XEXP (x, 1)))
10101 	    {
10102 	      int insns = arm_gen_constant (code, SImode, NULL_RTX,
10103 					    INTVAL (XEXP (x, 1)), NULL_RTX,
10104 					    NULL_RTX, 1, 0);
10105 
10106 	      *cost = COSTS_N_INSNS (insns);
10107 	      if (speed_p)
10108 		*cost += insns * extra_cost->alu.logical;
10109 	      *cost += rtx_cost (op0, mode, code, 0, speed_p);
10110 	      return true;
10111 	    }
10112 
10113 	  if (speed_p)
10114 	    *cost += extra_cost->alu.logical;
10115 	  *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10116 		    + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10117 	  return true;
10118 	}
10119 
10120       if (mode == DImode)
10121 	{
10122 	  rtx op0 = XEXP (x, 0);
10123 	  enum rtx_code subcode = GET_CODE (op0);
10124 
10125 	  *cost += COSTS_N_INSNS (1);
10126 
10127 	  if (subcode == NOT
10128 	      && (code == AND
10129 		  || (code == IOR && TARGET_THUMB2)))
10130 	    op0 = XEXP (op0, 0);
10131 
10132 	  if (GET_CODE (op0) == ZERO_EXTEND)
10133 	    {
10134 	      if (speed_p)
10135 		*cost += 2 * extra_cost->alu.logical;
10136 
10137 	      *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10138 				  0, speed_p)
10139 			+ rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10140 	      return true;
10141 	    }
10142 	  else if (GET_CODE (op0) == SIGN_EXTEND)
10143 	    {
10144 	      if (speed_p)
10145 		*cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10146 
10147 	      *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10148 				  0, speed_p)
10149 			+ rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10150 	      return true;
10151 	    }
10152 
10153 	  if (speed_p)
10154 	    *cost += 2 * extra_cost->alu.logical;
10155 
10156 	  return true;
10157 	}
10158       /* Vector mode?  */
10159 
10160       *cost = LIBCALL_COST (2);
10161       return false;
10162 
10163     case MULT:
10164       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10165 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
10166 	{
10167 	  rtx op0 = XEXP (x, 0);
10168 
10169 	  if (GET_CODE (op0) == NEG && !flag_rounding_math)
10170 	    op0 = XEXP (op0, 0);
10171 
10172 	  if (speed_p)
10173 	    *cost += extra_cost->fp[mode != SFmode].mult;
10174 
10175 	  *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10176 		    + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10177 	  return true;
10178 	}
10179       else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10180 	{
10181 	  *cost = LIBCALL_COST (2);
10182 	  return false;
10183 	}
10184 
10185       if (mode == SImode)
10186 	{
10187 	  if (TARGET_DSP_MULTIPLY
10188 	      && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10189 		   && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10190 		       || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10191 			   && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10192 			   && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10193 		  || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10194 		      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10195 		      && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10196 		      && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10197 			  || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10198 			      && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10199 			      && (INTVAL (XEXP (XEXP (x, 1), 1))
10200 				  == 16))))))
10201 	    {
10202 	      /* SMUL[TB][TB].  */
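	      /* i.e. each multiply operand is either a sign-extended 16-bit
		 value or the top half of a register (ASHIFTRT by 16).  */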
10203 	      if (speed_p)
10204 		*cost += extra_cost->mult[0].extend;
10205 	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10206 				 SIGN_EXTEND, 0, speed_p);
10207 	      *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10208 				 SIGN_EXTEND, 1, speed_p);
10209 	      return true;
10210 	    }
10211 	  if (speed_p)
10212 	    *cost += extra_cost->mult[0].simple;
10213 	  return false;
10214 	}
10215 
10216       if (mode == DImode)
10217 	{
10218 	  if (arm_arch3m
10219 	      && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10220 		   && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10221 		  || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10222 		      && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10223 	    {
10224 	      if (speed_p)
10225 		*cost += extra_cost->mult[1].extend;
10226 	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10227 				  ZERO_EXTEND, 0, speed_p)
10228 			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10229 				    ZERO_EXTEND, 0, speed_p));
10230 	      return true;
10231 	    }
10232 
10233 	  *cost = LIBCALL_COST (2);
10234 	  return false;
10235 	}
10236 
10237       /* Vector mode?  */
10238       *cost = LIBCALL_COST (2);
10239       return false;
10240 
10241     case NEG:
10242       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10243 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
10244 	{
10245 	  if (GET_CODE (XEXP (x, 0)) == MULT)
10246 	    {
10247 	      /* VNMUL.  */
10248 	      *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10249 	      return true;
10250 	    }
10251 
10252 	  if (speed_p)
10253 	    *cost += extra_cost->fp[mode != SFmode].neg;
10254 
10255 	  return false;
10256 	}
10257       else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10258 	{
10259 	  *cost = LIBCALL_COST (1);
10260 	  return false;
10261 	}
10262 
10263       if (mode == SImode)
10264 	{
10265 	  if (GET_CODE (XEXP (x, 0)) == ABS)
10266 	    {
10267 	      *cost += COSTS_N_INSNS (1);
10268 	      /* Assume the non-flag-changing variant.  */
10269 	      if (speed_p)
10270 		*cost += (extra_cost->alu.log_shift
10271 			  + extra_cost->alu.arith_shift);
10272 	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10273 	      return true;
10274 	    }
10275 
10276 	  if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10277 	      || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10278 	    {
10279 	      *cost += COSTS_N_INSNS (1);
10280 	      /* No extra cost for MOV imm and MVN imm.  */
10281 	      /* If the comparison op is using the flags, there's no further
10282 		 cost, otherwise we need to add the cost of the comparison.  */
10283 	      if (!(REG_P (XEXP (XEXP (x, 0), 0))
10284 		    && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10285 		    && XEXP (XEXP (x, 0), 1) == const0_rtx))
10286 		{
10287 		  mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10288 		  *cost += (COSTS_N_INSNS (1)
10289 			    + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10290 					0, speed_p)
10291 			    + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10292 					1, speed_p));
10293 		  if (speed_p)
10294 		    *cost += extra_cost->alu.arith;
10295 		}
10296 	      return true;
10297 	    }
10298 
10299 	  if (speed_p)
10300 	    *cost += extra_cost->alu.arith;
10301 	  return false;
10302 	}
10303 
10304       if (GET_MODE_CLASS (mode) == MODE_INT
10305 	  && GET_MODE_SIZE (mode) < 4)
10306 	{
10307 	  /* Slightly disparage, as we might need an extend operation.  */
10308 	  *cost += 1;
10309 	  if (speed_p)
10310 	    *cost += extra_cost->alu.arith;
10311 	  return false;
10312 	}
10313 
10314       if (mode == DImode)
10315 	{
10316 	  *cost += COSTS_N_INSNS (1);
10317 	  if (speed_p)
10318 	    *cost += 2 * extra_cost->alu.arith;
10319 	  return false;
10320 	}
10321 
10322       /* Vector mode?  */
10323       *cost = LIBCALL_COST (1);
10324       return false;
10325 
10326     case NOT:
10327       if (mode == SImode)
10328 	{
10329 	  rtx shift_op;
10330 	  rtx shift_reg = NULL;
10331 
10332 	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10333 
10334 	  if (shift_op)
10335 	    {
10336 	      if (shift_reg != NULL)
10337 		{
10338 		  if (speed_p)
10339 		    *cost += extra_cost->alu.log_shift_reg;
10340 		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10341 		}
10342 	      else if (speed_p)
10343 		*cost += extra_cost->alu.log_shift;
10344 	      *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10345 	      return true;
10346 	    }
10347 
10348 	  if (speed_p)
10349 	    *cost += extra_cost->alu.logical;
10350 	  return false;
10351 	}
10352       if (mode == DImode)
10353 	{
10354 	  *cost += COSTS_N_INSNS (1);
10355 	  return false;
10356 	}
10357 
10358       /* Vector mode?  */
10359 
10360       *cost += LIBCALL_COST (1);
10361       return false;
10362 
10363     case IF_THEN_ELSE:
10364       {
10365         if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10366 	  {
10367 	    *cost += COSTS_N_INSNS (3);
10368 	    return true;
10369 	  }
10370 	int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10371 	int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10372 
10373 	*cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10374 	/* Assume that if one arm of the if_then_else is a register,
10375 	   that it will be tied with the result and eliminate the
10376 	   conditional insn.  */
10377 	if (REG_P (XEXP (x, 1)))
10378 	  *cost += op2cost;
10379 	else if (REG_P (XEXP (x, 2)))
10380 	  *cost += op1cost;
10381 	else
10382 	  {
10383 	    if (speed_p)
10384 	      {
10385 		if (extra_cost->alu.non_exec_costs_exec)
10386 		  *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10387 		else
10388 		  *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10389 	      }
10390 	    else
10391 	      *cost += op1cost + op2cost;
10392 	  }
10393       }
10394       return true;
10395 
10396     case COMPARE:
10397       if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10398 	*cost = 0;
10399       else
10400 	{
10401 	  machine_mode op0mode;
10402 	  /* We'll mostly assume that the cost of a compare is the cost of the
10403 	     LHS.  However, there are some notable exceptions.  */
10404 
10405 	  /* Floating point compares are never done as side-effects.  */
10406 	  op0mode = GET_MODE (XEXP (x, 0));
10407 	  if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10408 	      && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10409 	    {
10410 	      if (speed_p)
10411 		*cost += extra_cost->fp[op0mode != SFmode].compare;
10412 
10413 	      if (XEXP (x, 1) == CONST0_RTX (op0mode))
10414 		{
10415 		  *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10416 		  return true;
10417 		}
10418 
10419 	      return false;
10420 	    }
10421 	  else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10422 	    {
10423 	      *cost = LIBCALL_COST (2);
10424 	      return false;
10425 	    }
10426 
10427 	  /* DImode compares normally take two insns.  */
10428 	  if (op0mode == DImode)
10429 	    {
10430 	      *cost += COSTS_N_INSNS (1);
10431 	      if (speed_p)
10432 		*cost += 2 * extra_cost->alu.arith;
10433 	      return false;
10434 	    }
10435 
10436 	  if (op0mode == SImode)
10437 	    {
10438 	      rtx shift_op;
10439 	      rtx shift_reg;
10440 
10441 	      if (XEXP (x, 1) == const0_rtx
10442 		  && !(REG_P (XEXP (x, 0))
10443 		       || (GET_CODE (XEXP (x, 0)) == SUBREG
10444 			   && REG_P (SUBREG_REG (XEXP (x, 0))))))
10445 		{
10446 		  *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10447 
10448 		  /* Multiply operations that set the flags are often
10449 		     significantly more expensive.  */
10450 		  if (speed_p
10451 		      && GET_CODE (XEXP (x, 0)) == MULT
10452 		      && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10453 		    *cost += extra_cost->mult[0].flag_setting;
10454 
10455 		  if (speed_p
10456 		      && GET_CODE (XEXP (x, 0)) == PLUS
10457 		      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10458 		      && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10459 							    0), 1), mode))
10460 		    *cost += extra_cost->mult[0].flag_setting;
10461 		  return true;
10462 		}
10463 
10464 	      shift_reg = NULL;
10465 	      shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10466 	      if (shift_op != NULL)
10467 		{
10468 		  if (shift_reg != NULL)
10469 		    {
10470 		      *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10471 					 1, speed_p);
10472 		      if (speed_p)
10473 			*cost += extra_cost->alu.arith_shift_reg;
10474 		    }
10475 		  else if (speed_p)
10476 		    *cost += extra_cost->alu.arith_shift;
10477 		  *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10478 		  *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10479 		  return true;
10480 		}
10481 
10482 	      if (speed_p)
10483 		*cost += extra_cost->alu.arith;
10484 	      if (CONST_INT_P (XEXP (x, 1))
10485 		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10486 		{
10487 		  *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10488 		  return true;
10489 		}
10490 	      return false;
10491 	    }
10492 
10493 	  /* Vector mode?  */
10494 
10495 	  *cost = LIBCALL_COST (2);
10496 	  return false;
10497 	}
10498       return true;
10499 
10500     case EQ:
10501     case NE:
10502     case LT:
10503     case LE:
10504     case GT:
10505     case GE:
10506     case LTU:
10507     case LEU:
10508     case GEU:
10509     case GTU:
10510     case ORDERED:
10511     case UNORDERED:
10512     case UNEQ:
10513     case UNLE:
10514     case UNLT:
10515     case UNGE:
10516     case UNGT:
10517     case LTGT:
10518       if (outer_code == SET)
10519 	{
10520 	  /* Is it a store-flag operation?  */
10521 	  if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10522 	      && XEXP (x, 1) == const0_rtx)
10523 	    {
10524 	      /* Thumb also needs an IT insn.  */
10525 	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10526 	      return true;
10527 	    }
10528 	  if (XEXP (x, 1) == const0_rtx)
10529 	    {
10530 	      switch (code)
10531 		{
10532 		case LT:
10533 		  /* LSR Rd, Rn, #31.  */
10534 		  if (speed_p)
10535 		    *cost += extra_cost->alu.shift;
10536 		  break;
10537 
10538 		case EQ:
10539 		  /* RSBS T1, Rn, #0
10540 		     ADC  Rd, Rn, T1.  */
10541 
10542 		case NE:
10543 		  /* SUBS T1, Rn, #1
10544 		     SBC  Rd, Rn, T1.  */
10545 		  *cost += COSTS_N_INSNS (1);
10546 		  break;
10547 
10548 		case LE:
10549 		  /* RSBS T1, Rn, Rn, LSR #31
10550 		     ADC  Rd, Rn, T1. */
10551 		  *cost += COSTS_N_INSNS (1);
10552 		  if (speed_p)
10553 		    *cost += extra_cost->alu.arith_shift;
10554 		  break;
10555 
10556 		case GT:
10557 		  /* RSB  Rd, Rn, Rn, ASR #1
10558 		     LSR  Rd, Rd, #31.  */
10559 		  *cost += COSTS_N_INSNS (1);
10560 		  if (speed_p)
10561 		    *cost += (extra_cost->alu.arith_shift
10562 			      + extra_cost->alu.shift);
10563 		  break;
10564 
10565 		case GE:
10566 		  /* ASR  Rd, Rn, #31
10567 		     ADD  Rd, Rn, #1.  */
10568 		  *cost += COSTS_N_INSNS (1);
10569 		  if (speed_p)
10570 		    *cost += extra_cost->alu.shift;
10571 		  break;
10572 
10573 		default:
10574 		  /* Remaining cases are either meaningless or would take
10575 		     three insns anyway.  */
10576 		  *cost = COSTS_N_INSNS (3);
10577 		  break;
10578 		}
10579 	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10580 	      return true;
10581 	    }
10582 	  else
10583 	    {
10584 	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10585 	      if (CONST_INT_P (XEXP (x, 1))
10586 		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10587 		{
10588 		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10589 		  return true;
10590 		}
10591 
10592 	      return false;
10593 	    }
10594 	}
10595       /* Not directly inside a set.  If it involves the condition code
10596 	 register it must be the condition for a branch, cond_exec or
10597 	 I_T_E operation.  Since the comparison is performed elsewhere
10598 	 this is just the control part which has no additional
10599 	 cost.  */
10600       else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10601 	       && XEXP (x, 1) == const0_rtx)
10602 	{
10603 	  *cost = 0;
10604 	  return true;
10605 	}
10606       return false;
10607 
10608     case ABS:
10609       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10610 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
10611 	{
10612 	  if (speed_p)
10613 	    *cost += extra_cost->fp[mode != SFmode].neg;
10614 
10615 	  return false;
10616 	}
10617       else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10618 	{
10619 	  *cost = LIBCALL_COST (1);
10620 	  return false;
10621 	}
10622 
10623       if (mode == SImode)
10624 	{
10625 	  if (speed_p)
10626 	    *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10627 	  return false;
10628 	}
10629       /* Vector mode?  */
10630       *cost = LIBCALL_COST (1);
10631       return false;
10632 
10633     case SIGN_EXTEND:
10634       if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10635 	  && MEM_P (XEXP (x, 0)))
10636 	{
10637 	  if (mode == DImode)
10638 	    *cost += COSTS_N_INSNS (1);
10639 
10640 	  if (!speed_p)
10641 	    return true;
10642 
10643 	  if (GET_MODE (XEXP (x, 0)) == SImode)
10644 	    *cost += extra_cost->ldst.load;
10645 	  else
10646 	    *cost += extra_cost->ldst.load_sign_extend;
10647 
10648 	  if (mode == DImode)
10649 	    *cost += extra_cost->alu.shift;
10650 
10651 	  return true;
10652 	}
10653 
10654       /* Widening from less than 32-bits requires an extend operation.  */
10655       if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10656 	{
10657 	  /* We have SXTB/SXTH.  */
10658 	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10659 	  if (speed_p)
10660 	    *cost += extra_cost->alu.extend;
10661 	}
10662       else if (GET_MODE (XEXP (x, 0)) != SImode)
10663 	{
10664 	  /* Needs two shifts.  */
10665 	  *cost += COSTS_N_INSNS (1);
10666 	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10667 	  if (speed_p)
10668 	    *cost += 2 * extra_cost->alu.shift;
10669 	}
10670 
10671       /* Widening beyond 32-bits requires one more insn.  */
10672       if (mode == DImode)
10673 	{
10674 	  *cost += COSTS_N_INSNS (1);
10675 	  if (speed_p)
10676 	    *cost += extra_cost->alu.shift;
10677 	}
10678 
10679       return true;
10680 
10681     case ZERO_EXTEND:
10682       if ((arm_arch4
10683 	   || GET_MODE (XEXP (x, 0)) == SImode
10684 	   || GET_MODE (XEXP (x, 0)) == QImode)
10685 	  && MEM_P (XEXP (x, 0)))
10686 	{
10687 	  *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10688 
10689 	  if (mode == DImode)
10690 	    *cost += COSTS_N_INSNS (1);  /* No speed penalty.  */
10691 
10692 	  return true;
10693 	}
10694 
10695       /* Widening from less than 32-bits requires an extend operation.  */
10696       if (GET_MODE (XEXP (x, 0)) == QImode)
10697 	{
10698 	  /* UXTB can be a shorter instruction in Thumb2, but it might
10699 	     be slower than the AND Rd, Rn, #255 alternative.  When
10700 	     optimizing for speed it should never be slower to use
10701 	     AND, and we don't really model 16-bit vs 32-bit insns
10702 	     here.  */
10703 	  if (speed_p)
10704 	    *cost += extra_cost->alu.logical;
10705 	}
10706       else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10707 	{
10708 	  /* We have UXTB/UXTH.  */
10709 	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10710 	  if (speed_p)
10711 	    *cost += extra_cost->alu.extend;
10712 	}
10713       else if (GET_MODE (XEXP (x, 0)) != SImode)
10714 	{
10715 	  /* Needs two shifts.  It's marginally preferable to use
10716 	     shifts rather than two BIC instructions as the second
10717 	     shift may merge with a subsequent insn as a shifter
10718 	     op.  */
10719 	  *cost = COSTS_N_INSNS (2);
10720 	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10721 	  if (speed_p)
10722 	    *cost += 2 * extra_cost->alu.shift;
10723 	}
10724 
10725       /* Widening beyond 32-bits requires one more insn.  */
10726       if (mode == DImode)
10727 	{
10728 	  *cost += COSTS_N_INSNS (1);	/* No speed penalty.  */
10729 	}
10730 
10731       return true;
10732 
10733     case CONST_INT:
10734       *cost = 0;
10735       /* CONST_INT has no mode, so we cannot tell for sure how many
10736 	 insns are really going to be needed.  The best we can do is
10737 	 look at the value passed.  If it fits in SImode, then assume
10738 	 that's the mode it will be used for.  Otherwise assume it
10739 	 will be used in DImode.  */
10740       if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10741 	mode = SImode;
10742       else
10743 	mode = DImode;
10744 
10745       /* Avoid blowing up in arm_gen_constant ().  */
10746       if (!(outer_code == PLUS
10747 	    || outer_code == AND
10748 	    || outer_code == IOR
10749 	    || outer_code == XOR
10750 	    || outer_code == MINUS))
10751 	outer_code = SET;
10752 
10753     const_int_cost:
10754       if (mode == SImode)
10755 	{
10756 	  *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10757 						    INTVAL (x), NULL, NULL,
10758 						    0, 0));
10759 	  /* Extra costs?  */
10760 	}
10761       else
10762 	{
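	  /* Cost the constant as two SImode halves: the low word
	     (truncated to SImode) and the high word (INTVAL >> 32).  */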
10763 	  *cost += COSTS_N_INSNS (arm_gen_constant
10764 				  (outer_code, SImode, NULL,
10765 				   trunc_int_for_mode (INTVAL (x), SImode),
10766 				   NULL, NULL, 0, 0)
10767 				  + arm_gen_constant (outer_code, SImode, NULL,
10768 						      INTVAL (x) >> 32, NULL,
10769 						      NULL, 0, 0));
10770 	  /* Extra costs?  */
10771 	}
10772 
10773       return true;
10774 
10775     case CONST:
10776     case LABEL_REF:
10777     case SYMBOL_REF:
10778       if (speed_p)
10779 	{
10780 	  if (arm_arch_thumb2 && !flag_pic)
10781 	    *cost += COSTS_N_INSNS (1);
10782 	  else
10783 	    *cost += extra_cost->ldst.load;
10784 	}
10785       else
10786 	*cost += COSTS_N_INSNS (1);
10787 
10788       if (flag_pic)
10789 	{
10790 	  *cost += COSTS_N_INSNS (1);
10791 	  if (speed_p)
10792 	    *cost += extra_cost->alu.arith;
10793 	}
10794 
10795       return true;
10796 
10797     case CONST_FIXED:
10798       *cost = COSTS_N_INSNS (4);
10799       /* Fixme.  */
10800       return true;
10801 
10802     case CONST_DOUBLE:
10803       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10804 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
10805 	{
10806 	  if (vfp3_const_double_rtx (x))
10807 	    {
10808 	      if (speed_p)
10809 		*cost += extra_cost->fp[mode == DFmode].fpconst;
10810 	      return true;
10811 	    }
10812 
10813 	  if (speed_p)
10814 	    {
10815 	      if (mode == DFmode)
10816 		*cost += extra_cost->ldst.loadd;
10817 	      else
10818 		*cost += extra_cost->ldst.loadf;
10819 	    }
10820 	  else
10821 	    *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10822 
10823 	  return true;
10824 	}
10825       *cost = COSTS_N_INSNS (4);
10826       return true;
10827 
10828     case CONST_VECTOR:
10829       /* Fixme.  */
10830       if (TARGET_NEON
10831 	  && TARGET_HARD_FLOAT
10832 	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10833 	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10834 	*cost = COSTS_N_INSNS (1);
10835       else
10836 	*cost = COSTS_N_INSNS (4);
10837       return true;
10838 
10839     case HIGH:
10840     case LO_SUM:
10841       /* When optimizing for size, we prefer constant pool entries to
10842 	 MOVW/MOVT pairs, so bump the cost of these slightly.  */
10843       if (!speed_p)
10844 	*cost += 1;
10845       return true;
10846 
10847     case CLZ:
10848       if (speed_p)
10849 	*cost += extra_cost->alu.clz;
10850       return false;
10851 
10852     case SMIN:
10853       if (XEXP (x, 1) == const0_rtx)
10854 	{
10855 	  if (speed_p)
10856 	    *cost += extra_cost->alu.log_shift;
10857 	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10858 	  return true;
10859 	}
10860       /* Fall through.  */
10861     case SMAX:
10862     case UMIN:
10863     case UMAX:
10864       *cost += COSTS_N_INSNS (1);
10865       return false;
10866 
10867     case TRUNCATE:
10868       if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10869 	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10870 	  && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10871 	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10872 	  && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10873 	       && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10874 	      || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10875 		  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10876 		      == ZERO_EXTEND))))
10877 	{
10878 	  if (speed_p)
10879 	    *cost += extra_cost->mult[1].extend;
10880 	  *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10881 			      ZERO_EXTEND, 0, speed_p)
10882 		    + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10883 				ZERO_EXTEND, 0, speed_p));
10884 	  return true;
10885 	}
10886       *cost = LIBCALL_COST (1);
10887       return false;
10888 
10889     case UNSPEC_VOLATILE:
10890     case UNSPEC:
10891       return arm_unspec_cost (x, outer_code, speed_p, cost);
10892 
10893     case PC:
10894       /* Reading the PC is like reading any other register.  Writing it
10895 	 is more expensive, but we take that into account elsewhere.  */
10896       *cost = 0;
10897       return true;
10898 
10899     case ZERO_EXTRACT:
10900       /* TODO: Simple zero_extract of bottom bits using AND.  */
10901       /* Fall through.  */
10902     case SIGN_EXTRACT:
10903       if (arm_arch6
10904 	  && mode == SImode
10905 	  && CONST_INT_P (XEXP (x, 1))
10906 	  && CONST_INT_P (XEXP (x, 2)))
10907 	{
10908 	  if (speed_p)
10909 	    *cost += extra_cost->alu.bfx;
10910 	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10911 	  return true;
10912 	}
10913       /* Without UBFX/SBFX, need to resort to shift operations.  */
10914       *cost += COSTS_N_INSNS (1);
10915       if (speed_p)
10916 	*cost += 2 * extra_cost->alu.shift;
10917       *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10918       return true;
10919 
10920     case FLOAT_EXTEND:
10921       if (TARGET_HARD_FLOAT)
10922 	{
10923 	  if (speed_p)
10924 	    *cost += extra_cost->fp[mode == DFmode].widen;
10925 	  if (!TARGET_VFP5
10926 	      && GET_MODE (XEXP (x, 0)) == HFmode)
10927 	    {
10928 	      /* Pre v8, widening HF->DF is a two-step process, first
10929 	         widening to SFmode.  */
10930 	      *cost += COSTS_N_INSNS (1);
10931 	      if (speed_p)
10932 		*cost += extra_cost->fp[0].widen;
10933 	    }
10934 	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10935 	  return true;
10936 	}
10937 
10938       *cost = LIBCALL_COST (1);
10939       return false;
10940 
10941     case FLOAT_TRUNCATE:
10942       if (TARGET_HARD_FLOAT)
10943 	{
10944 	  if (speed_p)
10945 	    *cost += extra_cost->fp[mode == DFmode].narrow;
10946 	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10947 	  return true;
10948 	  /* Vector modes?  */
10949 	}
10950       *cost = LIBCALL_COST (1);
10951       return false;
10952 
10953     case FMA:
10954       if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10955         {
10956           rtx op0 = XEXP (x, 0);
10957           rtx op1 = XEXP (x, 1);
10958           rtx op2 = XEXP (x, 2);
10959 
10960 
10961           /* vfms or vfnma.  */
10962           if (GET_CODE (op0) == NEG)
10963             op0 = XEXP (op0, 0);
10964 
10965           /* vfnms or vfnma.  */
10966           if (GET_CODE (op2) == NEG)
10967             op2 = XEXP (op2, 0);
10968 
10969           *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10970           *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10971           *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10972 
10973           if (speed_p)
10974             *cost += extra_cost->fp[mode == DFmode].fma;
10975 
10976           return true;
10977         }
10978 
10979       *cost = LIBCALL_COST (3);
10980       return false;
10981 
10982     case FIX:
10983     case UNSIGNED_FIX:
10984       if (TARGET_HARD_FLOAT)
10985 	{
10986 	  /* The *combine_vcvtf2i reduces a vmul+vcvt into
10987 	     a vcvt fixed-point conversion.  */
10988 	  if (code == FIX && mode == SImode
10989 	      && GET_CODE (XEXP (x, 0)) == FIX
10990 	      && GET_MODE (XEXP (x, 0)) == SFmode
10991 	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10992 	      && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10993 		 > 0)
10994 	    {
10995 	      if (speed_p)
10996 		*cost += extra_cost->fp[0].toint;
10997 
10998 	      *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10999 				 code, 0, speed_p);
11000 	      return true;
11001 	    }
11002 
11003 	  if (GET_MODE_CLASS (mode) == MODE_INT)
11004 	    {
11005 	      mode = GET_MODE (XEXP (x, 0));
11006 	      if (speed_p)
11007 		*cost += extra_cost->fp[mode == DFmode].toint;
11008 	      /* Strip off the 'cost' of rounding towards zero.  */
11009 	      if (GET_CODE (XEXP (x, 0)) == FIX)
11010 		*cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11011 				   0, speed_p);
11012 	      else
11013 		*cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11014 	      /* ??? Increase the cost to deal with transferring from
11015 		 FP -> CORE registers?  */
11016 	      return true;
11017 	    }
11018 	  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11019 		   && TARGET_VFP5)
11020 	    {
11021 	      if (speed_p)
11022 		*cost += extra_cost->fp[mode == DFmode].roundint;
11023 	      return false;
11024 	    }
11025 	  /* Vector costs? */
11026 	}
11027       *cost = LIBCALL_COST (1);
11028       return false;
11029 
11030     case FLOAT:
11031     case UNSIGNED_FLOAT:
11032       if (TARGET_HARD_FLOAT)
11033 	{
11034 	  /* ??? Increase the cost to deal with transferring from CORE
11035 	     -> FP registers?  */
11036 	  if (speed_p)
11037 	    *cost += extra_cost->fp[mode == DFmode].fromint;
11038 	  return false;
11039 	}
11040       *cost = LIBCALL_COST (1);
11041       return false;
11042 
11043     case CALL:
11044       return true;
11045 
11046     case ASM_OPERANDS:
11047       {
11048       /* Just a guess: the number of instructions in the asm template
11049          plus one insn per input, with a minimum of COSTS_N_INSNS (1)
11050          (see PR60663).  */
11051         int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11052         int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11053 
11054         *cost = COSTS_N_INSNS (asm_length + num_operands);
11055         return true;
11056       }
11057     default:
11058       if (mode != VOIDmode)
11059 	*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11060       else
11061 	*cost = COSTS_N_INSNS (4); /* Who knows?  */
11062       return false;
11063     }
11064 }
11065 
11066 #undef HANDLE_NARROW_SHIFT_ARITH
11067 
11068 /* RTX costs entry point.  */
11069 
11070 static bool
11071 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11072 	       int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11073 {
11074   bool result;
11075   int code = GET_CODE (x);
11076   gcc_assert (current_tune->insn_extra_cost);
11077 
11078   result = arm_rtx_costs_internal (x, (enum rtx_code) code,
11079 				    (enum rtx_code) outer_code,
11080 				    current_tune->insn_extra_cost,
11081 				    total, speed);
11082 
11083   if (dump_file && arm_verbose_cost)
11084     {
11085       print_rtl_single (dump_file, x);
11086       fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11087 	       *total, result ? "final" : "partial");
11088     }
11089   return result;
11090 }
11091 
11092 /* All address computations that can be done are free, but rtx cost returns
11093    the same for practically all of them.  So we weight the different types
11094    of address here in the order (most pref first):
11095    PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
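/* For instance, a [reg, #imm] address scores 2, [reg, reg, lsl #2] scores 3,
   and a bare SYMBOL_REF scores 10.  */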
11096 static inline int
11097 arm_arm_address_cost (rtx x)
11098 {
11099   enum rtx_code c  = GET_CODE (x);
11100 
11101   if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11102     return 0;
11103   if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11104     return 10;
11105 
11106   if (c == PLUS)
11107     {
11108       if (CONST_INT_P (XEXP (x, 1)))
11109 	return 2;
11110 
11111       if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11112 	return 3;
11113 
11114       return 4;
11115     }
11116 
11117   return 6;
11118 }
11119 
11120 static inline int
11121 arm_thumb_address_cost (rtx x)
11122 {
11123   enum rtx_code c  = GET_CODE (x);
11124 
11125   if (c == REG)
11126     return 1;
11127   if (c == PLUS
11128       && REG_P (XEXP (x, 0))
11129       && CONST_INT_P (XEXP (x, 1)))
11130     return 1;
11131 
11132   return 2;
11133 }
11134 
11135 static int
11136 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11137 		  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11138 {
11139   return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11140 }
11141 
11142 /* Adjust cost hook for XScale.  */
11143 static bool
11144 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11145 			  int * cost)
11146 {
11147   /* Some true dependencies can have a higher cost depending
11148      on precisely how certain input operands are used.  */
11149   if (dep_type == 0
11150       && recog_memoized (insn) >= 0
11151       && recog_memoized (dep) >= 0)
11152     {
11153       int shift_opnum = get_attr_shift (insn);
11154       enum attr_type attr_type = get_attr_type (dep);
11155 
11156       /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11157 	 operand for INSN.  If we have a shifted input operand and the
11158 	 instruction we depend on is another ALU instruction, then we may
11159 	 have to account for an additional stall.  */
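      /* For instance (illustrative only), an insn such as
	 "add r0, r1, r2, lsl #3" whose r2 is produced by a preceding ALU
	 insn may see such a stall on XScale.  */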
11160       if (shift_opnum != 0
11161 	  && (attr_type == TYPE_ALU_SHIFT_IMM
11162 	      || attr_type == TYPE_ALUS_SHIFT_IMM
11163 	      || attr_type == TYPE_LOGIC_SHIFT_IMM
11164 	      || attr_type == TYPE_LOGICS_SHIFT_IMM
11165 	      || attr_type == TYPE_ALU_SHIFT_REG
11166 	      || attr_type == TYPE_ALUS_SHIFT_REG
11167 	      || attr_type == TYPE_LOGIC_SHIFT_REG
11168 	      || attr_type == TYPE_LOGICS_SHIFT_REG
11169 	      || attr_type == TYPE_MOV_SHIFT
11170 	      || attr_type == TYPE_MVN_SHIFT
11171 	      || attr_type == TYPE_MOV_SHIFT_REG
11172 	      || attr_type == TYPE_MVN_SHIFT_REG))
11173 	{
11174 	  rtx shifted_operand;
11175 	  int opno;
11176 
11177 	  /* Get the shifted operand.  */
11178 	  extract_insn (insn);
11179 	  shifted_operand = recog_data.operand[shift_opnum];
11180 
11181 	  /* Iterate over all the operands in DEP.  If we write an operand
11182 	     that overlaps with SHIFTED_OPERAND, then we have to increase the
11183 	     cost of this dependency.  */
11184 	  extract_insn (dep);
11185 	  preprocess_constraints (dep);
11186 	  for (opno = 0; opno < recog_data.n_operands; opno++)
11187 	    {
11188 	      /* We can ignore strict inputs.  */
11189 	      if (recog_data.operand_type[opno] == OP_IN)
11190 		continue;
11191 
11192 	      if (reg_overlap_mentioned_p (recog_data.operand[opno],
11193 					   shifted_operand))
11194 		{
11195 		  *cost = 2;
11196 		  return false;
11197 		}
11198 	    }
11199 	}
11200     }
11201   return true;
11202 }
11203 
11204 /* Adjust cost hook for Cortex A9.  */
11205 static bool
11206 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11207 			     int * cost)
11208 {
11209   switch (dep_type)
11210     {
11211     case REG_DEP_ANTI:
11212       *cost = 0;
11213       return false;
11214 
11215     case REG_DEP_TRUE:
11216     case REG_DEP_OUTPUT:
11217 	if (recog_memoized (insn) >= 0
11218 	    && recog_memoized (dep) >= 0)
11219 	  {
11220 	    if (GET_CODE (PATTERN (insn)) == SET)
11221 	      {
11222 		if (GET_MODE_CLASS
11223 		    (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11224 		  || GET_MODE_CLASS
11225 		    (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11226 		  {
11227 		    enum attr_type attr_type_insn = get_attr_type (insn);
11228 		    enum attr_type attr_type_dep = get_attr_type (dep);
11229 
11230 		    /* By default all dependencies of the form
11231 		       s0 = s0 <op> s1
11232 		       s0 = s0 <op> s2
11233 		       have an extra latency of 1 cycle because
11234 		       of the input and output dependency in this
11235 		       case.  However this gets modeled as a true
11236 		       dependency, hence all these checks.  */
11237 		    if (REG_P (SET_DEST (PATTERN (insn)))
11238 			&& reg_set_p (SET_DEST (PATTERN (insn)), dep))
11239 		      {
11240 			/* FMACS is a special case where the dependent
11241 			   instruction can be issued 3 cycles before
11242 			   the normal latency in case of an output
11243 			   dependency.  */
11244 			if ((attr_type_insn == TYPE_FMACS
11245 			     || attr_type_insn == TYPE_FMACD)
11246 			    && (attr_type_dep == TYPE_FMACS
11247 				|| attr_type_dep == TYPE_FMACD))
11248 			  {
11249 			    if (dep_type == REG_DEP_OUTPUT)
11250 			      *cost = insn_default_latency (dep) - 3;
11251 			    else
11252 			      *cost = insn_default_latency (dep);
11253 			    return false;
11254 			  }
11255 			else
11256 			  {
11257 			    if (dep_type == REG_DEP_OUTPUT)
11258 			      *cost = insn_default_latency (dep) + 1;
11259 			    else
11260 			      *cost = insn_default_latency (dep);
11261 			  }
11262 			return false;
11263 		      }
11264 		  }
11265 	      }
11266 	  }
11267 	break;
11268 
11269     default:
11270       gcc_unreachable ();
11271     }
11272 
11273   return true;
11274 }
11275 
11276 /* Adjust cost hook for FA726TE.  */
11277 static bool
11278 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11279 			   int * cost)
11280 {
11281   /* For FA726TE, a true dependency on the CPSR (i.e. a flag-setting insn
11282      followed by a predicated one) has a penalty of 3.  */
11283   if (dep_type == REG_DEP_TRUE
11284       && recog_memoized (insn) >= 0
11285       && recog_memoized (dep) >= 0
11286       && get_attr_conds (dep) == CONDS_SET)
11287     {
11288       /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
11289       if (get_attr_conds (insn) == CONDS_USE
11290           && get_attr_type (insn) != TYPE_BRANCH)
11291         {
11292           *cost = 3;
11293           return false;
11294         }
11295 
11296       if (GET_CODE (PATTERN (insn)) == COND_EXEC
11297           || get_attr_conds (insn) == CONDS_USE)
11298         {
11299           *cost = 0;
11300           return false;
11301         }
11302     }
11303 
11304   return true;
11305 }
11306 
11307 /* Implement TARGET_REGISTER_MOVE_COST.
11308 
11309    Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11310    it is typically more expensive than a single memory access.  We set
11311    the cost to less than two memory accesses so that floating
11312    point to integer conversion does not go through memory.  */
11313 
11314 int
11315 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11316 			reg_class_t from, reg_class_t to)
11317 {
11318   if (TARGET_32BIT)
11319     {
11320       if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11321 	  || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11322 	return 15;
11323       else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11324 	       || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11325 	return 4;
11326       else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11327 	return 20;
11328       else
11329 	return 2;
11330     }
11331   else
11332     {
11333       if (from == HI_REGS || to == HI_REGS)
11334 	return 4;
11335       else
11336 	return 2;
11337     }
11338 }
11339 
11340 /* Implement TARGET_MEMORY_MOVE_COST.  */
11341 
11342 int
11343 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11344 		      bool in ATTRIBUTE_UNUSED)
11345 {
11346   if (TARGET_32BIT)
11347     return 10;
11348   else
11349     {
11350       if (GET_MODE_SIZE (mode) < 4)
11351 	return 8;
11352       else
11353 	return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11354     }
11355 }
11356 
11357 /* Vectorizer cost model implementation.  */
11358 
11359 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
11360 static int
11361 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11362 				tree vectype,
11363 				int misalign ATTRIBUTE_UNUSED)
11364 {
11365   unsigned elements;
11366 
11367   switch (type_of_cost)
11368     {
11369       case scalar_stmt:
11370         return current_tune->vec_costs->scalar_stmt_cost;
11371 
11372       case scalar_load:
11373         return current_tune->vec_costs->scalar_load_cost;
11374 
11375       case scalar_store:
11376         return current_tune->vec_costs->scalar_store_cost;
11377 
11378       case vector_stmt:
11379         return current_tune->vec_costs->vec_stmt_cost;
11380 
11381       case vector_load:
11382         return current_tune->vec_costs->vec_align_load_cost;
11383 
11384       case vector_store:
11385         return current_tune->vec_costs->vec_store_cost;
11386 
11387       case vec_to_scalar:
11388         return current_tune->vec_costs->vec_to_scalar_cost;
11389 
11390       case scalar_to_vec:
11391         return current_tune->vec_costs->scalar_to_vec_cost;
11392 
11393       case unaligned_load:
11394       case vector_gather_load:
11395         return current_tune->vec_costs->vec_unalign_load_cost;
11396 
11397       case unaligned_store:
11398       case vector_scatter_store:
11399         return current_tune->vec_costs->vec_unalign_store_cost;
11400 
11401       case cond_branch_taken:
11402         return current_tune->vec_costs->cond_taken_branch_cost;
11403 
11404       case cond_branch_not_taken:
11405         return current_tune->vec_costs->cond_not_taken_branch_cost;
11406 
11407       case vec_perm:
11408       case vec_promote_demote:
11409         return current_tune->vec_costs->vec_stmt_cost;
11410 
11411       case vec_construct:
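	/* Heuristic cost for building a vector out of scalar elements; for
	   example, a 4-element vector is costed at 4/2 + 1 = 3.  */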
11412 	elements = TYPE_VECTOR_SUBPARTS (vectype);
11413 	return elements / 2 + 1;
11414 
11415       default:
11416         gcc_unreachable ();
11417     }
11418 }
11419 
11420 /* Implement targetm.vectorize.add_stmt_cost.  */
11421 
11422 static unsigned
11423 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11424 		   struct _stmt_vec_info *stmt_info, int misalign,
11425 		   enum vect_cost_model_location where)
11426 {
11427   unsigned *cost = (unsigned *) data;
11428   unsigned retval = 0;
11429 
11430   if (flag_vect_cost_model)
11431     {
11432       tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11433       int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11434 
11435       /* Statements in an inner loop relative to the loop being
11436 	 vectorized are weighted more heavily.  The value here is
11437 	 arbitrary and could potentially be improved with analysis.  */
11438       if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11439 	count *= 50;  /* FIXME.  */
11440 
11441       retval = (unsigned) (count * stmt_cost);
11442       cost[where] += retval;
11443     }
11444 
11445   return retval;
11446 }
11447 
11448 /* Return true if and only if this insn can dual-issue only as older.  */
11449 static bool
11450 cortexa7_older_only (rtx_insn *insn)
11451 {
11452   if (recog_memoized (insn) < 0)
11453     return false;
11454 
11455   switch (get_attr_type (insn))
11456     {
11457     case TYPE_ALU_DSP_REG:
11458     case TYPE_ALU_SREG:
11459     case TYPE_ALUS_SREG:
11460     case TYPE_LOGIC_REG:
11461     case TYPE_LOGICS_REG:
11462     case TYPE_ADC_REG:
11463     case TYPE_ADCS_REG:
11464     case TYPE_ADR:
11465     case TYPE_BFM:
11466     case TYPE_REV:
11467     case TYPE_MVN_REG:
11468     case TYPE_SHIFT_IMM:
11469     case TYPE_SHIFT_REG:
11470     case TYPE_LOAD_BYTE:
11471     case TYPE_LOAD_4:
11472     case TYPE_STORE_4:
11473     case TYPE_FFARITHS:
11474     case TYPE_FADDS:
11475     case TYPE_FFARITHD:
11476     case TYPE_FADDD:
11477     case TYPE_FMOV:
11478     case TYPE_F_CVT:
11479     case TYPE_FCMPS:
11480     case TYPE_FCMPD:
11481     case TYPE_FCONSTS:
11482     case TYPE_FCONSTD:
11483     case TYPE_FMULS:
11484     case TYPE_FMACS:
11485     case TYPE_FMULD:
11486     case TYPE_FMACD:
11487     case TYPE_FDIVS:
11488     case TYPE_FDIVD:
11489     case TYPE_F_MRC:
11490     case TYPE_F_MRRC:
11491     case TYPE_F_FLAG:
11492     case TYPE_F_LOADS:
11493     case TYPE_F_STORES:
11494       return true;
11495     default:
11496       return false;
11497     }
11498 }
11499 
11500 /* Return true if and only if this insn can dual-issue as younger.  */
11501 static bool
11502 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11503 {
11504   if (recog_memoized (insn) < 0)
11505     {
11506       if (verbose > 5)
11507         fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11508       return false;
11509     }
11510 
11511   switch (get_attr_type (insn))
11512     {
11513     case TYPE_ALU_IMM:
11514     case TYPE_ALUS_IMM:
11515     case TYPE_LOGIC_IMM:
11516     case TYPE_LOGICS_IMM:
11517     case TYPE_EXTEND:
11518     case TYPE_MVN_IMM:
11519     case TYPE_MOV_IMM:
11520     case TYPE_MOV_REG:
11521     case TYPE_MOV_SHIFT:
11522     case TYPE_MOV_SHIFT_REG:
11523     case TYPE_BRANCH:
11524     case TYPE_CALL:
11525       return true;
11526     default:
11527       return false;
11528     }
11529 }
11530 
11531 
11532 /* Look for an instruction that can dual issue only as an older
11533    instruction, and move it in front of any instructions that can
11534    dual-issue as younger, while preserving the relative order of all
11535    other instructions in the ready list.  This is a heuristic to help
11536    dual-issue in later cycles, by postponing issue of more flexible
11537    instructions.  This heuristic may affect dual issue opportunities
11538    in the current cycle.  */
11539 static void
11540 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11541 			int *n_readyp, int clock)
11542 {
11543   int i;
11544   int first_older_only = -1, first_younger = -1;
11545 
11546   if (verbose > 5)
11547     fprintf (file,
11548              ";; sched_reorder for cycle %d with %d insns in ready list\n",
11549              clock,
11550              *n_readyp);
11551 
11552   /* Traverse the ready list from the head (the instruction to issue
11553      first), looking for the first instruction that can issue as
11554      younger and the first instruction that can dual-issue only as
11555      older.  */
11556   for (i = *n_readyp - 1; i >= 0; i--)
11557     {
11558       rtx_insn *insn = ready[i];
11559       if (cortexa7_older_only (insn))
11560         {
11561           first_older_only = i;
11562           if (verbose > 5)
11563             fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11564           break;
11565         }
11566       else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11567         first_younger = i;
11568     }
11569 
11570   /* Nothing to reorder: either no younger insn was found, or the insn
11571      that can dual-issue only as older already appears before every insn
11572      that can dual-issue as younger.  */
11573   if (first_younger == -1)
11574     {
11575       if (verbose > 5)
11576         fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11577       return;
11578     }
11579 
11580   /* Nothing to reorder because no older-only insn in the ready list.  */
11581   if (first_older_only == -1)
11582     {
11583       if (verbose > 5)
11584         fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11585       return;
11586     }
11587 
11588   /* Move first_older_only insn before first_younger.  */
11589   if (verbose > 5)
11590     fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11591              INSN_UID(ready [first_older_only]),
11592              INSN_UID(ready [first_younger]));
11593   rtx_insn *first_older_only_insn = ready [first_older_only];
11594   for (i = first_older_only; i < first_younger; i++)
11595     {
11596       ready[i] = ready[i+1];
11597     }
11598 
11599   ready[i] = first_older_only_insn;
11600   return;
11601 }
11602 
11603 /* Implement TARGET_SCHED_REORDER. */
11604 static int
11605 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11606                    int clock)
11607 {
11608   switch (arm_tune)
11609     {
11610     case TARGET_CPU_cortexa7:
11611       cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11612       break;
11613     default:
11614       /* Do nothing for other cores.  */
11615       break;
11616     }
11617 
11618   return arm_issue_rate ();
11619 }
11620 
11621 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11622    It corrects the value of COST based on the relationship between
11623    INSN and DEP and the dependence type DEP_TYPE.  It returns the new
11624    value. There is a per-core adjust_cost hook to adjust scheduler costs
11625    and the per-core hook can choose to completely override the generic
11626    adjust_cost function. Only put bits of code into arm_adjust_cost that
11627    are common across all cores.  */
11628 static int
11629 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11630 		 unsigned int)
11631 {
11632   rtx i_pat, d_pat;
11633 
11634  /* When generating Thumb-1 code, we want to place flag-setting operations
11635     close to a conditional branch which depends on them, so that we can
11636     omit the comparison. */
11637   if (TARGET_THUMB1
11638       && dep_type == 0
11639       && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11640       && recog_memoized (dep) >= 0
11641       && get_attr_conds (dep) == CONDS_SET)
11642     return 0;
11643 
11644   if (current_tune->sched_adjust_cost != NULL)
11645     {
11646       if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11647 	return cost;
11648     }
11649 
11650   /* XXX Is this strictly true?  */
11651   if (dep_type == REG_DEP_ANTI
11652       || dep_type == REG_DEP_OUTPUT)
11653     return 0;
11654 
11655   /* Call insns don't incur a stall, even if they follow a load.  */
11656   if (dep_type == 0
11657       && CALL_P (insn))
11658     return 1;
11659 
11660   if ((i_pat = single_set (insn)) != NULL
11661       && MEM_P (SET_SRC (i_pat))
11662       && (d_pat = single_set (dep)) != NULL
11663       && MEM_P (SET_DEST (d_pat)))
11664     {
11665       rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11666       /* This is a load after a store, there is no conflict if the load reads
11667 	 from a cached area.  Assume that loads from the stack, and from the
11668 	 constant pool are cached, and that others will miss.  This is a
11669 	 hack.  */
11670 
11671       if ((GET_CODE (src_mem) == SYMBOL_REF
11672 	   && CONSTANT_POOL_ADDRESS_P (src_mem))
11673 	  || reg_mentioned_p (stack_pointer_rtx, src_mem)
11674 	  || reg_mentioned_p (frame_pointer_rtx, src_mem)
11675 	  || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11676 	return 1;
11677     }
11678 
11679   return cost;
11680 }
11681 
11682 int
11683 arm_max_conditional_execute (void)
11684 {
11685   return max_insns_skipped;
11686 }
11687 
11688 static int
11689 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11690 {
11691   if (TARGET_32BIT)
11692     return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11693   else
11694     return (optimize > 0) ? 2 : 0;
11695 }
11696 
11697 static int
11698 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11699 {
11700   return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11701 }
11702 
11703 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11704    on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11705    sequences of non-executed instructions in IT blocks probably take the same
11706    amount of time as executed instructions (and the IT instruction itself takes
11707    space in icache).  This function was experimentally determined to give good
11708    results on a popular embedded benchmark.  */
11709 
11710 static int
11711 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11712 {
11713   return (TARGET_32BIT && speed_p) ? 1
11714          : arm_default_branch_cost (speed_p, predictable_p);
11715 }
11716 
11717 static int
11718 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11719 {
11720   return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11721 }
11722 
11723 static bool fp_consts_inited = false;
11724 
11725 static REAL_VALUE_TYPE value_fp0;
11726 
11727 static void
11728 init_fp_table (void)
11729 {
11730   REAL_VALUE_TYPE r;
11731 
11732   r = REAL_VALUE_ATOF ("0", DFmode);
11733   value_fp0 = r;
11734   fp_consts_inited = true;
11735 }
11736 
11737 /* Return TRUE if rtx X is a valid immediate FP constant.  */
11738 int
11739 arm_const_double_rtx (rtx x)
11740 {
11741   const REAL_VALUE_TYPE *r;
11742 
11743   if (!fp_consts_inited)
11744     init_fp_table ();
11745 
11746   r = CONST_DOUBLE_REAL_VALUE (x);
11747   if (REAL_VALUE_MINUS_ZERO (*r))
11748     return 0;
11749 
11750   if (real_equal (r, &value_fp0))
11751     return 1;
11752 
11753   return 0;
11754 }
11755 
11756 /* VFPv3 has a fairly wide range of representable immediates, formed from
11757    "quarter-precision" floating-point values. These can be evaluated using this
11758    formula (with ^ for exponentiation):
11759 
11760      (-1)^s * n * 2^-r
11761 
11762    Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11763    16 <= n <= 31 and 0 <= r <= 7.
11764 
11765    These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11766 
11767      - A (most-significant) is the sign bit.
11768      - BCD are the exponent (encoded as r XOR 3).
11769      - EFGH are the mantissa (encoded as n - 16).
11770 */
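/* As an illustration of the scheme above: 1.0 = 16 * 2^-4, i.e. s = 0,
   n = 16 and r = 4, which encodes as ABCDEFGH = 0 111 0000 (0x70).  */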
11771 
11772 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11773    fconst[sd] instruction, or -1 if X isn't suitable.  */
11774 static int
11775 vfp3_const_double_index (rtx x)
11776 {
11777   REAL_VALUE_TYPE r, m;
11778   int sign, exponent;
11779   unsigned HOST_WIDE_INT mantissa, mant_hi;
11780   unsigned HOST_WIDE_INT mask;
11781   int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11782   bool fail;
11783 
11784   if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11785     return -1;
11786 
11787   r = *CONST_DOUBLE_REAL_VALUE (x);
11788 
11789   /* We can't represent these things, so detect them first.  */
11790   if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11791     return -1;
11792 
11793   /* Extract sign, exponent and mantissa.  */
11794   sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11795   r = real_value_abs (&r);
11796   exponent = REAL_EXP (&r);
11797   /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11798      highest (sign) bit, with a fixed binary point at bit point_pos.
11799      WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11800      bits for the mantissa, this may fail (low bits would be lost).  */
11801   real_ldexp (&m, &r, point_pos - exponent);
11802   wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11803   mantissa = w.elt (0);
11804   mant_hi = w.elt (1);
11805 
11806   /* If there are bits set in the low part of the mantissa, we can't
11807      represent this value.  */
11808   if (mantissa != 0)
11809     return -1;
11810 
11811   /* Now make it so that mantissa contains the most-significant bits, and move
11812      the point_pos to indicate that the least-significant bits have been
11813      discarded.  */
11814   point_pos -= HOST_BITS_PER_WIDE_INT;
11815   mantissa = mant_hi;
11816 
11817   /* We can permit four significant bits of mantissa only, plus a high bit
11818      which is always 1.  */
11819   mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11820   if ((mantissa & mask) != 0)
11821     return -1;
11822 
11823   /* Now we know the mantissa is in range, chop off the unneeded bits.  */
11824   mantissa >>= point_pos - 5;
11825 
11826   /* The mantissa may be zero. Disallow that case. (It's possible to load the
11827      floating-point immediate zero with Neon using an integer-zero load, but
11828      that case is handled elsewhere.)  */
11829   if (mantissa == 0)
11830     return -1;
11831 
11832   gcc_assert (mantissa >= 16 && mantissa <= 31);
11833 
11834   /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11835      normalized significands are in the range [1, 2). (Our mantissa is shifted
11836      left 4 places at this point relative to normalized IEEE754 values).  GCC
11837      internally uses [0.5, 1) (see real.c), so the exponent returned from
11838      REAL_EXP must be altered.  */
11839   exponent = 5 - exponent;
11840 
11841   if (exponent < 0 || exponent > 7)
11842     return -1;
11843 
11844   /* Sign, mantissa and exponent are now in the correct form to plug into the
11845      formula described in the comment above.  */
11846   return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11847 }
11848 
11849 /* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
11850 int
11851 vfp3_const_double_rtx (rtx x)
11852 {
11853   if (!TARGET_VFP3)
11854     return 0;
11855 
11856   return vfp3_const_double_index (x) != -1;
11857 }
11858 
11859 /* Recognize immediates which can be used in various Neon instructions. Legal
11860    immediates are described by the following table (for VMVN variants, the
11861    bitwise inverse of the constant shown is recognized. In either case, VMOV
11862    is output and the correct instruction to use for a given constant is chosen
11863    by the assembler). The constant shown is replicated across all elements of
11864    the destination vector.
11865 
11866    insn elems variant constant (binary)
11867    ---- ----- ------- -----------------
11868    vmov  i32     0    00000000 00000000 00000000 abcdefgh
11869    vmov  i32     1    00000000 00000000 abcdefgh 00000000
11870    vmov  i32     2    00000000 abcdefgh 00000000 00000000
11871    vmov  i32     3    abcdefgh 00000000 00000000 00000000
11872    vmov  i16     4    00000000 abcdefgh
11873    vmov  i16     5    abcdefgh 00000000
11874    vmvn  i32     6    00000000 00000000 00000000 abcdefgh
11875    vmvn  i32     7    00000000 00000000 abcdefgh 00000000
11876    vmvn  i32     8    00000000 abcdefgh 00000000 00000000
11877    vmvn  i32     9    abcdefgh 00000000 00000000 00000000
11878    vmvn  i16    10    00000000 abcdefgh
11879    vmvn  i16    11    abcdefgh 00000000
11880    vmov  i32    12    00000000 00000000 abcdefgh 11111111
11881    vmvn  i32    13    00000000 00000000 abcdefgh 11111111
11882    vmov  i32    14    00000000 abcdefgh 11111111 11111111
11883    vmvn  i32    15    00000000 abcdefgh 11111111 11111111
11884    vmov   i8    16    abcdefgh
11885    vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
11886                       eeeeeeee ffffffff gggggggg hhhhhhhh
11887    vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
11888    vmov  f32    19    00000000 00000000 00000000 00000000
11889 
11890    For case 18, B = !b. Representable values are exactly those accepted by
11891    vfp3_const_double_index, but are output as floating-point numbers rather
11892    than indices.
11893 
11894    For case 19, we will change it to vmov.i32 when assembling.
11895 
11896    Variants 0-5 (inclusive) may also be used as immediates for the second
11897    operand of VORR/VBIC instructions.
11898 
11899    The INVERSE argument causes the bitwise inverse of the given operand to be
11900    recognized instead (used for recognizing legal immediates for the VAND/VORN
11901    pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11902    *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11903    output, rather than the real insns vbic/vorr).
11904 
11905    INVERSE makes no difference to the recognition of float vectors.
11906 
11907    The return value is the variant of immediate as shown in the above table, or
11908    -1 if the given value doesn't match any of the listed patterns.
11909 */
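/* As an example, the constant 0x000000ff replicated across a vector of i32
   elements matches variant 0 above, with abcdefgh = 11111111.  */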
11910 static int
11911 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11912 		      rtx *modconst, int *elementwidth)
11913 {
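/* The CHECK helper below runs TEST for i = 0, STRIDE, 2*STRIDE, ... over the
   splatted byte array; if every tested group matches, variant CLASS with
   element size ELSIZE is recorded and the search stops.  */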
11914 #define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
11915   matches = 1;					\
11916   for (i = 0; i < idx; i += (STRIDE))		\
11917     if (!(TEST))				\
11918       matches = 0;				\
11919   if (matches)					\
11920     {						\
11921       immtype = (CLASS);			\
11922       elsize = (ELSIZE);			\
11923       break;					\
11924     }
11925 
11926   unsigned int i, elsize = 0, idx = 0, n_elts;
11927   unsigned int innersize;
11928   unsigned char bytes[16];
11929   int immtype = -1, matches;
11930   unsigned int invmask = inverse ? 0xff : 0;
11931   bool vector = GET_CODE (op) == CONST_VECTOR;
11932 
11933   if (vector)
11934     n_elts = CONST_VECTOR_NUNITS (op);
11935   else
11936     {
11937       n_elts = 1;
11938       gcc_assert (mode != VOIDmode);
11939     }
11940 
11941   innersize = GET_MODE_UNIT_SIZE (mode);
11942 
11943   /* Vectors of float constants.  */
11944   if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11945     {
11946       rtx el0 = CONST_VECTOR_ELT (op, 0);
11947 
11948       if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11949         return -1;
11950 
11951       /* FP16 vectors cannot be represented.  */
11952       if (GET_MODE_INNER (mode) == HFmode)
11953 	return -1;
11954 
11955       /* All elements in the vector must be the same.  Note that 0.0 and -0.0
11956 	 are distinct in this context.  */
11957       if (!const_vec_duplicate_p (op))
11958 	return -1;
11959 
11960       if (modconst)
11961         *modconst = CONST_VECTOR_ELT (op, 0);
11962 
11963       if (elementwidth)
11964         *elementwidth = 0;
11965 
11966       if (el0 == CONST0_RTX (GET_MODE (el0)))
11967 	return 19;
11968       else
11969 	return 18;
11970     }
11971 
11972   /* The tricks done in the code below apply for little-endian vector layout.
11973      For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11974      FIXME: Implement logic for big-endian vectors.  */
11975   if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11976     return -1;
11977 
11978   /* Splat vector constant out into a byte vector.  */
11979   for (i = 0; i < n_elts; i++)
11980     {
11981       rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11982       unsigned HOST_WIDE_INT elpart;
11983 
11984       gcc_assert (CONST_INT_P (el));
11985       elpart = INTVAL (el);
11986 
11987       for (unsigned int byte = 0; byte < innersize; byte++)
11988 	{
11989 	  bytes[idx++] = (elpart & 0xff) ^ invmask;
11990 	  elpart >>= BITS_PER_UNIT;
11991 	}
11992     }
11993 
11994   /* Sanity check.  */
11995   gcc_assert (idx == GET_MODE_SIZE (mode));
11996 
11997   do
11998     {
11999       CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12000 		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12001 
12002       CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12003 		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12004 
12005       CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12006 		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12007 
12008       CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12009 		       && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12010 
12011       CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12012 
12013       CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12014 
12015       CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12016 		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12017 
12018       CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12019 		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12020 
12021       CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12022 		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12023 
12024       CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12025 		       && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12026 
12027       CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12028 
12029       CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12030 
12031       CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12032 			&& bytes[i + 2] == 0 && bytes[i + 3] == 0);
12033 
12034       CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12035 			&& bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12036 
12037       CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12038 			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12039 
12040       CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12041 			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12042 
12043       CHECK (1, 8, 16, bytes[i] == bytes[0]);
12044 
12045       CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12046 			&& bytes[i] == bytes[(i + 8) % idx]);
12047     }
12048   while (0);
12049 
12050   if (immtype == -1)
12051     return -1;
12052 
12053   if (elementwidth)
12054     *elementwidth = elsize;
12055 
12056   if (modconst)
12057     {
12058       unsigned HOST_WIDE_INT imm = 0;
12059 
12060       /* Un-invert bytes of recognized vector, if necessary.  */
12061       if (invmask != 0)
12062         for (i = 0; i < idx; i++)
12063           bytes[i] ^= invmask;
12064 
12065       if (immtype == 17)
12066         {
12067           /* FIXME: Broken on 32-bit H_W_I hosts.  */
12068           gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12069 
12070           for (i = 0; i < 8; i++)
12071             imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12072                    << (i * BITS_PER_UNIT);
12073 
12074           *modconst = GEN_INT (imm);
12075         }
12076       else
12077         {
12078           unsigned HOST_WIDE_INT imm = 0;
12079 
12080           for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12081             imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12082 
12083           *modconst = GEN_INT (imm);
12084         }
12085     }
12086 
12087   return immtype;
12088 #undef CHECK
12089 }
12090 
12091 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12092    VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12093    float elements), and a modified constant (whatever should be output for a
12094    VMOV) in *MODCONST.  */
12095 
12096 int
12097 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12098 			       rtx *modconst, int *elementwidth)
12099 {
12100   rtx tmpconst;
12101   int tmpwidth;
12102   int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12103 
12104   if (retval == -1)
12105     return 0;
12106 
12107   if (modconst)
12108     *modconst = tmpconst;
12109 
12110   if (elementwidth)
12111     *elementwidth = tmpwidth;
12112 
12113   return 1;
12114 }
12115 
12116 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
12117    the immediate is valid, write a constant suitable for using as an operand
12118    to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12119    *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE.  */
12120 
12121 int
12122 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12123 				rtx *modconst, int *elementwidth)
12124 {
12125   rtx tmpconst;
12126   int tmpwidth;
12127   int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12128 
12129   if (retval < 0 || retval > 5)
12130     return 0;
12131 
12132   if (modconst)
12133     *modconst = tmpconst;
12134 
12135   if (elementwidth)
12136     *elementwidth = tmpwidth;
12137 
12138   return 1;
12139 }
12140 
12141 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
12142    the immediate is valid, write a constant suitable for using as an operand
12143    to VSHR/VSHL to *MODCONST and the corresponding element width to
12144    *ELEMENTWIDTH.  ISLEFTSHIFT selects between left and right shifts,
12145    which have different immediate-range limitations.  */
12146 
12147 int
12148 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12149 				rtx *modconst, int *elementwidth,
12150 				bool isleftshift)
12151 {
12152   unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12153   unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12154   unsigned HOST_WIDE_INT last_elt = 0;
12155   unsigned HOST_WIDE_INT maxshift;
12156 
12157   /* All elements of the vector constant must be the same; extract that value.  */
12158   for (i = 0; i < n_elts; i++)
12159     {
12160       rtx el = CONST_VECTOR_ELT (op, i);
12161       unsigned HOST_WIDE_INT elpart;
12162 
12163       if (CONST_INT_P (el))
12164         elpart = INTVAL (el);
12165       else if (CONST_DOUBLE_P (el))
12166         return 0;
12167       else
12168         gcc_unreachable ();
12169 
12170       if (i != 0 && elpart != last_elt)
12171         return 0;
12172 
12173       last_elt = elpart;
12174     }
12175 
12176   /* Shift less than element size.  */
12177   maxshift = innersize * 8;
12178 
12179   if (isleftshift)
12180     {
12181       /* Left shift immediate value can be from 0 to <size>-1.  */
12182       if (last_elt >= maxshift)
12183         return 0;
12184     }
12185   else
12186     {
12187       /* Right shift immediate value can be from 1 to <size>.  */
12188       if (last_elt == 0 || last_elt > maxshift)
12189 	return 0;
12190     }
12191 
12192   if (elementwidth)
12193     *elementwidth = innersize * 8;
12194 
12195   if (modconst)
12196     *modconst = CONST_VECTOR_ELT (op, 0);
12197 
12198   return 1;
12199 }
12200 
12201 /* Return a string suitable for output of Neon immediate logic operation
12202    MNEM.  */
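/* For example, with MNEM "vorr", a 32-bit element width and QUAD set, the
   returned template is "vorr.i32\t%q0, %2".  */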
12203 
12204 char *
12205 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12206 			     int inverse, int quad)
12207 {
12208   int width, is_valid;
12209   static char templ[40];
12210 
12211   is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12212 
12213   gcc_assert (is_valid != 0);
12214 
12215   if (quad)
12216     sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12217   else
12218     sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12219 
12220   return templ;
12221 }
12222 
12223 /* Return a string suitable for output of Neon immediate shift operation
12224    (VSHR or VSHL) MNEM.  */
12225 
12226 char *
12227 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12228 			     machine_mode mode, int quad,
12229 			     bool isleftshift)
12230 {
12231   int width, is_valid;
12232   static char templ[40];
12233 
12234   is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12235   gcc_assert (is_valid != 0);
12236 
12237   if (quad)
12238     sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12239   else
12240     sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12241 
12242   return templ;
12243 }
12244 
12245 /* Output a sequence of pairwise operations to implement a reduction.
12246    NOTE: We do "too much work" here, because pairwise operations work on two
12247    registers-worth of operands in one go.  Unfortunately it does not seem
12248    possible to exploit those extra calculations to do the full operation in fewer steps.
12249    Although all vector elements of the result but the first are ignored, we
12250    actually calculate the same result in each of the elements. An alternative
12251    such as initially loading a vector with zero to use as each of the second
12252    operands would use up an additional register and take an extra instruction,
12253    for no particular gain.  */
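/* For example, with a pairwise-add REDUC and a 4-element vector
   { a, b, c, d }, the two steps produce { a+b, c+d, a+b, c+d } and then
   { a+b+c+d, ... }; only the first element of the final result is used.  */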
12254 
12255 void
12256 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12257 		      rtx (*reduc) (rtx, rtx, rtx))
12258 {
12259   unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12260   rtx tmpsum = op1;
12261 
12262   for (i = parts / 2; i >= 1; i /= 2)
12263     {
12264       rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12265       emit_insn (reduc (dest, tmpsum, tmpsum));
12266       tmpsum = dest;
12267     }
12268 }
12269 
12270 /* If VALS is a vector constant that can be loaded into a register
12271    using VDUP, generate instructions to do so and return an RTX to
12272    assign to the register.  Otherwise return NULL_RTX.  */
12273 
12274 static rtx
12275 neon_vdup_constant (rtx vals)
12276 {
12277   machine_mode mode = GET_MODE (vals);
12278   machine_mode inner_mode = GET_MODE_INNER (mode);
12279   rtx x;
12280 
12281   if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12282     return NULL_RTX;
12283 
12284   if (!const_vec_duplicate_p (vals, &x))
12285     /* The elements are not all the same.  We could handle repeating
12286        patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12287        {0, C, 0, C, 0, C, 0, C} which can be loaded using
12288        vdup.i16).  */
12289     return NULL_RTX;
12290 
12291   /* We can load this constant by using VDUP and a constant in a
12292      single ARM register.  This will be cheaper than a vector
12293      load.  */
12294 
12295   x = copy_to_mode_reg (inner_mode, x);
12296   return gen_vec_duplicate (mode, x);
12297 }
12298 
12299 /* Generate code to load VALS, which is a PARALLEL containing only
12300    constants (for vec_init) or CONST_VECTOR, efficiently into a
12301    register.  Returns an RTX to copy into the register, or NULL_RTX
12302    for a PARALLEL that can not be converted into a CONST_VECTOR.  */
12303 
12304 rtx
12305 neon_make_constant (rtx vals)
12306 {
12307   machine_mode mode = GET_MODE (vals);
12308   rtx target;
12309   rtx const_vec = NULL_RTX;
12310   int n_elts = GET_MODE_NUNITS (mode);
12311   int n_const = 0;
12312   int i;
12313 
12314   if (GET_CODE (vals) == CONST_VECTOR)
12315     const_vec = vals;
12316   else if (GET_CODE (vals) == PARALLEL)
12317     {
12318       /* A CONST_VECTOR must contain only CONST_INTs and
12319 	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12320 	 Only store valid constants in a CONST_VECTOR.  */
12321       for (i = 0; i < n_elts; ++i)
12322 	{
12323 	  rtx x = XVECEXP (vals, 0, i);
12324 	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12325 	    n_const++;
12326 	}
12327       if (n_const == n_elts)
12328 	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12329     }
12330   else
12331     gcc_unreachable ();
12332 
12333   if (const_vec != NULL
12334       && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12335     /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
12336     return const_vec;
12337   else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12338     /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
12339        pipeline cycle; creating the constant takes one or two ARM
12340        pipeline cycles.  */
12341     return target;
12342   else if (const_vec != NULL_RTX)
12343     /* Load from constant pool.  On Cortex-A8 this takes two cycles
12344        (for either double or quad vectors).  We cannot take advantage
12345        of single-cycle VLD1 because we need a PC-relative addressing
12346        mode.  */
12347     return const_vec;
12348   else
12349     /* A PARALLEL containing something not valid inside CONST_VECTOR.
12350        We cannot construct an initializer.  */
12351     return NULL_RTX;
12352 }
12353 
12354 /* Initialize vector TARGET to VALS.  */
12355 
12356 void
12357 neon_expand_vector_init (rtx target, rtx vals)
12358 {
12359   machine_mode mode = GET_MODE (target);
12360   machine_mode inner_mode = GET_MODE_INNER (mode);
12361   int n_elts = GET_MODE_NUNITS (mode);
12362   int n_var = 0, one_var = -1;
12363   bool all_same = true;
12364   rtx x, mem;
12365   int i;
12366 
12367   for (i = 0; i < n_elts; ++i)
12368     {
12369       x = XVECEXP (vals, 0, i);
12370       if (!CONSTANT_P (x))
12371 	++n_var, one_var = i;
12372 
12373       if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12374 	all_same = false;
12375     }
12376 
12377   if (n_var == 0)
12378     {
12379       rtx constant = neon_make_constant (vals);
12380       if (constant != NULL_RTX)
12381 	{
12382 	  emit_move_insn (target, constant);
12383 	  return;
12384 	}
12385     }
12386 
12387   /* Splat a single non-constant element if we can.  */
12388   if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12389     {
12390       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12391       emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
12392       return;
12393     }
12394 
12395   /* One field is non-constant.  Load constant then overwrite varying
12396      field.  This is more efficient than using the stack.  */
12397   if (n_var == 1)
12398     {
12399       rtx copy = copy_rtx (vals);
12400       rtx index = GEN_INT (one_var);
12401 
12402       /* Load constant part of vector, substitute neighboring value for
12403 	 varying element.  */
12404       XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12405       neon_expand_vector_init (target, copy);
12406 
12407       /* Insert variable.  */
12408       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12409       switch (mode)
12410 	{
12411 	case E_V8QImode:
12412 	  emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12413 	  break;
12414 	case E_V16QImode:
12415 	  emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12416 	  break;
12417 	case E_V4HImode:
12418 	  emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12419 	  break;
12420 	case E_V8HImode:
12421 	  emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12422 	  break;
12423 	case E_V2SImode:
12424 	  emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12425 	  break;
12426 	case E_V4SImode:
12427 	  emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12428 	  break;
12429 	case E_V2SFmode:
12430 	  emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12431 	  break;
12432 	case E_V4SFmode:
12433 	  emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12434 	  break;
12435 	case E_V2DImode:
12436 	  emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12437 	  break;
12438 	default:
12439 	  gcc_unreachable ();
12440 	}
12441       return;
12442     }
12443 
12444   /* Construct the vector in memory one field at a time
12445      and load the whole vector.  */
12446   mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12447   for (i = 0; i < n_elts; i++)
12448     emit_move_insn (adjust_address_nv (mem, inner_mode,
12449 				    i * GET_MODE_SIZE (inner_mode)),
12450 		    XVECEXP (vals, 0, i));
12451   emit_move_insn (target, mem);
12452 }
12453 
12454 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Report
12455    an error described by DESC if it doesn't.  EXP indicates the source location, which includes the
12456    inlining history for intrinsics.  */
12457 
12458 static void
12459 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12460 	      const_tree exp, const char *desc)
12461 {
12462   HOST_WIDE_INT lane;
12463 
12464   gcc_assert (CONST_INT_P (operand));
12465 
12466   lane = INTVAL (operand);
12467 
12468   if (lane < low || lane >= high)
12469     {
12470       if (exp)
12471 	error ("%K%s %wd out of range %wd - %wd",
12472 	       exp, desc, lane, low, high - 1);
12473       else
12474 	error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12475     }
12476 }
12477 
12478 /* Bounds-check lanes.  */
12479 
12480 void
12481 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12482 		  const_tree exp)
12483 {
12484   bounds_check (operand, low, high, exp, "lane");
12485 }
12486 
12487 /* Bounds-check constants.  */
12488 
12489 void
12490 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12491 {
12492   bounds_check (operand, low, high, NULL_TREE, "constant");
12493 }
12494 
12495 HOST_WIDE_INT
12496 neon_element_bits (machine_mode mode)
12497 {
12498   return GET_MODE_UNIT_BITSIZE (mode);
12499 }
12500 
12501 
12502 /* Predicates for `match_operand' and `match_operator'.  */
12503 
12504 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12505    WB is true if full writeback address modes are allowed and is false
12506    if limited writeback address modes (POST_INC and PRE_DEC) are
12507    allowed.  */
12508 
12509 int
12510 arm_coproc_mem_operand (rtx op, bool wb)
12511 {
12512   rtx ind;
12513 
12514   /* Reject eliminable registers.  */
12515   if (! (reload_in_progress || reload_completed || lra_in_progress)
12516       && (   reg_mentioned_p (frame_pointer_rtx, op)
12517 	  || reg_mentioned_p (arg_pointer_rtx, op)
12518 	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
12519 	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12520 	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12521 	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12522     return FALSE;
12523 
12524   /* Constants are converted into offsets from labels.  */
12525   if (!MEM_P (op))
12526     return FALSE;
12527 
12528   ind = XEXP (op, 0);
12529 
12530   if (reload_completed
12531       && (GET_CODE (ind) == LABEL_REF
12532 	  || (GET_CODE (ind) == CONST
12533 	      && GET_CODE (XEXP (ind, 0)) == PLUS
12534 	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12535 	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12536     return TRUE;
12537 
12538   /* Match: (mem (reg)).  */
12539   if (REG_P (ind))
12540     return arm_address_register_rtx_p (ind, 0);
12541 
12542   /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
12543      acceptable in any case (subject to verification by
12544      arm_address_register_rtx_p).  We need WB to be true to accept
12545      PRE_INC and POST_DEC.  */
12546   if (GET_CODE (ind) == POST_INC
12547       || GET_CODE (ind) == PRE_DEC
12548       || (wb
12549 	  && (GET_CODE (ind) == PRE_INC
12550 	      || GET_CODE (ind) == POST_DEC)))
12551     return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12552 
12553   if (wb
12554       && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12555       && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12556       && GET_CODE (XEXP (ind, 1)) == PLUS
12557       && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12558     ind = XEXP (ind, 1);
12559 
12560   /* Match:
12561      (plus (reg)
12562 	   (const)).  */
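  /* Given the checks below, this accepts word-aligned offsets in the
     range -1020..+1020.  */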
12563   if (GET_CODE (ind) == PLUS
12564       && REG_P (XEXP (ind, 0))
12565       && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12566       && CONST_INT_P (XEXP (ind, 1))
12567       && INTVAL (XEXP (ind, 1)) > -1024
12568       && INTVAL (XEXP (ind, 1)) <  1024
12569       && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12570     return TRUE;
12571 
12572   return FALSE;
12573 }
12574 
12575 /* Return TRUE if OP is a memory operand from or to which we can load or
12576    store a vector.  TYPE is one of the following values:
12577     0 - Vector load/store (vldr)
12578     1 - Core registers (ldm)
12579     2 - Element/structure loads (vld1)
12580  */
12581 int
12582 neon_vector_mem_operand (rtx op, int type, bool strict)
12583 {
12584   rtx ind;
12585 
12586   /* Reject eliminable registers.  */
12587   if (strict && ! (reload_in_progress || reload_completed)
12588       && (reg_mentioned_p (frame_pointer_rtx, op)
12589 	  || reg_mentioned_p (arg_pointer_rtx, op)
12590 	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
12591 	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12592 	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12593 	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12594     return FALSE;
12595 
12596   /* Constants are converted into offsets from labels.  */
12597   if (!MEM_P (op))
12598     return FALSE;
12599 
12600   ind = XEXP (op, 0);
12601 
12602   if (reload_completed
12603       && (GET_CODE (ind) == LABEL_REF
12604 	  || (GET_CODE (ind) == CONST
12605 	      && GET_CODE (XEXP (ind, 0)) == PLUS
12606 	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12607 	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12608     return TRUE;
12609 
12610   /* Match: (mem (reg)).  */
12611   if (REG_P (ind))
12612     return arm_address_register_rtx_p (ind, 0);
12613 
12614   /* Allow post-increment with Neon registers.  */
12615   if ((type != 1 && GET_CODE (ind) == POST_INC)
12616       || (type == 0 && GET_CODE (ind) == PRE_DEC))
12617     return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12618 
12619   /* Allow post-increment by register for VLDn.  */
12620   if (type == 2 && GET_CODE (ind) == POST_MODIFY
12621       && GET_CODE (XEXP (ind, 1)) == PLUS
12622       && REG_P (XEXP (XEXP (ind, 1), 1)))
12623      return true;
12624 
12625   /* Match:
12626      (plus (reg)
12627           (const)).  */
12628   if (type == 0
12629       && GET_CODE (ind) == PLUS
12630       && REG_P (XEXP (ind, 0))
12631       && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12632       && CONST_INT_P (XEXP (ind, 1))
12633       && INTVAL (XEXP (ind, 1)) > -1024
12634       /* For quad modes, we restrict the constant offset to be slightly less
12635 	 than what the instruction format permits.  We have no such constraint
12636 	 on double mode offsets.  (This must match arm_legitimate_index_p.)  */
12637       && (INTVAL (XEXP (ind, 1))
12638 	  < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12639       && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12640     return TRUE;
12641 
12642   return FALSE;
12643 }
12644 
12645 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12646    type.  */
12647 int
12648 neon_struct_mem_operand (rtx op)
12649 {
12650   rtx ind;
12651 
12652   /* Reject eliminable registers.  */
12653   if (! (reload_in_progress || reload_completed)
12654       && (   reg_mentioned_p (frame_pointer_rtx, op)
12655 	  || reg_mentioned_p (arg_pointer_rtx, op)
12656 	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
12657 	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12658 	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12659 	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12660     return FALSE;
12661 
12662   /* Constants are converted into offsets from labels.  */
12663   if (!MEM_P (op))
12664     return FALSE;
12665 
12666   ind = XEXP (op, 0);
12667 
12668   if (reload_completed
12669       && (GET_CODE (ind) == LABEL_REF
12670 	  || (GET_CODE (ind) == CONST
12671 	      && GET_CODE (XEXP (ind, 0)) == PLUS
12672 	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12673 	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12674     return TRUE;
12675 
12676   /* Match: (mem (reg)).  */
12677   if (REG_P (ind))
12678     return arm_address_register_rtx_p (ind, 0);
12679 
12680   /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
12681   if (GET_CODE (ind) == POST_INC
12682       || GET_CODE (ind) == PRE_DEC)
12683     return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12684 
12685   return FALSE;
12686 }
12687 
12688 /* Return true if X is a register that will be eliminated later on.  */
12689 int
12690 arm_eliminable_register (rtx x)
12691 {
12692   return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12693 		       || REGNO (x) == ARG_POINTER_REGNUM
12694 		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12695 			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12696 }
12697 
12698 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12699    coprocessor registers.  Otherwise return NO_REGS.  */
12700 
12701 enum reg_class
12702 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12703 {
12704   if (mode == HFmode)
12705     {
12706       if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12707 	return GENERAL_REGS;
12708       if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12709 	return NO_REGS;
12710       return GENERAL_REGS;
12711     }
12712 
12713   /* The neon move patterns handle all legitimate vector and struct
12714      addresses.  */
12715   if (TARGET_NEON
12716       && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12717       && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12718 	  || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12719 	  || VALID_NEON_STRUCT_MODE (mode)))
12720     return NO_REGS;
12721 
12722   if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12723     return NO_REGS;
12724 
12725   return GENERAL_REGS;
12726 }
12727 
12728 /* Values which must be returned in the most-significant end of the return
12729    register.  */
12730 
12731 static bool
12732 arm_return_in_msb (const_tree valtype)
12733 {
12734   return (TARGET_AAPCS_BASED
12735           && BYTES_BIG_ENDIAN
12736 	  && (AGGREGATE_TYPE_P (valtype)
12737 	      || TREE_CODE (valtype) == COMPLEX_TYPE
12738 	      || FIXED_POINT_TYPE_P (valtype)));
12739 }
12740 
12741 /* Return TRUE if X references a SYMBOL_REF.  */
12742 int
12743 symbol_mentioned_p (rtx x)
12744 {
12745   const char * fmt;
12746   int i;
12747 
12748   if (GET_CODE (x) == SYMBOL_REF)
12749     return 1;
12750 
12751   /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12752      are constant offsets, not symbols.  */
12753   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12754     return 0;
12755 
12756   fmt = GET_RTX_FORMAT (GET_CODE (x));
12757 
12758   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12759     {
12760       if (fmt[i] == 'E')
12761 	{
12762 	  int j;
12763 
12764 	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12765 	    if (symbol_mentioned_p (XVECEXP (x, i, j)))
12766 	      return 1;
12767 	}
12768       else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12769 	return 1;
12770     }
12771 
12772   return 0;
12773 }
12774 
12775 /* Return TRUE if X references a LABEL_REF.  */
12776 int
12777 label_mentioned_p (rtx x)
12778 {
12779   const char * fmt;
12780   int i;
12781 
12782   if (GET_CODE (x) == LABEL_REF)
12783     return 1;
12784 
12785   /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12786      instruction, but they are constant offsets, not symbols.  */
12787   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12788     return 0;
12789 
12790   fmt = GET_RTX_FORMAT (GET_CODE (x));
12791   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12792     {
12793       if (fmt[i] == 'E')
12794 	{
12795 	  int j;
12796 
12797 	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12798 	    if (label_mentioned_p (XVECEXP (x, i, j)))
12799 	      return 1;
12800 	}
12801       else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12802 	return 1;
12803     }
12804 
12805   return 0;
12806 }
12807 
12808 int
12809 tls_mentioned_p (rtx x)
12810 {
12811   switch (GET_CODE (x))
12812     {
12813     case CONST:
12814       return tls_mentioned_p (XEXP (x, 0));
12815 
12816     case UNSPEC:
12817       if (XINT (x, 1) == UNSPEC_TLS)
12818 	return 1;
12819 
12820     /* Fall through.  */
12821     default:
12822       return 0;
12823     }
12824 }
12825 
12826 /* Must not copy any rtx that uses a pc-relative address.
12827    Also, disallow copying of load-exclusive instructions that
12828    may appear after splitting of compare-and-swap-style operations
12829    so as to prevent those loops from being transformed away from their
12830    canonical forms (see PR 69904).  */
12831 
12832 static bool
12833 arm_cannot_copy_insn_p (rtx_insn *insn)
12834 {
12835   /* The tls call insn cannot be copied, as it is paired with a data
12836      word.  */
12837   if (recog_memoized (insn) == CODE_FOR_tlscall)
12838     return true;
12839 
12840   subrtx_iterator::array_type array;
12841   FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12842     {
12843       const_rtx x = *iter;
12844       if (GET_CODE (x) == UNSPEC
12845 	  && (XINT (x, 1) == UNSPEC_PIC_BASE
12846 	      || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12847 	return true;
12848     }
12849 
12850   rtx set = single_set (insn);
12851   if (set)
12852     {
12853       rtx src = SET_SRC (set);
12854       if (GET_CODE (src) == ZERO_EXTEND)
12855 	src = XEXP (src, 0);
12856 
12857       /* Catch the load-exclusive and load-acquire operations.  */
12858       if (GET_CODE (src) == UNSPEC_VOLATILE
12859 	  && (XINT (src, 1) == VUNSPEC_LL
12860 	      || XINT (src, 1) == VUNSPEC_LAX))
12861 	return true;
12862     }
12863   return false;
12864 }
12865 
12866 enum rtx_code
12867 minmax_code (rtx x)
12868 {
12869   enum rtx_code code = GET_CODE (x);
12870 
12871   switch (code)
12872     {
12873     case SMAX:
12874       return GE;
12875     case SMIN:
12876       return LE;
12877     case UMIN:
12878       return LEU;
12879     case UMAX:
12880       return GEU;
12881     default:
12882       gcc_unreachable ();
12883     }
12884 }
12885 
12886 /* Match pair of min/max operators that can be implemented via usat/ssat.  */
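/* For example, bounds [0, 255] give *MASK = 8 with *SIGNED_SAT false
   (a usat #8), while bounds [-128, 127] give *MASK = 8 with *SIGNED_SAT
   true (an ssat #8).  */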
12887 
12888 bool
12889 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12890 			int *mask, bool *signed_sat)
12891 {
12892   /* The high bound must be a power of two minus one.  */
12893   int log = exact_log2 (INTVAL (hi_bound) + 1);
12894   if (log == -1)
12895     return false;
12896 
12897   /* The low bound is either zero (for usat) or one less than the
12898      negation of the high bound (for ssat).  */
12899   if (INTVAL (lo_bound) == 0)
12900     {
12901       if (mask)
12902         *mask = log;
12903       if (signed_sat)
12904         *signed_sat = false;
12905 
12906       return true;
12907     }
12908 
12909   if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12910     {
12911       if (mask)
12912         *mask = log + 1;
12913       if (signed_sat)
12914         *signed_sat = true;
12915 
12916       return true;
12917     }
12918 
12919   return false;
12920 }
12921 
12922 /* Return 1 if memory locations are adjacent.  */
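/* For example, (mem (reg X)) and (mem (plus (reg X) (const_int 4))) are
   considered adjacent, subject to the offset and scheduling checks below.  */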
12923 int
12924 adjacent_mem_locations (rtx a, rtx b)
12925 {
12926   /* We don't guarantee to preserve the order of these memory refs.  */
12927   if (volatile_refs_p (a) || volatile_refs_p (b))
12928     return 0;
12929 
12930   if ((REG_P (XEXP (a, 0))
12931        || (GET_CODE (XEXP (a, 0)) == PLUS
12932 	   && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12933       && (REG_P (XEXP (b, 0))
12934 	  || (GET_CODE (XEXP (b, 0)) == PLUS
12935 	      && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12936     {
12937       HOST_WIDE_INT val0 = 0, val1 = 0;
12938       rtx reg0, reg1;
12939       int val_diff;
12940 
12941       if (GET_CODE (XEXP (a, 0)) == PLUS)
12942         {
12943 	  reg0 = XEXP (XEXP (a, 0), 0);
12944 	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12945         }
12946       else
12947 	reg0 = XEXP (a, 0);
12948 
12949       if (GET_CODE (XEXP (b, 0)) == PLUS)
12950         {
12951 	  reg1 = XEXP (XEXP (b, 0), 0);
12952 	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12953         }
12954       else
12955 	reg1 = XEXP (b, 0);
12956 
12957       /* Don't accept any offset that will require multiple
12958 	 instructions to handle, since this would cause the
12959 	 arith_adjacentmem pattern to output an overlong sequence.  */
12960       if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12961 	return 0;
12962 
12963       /* Don't allow an eliminable register: register elimination can make
12964 	 the offset too large.  */
12965       if (arm_eliminable_register (reg0))
12966 	return 0;
12967 
12968       val_diff = val1 - val0;
12969 
12970       if (arm_ld_sched)
12971 	{
12972 	  /* If the target has load delay slots, then there's no benefit
12973 	     to using an ldm instruction unless the offset is zero and
12974 	     we are optimizing for size.  */
12975 	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12976 		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12977 		  && (val_diff == 4 || val_diff == -4));
12978 	}
12979 
12980       return ((REGNO (reg0) == REGNO (reg1))
12981 	      && (val_diff == 4 || val_diff == -4));
12982     }
12983 
12984   return 0;
12985 }
12986 
12987 /* Return true if OP is a valid load or store multiple operation.  LOAD is true
12988    for load operations, false for store operations.  CONSECUTIVE is true
12989    if the register numbers in the operation must be consecutive in the register
12990    bank.  RETURN_PC is true if the value is to be loaded into the PC.
12991    The pattern we are trying to match for load is:
12992      [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12993       (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12994        :
12995        :
12996       (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12997      ]
12998      where
12999      1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13000      2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13001      3.  If consecutive is TRUE, then for kth register being loaded,
13002          REGNO (R_dk) = REGNO (R_d0) + k.
13003    The pattern for store is similar.  */
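   For instance (schematically, with modes omitted), a two-register
   "ldmia r0!, {r1, r2}" is represented as

     (parallel [(set (reg r0) (plus (reg r0) (const_int 8)))
		(set (reg r1) (mem (reg r0)))
		(set (reg r2) (mem (plus (reg r0) (const_int 4))))])

   and is accepted with LOAD true, MODE SImode and CONSECUTIVE false; the
   first element is recognized below as the write-back of the base register.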
13004 bool
13005 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13006                      bool consecutive, bool return_pc)
13007 {
13008   HOST_WIDE_INT count = XVECLEN (op, 0);
13009   rtx reg, mem, addr;
13010   unsigned regno;
13011   unsigned first_regno;
13012   HOST_WIDE_INT i = 1, base = 0, offset = 0;
13013   rtx elt;
13014   bool addr_reg_in_reglist = false;
13015   bool update = false;
13016   int reg_increment;
13017   int offset_adj;
13018   int regs_per_val;
13019 
13020   /* If not in SImode, then registers must be consecutive
13021      (e.g., VLDM instructions for DFmode).  */
13022   gcc_assert ((mode == SImode) || consecutive);
13023   /* Setting return_pc for stores is illegal.  */
13024   gcc_assert (!return_pc || load);
13025 
13026   /* Set up the increments and the regs per val based on the mode.  */
13027   reg_increment = GET_MODE_SIZE (mode);
13028   regs_per_val = reg_increment / 4;
13029   offset_adj = return_pc ? 1 : 0;
13030 
13031   if (count <= 1
13032       || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13033       || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13034     return false;
13035 
13036   /* Check if this is a write-back.  */
13037   elt = XVECEXP (op, 0, offset_adj);
13038   if (GET_CODE (SET_SRC (elt)) == PLUS)
13039     {
13040       i++;
13041       base = 1;
13042       update = true;
13043 
13044       /* The offset adjustment must be the number of registers being
13045          popped times the size of a single register.  */
13046       if (!REG_P (SET_DEST (elt))
13047           || !REG_P (XEXP (SET_SRC (elt), 0))
13048           || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13049           || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13050           || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13051              ((count - 1 - offset_adj) * reg_increment))
13052         return false;
13053     }
13054 
13055   i = i + offset_adj;
13056   base = base + offset_adj;
13057   /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13058      success depends on the type: VLDM can do just one reg,
13059      LDM must do at least two.  */
13060   if ((count <= i) && (mode == SImode))
13061       return false;
13062 
13063   elt = XVECEXP (op, 0, i - 1);
13064   if (GET_CODE (elt) != SET)
13065     return false;
13066 
13067   if (load)
13068     {
13069       reg = SET_DEST (elt);
13070       mem = SET_SRC (elt);
13071     }
13072   else
13073     {
13074       reg = SET_SRC (elt);
13075       mem = SET_DEST (elt);
13076     }
13077 
13078   if (!REG_P (reg) || !MEM_P (mem))
13079     return false;
13080 
13081   regno = REGNO (reg);
13082   first_regno = regno;
13083   addr = XEXP (mem, 0);
13084   if (GET_CODE (addr) == PLUS)
13085     {
13086       if (!CONST_INT_P (XEXP (addr, 1)))
13087 	return false;
13088 
13089       offset = INTVAL (XEXP (addr, 1));
13090       addr = XEXP (addr, 0);
13091     }
13092 
13093   if (!REG_P (addr))
13094     return false;
13095 
13096   /* Don't allow SP to be loaded unless it is also the base register. It
13097      guarantees that SP is reset correctly when an LDM instruction
13098      is interrupted. Otherwise, we might end up with a corrupt stack.  */
13099   if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13100     return false;
13101 
13102   if (regno == REGNO (addr))
13103     addr_reg_in_reglist = true;
13104 
13105   for (; i < count; i++)
13106     {
13107       elt = XVECEXP (op, 0, i);
13108       if (GET_CODE (elt) != SET)
13109         return false;
13110 
13111       if (load)
13112         {
13113           reg = SET_DEST (elt);
13114           mem = SET_SRC (elt);
13115         }
13116       else
13117         {
13118           reg = SET_SRC (elt);
13119           mem = SET_DEST (elt);
13120         }
13121 
13122       if (!REG_P (reg)
13123           || GET_MODE (reg) != mode
13124           || REGNO (reg) <= regno
13125           || (consecutive
13126               && (REGNO (reg) !=
13127                   (unsigned int) (first_regno + regs_per_val * (i - base))))
13128           /* Don't allow SP to be loaded unless it is also the base register. It
13129              guarantees that SP is reset correctly when an LDM instruction
13130              is interrupted. Otherwise, we might end up with a corrupt stack.  */
13131           || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13132           || !MEM_P (mem)
13133           || GET_MODE (mem) != mode
13134           || ((GET_CODE (XEXP (mem, 0)) != PLUS
13135 	       || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13136 	       || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13137 	       || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13138                    offset + (i - base) * reg_increment))
13139 	      && (!REG_P (XEXP (mem, 0))
13140 		  || offset + (i - base) * reg_increment != 0)))
13141         return false;
13142 
13143       regno = REGNO (reg);
13144       if (regno == REGNO (addr))
13145         addr_reg_in_reglist = true;
13146     }
13147 
13148   if (load)
13149     {
13150       if (update && addr_reg_in_reglist)
13151         return false;
13152 
13153       /* For Thumb-1, the address register is always modified, either by
13154         write-back or by an explicit load.  If the pattern does not describe an
13155         update, then the address register must be in the list of loaded registers.  */
13156       if (TARGET_THUMB1)
13157         return update || addr_reg_in_reglist;
13158     }
13159 
13160   return true;
13161 }
13162 
13163 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13164    or stores (depending on IS_STORE) into a load-multiple or store-multiple
13165    instruction.  ADD_OFFSET is nonzero if the base address register needs
13166    to be modified with an add instruction before we can use it.  */
13167 
13168 static bool
13169 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13170 				 int nops, HOST_WIDE_INT add_offset)
13171 {
13172   /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13173      if the offset isn't small enough.  The reason 2 ldrs are faster
13174      is because these ARMs are able to do more than one cache access
13175      in a single cycle.  The ARM9 and StrongARM have Harvard caches,
13176      whilst the ARM8 has a double bandwidth cache.  This means that
13177      these cores can do both an instruction fetch and a data fetch in
13178      a single cycle, so the trick of calculating the address into a
13179      scratch register (one of the result regs) and then doing a load
13180      multiple actually becomes slower (and no smaller in code size).
13181      That is the transformation
13182 
13183  	ldr	rd1, [rbase + offset]
13184  	ldr	rd2, [rbase + offset + 4]
13185 
13186      to
13187 
13188  	add	rd1, rbase, offset
13189  	ldmia	rd1, {rd1, rd2}
13190 
13191      produces worse code -- '3 cycles + any stalls on rd2' instead of
13192      '2 cycles + any stalls on rd2'.  On ARMs with only one cache
13193      access per cycle, the first sequence could never complete in less
13194      than 6 cycles, whereas the ldm sequence would only take 5 and
13195      would make better use of sequential accesses if not hitting the
13196      cache.
13197 
13198      We cheat here and test 'arm_ld_sched' which we currently know to
13199      only be true for the ARM8, ARM9 and StrongARM.  If this ever
13200      changes, then the test below needs to be reworked.  */
13201   if (nops == 2 && arm_ld_sched && add_offset != 0)
13202     return false;
13203 
13204   /* XScale has load-store double instructions, but they have stricter
13205      alignment requirements than load-store multiple, so we cannot
13206      use them.
13207 
13208      For XScale ldm requires 2 + NREGS cycles to complete and blocks
13209      the pipeline until completion.
13210 
13211 	NREGS		CYCLES
13212 	  1		  3
13213 	  2		  4
13214 	  3		  5
13215 	  4		  6
13216 
13217      An ldr instruction takes 1-3 cycles, but does not block the
13218      pipeline.
13219 
13220 	NREGS		CYCLES
13221 	  1		 1-3
13222 	  2		 2-6
13223 	  3		 3-9
13224 	  4		 4-12
13225 
13226      Best case ldr will always win.  However, the more ldr instructions
13227      we issue, the less likely we are to be able to schedule them well.
13228      Using ldr instructions also increases code size.
13229 
13230      As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13231      for counts of 3 or 4 regs.  */
13232   if (nops <= 2 && arm_tune_xscale && !optimize_size)
13233     return false;
13234   return true;
13235 }
13236 
13237 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13238    Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13239    an array ORDER which describes the sequence to use when accessing the
13240    offsets that produces an ascending order.  In this sequence, each
13241    offset must be larger by exactly 4 than the previous one.  ORDER[0]
13242    must have been filled in with the lowest offset by the caller.
13243    If UNSORTED_REGS is nonnull, it is an array of register numbers that
13244    we use to verify that ORDER produces an ascending order of registers.
13245    Return true if it was possible to construct such an order, false if
13246    not.  */
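/* For example, with NOPS 4 and UNSORTED_OFFSETS {8, 0, 4, 12}, the caller
   seeds ORDER[0] with 1 (the index of offset 0); the loop then fills ORDER
   in as {1, 2, 0, 3}, i.e. offsets 0, 4, 8, 12 in that order.  Offset sets
   such as {0, 4, 4, 8} (duplicate) or {0, 4, 12, 16} (gap) are rejected.  */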
13247 
13248 static bool
13249 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13250 		      int *unsorted_regs)
13251 {
13252   int i;
13253   for (i = 1; i < nops; i++)
13254     {
13255       int j;
13256 
13257       order[i] = order[i - 1];
13258       for (j = 0; j < nops; j++)
13259 	if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13260 	  {
13261 	    /* We must find exactly one offset that is higher than the
13262 	       previous one by 4.  */
13263 	    if (order[i] != order[i - 1])
13264 	      return false;
13265 	    order[i] = j;
13266 	  }
13267       if (order[i] == order[i - 1])
13268 	return false;
13269       /* The register numbers must be ascending.  */
13270       if (unsorted_regs != NULL
13271 	  && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13272 	return false;
13273     }
13274   return true;
13275 }
13276 
13277 /* Used to determine in a peephole whether a sequence of load
13278    instructions can be changed into a load-multiple instruction.
13279    NOPS is the number of separate load instructions we are examining.  The
13280    first NOPS entries in OPERANDS are the destination registers, the
13281    next NOPS entries are memory operands.  If this function is
13282    successful, *BASE is set to the common base register of the memory
13283    accesses; *LOAD_OFFSET is set to the first memory location's offset
13284    from that base register.
13285    REGS is an array filled in with the destination register numbers.
13286    SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13287    insn numbers to an ascending order of loads.  If CHECK_REGS is true,
13288    the sequence of registers in REGS matches the loads from ascending memory
13289    locations, and the function verifies that the register numbers are
13290    themselves ascending.  If CHECK_REGS is false, the register numbers
13291    are stored in the order they are found in the operands.  */
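/* For example, given the four loads
       ldr r4, [r2]
       ldr r5, [r2, #4]
       ldr r6, [r2, #8]
       ldr r7, [r2, #12]
   the function sets *BASE to 2, *LOAD_OFFSET to 0 and REGS to {4, 5, 6, 7},
   and returns 1 (the ldmia case).  On suitable targets, a sequence whose
   lowest offset is 4 maps to the ldmib case, one whose highest offset is 0
   or -4 maps to the ldmda or ldmdb case, and other encodable offsets map to
   case 5, which needs the base address adjusted first.  */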
13292 static int
13293 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13294 			int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13295 {
13296   int unsorted_regs[MAX_LDM_STM_OPS];
13297   HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13298   int order[MAX_LDM_STM_OPS];
13299   int base_reg = -1;
13300   int i, ldm_case;
13301 
13302   /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13303      easily extended if required.  */
13304   gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13305 
13306   memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13307 
13308   /* Loop over the operands and check that the memory references are
13309      suitable (i.e. immediate offsets from the same base register).  At
13310      the same time, extract the target register, and the memory
13311      offsets.  */
13312   for (i = 0; i < nops; i++)
13313     {
13314       rtx reg;
13315       rtx offset;
13316 
13317       /* Convert a subreg of a mem into the mem itself.  */
13318       if (GET_CODE (operands[nops + i]) == SUBREG)
13319 	operands[nops + i] = alter_subreg (operands + (nops + i), true);
13320 
13321       gcc_assert (MEM_P (operands[nops + i]));
13322 
13323       /* Don't reorder volatile memory references; it doesn't seem worth
13324 	 looking for the case where the order is ok anyway.  */
13325       if (MEM_VOLATILE_P (operands[nops + i]))
13326 	return 0;
13327 
13328       offset = const0_rtx;
13329 
13330       if ((REG_P (reg = XEXP (operands[nops + i], 0))
13331 	   || (GET_CODE (reg) == SUBREG
13332 	       && REG_P (reg = SUBREG_REG (reg))))
13333 	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13334 	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13335 		  || (GET_CODE (reg) == SUBREG
13336 		      && REG_P (reg = SUBREG_REG (reg))))
13337 	      && (CONST_INT_P (offset
13338 		  = XEXP (XEXP (operands[nops + i], 0), 1)))))
13339 	{
13340 	  if (i == 0)
13341 	    {
13342 	      base_reg = REGNO (reg);
13343 	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13344 		return 0;
13345 	    }
13346 	  else if (base_reg != (int) REGNO (reg))
13347 	    /* Not addressed from the same base register.  */
13348 	    return 0;
13349 
13350 	  unsorted_regs[i] = (REG_P (operands[i])
13351 			      ? REGNO (operands[i])
13352 			      : REGNO (SUBREG_REG (operands[i])));
13353 
13354 	  /* If it isn't an integer register, or if it overwrites the
13355 	     base register but isn't the last insn in the list, then
13356 	     we can't do this.  */
13357 	  if (unsorted_regs[i] < 0
13358 	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13359 	      || unsorted_regs[i] > 14
13360 	      || (i != nops - 1 && unsorted_regs[i] == base_reg))
13361 	    return 0;
13362 
13363           /* Don't allow SP to be loaded unless it is also the base
13364              register.  It guarantees that SP is reset correctly when
13365              an LDM instruction is interrupted.  Otherwise, we might
13366              end up with a corrupt stack.  */
13367           if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13368             return 0;
13369 
13370 	  unsorted_offsets[i] = INTVAL (offset);
13371 	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13372 	    order[0] = i;
13373 	}
13374       else
13375 	/* Not a suitable memory address.  */
13376 	return 0;
13377     }
13378 
13379   /* All the useful information has now been extracted from the
13380      operands into unsorted_regs and unsorted_offsets; additionally,
13381      order[0] has been set to the lowest offset in the list.  Sort
13382      the offsets into order, verifying that they are adjacent, and
13383      check that the register numbers are ascending.  */
13384   if (!compute_offset_order (nops, unsorted_offsets, order,
13385 			     check_regs ? unsorted_regs : NULL))
13386     return 0;
13387 
13388   if (saved_order)
13389     memcpy (saved_order, order, sizeof order);
13390 
13391   if (base)
13392     {
13393       *base = base_reg;
13394 
13395       for (i = 0; i < nops; i++)
13396 	regs[i] = unsorted_regs[check_regs ? order[i] : i];
13397 
13398       *load_offset = unsorted_offsets[order[0]];
13399     }
13400 
13401   if (unsorted_offsets[order[0]] == 0)
13402     ldm_case = 1; /* ldmia */
13403   else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13404     ldm_case = 2; /* ldmib */
13405   else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13406     ldm_case = 3; /* ldmda */
13407   else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13408     ldm_case = 4; /* ldmdb */
13409   else if (const_ok_for_arm (unsorted_offsets[order[0]])
13410 	   || const_ok_for_arm (-unsorted_offsets[order[0]]))
13411     ldm_case = 5;
13412   else
13413     return 0;
13414 
13415   if (!multiple_operation_profitable_p (false, nops,
13416 					ldm_case == 5
13417 					? unsorted_offsets[order[0]] : 0))
13418     return 0;
13419 
13420   return ldm_case;
13421 }
13422 
13423 /* Used to determine in a peephole whether a sequence of store instructions can
13424    be changed into a store-multiple instruction.
13425    NOPS is the number of separate store instructions we are examining.
13426    NOPS_TOTAL is the total number of instructions recognized by the peephole
13427    pattern.
13428    The first NOPS entries in OPERANDS are the source registers, the next
13429    NOPS entries are memory operands.  If this function is successful, *BASE is
13430    set to the common base register of the memory accesses; *LOAD_OFFSET is set
13431    to the first memory location's offset from that base register.  REGS is an
13432    array filled in with the source register numbers, REG_RTXS (if nonnull) is
13433    likewise filled with the corresponding rtx's.
13434    SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13435    numbers to an ascending order of stores.
13436    If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13437    from ascending memory locations, and the function verifies that the register
13438    numbers are themselves ascending.  If CHECK_REGS is false, the register
13439    numbers are stored in the order they are found in the operands.  */
13440 static int
13441 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13442 			 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13443 			 HOST_WIDE_INT *load_offset, bool check_regs)
13444 {
13445   int unsorted_regs[MAX_LDM_STM_OPS];
13446   rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13447   HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13448   int order[MAX_LDM_STM_OPS];
13449   int base_reg = -1;
13450   rtx base_reg_rtx = NULL;
13451   int i, stm_case;
13452 
13453   /* Write-back of the base register is currently only supported for Thumb 1.  */
13454   int base_writeback = TARGET_THUMB1;
13455 
13456   /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13457      easily extended if required.  */
13458   gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13459 
13460   memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13461 
13462   /* Loop over the operands and check that the memory references are
13463      suitable (i.e. immediate offsets from the same base register).  At
13464      the same time, extract the target register, and the memory
13465      offsets.  */
13466   for (i = 0; i < nops; i++)
13467     {
13468       rtx reg;
13469       rtx offset;
13470 
13471       /* Convert a subreg of a mem into the mem itself.  */
13472       if (GET_CODE (operands[nops + i]) == SUBREG)
13473 	operands[nops + i] = alter_subreg (operands + (nops + i), true);
13474 
13475       gcc_assert (MEM_P (operands[nops + i]));
13476 
13477       /* Don't reorder volatile memory references; it doesn't seem worth
13478 	 looking for the case where the order is ok anyway.  */
13479       if (MEM_VOLATILE_P (operands[nops + i]))
13480 	return 0;
13481 
13482       offset = const0_rtx;
13483 
13484       if ((REG_P (reg = XEXP (operands[nops + i], 0))
13485 	   || (GET_CODE (reg) == SUBREG
13486 	       && REG_P (reg = SUBREG_REG (reg))))
13487 	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13488 	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13489 		  || (GET_CODE (reg) == SUBREG
13490 		      && REG_P (reg = SUBREG_REG (reg))))
13491 	      && (CONST_INT_P (offset
13492 		  = XEXP (XEXP (operands[nops + i], 0), 1)))))
13493 	{
13494 	  unsorted_reg_rtxs[i] = (REG_P (operands[i])
13495 				  ? operands[i] : SUBREG_REG (operands[i]));
13496 	  unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13497 
13498 	  if (i == 0)
13499 	    {
13500 	      base_reg = REGNO (reg);
13501 	      base_reg_rtx = reg;
13502 	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13503 		return 0;
13504 	    }
13505 	  else if (base_reg != (int) REGNO (reg))
13506 	    /* Not addressed from the same base register.  */
13507 	    return 0;
13508 
13509 	  /* If it isn't an integer register, then we can't do this.  */
13510 	  if (unsorted_regs[i] < 0
13511 	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13512 	      /* The effects are unpredictable if the base register is
13513 		 both updated and stored.  */
13514 	      || (base_writeback && unsorted_regs[i] == base_reg)
13515 	      || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13516 	      || unsorted_regs[i] > 14)
13517 	    return 0;
13518 
13519 	  unsorted_offsets[i] = INTVAL (offset);
13520 	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13521 	    order[0] = i;
13522 	}
13523       else
13524 	/* Not a suitable memory address.  */
13525 	return 0;
13526     }
13527 
13528   /* All the useful information has now been extracted from the
13529      operands into unsorted_regs and unsorted_offsets; additionally,
13530      order[0] has been set to the lowest offset in the list.  Sort
13531      the offsets into order, verifying that they are adjacent, and
13532      check that the register numbers are ascending.  */
13533   if (!compute_offset_order (nops, unsorted_offsets, order,
13534 			     check_regs ? unsorted_regs : NULL))
13535     return 0;
13536 
13537   if (saved_order)
13538     memcpy (saved_order, order, sizeof order);
13539 
13540   if (base)
13541     {
13542       *base = base_reg;
13543 
13544       for (i = 0; i < nops; i++)
13545 	{
13546 	  regs[i] = unsorted_regs[check_regs ? order[i] : i];
13547 	  if (reg_rtxs)
13548 	    reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13549 	}
13550 
13551       *load_offset = unsorted_offsets[order[0]];
13552     }
13553 
13554   if (TARGET_THUMB1
13555       && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13556     return 0;
13557 
13558   if (unsorted_offsets[order[0]] == 0)
13559     stm_case = 1; /* stmia */
13560   else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13561     stm_case = 2; /* stmib */
13562   else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13563     stm_case = 3; /* stmda */
13564   else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13565     stm_case = 4; /* stmdb */
13566   else
13567     return 0;
13568 
13569   if (!multiple_operation_profitable_p (false, nops, 0))
13570     return 0;
13571 
13572   return stm_case;
13573 }
13574 
13575 /* Routines for use in generating RTL.  */
13576 
13577 /* Generate a load-multiple instruction.  COUNT is the number of loads in
13578    the instruction; REGS and MEMS are arrays containing the operands.
13579    BASEREG is the base register to be used in addressing the memory operands.
13580    WBACK_OFFSET is nonzero if the instruction should update the base
13581    register.  */
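/* For instance, with COUNT 2, REGS {4, 5}, BASEREG r0 and WBACK_OFFSET 8,
   and assuming the multi-op form is judged profitable, the result is

     (parallel [(set (reg r0) (plus (reg r0) (const_int 8)))
		(set (reg r4) (MEMS[0]))
		(set (reg r5) (MEMS[1]))])

   i.e. an ldmia with write-back; otherwise a plain sequence of single loads
   (followed by an add of WBACK_OFFSET, if any) is emitted instead.  */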
13582 
13583 static rtx
13584 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13585 			 HOST_WIDE_INT wback_offset)
13586 {
13587   int i = 0, j;
13588   rtx result;
13589 
13590   if (!multiple_operation_profitable_p (false, count, 0))
13591     {
13592       rtx seq;
13593 
13594       start_sequence ();
13595 
13596       for (i = 0; i < count; i++)
13597 	emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13598 
13599       if (wback_offset != 0)
13600 	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13601 
13602       seq = get_insns ();
13603       end_sequence ();
13604 
13605       return seq;
13606     }
13607 
13608   result = gen_rtx_PARALLEL (VOIDmode,
13609 			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13610   if (wback_offset != 0)
13611     {
13612       XVECEXP (result, 0, 0)
13613 	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13614       i = 1;
13615       count++;
13616     }
13617 
13618   for (j = 0; i < count; i++, j++)
13619     XVECEXP (result, 0, i)
13620       = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13621 
13622   return result;
13623 }
13624 
13625 /* Generate a store-multiple instruction.  COUNT is the number of stores in
13626    the instruction; REGS and MEMS are arrays containing the operands.
13627    BASEREG is the base register to be used in addressing the memory operands.
13628    WBACK_OFFSET is nonzero if the instruction should update the base
13629    register.  */
13630 
13631 static rtx
13632 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13633 			  HOST_WIDE_INT wback_offset)
13634 {
13635   int i = 0, j;
13636   rtx result;
13637 
13638   if (GET_CODE (basereg) == PLUS)
13639     basereg = XEXP (basereg, 0);
13640 
13641   if (!multiple_operation_profitable_p (false, count, 0))
13642     {
13643       rtx seq;
13644 
13645       start_sequence ();
13646 
13647       for (i = 0; i < count; i++)
13648 	emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13649 
13650       if (wback_offset != 0)
13651 	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13652 
13653       seq = get_insns ();
13654       end_sequence ();
13655 
13656       return seq;
13657     }
13658 
13659   result = gen_rtx_PARALLEL (VOIDmode,
13660 			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13661   if (wback_offset != 0)
13662     {
13663       XVECEXP (result, 0, 0)
13664 	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13665       i = 1;
13666       count++;
13667     }
13668 
13669   for (j = 0; i < count; i++, j++)
13670     XVECEXP (result, 0, i)
13671       = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13672 
13673   return result;
13674 }
13675 
13676 /* Generate either a load-multiple or a store-multiple instruction.  This
13677    function can be used in situations where we can start with a single MEM
13678    rtx and adjust its address upwards.
13679    COUNT is the number of operations in the instruction, not counting a
13680    possible update of the base register.  REGS is an array containing the
13681    register operands.
13682    BASEREG is the base register to be used in addressing the memory operands,
13683    which are constructed from BASEMEM.
13684    WRITE_BACK specifies whether the generated instruction should include an
13685    update of the base register.
13686    OFFSETP is used to pass an offset to and from this function; this offset
13687    is not used when constructing the address (instead BASEMEM should have an
13688    appropriate offset in its address), it is used only for setting
13689    MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
13690 
13691 static rtx
13692 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13693 		     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13694 {
13695   rtx mems[MAX_LDM_STM_OPS];
13696   HOST_WIDE_INT offset = *offsetp;
13697   int i;
13698 
13699   gcc_assert (count <= MAX_LDM_STM_OPS);
13700 
13701   if (GET_CODE (basereg) == PLUS)
13702     basereg = XEXP (basereg, 0);
13703 
13704   for (i = 0; i < count; i++)
13705     {
13706       rtx addr = plus_constant (Pmode, basereg, i * 4);
13707       mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13708       offset += 4;
13709     }
13710 
13711   if (write_back)
13712     *offsetp = offset;
13713 
13714   if (is_load)
13715     return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13716 				    write_back ? 4 * count : 0);
13717   else
13718     return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13719 				     write_back ? 4 * count : 0);
13720 }
13721 
13722 rtx
13723 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13724 		       rtx basemem, HOST_WIDE_INT *offsetp)
13725 {
13726   return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13727 			      offsetp);
13728 }
13729 
13730 rtx
13731 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13732 			rtx basemem, HOST_WIDE_INT *offsetp)
13733 {
13734   return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13735 			      offsetp);
13736 }
13737 
13738 /* Called from a peephole2 expander to turn a sequence of loads into an
13739    LDM instruction.  OPERANDS are the operands found by the peephole matcher;
13740    NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
13741    is true if we can reorder the registers because they are subsequently used
13742    commutatively.
13743    Returns true iff we could generate a new instruction.  */
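/* For illustration, when the peephole matcher hands us the two loads
       ldr r5, [r3, #4]
       ldr r4, [r3]
   with SORT_REGS true, load_multiple_sequence orders the memory references
   by ascending offset, the destination registers are sorted into {4, 5},
   and, on a core where the ldm is judged profitable, a single
   "ldmia r3, {r4, r5}" is emitted in place of the pair.  */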
13744 
13745 bool
13746 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13747 {
13748   int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13749   rtx mems[MAX_LDM_STM_OPS];
13750   int i, j, base_reg;
13751   rtx base_reg_rtx;
13752   HOST_WIDE_INT offset;
13753   int write_back = FALSE;
13754   int ldm_case;
13755   rtx addr;
13756 
13757   ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13758 				     &base_reg, &offset, !sort_regs);
13759 
13760   if (ldm_case == 0)
13761     return false;
13762 
13763   if (sort_regs)
13764     for (i = 0; i < nops - 1; i++)
13765       for (j = i + 1; j < nops; j++)
13766 	if (regs[i] > regs[j])
13767 	  {
13768 	    int t = regs[i];
13769 	    regs[i] = regs[j];
13770 	    regs[j] = t;
13771 	  }
13772   base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13773 
13774   if (TARGET_THUMB1)
13775     {
13776       gcc_assert (ldm_case == 1 || ldm_case == 5);
13777 
13778       /* Thumb-1 ldm uses writeback except if the base is loaded.  */
13779       write_back = true;
13780       for (i = 0; i < nops; i++)
13781 	if (base_reg == regs[i])
13782 	  write_back = false;
13783 
13784       /* Ensure the base is dead if it is updated.  */
13785       if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
13786 	return false;
13787     }
13788 
13789   if (ldm_case == 5)
13790     {
13791       rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13792       emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13793       offset = 0;
13794       base_reg_rtx = newbase;
13795     }
13796 
13797   for (i = 0; i < nops; i++)
13798     {
13799       addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13800       mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13801 					      SImode, addr, 0);
13802     }
13803   emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13804 				      write_back ? offset + i * 4 : 0));
13805   return true;
13806 }
13807 
13808 /* Called from a peephole2 expander to turn a sequence of stores into an
13809    STM instruction.  OPERANDS are the operands found by the peephole matcher;
13810    NOPS indicates how many separate stores we are trying to combine.
13811    Returns true iff we could generate a new instruction.  */
13812 
13813 bool
13814 gen_stm_seq (rtx *operands, int nops)
13815 {
13816   int i;
13817   int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13818   rtx mems[MAX_LDM_STM_OPS];
13819   int base_reg;
13820   rtx base_reg_rtx;
13821   HOST_WIDE_INT offset;
13822   int write_back = FALSE;
13823   int stm_case;
13824   rtx addr;
13825   bool base_reg_dies;
13826 
13827   stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13828 				      mem_order, &base_reg, &offset, true);
13829 
13830   if (stm_case == 0)
13831     return false;
13832 
13833   base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13834 
13835   base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13836   if (TARGET_THUMB1)
13837     {
13838       gcc_assert (base_reg_dies);
13839       write_back = TRUE;
13840     }
13841 
13842   if (stm_case == 5)
13843     {
13844       gcc_assert (base_reg_dies);
13845       emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13846       offset = 0;
13847     }
13848 
13849   addr = plus_constant (Pmode, base_reg_rtx, offset);
13850 
13851   for (i = 0; i < nops; i++)
13852     {
13853       addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13854       mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13855 					      SImode, addr, 0);
13856     }
13857   emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13858 				       write_back ? offset + i * 4 : 0));
13859   return true;
13860 }
13861 
13862 /* Called from a peephole2 expander to turn a sequence of stores that are
13863    preceded by constant loads into an STM instruction.  OPERANDS are the
13864    operands found by the peephole matcher; NOPS indicates how many
13865    separate stores we are trying to combine; there are 2 * NOPS
13866    instructions in the peephole.
13867    Returns true iff we could generate a new instruction.  */
13868 
13869 bool
13870 gen_const_stm_seq (rtx *operands, int nops)
13871 {
13872   int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13873   int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13874   rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13875   rtx mems[MAX_LDM_STM_OPS];
13876   int base_reg;
13877   rtx base_reg_rtx;
13878   HOST_WIDE_INT offset;
13879   int write_back = FALSE;
13880   int stm_case;
13881   rtx addr;
13882   bool base_reg_dies;
13883   int i, j;
13884   HARD_REG_SET allocated;
13885 
13886   stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13887 				      mem_order, &base_reg, &offset, false);
13888 
13889   if (stm_case == 0)
13890     return false;
13891 
13892   memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13893 
13894   /* If the same register is used more than once, try to find a free
13895      register.  */
13896   CLEAR_HARD_REG_SET (allocated);
13897   for (i = 0; i < nops; i++)
13898     {
13899       for (j = i + 1; j < nops; j++)
13900 	if (regs[i] == regs[j])
13901 	  {
13902 	    rtx t = peep2_find_free_register (0, nops * 2,
13903 					      TARGET_THUMB1 ? "l" : "r",
13904 					      SImode, &allocated);
13905 	    if (t == NULL_RTX)
13906 	      return false;
13907 	    reg_rtxs[i] = t;
13908 	    regs[i] = REGNO (t);
13909 	  }
13910     }
13911 
13912   /* Compute an ordering that maps the register numbers to an ascending
13913      sequence.  */
13914   reg_order[0] = 0;
13915   for (i = 0; i < nops; i++)
13916     if (regs[i] < regs[reg_order[0]])
13917       reg_order[0] = i;
13918 
13919   for (i = 1; i < nops; i++)
13920     {
13921       int this_order = reg_order[i - 1];
13922       for (j = 0; j < nops; j++)
13923 	if (regs[j] > regs[reg_order[i - 1]]
13924 	    && (this_order == reg_order[i - 1]
13925 		|| regs[j] < regs[this_order]))
13926 	  this_order = j;
13927       reg_order[i] = this_order;
13928     }
13929 
13930   /* Ensure that registers that must be live after the instruction end
13931      up with the correct value.  */
13932   for (i = 0; i < nops; i++)
13933     {
13934       int this_order = reg_order[i];
13935       if ((this_order != mem_order[i]
13936 	   || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13937 	  && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13938 	return false;
13939     }
13940 
13941   /* Load the constants.  */
13942   for (i = 0; i < nops; i++)
13943     {
13944       rtx op = operands[2 * nops + mem_order[i]];
13945       sorted_regs[i] = regs[reg_order[i]];
13946       emit_move_insn (reg_rtxs[reg_order[i]], op);
13947     }
13948 
13949   base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13950 
13951   base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13952   if (TARGET_THUMB1)
13953     {
13954       gcc_assert (base_reg_dies);
13955       write_back = TRUE;
13956     }
13957 
13958   if (stm_case == 5)
13959     {
13960       gcc_assert (base_reg_dies);
13961       emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13962       offset = 0;
13963     }
13964 
13965   addr = plus_constant (Pmode, base_reg_rtx, offset);
13966 
13967   for (i = 0; i < nops; i++)
13968     {
13969       addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13970       mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13971 					      SImode, addr, 0);
13972     }
13973   emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13974 				       write_back ? offset + i * 4 : 0));
13975   return true;
13976 }
13977 
13978 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13979    unaligned copies on processors which support unaligned semantics for those
13980    instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
13981    (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13982    An interleave factor of 1 (the minimum) will perform no interleaving.
13983    Load/store multiple are used for aligned addresses where possible.  */
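/* As a worked example, a 7-byte copy with INTERLEAVE_FACTOR 1 and both
   buffers unaligned is emitted as one unaligned word load/store, then a
   halfword load/store, then a byte load/store; with INTERLEAVE_FACTOR 2 and
   a word-aligned source, whole-word chunks are instead fetched two at a
   time with a load-multiple before being stored.  */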
13984 
13985 static void
13986 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13987 				   HOST_WIDE_INT length,
13988 				   unsigned int interleave_factor)
13989 {
13990   rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13991   int *regnos = XALLOCAVEC (int, interleave_factor);
13992   HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13993   HOST_WIDE_INT i, j;
13994   HOST_WIDE_INT remaining = length, words;
13995   rtx halfword_tmp = NULL, byte_tmp = NULL;
13996   rtx dst, src;
13997   bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13998   bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13999   HOST_WIDE_INT srcoffset, dstoffset;
14000   HOST_WIDE_INT src_autoinc, dst_autoinc;
14001   rtx mem, addr;
14002 
14003   gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
14004 
14005   /* Use hard registers if we have aligned source or destination so we can use
14006      load/store multiple with contiguous registers.  */
14007   if (dst_aligned || src_aligned)
14008     for (i = 0; i < interleave_factor; i++)
14009       regs[i] = gen_rtx_REG (SImode, i);
14010   else
14011     for (i = 0; i < interleave_factor; i++)
14012       regs[i] = gen_reg_rtx (SImode);
14013 
14014   dst = copy_addr_to_reg (XEXP (dstbase, 0));
14015   src = copy_addr_to_reg (XEXP (srcbase, 0));
14016 
14017   srcoffset = dstoffset = 0;
14018 
14019   /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14020      For copying the last bytes we want to subtract this offset again.  */
14021   src_autoinc = dst_autoinc = 0;
14022 
14023   for (i = 0; i < interleave_factor; i++)
14024     regnos[i] = i;
14025 
14026   /* Copy BLOCK_SIZE_BYTES chunks.  */
14027 
14028   for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14029     {
14030       /* Load words.  */
14031       if (src_aligned && interleave_factor > 1)
14032 	{
14033 	  emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14034 					    TRUE, srcbase, &srcoffset));
14035 	  src_autoinc += UNITS_PER_WORD * interleave_factor;
14036 	}
14037       else
14038 	{
14039 	  for (j = 0; j < interleave_factor; j++)
14040 	    {
14041 	      addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14042 						 - src_autoinc));
14043 	      mem = adjust_automodify_address (srcbase, SImode, addr,
14044 					       srcoffset + j * UNITS_PER_WORD);
14045 	      emit_insn (gen_unaligned_loadsi (regs[j], mem));
14046 	    }
14047 	  srcoffset += block_size_bytes;
14048 	}
14049 
14050       /* Store words.  */
14051       if (dst_aligned && interleave_factor > 1)
14052 	{
14053 	  emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14054 					     TRUE, dstbase, &dstoffset));
14055 	  dst_autoinc += UNITS_PER_WORD * interleave_factor;
14056 	}
14057       else
14058 	{
14059 	  for (j = 0; j < interleave_factor; j++)
14060 	    {
14061 	      addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14062 						 - dst_autoinc));
14063 	      mem = adjust_automodify_address (dstbase, SImode, addr,
14064 					       dstoffset + j * UNITS_PER_WORD);
14065 	      emit_insn (gen_unaligned_storesi (mem, regs[j]));
14066 	    }
14067 	  dstoffset += block_size_bytes;
14068 	}
14069 
14070       remaining -= block_size_bytes;
14071     }
14072 
14073   /* Copy any whole words left (note these aren't interleaved with any
14074      subsequent halfword/byte load/stores in the interests of simplicity).  */
14075 
14076   words = remaining / UNITS_PER_WORD;
14077 
14078   gcc_assert (words < interleave_factor);
14079 
14080   if (src_aligned && words > 1)
14081     {
14082       emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14083 					&srcoffset));
14084       src_autoinc += UNITS_PER_WORD * words;
14085     }
14086   else
14087     {
14088       for (j = 0; j < words; j++)
14089 	{
14090 	  addr = plus_constant (Pmode, src,
14091 				srcoffset + j * UNITS_PER_WORD - src_autoinc);
14092 	  mem = adjust_automodify_address (srcbase, SImode, addr,
14093 					   srcoffset + j * UNITS_PER_WORD);
14094 	  if (src_aligned)
14095 	    emit_move_insn (regs[j], mem);
14096 	  else
14097 	    emit_insn (gen_unaligned_loadsi (regs[j], mem));
14098 	}
14099       srcoffset += words * UNITS_PER_WORD;
14100     }
14101 
14102   if (dst_aligned && words > 1)
14103     {
14104       emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14105 					 &dstoffset));
14106       dst_autoinc += words * UNITS_PER_WORD;
14107     }
14108   else
14109     {
14110       for (j = 0; j < words; j++)
14111 	{
14112 	  addr = plus_constant (Pmode, dst,
14113 				dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14114 	  mem = adjust_automodify_address (dstbase, SImode, addr,
14115 					   dstoffset + j * UNITS_PER_WORD);
14116 	  if (dst_aligned)
14117 	    emit_move_insn (mem, regs[j]);
14118 	  else
14119 	    emit_insn (gen_unaligned_storesi (mem, regs[j]));
14120 	}
14121       dstoffset += words * UNITS_PER_WORD;
14122     }
14123 
14124   remaining -= words * UNITS_PER_WORD;
14125 
14126   gcc_assert (remaining < 4);
14127 
14128   /* Copy a halfword if necessary.  */
14129 
14130   if (remaining >= 2)
14131     {
14132       halfword_tmp = gen_reg_rtx (SImode);
14133 
14134       addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14135       mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14136       emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14137 
14138       /* Either write out immediately, or delay until we've loaded the last
14139 	 byte, depending on interleave factor.  */
14140       if (interleave_factor == 1)
14141 	{
14142 	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14143 	  mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14144 	  emit_insn (gen_unaligned_storehi (mem,
14145 		       gen_lowpart (HImode, halfword_tmp)));
14146 	  halfword_tmp = NULL;
14147 	  dstoffset += 2;
14148 	}
14149 
14150       remaining -= 2;
14151       srcoffset += 2;
14152     }
14153 
14154   gcc_assert (remaining < 2);
14155 
14156   /* Copy last byte.  */
14157 
14158   if ((remaining & 1) != 0)
14159     {
14160       byte_tmp = gen_reg_rtx (SImode);
14161 
14162       addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14163       mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14164       emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14165 
14166       if (interleave_factor == 1)
14167 	{
14168 	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14169 	  mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14170 	  emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14171 	  byte_tmp = NULL;
14172 	  dstoffset++;
14173 	}
14174 
14175       remaining--;
14176       srcoffset++;
14177     }
14178 
14179   /* Store last halfword if we haven't done so already.  */
14180 
14181   if (halfword_tmp)
14182     {
14183       addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14184       mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14185       emit_insn (gen_unaligned_storehi (mem,
14186 		   gen_lowpart (HImode, halfword_tmp)));
14187       dstoffset += 2;
14188     }
14189 
14190   /* Likewise for last byte.  */
14191 
14192   if (byte_tmp)
14193     {
14194       addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14195       mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14196       emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14197       dstoffset++;
14198     }
14199 
14200   gcc_assert (remaining == 0 && srcoffset == dstoffset);
14201 }
14202 
14203 /* From mips_adjust_block_mem:
14204 
14205    Helper function for doing a loop-based block operation on memory
14206    reference MEM.  Each iteration of the loop will operate on LENGTH
14207    bytes of MEM.
14208 
14209    Create a new base register for use within the loop and point it to
14210    the start of MEM.  Create a new memory reference that uses this
14211    register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */
14212 
14213 static void
14214 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14215 		      rtx *loop_mem)
14216 {
14217   *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14218 
14219   /* Although the new mem does not refer to a known location,
14220      it does keep up to LENGTH bytes of alignment.  */
14221   *loop_mem = change_address (mem, BLKmode, *loop_reg);
14222   set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14223 }
14224 
14225 /* From mips_block_move_loop:
14226 
14227    Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14228    bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
14229    the memory regions do not overlap.  */
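/* For example, LENGTH 37 with BYTES_PER_ITER 16 copies 32 bytes in two
   iterations of the loop and then calls arm_block_move_unaligned_straight
   for the remaining 5 bytes.  */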
14230 
14231 static void
14232 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14233 			       unsigned int interleave_factor,
14234 			       HOST_WIDE_INT bytes_per_iter)
14235 {
14236   rtx src_reg, dest_reg, final_src, test;
14237   HOST_WIDE_INT leftover;
14238 
14239   leftover = length % bytes_per_iter;
14240   length -= leftover;
14241 
14242   /* Create registers and memory references for use within the loop.  */
14243   arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14244   arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14245 
14246   /* Calculate the value that SRC_REG should have after the last iteration of
14247      the loop.  */
14248   final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14249 				   0, 0, OPTAB_WIDEN);
14250 
14251   /* Emit the start of the loop.  */
14252   rtx_code_label *label = gen_label_rtx ();
14253   emit_label (label);
14254 
14255   /* Emit the loop body.  */
14256   arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14257 				     interleave_factor);
14258 
14259   /* Move on to the next block.  */
14260   emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14261   emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14262 
14263   /* Emit the loop condition.  */
14264   test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14265   emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14266 
14267   /* Mop up any left-over bytes.  */
14268   if (leftover)
14269     arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14270 }
14271 
14272 /* Emit a block move when either the source or destination is unaligned (not
14273    aligned to a four-byte boundary).  This may need further tuning depending on
14274    core type, optimize_size setting, etc.  */
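/* For instance, a 40-byte copy with !optimize_size uses the loop with an
   interleave factor of 4 and 16 bytes per iteration, whereas under
   optimize_size with neither buffer word-aligned the same copy uses the
   loop with an interleave factor of 1 and 4 bytes per iteration.  */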
14275 
14276 static int
14277 arm_movmemqi_unaligned (rtx *operands)
14278 {
14279   HOST_WIDE_INT length = INTVAL (operands[2]);
14280 
14281   if (optimize_size)
14282     {
14283       bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14284       bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14285       /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14286 	 size of code if optimizing for size.  We'll use ldm/stm if src_aligned
14287 	 or dst_aligned though: allow more interleaving in those cases since the
14288 	 resulting code can be smaller.  */
14289       unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14290       HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14291 
14292       if (length > 12)
14293 	arm_block_move_unaligned_loop (operands[0], operands[1], length,
14294 				       interleave_factor, bytes_per_iter);
14295       else
14296 	arm_block_move_unaligned_straight (operands[0], operands[1], length,
14297 					   interleave_factor);
14298     }
14299   else
14300     {
14301       /* Note that the loop created by arm_block_move_unaligned_loop may be
14302 	 subject to loop unrolling, which makes tuning this condition a little
14303 	 redundant.  */
14304       if (length > 32)
14305 	arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14306       else
14307 	arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14308     }
14309 
14310   return 1;
14311 }
14312 
14313 int
14314 arm_gen_movmemqi (rtx *operands)
14315 {
14316   HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14317   HOST_WIDE_INT srcoffset, dstoffset;
14318   rtx src, dst, srcbase, dstbase;
14319   rtx part_bytes_reg = NULL;
14320   rtx mem;
14321 
14322   if (!CONST_INT_P (operands[2])
14323       || !CONST_INT_P (operands[3])
14324       || INTVAL (operands[2]) > 64)
14325     return 0;
14326 
14327   if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14328     return arm_movmemqi_unaligned (operands);
14329 
14330   if (INTVAL (operands[3]) & 3)
14331     return 0;
14332 
14333   dstbase = operands[0];
14334   srcbase = operands[1];
14335 
14336   dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14337   src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14338 
14339   in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14340   out_words_to_go = INTVAL (operands[2]) / 4;
14341   last_bytes = INTVAL (operands[2]) & 3;
14342   dstoffset = srcoffset = 0;
14343 
14344   if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14345     part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14346 
14347   while (in_words_to_go >= 2)
14348     {
14349       if (in_words_to_go > 4)
14350 	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14351 					  TRUE, srcbase, &srcoffset));
14352       else
14353 	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14354 					  src, FALSE, srcbase,
14355 					  &srcoffset));
14356 
14357       if (out_words_to_go)
14358 	{
14359 	  if (out_words_to_go > 4)
14360 	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14361 					       TRUE, dstbase, &dstoffset));
14362 	  else if (out_words_to_go != 1)
14363 	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14364 					       out_words_to_go, dst,
14365 					       (last_bytes == 0
14366 						? FALSE : TRUE),
14367 					       dstbase, &dstoffset));
14368 	  else
14369 	    {
14370 	      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14371 	      emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14372 	      if (last_bytes != 0)
14373 		{
14374 		  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14375 		  dstoffset += 4;
14376 		}
14377 	    }
14378 	}
14379 
14380       in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14381       out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14382     }
14383 
14384   /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
14385   if (out_words_to_go)
14386     {
14387       rtx sreg;
14388 
14389       mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14390       sreg = copy_to_reg (mem);
14391 
14392       mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14393       emit_move_insn (mem, sreg);
14394       in_words_to_go--;
14395 
14396       gcc_assert (!in_words_to_go);	/* Sanity check */
14397     }
14398 
14399   if (in_words_to_go)
14400     {
14401       gcc_assert (in_words_to_go > 0);
14402 
14403       mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14404       part_bytes_reg = copy_to_mode_reg (SImode, mem);
14405     }
14406 
14407   gcc_assert (!last_bytes || part_bytes_reg);
14408 
14409   if (BYTES_BIG_ENDIAN && last_bytes)
14410     {
14411       rtx tmp = gen_reg_rtx (SImode);
14412 
14413       /* The bytes we want are in the top end of the word.  */
14414       emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14415 			      GEN_INT (8 * (4 - last_bytes))));
14416       part_bytes_reg = tmp;
14417 
14418       while (last_bytes)
14419 	{
14420 	  mem = adjust_automodify_address (dstbase, QImode,
14421 					   plus_constant (Pmode, dst,
14422 							  last_bytes - 1),
14423 					   dstoffset + last_bytes - 1);
14424 	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14425 
14426 	  if (--last_bytes)
14427 	    {
14428 	      tmp = gen_reg_rtx (SImode);
14429 	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14430 	      part_bytes_reg = tmp;
14431 	    }
14432 	}
14433 
14434     }
14435   else
14436     {
14437       if (last_bytes > 1)
14438 	{
14439 	  mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14440 	  emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14441 	  last_bytes -= 2;
14442 	  if (last_bytes)
14443 	    {
14444 	      rtx tmp = gen_reg_rtx (SImode);
14445 	      emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14446 	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14447 	      part_bytes_reg = tmp;
14448 	      dstoffset += 2;
14449 	    }
14450 	}
14451 
14452       if (last_bytes)
14453 	{
14454 	  mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14455 	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14456 	}
14457     }
14458 
14459   return 1;
14460 }
14461 
14462 /* Helper for gen_movmem_ldrd_strd.  Increase the address of the memory
14463    rtx MEM by the size of its mode.  */
14464 inline static rtx
14465 next_consecutive_mem (rtx mem)
14466 {
14467   machine_mode mode = GET_MODE (mem);
14468   HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14469   rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14470 
14471   return adjust_automodify_address (mem, mode, addr, offset);
14472 }
14473 
14474 /* Copy using LDRD/STRD instructions whenever possible.
14475    Returns true upon success. */
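/* A rough sketch of the intent, assuming both buffers are at least
   word-aligned: a 16-byte copy becomes two DImode register moves, which the
   ldrd/strd patterns will normally emit as something like
	ldrd	r4, r5, [r1]
	strd	r4, r5, [r0]
	ldrd	r4, r5, [r1, #8]
	strd	r4, r5, [r0, #8]
   once the source and destination addresses have been copied into registers
   (register numbers are illustrative only).  */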
14476 bool
14477 gen_movmem_ldrd_strd (rtx *operands)
14478 {
14479   unsigned HOST_WIDE_INT len;
14480   HOST_WIDE_INT align;
14481   rtx src, dst, base;
14482   rtx reg0;
14483   bool src_aligned, dst_aligned;
14484   bool src_volatile, dst_volatile;
14485 
14486   gcc_assert (CONST_INT_P (operands[2]));
14487   gcc_assert (CONST_INT_P (operands[3]));
14488 
14489   len = UINTVAL (operands[2]);
14490   if (len > 64)
14491     return false;
14492 
14493   /* Maximum alignment we can assume for both src and dst buffers.  */
14494   align = INTVAL (operands[3]);
14495 
14496   if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14497     return false;
14498 
14499   /* Place src and dst addresses in registers
14500      and update the corresponding mem rtx.  */
14501   dst = operands[0];
14502   dst_volatile = MEM_VOLATILE_P (dst);
14503   dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14504   base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14505   dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14506 
14507   src = operands[1];
14508   src_volatile = MEM_VOLATILE_P (src);
14509   src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14510   base = copy_to_mode_reg (SImode, XEXP (src, 0));
14511   src = adjust_automodify_address (src, VOIDmode, base, 0);
14512 
14513   if (!unaligned_access && !(src_aligned && dst_aligned))
14514     return false;
14515 
14516   if (src_volatile || dst_volatile)
14517     return false;
14518 
14519   /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
14520   if (!(dst_aligned || src_aligned))
14521     return arm_gen_movmemqi (operands);
14522 
14523   /* If either src or dst is unaligned we'll be accessing it as pairs
14524      of unaligned SImode accesses.  Otherwise we can generate DImode
14525      ldrd/strd instructions.  */
14526   src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14527   dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14528 
14529   while (len >= 8)
14530     {
14531       len -= 8;
14532       reg0 = gen_reg_rtx (DImode);
14533       rtx low_reg = NULL_RTX;
14534       rtx hi_reg = NULL_RTX;
14535 
14536       if (!src_aligned || !dst_aligned)
14537 	{
14538 	  low_reg = gen_lowpart (SImode, reg0);
14539 	  hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14540 	}
14541       if (src_aligned)
14542         emit_move_insn (reg0, src);
14543       else
14544 	{
14545 	  emit_insn (gen_unaligned_loadsi (low_reg, src));
14546 	  src = next_consecutive_mem (src);
14547 	  emit_insn (gen_unaligned_loadsi (hi_reg, src));
14548 	}
14549 
14550       if (dst_aligned)
14551         emit_move_insn (dst, reg0);
14552       else
14553 	{
14554 	  emit_insn (gen_unaligned_storesi (dst, low_reg));
14555 	  dst = next_consecutive_mem (dst);
14556 	  emit_insn (gen_unaligned_storesi (dst, hi_reg));
14557 	}
14558 
14559       src = next_consecutive_mem (src);
14560       dst = next_consecutive_mem (dst);
14561     }
14562 
14563   gcc_assert (len < 8);
14564   if (len >= 4)
14565     {
14566       /* More than a word but less than a double-word to copy.  Copy a word.  */
14567       reg0 = gen_reg_rtx (SImode);
14568       src = adjust_address (src, SImode, 0);
14569       dst = adjust_address (dst, SImode, 0);
14570       if (src_aligned)
14571         emit_move_insn (reg0, src);
14572       else
14573         emit_insn (gen_unaligned_loadsi (reg0, src));
14574 
14575       if (dst_aligned)
14576         emit_move_insn (dst, reg0);
14577       else
14578         emit_insn (gen_unaligned_storesi (dst, reg0));
14579 
14580       src = next_consecutive_mem (src);
14581       dst = next_consecutive_mem (dst);
14582       len -= 4;
14583     }
14584 
14585   if (len == 0)
14586     return true;
14587 
14588   /* Copy the remaining bytes.  */
14589   if (len >= 2)
14590     {
14591       dst = adjust_address (dst, HImode, 0);
14592       src = adjust_address (src, HImode, 0);
14593       reg0 = gen_reg_rtx (SImode);
14594       if (src_aligned)
14595         emit_insn (gen_zero_extendhisi2 (reg0, src));
14596       else
14597         emit_insn (gen_unaligned_loadhiu (reg0, src));
14598 
14599       if (dst_aligned)
14600         emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14601       else
14602         emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14603 
14604       src = next_consecutive_mem (src);
14605       dst = next_consecutive_mem (dst);
14606       if (len == 2)
14607         return true;
14608     }
14609 
14610   dst = adjust_address (dst, QImode, 0);
14611   src = adjust_address (src, QImode, 0);
14612   reg0 = gen_reg_rtx (QImode);
14613   emit_move_insn (reg0, src);
14614   emit_move_insn (dst, reg0);
14615   return true;
14616 }
14617 
14618 /* Select a dominance comparison mode if possible for a test of the general
14619    form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
14620    COND_OR == DOM_CC_X_AND_Y => (X && Y)
14621    COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14622    COND_OR == DOM_CC_X_OR_Y => (X || Y)
14623    In all cases OP will be either EQ or NE, but we don't need to know which
14624    here.  If we are unable to support a dominance comparison we return
14625    CC mode.  This will then fail to match for the RTL expressions that
14626    generate this call.  */
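/* For example, (ne (ior (eq r0 r1) (le r2 r3)) (const_int 0)) with
   COND_OR == DOM_CC_X_OR_Y selects CC_DLEmode, since EQ dominates LE
   (whenever EQ holds, LE holds too).  */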
14627 machine_mode
14628 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14629 {
14630   enum rtx_code cond1, cond2;
14631   int swapped = 0;
14632 
14633   /* Currently we will probably get the wrong result if the individual
14634      comparisons are not simple.  This also ensures that it is safe to
14635      reverse a comparison if necessary.  */
14636   if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14637        != CCmode)
14638       || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14639 	  != CCmode))
14640     return CCmode;
14641 
14642   /* The if_then_else variant of this tests the second condition if the
14643      first passes, but is true if the first fails.  Reverse the first
14644      condition to get a true "inclusive-or" expression.  */
14645   if (cond_or == DOM_CC_NX_OR_Y)
14646     cond1 = reverse_condition (cond1);
14647 
14648   /* If the comparisons are not equal, and one doesn't dominate the other,
14649      then we can't do this.  */
14650   if (cond1 != cond2
14651       && !comparison_dominates_p (cond1, cond2)
14652       && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14653     return CCmode;
14654 
14655   if (swapped)
14656     std::swap (cond1, cond2);
14657 
14658   switch (cond1)
14659     {
14660     case EQ:
14661       if (cond_or == DOM_CC_X_AND_Y)
14662 	return CC_DEQmode;
14663 
14664       switch (cond2)
14665 	{
14666 	case EQ: return CC_DEQmode;
14667 	case LE: return CC_DLEmode;
14668 	case LEU: return CC_DLEUmode;
14669 	case GE: return CC_DGEmode;
14670 	case GEU: return CC_DGEUmode;
14671 	default: gcc_unreachable ();
14672 	}
14673 
14674     case LT:
14675       if (cond_or == DOM_CC_X_AND_Y)
14676 	return CC_DLTmode;
14677 
14678       switch (cond2)
14679 	{
14680 	case LT:
14681 	  return CC_DLTmode;
14682 	case LE:
14683 	  return CC_DLEmode;
14684 	case NE:
14685 	  return CC_DNEmode;
14686 	default:
14687 	  gcc_unreachable ();
14688 	}
14689 
14690     case GT:
14691       if (cond_or == DOM_CC_X_AND_Y)
14692 	return CC_DGTmode;
14693 
14694       switch (cond2)
14695 	{
14696 	case GT:
14697 	  return CC_DGTmode;
14698 	case GE:
14699 	  return CC_DGEmode;
14700 	case NE:
14701 	  return CC_DNEmode;
14702 	default:
14703 	  gcc_unreachable ();
14704 	}
14705 
14706     case LTU:
14707       if (cond_or == DOM_CC_X_AND_Y)
14708 	return CC_DLTUmode;
14709 
14710       switch (cond2)
14711 	{
14712 	case LTU:
14713 	  return CC_DLTUmode;
14714 	case LEU:
14715 	  return CC_DLEUmode;
14716 	case NE:
14717 	  return CC_DNEmode;
14718 	default:
14719 	  gcc_unreachable ();
14720 	}
14721 
14722     case GTU:
14723       if (cond_or == DOM_CC_X_AND_Y)
14724 	return CC_DGTUmode;
14725 
14726       switch (cond2)
14727 	{
14728 	case GTU:
14729 	  return CC_DGTUmode;
14730 	case GEU:
14731 	  return CC_DGEUmode;
14732 	case NE:
14733 	  return CC_DNEmode;
14734 	default:
14735 	  gcc_unreachable ();
14736 	}
14737 
14738     /* The remaining cases only occur when both comparisons are the
14739        same.  */
14740     case NE:
14741       gcc_assert (cond1 == cond2);
14742       return CC_DNEmode;
14743 
14744     case LE:
14745       gcc_assert (cond1 == cond2);
14746       return CC_DLEmode;
14747 
14748     case GE:
14749       gcc_assert (cond1 == cond2);
14750       return CC_DGEmode;
14751 
14752     case LEU:
14753       gcc_assert (cond1 == cond2);
14754       return CC_DLEUmode;
14755 
14756     case GEU:
14757       gcc_assert (cond1 == cond2);
14758       return CC_DGEUmode;
14759 
14760     default:
14761       gcc_unreachable ();
14762     }
14763 }
14764 
14765 machine_mode
14766 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14767 {
14768   /* All floating point compares return CCFP if it is an equality
14769      comparison, and CCFPE otherwise.  */
14770   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14771     {
14772       switch (op)
14773 	{
14774 	case EQ:
14775 	case NE:
14776 	case UNORDERED:
14777 	case ORDERED:
14778 	case UNLT:
14779 	case UNLE:
14780 	case UNGT:
14781 	case UNGE:
14782 	case UNEQ:
14783 	case LTGT:
14784 	  return CCFPmode;
14785 
14786 	case LT:
14787 	case LE:
14788 	case GT:
14789 	case GE:
14790 	  return CCFPEmode;
14791 
14792 	default:
14793 	  gcc_unreachable ();
14794 	}
14795     }
14796 
14797   /* A compare with a shifted operand.  Because of canonicalization, the
14798      comparison will have to be swapped when we emit the assembler.  */
14799   if (GET_MODE (y) == SImode
14800       && (REG_P (y) || (GET_CODE (y) == SUBREG))
14801       && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14802 	  || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14803 	  || GET_CODE (x) == ROTATERT))
14804     return CC_SWPmode;
14805 
14806   /* This operation is performed swapped, but since we only rely on the Z
14807      flag we don't need an additional mode.  */
14808   if (GET_MODE (y) == SImode
14809       && (REG_P (y) || (GET_CODE (y) == SUBREG))
14810       && GET_CODE (x) == NEG
14811       && (op == EQ || op == NE))
14812     return CC_Zmode;
14813 
14814   /* This is a special case that is used by combine to allow a
14815      comparison of a shifted byte load to be split into a zero-extend
14816      followed by a comparison of the shifted integer (only valid for
14817      equalities and unsigned inequalities).  */
14818   if (GET_MODE (x) == SImode
14819       && GET_CODE (x) == ASHIFT
14820       && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14821       && GET_CODE (XEXP (x, 0)) == SUBREG
14822       && MEM_P (SUBREG_REG (XEXP (x, 0)))
14823       && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14824       && (op == EQ || op == NE
14825 	  || op == GEU || op == GTU || op == LTU || op == LEU)
14826       && CONST_INT_P (y))
14827     return CC_Zmode;
14828 
14829   /* A construct for a conditional compare, if the false arm contains
14830      0, then both conditions must be true, otherwise either condition
14831      must be true.  Not all conditions are possible, so CCmode is
14832      returned if it can't be done.  */
14833   if (GET_CODE (x) == IF_THEN_ELSE
14834       && (XEXP (x, 2) == const0_rtx
14835 	  || XEXP (x, 2) == const1_rtx)
14836       && COMPARISON_P (XEXP (x, 0))
14837       && COMPARISON_P (XEXP (x, 1)))
14838     return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14839 					 INTVAL (XEXP (x, 2)));
14840 
14841   /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
14842   if (GET_CODE (x) == AND
14843       && (op == EQ || op == NE)
14844       && COMPARISON_P (XEXP (x, 0))
14845       && COMPARISON_P (XEXP (x, 1)))
14846     return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14847 					 DOM_CC_X_AND_Y);
14848 
14849   if (GET_CODE (x) == IOR
14850       && (op == EQ || op == NE)
14851       && COMPARISON_P (XEXP (x, 0))
14852       && COMPARISON_P (XEXP (x, 1)))
14853     return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14854 					 DOM_CC_X_OR_Y);
14855 
14856   /* An operation (on Thumb) where we want to test for a single bit.
14857      This is done by shifting that bit up into the top bit of a
14858      scratch register; we can then branch on the sign bit.  */
14859   if (TARGET_THUMB1
14860       && GET_MODE (x) == SImode
14861       && (op == EQ || op == NE)
14862       && GET_CODE (x) == ZERO_EXTRACT
14863       && XEXP (x, 1) == const1_rtx)
14864     return CC_Nmode;
14865 
14866   /* For an operation that sets the condition codes as a side-effect, the
14867      V flag is not set correctly, so we can only use comparisons where
14868      this doesn't matter.  (For LT and GE we can use "mi" and "pl"
14869      instead.)  */
14870   /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
14871   if (GET_MODE (x) == SImode
14872       && y == const0_rtx
14873       && (op == EQ || op == NE || op == LT || op == GE)
14874       && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14875 	  || GET_CODE (x) == AND || GET_CODE (x) == IOR
14876 	  || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14877 	  || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14878 	  || GET_CODE (x) == LSHIFTRT
14879 	  || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14880 	  || GET_CODE (x) == ROTATERT
14881 	  || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14882     return CC_NOOVmode;
14883 
14884   if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14885     return CC_Zmode;
14886 
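  /* An unsigned comparison of (plus A B) against one of its operands is the
     "A + B < A" carry-out idiom; only the C flag is needed, so CC_Cmode is
     enough.  */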
14887   if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14888       && GET_CODE (x) == PLUS
14889       && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14890     return CC_Cmode;
14891 
14892   if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14893     {
14894       switch (op)
14895 	{
14896 	case EQ:
14897 	case NE:
14898 	  /* A DImode comparison against zero can be implemented by
14899 	     or'ing the two halves together.  */
14900 	  if (y == const0_rtx)
14901 	    return CC_Zmode;
14902 
14903 	  /* We can do an equality test in three Thumb instructions.  */
14904 	  if (!TARGET_32BIT)
14905 	    return CC_Zmode;
14906 
14907 	  /* FALLTHROUGH */
14908 
14909 	case LTU:
14910 	case LEU:
14911 	case GTU:
14912 	case GEU:
14913 	  /* DImode unsigned comparisons can be implemented by cmp +
14914 	     cmpeq without a scratch register.  Not worth doing in
14915 	     Thumb-2.  */
14916 	  if (TARGET_32BIT)
14917 	    return CC_CZmode;
14918 
14919 	  /* FALLTHROUGH */
14920 
14921 	case LT:
14922 	case LE:
14923 	case GT:
14924 	case GE:
14925 	  /* DImode signed and unsigned comparisons can be implemented
14926 	     by cmp + sbcs with a scratch register, but that does not
14927 	     set the Z flag - we must reverse GT/LE/GTU/LEU.  */
14928 	  gcc_assert (op != EQ && op != NE);
14929 	  return CC_NCVmode;
14930 
14931 	default:
14932 	  gcc_unreachable ();
14933 	}
14934     }
14935 
14936   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14937     return GET_MODE (x);
14938 
14939   return CCmode;
14940 }
14941 
14942 /* X and Y are two things to compare using CODE.  Emit the compare insn and
14943    return the rtx for register 0 in the proper mode.  FP means this is a
14944    floating point compare: I don't think that it is needed on the arm.  */
14945 rtx
14946 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14947 {
14948   machine_mode mode;
14949   rtx cc_reg;
14950   int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14951 
14952   /* We might have X as a constant, Y as a register because of the predicates
14953      used for cmpdi.  If so, force X to a register here.  */
14954   if (dimode_comparison && !REG_P (x))
14955     x = force_reg (DImode, x);
14956 
14957   mode = SELECT_CC_MODE (code, x, y);
14958   cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14959 
14960   if (dimode_comparison
14961       && mode != CC_CZmode)
14962     {
14963       rtx clobber, set;
14964 
14965       /* To compare two non-zero values for equality, XOR them and
14966 	 then compare against zero.  Not used for ARM mode; there
14967 	 CC_CZmode is cheaper.  */
14968       if (mode == CC_Zmode && y != const0_rtx)
14969 	{
14970 	  gcc_assert (!reload_completed);
14971 	  x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14972 	  y = const0_rtx;
14973 	}
14974 
14975       /* A scratch register is required.  */
14976       if (reload_completed)
14977 	gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14978       else
14979 	scratch = gen_rtx_SCRATCH (SImode);
14980 
14981       clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14982       set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14983       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14984     }
14985   else
14986     emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14987 
14988   return cc_reg;
14989 }
14990 
14991 /* Generate a sequence of insns that will generate the correct return
14992    address mask depending on the physical architecture that the program
14993    is running on.  */
14994 rtx
14995 arm_gen_return_addr_mask (void)
14996 {
14997   rtx reg = gen_reg_rtx (Pmode);
14998 
14999   emit_insn (gen_return_addr_mask (reg));
15000   return reg;
15001 }
15002 
15003 void
15004 arm_reload_in_hi (rtx *operands)
15005 {
15006   rtx ref = operands[1];
15007   rtx base, scratch;
15008   HOST_WIDE_INT offset = 0;
15009 
15010   if (GET_CODE (ref) == SUBREG)
15011     {
15012       offset = SUBREG_BYTE (ref);
15013       ref = SUBREG_REG (ref);
15014     }
15015 
15016   if (REG_P (ref))
15017     {
15018       /* We have a pseudo which has been spilt onto the stack; there
15019 	 are two cases here: the first where there is a simple
15020 	 stack-slot replacement and a second where the stack-slot is
15021 	 out of range, or is used as a subreg.  */
15022       if (reg_equiv_mem (REGNO (ref)))
15023 	{
15024 	  ref = reg_equiv_mem (REGNO (ref));
15025 	  base = find_replacement (&XEXP (ref, 0));
15026 	}
15027       else
15028 	/* The slot is out of range, or was dressed up in a SUBREG.  */
15029 	base = reg_equiv_address (REGNO (ref));
15030 
15031       /* PR 62554: If there is no equivalent memory location then just move
15032 	 the value as an SImode register move.  This happens when the target
15033 	 architecture variant does not have an HImode register move.  */
15034       if (base == NULL)
15035 	{
15036 	  gcc_assert (REG_P (operands[0]));
15037 	  emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
15038 				gen_rtx_SUBREG (SImode, ref, 0)));
15039 	  return;
15040 	}
15041     }
15042   else
15043     base = find_replacement (&XEXP (ref, 0));
15044 
15045   /* Handle the case where the address is too complex to be offset by 1.  */
15046   if (GET_CODE (base) == MINUS
15047       || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15048     {
15049       rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15050 
15051       emit_set_insn (base_plus, base);
15052       base = base_plus;
15053     }
15054   else if (GET_CODE (base) == PLUS)
15055     {
15056       /* The addend must be CONST_INT, or we would have dealt with it above.  */
15057       HOST_WIDE_INT hi, lo;
15058 
15059       offset += INTVAL (XEXP (base, 1));
15060       base = XEXP (base, 0);
15061 
15062       /* Rework the address into a legal sequence of insns.  */
15063       /* Valid range for lo is -4095 -> 4095 */
15064       lo = (offset >= 0
15065 	    ? (offset & 0xfff)
15066 	    : -((-offset) & 0xfff));
15067 
15068       /* Corner case, if lo is the max offset then we would be out of range
15069 	 once we have added the additional 1 below, so bump the msb into the
15070 	 pre-loading insn(s).  */
15071       if (lo == 4095)
15072 	lo &= 0x7ff;
15073 
15074       hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15075 	     ^ (HOST_WIDE_INT) 0x80000000)
15076 	    - (HOST_WIDE_INT) 0x80000000);
15077 
15078       gcc_assert (hi + lo == offset);
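      /* For example, offset 0x1234 splits into lo = 0x234 and hi = 0x1000;
	 the corner case offset 4095 splits into lo = 2047 and hi = 2048, so
	 that the OFFSET + 1 used for the second byte access below still fits
	 in the load's immediate range.  */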
15079 
15080       if (hi != 0)
15081 	{
15082 	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15083 
15084 	  /* Get the base address; addsi3 knows how to handle constants
15085 	     that require more than one insn.  */
15086 	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15087 	  base = base_plus;
15088 	  offset = lo;
15089 	}
15090     }
15091 
15092   /* Operands[2] may overlap operands[0] (though it won't overlap
15093      operands[1]), that's why we asked for a DImode reg -- so we can
15094      use the bit that does not overlap.  */
15095   if (REGNO (operands[2]) == REGNO (operands[0]))
15096     scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15097   else
15098     scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15099 
15100   emit_insn (gen_zero_extendqisi2 (scratch,
15101 				   gen_rtx_MEM (QImode,
15102 						plus_constant (Pmode, base,
15103 							       offset))));
15104   emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15105 				   gen_rtx_MEM (QImode,
15106 						plus_constant (Pmode, base,
15107 							       offset + 1))));
15108   if (!BYTES_BIG_ENDIAN)
15109     emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15110 		   gen_rtx_IOR (SImode,
15111 				gen_rtx_ASHIFT
15112 				(SImode,
15113 				 gen_rtx_SUBREG (SImode, operands[0], 0),
15114 				 GEN_INT (8)),
15115 				scratch));
15116   else
15117     emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15118 		   gen_rtx_IOR (SImode,
15119 				gen_rtx_ASHIFT (SImode, scratch,
15120 						GEN_INT (8)),
15121 				gen_rtx_SUBREG (SImode, operands[0], 0)));
15122 }
15123 
15124 /* Handle storing a half-word to memory during reload by synthesizing as two
15125    byte stores.  Take care not to clobber the input values until after we
15126    have moved them somewhere safe.  This code assumes that if the DImode
15127    scratch in operands[2] overlaps either the input value or output address
15128    in some way, then that value must die in this insn (we absolutely need
15129    two scratch registers for some corner cases).  */
15130 void
15131 arm_reload_out_hi (rtx *operands)
15132 {
15133   rtx ref = operands[0];
15134   rtx outval = operands[1];
15135   rtx base, scratch;
15136   HOST_WIDE_INT offset = 0;
15137 
15138   if (GET_CODE (ref) == SUBREG)
15139     {
15140       offset = SUBREG_BYTE (ref);
15141       ref = SUBREG_REG (ref);
15142     }
15143 
15144   if (REG_P (ref))
15145     {
15146       /* We have a pseudo which has been spilt onto the stack; there
15147 	 are two cases here: the first where there is a simple
15148 	 stack-slot replacement and a second where the stack-slot is
15149 	 out of range, or is used as a subreg.  */
15150       if (reg_equiv_mem (REGNO (ref)))
15151 	{
15152 	  ref = reg_equiv_mem (REGNO (ref));
15153 	  base = find_replacement (&XEXP (ref, 0));
15154 	}
15155       else
15156 	/* The slot is out of range, or was dressed up in a SUBREG.  */
15157 	base = reg_equiv_address (REGNO (ref));
15158 
15159       /* PR 62254: If there is no equivalent memory location then just move
15160 	 the value as an SImode register move.  This happens when the target
15161 	 architecture variant does not have an HImode register move.  */
15162       if (base == NULL)
15163 	{
15164 	  gcc_assert (REG_P (outval) || SUBREG_P (outval));
15165 
15166 	  if (REG_P (outval))
15167 	    {
15168 	      emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15169 				    gen_rtx_SUBREG (SImode, outval, 0)));
15170 	    }
15171 	  else /* SUBREG_P (outval)  */
15172 	    {
15173 	      if (GET_MODE (SUBREG_REG (outval)) == SImode)
15174 		emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15175 				      SUBREG_REG (outval)));
15176 	      else
15177 		/* FIXME: Handle other cases ?  */
15178 		gcc_unreachable ();
15179 	    }
15180 	  return;
15181 	}
15182     }
15183   else
15184     base = find_replacement (&XEXP (ref, 0));
15185 
15186   scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15187 
15188   /* Handle the case where the address is too complex to be offset by 1.  */
15189   if (GET_CODE (base) == MINUS
15190       || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15191     {
15192       rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15193 
15194       /* Be careful not to destroy OUTVAL.  */
15195       if (reg_overlap_mentioned_p (base_plus, outval))
15196 	{
15197 	  /* Updating base_plus might destroy outval, see if we can
15198 	     swap the scratch and base_plus.  */
15199 	  if (!reg_overlap_mentioned_p (scratch, outval))
15200 	    std::swap (scratch, base_plus);
15201 	  else
15202 	    {
15203 	      rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15204 
15205 	      /* Be conservative and copy OUTVAL into the scratch now,
15206 		 this should only be necessary if outval is a subreg
15207 		 of something larger than a word.  */
15208 	      /* XXX Might this clobber base?  I can't see how it can,
15209 		 since scratch is known to overlap with OUTVAL, and
15210 		 must be wider than a word.  */
15211 	      emit_insn (gen_movhi (scratch_hi, outval));
15212 	      outval = scratch_hi;
15213 	    }
15214 	}
15215 
15216       emit_set_insn (base_plus, base);
15217       base = base_plus;
15218     }
15219   else if (GET_CODE (base) == PLUS)
15220     {
15221       /* The addend must be CONST_INT, or we would have dealt with it above.  */
15222       HOST_WIDE_INT hi, lo;
15223 
15224       offset += INTVAL (XEXP (base, 1));
15225       base = XEXP (base, 0);
15226 
15227       /* Rework the address into a legal sequence of insns.  */
15228       /* Valid range for lo is -4095 -> 4095 */
15229       lo = (offset >= 0
15230 	    ? (offset & 0xfff)
15231 	    : -((-offset) & 0xfff));
15232 
15233       /* Corner case, if lo is the max offset then we would be out of range
15234 	 once we have added the additional 1 below, so bump the msb into the
15235 	 pre-loading insn(s).  */
15236       if (lo == 4095)
15237 	lo &= 0x7ff;
15238 
15239       hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15240 	     ^ (HOST_WIDE_INT) 0x80000000)
15241 	    - (HOST_WIDE_INT) 0x80000000);
15242 
15243       gcc_assert (hi + lo == offset);
15244 
15245       if (hi != 0)
15246 	{
15247 	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15248 
15249 	  /* Be careful not to destroy OUTVAL.  */
15250 	  if (reg_overlap_mentioned_p (base_plus, outval))
15251 	    {
15252 	      /* Updating base_plus might destroy outval, see if we
15253 		 can swap the scratch and base_plus.  */
15254 	      if (!reg_overlap_mentioned_p (scratch, outval))
15255 	        std::swap (scratch, base_plus);
15256 	      else
15257 		{
15258 		  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15259 
15260 		  /* Be conservative and copy outval into scratch now,
15261 		     this should only be necessary if outval is a
15262 		     subreg of something larger than a word.  */
15263 		  /* XXX Might this clobber base?  I can't see how it
15264 		     can, since scratch is known to overlap with
15265 		     outval.  */
15266 		  emit_insn (gen_movhi (scratch_hi, outval));
15267 		  outval = scratch_hi;
15268 		}
15269 	    }
15270 
15271 	  /* Get the base address; addsi3 knows how to handle constants
15272 	     that require more than one insn.  */
15273 	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15274 	  base = base_plus;
15275 	  offset = lo;
15276 	}
15277     }
15278 
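  /* Synthesize the halfword store as two byte stores; on big-endian targets
     the most significant byte goes to the lower address.  */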
15279   if (BYTES_BIG_ENDIAN)
15280     {
15281       emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15282 					 plus_constant (Pmode, base,
15283 							offset + 1)),
15284 			    gen_lowpart (QImode, outval)));
15285       emit_insn (gen_lshrsi3 (scratch,
15286 			      gen_rtx_SUBREG (SImode, outval, 0),
15287 			      GEN_INT (8)));
15288       emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15289 								offset)),
15290 			    gen_lowpart (QImode, scratch)));
15291     }
15292   else
15293     {
15294       emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15295 								offset)),
15296 			    gen_lowpart (QImode, outval)));
15297       emit_insn (gen_lshrsi3 (scratch,
15298 			      gen_rtx_SUBREG (SImode, outval, 0),
15299 			      GEN_INT (8)));
15300       emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15301 					 plus_constant (Pmode, base,
15302 							offset + 1)),
15303 			    gen_lowpart (QImode, scratch)));
15304     }
15305 }
15306 
15307 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15308    (padded to the size of a word) should be passed in a register.  */
15309 
15310 static bool
15311 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15312 {
15313   if (TARGET_AAPCS_BASED)
15314     return must_pass_in_stack_var_size (mode, type);
15315   else
15316     return must_pass_in_stack_var_size_or_pad (mode, type);
15317 }
15318 
15319 
15320 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15321    byte of a stack argument has useful data.  For legacy APCS ABIs we use
15322    the default.  For AAPCS based ABIs small aggregate types are placed
15323    in the lowest memory address.  */
15324 
15325 static pad_direction
15326 arm_function_arg_padding (machine_mode mode, const_tree type)
15327 {
15328   if (!TARGET_AAPCS_BASED)
15329     return default_function_arg_padding (mode, type);
15330 
15331   if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15332     return PAD_DOWNWARD;
15333 
15334   return PAD_UPWARD;
15335 }
15336 
15337 
15338 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15339    Return !BYTES_BIG_ENDIAN if the least significant byte of the
15340    register has useful data, and return the opposite if the most
15341    significant byte does.  */
15342 
15343 bool
15344 arm_pad_reg_upward (machine_mode mode,
15345                     tree type, int first ATTRIBUTE_UNUSED)
15346 {
15347   if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15348     {
15349       /* For AAPCS, small aggregates, small fixed-point types,
15350 	 and small complex types are always padded upwards.  */
15351       if (type)
15352 	{
15353 	  if ((AGGREGATE_TYPE_P (type)
15354 	       || TREE_CODE (type) == COMPLEX_TYPE
15355 	       || FIXED_POINT_TYPE_P (type))
15356 	      && int_size_in_bytes (type) <= 4)
15357 	    return true;
15358 	}
15359       else
15360 	{
15361 	  if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15362 	      && GET_MODE_SIZE (mode) <= 4)
15363 	    return true;
15364 	}
15365     }
15366 
15367   /* Otherwise, use default padding.  */
15368   return !BYTES_BIG_ENDIAN;
15369 }
15370 
15371 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15372    assuming that the address in the base register is word aligned.  */
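/* For instance, ARM state accepts any offset in [-255, 255], while Thumb-2
   accepts multiples of 4 in [-1020, 1020]; anything else is rejected.  */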
15373 bool
15374 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15375 {
15376   HOST_WIDE_INT max_offset;
15377 
15378   /* Offset must be a multiple of 4 in Thumb mode.  */
15379   if (TARGET_THUMB2 && ((offset & 3) != 0))
15380     return false;
15381 
15382   if (TARGET_THUMB2)
15383     max_offset = 1020;
15384   else if (TARGET_ARM)
15385     max_offset = 255;
15386   else
15387     return false;
15388 
15389   return ((offset <= max_offset) && (offset >= -max_offset));
15390 }
15391 
15392 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15393    Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
15394    Assumes that the address in the base register RN is word aligned.  Pattern
15395    guarantees that both memory accesses use the same base register,
15396    the offsets are constants within the range, and the gap between the offsets is 4.
15397    If reload is complete then check that registers are legal.  WBACK indicates whether
15398    address is updated.  LOAD indicates whether memory access is load or store.  */
15399 bool
15400 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15401                        bool wback, bool load)
15402 {
15403   unsigned int t, t2, n;
15404 
15405   if (!reload_completed)
15406     return true;
15407 
15408   if (!offset_ok_for_ldrd_strd (offset))
15409     return false;
15410 
15411   t = REGNO (rt);
15412   t2 = REGNO (rt2);
15413   n = REGNO (rn);
15414 
15415   if ((TARGET_THUMB2)
15416       && ((wback && (n == t || n == t2))
15417           || (t == SP_REGNUM)
15418           || (t == PC_REGNUM)
15419           || (t2 == SP_REGNUM)
15420           || (t2 == PC_REGNUM)
15421           || (!load && (n == PC_REGNUM))
15422           || (load && (t == t2))
15423           /* Triggers Cortex-M3 LDRD errata.  */
15424           || (!wback && load && fix_cm3_ldrd && (n == t))))
15425     return false;
15426 
15427   if ((TARGET_ARM)
15428       && ((wback && (n == t || n == t2))
15429           || (t2 == PC_REGNUM)
15430           || (t % 2 != 0)   /* First destination register is not even.  */
15431           || (t2 != t + 1)
15432           /* PC can be used as a base register (for offset addressing only),
15433              but it is deprecated.  */
15434           || (n == PC_REGNUM)))
15435     return false;
15436 
15437   return true;
15438 }
15439 
15440 /* Return true if a 64-bit access with alignment ALIGN and with a
15441    constant offset OFFSET from the base pointer is permitted on this
15442    architecture.  */
15443 static bool
15444 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
15445 {
15446   return (unaligned_access
15447 	  ? (align >= BITS_PER_WORD && (offset & 3) == 0)
15448 	  : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
15449 }
15450 
15451 /* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
15452    operand MEM's address contains an immediate offset from the base
15453    register and has no side effects, in which case it sets BASE,
15454    OFFSET and ALIGN accordingly.  */
15455 static bool
15456 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
15457 {
15458   rtx addr;
15459 
15460   gcc_assert (base != NULL && offset != NULL);
15461 
15462   /* TODO: Handle more general memory operand patterns, such as
15463      PRE_DEC and PRE_INC.  */
15464 
15465   if (side_effects_p (mem))
15466     return false;
15467 
15468   /* Can't deal with subregs.  */
15469   if (GET_CODE (mem) == SUBREG)
15470     return false;
15471 
15472   gcc_assert (MEM_P (mem));
15473 
15474   *offset = const0_rtx;
15475   *align = MEM_ALIGN (mem);
15476 
15477   addr = XEXP (mem, 0);
15478 
15479   /* If addr isn't valid for DImode, then we can't handle it.  */
15480   if (!arm_legitimate_address_p (DImode, addr,
15481 				 reload_in_progress || reload_completed))
15482     return false;
15483 
15484   if (REG_P (addr))
15485     {
15486       *base = addr;
15487       return true;
15488     }
15489   else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15490     {
15491       *base = XEXP (addr, 0);
15492       *offset = XEXP (addr, 1);
15493       return (REG_P (*base) && CONST_INT_P (*offset));
15494     }
15495 
15496   return false;
15497 }
15498 
15499 /* Called from a peephole2 to replace two word-size accesses with a
15500    single LDRD/STRD instruction.  Returns true iff we can generate a
15501    new instruction sequence.  That is, both accesses use the same base
15502    register and the gap between constant offsets is 4.  This function
15503    may reorder its operands to match ldrd/strd RTL templates.
15504    OPERANDS are the operands found by the peephole matcher;
15505    OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15506    corresponding memory operands.  LOAD indicates whether the access
15507    is load or store.  CONST_STORE indicates a store of constant
15508    integer values held in OPERANDS[4,5] and assumes that the pattern
15509    is four insns long, for the purpose of checking dead registers.
15510    COMMUTE indicates that register operands may be reordered.  */
15511 bool
15512 gen_operands_ldrd_strd (rtx *operands, bool load,
15513                         bool const_store, bool commute)
15514 {
15515   int nops = 2;
15516   HOST_WIDE_INT offsets[2], offset, align[2];
15517   rtx base = NULL_RTX;
15518   rtx cur_base, cur_offset, tmp;
15519   int i, gap;
15520   HARD_REG_SET regset;
15521 
15522   gcc_assert (!const_store || !load);
15523   /* Check that the memory references are immediate offsets from the
15524      same base register.  Extract the base register, the destination
15525      registers, and the corresponding memory offsets.  */
15526   for (i = 0; i < nops; i++)
15527     {
15528       if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
15529 				 &align[i]))
15530         return false;
15531 
15532       if (i == 0)
15533         base = cur_base;
15534       else if (REGNO (base) != REGNO (cur_base))
15535         return false;
15536 
15537       offsets[i] = INTVAL (cur_offset);
15538       if (GET_CODE (operands[i]) == SUBREG)
15539         {
15540           tmp = SUBREG_REG (operands[i]);
15541           gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15542           operands[i] = tmp;
15543         }
15544     }
15545 
15546   /* Make sure there is no dependency between the individual loads.  */
15547   if (load && REGNO (operands[0]) == REGNO (base))
15548     return false; /* RAW */
15549 
15550   if (load && REGNO (operands[0]) == REGNO (operands[1]))
15551     return false; /* WAW */
15552 
15553   /* If the same input register is used in both stores
15554      when storing different constants, try to find a free register.
15555      For example, the code
15556 	mov r0, 0
15557 	str r0, [r2]
15558 	mov r0, 1
15559 	str r0, [r2, #4]
15560      can be transformed into
15561 	mov r1, 0
15562 	mov r0, 1
15563 	strd r1, r0, [r2]
15564      in Thumb mode assuming that r1 is free.
15565      For ARM mode do the same but only if the starting register
15566      can be made to be even.  */
15567   if (const_store
15568       && REGNO (operands[0]) == REGNO (operands[1])
15569       && INTVAL (operands[4]) != INTVAL (operands[5]))
15570     {
15571     if (TARGET_THUMB2)
15572       {
15573         CLEAR_HARD_REG_SET (regset);
15574         tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15575         if (tmp == NULL_RTX)
15576           return false;
15577 
15578         /* Use the new register in the first load to ensure that
15579            if the original input register is not dead after peephole,
15580            then it will have the correct constant value.  */
15581         operands[0] = tmp;
15582       }
15583     else if (TARGET_ARM)
15584       {
15585         int regno = REGNO (operands[0]);
15586         if (!peep2_reg_dead_p (4, operands[0]))
15587           {
15588             /* When the input register is even and is not dead after the
15589                pattern, it has to hold the second constant but we cannot
15590                form a legal STRD in ARM mode with this register as the second
15591                register.  */
15592             if (regno % 2 == 0)
15593               return false;
15594 
15595             /* Is regno-1 free? */
15596             SET_HARD_REG_SET (regset);
15597             CLEAR_HARD_REG_BIT(regset, regno - 1);
15598             tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15599             if (tmp == NULL_RTX)
15600               return false;
15601 
15602             operands[0] = tmp;
15603           }
15604         else
15605           {
15606             /* Find a DImode register.  */
15607             CLEAR_HARD_REG_SET (regset);
15608             tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15609             if (tmp != NULL_RTX)
15610               {
15611                 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15612                 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15613               }
15614             else
15615               {
15616                 /* Can we use the input register to form a DI register?  */
15617                 SET_HARD_REG_SET (regset);
15618                 CLEAR_HARD_REG_BIT(regset,
15619                                    regno % 2 == 0 ? regno + 1 : regno - 1);
15620                 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15621                 if (tmp == NULL_RTX)
15622                   return false;
15623                 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15624               }
15625           }
15626 
15627         gcc_assert (operands[0] != NULL_RTX);
15628         gcc_assert (operands[1] != NULL_RTX);
15629         gcc_assert (REGNO (operands[0]) % 2 == 0);
15630         gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15631       }
15632     }
15633 
15634   /* Make sure the instructions are ordered with lower memory access first.  */
15635   if (offsets[0] > offsets[1])
15636     {
15637       gap = offsets[0] - offsets[1];
15638       offset = offsets[1];
15639 
15640       /* Swap the instructions such that lower memory is accessed first.  */
15641       std::swap (operands[0], operands[1]);
15642       std::swap (operands[2], operands[3]);
15643       std::swap (align[0], align[1]);
15644       if (const_store)
15645         std::swap (operands[4], operands[5]);
15646     }
15647   else
15648     {
15649       gap = offsets[1] - offsets[0];
15650       offset = offsets[0];
15651     }
15652 
15653   /* Make sure accesses are to consecutive memory locations.  */
15654   if (gap != 4)
15655     return false;
15656 
15657   if (!align_ok_ldrd_strd (align[0], offset))
15658     return false;
15659 
15660   /* Make sure we generate legal instructions.  */
15661   if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15662                              false, load))
15663     return true;
15664 
15665   /* In Thumb state, where registers are almost unconstrained, there
15666      is little hope to fix it.  */
15667   if (TARGET_THUMB2)
15668     return false;
15669 
15670   if (load && commute)
15671     {
15672       /* Try reordering registers.  */
15673       std::swap (operands[0], operands[1]);
15674       if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15675                                  false, load))
15676         return true;
15677     }
15678 
15679   if (const_store)
15680     {
15681       /* If input registers are dead after this pattern, they can be
15682          reordered or replaced by other registers that are free in the
15683          current pattern.  */
15684       if (!peep2_reg_dead_p (4, operands[0])
15685           || !peep2_reg_dead_p (4, operands[1]))
15686         return false;
15687 
15688       /* Try to reorder the input registers.  */
15689       /* For example, the code
15690            mov r0, 0
15691            mov r1, 1
15692            str r1, [r2]
15693            str r0, [r2, #4]
15694          can be transformed into
15695            mov r1, 0
15696            mov r0, 1
15697            strd r0, r1, [r2]
15698       */
15699       if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15700                                   false, false))
15701         {
15702           std::swap (operands[0], operands[1]);
15703           return true;
15704         }
15705 
15706       /* Try to find a free DI register.  */
15707       CLEAR_HARD_REG_SET (regset);
15708       add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15709       add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15710       while (true)
15711         {
15712           tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15713           if (tmp == NULL_RTX)
15714             return false;
15715 
15716           /* DREG must be an even-numbered register in DImode.
15717              Split it into SI registers.  */
15718           operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15719           operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15720           gcc_assert (operands[0] != NULL_RTX);
15721           gcc_assert (operands[1] != NULL_RTX);
15722           gcc_assert (REGNO (operands[0]) % 2 == 0);
15723           gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15724 
15725           return (operands_ok_ldrd_strd (operands[0], operands[1],
15726                                          base, offset,
15727                                          false, load));
15728         }
15729     }
15730 
15731   return false;
15732 }
15733 
15734 
15735 
15736 
15737 /* Print a symbolic form of X to the debug file, F.  */
15738 static void
15739 arm_print_value (FILE *f, rtx x)
15740 {
15741   switch (GET_CODE (x))
15742     {
15743     case CONST_INT:
15744       fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15745       return;
15746 
15747     case CONST_DOUBLE:
15748       fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15749       return;
15750 
15751     case CONST_VECTOR:
15752       {
15753 	int i;
15754 
15755 	fprintf (f, "<");
15756 	for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15757 	  {
15758 	    fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15759 	    if (i < (CONST_VECTOR_NUNITS (x) - 1))
15760 	      fputc (',', f);
15761 	  }
15762 	fprintf (f, ">");
15763       }
15764       return;
15765 
15766     case CONST_STRING:
15767       fprintf (f, "\"%s\"", XSTR (x, 0));
15768       return;
15769 
15770     case SYMBOL_REF:
15771       fprintf (f, "`%s'", XSTR (x, 0));
15772       return;
15773 
15774     case LABEL_REF:
15775       fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15776       return;
15777 
15778     case CONST:
15779       arm_print_value (f, XEXP (x, 0));
15780       return;
15781 
15782     case PLUS:
15783       arm_print_value (f, XEXP (x, 0));
15784       fprintf (f, "+");
15785       arm_print_value (f, XEXP (x, 1));
15786       return;
15787 
15788     case PC:
15789       fprintf (f, "pc");
15790       return;
15791 
15792     default:
15793       fprintf (f, "????");
15794       return;
15795     }
15796 }
15797 
15798 /* Routines for manipulation of the constant pool.  */
15799 
15800 /* Arm instructions cannot load a large constant directly into a
15801    register; they have to come from a pc relative load.  The constant
15802    must therefore be placed in the addressable range of the pc
15803    relative load.  Depending on the precise pc relative load
15804    instruction the range is somewhere between 256 bytes and 4k.  This
15805    means that we often have to dump a constant inside a function, and
15806    generate code to branch around it.
15807 
15808    It is important to minimize this, since the branches will slow
15809    things down and make the code larger.
15810 
15811    Normally we can hide the table after an existing unconditional
15812    branch so that there is no interruption of the flow, but in the
15813    worst case the code looks like this:
15814 
15815 	ldr	rn, L1
15816 	...
15817 	b	L2
15818 	align
15819 	L1:	.long value
15820 	L2:
15821 	...
15822 
15823 	ldr	rn, L3
15824 	...
15825 	b	L4
15826 	align
15827 	L3:	.long value
15828 	L4:
15829 	...
15830 
15831    We fix this by performing a scan after scheduling, which notices
15832    which instructions need to have their operands fetched from the
15833    constant table and builds the table.
15834 
15835    The algorithm starts by building a table of all the constants that
15836    need fixing up and all the natural barriers in the function (places
15837    where a constant table can be dropped without breaking the flow).
15838    For each fixup we note how far the pc-relative replacement will be
15839    able to reach and the offset of the instruction into the function.
15840 
15841    Having built the table we then group the fixes together to form
15842    tables that are as large as possible (subject to addressing
15843    constraints) and emit each table of constants after the last
15844    barrier that is within range of all the instructions in the group.
15845    If a group does not contain a barrier, then we forcibly create one
15846    by inserting a jump instruction into the flow.  Once the table has
15847    been inserted, the insns are then modified to reference the
15848    relevant entry in the pool.
15849 
15850    Possible enhancements to the algorithm (not implemented) are:
15851 
15852    1) For some processors and object formats, there may be benefit in
15853    aligning the pools to the start of cache lines; this alignment
15854    would need to be taken into account when calculating addressability
15855    of a pool.  */
15856 
15857 /* These typedefs are located at the start of this file, so that
15858    they can be used in the prototypes there.  This comment is to
15859    remind readers of that fact so that the following structures
15860    can be understood more easily.
15861 
15862      typedef struct minipool_node    Mnode;
15863      typedef struct minipool_fixup   Mfix;  */
15864 
15865 struct minipool_node
15866 {
15867   /* Doubly linked chain of entries.  */
15868   Mnode * next;
15869   Mnode * prev;
15870   /* The maximum offset into the code at which this entry can be placed.  While
15871      pushing fixes for forward references, all entries are sorted in order
15872      of increasing max_address.  */
15873   HOST_WIDE_INT max_address;
15874   /* Similarly for an entry inserted for a backwards ref.  */
15875   HOST_WIDE_INT min_address;
15876   /* The number of fixes referencing this entry.  This can become zero
15877      if we "unpush" an entry.  In this case we ignore the entry when we
15878      come to emit the code.  */
15879   int refcount;
15880   /* The offset from the start of the minipool.  */
15881   HOST_WIDE_INT offset;
15882   /* The value in table.  */
15883   rtx value;
15884   /* The mode of value.  */
15885   machine_mode mode;
15886   /* The size of the value.  With iWMMXt enabled
15887      sizes > 4 also imply an alignment of 8 bytes.  */
15888   int fix_size;
15889 };
15890 
15891 struct minipool_fixup
15892 {
15893   Mfix *            next;
15894   rtx_insn *        insn;
15895   HOST_WIDE_INT     address;
15896   rtx *             loc;
15897   machine_mode mode;
15898   int               fix_size;
15899   rtx               value;
15900   Mnode *           minipool;
15901   HOST_WIDE_INT     forwards;
15902   HOST_WIDE_INT     backwards;
15903 };
15904 
15905 /* Fixes less than a word need padding out to a word boundary.  */
15906 #define MINIPOOL_FIX_SIZE(mode) \
15907   (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
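/* For example, an HImode fix (2 bytes) still takes 4 bytes of pool space,
   while a DImode fix keeps its full 8 bytes.  */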
15908 
15909 static Mnode *	minipool_vector_head;
15910 static Mnode *	minipool_vector_tail;
15911 static rtx_code_label	*minipool_vector_label;
15912 static int	minipool_pad;
15913 
15914 /* The linked list of all minipool fixes required for this function.  */
15915 Mfix * 		minipool_fix_head;
15916 Mfix * 		minipool_fix_tail;
15917 /* The fix entry for the current minipool, once it has been placed.  */
15918 Mfix *		minipool_barrier;
15919 
15920 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15921 #define JUMP_TABLES_IN_TEXT_SECTION 0
15922 #endif
15923 
15924 static HOST_WIDE_INT
15925 get_jump_table_size (rtx_jump_table_data *insn)
15926 {
15927   /* ADDR_VECs only take room if read-only data goes into the text
15928      section.  */
15929   if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15930     {
15931       rtx body = PATTERN (insn);
15932       int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15933       HOST_WIDE_INT size;
15934       HOST_WIDE_INT modesize;
15935 
15936       modesize = GET_MODE_SIZE (GET_MODE (body));
15937       size = modesize * XVECLEN (body, elt);
15938       switch (modesize)
15939 	{
15940 	case 1:
15941 	  /* Round up size of TBB table to a halfword boundary.  */
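	  /* E.g. a 5-entry TBB table occupies 5 bytes and is counted as 6.  */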
15942 	  size = (size + 1) & ~HOST_WIDE_INT_1;
15943 	  break;
15944 	case 2:
15945 	  /* No padding necessary for TBH.  */
15946 	  break;
15947 	case 4:
15948 	  /* Add two bytes for alignment on Thumb.  */
15949 	  if (TARGET_THUMB)
15950 	    size += 2;
15951 	  break;
15952 	default:
15953 	  gcc_unreachable ();
15954 	}
15955       return size;
15956     }
15957 
15958   return 0;
15959 }
15960 
15961 /* Return the maximum amount of padding that will be inserted before
15962    label LABEL.  */
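/* For instance, a label aligned to 8 bytes in Thumb code (2-byte minimum
   insn size) can be preceded by at most 6 bytes of padding.  */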
15963 
15964 static HOST_WIDE_INT
15965 get_label_padding (rtx label)
15966 {
15967   HOST_WIDE_INT align, min_insn_size;
15968 
15969   align = 1 << label_to_alignment (label);
15970   min_insn_size = TARGET_THUMB ? 2 : 4;
15971   return align > min_insn_size ? align - min_insn_size : 0;
15972 }
15973 
15974 /* Move a minipool fix MP from its current location to before MAX_MP.
15975    If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15976    constraints may need updating.  */
15977 static Mnode *
15978 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15979 			       HOST_WIDE_INT max_address)
15980 {
15981   /* The code below assumes these are different.  */
15982   gcc_assert (mp != max_mp);
15983 
15984   if (max_mp == NULL)
15985     {
15986       if (max_address < mp->max_address)
15987 	mp->max_address = max_address;
15988     }
15989   else
15990     {
15991       if (max_address > max_mp->max_address - mp->fix_size)
15992 	mp->max_address = max_mp->max_address - mp->fix_size;
15993       else
15994 	mp->max_address = max_address;
15995 
15996       /* Unlink MP from its current position.  Since max_mp is non-null,
15997        mp->prev must be non-null.  */
15998       mp->prev->next = mp->next;
15999       if (mp->next != NULL)
16000 	mp->next->prev = mp->prev;
16001       else
16002 	minipool_vector_tail = mp->prev;
16003 
16004       /* Re-insert it before MAX_MP.  */
16005       mp->next = max_mp;
16006       mp->prev = max_mp->prev;
16007       max_mp->prev = mp;
16008 
16009       if (mp->prev != NULL)
16010 	mp->prev->next = mp;
16011       else
16012 	minipool_vector_head = mp;
16013     }
16014 
16015   /* Save the new entry.  */
16016   max_mp = mp;
16017 
16018   /* Scan over the preceding entries and adjust their addresses as
16019      required.  */
16020   while (mp->prev != NULL
16021 	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16022     {
16023       mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16024       mp = mp->prev;
16025     }
16026 
16027   return max_mp;
16028 }
16029 
16030 /* Add a constant to the minipool for a forward reference.  Returns the
16031    node added or NULL if the constant will not fit in this pool.  */
16032 static Mnode *
16033 add_minipool_forward_ref (Mfix *fix)
16034 {
16035   /* If set, max_mp is the first pool_entry that has a lower
16036      constraint than the one we are trying to add.  */
16037   Mnode *       max_mp = NULL;
16038   HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16039   Mnode *       mp;
16040 
16041   /* If the minipool starts before the end of FIX->INSN then this FIX
16042      cannot be placed into the current pool.  Furthermore, adding the
16043      new constant pool entry may cause the pool to start FIX_SIZE bytes
16044      earlier.  */
16045   if (minipool_vector_head &&
16046       (fix->address + get_attr_length (fix->insn)
16047        >= minipool_vector_head->max_address - fix->fix_size))
16048     return NULL;
16049 
16050   /* Scan the pool to see if a constant with the same value has
16051      already been added.  While we are doing this, also note the
16052      location where we must insert the constant if it doesn't already
16053      exist.  */
16054   for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16055     {
16056       if (GET_CODE (fix->value) == GET_CODE (mp->value)
16057 	  && fix->mode == mp->mode
16058 	  && (!LABEL_P (fix->value)
16059 	      || (CODE_LABEL_NUMBER (fix->value)
16060 		  == CODE_LABEL_NUMBER (mp->value)))
16061 	  && rtx_equal_p (fix->value, mp->value))
16062 	{
16063 	  /* More than one fix references this entry.  */
16064 	  mp->refcount++;
16065 	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16066 	}
16067 
16068       /* Note the insertion point if necessary.  */
16069       if (max_mp == NULL
16070 	  && mp->max_address > max_address)
16071 	max_mp = mp;
16072 
16073 	      /* If we are inserting an 8-byte aligned quantity and
16074 	 we have not already found an insertion point, then
16075 	 make sure that all such 8-byte aligned quantities are
16076 	 placed at the start of the pool.  */
16077       if (ARM_DOUBLEWORD_ALIGN
16078 	  && max_mp == NULL
16079 	  && fix->fix_size >= 8
16080 	  && mp->fix_size < 8)
16081 	{
16082 	  max_mp = mp;
16083 	  max_address = mp->max_address;
16084 	}
16085     }
16086 
16087   /* The value is not currently in the minipool, so we need to create
16088      a new entry for it.  If MAX_MP is NULL, the entry will be put on
16089      the end of the list since the placement is less constrained than
16090      any existing entry.  Otherwise, we insert the new fix before
16091      MAX_MP and, if necessary, adjust the constraints on the other
16092      entries.  */
16093   mp = XNEW (Mnode);
16094   mp->fix_size = fix->fix_size;
16095   mp->mode = fix->mode;
16096   mp->value = fix->value;
16097   mp->refcount = 1;
16098   /* Not yet required for a backwards ref.  */
16099   mp->min_address = -65536;
16100 
16101   if (max_mp == NULL)
16102     {
16103       mp->max_address = max_address;
16104       mp->next = NULL;
16105       mp->prev = minipool_vector_tail;
16106 
16107       if (mp->prev == NULL)
16108 	{
16109 	  minipool_vector_head = mp;
16110 	  minipool_vector_label = gen_label_rtx ();
16111 	}
16112       else
16113 	mp->prev->next = mp;
16114 
16115       minipool_vector_tail = mp;
16116     }
16117   else
16118     {
16119       if (max_address > max_mp->max_address - mp->fix_size)
16120 	mp->max_address = max_mp->max_address - mp->fix_size;
16121       else
16122 	mp->max_address = max_address;
16123 
16124       mp->next = max_mp;
16125       mp->prev = max_mp->prev;
16126       max_mp->prev = mp;
16127       if (mp->prev != NULL)
16128 	mp->prev->next = mp;
16129       else
16130 	minipool_vector_head = mp;
16131     }
16132 
16133   /* Save the new entry.  */
16134   max_mp = mp;
16135 
16136   /* Scan over the preceding entries and adjust their addresses as
16137      required.  */
16138   while (mp->prev != NULL
16139 	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16140     {
16141       mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16142       mp = mp->prev;
16143     }
16144 
16145   return max_mp;
16146 }
16147 
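/* Move a minipool fix MP from its current location to after MIN_MP.
   If MIN_MP is NULL, then MP doesn't need moving, but its minimum
   address constraint may need updating to MIN_ADDRESS.  Returns the
   new value of MIN_MP.  */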
16148 static Mnode *
16149 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16150 				HOST_WIDE_INT  min_address)
16151 {
16152   HOST_WIDE_INT offset;
16153 
16154   /* The code below assumes these are different.  */
16155   gcc_assert (mp != min_mp);
16156 
16157   if (min_mp == NULL)
16158     {
16159       if (min_address > mp->min_address)
16160 	mp->min_address = min_address;
16161     }
16162   else
16163     {
16164       /* We will adjust this below if it is too loose.  */
16165       mp->min_address = min_address;
16166 
16167       /* Unlink MP from its current position.  Since min_mp is non-null,
16168 	 mp->next must be non-null.  */
16169       mp->next->prev = mp->prev;
16170       if (mp->prev != NULL)
16171 	mp->prev->next = mp->next;
16172       else
16173 	minipool_vector_head = mp->next;
16174 
16175       /* Reinsert it after MIN_MP.  */
16176       mp->prev = min_mp;
16177       mp->next = min_mp->next;
16178       min_mp->next = mp;
16179       if (mp->next != NULL)
16180 	mp->next->prev = mp;
16181       else
16182 	minipool_vector_tail = mp;
16183     }
16184 
16185   min_mp = mp;
16186 
16187   offset = 0;
16188   for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16189     {
16190       mp->offset = offset;
16191       if (mp->refcount > 0)
16192 	offset += mp->fix_size;
16193 
16194       if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16195 	mp->next->min_address = mp->min_address + mp->fix_size;
16196     }
16197 
16198   return min_mp;
16199 }
16200 
16201 /* Add a constant to the minipool for a backward reference.  Returns the
16202    node added or NULL if the constant will not fit in this pool.
16203 
16204    Note that the code for insertion for a backwards reference can be
16205    somewhat confusing because the calculated offsets for each fix do
16206    not take into account the size of the pool (which is still under
16207    construction).  */
16208 static Mnode *
16209 add_minipool_backward_ref (Mfix *fix)
16210 {
16211   /* If set, min_mp is the last pool_entry that has a lower constraint
16212      than the one we are trying to add.  */
16213   Mnode *min_mp = NULL;
16214   /* This can be negative, since it is only a constraint.  */
16215   HOST_WIDE_INT  min_address = fix->address - fix->backwards;
16216   Mnode *mp;
16217 
16218   /* If we can't reach the current pool from this insn, or if we can't
16219      insert this entry at the end of the pool without pushing other
16220      fixes out of range, then we don't try.  This ensures that we
16221      can't fail later on.  */
16222   if (min_address >= minipool_barrier->address
16223       || (minipool_vector_tail->min_address + fix->fix_size
16224 	  >= minipool_barrier->address))
16225     return NULL;
16226 
16227   /* Scan the pool to see if a constant with the same value has
16228      already been added.  While we are doing this, also note the
16229      location where we must insert the constant if it doesn't already
16230      exist.  */
16231   for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16232     {
16233       if (GET_CODE (fix->value) == GET_CODE (mp->value)
16234 	  && fix->mode == mp->mode
16235 	  && (!LABEL_P (fix->value)
16236 	      || (CODE_LABEL_NUMBER (fix->value)
16237 		  == CODE_LABEL_NUMBER (mp->value)))
16238 	  && rtx_equal_p (fix->value, mp->value)
16239 	  /* Check that there is enough slack to move this entry to the
16240 	     end of the table (this is conservative).  */
16241 	  && (mp->max_address
16242 	      > (minipool_barrier->address
16243 		 + minipool_vector_tail->offset
16244 		 + minipool_vector_tail->fix_size)))
16245 	{
16246 	  mp->refcount++;
16247 	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16248 	}
16249 
16250       if (min_mp != NULL)
16251 	mp->min_address += fix->fix_size;
16252       else
16253 	{
16254 	  /* Note the insertion point if necessary.  */
16255 	  if (mp->min_address < min_address)
16256 	    {
16257 	      /* For now, we do not allow the insertion of 8-byte alignment
16258 		 requiring nodes anywhere but at the start of the pool.  */
16259 	      if (ARM_DOUBLEWORD_ALIGN
16260 		  && fix->fix_size >= 8 && mp->fix_size < 8)
16261 		return NULL;
16262 	      else
16263 		min_mp = mp;
16264 	    }
16265 	  else if (mp->max_address
16266 		   < minipool_barrier->address + mp->offset + fix->fix_size)
16267 	    {
16268 	      /* Inserting before this entry would push the fix beyond
16269 		 its maximum address (which can happen if we have
16270 		 re-located a forwards fix); force the new fix to come
16271 		 after it.  */
16272 	      if (ARM_DOUBLEWORD_ALIGN
16273 		  && fix->fix_size >= 8 && mp->fix_size < 8)
16274 		return NULL;
16275 	      else
16276 		{
16277 		  min_mp = mp;
16278 		  min_address = mp->min_address + fix->fix_size;
16279 		}
16280 	    }
16281 	  /* Do not insert a non-8-byte aligned quantity before 8-byte
16282 	     aligned quantities.  */
16283 	  else if (ARM_DOUBLEWORD_ALIGN
16284 		   && fix->fix_size < 8
16285 		   && mp->fix_size >= 8)
16286 	    {
16287 	      min_mp = mp;
16288 	      min_address = mp->min_address + fix->fix_size;
16289 	    }
16290 	}
16291     }
16292 
16293   /* We need to create a new entry.  */
16294   mp = XNEW (Mnode);
16295   mp->fix_size = fix->fix_size;
16296   mp->mode = fix->mode;
16297   mp->value = fix->value;
16298   mp->refcount = 1;
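  /* Not yet required for a forwards ref.  */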
16299   mp->max_address = minipool_barrier->address + 65536;
16300 
16301   mp->min_address = min_address;
16302 
16303   if (min_mp == NULL)
16304     {
16305       mp->prev = NULL;
16306       mp->next = minipool_vector_head;
16307 
16308       if (mp->next == NULL)
16309 	{
16310 	  minipool_vector_tail = mp;
16311 	  minipool_vector_label = gen_label_rtx ();
16312 	}
16313       else
16314 	mp->next->prev = mp;
16315 
16316       minipool_vector_head = mp;
16317     }
16318   else
16319     {
16320       mp->next = min_mp->next;
16321       mp->prev = min_mp;
16322       min_mp->next = mp;
16323 
16324       if (mp->next != NULL)
16325 	mp->next->prev = mp;
16326       else
16327 	minipool_vector_tail = mp;
16328     }
16329 
16330   /* Save the new entry.  */
16331   min_mp = mp;
16332 
16333   if (mp->prev)
16334     mp = mp->prev;
16335   else
16336     mp->offset = 0;
16337 
16338   /* Scan over the following entries and adjust their offsets.  */
16339   while (mp->next != NULL)
16340     {
16341       if (mp->next->min_address < mp->min_address + mp->fix_size)
16342 	mp->next->min_address = mp->min_address + mp->fix_size;
16343 
16344       if (mp->refcount)
16345 	mp->next->offset = mp->offset + mp->fix_size;
16346       else
16347 	mp->next->offset = mp->offset;
16348 
16349       mp = mp->next;
16350     }
16351 
16352   return min_mp;
16353 }
16354 
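/* Assign offsets to the entries in the current minipool and record
   BARRIER as the barrier after which the pool will be emitted.  Entries
   that are no longer referenced occupy no space.  */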
16355 static void
16356 assign_minipool_offsets (Mfix *barrier)
16357 {
16358   HOST_WIDE_INT offset = 0;
16359   Mnode *mp;
16360 
16361   minipool_barrier = barrier;
16362 
16363   for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16364     {
16365       mp->offset = offset;
16366 
16367       if (mp->refcount > 0)
16368 	offset += mp->fix_size;
16369     }
16370 }
16371 
16372 /* Output the literal table.  */
16373 static void
16374 dump_minipool (rtx_insn *scan)
16375 {
16376   Mnode * mp;
16377   Mnode * nmp;
16378   int align64 = 0;
16379 
16380   if (ARM_DOUBLEWORD_ALIGN)
16381     for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16382       if (mp->refcount > 0 && mp->fix_size >= 8)
16383 	{
16384 	  align64 = 1;
16385 	  break;
16386 	}
16387 
16388   if (dump_file)
16389     fprintf (dump_file,
16390 	     ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16391 	     INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16392 
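  /* Emit an anchor label, force the required alignment, and then emit the
     label that the fixed-up instructions will reference.  */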
16393   scan = emit_label_after (gen_label_rtx (), scan);
16394   scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16395   scan = emit_label_after (minipool_vector_label, scan);
16396 
16397   for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16398     {
16399       if (mp->refcount > 0)
16400 	{
16401 	  if (dump_file)
16402 	    {
16403 	      fprintf (dump_file,
16404 		       ";;  Offset %u, min %ld, max %ld ",
16405 		       (unsigned) mp->offset, (unsigned long) mp->min_address,
16406 		       (unsigned long) mp->max_address);
16407 	      arm_print_value (dump_file, mp->value);
16408 	      fputc ('\n', dump_file);
16409 	    }
16410 
16411 	  rtx val = copy_rtx (mp->value);
16412 
16413 	  switch (GET_MODE_SIZE (mp->mode))
16414 	    {
16415 #ifdef HAVE_consttable_1
16416 	    case 1:
16417 	      scan = emit_insn_after (gen_consttable_1 (val), scan);
16418 	      break;
16419 
16420 #endif
16421 #ifdef HAVE_consttable_2
16422 	    case 2:
16423 	      scan = emit_insn_after (gen_consttable_2 (val), scan);
16424 	      break;
16425 
16426 #endif
16427 #ifdef HAVE_consttable_4
16428 	    case 4:
16429 	      scan = emit_insn_after (gen_consttable_4 (val), scan);
16430 	      break;
16431 
16432 #endif
16433 #ifdef HAVE_consttable_8
16434 	    case 8:
16435 	      scan = emit_insn_after (gen_consttable_8 (val), scan);
16436 	      break;
16437 
16438 #endif
16439 #ifdef HAVE_consttable_16
16440 	    case 16:
16441               scan = emit_insn_after (gen_consttable_16 (val), scan);
16442               break;
16443 
16444 #endif
16445 	    default:
16446 	      gcc_unreachable ();
16447 	    }
16448 	}
16449 
16450       nmp = mp->next;
16451       free (mp);
16452     }
16453 
16454   minipool_vector_head = minipool_vector_tail = NULL;
16455   scan = emit_insn_after (gen_consttable_end (), scan);
16456   scan = emit_barrier_after (scan);
16457 }
16458 
16459 /* Return the cost of forcibly inserting a barrier after INSN.  */
16460 static int
16461 arm_barrier_cost (rtx_insn *insn)
16462 {
16463   /* Basing the location of the pool on the loop depth is preferable,
16464      but at the moment, the basic block information seems to be
16465      corrupt by this stage of the compilation.  */
16466   int base_cost = 50;
16467   rtx_insn *next = next_nonnote_insn (insn);
16468 
16469   if (next != NULL && LABEL_P (next))
16470     base_cost -= 20;
16471 
16472   switch (GET_CODE (insn))
16473     {
16474     case CODE_LABEL:
16475       /* It will always be better to place the table before the label, rather
16476 	 than after it.  */
16477       return 50;
16478 
16479     case INSN:
16480     case CALL_INSN:
16481       return base_cost;
16482 
16483     case JUMP_INSN:
16484       return base_cost - 10;
16485 
16486     default:
16487       return base_cost + 10;
16488     }
16489 }
16490 
16491 /* Find the best place in the insn stream in the range
16492    (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16493    Create the barrier by inserting a jump and add a new fix entry for
16494    it.  */
16495 static Mfix *
16496 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16497 {
16498   HOST_WIDE_INT count = 0;
16499   rtx_barrier *barrier;
16500   rtx_insn *from = fix->insn;
16501   /* The instruction after which we will insert the jump.  */
16502   rtx_insn *selected = NULL;
16503   int selected_cost;
16504   /* The address at which the jump instruction will be placed.  */
16505   HOST_WIDE_INT selected_address;
16506   Mfix * new_fix;
16507   HOST_WIDE_INT max_count = max_address - fix->address;
16508   rtx_code_label *label = gen_label_rtx ();
16509 
16510   selected_cost = arm_barrier_cost (from);
16511   selected_address = fix->address;
16512 
16513   while (from && count < max_count)
16514     {
16515       rtx_jump_table_data *tmp;
16516       int new_cost;
16517 
16518       /* This code shouldn't have been called if there was a natural barrier
16519 	 within range.  */
16520       gcc_assert (!BARRIER_P (from));
16521 
16522       /* Count the length of this insn.  This must stay in sync with the
16523 	 code that pushes minipool fixes.  */
16524       if (LABEL_P (from))
16525 	count += get_label_padding (from);
16526       else
16527 	count += get_attr_length (from);
16528 
16529       /* If there is a jump table, add its length.  */
16530       if (tablejump_p (from, NULL, &tmp))
16531 	{
16532 	  count += get_jump_table_size (tmp);
16533 
16534 	  /* Jump tables aren't in a basic block, so base the cost on
16535 	     the dispatch insn.  If we select this location, we will
16536 	     still put the pool after the table.  */
16537 	  new_cost = arm_barrier_cost (from);
16538 
16539 	  if (count < max_count
16540 	      && (!selected || new_cost <= selected_cost))
16541 	    {
16542 	      selected = tmp;
16543 	      selected_cost = new_cost;
16544 	      selected_address = fix->address + count;
16545 	    }
16546 
16547 	  /* Continue after the dispatch table.  */
16548 	  from = NEXT_INSN (tmp);
16549 	  continue;
16550 	}
16551 
16552       new_cost = arm_barrier_cost (from);
16553 
16554       if (count < max_count
16555 	  && (!selected || new_cost <= selected_cost))
16556 	{
16557 	  selected = from;
16558 	  selected_cost = new_cost;
16559 	  selected_address = fix->address + count;
16560 	}
16561 
16562       from = NEXT_INSN (from);
16563     }
16564 
16565   /* Make sure that we found a place to insert the jump.  */
16566   gcc_assert (selected);
16567 
16568   /* Create a new JUMP_INSN that branches around a barrier.  */
16569   from = emit_jump_insn_after (gen_jump (label), selected);
16570   JUMP_LABEL (from) = label;
16571   barrier = emit_barrier_after (from);
16572   emit_label_after (label, barrier);
16573 
16574   /* Create a minipool barrier entry for the new barrier.  */
16575   new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16576   new_fix->insn = barrier;
16577   new_fix->address = selected_address;
16578   new_fix->next = fix->next;
16579   fix->next = new_fix;
16580 
16581   return new_fix;
16582 }
16583 
16584 /* Record that there is a natural barrier in the insn stream at
16585    ADDRESS.  */
16586 static void
16587 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16588 {
16589   Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16590 
16591   fix->insn = insn;
16592   fix->address = address;
16593 
16594   fix->next = NULL;
16595   if (minipool_fix_head != NULL)
16596     minipool_fix_tail->next = fix;
16597   else
16598     minipool_fix_head = fix;
16599 
16600   minipool_fix_tail = fix;
16601 }
16602 
16603 /* Record INSN, which will need fixing up to load a value from the
16604    minipool.  ADDRESS is the offset of the insn since the start of the
16605    function; LOC is a pointer to the part of the insn which requires
16606    fixing; VALUE is the constant that must be loaded, which is of type
16607    MODE.  */
16608 static void
16609 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16610 		   machine_mode mode, rtx value)
16611 {
16612   gcc_assert (!arm_disable_literal_pool);
16613   Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16614 
16615   fix->insn = insn;
16616   fix->address = address;
16617   fix->loc = loc;
16618   fix->mode = mode;
16619   fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16620   fix->value = value;
16621   fix->forwards = get_attr_pool_range (insn);
16622   fix->backwards = get_attr_neg_pool_range (insn);
16623   fix->minipool = NULL;
16624 
16625   /* If an insn doesn't have a range defined for it, then it isn't
16626      expecting to be reworked by this code.  Better to stop now than
16627      to generate duff assembly code.  */
16628   gcc_assert (fix->forwards || fix->backwards);
16629 
16630   /* If an entry requires 8-byte alignment then assume all constant pools
16631      require 4 bytes of padding.  Trying to do this later on a per-pool
16632      basis is awkward because existing pool entries have to be modified.  */
16633   if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16634     minipool_pad = 4;
16635 
16636   if (dump_file)
16637     {
16638       fprintf (dump_file,
16639 	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16640 	       GET_MODE_NAME (mode),
16641 	       INSN_UID (insn), (unsigned long) address,
16642 	       -1 * (long)fix->backwards, (long)fix->forwards);
16643       arm_print_value (dump_file, fix->value);
16644       fprintf (dump_file, "\n");
16645     }
16646 
16647   /* Add it to the chain of fixes.  */
16648   fix->next = NULL;
16649 
16650   if (minipool_fix_head != NULL)
16651     minipool_fix_tail->next = fix;
16652   else
16653     minipool_fix_head = fix;
16654 
16655   minipool_fix_tail = fix;
16656 }
16657 
16658 /* Return the maximum allowed cost of synthesizing a 64-bit constant
16659    inline, measured in insns.  Constants that would need more insns than
16660    this are not synthesized inline.  */
16661 int
16662 arm_max_const_double_inline_cost ()
16663 {
16664   return ((optimize_size || arm_ld_sched) ? 3 : 4);
16665 }
16666 
16667 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16668    Returns the number of insns needed, or 99 if we don't know how to
16669    do it.  */
16670 int
16671 arm_const_double_inline_cost (rtx val)
16672 {
16673   rtx lowpart, highpart;
16674   machine_mode mode;
16675 
16676   mode = GET_MODE (val);
16677 
16678   if (mode == VOIDmode)
16679     mode = DImode;
16680 
16681   gcc_assert (GET_MODE_SIZE (mode) == 8);
16682 
16683   lowpart = gen_lowpart (SImode, val);
16684   highpart = gen_highpart_mode (SImode, mode, val);
16685 
16686   gcc_assert (CONST_INT_P (lowpart));
16687   gcc_assert (CONST_INT_P (highpart));
16688 
16689   return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16690 			    NULL_RTX, NULL_RTX, 0, 0)
16691 	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16692 			      NULL_RTX, NULL_RTX, 0, 0));
16693 }
16694 
16695 /* Cost of loading a SImode constant.  */
16696 /* Cost, in insns, of synthesizing the SImode constant VAL for CODE.  */
16697 arm_const_inline_cost (enum rtx_code code, rtx val)
16698 {
16699   return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16700                            NULL_RTX, NULL_RTX, 1, 0);
16701 }
16702 
16703 /* Return true if it is worthwhile to split a 64-bit constant into two
16704    32-bit operations.  This is the case if optimizing for size, or
16705    if we have load delay slots, or if one 32-bit part can be done with
16706    a single data operation.  */
16707 bool
16708 arm_const_double_by_parts (rtx val)
16709 {
16710   machine_mode mode = GET_MODE (val);
16711   rtx part;
16712 
16713   if (optimize_size || arm_ld_sched)
16714     return true;
16715 
16716   if (mode == VOIDmode)
16717     mode = DImode;
16718 
16719   part = gen_highpart_mode (SImode, mode, val);
16720 
16721   gcc_assert (CONST_INT_P (part));
16722 
16723   if (const_ok_for_arm (INTVAL (part))
16724       || const_ok_for_arm (~INTVAL (part)))
16725     return true;
16726 
16727   part = gen_lowpart (SImode, val);
16728 
16729   gcc_assert (CONST_INT_P (part));
16730 
16731   if (const_ok_for_arm (INTVAL (part))
16732       || const_ok_for_arm (~INTVAL (part)))
16733     return true;
16734 
16735   return false;
16736 }
16737 
16738 /* Return true if it is possible to inline both the high and low parts
16739    of a 64-bit constant into 32-bit data processing instructions.  */
16740 bool
16741 arm_const_double_by_immediates (rtx val)
16742 {
16743   machine_mode mode = GET_MODE (val);
16744   rtx part;
16745 
16746   if (mode == VOIDmode)
16747     mode = DImode;
16748 
16749   part = gen_highpart_mode (SImode, mode, val);
16750 
16751   gcc_assert (CONST_INT_P (part));
16752 
16753   if (!const_ok_for_arm (INTVAL (part)))
16754     return false;
16755 
16756   part = gen_lowpart (SImode, val);
16757 
16758   gcc_assert (CONST_INT_P (part));
16759 
16760   if (!const_ok_for_arm (INTVAL (part)))
16761     return false;
16762 
16763   return true;
16764 }
16765 
16766 /* Scan INSN and note any of its operands that need fixing.
16767    If DO_PUSHES is false we do not actually push any of the fixups
16768    needed.  */
16769 static void
16770 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16771 {
16772   int opno;
16773 
16774   extract_constrain_insn (insn);
16775 
16776   if (recog_data.n_alternatives == 0)
16777     return;
16778 
16779   /* Fill in recog_op_alt with information about the constraints of
16780      this insn.  */
16781   preprocess_constraints (insn);
16782 
16783   const operand_alternative *op_alt = which_op_alt ();
16784   for (opno = 0; opno < recog_data.n_operands; opno++)
16785     {
16786       /* Things we need to fix can only occur in inputs.  */
16787       if (recog_data.operand_type[opno] != OP_IN)
16788 	continue;
16789 
16790       /* If this alternative is a memory reference, then any mention
16791 	 of constants in this alternative is really to fool reload
16792 	 into allowing us to accept one there.  We need to fix them up
16793 	 now so that we output the right code.  */
16794       if (op_alt[opno].memory_ok)
16795 	{
16796 	  rtx op = recog_data.operand[opno];
16797 
16798 	  if (CONSTANT_P (op))
16799 	    {
16800 	      if (do_pushes)
16801 		push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16802 				   recog_data.operand_mode[opno], op);
16803 	    }
16804 	  else if (MEM_P (op)
16805 		   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16806 		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16807 	    {
16808 	      if (do_pushes)
16809 		{
16810 		  rtx cop = avoid_constant_pool_reference (op);
16811 
16812 		  /* Casting the address of something to a mode narrower
16813 		     than a word can cause avoid_constant_pool_reference()
16814 		     to return the pool reference itself.  That's no good to
16815 		     us here.  Let's just hope that we can use the
16816 		     constant pool value directly.  */
16817 		  if (op == cop)
16818 		    cop = get_pool_constant (XEXP (op, 0));
16819 
16820 		  push_minipool_fix (insn, address,
16821 				     recog_data.operand_loc[opno],
16822 				     recog_data.operand_mode[opno], cop);
16823 		}
16824 
16825 	    }
16826 	}
16827     }
16828 
16829   return;
16830 }
16831 
16832 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16833    and unions in the context of ARMv8-M Security Extensions.  It is used as a
16834    helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16835    functions.  The PADDING_BITS_TO_CLEAR pointer can be the base of either one
16836    or four masks, depending on whether it is being computed for a
16837    'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16838    respectively.  The tree for the type of the argument or a field within an
16839    argument is passed in ARG_TYPE, the current register this argument or field
16840    starts in is kept in the pointer REGNO and updated accordingly, the bit this
16841    argument or field starts at is passed in STARTING_BIT and the last used bit
16842    is kept in LAST_USED_BIT which is also updated accordingly.  */
16843 
16844 static unsigned HOST_WIDE_INT
16845 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16846 			       uint32_t * padding_bits_to_clear,
16847 			       unsigned starting_bit, int * last_used_bit)
16848 
16849 {
16850   unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16851 
16852   if (TREE_CODE (arg_type) == RECORD_TYPE)
16853     {
16854       unsigned current_bit = starting_bit;
16855       tree field;
16856       long int offset, size;
16857 
16858 
16859       field = TYPE_FIELDS (arg_type);
16860       while (field)
16861 	{
16862 	  /* The offset within a structure is always an offset from
16863 	     the start of that structure.  Make sure we take that into the
16864 	     calculation of the register based offset that we use here.  */
16865 	  offset = starting_bit;
16866 	  offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16867 	  offset %= 32;
16868 
16869 	  /* This is the actual size of the field, for bitfields this is the
16870 	     bitfield width and not the container size.  */
16871 	  size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16872 
16873 	  if (*last_used_bit != offset)
16874 	    {
16875 	      if (offset < *last_used_bit)
16876 		{
16877 		  /* This field's offset is before the 'last_used_bit', that
16878 		     means this field goes on the next register.  So we need to
16879 		     pad the rest of the current register and increase the
16880 		     register number.  */
16881 		  uint32_t mask;
16882 		  mask  = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16883 		  mask++;
16884 
16885 		  padding_bits_to_clear[*regno] |= mask;
16886 		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16887 		  (*regno)++;
16888 		}
16889 	      else
16890 		{
16891 		  /* Otherwise we pad the bits between the last field's end and
16892 		     the start of the new field.  */
16893 		  uint32_t mask;
16894 
16895 		  mask = ((uint32_t)-1) >> (32 - offset);
16896 		  mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16897 		  padding_bits_to_clear[*regno] |= mask;
16898 		}
16899 	      current_bit = offset;
16900 	    }
16901 
16902 	  /* Calculate further padding bits for inner structs/unions too.  */
16903 	  if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16904 	    {
16905 	      *last_used_bit = current_bit;
16906 	      not_to_clear_reg_mask
16907 		|= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16908 						  padding_bits_to_clear, offset,
16909 						  last_used_bit);
16910 	    }
16911 	  else
16912 	    {
16913 	      /* Update 'current_bit' with this field's size.  If the
16914 		 'current_bit' lies in a subsequent register, update 'regno' and
16915 		 reset 'current_bit' to point to the current bit in that new
16916 		 register.  */
16917 	      current_bit += size;
16918 	      while (current_bit >= 32)
16919 		{
16920 		  current_bit -= 32;
16921 		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16922 		  (*regno)++;
16923 		}
16924 	      *last_used_bit = current_bit;
16925 	    }
16926 
16927 	  field = TREE_CHAIN (field);
16928 	}
16929       not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16930     }
16931   else if (TREE_CODE (arg_type) == UNION_TYPE)
16932     {
16933       tree field, field_t;
16934       int i, regno_t, field_size;
16935       int max_reg = -1;
16936       int max_bit = -1;
16937       uint32_t mask;
16938       uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16939 	= {-1, -1, -1, -1};
16940 
16941       /* To compute the padding bits in a union we only consider bits as
16942 	 padding bits if they are always either a padding bit or fall outside a
16943 	 field's size for all fields in the union.  */
16944       field = TYPE_FIELDS (arg_type);
16945       while (field)
16946 	{
16947 	  uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16948 	    = {0U, 0U, 0U, 0U};
16949 	  int last_used_bit_t = *last_used_bit;
16950 	  regno_t = *regno;
16951 	  field_t = TREE_TYPE (field);
16952 
16953 	  /* If the field's type is either a record or a union make sure to
16954 	     compute their padding bits too.  */
16955 	  if (RECORD_OR_UNION_TYPE_P (field_t))
16956 	    not_to_clear_reg_mask
16957 	      |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16958 						&padding_bits_to_clear_t[0],
16959 						starting_bit, &last_used_bit_t);
16960 	  else
16961 	    {
16962 	      field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16963 	      regno_t = (field_size / 32) + *regno;
16964 	      last_used_bit_t = (starting_bit + field_size) % 32;
16965 	    }
16966 
16967 	  for (i = *regno; i < regno_t; i++)
16968 	    {
16969 	      /* For all but the last register used by this field only keep the
16970 		 padding bits that were padding bits in this field.  */
16971 	      padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16972 	    }
16973 
16974 	  /* For the last register, keep all padding bits that were padding
16975 	     bits in this field and any padding bits that are still valid
16976 	     as padding bits but fall outside of this field's size.  */
16977 	  mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16978 	  padding_bits_to_clear_res[regno_t]
16979 	    &= padding_bits_to_clear_t[regno_t] | mask;
16980 
16981 	  /* Update the maximum size of the fields in terms of registers used
16982 	     ('max_reg') and the 'last_used_bit' in said register.  */
16983 	  if (max_reg < regno_t)
16984 	    {
16985 	      max_reg = regno_t;
16986 	      max_bit = last_used_bit_t;
16987 	    }
16988 	  else if (max_reg == regno_t && max_bit < last_used_bit_t)
16989 	    max_bit = last_used_bit_t;
16990 
16991 	  field = TREE_CHAIN (field);
16992 	}
16993 
16994       /* Update the current padding_bits_to_clear using the intersection of the
16995 	 padding bits of all the fields.  */
16996       for (i = *regno; i < max_reg; i++)
16997 	padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16998 
16999       /* Do not keep trailing padding bits, we do not know yet whether this
17000 	 is the end of the argument.  */
17001       mask = ((uint32_t) 1 << max_bit) - 1;
17002       padding_bits_to_clear[max_reg]
17003 	|= padding_bits_to_clear_res[max_reg] & mask;
17004 
17005       *regno = max_reg;
17006       *last_used_bit = max_bit;
17007     }
17008   else
17009     /* This function should only be used for structs and unions.  */
17010     gcc_unreachable ();
17011 
17012   return not_to_clear_reg_mask;
17013 }
17014 
17015 /* In the context of ARMv8-M Security Extensions, this function is used for both
17016    'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
17017    registers are used when returning or passing arguments, which is then
17018    returned as a mask.  It will also compute a mask to indicate padding/unused
17019    bits for each of these registers, and passes this through the
17020    PADDING_BITS_TO_CLEAR pointer.  The tree of the argument type is passed in
17021    ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
17022    the starting register used to pass this argument or return value is passed
17023    in REGNO.  It makes use of 'comp_not_to_clear_mask_str_un' to compute these
17024    for struct and union types.  */
17025 
17026 static unsigned HOST_WIDE_INT
17027 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
17028 			     uint32_t * padding_bits_to_clear)
17029 
17030 {
17031   int last_used_bit = 0;
17032   unsigned HOST_WIDE_INT not_to_clear_mask;
17033 
17034   if (RECORD_OR_UNION_TYPE_P (arg_type))
17035     {
17036       not_to_clear_mask
17037 	= comp_not_to_clear_mask_str_un (arg_type, &regno,
17038 					 padding_bits_to_clear, 0,
17039 					 &last_used_bit);
17040 
17041 
17042       /* If the 'last_used_bit' is not zero, that means we are still using a
17043 	 part of the last 'regno'.  In such cases we must clear the trailing
17044 	 bits.  Otherwise we are not using regno at all and should mark it to
17045 	 be cleared.  */
17046       if (last_used_bit != 0)
17047 	padding_bits_to_clear[regno]
17048 	  |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
17049       else
17050 	not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
17051     }
17052   else
17053     {
17054       not_to_clear_mask = 0;
17055       /* We are not dealing with structs or unions, so these arguments may be
17056 	 passed in floating point registers too.  In some cases a BLKmode is
17057 	 used when returning or passing arguments in multiple VFP registers.  */
17058       if (GET_MODE (arg_rtx) == BLKmode)
17059 	{
17060 	  int i, arg_regs;
17061 	  rtx reg;
17062 
17063 	  /* This should really only occur when dealing with the hard-float
17064 	     ABI.  */
17065 	  gcc_assert (TARGET_HARD_FLOAT_ABI);
17066 
17067 	  for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
17068 	    {
17069 	      reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
17070 	      gcc_assert (REG_P (reg));
17071 
17072 	      not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
17073 
17074 	      /* If we are dealing with DF mode, make sure we don't
17075 		 clear either of the registers it addresses.  */
17076 	      arg_regs = ARM_NUM_REGS (GET_MODE (reg));
17077 	      if (arg_regs > 1)
17078 		{
17079 		  unsigned HOST_WIDE_INT mask;
17080 		  mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
17081 		  mask -= HOST_WIDE_INT_1U << REGNO (reg);
17082 		  not_to_clear_mask |= mask;
17083 		}
17084 	    }
17085 	}
17086       else
17087 	{
17088 	  /* Otherwise we can rely on the MODE to determine how many registers
17089 	     are being used by this argument.  */
17090 	  int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
17091 	  not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17092 	  if (arg_regs > 1)
17093 	    {
17094 	      unsigned HOST_WIDE_INT
17095 	      mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
17096 	      mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17097 	      not_to_clear_mask |= mask;
17098 	    }
17099 	}
17100     }
17101 
17102   return not_to_clear_mask;
17103 }
17104 
17105 /* Clear any secrets held in registers before doing a cmse_nonsecure_call or
17106    returning from a cmse_nonsecure_entry function.  TO_CLEAR_BITMAP indicates
17107    which registers are to be fully cleared, using the value in register
17108    CLEARING_REG if more efficient.  The PADDING_BITS_LEN-entry array
17109    PADDING_BITS_TO_CLEAR gives the bits that need to be cleared in caller-saved
17110    core registers, with SCRATCH_REG used as a scratch register for that clearing.
17111 
17112    NOTE: one of three following assertions must hold:
17113    - SCRATCH_REG is a low register
17114    - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
17115      in TO_CLEAR_BITMAP)
17116    - CLEARING_REG is a low register.  */
17117 
17118 static void
17119 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
17120 		      int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
17121 {
17122   bool saved_clearing = false;
17123   rtx saved_clearing_reg = NULL_RTX;
17124   int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
17125 
17126   gcc_assert (arm_arch_cmse);
17127 
17128   if (!bitmap_empty_p (to_clear_bitmap))
17129     {
17130       minregno = bitmap_first_set_bit (to_clear_bitmap);
17131       maxregno = bitmap_last_set_bit (to_clear_bitmap);
17132     }
17133   clearing_regno = REGNO (clearing_reg);
17134 
17135   /* Clear padding bits.  */
17136   gcc_assert (padding_bits_len <= NUM_ARG_REGS);
17137   for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
17138     {
17139       uint64_t mask;
17140       rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
17141 
17142       if (padding_bits_to_clear[i] == 0)
17143 	continue;
17144 
17145       /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
17146 	 CLEARING_REG as scratch.  */
17147       if (TARGET_THUMB1
17148 	  && REGNO (scratch_reg) > LAST_LO_REGNUM)
17149 	{
17150 	  /* clearing_reg is not to be cleared, copy its value into scratch_reg
17151 	     such that we can use clearing_reg to clear the unused bits in the
17152 	     arguments.  */
17153 	  if ((clearing_regno > maxregno
17154 	       || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
17155 	      && !saved_clearing)
17156 	    {
17157 	      gcc_assert (clearing_regno <= LAST_LO_REGNUM);
17158 	      emit_move_insn (scratch_reg, clearing_reg);
17159 	      saved_clearing = true;
17160 	      saved_clearing_reg = scratch_reg;
17161 	    }
17162 	  scratch_reg = clearing_reg;
17163 	}
17164 
17165       /* Fill the lower half of the negated padding_bits_to_clear[i].  */
17166       mask = (~padding_bits_to_clear[i]) & 0xFFFF;
17167       emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
17168 
17169       /* Fill the top half of the negated padding_bits_to_clear[i].  */
17170       mask = (~padding_bits_to_clear[i]) >> 16;
17171       rtx16 = gen_int_mode (16, SImode);
17172       dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
17173       if (mask)
17174 	emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
17175 
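      /* Clear the padding bits in the argument register.  */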
17176       emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
17177     }
17178   if (saved_clearing)
17179     emit_move_insn (clearing_reg, saved_clearing_reg);
17180 
17181 
17182   /* Clear full registers.  */
17183 
17184   /* If not marked for clearing, clearing_reg already does not contain
17185      any secret.  */
17186   if (clearing_regno <= maxregno
17187       && bitmap_bit_p (to_clear_bitmap, clearing_regno))
17188     {
17189       emit_move_insn (clearing_reg, const0_rtx);
17190       emit_use (clearing_reg);
17191       bitmap_clear_bit (to_clear_bitmap, clearing_regno);
17192     }
17193 
17194   for (regno = minregno; regno <= maxregno; regno++)
17195     {
17196       if (!bitmap_bit_p (to_clear_bitmap, regno))
17197 	continue;
17198 
17199       if (IS_VFP_REGNUM (regno))
17200 	{
17201 	  /* If regno is an even vfp register and its successor is also to
17202 	     be cleared, use vmov.  */
17203 	  if (TARGET_VFP_DOUBLE
17204 	      && VFP_REGNO_OK_FOR_DOUBLE (regno)
17205 	      && bitmap_bit_p (to_clear_bitmap, regno + 1))
17206 	    {
17207 	      emit_move_insn (gen_rtx_REG (DFmode, regno),
17208 			      CONST1_RTX (DFmode));
17209 	      emit_use (gen_rtx_REG (DFmode, regno));
17210 	      regno++;
17211 	    }
17212 	  else
17213 	    {
17214 	      emit_move_insn (gen_rtx_REG (SFmode, regno),
17215 			      CONST1_RTX (SFmode));
17216 	      emit_use (gen_rtx_REG (SFmode, regno));
17217 	    }
17218 	}
17219       else
17220 	{
17221 	  emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
17222 	  emit_use (gen_rtx_REG (SImode, regno));
17223 	}
17224     }
17225 }
17226 
17227 /* Clear caller-saved registers not used to pass arguments before a
17228    cmse_nonsecure_call.  Saving, clearing and restoring of callee-saved
17229    registers is done in the __gnu_cmse_nonsecure_call libcall.
17230    See libgcc/config/arm/cmse_nonsecure_call.S.  */
17231 
17232 static void
17233 cmse_nonsecure_call_clear_caller_saved (void)
17234 {
17235   basic_block bb;
17236 
17237   FOR_EACH_BB_FN (bb, cfun)
17238     {
17239       rtx_insn *insn;
17240 
17241       FOR_BB_INSNS (bb, insn)
17242 	{
17243 	  unsigned address_regnum, regno, maxregno =
17244 	    TARGET_HARD_FLOAT_ABI ? D7_VFP_REGNUM : NUM_ARG_REGS - 1;
17245 	  auto_sbitmap to_clear_bitmap (maxregno + 1);
17246 	  rtx_insn *seq;
17247 	  rtx pat, call, unspec, clearing_reg, ip_reg, shift;
17248 	  rtx address;
17249 	  CUMULATIVE_ARGS args_so_far_v;
17250 	  cumulative_args_t args_so_far;
17251 	  tree arg_type, fntype;
17252 	  bool first_param = true;
17253 	  function_args_iterator args_iter;
17254 	  uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
17255 
17256 	  if (!NONDEBUG_INSN_P (insn))
17257 	    continue;
17258 
17259 	  if (!CALL_P (insn))
17260 	    continue;
17261 
17262 	  pat = PATTERN (insn);
17263 	  gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
17264 	  call = XVECEXP (pat, 0, 0);
17265 
17266 	  /* Get the real call RTX if the insn sets a value, ie. returns.  */
17267 	  if (GET_CODE (call) == SET)
17268 	      call = SET_SRC (call);
17269 
17270 	  /* Check if it is a cmse_nonsecure_call.  */
17271 	  unspec = XEXP (call, 0);
17272 	  if (GET_CODE (unspec) != UNSPEC
17273 	      || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
17274 	    continue;
17275 
17276 	  /* Determine the caller-saved registers we need to clear.  */
17277 	  bitmap_clear (to_clear_bitmap);
17278 	  bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
17279 
17280 	  /* Only look at the caller-saved floating point registers in case of
17281 	     -mfloat-abi=hard.  For -mfloat-abi=softfp we will be using the
17282 	     lazy store and loads which clear both caller- and callee-saved
17283 	     registers.  */
17284 	  if (TARGET_HARD_FLOAT_ABI)
17285 	    {
17286 	      auto_sbitmap float_bitmap (maxregno + 1);
17287 
17288 	      bitmap_clear (float_bitmap);
17289 	      bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
17290 				D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1);
17291 	      bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
17292 	    }
17293 
17294 	  /* Make sure the register used to hold the function address is not
17295 	     cleared.  */
17296 	  address = RTVEC_ELT (XVEC (unspec, 0), 0);
17297 	  gcc_assert (MEM_P (address));
17298 	  gcc_assert (REG_P (XEXP (address, 0)));
17299 	  address_regnum = REGNO (XEXP (address, 0));
17300 	  if (address_regnum < R0_REGNUM + NUM_ARG_REGS)
17301 	    bitmap_clear_bit (to_clear_bitmap, address_regnum);
17302 
17303 	  /* Set basic block of call insn so that df rescan is performed on
17304 	     insns inserted here.  */
17305 	  set_block_for_insn (insn, bb);
17306 	  df_set_flags (DF_DEFER_INSN_RESCAN);
17307 	  start_sequence ();
17308 
17309 	  /* Make sure the scheduler doesn't schedule other insns beyond
17310 	     here.  */
17311 	  emit_insn (gen_blockage ());
17312 
17313 	  /* Walk through all arguments and clear registers
17314 	     appropriately.  */
17315 	  fntype = TREE_TYPE (MEM_EXPR (address));
17316 	  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17317 				    NULL_TREE);
17318 	  args_so_far = pack_cumulative_args (&args_so_far_v);
17319 	  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17320 	    {
17321 	      rtx arg_rtx;
17322 	      uint64_t to_clear_args_mask;
17323 	      machine_mode arg_mode = TYPE_MODE (arg_type);
17324 
17325 	      if (VOID_TYPE_P (arg_type))
17326 		continue;
17327 
17328 	      if (!first_param)
17329 		arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17330 					  true);
17331 
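	      /* Find which register(s) this argument is passed in.  */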
17332 	      arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17333 					  true);
17334 	      gcc_assert (REG_P (arg_rtx));
17335 	      to_clear_args_mask
17336 		= compute_not_to_clear_mask (arg_type, arg_rtx,
17337 					     REGNO (arg_rtx),
17338 					     &padding_bits_to_clear[0]);
17339 	      if (to_clear_args_mask)
17340 		{
17341 		  for (regno = R0_REGNUM; regno <= maxregno; regno++)
17342 		    {
17343 		      if (to_clear_args_mask & (1ULL << regno))
17344 			bitmap_clear_bit (to_clear_bitmap, regno);
17345 		    }
17346 		}
17347 
17348 	      first_param = false;
17349 	    }
17350 
17351 	  /* We use right shift and left shift to clear the LSB of the address
17352 	     we jump to instead of using bic, to avoid having to use an extra
17353 	     register on Thumb-1.  */
17354 	  clearing_reg = XEXP (address, 0);
17355 	  shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
17356 	  emit_insn (gen_rtx_SET (clearing_reg, shift));
17357 	  shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
17358 	  emit_insn (gen_rtx_SET (clearing_reg, shift));
17359 
17360 	  /* Clear caller-saved registers that leak before doing a non-secure
17361 	     call.  */
17362 	  ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
17363 	  cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
17364 				NUM_ARG_REGS, ip_reg, clearing_reg);
17365 
17366 	  seq = get_insns ();
17367 	  end_sequence ();
17368 	  emit_insn_before (seq, insn);
17369 	}
17370     }
17371 }
17372 
17373 /* Rewrite a move insn into a subtract of 0 if the condition codes will
17374    be useful in the next conditional jump insn.  */
17375 
17376 static void
17377 thumb1_reorg (void)
17378 {
17379   basic_block bb;
17380 
17381   FOR_EACH_BB_FN (bb, cfun)
17382     {
17383       rtx dest, src;
17384       rtx cmp, op0, op1, set = NULL;
17385       rtx_insn *prev, *insn = BB_END (bb);
17386       bool insn_clobbered = false;
17387 
17388       while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17389 	insn = PREV_INSN (insn);
17390 
17391       /* Find the last cbranchsi4_insn in basic block BB.  */
17392       if (insn == BB_HEAD (bb)
17393 	  || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17394 	continue;
17395 
17396       /* Get the register with which we are comparing.  */
17397       cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17398       op0 = XEXP (cmp, 0);
17399       op1 = XEXP (cmp, 1);
17400 
17401       /* Check that comparison is against ZERO.  */
17402       if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17403 	continue;
17404 
17405       /* Find the first flag setting insn before INSN in basic block BB.  */
17406       gcc_assert (insn != BB_HEAD (bb));
17407       for (prev = PREV_INSN (insn);
17408 	   (!insn_clobbered
17409 	    && prev != BB_HEAD (bb)
17410 	    && (NOTE_P (prev)
17411 		|| DEBUG_INSN_P (prev)
17412 		|| ((set = single_set (prev)) != NULL
17413 		    && get_attr_conds (prev) == CONDS_NOCOND)));
17414 	   prev = PREV_INSN (prev))
17415 	{
17416 	  if (reg_set_p (op0, prev))
17417 	    insn_clobbered = true;
17418 	}
17419 
17420       /* Skip if op0 is clobbered by an insn other than prev.  */
17421       if (insn_clobbered)
17422 	continue;
17423 
17424       if (!set)
17425 	continue;
17426 
17427       dest = SET_DEST (set);
17428       src = SET_SRC (set);
17429       if (!low_register_operand (dest, SImode)
17430 	  || !low_register_operand (src, SImode))
17431 	continue;
17432 
17433       /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17434 	 in INSN.  Both src and dest of the move insn are checked.  */
17435       if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17436 	{
17437 	  dest = copy_rtx (dest);
17438 	  src = copy_rtx (src);
17439 	  src = gen_rtx_MINUS (SImode, src, const0_rtx);
17440 	  PATTERN (prev) = gen_rtx_SET (dest, src);
17441 	  INSN_CODE (prev) = -1;
17442 	  /* Set test register in INSN to dest.  */
17443 	  XEXP (cmp, 0) = copy_rtx (dest);
17444 	  INSN_CODE (insn) = -1;
17445 	}
17446     }
17447 }
17448 
17449 /* Convert instructions to their cc-clobbering variant if possible, since
17450    that allows us to use smaller encodings.  */
17451 
17452 static void
17453 thumb2_reorg (void)
17454 {
17455   basic_block bb;
17456   regset_head live;
17457 
17458   INIT_REG_SET (&live);
17459 
17460   /* We are freeing block_for_insn in the toplev to keep compatibility
17461      with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
17462   compute_bb_for_insn ();
17463   df_analyze ();
17464 
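  /* For each candidate insn: SKIP leaves it alone, CONV adds a clobber of
     the condition codes, and SWAP_CONV swaps the commutative source operands
     before adding the clobber.  */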
17465   enum Convert_Action {SKIP, CONV, SWAP_CONV};
17466 
17467   FOR_EACH_BB_FN (bb, cfun)
17468     {
17469       if ((current_tune->disparage_flag_setting_t16_encodings
17470 	   == tune_params::DISPARAGE_FLAGS_ALL)
17471 	  && optimize_bb_for_speed_p (bb))
17472 	continue;
17473 
17474       rtx_insn *insn;
17475       Convert_Action action = SKIP;
17476       Convert_Action action_for_partial_flag_setting
17477 	= ((current_tune->disparage_flag_setting_t16_encodings
17478 	    != tune_params::DISPARAGE_FLAGS_NEITHER)
17479 	   && optimize_bb_for_speed_p (bb))
17480 	  ? SKIP : CONV;
17481 
17482       COPY_REG_SET (&live, DF_LR_OUT (bb));
17483       df_simulate_initialize_backwards (bb, &live);
17484       FOR_BB_INSNS_REVERSE (bb, insn)
17485 	{
17486 	  if (NONJUMP_INSN_P (insn)
17487 	      && !REGNO_REG_SET_P (&live, CC_REGNUM)
17488 	      && GET_CODE (PATTERN (insn)) == SET)
17489 	    {
17490 	      action = SKIP;
17491 	      rtx pat = PATTERN (insn);
17492 	      rtx dst = XEXP (pat, 0);
17493 	      rtx src = XEXP (pat, 1);
17494 	      rtx op0 = NULL_RTX, op1 = NULL_RTX;
17495 
17496 	      if (UNARY_P (src) || BINARY_P (src))
17497 		  op0 = XEXP (src, 0);
17498 
17499 	      if (BINARY_P (src))
17500 		  op1 = XEXP (src, 1);
17501 
17502 	      if (low_register_operand (dst, SImode))
17503 		{
17504 		  switch (GET_CODE (src))
17505 		    {
17506 		    case PLUS:
17507 		      /* Adding two registers and storing the result
17508 			 in the first source is already a 16-bit
17509 			 operation.  */
17510 		      if (rtx_equal_p (dst, op0)
17511 			  && register_operand (op1, SImode))
17512 			break;
17513 
17514 		      if (low_register_operand (op0, SImode))
17515 			{
17516 			  /* ADDS <Rd>,<Rn>,<Rm>  */
17517 			  if (low_register_operand (op1, SImode))
17518 			    action = CONV;
17519 			  /* ADDS <Rdn>,#<imm8>  */
17520 			  /* SUBS <Rdn>,#<imm8>  */
17521 			  else if (rtx_equal_p (dst, op0)
17522 				   && CONST_INT_P (op1)
17523 				   && IN_RANGE (INTVAL (op1), -255, 255))
17524 			    action = CONV;
17525 			  /* ADDS <Rd>,<Rn>,#<imm3>  */
17526 			  /* SUBS <Rd>,<Rn>,#<imm3>  */
17527 			  else if (CONST_INT_P (op1)
17528 				   && IN_RANGE (INTVAL (op1), -7, 7))
17529 			    action = CONV;
17530 			}
17531 		      /* ADCS <Rd>, <Rn>  */
17532 		      else if (GET_CODE (XEXP (src, 0)) == PLUS
17533 			      && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17534 			      && low_register_operand (XEXP (XEXP (src, 0), 1),
17535 						       SImode)
17536 			      && COMPARISON_P (op1)
17537 			      && cc_register (XEXP (op1, 0), VOIDmode)
17538 			      && maybe_get_arm_condition_code (op1) == ARM_CS
17539 			      && XEXP (op1, 1) == const0_rtx)
17540 		        action = CONV;
17541 		      break;
17542 
17543 		    case MINUS:
17544 		      /* RSBS <Rd>,<Rn>,#0
17545 			 Not handled here: see NEG below.  */
17546 		      /* SUBS <Rd>,<Rn>,#<imm3>
17547 			 SUBS <Rdn>,#<imm8>
17548 			 Not handled here: see PLUS above.  */
17549 		      /* SUBS <Rd>,<Rn>,<Rm>  */
17550 		      if (low_register_operand (op0, SImode)
17551 			  && low_register_operand (op1, SImode))
17552 			action = CONV;
17553 		      break;
17554 
17555 		    case MULT:
17556 		      /* MULS <Rdm>,<Rn>,<Rdm>
17557 			 As an exception to the rule, this is only used
17558 			 when optimizing for size since MULS is slow on all
17559 			 known implementations.  We do not even want to use
17560 			 MULS in cold code, if optimizing for speed, so we
17561 			 test the global flag here.  */
17562 		      if (!optimize_size)
17563 			break;
17564 		      /* Fall through.  */
17565 		    case AND:
17566 		    case IOR:
17567 		    case XOR:
17568 		      /* ANDS <Rdn>,<Rm>  */
17569 		      if (rtx_equal_p (dst, op0)
17570 			  && low_register_operand (op1, SImode))
17571 			action = action_for_partial_flag_setting;
17572 		      else if (rtx_equal_p (dst, op1)
17573 			       && low_register_operand (op0, SImode))
17574 			action = action_for_partial_flag_setting == SKIP
17575 				 ? SKIP : SWAP_CONV;
17576 		      break;
17577 
17578 		    case ASHIFTRT:
17579 		    case ASHIFT:
17580 		    case LSHIFTRT:
17581 		      /* ASRS <Rdn>,<Rm> */
17582 		      /* LSRS <Rdn>,<Rm> */
17583 		      /* LSLS <Rdn>,<Rm> */
17584 		      if (rtx_equal_p (dst, op0)
17585 			  && low_register_operand (op1, SImode))
17586 			action = action_for_partial_flag_setting;
17587 		      /* ASRS <Rd>,<Rm>,#<imm5> */
17588 		      /* LSRS <Rd>,<Rm>,#<imm5> */
17589 		      /* LSLS <Rd>,<Rm>,#<imm5> */
17590 		      else if (low_register_operand (op0, SImode)
17591 			       && CONST_INT_P (op1)
17592 			       && IN_RANGE (INTVAL (op1), 0, 31))
17593 			action = action_for_partial_flag_setting;
17594 		      break;
17595 
17596 		    case ROTATERT:
17597 		      /* RORS <Rdn>,<Rm>  */
17598 		      if (rtx_equal_p (dst, op0)
17599 			  && low_register_operand (op1, SImode))
17600 			action = action_for_partial_flag_setting;
17601 		      break;
17602 
17603 		    case NOT:
17604 		      /* MVNS <Rd>,<Rm>  */
17605 		      if (low_register_operand (op0, SImode))
17606 			action = action_for_partial_flag_setting;
17607 		      break;
17608 
17609 		    case NEG:
17610 		      /* NEGS <Rd>,<Rm>  (a.k.a RSBS)  */
17611 		      if (low_register_operand (op0, SImode))
17612 			action = CONV;
17613 		      break;
17614 
17615 		    case CONST_INT:
17616 		      /* MOVS <Rd>,#<imm8>  */
17617 		      if (CONST_INT_P (src)
17618 			  && IN_RANGE (INTVAL (src), 0, 255))
17619 			action = action_for_partial_flag_setting;
17620 		      break;
17621 
17622 		    case REG:
17623 		      /* MOVS and MOV<c> with registers have different
17624 			 encodings, so are not relevant here.  */
17625 		      break;
17626 
17627 		    default:
17628 		      break;
17629 		    }
17630 		}
17631 
17632 	      if (action != SKIP)
17633 		{
17634 		  rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17635 		  rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17636 		  rtvec vec;
17637 
17638 		  if (action == SWAP_CONV)
17639 		    {
17640 		      src = copy_rtx (src);
17641 		      XEXP (src, 0) = op1;
17642 		      XEXP (src, 1) = op0;
17643 		      pat = gen_rtx_SET (dst, src);
17644 		      vec = gen_rtvec (2, pat, clobber);
17645 		    }
17646 		  else /* action == CONV */
17647 		    vec = gen_rtvec (2, pat, clobber);
17648 
17649 		  PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17650 		  INSN_CODE (insn) = -1;
17651 		}
17652 	    }
17653 
17654 	  if (NONDEBUG_INSN_P (insn))
17655 	    df_simulate_one_insn_backwards (bb, insn, &live);
17656 	}
17657     }
17658 
17659   CLEAR_REG_SET (&live);
17660 }
17661 
17662 /* GCC puts the pool in the wrong place for ARM, since we can only
17663    load addresses a limited distance around the pc.  We do some
17664    special munging to move the constant pool values to the correct
17665    point in the code.  */
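/* As an illustrative sketch (not taken from a real dump), a constant that
   cannot be encoded as an immediate is loaded PC-relative from a nearby
   minipool:

	ldr	r0, .L3		@ load the out-of-range constant
	...
   .L3:
	.word	0x12345678	@ pool entry placed within reach of the load  */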
17666 static void
17667 arm_reorg (void)
17668 {
17669   rtx_insn *insn;
17670   HOST_WIDE_INT address = 0;
17671   Mfix * fix;
17672 
17673   if (use_cmse)
17674     cmse_nonsecure_call_clear_caller_saved ();
17675 
17676   /* We cannot run the Thumb passes for thunks because there is no CFG.  */
17677   if (cfun->is_thunk)
17678     ;
17679   else if (TARGET_THUMB1)
17680     thumb1_reorg ();
17681   else if (TARGET_THUMB2)
17682     thumb2_reorg ();
17683 
17684   /* Ensure all insns that must be split have been split at this point.
17685      Otherwise, the pool placement code below may compute incorrect
17686      insn lengths.  Note that when optimizing, all insns have already
17687      been split at this point.  */
17688   if (!optimize)
17689     split_all_insns_noflow ();
17690 
17691   /* When literal pools are disabled it should never be necessary to
17692      create one, so make sure we do not even attempt to.  */
17693   if (arm_disable_literal_pool)
17694     return;
17695 
17696   minipool_fix_head = minipool_fix_tail = NULL;
17697 
17698   /* The first insn must always be a note, or the code below won't
17699      scan it properly.  */
17700   insn = get_insns ();
17701   gcc_assert (NOTE_P (insn));
17702   minipool_pad = 0;
17703 
17704   /* Scan all the insns and record the operands that will need fixing.  */
17705   for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17706     {
17707       if (BARRIER_P (insn))
17708 	push_minipool_barrier (insn, address);
17709       else if (INSN_P (insn))
17710 	{
17711 	  rtx_jump_table_data *table;
17712 
17713 	  note_invalid_constants (insn, address, true);
17714 	  address += get_attr_length (insn);
17715 
17716 	  /* If the insn is a vector jump, add the size of the table
17717 	     and skip the table.  */
17718 	  if (tablejump_p (insn, NULL, &table))
17719 	    {
17720 	      address += get_jump_table_size (table);
17721 	      insn = table;
17722 	    }
17723 	}
17724       else if (LABEL_P (insn))
17725 	/* Add the worst-case padding due to alignment.  We don't add
17726 	   the _current_ padding because the minipool insertions
17727 	   themselves might change it.  */
17728 	address += get_label_padding (insn);
17729     }
17730 
17731   fix = minipool_fix_head;
17732 
17733   /* Now scan the fixups and perform the required changes.  */
17734   while (fix)
17735     {
17736       Mfix * ftmp;
17737       Mfix * fdel;
17738       Mfix *  last_added_fix;
17739       Mfix * last_barrier = NULL;
17740       Mfix * this_fix;
17741 
17742       /* Skip any further barriers before the next fix.  */
17743       while (fix && BARRIER_P (fix->insn))
17744 	fix = fix->next;
17745 
17746       /* No more fixes.  */
17747       if (fix == NULL)
17748 	break;
17749 
17750       last_added_fix = NULL;
17751 
17752       for (ftmp = fix; ftmp; ftmp = ftmp->next)
17753 	{
17754 	  if (BARRIER_P (ftmp->insn))
17755 	    {
17756 	      if (ftmp->address >= minipool_vector_head->max_address)
17757 		break;
17758 
17759 	      last_barrier = ftmp;
17760 	    }
17761 	  else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17762 	    break;
17763 
17764 	  last_added_fix = ftmp;  /* Keep track of the last fix added.  */
17765 	}
17766 
17767       /* If we found a barrier, drop back to that; any fixes that we
17768 	 could have reached but come after the barrier will now go in
17769 	 the next mini-pool.  */
17770       if (last_barrier != NULL)
17771 	{
17772 	  /* Reduce the refcount for those fixes that won't go into this
17773 	     pool after all.  */
17774 	  for (fdel = last_barrier->next;
17775 	       fdel && fdel != ftmp;
17776 	       fdel = fdel->next)
17777 	    {
17778 	      fdel->minipool->refcount--;
17779 	      fdel->minipool = NULL;
17780 	    }
17781 
17782 	  ftmp = last_barrier;
17783 	}
17784       else
17785         {
17786 	  /* ftmp is the first fix that we can't fit into this pool and
17787 	     there are no natural barriers that we could use.  Insert a
17788 	     new barrier in the code somewhere between the previous
17789 	     fix and this one, and arrange to jump around it.  */
17790 	  HOST_WIDE_INT max_address;
17791 
17792 	  /* The last item on the list of fixes must be a barrier, so
17793 	     we can never run off the end of the list of fixes without
17794 	     last_barrier being set.  */
17795 	  gcc_assert (ftmp);
17796 
17797 	  max_address = minipool_vector_head->max_address;
17798 	  /* Check that there isn't another fix that is in range that
17799 	     we couldn't fit into this pool because the pool was
17800 	     already too large: we need to put the pool before such an
17801 	     instruction.  The pool itself may come just after the
17802 	     fix because create_fix_barrier also allows space for a
17803 	     jump instruction.  */
17804 	  if (ftmp->address < max_address)
17805 	    max_address = ftmp->address + 1;
17806 
17807 	  last_barrier = create_fix_barrier (last_added_fix, max_address);
17808 	}
17809 
17810       assign_minipool_offsets (last_barrier);
17811 
17812       while (ftmp)
17813 	{
17814 	  if (!BARRIER_P (ftmp->insn)
17815 	      && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17816 		  == NULL))
17817 	    break;
17818 
17819 	  ftmp = ftmp->next;
17820 	}
17821 
17822       /* Scan over the fixes we have identified for this pool, fixing them
17823 	 up and adding the constants to the pool itself.  */
17824       for (this_fix = fix; this_fix && ftmp != this_fix;
17825 	   this_fix = this_fix->next)
17826 	if (!BARRIER_P (this_fix->insn))
17827 	  {
17828 	    rtx addr
17829 	      = plus_constant (Pmode,
17830 			       gen_rtx_LABEL_REF (VOIDmode,
17831 						  minipool_vector_label),
17832 			       this_fix->minipool->offset);
17833 	    *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17834 	  }
17835 
17836       dump_minipool (last_barrier->insn);
17837       fix = ftmp;
17838     }
17839 
17840   /* From now on we must synthesize any constants that we can't handle
17841      directly.  This can happen if the RTL gets split during final
17842      instruction generation.  */
17843   cfun->machine->after_arm_reorg = 1;
17844 
17845   /* Free the minipool memory.  */
17846   obstack_free (&minipool_obstack, minipool_startobj);
17847 }
17848 
17849 /* Routines to output assembly language.  */
17850 
17851 /* Return string representation of passed in real value.  */
17852 static const char *
17853 fp_const_from_val (REAL_VALUE_TYPE *r)
17854 {
17855   if (!fp_consts_inited)
17856     init_fp_table ();
17857 
17858   gcc_assert (real_equal (r, &value_fp0));
17859   return "0";
17860 }
17861 
17862 /* OPERANDS[0] is the entire list of insns that constitute pop,
17863    OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17864    is in the list, UPDATE is true iff the list contains explicit
17865    update of base register.  */
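/* For example (an illustrative sketch only): popping r4, r5 and the return
   address with a stack-pointer update is normally emitted as

	pop	{r4, r5, pc}

   whereas a return from an interrupt handler, which cannot use POP, comes
   out roughly as

	ldmfd	sp!, {r4, r5, pc}^  */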
17866 void
17867 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17868                          bool update)
17869 {
17870   int i;
17871   char pattern[100];
17872   int offset;
17873   const char *conditional;
17874   int num_saves = XVECLEN (operands[0], 0);
17875   unsigned int regno;
17876   unsigned int regno_base = REGNO (operands[1]);
17877   bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17878 
17879   offset = 0;
17880   offset += update ? 1 : 0;
17881   offset += return_pc ? 1 : 0;
17882 
17883   /* Is the base register in the list?  */
17884   for (i = offset; i < num_saves; i++)
17885     {
17886       regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17887       /* If SP is in the list, then the base register must be SP.  */
17888       gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17889       /* If base register is in the list, there must be no explicit update.  */
17890       if (regno == regno_base)
17891         gcc_assert (!update);
17892     }
17893 
17894   conditional = reverse ? "%?%D0" : "%?%d0";
17895   /* Can't use POP if returning from an interrupt.  */
17896   if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17897     sprintf (pattern, "pop%s\t{", conditional);
17898   else
17899     {
17900       /* Output ldmfd when the base register is SP, otherwise output ldmia.
17901          It's just a convention; their semantics are identical.  */
17902       if (regno_base == SP_REGNUM)
17903 	sprintf (pattern, "ldmfd%s\t", conditional);
17904       else if (update)
17905 	sprintf (pattern, "ldmia%s\t", conditional);
17906       else
17907 	sprintf (pattern, "ldm%s\t", conditional);
17908 
17909       strcat (pattern, reg_names[regno_base]);
17910       if (update)
17911         strcat (pattern, "!, {");
17912       else
17913         strcat (pattern, ", {");
17914     }
17915 
17916   /* Output the first destination register.  */
17917   strcat (pattern,
17918           reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17919 
17920   /* Output the rest of the destination registers.  */
17921   for (i = offset + 1; i < num_saves; i++)
17922     {
17923       strcat (pattern, ", ");
17924       strcat (pattern,
17925               reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17926     }
17927 
17928   strcat (pattern, "}");
17929 
17930   if (interrupt_p && return_pc)
17931     strcat (pattern, "^");
17932 
17933   output_asm_insn (pattern, &cond);
17934 }
17935 
17936 
17937 /* Output the assembly for a store multiple.  */
17938 
17939 const char *
17940 vfp_output_vstmd (rtx * operands)
17941 {
17942   char pattern[100];
17943   int p;
17944   int base;
17945   int i;
17946   rtx addr_reg = REG_P (XEXP (operands[0], 0))
17947 		   ? XEXP (operands[0], 0)
17948 		   : XEXP (XEXP (operands[0], 0), 0);
17949   bool push_p =  REGNO (addr_reg) == SP_REGNUM;
17950 
17951   if (push_p)
17952     strcpy (pattern, "vpush%?.64\t{%P1");
17953   else
17954     strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17955 
17956   p = strlen (pattern);
17957 
17958   gcc_assert (REG_P (operands[1]));
17959 
17960   base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17961   for (i = 1; i < XVECLEN (operands[2], 0); i++)
17962     {
17963       p += sprintf (&pattern[p], ", d%d", base + i);
17964     }
17965   strcpy (&pattern[p], "}");
17966 
17967   output_asm_insn (pattern, operands);
17968   return "";
17969 }
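/* Illustrative outputs of the templates above: storing d8 and d9 below the
   stack pointer gives "vpush.64 {d8, d9}", while the same store through
   some other base register rN gives "vstmdb.64 rN!, {d8, d9}" (register
   numbers chosen arbitrarily).  */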
17970 
17971 
17972 /* Emit RTL to save block of VFP register pairs to the stack.  Returns the
17973    number of bytes pushed.  */
17974 
17975 static int
17976 vfp_emit_fstmd (int base_reg, int count)
17977 {
17978   rtx par;
17979   rtx dwarf;
17980   rtx tmp, reg;
17981   int i;
17982 
17983   /* Work around the ARM10 VFPr1 bug.  Data corruption can occur when exactly two
17984      register pairs are stored by a store multiple insn.  We avoid this
17985      by pushing an extra pair.  */
17986   if (count == 2 && !arm_arch6)
17987     {
17988       if (base_reg == LAST_VFP_REGNUM - 3)
17989 	base_reg -= 2;
17990       count++;
17991     }
17992 
17993   /* FSTMD may not store more than 16 doubleword registers at once.  Split
17994      larger stores into multiple parts (up to a maximum of two, in
17995      practice).  */
17996   if (count > 16)
17997     {
17998       int saved;
17999       /* NOTE: base_reg is an internal register number, so each D register
18000          counts as 2.  */
18001       saved = vfp_emit_fstmd (base_reg + 32, count - 16);
18002       saved += vfp_emit_fstmd (base_reg, 16);
18003       return saved;
18004     }
18005 
18006   par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
18007   dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
18008 
18009   reg = gen_rtx_REG (DFmode, base_reg);
18010   base_reg += 2;
18011 
18012   XVECEXP (par, 0, 0)
18013     = gen_rtx_SET (gen_frame_mem
18014 		   (BLKmode,
18015 		    gen_rtx_PRE_MODIFY (Pmode,
18016 					stack_pointer_rtx,
18017 					plus_constant
18018 					(Pmode, stack_pointer_rtx,
18019 					 - (count * 8)))
18020 		    ),
18021 		   gen_rtx_UNSPEC (BLKmode,
18022 				   gen_rtvec (1, reg),
18023 				   UNSPEC_PUSH_MULT));
18024 
18025   tmp = gen_rtx_SET (stack_pointer_rtx,
18026 		     plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
18027   RTX_FRAME_RELATED_P (tmp) = 1;
18028   XVECEXP (dwarf, 0, 0) = tmp;
18029 
18030   tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
18031   RTX_FRAME_RELATED_P (tmp) = 1;
18032   XVECEXP (dwarf, 0, 1) = tmp;
18033 
18034   for (i = 1; i < count; i++)
18035     {
18036       reg = gen_rtx_REG (DFmode, base_reg);
18037       base_reg += 2;
18038       XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
18039 
18040       tmp = gen_rtx_SET (gen_frame_mem (DFmode,
18041 					plus_constant (Pmode,
18042 						       stack_pointer_rtx,
18043 						       i * 8)),
18044 			 reg);
18045       RTX_FRAME_RELATED_P (tmp) = 1;
18046       XVECEXP (dwarf, 0, i + 1) = tmp;
18047     }
18048 
18049   par = emit_insn (par);
18050   add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
18051   RTX_FRAME_RELATED_P (par) = 1;
18052 
18053   return count * 8;
18054 }
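/* An illustrative example: on an ARMv6-or-later core, a request to save the
   two register pairs d8 and d9 emits a single store-multiple push of both,
   attaches a REG_FRAME_RELATED_EXPR note describing the 16-byte stack
   adjustment plus the two individual stores, and returns 16.  On older
   cores the VFPr1 workaround above widens the push to three pairs.  */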
18055 
18056 /* Return TRUE if -mcmse has been passed and the type of the function pointed
18057    to by 'addr' has the cmse_nonsecure_call attribute; return FALSE otherwise.  */
18058 
18059 bool
18060 detect_cmse_nonsecure_call (tree addr)
18061 {
18062   if (!addr)
18063     return FALSE;
18064 
18065   tree fntype = TREE_TYPE (addr);
18066   if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
18067 				    TYPE_ATTRIBUTES (fntype)))
18068     return TRUE;
18069   return FALSE;
18070 }
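/* An illustrative use (hypothetical names): with -mcmse in effect, a call
   through a pointer whose function type carries the attribute, e.g.

     typedef void __attribute__ ((cmse_nonsecure_call)) ns_fn (void);
     void call_ns (ns_fn *fp) { fp (); }

   is detected by the function above.  */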
18071 
18072 
18073 /* Emit a call instruction with pattern PAT.  ADDR is the address of
18074    the call target.  */
18075 
18076 void
18077 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
18078 {
18079   rtx insn;
18080 
18081   insn = emit_call_insn (pat);
18082 
18083   /* The PIC register is live on entry to VxWorks PIC PLT entries.
18084      If the call might use such an entry, add a use of the PIC register
18085      to the instruction's CALL_INSN_FUNCTION_USAGE.  */
18086   if (TARGET_VXWORKS_RTP
18087       && flag_pic
18088       && !sibcall
18089       && GET_CODE (addr) == SYMBOL_REF
18090       && (SYMBOL_REF_DECL (addr)
18091 	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
18092 	  : !SYMBOL_REF_LOCAL_P (addr)))
18093     {
18094       require_pic_register ();
18095       use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
18096     }
18097 
18098   if (TARGET_AAPCS_BASED)
18099     {
18100       /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
18101 	 linker.  We need to add an IP clobber to allow setting
18102 	 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true.  A CC clobber
18103 	 is not needed since it's a fixed register.  */
18104       rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
18105       clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
18106     }
18107 }
18108 
18109 /* Output a 'call' insn.  */
18110 const char *
18111 output_call (rtx *operands)
18112 {
18113   gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly.  */
18114 
18115   /* Handle calls to LR using IP (which may be clobbered in the subroutine anyway).  */
18116   if (REGNO (operands[0]) == LR_REGNUM)
18117     {
18118       operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
18119       output_asm_insn ("mov%?\t%0, %|lr", operands);
18120     }
18121 
18122   output_asm_insn ("mov%?\t%|lr, %|pc", operands);
18123 
18124   if (TARGET_INTERWORK || arm_arch4t)
18125     output_asm_insn ("bx%?\t%0", operands);
18126   else
18127     output_asm_insn ("mov%?\t%|pc, %0", operands);
18128 
18129   return "";
18130 }
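/* On the pre-ARMv5 cores this is used for, the sequence produced above is
   roughly (illustrative register number):

	mov	lr, pc
	bx	r3		@ or "mov pc, r3" without interworking  */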
18131 
18132 /* Output a move of a long double from ARM registers to ARM registers.
18133    OPERANDS[0] is the destination.
18134    OPERANDS[1] is the source.  */
18135 const char *
18136 output_mov_long_double_arm_from_arm (rtx *operands)
18137 {
18138   /* We have to be careful here because the two might overlap.  */
18139   int dest_start = REGNO (operands[0]);
18140   int src_start = REGNO (operands[1]);
18141   rtx ops[2];
18142   int i;
18143 
18144   if (dest_start < src_start)
18145     {
18146       for (i = 0; i < 3; i++)
18147 	{
18148 	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
18149 	  ops[1] = gen_rtx_REG (SImode, src_start + i);
18150 	  output_asm_insn ("mov%?\t%0, %1", ops);
18151 	}
18152     }
18153   else
18154     {
18155       for (i = 2; i >= 0; i--)
18156 	{
18157 	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
18158 	  ops[1] = gen_rtx_REG (SImode, src_start + i);
18159 	  output_asm_insn ("mov%?\t%0, %1", ops);
18160 	}
18161     }
18162 
18163   return "";
18164 }
18165 
18166 void
18167 arm_emit_movpair (rtx dest, rtx src)
18168  {
18169   /* If the src is an immediate, simplify it.  */
18170   if (CONST_INT_P (src))
18171     {
18172       HOST_WIDE_INT val = INTVAL (src);
18173       emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
18174       if ((val >> 16) & 0x0000ffff)
18175 	{
18176 	  emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
18177 					       GEN_INT (16)),
18178 			 GEN_INT ((val >> 16) & 0x0000ffff));
18179 	  rtx_insn *insn = get_last_insn ();
18180 	  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18181 	}
18182       return;
18183     }
18184    emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
18185    emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
18186    rtx_insn *insn = get_last_insn ();
18187    set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18188  }
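/* For an immediate such as 0x12345678 the two sets above normally assemble
   to (illustrative destination register):

	movw	rD, #0x5678	@ low half via the first SET
	movt	rD, #0x1234	@ high half via the ZERO_EXTRACT

   For a symbolic SRC the HIGH/LO_SUM pair likewise becomes a movw/movt
   sequence using #:lower16: and #:upper16: relocations.  */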
18189 
18190 /* Output a move between double words.  It must be REG<-MEM
18191    or MEM<-REG.  */
18192 const char *
18193 output_move_double (rtx *operands, bool emit, int *count)
18194 {
18195   enum rtx_code code0 = GET_CODE (operands[0]);
18196   enum rtx_code code1 = GET_CODE (operands[1]);
18197   rtx otherops[3];
18198   if (count)
18199     *count = 1;
18200 
18201   /* The only case when this might happen is when
18202      you are looking at the length of a DImode instruction
18203      that has an invalid constant in it.  */
18204   if (code0 == REG && code1 != MEM)
18205     {
18206       gcc_assert (!emit);
18207       *count = 2;
18208       return "";
18209     }
18210 
18211   if (code0 == REG)
18212     {
18213       unsigned int reg0 = REGNO (operands[0]);
18214 
18215       otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18216 
18217       gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */
18218 
18219       switch (GET_CODE (XEXP (operands[1], 0)))
18220 	{
18221 	case REG:
18222 
18223 	  if (emit)
18224 	    {
18225 	      if (TARGET_LDRD
18226 		  && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18227 		output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
18228 	      else
18229 		output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18230 	    }
18231 	  break;
18232 
18233 	case PRE_INC:
18234 	  gcc_assert (TARGET_LDRD);
18235 	  if (emit)
18236 	    output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18237 	  break;
18238 
18239 	case PRE_DEC:
18240 	  if (emit)
18241 	    {
18242 	      if (TARGET_LDRD)
18243 		output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18244 	      else
18245 		output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18246 	    }
18247 	  break;
18248 
18249 	case POST_INC:
18250 	  if (emit)
18251 	    {
18252 	      if (TARGET_LDRD)
18253 		output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18254 	      else
18255 		output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18256 	    }
18257 	  break;
18258 
18259 	case POST_DEC:
18260 	  gcc_assert (TARGET_LDRD);
18261 	  if (emit)
18262 	    output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18263 	  break;
18264 
18265 	case PRE_MODIFY:
18266 	case POST_MODIFY:
18267 	  /* Autoincrement addressing modes should never have overlapping
18268 	     base and destination registers, and overlapping index registers
18269 	     are already prohibited, so this doesn't need to worry about
18270 	     fix_cm3_ldrd.  */
18271 	  otherops[0] = operands[0];
18272 	  otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18273 	  otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18274 
18275 	  if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18276 	    {
18277 	      if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18278 		{
18279 		  /* Registers overlap so split out the increment.  */
18280 		  if (emit)
18281 		    {
18282 		      output_asm_insn ("add%?\t%1, %1, %2", otherops);
18283 		      output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18284 		    }
18285 		  if (count)
18286 		    *count = 2;
18287 		}
18288 	      else
18289 		{
18290 		  /* Use a single insn if we can.
18291 		     FIXME: IWMMXT allows offsets larger than ldrd can
18292 		     handle, fix these up with a pair of ldr.  */
18293 		  if (TARGET_THUMB2
18294 		      || !CONST_INT_P (otherops[2])
18295 		      || (INTVAL (otherops[2]) > -256
18296 			  && INTVAL (otherops[2]) < 256))
18297 		    {
18298 		      if (emit)
18299 			output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18300 		    }
18301 		  else
18302 		    {
18303 		      if (emit)
18304 			{
18305 			  output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18306 			  output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18307 			}
18308 		      if (count)
18309 			*count = 2;
18310 
18311 		    }
18312 		}
18313 	    }
18314 	  else
18315 	    {
18316 	      /* Use a single insn if we can.
18317 		 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18318 		 fix these up with a pair of ldr.  */
18319 	      if (TARGET_THUMB2
18320 		  || !CONST_INT_P (otherops[2])
18321 		  || (INTVAL (otherops[2]) > -256
18322 		      && INTVAL (otherops[2]) < 256))
18323 		{
18324 		  if (emit)
18325 		    output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18326 		}
18327 	      else
18328 		{
18329 		  if (emit)
18330 		    {
18331 		      output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18332 		      output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18333 		    }
18334 		  if (count)
18335 		    *count = 2;
18336 		}
18337 	    }
18338 	  break;
18339 
18340 	case LABEL_REF:
18341 	case CONST:
18342 	  /* We might be able to use ldrd %0, %1 here.  However, the range is
18343 	     different from that of ldr/adr, and it is broken on some ARMv7-M
18344 	     implementations.  */
18345 	  /* Use the second register of the pair to avoid problematic
18346 	     overlap.  */
18347 	  otherops[1] = operands[1];
18348 	  if (emit)
18349 	    output_asm_insn ("adr%?\t%0, %1", otherops);
18350 	  operands[1] = otherops[0];
18351 	  if (emit)
18352 	    {
18353 	      if (TARGET_LDRD)
18354 		output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18355 	      else
18356 		output_asm_insn ("ldmia%?\t%1, %M0", operands);
18357 	    }
18358 
18359 	  if (count)
18360 	    *count = 2;
18361 	  break;
18362 
18363 	  /* ??? This needs checking for thumb2.  */
18364 	default:
18365 	  if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18366 			       GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18367 	    {
18368 	      otherops[0] = operands[0];
18369 	      otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18370 	      otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18371 
18372 	      if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18373 		{
18374 		  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18375 		    {
18376 		      switch ((int) INTVAL (otherops[2]))
18377 			{
18378 			case -8:
18379 			  if (emit)
18380 			    output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18381 			  return "";
18382 			case -4:
18383 			  if (TARGET_THUMB2)
18384 			    break;
18385 			  if (emit)
18386 			    output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18387 			  return "";
18388 			case 4:
18389 			  if (TARGET_THUMB2)
18390 			    break;
18391 			  if (emit)
18392 			    output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18393 			  return "";
18394 			}
18395 		    }
18396 		  otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18397 		  operands[1] = otherops[0];
18398 		  if (TARGET_LDRD
18399 		      && (REG_P (otherops[2])
18400 			  || TARGET_THUMB2
18401 			  || (CONST_INT_P (otherops[2])
18402 			      && INTVAL (otherops[2]) > -256
18403 			      && INTVAL (otherops[2]) < 256)))
18404 		    {
18405 		      if (reg_overlap_mentioned_p (operands[0],
18406 						   otherops[2]))
18407 			{
18408 			  /* Swap base and index registers over to
18409 			     avoid a conflict.  */
18410 			  std::swap (otherops[1], otherops[2]);
18411 			}
18412 		      /* If both registers conflict, it will usually
18413 			 have been fixed by a splitter.  */
18414 		      if (reg_overlap_mentioned_p (operands[0], otherops[2])
18415 			  || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18416 			{
18417 			  if (emit)
18418 			    {
18419 			      output_asm_insn ("add%?\t%0, %1, %2", otherops);
18420 			      output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18421 			    }
18422 			  if (count)
18423 			    *count = 2;
18424 			}
18425 		      else
18426 			{
18427 			  otherops[0] = operands[0];
18428 			  if (emit)
18429 			    output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18430 			}
18431 		      return "";
18432 		    }
18433 
18434 		  if (CONST_INT_P (otherops[2]))
18435 		    {
18436 		      if (emit)
18437 			{
18438 			  if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18439 			    output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18440 			  else
18441 			    output_asm_insn ("add%?\t%0, %1, %2", otherops);
18442 			}
18443 		    }
18444 		  else
18445 		    {
18446 		      if (emit)
18447 			output_asm_insn ("add%?\t%0, %1, %2", otherops);
18448 		    }
18449 		}
18450 	      else
18451 		{
18452 		  if (emit)
18453 		    output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18454 		}
18455 
18456 	      if (count)
18457 		*count = 2;
18458 
18459 	      if (TARGET_LDRD)
18460 		return "ldrd%?\t%0, [%1]";
18461 
18462 	      return "ldmia%?\t%1, %M0";
18463 	    }
18464 	  else
18465 	    {
18466 	      otherops[1] = adjust_address (operands[1], SImode, 4);
18467 	      /* Take care of overlapping base/data reg.  */
18468 	      if (reg_mentioned_p (operands[0], operands[1]))
18469 		{
18470 		  if (emit)
18471 		    {
18472 		      output_asm_insn ("ldr%?\t%0, %1", otherops);
18473 		      output_asm_insn ("ldr%?\t%0, %1", operands);
18474 		    }
18475 		  if (count)
18476 		    *count = 2;
18477 
18478 		}
18479 	      else
18480 		{
18481 		  if (emit)
18482 		    {
18483 		      output_asm_insn ("ldr%?\t%0, %1", operands);
18484 		      output_asm_insn ("ldr%?\t%0, %1", otherops);
18485 		    }
18486 		  if (count)
18487 		    *count = 2;
18488 		}
18489 	    }
18490 	}
18491     }
18492   else
18493     {
18494       /* Constraints should ensure this.  */
18495       gcc_assert (code0 == MEM && code1 == REG);
18496       gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18497                   || (TARGET_ARM && TARGET_LDRD));
18498 
18499       /* For TARGET_ARM the first source register of an STRD
18500 	 must be even.  This is usually the case for double-word
18501 	 values but user assembly constraints can force an odd
18502 	 starting register.  */
18503       bool allow_strd = TARGET_LDRD
18504 			 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
18505       switch (GET_CODE (XEXP (operands[0], 0)))
18506         {
18507 	case REG:
18508 	  if (emit)
18509 	    {
18510 	      if (allow_strd)
18511 		output_asm_insn ("strd%?\t%1, [%m0]", operands);
18512 	      else
18513 		output_asm_insn ("stm%?\t%m0, %M1", operands);
18514 	    }
18515 	  break;
18516 
18517         case PRE_INC:
18518 	  gcc_assert (allow_strd);
18519 	  if (emit)
18520 	    output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18521 	  break;
18522 
18523         case PRE_DEC:
18524 	  if (emit)
18525 	    {
18526 	      if (allow_strd)
18527 		output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18528 	      else
18529 		output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18530 	    }
18531 	  break;
18532 
18533         case POST_INC:
18534 	  if (emit)
18535 	    {
18536 	      if (allow_strd)
18537 		output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18538 	      else
18539 		output_asm_insn ("stm%?\t%m0!, %M1", operands);
18540 	    }
18541 	  break;
18542 
18543         case POST_DEC:
18544 	  gcc_assert (allow_strd);
18545 	  if (emit)
18546 	    output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18547 	  break;
18548 
18549 	case PRE_MODIFY:
18550 	case POST_MODIFY:
18551 	  otherops[0] = operands[1];
18552 	  otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18553 	  otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18554 
18555 	  /* IWMMXT allows offsets larger than strd can handle,
18556 	     fix these up with a pair of str.  */
18557 	  if (!TARGET_THUMB2
18558 	      && CONST_INT_P (otherops[2])
18559 	      && (INTVAL(otherops[2]) <= -256
18560 		  || INTVAL(otherops[2]) >= 256))
18561 	    {
18562 	      if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18563 		{
18564 		  if (emit)
18565 		    {
18566 		      output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18567 		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18568 		    }
18569 		  if (count)
18570 		    *count = 2;
18571 		}
18572 	      else
18573 		{
18574 		  if (emit)
18575 		    {
18576 		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18577 		      output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18578 		    }
18579 		  if (count)
18580 		    *count = 2;
18581 		}
18582 	    }
18583 	  else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18584 	    {
18585 	      if (emit)
18586 		output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18587 	    }
18588 	  else
18589 	    {
18590 	      if (emit)
18591 		output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18592 	    }
18593 	  break;
18594 
18595 	case PLUS:
18596 	  otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18597 	  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18598 	    {
18599 	      switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18600 		{
18601 		case -8:
18602 		  if (emit)
18603 		    output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18604 		  return "";
18605 
18606 		case -4:
18607 		  if (TARGET_THUMB2)
18608 		    break;
18609 		  if (emit)
18610 		    output_asm_insn ("stmda%?\t%m0, %M1", operands);
18611 		  return "";
18612 
18613 		case 4:
18614 		  if (TARGET_THUMB2)
18615 		    break;
18616 		  if (emit)
18617 		    output_asm_insn ("stmib%?\t%m0, %M1", operands);
18618 		  return "";
18619 		}
18620 	    }
18621 	  if (allow_strd
18622 	      && (REG_P (otherops[2])
18623 		  || TARGET_THUMB2
18624 		  || (CONST_INT_P (otherops[2])
18625 		      && INTVAL (otherops[2]) > -256
18626 		      && INTVAL (otherops[2]) < 256)))
18627 	    {
18628 	      otherops[0] = operands[1];
18629 	      otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18630 	      if (emit)
18631 		output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18632 	      return "";
18633 	    }
18634 	  /* Fall through */
18635 
18636         default:
18637 	  otherops[0] = adjust_address (operands[0], SImode, 4);
18638 	  otherops[1] = operands[1];
18639 	  if (emit)
18640 	    {
18641 	      output_asm_insn ("str%?\t%1, %0", operands);
18642 	      output_asm_insn ("str%?\t%H1, %0", otherops);
18643 	    }
18644 	  if (count)
18645 	    *count = 2;
18646 	}
18647     }
18648 
18649   return "";
18650 }
18651 
18652 /* Output a move, load or store for quad-word vectors in ARM registers.  Only
18653    handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
18654 
18655 const char *
18656 output_move_quad (rtx *operands)
18657 {
18658   if (REG_P (operands[0]))
18659     {
18660       /* Load, or reg->reg move.  */
18661 
18662       if (MEM_P (operands[1]))
18663         {
18664           switch (GET_CODE (XEXP (operands[1], 0)))
18665             {
18666             case REG:
18667               output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18668               break;
18669 
18670             case LABEL_REF:
18671             case CONST:
18672               output_asm_insn ("adr%?\t%0, %1", operands);
18673               output_asm_insn ("ldmia%?\t%0, %M0", operands);
18674               break;
18675 
18676             default:
18677               gcc_unreachable ();
18678             }
18679         }
18680       else
18681         {
18682           rtx ops[2];
18683           int dest, src, i;
18684 
18685           gcc_assert (REG_P (operands[1]));
18686 
18687           dest = REGNO (operands[0]);
18688           src = REGNO (operands[1]);
18689 
18690           /* This seems pretty dumb, but hopefully GCC won't try to do it
18691              very often.  */
18692           if (dest < src)
18693             for (i = 0; i < 4; i++)
18694               {
18695                 ops[0] = gen_rtx_REG (SImode, dest + i);
18696                 ops[1] = gen_rtx_REG (SImode, src + i);
18697                 output_asm_insn ("mov%?\t%0, %1", ops);
18698               }
18699           else
18700             for (i = 3; i >= 0; i--)
18701               {
18702                 ops[0] = gen_rtx_REG (SImode, dest + i);
18703                 ops[1] = gen_rtx_REG (SImode, src + i);
18704                 output_asm_insn ("mov%?\t%0, %1", ops);
18705               }
18706         }
18707     }
18708   else
18709     {
18710       gcc_assert (MEM_P (operands[0]));
18711       gcc_assert (REG_P (operands[1]));
18712       gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18713 
18714       switch (GET_CODE (XEXP (operands[0], 0)))
18715         {
18716         case REG:
18717           output_asm_insn ("stm%?\t%m0, %M1", operands);
18718           break;
18719 
18720         default:
18721           gcc_unreachable ();
18722         }
18723     }
18724 
18725   return "";
18726 }
18727 
18728 /* Output a VFP load or store instruction.  */
18729 
18730 const char *
18731 output_move_vfp (rtx *operands)
18732 {
18733   rtx reg, mem, addr, ops[2];
18734   int load = REG_P (operands[0]);
18735   int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18736   int sp = (!TARGET_VFP_FP16INST
18737 	    || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18738   int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18739   const char *templ;
18740   char buff[50];
18741   machine_mode mode;
18742 
18743   reg = operands[!load];
18744   mem = operands[load];
18745 
18746   mode = GET_MODE (reg);
18747 
18748   gcc_assert (REG_P (reg));
18749   gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18750   gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18751 	      || mode == SFmode
18752 	      || mode == DFmode
18753 	      || mode == HImode
18754 	      || mode == SImode
18755 	      || mode == DImode
18756               || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18757   gcc_assert (MEM_P (mem));
18758 
18759   addr = XEXP (mem, 0);
18760 
18761   switch (GET_CODE (addr))
18762     {
18763     case PRE_DEC:
18764       templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18765       ops[0] = XEXP (addr, 0);
18766       ops[1] = reg;
18767       break;
18768 
18769     case POST_INC:
18770       templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18771       ops[0] = XEXP (addr, 0);
18772       ops[1] = reg;
18773       break;
18774 
18775     default:
18776       templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18777       ops[0] = reg;
18778       ops[1] = mem;
18779       break;
18780     }
18781 
18782   sprintf (buff, templ,
18783 	   load ? "ld" : "st",
18784 	   dp ? "64" : sp ? "32" : "16",
18785 	   dp ? "P" : "",
18786 	   integer_p ? "\t%@ int" : "");
18787   output_asm_insn (buff, ops);
18788 
18789   return "";
18790 }
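/* Illustrative outputs of the templates above: a DFmode load gives
   "vldr.64 d8, [r0, #8]", an SFmode store gives "vstr.32 s0, [sp]", and a
   pre-decrement store gives "vstmdb.64 sp!, {d8}" (register numbers and
   offsets chosen arbitrarily).  */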
18791 
18792 /* Output a Neon double-word or quad-word load or store, or a load
18793    or store for larger structure modes.
18794 
18795    WARNING: The ordering of elements is weird in big-endian mode,
18796    because the EABI requires that vectors stored in memory appear
18797    as though they were stored by a VSTM instruction.
18798    GCC RTL defines element ordering based on in-memory order.
18799    This can be different from the architectural ordering of elements
18800    within a NEON register. The intrinsics defined in arm_neon.h use the
18801    NEON register element ordering, not the GCC RTL element ordering.
18802 
18803    For example, the in-memory ordering of a big-endian quadword
18804    vector with 16-bit elements when stored from register pair {d0,d1}
18805    will be (lowest address first, d0[N] is NEON register element N):
18806 
18807      [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18808 
18809    When necessary, quadword registers (dN, dN+1) are moved to ARM
18810    registers from rN in the order:
18811 
18812      dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18813 
18814    So that STM/LDM can be used on vectors in ARM registers, and the
18815    same memory layout will result as if VSTM/VLDM were used.
18816 
18817    Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18818    possible, which allows use of appropriate alignment tags.
18819    Note that the choice of "64" is independent of the actual vector
18820    element size; this size simply ensures that the behavior is
18821    equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18822 
18823    Due to limitations of those instructions, use of VST1.64/VLD1.64
18824    is not possible if:
18825     - the address contains PRE_DEC, or
18826     - the mode refers to more than 4 double-word registers
18827 
18828    In those cases, it would be possible to replace VSTM/VLDM by a
18829    sequence of instructions; this is not currently implemented since
18830    this is not certain to actually improve performance.  */
18831 
18832 const char *
18833 output_move_neon (rtx *operands)
18834 {
18835   rtx reg, mem, addr, ops[2];
18836   int regno, nregs, load = REG_P (operands[0]);
18837   const char *templ;
18838   char buff[50];
18839   machine_mode mode;
18840 
18841   reg = operands[!load];
18842   mem = operands[load];
18843 
18844   mode = GET_MODE (reg);
18845 
18846   gcc_assert (REG_P (reg));
18847   regno = REGNO (reg);
18848   nregs = REG_NREGS (reg) / 2;
18849   gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18850 	      || NEON_REGNO_OK_FOR_QUAD (regno));
18851   gcc_assert (VALID_NEON_DREG_MODE (mode)
18852 	      || VALID_NEON_QREG_MODE (mode)
18853 	      || VALID_NEON_STRUCT_MODE (mode));
18854   gcc_assert (MEM_P (mem));
18855 
18856   addr = XEXP (mem, 0);
18857 
18858   /* Strip off const from addresses like (const (plus (...))).  */
18859   if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18860     addr = XEXP (addr, 0);
18861 
18862   switch (GET_CODE (addr))
18863     {
18864     case POST_INC:
18865       /* We have to use vldm / vstm for too-large modes.  */
18866       if (nregs > 4)
18867 	{
18868 	  templ = "v%smia%%?\t%%0!, %%h1";
18869 	  ops[0] = XEXP (addr, 0);
18870 	}
18871       else
18872 	{
18873 	  templ = "v%s1.64\t%%h1, %%A0";
18874 	  ops[0] = mem;
18875 	}
18876       ops[1] = reg;
18877       break;
18878 
18879     case PRE_DEC:
18880       /* We have to use vldm / vstm in this case, since there is no
18881 	 pre-decrement form of the vld1 / vst1 instructions.  */
18882       templ = "v%smdb%%?\t%%0!, %%h1";
18883       ops[0] = XEXP (addr, 0);
18884       ops[1] = reg;
18885       break;
18886 
18887     case POST_MODIFY:
18888       /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
18889       gcc_unreachable ();
18890 
18891     case REG:
18892       /* We have to use vldm / vstm for too-large modes.  */
18893       if (nregs > 1)
18894 	{
18895 	  if (nregs > 4)
18896 	    templ = "v%smia%%?\t%%m0, %%h1";
18897 	  else
18898 	    templ = "v%s1.64\t%%h1, %%A0";
18899 
18900 	  ops[0] = mem;
18901 	  ops[1] = reg;
18902 	  break;
18903 	}
18904       /* Fall through.  */
18905     case LABEL_REF:
18906     case PLUS:
18907       {
18908 	int i;
18909 	int overlap = -1;
18910 	for (i = 0; i < nregs; i++)
18911 	  {
18912 	    /* We're only using DImode here because it's a convenient size.  */
18913 	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18914 	    ops[1] = adjust_address (mem, DImode, 8 * i);
18915 	    if (reg_overlap_mentioned_p (ops[0], mem))
18916 	      {
18917 		gcc_assert (overlap == -1);
18918 		overlap = i;
18919 	      }
18920 	    else
18921 	      {
18922 		sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18923 		output_asm_insn (buff, ops);
18924 	      }
18925 	  }
18926 	if (overlap != -1)
18927 	  {
18928 	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18929 	    ops[1] = adjust_address (mem, SImode, 8 * overlap);
18930 	    sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18931 	    output_asm_insn (buff, ops);
18932 	  }
18933 
18934         return "";
18935       }
18936 
18937     default:
18938       gcc_unreachable ();
18939     }
18940 
18941   sprintf (buff, templ, load ? "ld" : "st");
18942   output_asm_insn (buff, ops);
18943 
18944   return "";
18945 }
18946 
18947 /* Compute and return the length of neon_mov<mode>, where <mode> is
18948    one of VSTRUCT modes: EI, OI, CI or XI.  */
18949 int
18950 arm_attr_length_move_neon (rtx_insn *insn)
18951 {
18952   rtx reg, mem, addr;
18953   int load;
18954   machine_mode mode;
18955 
18956   extract_insn_cached (insn);
18957 
18958   if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18959     {
18960       mode = GET_MODE (recog_data.operand[0]);
18961       switch (mode)
18962 	{
18963 	case E_EImode:
18964 	case E_OImode:
18965 	  return 8;
18966 	case E_CImode:
18967 	  return 12;
18968 	case E_XImode:
18969 	  return 16;
18970 	default:
18971 	  gcc_unreachable ();
18972 	}
18973     }
18974 
18975   load = REG_P (recog_data.operand[0]);
18976   reg = recog_data.operand[!load];
18977   mem = recog_data.operand[load];
18978 
18979   gcc_assert (MEM_P (mem));
18980 
18981   addr = XEXP (mem, 0);
18982 
18983   /* Strip off const from addresses like (const (plus (...))).  */
18984   if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18985     addr = XEXP (addr, 0);
18986 
18987   if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18988     {
18989       int insns = REG_NREGS (reg) / 2;
18990       return insns * 4;
18991     }
18992   else
18993     return 4;
18994 }
18995 
18996 /* Return nonzero if the offset in the address is an immediate.  Otherwise,
18997    return zero.  */
18998 
18999 int
19000 arm_address_offset_is_imm (rtx_insn *insn)
19001 {
19002   rtx mem, addr;
19003 
19004   extract_insn_cached (insn);
19005 
19006   if (REG_P (recog_data.operand[0]))
19007     return 0;
19008 
19009   mem = recog_data.operand[0];
19010 
19011   gcc_assert (MEM_P (mem));
19012 
19013   addr = XEXP (mem, 0);
19014 
19015   if (REG_P (addr)
19016       || (GET_CODE (addr) == PLUS
19017 	  && REG_P (XEXP (addr, 0))
19018 	  && CONST_INT_P (XEXP (addr, 1))))
19019     return 1;
19020   else
19021     return 0;
19022 }
19023 
19024 /* Output an ADD r, s, #n where n may be too big for one instruction.
19025    If adding zero to one register, output nothing.  */
19026 const char *
19027 output_add_immediate (rtx *operands)
19028 {
19029   HOST_WIDE_INT n = INTVAL (operands[2]);
19030 
19031   if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
19032     {
19033       if (n < 0)
19034 	output_multi_immediate (operands,
19035 				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
19036 				-n);
19037       else
19038 	output_multi_immediate (operands,
19039 				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
19040 				n);
19041     }
19042 
19043   return "";
19044 }
19045 
19046 /* Output a multiple immediate operation.
19047    OPERANDS is the vector of operands referred to in the output patterns.
19048    INSTR1 is the output pattern to use for the first constant.
19049    INSTR2 is the output pattern to use for subsequent constants.
19050    IMMED_OP is the index of the constant slot in OPERANDS.
19051    N is the constant value.  */
19052 static const char *
19053 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
19054 			int immed_op, HOST_WIDE_INT n)
19055 {
19056 #if HOST_BITS_PER_WIDE_INT > 32
19057   n &= 0xffffffff;
19058 #endif
19059 
19060   if (n == 0)
19061     {
19062       /* Quick and easy output.  */
19063       operands[immed_op] = const0_rtx;
19064       output_asm_insn (instr1, operands);
19065     }
19066   else
19067     {
19068       int i;
19069       const char * instr = instr1;
19070 
19071       /* Note that n is never zero here (which would give no output).  */
19072       for (i = 0; i < 32; i += 2)
19073 	{
19074 	  if (n & (3 << i))
19075 	    {
19076 	      operands[immed_op] = GEN_INT (n & (255 << i));
19077 	      output_asm_insn (instr, operands);
19078 	      instr = instr2;
19079 	      i += 6;
19080 	    }
19081 	}
19082     }
19083 
19084   return "";
19085 }
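/* A worked example (illustrative operands): N = 0x101 splits into the
   chunks 0x01 and 0x100 (a single byte each, at an even bit position, and
   hence valid ARM immediates), so

     output_multi_immediate (operands, "add%?\t%0, %1, %2",
			     "add%?\t%0, %0, %2", 2, 0x101);

   emits "add r0, r1, #1" followed by "add r0, r0, #256".  */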
19086 
19087 /* Return the name of a shifter operation.  */
19088 static const char *
19089 arm_shift_nmem(enum rtx_code code)
19090 {
19091   switch (code)
19092     {
19093     case ASHIFT:
19094       return ARM_LSL_NAME;
19095 
19096     case ASHIFTRT:
19097       return "asr";
19098 
19099     case LSHIFTRT:
19100       return "lsr";
19101 
19102     case ROTATERT:
19103       return "ror";
19104 
19105     default:
19106       abort();
19107     }
19108 }
19109 
19110 /* Return the appropriate ARM instruction for the operation code.
19111    The returned result should not be overwritten.  OP is the rtx of the
19112    operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
19113    was shifted.  */
19114 const char *
19115 arithmetic_instr (rtx op, int shift_first_arg)
19116 {
19117   switch (GET_CODE (op))
19118     {
19119     case PLUS:
19120       return "add";
19121 
19122     case MINUS:
19123       return shift_first_arg ? "rsb" : "sub";
19124 
19125     case IOR:
19126       return "orr";
19127 
19128     case XOR:
19129       return "eor";
19130 
19131     case AND:
19132       return "and";
19133 
19134     case ASHIFT:
19135     case ASHIFTRT:
19136     case LSHIFTRT:
19137     case ROTATERT:
19138       return arm_shift_nmem(GET_CODE(op));
19139 
19140     default:
19141       gcc_unreachable ();
19142     }
19143 }
19144 
19145 /* Ensure valid constant shifts and return the appropriate shift mnemonic
19146    for the operation code.  The returned result should not be overwritten.
19147    OP is the rtx code of the shift.
19148    On exit, *AMOUNTP will be -1 if the shift is by a register, or the
19149    constant shift amount otherwise.  */
19150 static const char *
19151 shift_op (rtx op, HOST_WIDE_INT *amountp)
19152 {
19153   const char * mnem;
19154   enum rtx_code code = GET_CODE (op);
19155 
19156   switch (code)
19157     {
19158     case ROTATE:
19159       if (!CONST_INT_P (XEXP (op, 1)))
19160 	{
19161 	  output_operand_lossage ("invalid shift operand");
19162 	  return NULL;
19163 	}
19164 
19165       code = ROTATERT;
19166       *amountp = 32 - INTVAL (XEXP (op, 1));
19167       mnem = "ror";
19168       break;
19169 
19170     case ASHIFT:
19171     case ASHIFTRT:
19172     case LSHIFTRT:
19173     case ROTATERT:
19174       mnem = arm_shift_nmem(code);
19175       if (CONST_INT_P (XEXP (op, 1)))
19176 	{
19177 	  *amountp = INTVAL (XEXP (op, 1));
19178 	}
19179       else if (REG_P (XEXP (op, 1)))
19180 	{
19181 	  *amountp = -1;
19182 	  return mnem;
19183 	}
19184       else
19185 	{
19186 	  output_operand_lossage ("invalid shift operand");
19187 	  return NULL;
19188 	}
19189       break;
19190 
19191     case MULT:
19192       /* We never have to worry about the amount being other than a
19193 	 power of 2, since this case can never be reloaded from a reg.  */
19194       if (!CONST_INT_P (XEXP (op, 1)))
19195 	{
19196 	  output_operand_lossage ("invalid shift operand");
19197 	  return NULL;
19198 	}
19199 
19200       *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
19201 
19202       /* Amount must be a power of two.  */
19203       if (*amountp & (*amountp - 1))
19204 	{
19205 	  output_operand_lossage ("invalid shift operand");
19206 	  return NULL;
19207 	}
19208 
19209       *amountp = exact_log2 (*amountp);
19210       gcc_assert (IN_RANGE (*amountp, 0, 31));
19211       return ARM_LSL_NAME;
19212 
19213     default:
19214       output_operand_lossage ("invalid shift operand");
19215       return NULL;
19216     }
19217 
19218   /* This is not 100% correct, but follows from the desire to merge
19219      multiplication by a power of 2 with the recognizer for a
19220      shift.  >=32 is not a valid shift for "lsl", so we must try to
19221      output a shift that produces the correct arithmetical result.
19222      Using lsr #32 is identical except for the fact that the carry bit
19223      is not set correctly if we set the flags; but we never use the
19224      carry bit from such an operation, so we can ignore that.  */
19225   if (code == ROTATERT)
19226     /* Rotate is just modulo 32.  */
19227     *amountp &= 31;
19228   else if (*amountp != (*amountp & 31))
19229     {
19230       if (code == ASHIFT)
19231 	mnem = "lsr";
19232       *amountp = 32;
19233     }
19234 
19235   /* Shifts of 0 are no-ops.  */
19236   if (*amountp == 0)
19237     return NULL;
19238 
19239   return mnem;
19240 }
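/* Illustrative mappings for the code above: (ashiftrt x (const_int 3))
   yields "asr" with *AMOUNTP == 3; (mult x (const_int 8)) is handled as a
   shift and yields "lsl" with *AMOUNTP == 3; (rotate x (const_int 8)) is
   converted to "ror" with *AMOUNTP == 24; a shift by a register returns
   the mnemonic with *AMOUNTP == -1.  */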
19241 
19242 /* Output a .ascii pseudo-op, keeping track of lengths.  This is
19243    because /bin/as is horribly restrictive.  The judgement about
19244    whether or not each character is 'printable' (and can be output as
19245    is) or not (and must be printed with an octal escape) must be made
19246    with reference to the *host* character set -- the situation is
19247    similar to that discussed in the comments above pp_c_char in
19248    c-pretty-print.c.  */
19249 
19250 #define MAX_ASCII_LEN 51
19251 
19252 void
19253 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19254 {
19255   int i;
19256   int len_so_far = 0;
19257 
19258   fputs ("\t.ascii\t\"", stream);
19259 
19260   for (i = 0; i < len; i++)
19261     {
19262       int c = p[i];
19263 
19264       if (len_so_far >= MAX_ASCII_LEN)
19265 	{
19266 	  fputs ("\"\n\t.ascii\t\"", stream);
19267 	  len_so_far = 0;
19268 	}
19269 
19270       if (ISPRINT (c))
19271 	{
19272 	  if (c == '\\' || c == '\"')
19273 	    {
19274 	      putc ('\\', stream);
19275 	      len_so_far++;
19276 	    }
19277 	  putc (c, stream);
19278 	  len_so_far++;
19279 	}
19280       else
19281 	{
19282 	  fprintf (stream, "\\%03o", c);
19283 	  len_so_far += 4;
19284 	}
19285     }
19286 
19287   fputs ("\"\n", stream);
19288 }
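/* For example (illustrative input), the three bytes 'h', '"' and '\n' are
   emitted as

	.ascii	"h\"\012"

   and the string is restarted on a fresh .ascii directive once
   MAX_ASCII_LEN characters have been written.  */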
19289 
19290 /* Whether a register is callee saved or not.  This is necessary because, on
19291    Thumb-1 targets, high registers are marked as caller saved when optimizing
19292    for size, despite being callee saved, in order to avoid using them.  */
19293 #define callee_saved_reg_p(reg) \
19294   (!call_used_regs[reg] \
19295    || (TARGET_THUMB1 && optimize_size \
19296        && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19297 
19298 /* Compute the register save mask for registers 0 through 12
19299    inclusive.  This code is used by arm_compute_save_core_reg_mask ().  */
19300 
19301 static unsigned long
19302 arm_compute_save_reg0_reg12_mask (void)
19303 {
19304   unsigned long func_type = arm_current_func_type ();
19305   unsigned long save_reg_mask = 0;
19306   unsigned int reg;
19307 
19308   if (IS_INTERRUPT (func_type))
19309     {
19310       unsigned int max_reg;
19311       /* Interrupt functions must not corrupt any registers,
19312 	 even call clobbered ones.  If this is a leaf function
19313 	 we can just examine the registers used by the RTL, but
19314 	 otherwise we have to assume that whatever function is
19315 	 called might clobber anything, and so we have to save
19316 	 all the call-clobbered registers as well.  */
19317       if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19318 	/* FIQ handlers have registers r8 - r12 banked, so
19319 	   we only need to check r0 - r7.  Normal ISRs only
19320 	   bank r14 and r15, so we must check up to r12.
19321 	   r13 is the stack pointer which is always preserved,
19322 	   so we do not need to consider it here.  */
19323 	max_reg = 7;
19324       else
19325 	max_reg = 12;
19326 
19327       for (reg = 0; reg <= max_reg; reg++)
19328 	if (df_regs_ever_live_p (reg)
19329 	    || (! crtl->is_leaf && call_used_regs[reg]))
19330 	  save_reg_mask |= (1 << reg);
19331 
19332       /* Also save the pic base register if necessary.  */
19333       if (flag_pic
19334 	  && !TARGET_SINGLE_PIC_BASE
19335 	  && arm_pic_register != INVALID_REGNUM
19336 	  && crtl->uses_pic_offset_table)
19337 	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19338     }
19339   else if (IS_VOLATILE(func_type))
19340     {
19341       /* For noreturn functions we historically omitted register saves
19342 	 altogether.  However this really messes up debugging.  As a
19343 	 compromise save just the frame pointers.  Combined with the link
19344 	 register saved elsewhere this should be sufficient to get
19345 	 a backtrace.  */
19346       if (frame_pointer_needed)
19347 	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19348       if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19349 	save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19350       if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19351 	save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19352     }
19353   else
19354     {
19355       /* In the normal case we only need to save those registers
19356 	 which are call saved and which are used by this function.  */
19357       for (reg = 0; reg <= 11; reg++)
19358 	if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19359 	  save_reg_mask |= (1 << reg);
19360 
19361       /* Handle the frame pointer as a special case.  */
19362       if (frame_pointer_needed)
19363 	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19364 
19365       /* If we aren't loading the PIC register,
19366 	 don't stack it even though it may be live.  */
19367       if (flag_pic
19368 	  && !TARGET_SINGLE_PIC_BASE
19369 	  && arm_pic_register != INVALID_REGNUM
19370 	  && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19371 	      || crtl->uses_pic_offset_table))
19372 	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19373 
19374       /* The prologue will copy SP into R0, so save it.  */
19375       if (IS_STACKALIGN (func_type))
19376 	save_reg_mask |= 1;
19377     }
19378 
19379   /* Save registers so the exception handler can modify them.  */
19380   if (crtl->calls_eh_return)
19381     {
19382       unsigned int i;
19383 
19384       for (i = 0; ; i++)
19385 	{
19386 	  reg = EH_RETURN_DATA_REGNO (i);
19387 	  if (reg == INVALID_REGNUM)
19388 	    break;
19389 	  save_reg_mask |= 1 << reg;
19390 	}
19391     }
19392 
19393   return save_reg_mask;
19394 }
19395 
19396 /* Return true if r3 is live at the start of the function.  */
19397 
19398 static bool
19399 arm_r3_live_at_start_p (void)
19400 {
19401   /* Just look at cfg info, which is still close enough to correct at this
19402      point.  This gives false positives for broken functions that might use
19403      uninitialized data that happens to be allocated in r3, but who cares?  */
19404   return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19405 }
19406 
19407 /* Compute the number of bytes used to store the static chain register on the
19408    stack, above the stack frame.  We need to know this accurately to get the
19409    alignment of the rest of the stack frame correct.  */
19410 
19411 static int
19412 arm_compute_static_chain_stack_bytes (void)
19413 {
19414   /* Once the value is updated from the init value of -1, do not
19415      re-compute.  */
19416   if (cfun->machine->static_chain_stack_bytes != -1)
19417     return cfun->machine->static_chain_stack_bytes;
19418 
19419   /* See the defining assertion in arm_expand_prologue.  */
19420   if (IS_NESTED (arm_current_func_type ())
19421       && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19422 	  || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19423 	       || flag_stack_clash_protection)
19424 	      && !df_regs_ever_live_p (LR_REGNUM)))
19425       && arm_r3_live_at_start_p ()
19426       && crtl->args.pretend_args_size == 0)
19427     return 4;
19428 
19429   return 0;
19430 }
19431 
19432 /* Compute a bit mask of which core registers need to be
19433    saved on the stack for the current function.
19434    This is used by arm_compute_frame_layout, which may add extra registers.  */
19435 
19436 static unsigned long
19437 arm_compute_save_core_reg_mask (void)
19438 {
19439   unsigned int save_reg_mask = 0;
19440   unsigned long func_type = arm_current_func_type ();
19441   unsigned int reg;
19442 
19443   if (IS_NAKED (func_type))
19444     /* This should never really happen.  */
19445     return 0;
19446 
19447   /* If we are creating a stack frame, then we must save the frame pointer,
19448      IP (which will hold the old stack pointer), LR and the PC.  */
19449   if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19450     save_reg_mask |=
19451       (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19452       | (1 << IP_REGNUM)
19453       | (1 << LR_REGNUM)
19454       | (1 << PC_REGNUM);
19455 
19456   save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19457 
19458   /* Decide if we need to save the link register.
19459      Interrupt routines have their own banked link register,
19460      so they never need to save it.
19461      Otherwise if we do not use the link register we do not need to save
19462      it.  If we are pushing other registers onto the stack however, we
19463      can save an instruction in the epilogue by pushing the link register
19464      now and then popping it back into the PC.  This incurs extra memory
19465      accesses though, so we only do it when optimizing for size, and only
19466      if we know that we will not need a fancy return sequence.  */
19467   if (df_regs_ever_live_p (LR_REGNUM)
19468       || (save_reg_mask
19469 	  && optimize_size
19470 	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19471 	  && !crtl->tail_call_emit
19472 	  && !crtl->calls_eh_return))
19473     save_reg_mask |= 1 << LR_REGNUM;
19474 
19475   if (cfun->machine->lr_save_eliminated)
19476     save_reg_mask &= ~ (1 << LR_REGNUM);
19477 
19478   if (TARGET_REALLY_IWMMXT
19479       && ((bit_count (save_reg_mask)
19480 	   + ARM_NUM_INTS (crtl->args.pretend_args_size +
19481 			   arm_compute_static_chain_stack_bytes())
19482 	   ) % 2) != 0)
19483     {
19484       /* The total number of registers that are going to be pushed
19485 	 onto the stack is odd.  We need to ensure that the stack
19486 	 is 64-bit aligned before we start to save iWMMXt registers,
19487 	 and also before we start to create locals.  (A local variable
19488 	 might be a double or long long which we will load/store using
19489 	 an iWMMXt instruction).  Therefore we need to push another
19490 	 ARM register, so that the stack will be 64-bit aligned.  We
19491 	 try to avoid using the arg registers (r0-r3) as they might be
19492 	 used to pass values in a tail call.  */
19493       for (reg = 4; reg <= 12; reg++)
19494 	if ((save_reg_mask & (1 << reg)) == 0)
19495 	  break;
19496 
19497       if (reg <= 12)
19498 	save_reg_mask |= (1 << reg);
19499       else
19500 	{
19501 	  cfun->machine->sibcall_blocked = 1;
19502 	  save_reg_mask |= (1 << 3);
19503 	}
19504     }
19505 
19506   /* We may need to push an additional register for use initializing the
19507      PIC base register.  */
19508   if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19509       && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19510     {
19511       reg = thumb_find_work_register (1 << 4);
19512       if (!call_used_regs[reg])
19513 	save_reg_mask |= (1 << reg);
19514     }
19515 
19516   return save_reg_mask;
19517 }
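/* As an illustrative sketch: for an ordinary ARM-mode function compiled
   with an APCS frame that uses r4 and r5, the mask computed above would
   contain r4 and r5 (the live callee-saved registers) together with fp,
   ip, lr and pc (the frame-creation set added for the APCS frame).  */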
19518 
19519 /* Return a mask for the call-clobbered low registers that are unused
19520    at the end of the prologue.  */
19521 static unsigned long
19522 thumb1_prologue_unused_call_clobbered_lo_regs (void)
19523 {
19524   unsigned long mask = 0;
19525 
19526   for (int reg = 0; reg <= LAST_LO_REGNUM; reg++)
19527     if (!callee_saved_reg_p (reg)
19528 	&& !REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
19529 			     reg))
19530       mask |= 1 << reg;
19531   return mask;
19532 }
19533 
19534 /* Similarly for the start of the epilogue.  */
19535 static unsigned long
19536 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
19537 {
19538   unsigned long mask = 0;
19539 
19540   for (int reg = 0; reg <= LAST_LO_REGNUM; reg++)
19541     if (!callee_saved_reg_p (reg)
19542 	&& !REGNO_REG_SET_P (df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun)),
19543 			     reg))
19544       mask |= 1 << reg;
19545   return mask;
19546 }
19547 
19548 /* Compute a bit mask of which core registers need to be
19549    saved on the stack for the current function.  */
19550 static unsigned long
19551 thumb1_compute_save_core_reg_mask (void)
19552 {
19553   unsigned long mask;
19554   unsigned reg;
19555 
19556   mask = 0;
19557   for (reg = 0; reg < 12; reg ++)
19558     if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19559       mask |= 1 << reg;
19560 
19561   /* Handle the frame pointer as a special case.  */
19562   if (frame_pointer_needed)
19563     mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19564 
19565   if (flag_pic
19566       && !TARGET_SINGLE_PIC_BASE
19567       && arm_pic_register != INVALID_REGNUM
19568       && crtl->uses_pic_offset_table)
19569     mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19570 
19571   /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
19572   if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19573     mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19574 
19575   /* LR will also be pushed if any lo regs are pushed.  */
19576   if (mask & 0xff || thumb_force_lr_save ())
19577     mask |= (1 << LR_REGNUM);
19578 
19579   bool call_clobbered_scratch
19580     = (thumb1_prologue_unused_call_clobbered_lo_regs ()
19581        && thumb1_epilogue_unused_call_clobbered_lo_regs ());
19582 
19583   /* Make sure we have a low work register if we need one.  We will
19584      need one if we are going to push a high register, but we are not
19585      currently intending to push a low register.  However if both the
19586      prologue and epilogue have a spare call-clobbered low register,
19587      then we won't need to find an additional work register.  It does
19588      not need to be the same register in the prologue and
19589      epilogue.  */
19590   if ((mask & 0xff) == 0
19591       && !call_clobbered_scratch
19592       && ((mask & 0x0f00) || TARGET_BACKTRACE))
19593     {
19594       /* Use thumb_find_work_register to choose which register
19595 	 we will use.  If the register is live then we will
19596 	 have to push it.  Use LAST_LO_REGNUM as our fallback
19597 	 choice for the register to select.  */
19598       reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19599       /* Make sure the register returned by thumb_find_work_register is
19600 	 not part of the return value.  */
19601       if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19602 	reg = LAST_LO_REGNUM;
19603 
19604       if (callee_saved_reg_p (reg))
19605 	mask |= 1 << reg;
19606     }
19607 
19608   /* The 504 below is 8 bytes less than 512 because there are two possible
19609      alignment words.  We can't tell here if they will be present or not so we
19610      have to play it safe and assume that they are. */
19611   if ((CALLER_INTERWORKING_SLOT_SIZE +
19612        ROUND_UP_WORD (get_frame_size ()) +
19613        crtl->outgoing_args_size) >= 504)
19614     {
19615       /* This is the same as the code in thumb1_expand_prologue() which
19616 	 determines which register to use for stack decrement. */
19617       for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19618 	if (mask & (1 << reg))
19619 	  break;
19620 
19621       if (reg > LAST_LO_REGNUM)
19622 	{
19623 	  /* Make sure we have a register available for stack decrement. */
19624 	  mask |= 1 << LAST_LO_REGNUM;
19625 	}
19626     }
19627 
19628   return mask;
19629 }
19630 
19631 
19632 /* Return the number of bytes required to save VFP registers.  */
19633 static int
19634 arm_get_vfp_saved_size (void)
19635 {
19636   unsigned int regno;
19637   int count;
19638   int saved;
19639 
19640   saved = 0;
19641   /* Space for saved VFP registers.  */
19642   if (TARGET_HARD_FLOAT)
19643     {
19644       count = 0;
19645       for (regno = FIRST_VFP_REGNUM;
19646 	   regno < LAST_VFP_REGNUM;
19647 	   regno += 2)
19648 	{
19649 	  if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19650 	      && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19651 	    {
19652 	      if (count > 0)
19653 		{
19654 		  /* Workaround ARM10 VFPr1 bug.  */
19655 		  if (count == 2 && !arm_arch6)
19656 		    count++;
19657 		  saved += count * 8;
19658 		}
19659 	      count = 0;
19660 	    }
19661 	  else
19662 	    count++;
19663 	}
19664       if (count > 0)
19665 	{
19666 	  if (count == 2 && !arm_arch6)
19667 	    count++;
19668 	  saved += count * 8;
19669 	}
19670     }
19671   return saved;
19672 }
19673 
19674 
19675 /* Generate a function exit sequence.  If REALLY_RETURN is false, then do
19676    everything bar the final return instruction.  If simple_return is true,
19677    then do not output epilogue, because it has already been emitted in RTL.
19678 
19679    Note: do not forget to update length attribute of corresponding insn pattern
19680    when changing assembly output (e.g. length attribute of
19681    thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
19682    register clearing sequences).  */
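/* A minimal sketch of the output (assuming ARM mode, no interworking,
   REALLY_RETURN true and SIMPLE_RETURN false): a function whose
   saved-register mask is just {r4, lr} gets a single

	pop	{r4, pc}

   while a function with nothing saved falls through to the final
   "bx lr" (or "mov pc, lr" on cores without BX).  */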
19683 const char *
19684 output_return_instruction (rtx operand, bool really_return, bool reverse,
19685                            bool simple_return)
19686 {
19687   char conditional[10];
19688   char instr[100];
19689   unsigned reg;
19690   unsigned long live_regs_mask;
19691   unsigned long func_type;
19692   arm_stack_offsets *offsets;
19693 
19694   func_type = arm_current_func_type ();
19695 
19696   if (IS_NAKED (func_type))
19697     return "";
19698 
19699   if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19700     {
19701       /* If this function was declared non-returning, and we have
19702 	 found a tail call, then we have to trust that the called
19703 	 function won't return.  */
19704       if (really_return)
19705 	{
19706 	  rtx ops[2];
19707 
19708 	  /* Otherwise, trap an attempted return by aborting.  */
19709 	  ops[0] = operand;
19710 	  ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19711 				       : "abort");
19712 	  assemble_external_libcall (ops[1]);
19713 	  output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19714 	}
19715 
19716       return "";
19717     }
19718 
19719   gcc_assert (!cfun->calls_alloca || really_return);
19720 
19721   sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19722 
19723   cfun->machine->return_used_this_function = 1;
19724 
19725   offsets = arm_get_frame_offsets ();
19726   live_regs_mask = offsets->saved_regs_mask;
19727 
19728   if (!simple_return && live_regs_mask)
19729     {
19730       const char * return_reg;
19731 
19732       /* If we do not have any special requirements for function exit
19733 	 (e.g. interworking) then we can load the return address
19734 	 directly into the PC.  Otherwise we must load it into LR.  */
19735       if (really_return
19736 	  && !IS_CMSE_ENTRY (func_type)
19737 	  && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19738 	return_reg = reg_names[PC_REGNUM];
19739       else
19740 	return_reg = reg_names[LR_REGNUM];
19741 
19742       if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19743 	{
19744 	  /* There are three possible reasons for the IP register
19745 	     being saved.  1) a stack frame was created, in which case
19746 	     IP contains the old stack pointer, or 2) an ISR routine
19747 	     corrupted it, or 3) it was saved to align the stack on
19748 	     iWMMXt.  In case 1, restore IP into SP, otherwise just
19749 	     restore IP.  */
19750 	  if (frame_pointer_needed)
19751 	    {
19752 	      live_regs_mask &= ~ (1 << IP_REGNUM);
19753 	      live_regs_mask |=   (1 << SP_REGNUM);
19754 	    }
19755 	  else
19756 	    gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19757 	}
19758 
19759       /* On some ARM architectures it is faster to use LDR rather than
19760 	 LDM to load a single register.  On other architectures, the
19761 	 cost is the same.  In 26 bit mode, or for exception handlers,
19762 	 we have to use LDM to load the PC so that the CPSR is also
19763 	 restored.  */
19764       for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19765 	if (live_regs_mask == (1U << reg))
19766 	  break;
19767 
19768       if (reg <= LAST_ARM_REGNUM
19769 	  && (reg != LR_REGNUM
19770 	      || ! really_return
19771 	      || ! IS_INTERRUPT (func_type)))
19772 	{
19773 	  sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19774 		   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19775 	}
19776       else
19777 	{
19778 	  char *p;
19779 	  int first = 1;
19780 
19781 	  /* Generate the load multiple instruction to restore the
19782 	     registers.  Note we can get here, even if
19783 	     frame_pointer_needed is true, but only if sp already
19784 	     points to the base of the saved core registers.  */
19785 	  if (live_regs_mask & (1 << SP_REGNUM))
19786 	    {
19787 	      unsigned HOST_WIDE_INT stack_adjust;
19788 
19789 	      stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19790 	      gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19791 
19792 	      if (stack_adjust && arm_arch5 && TARGET_ARM)
19793 		  sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19794 	      else
19795 		{
19796 		  /* If we can't use ldmib (SA110 bug),
19797 		     then try to pop r3 instead.  */
19798 		  if (stack_adjust)
19799 		    live_regs_mask |= 1 << 3;
19800 
19801 		  sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19802 		}
19803 	    }
19804 	  /* For interrupt returns we have to use an LDM rather than
19805 	     a POP so that we can use the exception return variant.  */
19806 	  else if (IS_INTERRUPT (func_type))
19807 	    sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19808 	  else
19809 	    sprintf (instr, "pop%s\t{", conditional);
19810 
19811 	  p = instr + strlen (instr);
19812 
19813 	  for (reg = 0; reg <= SP_REGNUM; reg++)
19814 	    if (live_regs_mask & (1 << reg))
19815 	      {
19816 		int l = strlen (reg_names[reg]);
19817 
19818 		if (first)
19819 		  first = 0;
19820 		else
19821 		  {
19822 		    memcpy (p, ", ", 2);
19823 		    p += 2;
19824 		  }
19825 
19826 		memcpy (p, "%|", 2);
19827 		memcpy (p + 2, reg_names[reg], l);
19828 		p += l + 2;
19829 	      }
19830 
19831 	  if (live_regs_mask & (1 << LR_REGNUM))
19832 	    {
19833 	      sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19834 	      /* If returning from an interrupt, restore the CPSR.  */
19835 	      if (IS_INTERRUPT (func_type))
19836 		strcat (p, "^");
19837 	    }
19838 	  else
19839 	    strcpy (p, "}");
19840 	}
19841 
19842       output_asm_insn (instr, & operand);
19843 
19844       /* See if we need to generate an extra instruction to
19845 	 perform the actual function return.  */
19846       if (really_return
19847 	  && func_type != ARM_FT_INTERWORKED
19848 	  && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19849 	{
19850 	  /* The return has already been handled
19851 	     by loading the LR into the PC.  */
19852           return "";
19853 	}
19854     }
19855 
19856   if (really_return)
19857     {
19858       switch ((int) ARM_FUNC_TYPE (func_type))
19859 	{
19860 	case ARM_FT_ISR:
19861 	case ARM_FT_FIQ:
19862 	  /* ??? This is wrong for unified assembly syntax.  */
19863 	  sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19864 	  break;
19865 
19866 	case ARM_FT_INTERWORKED:
19867 	  gcc_assert (arm_arch5 || arm_arch4t);
19868 	  sprintf (instr, "bx%s\t%%|lr", conditional);
19869 	  break;
19870 
19871 	case ARM_FT_EXCEPTION:
19872 	  /* ??? This is wrong for unified assembly syntax.  */
19873 	  sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19874 	  break;
19875 
19876 	default:
19877 	  if (IS_CMSE_ENTRY (func_type))
19878 	    {
19879 	      /* Check if we have to clear the 'GE bits', which are only used if
19880 		 parallel addition and subtraction instructions are available.  */
19881 	      if (TARGET_INT_SIMD)
19882 		snprintf (instr, sizeof (instr),
19883 			  "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19884 	      else
19885 		snprintf (instr, sizeof (instr),
19886 			  "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19887 
19888 	      output_asm_insn (instr, & operand);
19889 	      if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19890 		{
19891 		  /* Clear the cumulative exception-status bits (0-4,7) and the
19892 		     condition code bits (28-31) of the FPSCR.  We need to
19893 		     remember to clear the first scratch register used (IP) and
19894 		     save and restore the second (r4).  */
19895 		  snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19896 		  output_asm_insn (instr, & operand);
19897 		  snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19898 		  output_asm_insn (instr, & operand);
19899 		  snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19900 		  output_asm_insn (instr, & operand);
19901 		  snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19902 		  output_asm_insn (instr, & operand);
19903 		  snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19904 		  output_asm_insn (instr, & operand);
19905 		  snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19906 		  output_asm_insn (instr, & operand);
19907 		  snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19908 		  output_asm_insn (instr, & operand);
19909 		  snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19910 		  output_asm_insn (instr, & operand);
19911 		}
19912 	      snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19913 	    }
19914 	  /* Use bx if it's available.  */
19915 	  else if (arm_arch5 || arm_arch4t)
19916 	    sprintf (instr, "bx%s\t%%|lr", conditional);
19917 	  else
19918 	    sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19919 	  break;
19920 	}
19921 
19922       output_asm_insn (instr, & operand);
19923     }
19924 
19925   return "";
19926 }
19927 
19928 /* Output in FILE asm statements needed to declare the NAME of the function
19929    defined by its DECL node.  */
19930 
19931 void
19932 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19933 {
19934   size_t cmse_name_len;
19935   char *cmse_name = 0;
19936   char cmse_prefix[] = "__acle_se_";
19937 
19938   /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19939      extra function label for each function with the 'cmse_nonsecure_entry'
19940      attribute.  This extra function label should be prepended with
19941      '__acle_se_', telling the linker that it needs to create secure gateway
19942      veneers for this function.  */
19943   if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19944 				    DECL_ATTRIBUTES (decl)))
19945     {
19946       cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19947       cmse_name = XALLOCAVEC (char, cmse_name_len);
19948       snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19949       targetm.asm_out.globalize_label (file, cmse_name);
19950 
19951       ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19952       ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19953     }
19954 
19955   ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19956   ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19957   ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19958   ASM_OUTPUT_LABEL (file, name);
19959 
19960   if (cmse_name)
19961     ASM_OUTPUT_LABEL (file, cmse_name);
19962 
19963   ARM_OUTPUT_FN_UNWIND (file, TRUE);
19964 }
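/* For example, a function foo carrying the cmse_nonsecure_entry
   attribute (compiled with CMSE enabled) has __acle_se_foo globalized
   and typed as a function, and both labels are then emitted at the
   start of the code:

   foo:
   __acle_se_foo:

   which lets the linker create the secure gateway veneer for foo.  */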
19965 
19966 /* Write the function name into the code section, directly preceding
19967    the function prologue.
19968 
19969    Code will be output similar to this:
19970      t0
19971 	 .ascii "arm_poke_function_name", 0
19972 	 .align
19973      t1
19974 	 .word 0xff000000 + (t1 - t0)
19975      arm_poke_function_name
19976 	 mov     ip, sp
19977 	 stmfd   sp!, {fp, ip, lr, pc}
19978 	 sub     fp, ip, #4
19979 
19980    When performing a stack backtrace, code can inspect the value
19981    of 'pc' stored at 'fp' + 0.  If the trace function then looks
19982    at location pc - 12 and the top 8 bits are set, then we know
19983    that there is a function name embedded immediately preceding this
19984    location and has length ((pc[-3]) & 0xff000000).
19985    location, whose length is ((pc[-3]) & ~0xff000000).
19986    We assume that pc is declared as a pointer to an unsigned long.
19987 
19988    It is of no benefit to output the function name if we are assembling
19989    a leaf function.  These function types will not contain a stack
19990    backtrace structure, therefore it is not possible to determine the
19991    function name.  */
19992 void
19993 arm_poke_function_name (FILE *stream, const char *name)
19994 {
19995   unsigned long alignlength;
19996   unsigned long length;
19997   rtx           x;
19998 
19999   length      = strlen (name) + 1;
20000   alignlength = ROUND_UP_WORD (length);
20001 
20002   ASM_OUTPUT_ASCII (stream, name, length);
20003   ASM_OUTPUT_ALIGN (stream, 2);
20004   x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
20005   assemble_aligned_integer (UNITS_PER_WORD, x);
20006 }
20007 
20008 /* Place some comments into the assembler stream
20009    describing the current function.  */
20010 static void
20011 arm_output_function_prologue (FILE *f)
20012 {
20013   unsigned long func_type;
20014 
20015   /* Sanity check.  */
20016   gcc_assert (!arm_ccfsm_state && !arm_target_insn);
20017 
20018   func_type = arm_current_func_type ();
20019 
20020   switch ((int) ARM_FUNC_TYPE (func_type))
20021     {
20022     default:
20023     case ARM_FT_NORMAL:
20024       break;
20025     case ARM_FT_INTERWORKED:
20026       asm_fprintf (f, "\t%@ Function supports interworking.\n");
20027       break;
20028     case ARM_FT_ISR:
20029       asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
20030       break;
20031     case ARM_FT_FIQ:
20032       asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
20033       break;
20034     case ARM_FT_EXCEPTION:
20035       asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
20036       break;
20037     }
20038 
20039   if (IS_NAKED (func_type))
20040     asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
20041 
20042   if (IS_VOLATILE (func_type))
20043     asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
20044 
20045   if (IS_NESTED (func_type))
20046     asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
20047   if (IS_STACKALIGN (func_type))
20048     asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
20049   if (IS_CMSE_ENTRY (func_type))
20050     asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
20051 
20052   asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
20053 	       (HOST_WIDE_INT) crtl->args.size,
20054 	       crtl->args.pretend_args_size,
20055 	       (HOST_WIDE_INT) get_frame_size ());
20056 
20057   asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
20058 	       frame_pointer_needed,
20059 	       cfun->machine->uses_anonymous_args);
20060 
20061   if (cfun->machine->lr_save_eliminated)
20062     asm_fprintf (f, "\t%@ link register save eliminated.\n");
20063 
20064   if (crtl->calls_eh_return)
20065     asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
20066 
20067 }
20068 
20069 static void
20070 arm_output_function_epilogue (FILE *)
20071 {
20072   arm_stack_offsets *offsets;
20073 
20074   if (TARGET_THUMB1)
20075     {
20076       int regno;
20077 
20078       /* Emit any call-via-reg trampolines that are needed for v4t support
20079 	 of call_reg and call_value_reg type insns.  */
20080       for (regno = 0; regno < LR_REGNUM; regno++)
20081 	{
20082 	  rtx label = cfun->machine->call_via[regno];
20083 
20084 	  if (label != NULL)
20085 	    {
20086 	      switch_to_section (function_section (current_function_decl));
20087 	      targetm.asm_out.internal_label (asm_out_file, "L",
20088 					      CODE_LABEL_NUMBER (label));
20089 	      asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
20090 	    }
20091 	}
20092 
20093       /* ??? Probably not safe to set this here, since it assumes that a
20094 	 function will be emitted as assembly immediately after we generate
20095 	 RTL for it.  This does not happen for inline functions.  */
20096       cfun->machine->return_used_this_function = 0;
20097     }
20098   else /* TARGET_32BIT */
20099     {
20100       /* We need to take into account any stack-frame rounding.  */
20101       offsets = arm_get_frame_offsets ();
20102 
20103       gcc_assert (!use_return_insn (FALSE, NULL)
20104 		  || (cfun->machine->return_used_this_function != 0)
20105 		  || offsets->saved_regs == offsets->outgoing_args
20106 		  || frame_pointer_needed);
20107     }
20108 }
20109 
20110 /* Generate and emit a sequence of insns equivalent to PUSH, but using
20111    STR and STRD.  If an even number of registers is being pushed, an
20112    STRD pattern is created for each register pair.  If an
20113    odd number of registers is pushed, emit an initial STR followed by
20114    as many STRD instructions as are needed.  This works best when the
20115    stack is initially 64-bit aligned (the normal case), since it
20116    ensures that each STRD is also 64-bit aligned.  */
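/* A sketch of the effect (assuming SP is 64-bit aligned on entry): for
   a three-register mask such as {r4, r5, r6} the sequence is roughly

	str	r4, [sp, #-12]!		@ single store allocates all 12 bytes
	strd	r5, r6, [sp, #4]

   while an even mask such as {r4, r5, r6, r7} starts directly with a
   writeback STRD:

	strd	r4, r5, [sp, #-16]!
	strd	r6, r7, [sp, #8]  */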
20117 static void
20118 thumb2_emit_strd_push (unsigned long saved_regs_mask)
20119 {
20120   int num_regs = 0;
20121   int i;
20122   int regno;
20123   rtx par = NULL_RTX;
20124   rtx dwarf = NULL_RTX;
20125   rtx tmp;
20126   bool first = true;
20127 
20128   num_regs = bit_count (saved_regs_mask);
20129 
20130   /* Must be at least one register to save, and can't save SP or PC.  */
20131   gcc_assert (num_regs > 0 && num_regs <= 14);
20132   gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20133   gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20134 
20135   /* Create sequence for DWARF info.  All the frame-related data for
20136      debugging is held in this wrapper.  */
20137   dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20138 
20139   /* Describe the stack adjustment.  */
20140   tmp = gen_rtx_SET (stack_pointer_rtx,
20141 		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20142   RTX_FRAME_RELATED_P (tmp) = 1;
20143   XVECEXP (dwarf, 0, 0) = tmp;
20144 
20145   /* Find the first register.  */
20146   for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
20147     ;
20148 
20149   i = 0;
20150 
20151   /* If there's an odd number of registers to push, start off by
20152      pushing a single register.  This ensures that subsequent strd
20153      operations are dword aligned (assuming that SP was originally
20154      64-bit aligned).  */
20155   if ((num_regs & 1) != 0)
20156     {
20157       rtx reg, mem, insn;
20158 
20159       reg = gen_rtx_REG (SImode, regno);
20160       if (num_regs == 1)
20161 	mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
20162 						     stack_pointer_rtx));
20163       else
20164 	mem = gen_frame_mem (Pmode,
20165 			     gen_rtx_PRE_MODIFY
20166 			     (Pmode, stack_pointer_rtx,
20167 			      plus_constant (Pmode, stack_pointer_rtx,
20168 					     -4 * num_regs)));
20169 
20170       tmp = gen_rtx_SET (mem, reg);
20171       RTX_FRAME_RELATED_P (tmp) = 1;
20172       insn = emit_insn (tmp);
20173       RTX_FRAME_RELATED_P (insn) = 1;
20174       add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20175       tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
20176       RTX_FRAME_RELATED_P (tmp) = 1;
20177       i++;
20178       regno++;
20179       XVECEXP (dwarf, 0, i) = tmp;
20180       first = false;
20181     }
20182 
20183   while (i < num_regs)
20184     if (saved_regs_mask & (1 << regno))
20185       {
20186 	rtx reg1, reg2, mem1, mem2;
20187 	rtx tmp0, tmp1, tmp2;
20188 	int regno2;
20189 
20190 	/* Find the register to pair with this one.  */
20191 	for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
20192 	     regno2++)
20193 	  ;
20194 
20195 	reg1 = gen_rtx_REG (SImode, regno);
20196 	reg2 = gen_rtx_REG (SImode, regno2);
20197 
20198 	if (first)
20199 	  {
20200 	    rtx insn;
20201 
20202 	    first = false;
20203 	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20204 							stack_pointer_rtx,
20205 							-4 * num_regs));
20206 	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20207 							stack_pointer_rtx,
20208 							-4 * (num_regs - 1)));
20209 	    tmp0 = gen_rtx_SET (stack_pointer_rtx,
20210 				plus_constant (Pmode, stack_pointer_rtx,
20211 					       -4 * (num_regs)));
20212 	    tmp1 = gen_rtx_SET (mem1, reg1);
20213 	    tmp2 = gen_rtx_SET (mem2, reg2);
20214 	    RTX_FRAME_RELATED_P (tmp0) = 1;
20215 	    RTX_FRAME_RELATED_P (tmp1) = 1;
20216 	    RTX_FRAME_RELATED_P (tmp2) = 1;
20217 	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
20218 	    XVECEXP (par, 0, 0) = tmp0;
20219 	    XVECEXP (par, 0, 1) = tmp1;
20220 	    XVECEXP (par, 0, 2) = tmp2;
20221 	    insn = emit_insn (par);
20222 	    RTX_FRAME_RELATED_P (insn) = 1;
20223 	    add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20224 	  }
20225 	else
20226 	  {
20227 	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20228 							stack_pointer_rtx,
20229 							4 * i));
20230 	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20231 							stack_pointer_rtx,
20232 							4 * (i + 1)));
20233 	    tmp1 = gen_rtx_SET (mem1, reg1);
20234 	    tmp2 = gen_rtx_SET (mem2, reg2);
20235 	    RTX_FRAME_RELATED_P (tmp1) = 1;
20236 	    RTX_FRAME_RELATED_P (tmp2) = 1;
20237 	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20238 	    XVECEXP (par, 0, 0) = tmp1;
20239 	    XVECEXP (par, 0, 1) = tmp2;
20240 	    emit_insn (par);
20241 	  }
20242 
20243 	/* Create unwind information.  This is an approximation.  */
20244 	tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
20245 					   plus_constant (Pmode,
20246 							  stack_pointer_rtx,
20247 							  4 * i)),
20248 			    reg1);
20249 	tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
20250 					   plus_constant (Pmode,
20251 							  stack_pointer_rtx,
20252 							  4 * (i + 1))),
20253 			    reg2);
20254 
20255 	RTX_FRAME_RELATED_P (tmp1) = 1;
20256 	RTX_FRAME_RELATED_P (tmp2) = 1;
20257 	XVECEXP (dwarf, 0, i + 1) = tmp1;
20258 	XVECEXP (dwarf, 0, i + 2) = tmp2;
20259 	i += 2;
20260 	regno = regno2 + 1;
20261       }
20262     else
20263       regno++;
20264 
20265   return;
20266 }
20267 
20268 /* STRD in ARM mode requires consecutive registers.  This function emits STRD
20269    whenever possible, otherwise it emits single-word stores.  The first store
20270    also allocates stack space for all saved registers, using writeback with
20271    also allocates stack space for all saved registers, using writeback with
20272    pre-decrement addressing.  All other stores use offset addressing.  If no STRD
20273    can be emitted, this function emits a sequence of single-word stores,
20274    and not an STM as before, because single-word stores provide more
20275    scheduling freedom and can be turned into an STM by peephole optimizations.  */
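/* A sketch of the effect for a mask such as {r4, r5, r7} (r7 cannot be
   paired in ARM mode, so it gets a plain STR):

	strd	r4, r5, [sp, #-12]!	@ writeback allocates all 12 bytes
	str	r7, [sp, #8]

   The REG_FRAME_RELATED_EXPR note attached to the first store still
   describes one SP decrement plus three individual word stores.  */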
20276 arm_emit_strd_push (unsigned long saved_regs_mask)
20277 {
20278   int num_regs = 0;
20279   int i, j, dwarf_index  = 0;
20280   int offset = 0;
20281   rtx dwarf = NULL_RTX;
20282   rtx insn = NULL_RTX;
20283   rtx tmp, mem;
20284 
20285   /* TODO: More efficient code can be emitted by changing the
20286      layout, e.g., first push all pairs that can use STRD to keep the
20287      stack aligned, and then push all other registers.  */
20288   for (i = 0; i <= LAST_ARM_REGNUM; i++)
20289     if (saved_regs_mask & (1 << i))
20290       num_regs++;
20291 
20292   gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20293   gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20294   gcc_assert (num_regs > 0);
20295 
20296   /* Create sequence for DWARF info.  */
20297   dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20298 
20299   /* For dwarf info, we generate explicit stack update.  */
20300   tmp = gen_rtx_SET (stack_pointer_rtx,
20301                      plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20302   RTX_FRAME_RELATED_P (tmp) = 1;
20303   XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20304 
20305   /* Save registers.  */
20306   offset = - 4 * num_regs;
20307   j = 0;
20308   while (j <= LAST_ARM_REGNUM)
20309     if (saved_regs_mask & (1 << j))
20310       {
20311         if ((j % 2 == 0)
20312             && (saved_regs_mask & (1 << (j + 1))))
20313           {
20314             /* Current register and the next register form a register pair for
20315                which STRD can be generated.  */
20316             if (offset < 0)
20317               {
20318                 /* Allocate stack space for all saved registers.  */
20319                 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20320                 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20321                 mem = gen_frame_mem (DImode, tmp);
20322                 offset = 0;
20323               }
20324             else if (offset > 0)
20325               mem = gen_frame_mem (DImode,
20326                                    plus_constant (Pmode,
20327                                                   stack_pointer_rtx,
20328                                                   offset));
20329             else
20330               mem = gen_frame_mem (DImode, stack_pointer_rtx);
20331 
20332             tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20333             RTX_FRAME_RELATED_P (tmp) = 1;
20334             tmp = emit_insn (tmp);
20335 
20336             /* Record the first store insn.  */
20337             if (dwarf_index == 1)
20338               insn = tmp;
20339 
20340             /* Generate dwarf info.  */
20341             mem = gen_frame_mem (SImode,
20342                                  plus_constant (Pmode,
20343                                                 stack_pointer_rtx,
20344                                                 offset));
20345             tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20346             RTX_FRAME_RELATED_P (tmp) = 1;
20347             XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20348 
20349             mem = gen_frame_mem (SImode,
20350                                  plus_constant (Pmode,
20351                                                 stack_pointer_rtx,
20352                                                 offset + 4));
20353             tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20354             RTX_FRAME_RELATED_P (tmp) = 1;
20355             XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20356 
20357             offset += 8;
20358             j += 2;
20359           }
20360         else
20361           {
20362             /* Emit a single word store.  */
20363             if (offset < 0)
20364               {
20365                 /* Allocate stack space for all saved registers.  */
20366                 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20367                 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20368                 mem = gen_frame_mem (SImode, tmp);
20369                 offset = 0;
20370               }
20371             else if (offset > 0)
20372               mem = gen_frame_mem (SImode,
20373                                    plus_constant (Pmode,
20374                                                   stack_pointer_rtx,
20375                                                   offset));
20376             else
20377               mem = gen_frame_mem (SImode, stack_pointer_rtx);
20378 
20379             tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20380             RTX_FRAME_RELATED_P (tmp) = 1;
20381             tmp = emit_insn (tmp);
20382 
20383             /* Record the first store insn.  */
20384             if (dwarf_index == 1)
20385               insn = tmp;
20386 
20387             /* Generate dwarf info.  */
20388             mem = gen_frame_mem (SImode,
20389                                  plus_constant(Pmode,
20390                                                stack_pointer_rtx,
20391                                                offset));
20392             tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20393             RTX_FRAME_RELATED_P (tmp) = 1;
20394             XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20395 
20396             offset += 4;
20397             j += 1;
20398           }
20399       }
20400     else
20401       j++;
20402 
20403   /* Attach dwarf info to the first insn we generate.  */
20404   gcc_assert (insn != NULL_RTX);
20405   add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20406   RTX_FRAME_RELATED_P (insn) = 1;
20407 }
20408 
20409 /* Generate and emit an insn that we will recognize as a push_multi.
20410    Unfortunately, since this insn does not reflect very well the actual
20411    semantics of the operation, we need to annotate the insn for the benefit
20412    of DWARF2 frame unwind information.  DWARF_REGS_MASK is a subset of
20413    MASK for registers that should be annotated for DWARF2 frame unwind
20414    information.  */
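/* For instance, MASK = {r4, r5, lr} produces a single push_multi insn
   (assembled as "push {r4, r5, lr}" or an equivalent STMFD), and the
   attached REG_FRAME_RELATED_EXPR note lists the 12-byte SP decrement
   plus each of the three word stores individually.  */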
20415 static rtx
20416 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20417 {
20418   int num_regs = 0;
20419   int num_dwarf_regs = 0;
20420   int i, j;
20421   rtx par;
20422   rtx dwarf;
20423   int dwarf_par_index;
20424   rtx tmp, reg;
20425 
20426   /* We don't record the PC in the dwarf frame information.  */
20427   dwarf_regs_mask &= ~(1 << PC_REGNUM);
20428 
20429   for (i = 0; i <= LAST_ARM_REGNUM; i++)
20430     {
20431       if (mask & (1 << i))
20432 	num_regs++;
20433       if (dwarf_regs_mask & (1 << i))
20434 	num_dwarf_regs++;
20435     }
20436 
20437   gcc_assert (num_regs && num_regs <= 16);
20438   gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20439 
20440   /* For the body of the insn we are going to generate an UNSPEC in
20441      parallel with several USEs.  This allows the insn to be recognized
20442      by the push_multi pattern in the arm.md file.
20443 
20444      The body of the insn looks something like this:
20445 
20446        (parallel [
20447            (set (mem:BLK (pre_modify:SI (reg:SI sp)
20448 	                                (const_int:SI <num>)))
20449 	        (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20450            (use (reg:SI XX))
20451            (use (reg:SI YY))
20452 	   ...
20453         ])
20454 
20455      For the frame note however, we try to be more explicit and actually
20456      show each register being stored into the stack frame, plus a (single)
20457      decrement of the stack pointer.  We do it this way in order to be
20458      friendly to the stack unwinding code, which only wants to see a single
20459      stack decrement per instruction.  The RTL we generate for the note looks
20460      something like this:
20461 
20462       (sequence [
20463            (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20464            (set (mem:SI (reg:SI sp)) (reg:SI r4))
20465            (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20466            (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20467 	   ...
20468         ])
20469 
20470      FIXME:: In an ideal world the PRE_MODIFY would not exist and
20471      instead we'd have a parallel expression detailing all
20472      the stores to the various memory addresses so that debug
20473      information is more up-to-date. Remember however while writing
20474      this to take care of the constraints with the push instruction.
20475 
20476      Note also that this has to be taken care of for the VFP registers.
20477 
20478      For more see PR43399.  */
20479 
20480   par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20481   dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20482   dwarf_par_index = 1;
20483 
20484   for (i = 0; i <= LAST_ARM_REGNUM; i++)
20485     {
20486       if (mask & (1 << i))
20487 	{
20488 	  reg = gen_rtx_REG (SImode, i);
20489 
20490 	  XVECEXP (par, 0, 0)
20491 	    = gen_rtx_SET (gen_frame_mem
20492 			   (BLKmode,
20493 			    gen_rtx_PRE_MODIFY (Pmode,
20494 						stack_pointer_rtx,
20495 						plus_constant
20496 						(Pmode, stack_pointer_rtx,
20497 						 -4 * num_regs))
20498 			    ),
20499 			   gen_rtx_UNSPEC (BLKmode,
20500 					   gen_rtvec (1, reg),
20501 					   UNSPEC_PUSH_MULT));
20502 
20503 	  if (dwarf_regs_mask & (1 << i))
20504 	    {
20505 	      tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20506 				 reg);
20507 	      RTX_FRAME_RELATED_P (tmp) = 1;
20508 	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20509 	    }
20510 
20511 	  break;
20512 	}
20513     }
20514 
20515   for (j = 1, i++; j < num_regs; i++)
20516     {
20517       if (mask & (1 << i))
20518 	{
20519 	  reg = gen_rtx_REG (SImode, i);
20520 
20521 	  XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20522 
20523 	  if (dwarf_regs_mask & (1 << i))
20524 	    {
20525 	      tmp
20526 		= gen_rtx_SET (gen_frame_mem
20527 			       (SImode,
20528 				plus_constant (Pmode, stack_pointer_rtx,
20529 					       4 * j)),
20530 			       reg);
20531 	      RTX_FRAME_RELATED_P (tmp) = 1;
20532 	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20533 	    }
20534 
20535 	  j++;
20536 	}
20537     }
20538 
20539   par = emit_insn (par);
20540 
20541   tmp = gen_rtx_SET (stack_pointer_rtx,
20542 		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20543   RTX_FRAME_RELATED_P (tmp) = 1;
20544   XVECEXP (dwarf, 0, 0) = tmp;
20545 
20546   add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20547 
20548   return par;
20549 }
20550 
20551 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20552    SIZE is the offset to be adjusted.
20553    DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx.  */
20554 static void
20555 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20556 {
20557   rtx dwarf;
20558 
20559   RTX_FRAME_RELATED_P (insn) = 1;
20560   dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20561   add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20562 }
20563 
20564 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20565    SAVED_REGS_MASK shows which registers need to be restored.
20566 
20567    Unfortunately, since this insn does not reflect very well the actual
20568    semantics of the operation, we need to annotate the insn for the benefit
20569    of DWARF2 frame unwind information.  */
20570 static void
20571 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20572 {
20573   int num_regs = 0;
20574   int i, j;
20575   rtx par;
20576   rtx dwarf = NULL_RTX;
20577   rtx tmp, reg;
20578   bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20579   int offset_adj;
20580   int emit_update;
20581 
20582   offset_adj = return_in_pc ? 1 : 0;
20583   for (i = 0; i <= LAST_ARM_REGNUM; i++)
20584     if (saved_regs_mask & (1 << i))
20585       num_regs++;
20586 
20587   gcc_assert (num_regs && num_regs <= 16);
20588 
20589   /* If SP is in reglist, then we don't emit SP update insn.  */
20590   emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20591 
20592   /* The parallel needs to hold num_regs SETs
20593      and one SET for the stack update.  */
20594   par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20595 
20596   if (return_in_pc)
20597     XVECEXP (par, 0, 0) = ret_rtx;
20598 
20599   if (emit_update)
20600     {
20601       /* Increment the stack pointer, based on there being
20602          num_regs 4-byte registers to restore.  */
20603       tmp = gen_rtx_SET (stack_pointer_rtx,
20604                          plus_constant (Pmode,
20605                                         stack_pointer_rtx,
20606                                         4 * num_regs));
20607       RTX_FRAME_RELATED_P (tmp) = 1;
20608       XVECEXP (par, 0, offset_adj) = tmp;
20609     }
20610 
20611   /* Now restore every reg, which may include PC.  */
20612   for (j = 0, i = 0; j < num_regs; i++)
20613     if (saved_regs_mask & (1 << i))
20614       {
20615         reg = gen_rtx_REG (SImode, i);
20616         if ((num_regs == 1) && emit_update && !return_in_pc)
20617           {
20618             /* Emit single load with writeback.  */
20619             tmp = gen_frame_mem (SImode,
20620                                  gen_rtx_POST_INC (Pmode,
20621                                                    stack_pointer_rtx));
20622             tmp = emit_insn (gen_rtx_SET (reg, tmp));
20623             REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20624             return;
20625           }
20626 
20627         tmp = gen_rtx_SET (reg,
20628                            gen_frame_mem
20629                            (SImode,
20630                             plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20631         RTX_FRAME_RELATED_P (tmp) = 1;
20632         XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20633 
20634         /* We need to maintain a sequence for DWARF info too.  As dwarf info
20635            should not have PC, skip PC.  */
20636         if (i != PC_REGNUM)
20637           dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20638 
20639         j++;
20640       }
20641 
20642   if (return_in_pc)
20643     par = emit_jump_insn (par);
20644   else
20645     par = emit_insn (par);
20646 
20647   REG_NOTES (par) = dwarf;
20648   if (!return_in_pc)
20649     arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20650 				 stack_pointer_rtx, stack_pointer_rtx);
20651 }
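/* As a sketch: SAVED_REGS_MASK = {r4, r5, pc} yields one parallel that
   combines the return, the 12-byte SP increment and the three loads,
   i.e. the familiar

	pop	{r4, r5, pc}

   return sequence, with REG_CFA_RESTORE notes for r4 and r5 only,
   since the PC is not recorded in the unwind information.  */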
20652 
20653 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20654    of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20655 
20656    Unfortunately, since this insn does not reflect very well the actual
20657    semantics of the operation, we need to annotate the insn for the benefit
20658    of DWARF2 frame unwind information.  */
20659 static void
20660 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20661 {
20662   int i, j;
20663   rtx par;
20664   rtx dwarf = NULL_RTX;
20665   rtx tmp, reg;
20666 
20667   gcc_assert (num_regs && num_regs <= 32);
20668 
20669   /* Workaround ARM10 VFPr1 bug.  */
20670   if (num_regs == 2 && !arm_arch6)
20671     {
20672       if (first_reg == 15)
20673         first_reg--;
20674 
20675       num_regs++;
20676     }
20677 
20678   /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20679      there could be up to 32 D-registers to restore.
20680      If there are more than 16 D-registers, make two recursive calls,
20681      each of which emits one pop_multi instruction.  */
20682   if (num_regs > 16)
20683     {
20684       arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20685       arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20686       return;
20687     }
20688 
20689   /* The parallel needs to hold num_regs SETs
20690      and one SET for the stack update.  */
20691   par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20692 
20693   /* Increment the stack pointer, based on there being
20694      num_regs 8-byte registers to restore.  */
20695   tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20696   RTX_FRAME_RELATED_P (tmp) = 1;
20697   XVECEXP (par, 0, 0) = tmp;
20698 
20699   /* Now show every reg that will be restored, using a SET for each.  */
20700   for (j = 0, i=first_reg; j < num_regs; i += 2)
20701     {
20702       reg = gen_rtx_REG (DFmode, i);
20703 
20704       tmp = gen_rtx_SET (reg,
20705                          gen_frame_mem
20706                          (DFmode,
20707                           plus_constant (Pmode, base_reg, 8 * j)));
20708       RTX_FRAME_RELATED_P (tmp) = 1;
20709       XVECEXP (par, 0, j + 1) = tmp;
20710 
20711       dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20712 
20713       j++;
20714     }
20715 
20716   par = emit_insn (par);
20717   REG_NOTES (par) = dwarf;
20718 
20719   /* Make sure the CFA isn't left based on IP_REGNUM, to allow unwinding from FP.  */
20720   if (REGNO (base_reg) == IP_REGNUM)
20721     {
20722       RTX_FRAME_RELATED_P (par) = 1;
20723       add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20724     }
20725   else
20726     arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20727 				 base_reg, base_reg);
20728 }
20729 
20730 /* Generate and emit a pattern that will be recognized as an LDRD pattern.  If
20731    an even number of registers is being popped, multiple LDRD patterns are
20732    created for all register pairs.  If an odd number of registers is popped,
20733    the last register is loaded using an LDR pattern.  */
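/* A sketch for SAVED_REGS_MASK = {r4, r5, r6} (no PC):

	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	ldr	r6, [sp], #4

   For {r4, r5, pc} the trailing single load instead becomes a
   post-incremented load into the PC combined with the return.  */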
20734 static void
20735 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20736 {
20737   int num_regs = 0;
20738   int i, j;
20739   rtx par = NULL_RTX;
20740   rtx dwarf = NULL_RTX;
20741   rtx tmp, reg, tmp1;
20742   bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20743 
20744   for (i = 0; i <= LAST_ARM_REGNUM; i++)
20745     if (saved_regs_mask & (1 << i))
20746       num_regs++;
20747 
20748   gcc_assert (num_regs && num_regs <= 16);
20749 
20750   /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
20751      to be popped.  So, if num_regs is even, now it will become odd,
20752      and we can generate pop with PC.  If num_regs is odd, it will be
20753      even now, and ldr with return can be generated for PC.  */
20754   if (return_in_pc)
20755     num_regs--;
20756 
20757   gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20758 
20759   /* Var j iterates over all the registers to gather all the registers in
20760      saved_regs_mask.  Var i gives index of saved registers in stack frame.
20761      A PARALLEL RTX of register-pair is created here, so that pattern for
20762      LDRD can be matched.  As PC is always last register to be popped, and
20763      we have already decremented num_regs if PC, we don't have to worry
20764      about PC in this loop.  */
20765   for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20766     if (saved_regs_mask & (1 << j))
20767       {
20768         /* Create RTX for memory load.  */
20769         reg = gen_rtx_REG (SImode, j);
20770         tmp = gen_rtx_SET (reg,
20771                            gen_frame_mem (SImode,
20772                                plus_constant (Pmode,
20773                                               stack_pointer_rtx, 4 * i)));
20774         RTX_FRAME_RELATED_P (tmp) = 1;
20775 
20776         if (i % 2 == 0)
20777           {
20778             /* When saved-register index (i) is even, the RTX to be emitted is
20779                yet to be created.  Hence create it first.  The LDRD pattern we
20780                are generating is :
20781                [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20782                  (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20783                where target registers need not be consecutive.  */
20784             par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20785             dwarf = NULL_RTX;
20786           }
20787 
20788         /* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
20789            added as 0th element and if i is odd, reg_i is added as 1st element
20790            of LDRD pattern shown above.  */
20791         XVECEXP (par, 0, (i % 2)) = tmp;
20792         dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20793 
20794         if ((i % 2) == 1)
20795           {
20796             /* When saved-register index (i) is odd, RTXs for both the registers
20797                to be loaded are generated in above given LDRD pattern, and the
20798                pattern can be emitted now.  */
20799             par = emit_insn (par);
20800             REG_NOTES (par) = dwarf;
20801 	    RTX_FRAME_RELATED_P (par) = 1;
20802           }
20803 
20804         i++;
20805       }
20806 
20807   /* If the number of registers pushed is odd AND return_in_pc is false, OR the
20808      number of registers is even AND return_in_pc is true, the last register is
20809      popped using LDR.  It can be PC as well.  Hence, adjust the stack first and
20810      then use LDR with post increment.  */
20811 
20812   /* Increment the stack pointer, based on there being
20813      num_regs 4-byte registers to restore.  */
20814   tmp = gen_rtx_SET (stack_pointer_rtx,
20815                      plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20816   RTX_FRAME_RELATED_P (tmp) = 1;
20817   tmp = emit_insn (tmp);
20818   if (!return_in_pc)
20819     {
20820       arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20821 				   stack_pointer_rtx, stack_pointer_rtx);
20822     }
20823 
20824   dwarf = NULL_RTX;
20825 
20826   if (((num_regs % 2) == 1 && !return_in_pc)
20827       || ((num_regs % 2) == 0 && return_in_pc))
20828     {
20829       /* Scan for the single register to be popped.  Skip until the saved
20830          register is found.  */
20831       for (; (saved_regs_mask & (1 << j)) == 0; j++);
20832 
20833       /* Gen LDR with post increment here.  */
20834       tmp1 = gen_rtx_MEM (SImode,
20835                           gen_rtx_POST_INC (SImode,
20836                                             stack_pointer_rtx));
20837       set_mem_alias_set (tmp1, get_frame_alias_set ());
20838 
20839       reg = gen_rtx_REG (SImode, j);
20840       tmp = gen_rtx_SET (reg, tmp1);
20841       RTX_FRAME_RELATED_P (tmp) = 1;
20842       dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20843 
20844       if (return_in_pc)
20845         {
20846           /* If return_in_pc, j must be PC_REGNUM.  */
20847           gcc_assert (j == PC_REGNUM);
20848           par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20849           XVECEXP (par, 0, 0) = ret_rtx;
20850           XVECEXP (par, 0, 1) = tmp;
20851           par = emit_jump_insn (par);
20852         }
20853       else
20854         {
20855           par = emit_insn (tmp);
20856 	  REG_NOTES (par) = dwarf;
20857 	  arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20858 				       stack_pointer_rtx, stack_pointer_rtx);
20859         }
20860 
20861     }
20862   else if ((num_regs % 2) == 1 && return_in_pc)
20863     {
20864       /* There are 2 registers to be popped.  So, generate the pattern
20865          pop_multiple_with_stack_update_and_return to pop in PC.  */
20866       arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20867     }
20868 
20869   return;
20870 }
20871 
20872 /* LDRD in ARM mode needs consecutive registers as operands.  This function
20873    emits LDRD whenever possible, otherwise it emits single-word loads.  It uses
20874    offset addressing and then generates one separate stack update.  This provides
20875    more scheduling freedom, compared to writeback on every load.  However,
20876    if the function returns using load into PC directly
20877    (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20878    before the last load.  TODO: Add a peephole optimization to recognize
20879    the new epilogue sequence as an LDM instruction whenever possible.  TODO: Add
20880    peephole optimization to merge the load at stack-offset zero
20881    with the stack update instruction using load with writeback
20882    in post-index addressing mode.  */
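/* A sketch for SAVED_REGS_MASK = {r4, r5, r6} in ARM mode:

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12

   i.e. offset addressing throughout with a single trailing stack
   update, which is what gives the scheduler freedom to reorder the
   loads.  */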
20883 static void
20884 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20885 {
20886   int j = 0;
20887   int offset = 0;
20888   rtx par = NULL_RTX;
20889   rtx dwarf = NULL_RTX;
20890   rtx tmp, mem;
20891 
20892   /* Restore saved registers.  */
20893   gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20894   j = 0;
20895   while (j <= LAST_ARM_REGNUM)
20896     if (saved_regs_mask & (1 << j))
20897       {
20898         if ((j % 2) == 0
20899             && (saved_regs_mask & (1 << (j + 1)))
20900             && (j + 1) != PC_REGNUM)
20901           {
20902             /* Current register and next register form register pair for which
20903                LDRD can be generated. PC is always the last register popped, and
20904                we handle it separately.  */
20905             if (offset > 0)
20906               mem = gen_frame_mem (DImode,
20907                                    plus_constant (Pmode,
20908                                                   stack_pointer_rtx,
20909                                                   offset));
20910             else
20911               mem = gen_frame_mem (DImode, stack_pointer_rtx);
20912 
20913             tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20914             tmp = emit_insn (tmp);
20915 	    RTX_FRAME_RELATED_P (tmp) = 1;
20916 
20917             /* Generate dwarf info.  */
20918 
20919             dwarf = alloc_reg_note (REG_CFA_RESTORE,
20920                                     gen_rtx_REG (SImode, j),
20921                                     NULL_RTX);
20922             dwarf = alloc_reg_note (REG_CFA_RESTORE,
20923                                     gen_rtx_REG (SImode, j + 1),
20924                                     dwarf);
20925 
20926             REG_NOTES (tmp) = dwarf;
20927 
20928             offset += 8;
20929             j += 2;
20930           }
20931         else if (j != PC_REGNUM)
20932           {
20933             /* Emit a single word load.  */
20934             if (offset > 0)
20935               mem = gen_frame_mem (SImode,
20936                                    plus_constant (Pmode,
20937                                                   stack_pointer_rtx,
20938                                                   offset));
20939             else
20940               mem = gen_frame_mem (SImode, stack_pointer_rtx);
20941 
20942             tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20943             tmp = emit_insn (tmp);
20944 	    RTX_FRAME_RELATED_P (tmp) = 1;
20945 
20946             /* Generate dwarf info.  */
20947             REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20948                                               gen_rtx_REG (SImode, j),
20949                                               NULL_RTX);
20950 
20951             offset += 4;
20952             j += 1;
20953           }
20954         else /* j == PC_REGNUM */
20955           j++;
20956       }
20957     else
20958       j++;
20959 
20960   /* Update the stack.  */
20961   if (offset > 0)
20962     {
20963       tmp = gen_rtx_SET (stack_pointer_rtx,
20964                          plus_constant (Pmode,
20965                                         stack_pointer_rtx,
20966                                         offset));
20967       tmp = emit_insn (tmp);
20968       arm_add_cfa_adjust_cfa_note (tmp, offset,
20969 				   stack_pointer_rtx, stack_pointer_rtx);
20970       offset = 0;
20971     }
20972 
20973   if (saved_regs_mask & (1 << PC_REGNUM))
20974     {
20975       /* Only PC is to be popped.  */
20976       par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20977       XVECEXP (par, 0, 0) = ret_rtx;
20978       tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20979                          gen_frame_mem (SImode,
20980                                         gen_rtx_POST_INC (SImode,
20981                                                           stack_pointer_rtx)));
20982       RTX_FRAME_RELATED_P (tmp) = 1;
20983       XVECEXP (par, 0, 1) = tmp;
20984       par = emit_jump_insn (par);
20985 
20986       /* Generate dwarf info.  */
20987       dwarf = alloc_reg_note (REG_CFA_RESTORE,
20988                               gen_rtx_REG (SImode, PC_REGNUM),
20989                               NULL_RTX);
20990       REG_NOTES (par) = dwarf;
20991       arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20992 				   stack_pointer_rtx, stack_pointer_rtx);
20993     }
20994 }
20995 
20996 /* Calculate the size of the return value that is passed in registers.  */
20997 static unsigned
20998 arm_size_return_regs (void)
20999 {
21000   machine_mode mode;
21001 
21002   if (crtl->return_rtx != 0)
21003     mode = GET_MODE (crtl->return_rtx);
21004   else
21005     mode = DECL_MODE (DECL_RESULT (current_function_decl));
21006 
21007   return GET_MODE_SIZE (mode);
21008 }
21009 
21010 /* Return true if the current function needs to save/restore LR.  */
21011 static bool
21012 thumb_force_lr_save (void)
21013 {
21014   return !cfun->machine->lr_save_eliminated
21015 	 && (!crtl->is_leaf
21016 	     || thumb_far_jump_used_p ()
21017 	     || df_regs_ever_live_p (LR_REGNUM));
21018 }
21019 
21020 /* Return true if CALL is an indirect tail call.  In that case we do not
21021    know whether r3 will be available, so we conservatively assume that it
21022    may be used.  */
21023 static bool
21024 is_indirect_tailcall_p (rtx call)
21025 {
21026   rtx pat = PATTERN (call);
21027 
21028   /* Indirect tail call.  */
21029   pat = XVECEXP (pat, 0, 0);
21030   if (GET_CODE (pat) == SET)
21031     pat = SET_SRC (pat);
21032 
21033   pat = XEXP (XEXP (pat, 0), 0);
21034   return REG_P (pat);
21035 }
21036 
21037 /* Return true if r3 could be used by any of the tail call insns in the
21038    current function.  */
21039 static bool
21040 any_sibcall_could_use_r3 (void)
21041 {
21042   edge_iterator ei;
21043   edge e;
21044 
21045   if (!crtl->tail_call_emit)
21046     return false;
21047   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
21048     if (e->flags & EDGE_SIBCALL)
21049       {
21050 	rtx_insn *call = BB_END (e->src);
21051 	if (!CALL_P (call))
21052 	  call = prev_nonnote_nondebug_insn (call);
21053 	gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
21054 	if (find_regno_fusage (call, USE, 3)
21055 	    || is_indirect_tailcall_p (call))
21056 	  return true;
21057       }
21058   return false;
21059 }
21060 
21061 
21062 /* Compute the distance from register FROM to register TO.
21063    These can be the arg pointer (26), the soft frame pointer (25),
21064    the stack pointer (13) or the hard frame pointer (11).
21065    In thumb mode r7 is used as the soft frame pointer, if needed.
21066    Typical stack layout looks like this:
21067 
21068        old stack pointer -> |    |
21069                              ----
21070                             |    | \
21071                             |    |   saved arguments for
21072                             |    |   vararg functions
21073 			    |    | /
21074                               --
21075    hard FP & arg pointer -> |    | \
21076                             |    |   stack
21077                             |    |   frame
21078                             |    | /
21079                               --
21080                             |    | \
21081                             |    |   call saved
21082                             |    |   registers
21083       soft frame pointer -> |    | /
21084                               --
21085                             |    | \
21086                             |    |   local
21087                             |    |   variables
21088      locals base pointer -> |    | /
21089                               --
21090                             |    | \
21091                             |    |   outgoing
21092                             |    |   arguments
21093    current stack pointer -> |    | /
21094                               --
21095 
21096   For a given function some or all of these stack components
21097   may not be needed, giving rise to the possibility of
21098   eliminating some of the registers.
21099 
21100   The values returned by this function must reflect the behavior
21101   of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
21102 
21103   The sign of the number returned reflects the direction of stack
21104   growth, so the values are positive for all eliminations except
21105   from the soft frame pointer to the hard frame pointer.
21106 
21107   SFP may point just inside the local variables block to ensure correct
21108   alignment.  */
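/* A hedged, illustrative example with hypothetical numbers: 8 bytes of
   pretend args (saved_args = 8), no static chain, 16 bytes of saved core
   registers (saved_regs = 24), no interworking slot (soft_frame = 24),
   32 bytes of locals (locals_base = 56) and 8 bytes of outgoing arguments
   (outgoing_args = 64).  Then eliminating ARG_POINTER into STACK_POINTER
   yields outgoing_args - (saved_args + 4) = 52, and eliminating
   FRAME_POINTER into STACK_POINTER yields outgoing_args - soft_frame = 40,
   matching arm_compute_initial_elimination_offset below.  */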
21109 
21110 
21111 /* Return cached stack offsets.  */
21112 
21113 static arm_stack_offsets *
21114 arm_get_frame_offsets (void)
21115 {
21116   struct arm_stack_offsets *offsets;
21117 
21118   offsets = &cfun->machine->stack_offsets;
21119 
21120   return offsets;
21121 }
21122 
21123 
21124 /* Calculate stack offsets.  These are used to calculate register elimination
21125    offsets and in prologue/epilogue code.  Also calculates which registers
21126    should be saved.  */
21127 
21128 static void
21129 arm_compute_frame_layout (void)
21130 {
21131   struct arm_stack_offsets *offsets;
21132   unsigned long func_type;
21133   int saved;
21134   int core_saved;
21135   HOST_WIDE_INT frame_size;
21136   int i;
21137 
21138   offsets = &cfun->machine->stack_offsets;
21139 
21140   /* Initially this is the size of the local variables.  It will be translated
21141      into an offset once we have determined the size of preceding data.  */
21142   frame_size = ROUND_UP_WORD (get_frame_size ());
21143 
21144   /* Space for variadic functions.  */
21145   offsets->saved_args = crtl->args.pretend_args_size;
21146 
21147   /* In Thumb mode this is incorrect, but never used.  */
21148   offsets->frame
21149     = (offsets->saved_args
21150        + arm_compute_static_chain_stack_bytes ()
21151        + (frame_pointer_needed ? 4 : 0));
21152 
21153   if (TARGET_32BIT)
21154     {
21155       unsigned int regno;
21156 
21157       offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
21158       core_saved = bit_count (offsets->saved_regs_mask) * 4;
21159       saved = core_saved;
21160 
21161       /* We know that SP will be doubleword aligned on entry, and we must
21162 	 preserve that condition at any subroutine call.  We also require the
21163 	 soft frame pointer to be doubleword aligned.  */
21164 
21165       if (TARGET_REALLY_IWMMXT)
21166 	{
21167 	  /* Check for the call-saved iWMMXt registers.  */
21168 	  for (regno = FIRST_IWMMXT_REGNUM;
21169 	       regno <= LAST_IWMMXT_REGNUM;
21170 	       regno++)
21171 	    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
21172 	      saved += 8;
21173 	}
21174 
21175       func_type = arm_current_func_type ();
21176       /* Space for saved VFP registers.  */
21177       if (! IS_VOLATILE (func_type)
21178 	  && TARGET_HARD_FLOAT)
21179 	saved += arm_get_vfp_saved_size ();
21180     }
21181   else /* TARGET_THUMB1 */
21182     {
21183       offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
21184       core_saved = bit_count (offsets->saved_regs_mask) * 4;
21185       saved = core_saved;
21186       if (TARGET_BACKTRACE)
21187 	saved += 16;
21188     }
21189 
21190   /* Saved registers include the stack frame.  */
21191   offsets->saved_regs
21192     = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
21193   offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
21194 
21195   /* A leaf function does not need any stack alignment if it has nothing
21196      on the stack.  */
21197   if (crtl->is_leaf && frame_size == 0
21198       /* However if it calls alloca(), we have a dynamically allocated
21199 	 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
21200       && ! cfun->calls_alloca)
21201     {
21202       offsets->outgoing_args = offsets->soft_frame;
21203       offsets->locals_base = offsets->soft_frame;
21204       return;
21205     }
21206 
21207   /* Ensure SFP has the correct alignment.  */
21208   if (ARM_DOUBLEWORD_ALIGN
21209       && (offsets->soft_frame & 7))
21210     {
21211       offsets->soft_frame += 4;
21212       /* Try to align stack by pushing an extra reg.  Don't bother doing this
21213          when there is a stack frame as the alignment will be rolled into
21214 	 the normal stack adjustment.  */
21215       if (frame_size + crtl->outgoing_args_size == 0)
21216 	{
21217 	  int reg = -1;
21218 
21219 	  /* Register r3 is caller-saved.  Normally it does not need to be
21220 	     saved on entry by the prologue.  However if we choose to save
21221 	     it for padding then we may confuse the compiler into thinking
21222 	     a prologue sequence is required when in fact it is not.  This
21223 	     will occur when shrink-wrapping if r3 is used as a scratch
21224 	     register and there are no other callee-saved writes.
21225 
21226 	     This situation can be avoided when other callee-saved registers
21227 	     are available and r3 is not mandatory if we choose a callee-saved
21228 	     register for padding.  */
21229 	  bool prefer_callee_reg_p = false;
21230 
21231 	  /* If it is safe to use r3, then do so.  This sometimes
21232 	     generates better code on Thumb-2 by avoiding the need to
21233 	     use 32-bit push/pop instructions.  */
21234           if (! any_sibcall_could_use_r3 ()
21235 	      && arm_size_return_regs () <= 12
21236 	      && (offsets->saved_regs_mask & (1 << 3)) == 0
21237 	      && (TARGET_THUMB2
21238 		  || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
21239 	    {
21240 	      reg = 3;
21241 	      if (!TARGET_THUMB2)
21242 		prefer_callee_reg_p = true;
21243 	    }
21244 	  if (reg == -1
21245 	      || prefer_callee_reg_p)
21246 	    {
21247 	      for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
21248 		{
21249 		  /* Avoid fixed registers; they may be changed at
21250 		     arbitrary times so it's unsafe to restore them
21251 		     during the epilogue.  */
21252 		  if (!fixed_regs[i]
21253 		      && (offsets->saved_regs_mask & (1 << i)) == 0)
21254 		    {
21255 		      reg = i;
21256 		      break;
21257 		    }
21258 		}
21259 	    }
21260 
21261 	  if (reg != -1)
21262 	    {
21263 	      offsets->saved_regs += 4;
21264 	      offsets->saved_regs_mask |= (1 << reg);
21265 	    }
21266 	}
21267     }
21268 
21269   offsets->locals_base = offsets->soft_frame + frame_size;
21270   offsets->outgoing_args = (offsets->locals_base
21271 			    + crtl->outgoing_args_size);
21272 
21273   if (ARM_DOUBLEWORD_ALIGN)
21274     {
21275       /* Ensure SP remains doubleword aligned.  */
21276       if (offsets->outgoing_args & 7)
21277 	offsets->outgoing_args += 4;
21278       gcc_assert (!(offsets->outgoing_args & 7));
21279     }
21280 }
21281 
21282 
21283 /* Calculate the relative offsets for the different stack pointers.  Positive
21284    offsets are in the direction of stack growth.  */
21285 
21286 HOST_WIDE_INT
21287 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21288 {
21289   arm_stack_offsets *offsets;
21290 
21291   offsets = arm_get_frame_offsets ();
21292 
21293   /* OK, now we have enough information to compute the distances.
21294      There must be an entry in these switch tables for each pair
21295      of registers in ELIMINABLE_REGS, even if some of the entries
21296      seem to be redundant or useless.  */
21297   switch (from)
21298     {
21299     case ARG_POINTER_REGNUM:
21300       switch (to)
21301 	{
21302 	case THUMB_HARD_FRAME_POINTER_REGNUM:
21303 	  return 0;
21304 
21305 	case FRAME_POINTER_REGNUM:
21306 	  /* This is the reverse of the soft frame pointer
21307 	     to hard frame pointer elimination below.  */
21308 	  return offsets->soft_frame - offsets->saved_args;
21309 
21310 	case ARM_HARD_FRAME_POINTER_REGNUM:
21311 	  /* This is only non-zero in the case where the static chain register
21312 	     is stored above the frame.  */
21313 	  return offsets->frame - offsets->saved_args - 4;
21314 
21315 	case STACK_POINTER_REGNUM:
21316 	  /* If nothing has been pushed on the stack at all
21317 	     then this will return -4.  This *is* correct!  */
21318 	  return offsets->outgoing_args - (offsets->saved_args + 4);
21319 
21320 	default:
21321 	  gcc_unreachable ();
21322 	}
21323       gcc_unreachable ();
21324 
21325     case FRAME_POINTER_REGNUM:
21326       switch (to)
21327 	{
21328 	case THUMB_HARD_FRAME_POINTER_REGNUM:
21329 	  return 0;
21330 
21331 	case ARM_HARD_FRAME_POINTER_REGNUM:
21332 	  /* The hard frame pointer points to the top entry in the
21333 	     stack frame.  The soft frame pointer to the bottom entry
21334 	     in the stack frame.  If there is no stack frame at all,
21335 	     then they are identical.  */
21336 
21337 	  return offsets->frame - offsets->soft_frame;
21338 
21339 	case STACK_POINTER_REGNUM:
21340 	  return offsets->outgoing_args - offsets->soft_frame;
21341 
21342 	default:
21343 	  gcc_unreachable ();
21344 	}
21345       gcc_unreachable ();
21346 
21347     default:
21348       /* You cannot eliminate from the stack pointer.
21349 	 In theory you could eliminate from the hard frame
21350 	 pointer to the stack pointer, but this will never
21351 	 happen, since if a stack frame is not needed the
21352 	 hard frame pointer will never be used.  */
21353       gcc_unreachable ();
21354     }
21355 }
21356 
21357 /* Given FROM and TO register numbers, say whether this elimination is
21358    allowed.  Frame pointer elimination is automatically handled.
21359 
21360    All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
21361    HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
21362    pointer, we must eliminate FRAME_POINTER_REGNUM into
21363    HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21364    ARG_POINTER_REGNUM.  */
21365 
21366 bool
21367 arm_can_eliminate (const int from, const int to)
21368 {
21369   return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21370           (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21371           (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21372           (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21373            true);
21374 }
21375 
21376 /* Emit RTL to save coprocessor registers on function entry.  Returns the
21377    number of bytes pushed.  */
21378 
21379 static int
21380 arm_save_coproc_regs(void)
21381 {
21382   int saved_size = 0;
21383   unsigned reg;
21384   unsigned start_reg;
21385   rtx insn;
21386 
21387   for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21388     if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21389       {
21390 	insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21391 	insn = gen_rtx_MEM (V2SImode, insn);
21392 	insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21393 	RTX_FRAME_RELATED_P (insn) = 1;
21394 	saved_size += 8;
21395       }
21396 
21397   if (TARGET_HARD_FLOAT)
21398     {
21399       start_reg = FIRST_VFP_REGNUM;
21400 
21401       for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21402 	{
21403 	  if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21404 	      && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21405 	    {
21406 	      if (start_reg != reg)
21407 		saved_size += vfp_emit_fstmd (start_reg,
21408 					      (reg - start_reg) / 2);
21409 	      start_reg = reg + 2;
21410 	    }
21411 	}
21412       if (start_reg != reg)
21413 	saved_size += vfp_emit_fstmd (start_reg,
21414 				      (reg - start_reg) / 2);
21415     }
21416   return saved_size;
21417 }
21418 
21419 
21420 /* Set the Thumb frame pointer from the stack pointer.  */
21421 
21422 static void
21423 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21424 {
21425   HOST_WIDE_INT amount;
21426   rtx insn, dwarf;
21427 
21428   amount = offsets->outgoing_args - offsets->locals_base;
21429   if (amount < 1024)
21430     insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21431 				  stack_pointer_rtx, GEN_INT (amount)));
21432   else
21433     {
21434       emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21435       /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
21436          expects the first two operands to be the same.  */
21437       if (TARGET_THUMB2)
21438 	{
21439 	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21440 					stack_pointer_rtx,
21441 					hard_frame_pointer_rtx));
21442 	}
21443       else
21444 	{
21445 	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21446 					hard_frame_pointer_rtx,
21447 					stack_pointer_rtx));
21448 	}
21449       dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21450 			   plus_constant (Pmode, stack_pointer_rtx, amount));
21451       RTX_FRAME_RELATED_P (dwarf) = 1;
21452       add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21453     }
21454 
21455   RTX_FRAME_RELATED_P (insn) = 1;
21456 }
21457 
21458 struct scratch_reg {
21459   rtx reg;
21460   bool saved;
21461 };
21462 
21463 /* Return a short-lived scratch register for use as a 2nd scratch register on
21464    function entry after the registers are saved in the prologue.  This register
21465    must be released by means of release_scratch_register_on_entry.  IP is not
21466    considered since it is always used as the 1st scratch register if available.
21467 
21468    REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21469    mask of live registers.  */
21470 
21471 static void
21472 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21473 			       unsigned long live_regs)
21474 {
21475   int regno = -1;
21476 
21477   sr->saved = false;
21478 
21479   if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21480     regno = LR_REGNUM;
21481   else
21482     {
21483       unsigned int i;
21484 
21485       for (i = 4; i < 11; i++)
21486 	if (regno1 != i && (live_regs & (1 << i)) != 0)
21487 	  {
21488 	    regno = i;
21489 	    break;
21490 	  }
21491 
21492       if (regno < 0)
21493 	{
21494 	  /* If IP is used as the 1st scratch register for a nested function,
21495 	     then either r3 wasn't available or is used to preserve IP.  */
21496 	  if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21497 	    regno1 = 3;
21498 	  regno = (regno1 == 3 ? 2 : 3);
21499 	  sr->saved
21500 	    = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21501 			       regno);
21502 	}
21503     }
21504 
21505   sr->reg = gen_rtx_REG (SImode, regno);
21506   if (sr->saved)
21507     {
21508       rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21509       rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21510       rtx x = gen_rtx_SET (stack_pointer_rtx,
21511 		           plus_constant (Pmode, stack_pointer_rtx, -4));
21512       RTX_FRAME_RELATED_P (insn) = 1;
21513       add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21514     }
21515 }
21516 
21517 /* Release a scratch register obtained from the preceding function.  */
21518 
21519 static void
21520 release_scratch_register_on_entry (struct scratch_reg *sr)
21521 {
21522   if (sr->saved)
21523     {
21524       rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21525       rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21526       rtx x = gen_rtx_SET (stack_pointer_rtx,
21527 			   plus_constant (Pmode, stack_pointer_rtx, 4));
21528       RTX_FRAME_RELATED_P (insn) = 1;
21529       add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21530     }
21531 }
21532 
21533 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21534 
21535 #if PROBE_INTERVAL > 4096
21536 #error Cannot use indexed addressing mode for stack probing
21537 #endif
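/* Informational note: with GCC's usual default of
   STACK_CHECK_PROBE_INTERVAL_EXP == 12, PROBE_INTERVAL is 4096 bytes,
   i.e. one probe per page, which satisfies the check above.  The actual
   value comes from the definition above, not from this comment.  */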
21538 
21539 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21540    inclusive.  These are offsets from the current stack pointer.  REGNO1
21541    is the index number of the 1st scratch register and LIVE_REGS is the
21542    mask of live registers.  */
21543 
21544 static void
21545 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21546 			    unsigned int regno1, unsigned long live_regs)
21547 {
21548   rtx reg1 = gen_rtx_REG (Pmode, regno1);
21549 
21550   /* See if we have a constant small number of probes to generate.  If so,
21551      that's the easy case.  */
21552   if (size <= PROBE_INTERVAL)
21553     {
21554       emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21555       emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21556       emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21557     }
21558 
21559   /* The run-time loop is made up of 10 insns in the generic case while the
21560      compile-time loop is made up of 4+2*(n-2) insns for n intervals.  */
21561   else if (size <= 5 * PROBE_INTERVAL)
21562     {
21563       HOST_WIDE_INT i, rem;
21564 
21565       emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21566       emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21567       emit_stack_probe (reg1);
21568 
21569       /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21570 	 it exceeds SIZE.  If only two probes are needed, this will not
21571 	 generate any code.  Then probe at FIRST + SIZE.  */
21572       for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21573 	{
21574 	  emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21575 	  emit_stack_probe (reg1);
21576 	}
21577 
21578       rem = size - (i - PROBE_INTERVAL);
21579       if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21580 	{
21581 	  emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21582 	  emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21583 	}
21584       else
21585 	emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21586     }
21587 
21588   /* Otherwise, do the same as above, but in a loop.  Note that we must be
21589      extra careful with variables wrapping around because we might be at
21590      the very top (or the very bottom) of the address space and we have
21591      to be able to handle this case properly; in particular, we use an
21592      equality test for the loop condition.  */
21593   else
21594     {
21595       HOST_WIDE_INT rounded_size;
21596       struct scratch_reg sr;
21597 
21598       get_scratch_register_on_entry (&sr, regno1, live_regs);
21599 
21600       emit_move_insn (reg1, GEN_INT (first));
21601 
21602 
21603       /* Step 1: round SIZE to the previous multiple of the interval.  */
21604 
21605       rounded_size = size & -PROBE_INTERVAL;
21606       emit_move_insn (sr.reg, GEN_INT (rounded_size));
21607 
21608 
21609       /* Step 2: compute initial and final value of the loop counter.  */
21610 
21611       /* TEST_ADDR = SP + FIRST.  */
21612       emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21613 
21614       /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
21615       emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21616 
21617 
21618       /* Step 3: the loop
21619 
21620 	 do
21621 	   {
21622 	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21623 	     probe at TEST_ADDR
21624 	   }
21625 	 while (TEST_ADDR != LAST_ADDR)
21626 
21627 	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21628 	 until it is equal to ROUNDED_SIZE.  */
21629 
21630       emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21631 
21632 
21633       /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21634 	 that SIZE is equal to ROUNDED_SIZE.  */
21635 
21636       if (size != rounded_size)
21637 	{
21638 	  HOST_WIDE_INT rem = size - rounded_size;
21639 
21640 	  if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21641 	    {
21642 	      emit_set_insn (sr.reg,
21643 			     plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21644 	      emit_stack_probe (plus_constant (Pmode, sr.reg,
21645 					       PROBE_INTERVAL - rem));
21646 	    }
21647 	  else
21648 	    emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21649 	}
21650 
21651       release_scratch_register_on_entry (&sr);
21652     }
21653 
21654   /* Make sure nothing is scheduled before we are done.  */
21655   emit_insn (gen_blockage ());
21656 }
21657 
21658 /* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
21659    absolute addresses.  */
21660 
21661 const char *
21662 output_probe_stack_range (rtx reg1, rtx reg2)
21663 {
21664   static int labelno = 0;
21665   char loop_lab[32];
21666   rtx xops[2];
21667 
21668   ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21669 
21670   /* Loop.  */
21671   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21672 
21673   /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
21674   xops[0] = reg1;
21675   xops[1] = GEN_INT (PROBE_INTERVAL);
21676   output_asm_insn ("sub\t%0, %0, %1", xops);
21677 
21678   /* Probe at TEST_ADDR.  */
21679   output_asm_insn ("str\tr0, [%0, #0]", xops);
21680 
21681   /* Test if TEST_ADDR == LAST_ADDR.  */
21682   xops[1] = reg2;
21683   output_asm_insn ("cmp\t%0, %1", xops);
21684 
21685   /* Branch.  */
21686   fputs ("\tbne\t", asm_out_file);
21687   assemble_name_raw (asm_out_file, loop_lab);
21688   fputc ('\n', asm_out_file);
21689 
21690   return "";
21691 }
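/* For illustration only: assuming the default 4096-byte PROBE_INTERVAL and
   r4/r5 as REG1/REG2, the loop printed above reads roughly

       .LPSRL0:
               sub     r4, r4, #4096   @ TEST_ADDR -= PROBE_INTERVAL
               str     r0, [r4, #0]    @ probe at TEST_ADDR
               cmp     r4, r5          @ reached LAST_ADDR?
               bne     .LPSRL0

   The label name and registers are hypothetical.  */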
21692 
21693 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21694    function.  */
21695 void
21696 arm_expand_prologue (void)
21697 {
21698   rtx amount;
21699   rtx insn;
21700   rtx ip_rtx;
21701   unsigned long live_regs_mask;
21702   unsigned long func_type;
21703   int fp_offset = 0;
21704   int saved_pretend_args = 0;
21705   int saved_regs = 0;
21706   unsigned HOST_WIDE_INT args_to_push;
21707   HOST_WIDE_INT size;
21708   arm_stack_offsets *offsets;
21709   bool clobber_ip;
21710 
21711   func_type = arm_current_func_type ();
21712 
21713   /* Naked functions don't have prologues.  */
21714   if (IS_NAKED (func_type))
21715     {
21716       if (flag_stack_usage_info)
21717 	current_function_static_stack_size = 0;
21718       return;
21719     }
21720 
21721   /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
21722   args_to_push = crtl->args.pretend_args_size;
21723 
21724   /* Compute which register we will have to save onto the stack.  */
21725   offsets = arm_get_frame_offsets ();
21726   live_regs_mask = offsets->saved_regs_mask;
21727 
21728   ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21729 
21730   if (IS_STACKALIGN (func_type))
21731     {
21732       rtx r0, r1;
21733 
21734       /* Handle a word-aligned stack pointer.  We generate the following:
21735 
21736 	  mov r0, sp
21737 	  bic r1, r0, #7
21738 	  mov sp, r1
21739 	  <save and restore r0 in normal prologue/epilogue>
21740 	  mov sp, r0
21741 	  bx lr
21742 
21743 	 The unwinder doesn't need to know about the stack realignment.
21744 	 Just tell it we saved SP in r0.  */
21745       gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21746 
21747       r0 = gen_rtx_REG (SImode, R0_REGNUM);
21748       r1 = gen_rtx_REG (SImode, R1_REGNUM);
21749 
21750       insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21751       RTX_FRAME_RELATED_P (insn) = 1;
21752       add_reg_note (insn, REG_CFA_REGISTER, NULL);
21753 
21754       emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21755 
21756       /* ??? The CFA changes here, which may cause GDB to conclude that it
21757 	 has entered a different function.  That said, the unwind info is
21758 	 correct, individually, before and after this instruction because
21759 	 we've described the save of SP, which will override the default
21760 	 handling of SP as restoring from the CFA.  */
21761       emit_insn (gen_movsi (stack_pointer_rtx, r1));
21762     }
21763 
21764   /* Let's compute the static_chain_stack_bytes required and store it.  Right
21765      now the value must be -1 as stored by arm_init_machine_status ().  */
21766   cfun->machine->static_chain_stack_bytes
21767     = arm_compute_static_chain_stack_bytes ();
21768 
21769   /* The static chain register is the same as the IP register.  If it is
21770      clobbered when creating the frame, we need to save and restore it.  */
21771   clobber_ip = IS_NESTED (func_type)
21772 	       && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21773 		   || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21774 			|| flag_stack_clash_protection)
21775 		       && !df_regs_ever_live_p (LR_REGNUM)
21776 		       && arm_r3_live_at_start_p ()));
21777 
21778   /* Find somewhere to store IP whilst the frame is being created.
21779      We try the following places in order:
21780 
21781        1. The last argument register r3 if it is available.
21782        2. A slot on the stack above the frame if there are no
21783 	  arguments to push onto the stack.
21784        3. Register r3 again, after pushing the argument registers
21785 	  onto the stack, if this is a varargs function.
21786        4. The last slot on the stack created for the arguments to
21787 	  push, if this isn't a varargs function.
21788 
21789      Note - we only need to tell the dwarf2 backend about the SP
21790      adjustment in the second variant; the static chain register
21791      doesn't need to be unwound, as it doesn't contain a value
21792      inherited from the caller.  */
21793   if (clobber_ip)
21794     {
21795       if (!arm_r3_live_at_start_p ())
21796 	insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21797       else if (args_to_push == 0)
21798 	{
21799 	  rtx addr, dwarf;
21800 
21801 	  gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21802 	  saved_regs += 4;
21803 
21804 	  addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21805 	  insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21806 	  fp_offset = 4;
21807 
21808 	  /* Just tell the dwarf backend that we adjusted SP.  */
21809 	  dwarf = gen_rtx_SET (stack_pointer_rtx,
21810 			       plus_constant (Pmode, stack_pointer_rtx,
21811 					      -fp_offset));
21812 	  RTX_FRAME_RELATED_P (insn) = 1;
21813 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21814 	}
21815       else
21816 	{
21817 	  /* Store the args on the stack.  */
21818 	  if (cfun->machine->uses_anonymous_args)
21819 	    {
21820 	      insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21821 					  (0xf0 >> (args_to_push / 4)) & 0xf);
21822 	      emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21823 	      saved_pretend_args = 1;
21824 	    }
21825 	  else
21826 	    {
21827 	      rtx addr, dwarf;
21828 
21829 	      if (args_to_push == 4)
21830 		addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21831 	      else
21832 		addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21833 					   plus_constant (Pmode,
21834 							  stack_pointer_rtx,
21835 							  -args_to_push));
21836 
21837 	      insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21838 
21839 	      /* Just tell the dwarf backend that we adjusted SP.  */
21840 	      dwarf = gen_rtx_SET (stack_pointer_rtx,
21841 				   plus_constant (Pmode, stack_pointer_rtx,
21842 						  -args_to_push));
21843 	      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21844 	    }
21845 
21846 	  RTX_FRAME_RELATED_P (insn) = 1;
21847 	  fp_offset = args_to_push;
21848 	  args_to_push = 0;
21849 	}
21850     }
21851 
21852   if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21853     {
21854       if (IS_INTERRUPT (func_type))
21855 	{
21856 	  /* Interrupt functions must not corrupt any registers.
21857 	     Creating a frame pointer however, corrupts the IP
21858 	     register, so we must push it first.  */
21859 	  emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21860 
21861 	  /* Do not set RTX_FRAME_RELATED_P on this insn.
21862 	     The dwarf stack unwinding code only wants to see one
21863 	     stack decrement per function, and this is not it.  If
21864 	     this instruction is labeled as being part of the frame
21865 	     creation sequence then dwarf2out_frame_debug_expr will
21866 	     die when it encounters the assignment of IP to FP
21867 	     later on, since the use of SP here establishes SP as
21868 	     the CFA register and not IP.
21869 
21870 	     Anyway this instruction is not really part of the stack
21871 	     frame creation although it is part of the prologue.  */
21872 	}
21873 
21874       insn = emit_set_insn (ip_rtx,
21875 			    plus_constant (Pmode, stack_pointer_rtx,
21876 					   fp_offset));
21877       RTX_FRAME_RELATED_P (insn) = 1;
21878     }
21879 
21880   if (args_to_push)
21881     {
21882       /* Push the argument registers, or reserve space for them.  */
21883       if (cfun->machine->uses_anonymous_args)
21884 	insn = emit_multi_reg_push
21885 	  ((0xf0 >> (args_to_push / 4)) & 0xf,
21886 	   (0xf0 >> (args_to_push / 4)) & 0xf);
21887       else
21888 	insn = emit_insn
21889 	  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21890 		       GEN_INT (- args_to_push)));
21891       RTX_FRAME_RELATED_P (insn) = 1;
21892     }
21893 
21894   /* If this is an interrupt service routine, and the link register
21895      is going to be pushed, and we're not generating the extra
21896      push of IP (needed when a frame pointer is needed and the frame
21897      layout follows the APCS), then subtracting four from LR now will mean
21898      that the function return can be done with a single instruction.  */
21899   if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21900       && (live_regs_mask & (1 << LR_REGNUM)) != 0
21901       && !(frame_pointer_needed && TARGET_APCS_FRAME)
21902       && TARGET_ARM)
21903     {
21904       rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21905 
21906       emit_set_insn (lr, plus_constant (SImode, lr, -4));
21907     }
21908 
21909   if (live_regs_mask)
21910     {
21911       unsigned long dwarf_regs_mask = live_regs_mask;
21912 
21913       saved_regs += bit_count (live_regs_mask) * 4;
21914       if (optimize_size && !frame_pointer_needed
21915 	  && saved_regs == offsets->saved_regs - offsets->saved_args)
21916 	{
21917 	  /* If no coprocessor registers are being pushed and we don't have
21918 	     to worry about a frame pointer then push extra registers to
21919 	     create the stack frame.  This is done in a way that does not
21920 	     alter the frame layout, so is independent of the epilogue.  */
21921 	  int n;
21922 	  int frame;
21923 	  n = 0;
21924 	  while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21925 	    n++;
21926 	  frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21927 	  if (frame && n * 4 >= frame)
21928 	    {
21929 	      n = frame / 4;
21930 	      live_regs_mask |= (1 << n) - 1;
21931 	      saved_regs += frame;
21932 	    }
21933 	}
21934 
21935       if (TARGET_LDRD
21936 	  && current_tune->prefer_ldrd_strd
21937           && !optimize_function_for_size_p (cfun))
21938         {
21939 	  gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21940           if (TARGET_THUMB2)
21941 	    thumb2_emit_strd_push (live_regs_mask);
21942           else if (TARGET_ARM
21943                    && !TARGET_APCS_FRAME
21944                    && !IS_INTERRUPT (func_type))
21945 	    arm_emit_strd_push (live_regs_mask);
21946           else
21947             {
21948 	      insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21949               RTX_FRAME_RELATED_P (insn) = 1;
21950             }
21951         }
21952       else
21953         {
21954 	  insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21955           RTX_FRAME_RELATED_P (insn) = 1;
21956         }
21957     }
21958 
21959   if (! IS_VOLATILE (func_type))
21960     saved_regs += arm_save_coproc_regs ();
21961 
21962   if (frame_pointer_needed && TARGET_ARM)
21963     {
21964       /* Create the new frame pointer.  */
21965       if (TARGET_APCS_FRAME)
21966 	{
21967 	  insn = GEN_INT (-(4 + args_to_push + fp_offset));
21968 	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21969 	  RTX_FRAME_RELATED_P (insn) = 1;
21970 	}
21971       else
21972 	{
21973 	  insn = GEN_INT (saved_regs - (4 + fp_offset));
21974 	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21975 					stack_pointer_rtx, insn));
21976 	  RTX_FRAME_RELATED_P (insn) = 1;
21977 	}
21978     }
21979 
21980   size = offsets->outgoing_args - offsets->saved_args;
21981   if (flag_stack_usage_info)
21982     current_function_static_stack_size = size;
21983 
21984   /* If this isn't an interrupt service routine and we have a frame, then do
21985      stack checking.  We use IP as the first scratch register, except for the
21986      non-APCS nested functions if LR or r3 are available (see clobber_ip).  */
21987   if (!IS_INTERRUPT (func_type)
21988       && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21989 	  || flag_stack_clash_protection))
21990     {
21991       unsigned int regno;
21992 
21993       if (!IS_NESTED (func_type) || clobber_ip)
21994 	regno = IP_REGNUM;
21995       else if (df_regs_ever_live_p (LR_REGNUM))
21996 	regno = LR_REGNUM;
21997       else
21998 	regno = 3;
21999 
22000       if (crtl->is_leaf && !cfun->calls_alloca)
22001 	{
22002 	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
22003 	    arm_emit_probe_stack_range (get_stack_check_protect (),
22004 					size - get_stack_check_protect (),
22005 					regno, live_regs_mask);
22006 	}
22007       else if (size > 0)
22008 	arm_emit_probe_stack_range (get_stack_check_protect (), size,
22009 				    regno, live_regs_mask);
22010     }
22011 
22012   /* Recover the static chain register.  */
22013   if (clobber_ip)
22014     {
22015       if (!arm_r3_live_at_start_p () || saved_pretend_args)
22016 	insn = gen_rtx_REG (SImode, 3);
22017       else
22018 	{
22019 	  insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
22020 	  insn = gen_frame_mem (SImode, insn);
22021 	}
22022       emit_set_insn (ip_rtx, insn);
22023       emit_insn (gen_force_register_use (ip_rtx));
22024     }
22025 
22026   if (offsets->outgoing_args != offsets->saved_args + saved_regs)
22027     {
22028       /* This add can produce multiple insns for a large constant, so we
22029 	 need to get tricky.  */
22030       rtx_insn *last = get_last_insn ();
22031 
22032       amount = GEN_INT (offsets->saved_args + saved_regs
22033 			- offsets->outgoing_args);
22034 
22035       insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22036 				    amount));
22037       do
22038 	{
22039 	  last = last ? NEXT_INSN (last) : get_insns ();
22040 	  RTX_FRAME_RELATED_P (last) = 1;
22041 	}
22042       while (last != insn);
22043 
22044       /* If the frame pointer is needed, emit a special barrier that
22045 	 will prevent the scheduler from moving stores to the frame
22046 	 before the stack adjustment.  */
22047       if (frame_pointer_needed)
22048 	emit_insn (gen_stack_tie (stack_pointer_rtx,
22049 				  hard_frame_pointer_rtx));
22050     }
22051 
22052 
22053   if (frame_pointer_needed && TARGET_THUMB2)
22054     thumb_set_frame_pointer (offsets);
22055 
22056   if (flag_pic && arm_pic_register != INVALID_REGNUM)
22057     {
22058       unsigned long mask;
22059 
22060       mask = live_regs_mask;
22061       mask &= THUMB2_WORK_REGS;
22062       if (!IS_NESTED (func_type))
22063 	mask |= (1 << IP_REGNUM);
22064       arm_load_pic_register (mask);
22065     }
22066 
22067   /* If we are profiling, make sure no instructions are scheduled before
22068      the call to mcount.  Similarly if the user has requested no
22069      scheduling in the prolog.  Similarly if we want non-call exceptions
22070      using the EABI unwinder, to prevent faulting instructions from being
22071      swapped with a stack adjustment.  */
22072   if (crtl->profile || !TARGET_SCHED_PROLOG
22073       || (arm_except_unwind_info (&global_options) == UI_TARGET
22074 	  && cfun->can_throw_non_call_exceptions))
22075     emit_insn (gen_blockage ());
22076 
22077   /* If the link register is being kept alive, with the return address in it,
22078      then make sure that it does not get reused by the ce2 pass.  */
22079   if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
22080     cfun->machine->lr_save_eliminated = 1;
22081 }
22082 
22083 /* Print condition code to STREAM.  Helper function for arm_print_operand.  */
22084 static void
22085 arm_print_condition (FILE *stream)
22086 {
22087   if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
22088     {
22089       /* Branch conversion is not implemented for Thumb-2.  */
22090       if (TARGET_THUMB)
22091 	{
22092 	  output_operand_lossage ("predicated Thumb instruction");
22093 	  return;
22094 	}
22095       if (current_insn_predicate != NULL)
22096 	{
22097 	  output_operand_lossage
22098 	    ("predicated instruction in conditional sequence");
22099 	  return;
22100 	}
22101 
22102       fputs (arm_condition_codes[arm_current_cc], stream);
22103     }
22104   else if (current_insn_predicate)
22105     {
22106       enum arm_cond_code code;
22107 
22108       if (TARGET_THUMB1)
22109 	{
22110 	  output_operand_lossage ("predicated Thumb instruction");
22111 	  return;
22112 	}
22113 
22114       code = get_arm_condition_code (current_insn_predicate);
22115       fputs (arm_condition_codes[code], stream);
22116     }
22117 }
22118 
22119 
22120 /* Globally reserved letters: acln
22121    Punctuation letters currently used: @_|?().!#
22122    Lower case letters currently used: bcdefhimpqtvwxyz
22123    Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
22124    Letters previously used, but now deprecated/obsolete: sVWXYZ.
22125 
22126    Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
22127 
22128    If CODE is 'd', then X is a condition operand and the instruction
22129    should only be executed if the condition is true.
22130    If CODE is 'D', then X is a condition operand and the instruction
22131    should only be executed if the condition is false: however, if the mode
22132    of the comparison is CCFPEmode, then always execute the instruction -- we
22133    do this because in these circumstances !GE does not necessarily imply LT;
22134    in these cases the instruction pattern will take care to make sure that
22135    an instruction containing %d will follow, thereby undoing the effects of
22136    doing this instruction unconditionally.
22137    If CODE is 'N' then X is a floating point operand that must be negated
22138    before output.
22139    If CODE is 'B' then output a bitwise inverted value of X (a const int).
22140    If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
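/* A few hedged, illustrative examples of the codes handled below:
     '%?'  prints the current condition code, if any (see arm_print_condition);
     '%B'  on the CONST_INT 0 prints -1 (the sign-extended bitwise inverse);
     '%M'  on the DImode register r4 prints "{r4-r5}".
   These are for orientation only; the switch below is authoritative.  */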
22141 static void
22142 arm_print_operand (FILE *stream, rtx x, int code)
22143 {
22144   switch (code)
22145     {
22146     case '@':
22147       fputs (ASM_COMMENT_START, stream);
22148       return;
22149 
22150     case '_':
22151       fputs (user_label_prefix, stream);
22152       return;
22153 
22154     case '|':
22155       fputs (REGISTER_PREFIX, stream);
22156       return;
22157 
22158     case '?':
22159       arm_print_condition (stream);
22160       return;
22161 
22162     case '.':
22163       /* The current condition code for a condition code setting instruction.
22164 	 Preceded by 's' in unified syntax, otherwise followed by 's'.  */
22165       fputc('s', stream);
22166       arm_print_condition (stream);
22167       return;
22168 
22169     case '!':
22170       /* If the instruction is conditionally executed then print
22171 	 the current condition code, otherwise print 's'.  */
22172       gcc_assert (TARGET_THUMB2);
22173       if (current_insn_predicate)
22174 	arm_print_condition (stream);
22175       else
22176 	fputc('s', stream);
22177       break;
22178 
22179     /* %# is a "break" sequence. It doesn't output anything, but is used to
22180        separate e.g. operand numbers from following text, if that text consists
22181        of further digits which we don't want to be part of the operand
22182        number.  */
22183     case '#':
22184       return;
22185 
22186     case 'N':
22187       {
22188 	REAL_VALUE_TYPE r;
22189 	r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
22190 	fprintf (stream, "%s", fp_const_from_val (&r));
22191       }
22192       return;
22193 
22194     /* An integer or symbol address without a preceding # sign.  */
22195     case 'c':
22196       switch (GET_CODE (x))
22197 	{
22198 	case CONST_INT:
22199 	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
22200 	  break;
22201 
22202 	case SYMBOL_REF:
22203 	  output_addr_const (stream, x);
22204 	  break;
22205 
22206 	case CONST:
22207 	  if (GET_CODE (XEXP (x, 0)) == PLUS
22208 	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
22209 	    {
22210 	      output_addr_const (stream, x);
22211 	      break;
22212 	    }
22213 	  /* Fall through.  */
22214 
22215 	default:
22216 	  output_operand_lossage ("Unsupported operand for code '%c'", code);
22217 	}
22218       return;
22219 
22220     /* An integer that we want to print in HEX.  */
22221     case 'x':
22222       switch (GET_CODE (x))
22223 	{
22224 	case CONST_INT:
22225 	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
22226 	  break;
22227 
22228 	default:
22229 	  output_operand_lossage ("Unsupported operand for code '%c'", code);
22230 	}
22231       return;
22232 
22233     case 'B':
22234       if (CONST_INT_P (x))
22235 	{
22236 	  HOST_WIDE_INT val;
22237 	  val = ARM_SIGN_EXTEND (~INTVAL (x));
22238 	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
22239 	}
22240       else
22241 	{
22242 	  putc ('~', stream);
22243 	  output_addr_const (stream, x);
22244 	}
22245       return;
22246 
22247     case 'b':
22248       /* Print the log2 of a CONST_INT.  */
22249       {
22250 	HOST_WIDE_INT val;
22251 
22252 	if (!CONST_INT_P (x)
22253 	    || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
22254 	  output_operand_lossage ("Unsupported operand for code '%c'", code);
22255 	else
22256 	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22257       }
22258       return;
22259 
22260     case 'L':
22261       /* The low 16 bits of an immediate constant.  */
22262       fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
22263       return;
22264 
22265     case 'i':
22266       fprintf (stream, "%s", arithmetic_instr (x, 1));
22267       return;
22268 
22269     case 'I':
22270       fprintf (stream, "%s", arithmetic_instr (x, 0));
22271       return;
22272 
22273     case 'S':
22274       {
22275 	HOST_WIDE_INT val;
22276 	const char *shift;
22277 
22278 	shift = shift_op (x, &val);
22279 
22280 	if (shift)
22281 	  {
22282 	    fprintf (stream, ", %s ", shift);
22283 	    if (val == -1)
22284 	      arm_print_operand (stream, XEXP (x, 1), 0);
22285 	    else
22286 	      fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22287 	  }
22288       }
22289       return;
22290 
22291       /* An explanation of the 'Q', 'R' and 'H' register operands:
22292 
22293 	 In a pair of registers containing a DI or DF value the 'Q'
22294 	 operand returns the register number of the register containing
22295 	 the least significant part of the value.  The 'R' operand returns
22296 	 the register number of the register containing the most
22297 	 significant part of the value.
22298 
22299 	 The 'H' operand returns the higher of the two register numbers.
22300 	 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22301 	 same as the 'Q' operand, since the most significant part of the
22302 	 value is held in the lower number register.  The reverse is true
22303 	 on systems where WORDS_BIG_ENDIAN is false.
22304 
22305 	 The purpose of these operands is to distinguish between cases
22306 	 where the endian-ness of the values is important (for example
22307 	 when they are added together), and cases where the endian-ness
22308 	 is irrelevant, but the order of register operations is important.
22309 	 For example when loading a value from memory into a register
22310 	 pair, the endian-ness does not matter.  Provided that the value
22311 	 from the lower memory address is put into the lower numbered
22312 	 register, and the value from the higher address is put into the
22313 	 higher numbered register, the load will work regardless of whether
22314 	 the value being loaded is big-wordian or little-wordian.  The
22315 	 order of the two register loads can matter however, if the address
22316 	 of the memory location is actually held in one of the registers
22317 	 being overwritten by the load.
22318 
22319 	 The 'Q' and 'R' constraints are also available for 64-bit
22320 	 constants.  */
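      /* Worked example (little-endian, WORDS_BIG_ENDIAN false): for a DImode
	 value held in the register pair r4/r5, %Q prints r4 (least
	 significant word), %R prints r5 (most significant word) and %H
	 prints r5 (the higher register number).  On a WORDS_BIG_ENDIAN
	 target %Q and %R swap, while %H still prints r5.  */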
22321     case 'Q':
22322       if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22323 	{
22324 	  rtx part = gen_lowpart (SImode, x);
22325 	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22326 	  return;
22327 	}
22328 
22329       if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22330 	{
22331 	  output_operand_lossage ("invalid operand for code '%c'", code);
22332 	  return;
22333 	}
22334 
22335       asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22336       return;
22337 
22338     case 'R':
22339       if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22340 	{
22341 	  machine_mode mode = GET_MODE (x);
22342 	  rtx part;
22343 
22344 	  if (mode == VOIDmode)
22345 	    mode = DImode;
22346 	  part = gen_highpart_mode (SImode, mode, x);
22347 	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22348 	  return;
22349 	}
22350 
22351       if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22352 	{
22353 	  output_operand_lossage ("invalid operand for code '%c'", code);
22354 	  return;
22355 	}
22356 
22357       asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22358       return;
22359 
22360     case 'H':
22361       if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22362 	{
22363 	  output_operand_lossage ("invalid operand for code '%c'", code);
22364 	  return;
22365 	}
22366 
22367       asm_fprintf (stream, "%r", REGNO (x) + 1);
22368       return;
22369 
22370     case 'J':
22371       if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22372 	{
22373 	  output_operand_lossage ("invalid operand for code '%c'", code);
22374 	  return;
22375 	}
22376 
22377       asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22378       return;
22379 
22380     case 'K':
22381       if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22382 	{
22383 	  output_operand_lossage ("invalid operand for code '%c'", code);
22384 	  return;
22385 	}
22386 
22387       asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22388       return;
22389 
22390     case 'm':
22391       asm_fprintf (stream, "%r",
22392 		   REG_P (XEXP (x, 0))
22393 		   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22394       return;
22395 
22396     case 'M':
22397       asm_fprintf (stream, "{%r-%r}",
22398 		   REGNO (x),
22399 		   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22400       return;
22401 
22402     /* Like 'M', but writing doubleword vector registers, for use by Neon
22403        insns.  */
22404     case 'h':
22405       {
22406         int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22407         int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22408         if (numregs == 1)
22409           asm_fprintf (stream, "{d%d}", regno);
22410         else
22411           asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22412       }
22413       return;
22414 
22415     case 'd':
22416       /* CONST_TRUE_RTX means always -- that's the default.  */
22417       if (x == const_true_rtx)
22418 	return;
22419 
22420       if (!COMPARISON_P (x))
22421 	{
22422 	  output_operand_lossage ("invalid operand for code '%c'", code);
22423 	  return;
22424 	}
22425 
22426       fputs (arm_condition_codes[get_arm_condition_code (x)],
22427 	     stream);
22428       return;
22429 
22430     case 'D':
22431       /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
22432 	 want to do that.  */
22433       if (x == const_true_rtx)
22434 	{
22435 	  output_operand_lossage ("instruction never executed");
22436 	  return;
22437 	}
22438       if (!COMPARISON_P (x))
22439 	{
22440 	  output_operand_lossage ("invalid operand for code '%c'", code);
22441 	  return;
22442 	}
22443 
22444       fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22445 				 (get_arm_condition_code (x))],
22446 	     stream);
22447       return;
22448 
22449     case 's':
22450     case 'V':
22451     case 'W':
22452     case 'X':
22453     case 'Y':
22454     case 'Z':
22455       /* Former Maverick support, removed after GCC-4.7.  */
22456       output_operand_lossage ("obsolete Maverick format code '%c'", code);
22457       return;
22458 
22459     case 'U':
22460       if (!REG_P (x)
22461 	  || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22462 	  || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22463 	/* Bad value for wCG register number.  */
22464 	{
22465 	  output_operand_lossage ("invalid operand for code '%c'", code);
22466 	  return;
22467 	}
22468 
22469       else
22470 	fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22471       return;
22472 
22473       /* Print an iWMMXt control register name.  */
22474     case 'w':
22475       if (!CONST_INT_P (x)
22476 	  || INTVAL (x) < 0
22477 	  || INTVAL (x) >= 16)
22478 	/* Bad value for wC register number.  */
22479 	{
22480 	  output_operand_lossage ("invalid operand for code '%c'", code);
22481 	  return;
22482 	}
22483 
22484       else
22485 	{
22486 	  static const char * wc_reg_names [16] =
22487 	    {
22488 	      "wCID",  "wCon",  "wCSSF", "wCASF",
22489 	      "wC4",   "wC5",   "wC6",   "wC7",
22490 	      "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22491 	      "wC12",  "wC13",  "wC14",  "wC15"
22492 	    };
22493 
22494 	  fputs (wc_reg_names [INTVAL (x)], stream);
22495 	}
22496       return;
22497 
22498     /* Print the high single-precision register of a VFP double-precision
22499        register.  */
22500     case 'p':
22501       {
22502         machine_mode mode = GET_MODE (x);
22503         int regno;
22504 
22505         if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22506           {
22507 	    output_operand_lossage ("invalid operand for code '%c'", code);
22508 	    return;
22509           }
22510 
22511         regno = REGNO (x);
22512         if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22513           {
22514 	    output_operand_lossage ("invalid operand for code '%c'", code);
22515 	    return;
22516           }
22517 
22518 	fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22519       }
22520       return;
22521 
22522     /* Print a VFP/Neon double precision or quad precision register name.  */
22523     case 'P':
22524     case 'q':
22525       {
22526 	machine_mode mode = GET_MODE (x);
22527 	int is_quad = (code == 'q');
22528 	int regno;
22529 
22530 	if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22531 	  {
22532 	    output_operand_lossage ("invalid operand for code '%c'", code);
22533 	    return;
22534 	  }
22535 
22536 	if (!REG_P (x)
22537 	    || !IS_VFP_REGNUM (REGNO (x)))
22538 	  {
22539 	    output_operand_lossage ("invalid operand for code '%c'", code);
22540 	    return;
22541 	  }
22542 
22543 	regno = REGNO (x);
22544 	if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22545             || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22546 	  {
22547 	    output_operand_lossage ("invalid operand for code '%c'", code);
22548 	    return;
22549 	  }
22550 
22551 	fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22552 	  (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22553       }
22554       return;
22555 
22556     /* These two codes print the low/high doubleword register of a Neon quad
22557        register, respectively.  For pair-structure types, can also print
22558        low/high quadword registers.  */
22559     case 'e':
22560     case 'f':
22561       {
22562         machine_mode mode = GET_MODE (x);
22563         int regno;
22564 
22565         if ((GET_MODE_SIZE (mode) != 16
22566 	     && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22567           {
22568 	    output_operand_lossage ("invalid operand for code '%c'", code);
22569 	    return;
22570           }
22571 
22572         regno = REGNO (x);
22573         if (!NEON_REGNO_OK_FOR_QUAD (regno))
22574           {
22575 	    output_operand_lossage ("invalid operand for code '%c'", code);
22576 	    return;
22577           }
22578 
22579         if (GET_MODE_SIZE (mode) == 16)
22580           fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22581 				  + (code == 'f' ? 1 : 0));
22582         else
22583           fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22584 				  + (code == 'f' ? 1 : 0));
22585       }
22586       return;
22587 
22588     /* Print a VFPv3 floating-point constant, represented as an integer
22589        index.  */
22590     case 'G':
22591       {
22592         int index = vfp3_const_double_index (x);
22593 	gcc_assert (index != -1);
22594 	fprintf (stream, "%d", index);
22595       }
22596       return;
22597 
22598     /* Print bits representing opcode features for Neon.
22599 
22600        Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
22601        and polynomials as unsigned.
22602 
22603        Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22604 
22605        Bit 2 is 1 for rounding functions, 0 otherwise.  */
22606 
22607     /* Identify the type as 's', 'u', 'p' or 'f'.  */
22608     case 'T':
22609       {
22610         HOST_WIDE_INT bits = INTVAL (x);
22611         fputc ("uspf"[bits & 3], stream);
22612       }
22613       return;
22614 
22615     /* Likewise, but signed and unsigned integers are both 'i'.  */
22616     case 'F':
22617       {
22618         HOST_WIDE_INT bits = INTVAL (x);
22619         fputc ("iipf"[bits & 3], stream);
22620       }
22621       return;
22622 
22623     /* As for 'T', but emit 'u' instead of 'p'.  */
22624     case 't':
22625       {
22626         HOST_WIDE_INT bits = INTVAL (x);
22627         fputc ("usuf"[bits & 3], stream);
22628       }
22629       return;
22630 
22631     /* Bit 2: rounding (vs none).  */
22632     case 'O':
22633       {
22634         HOST_WIDE_INT bits = INTVAL (x);
22635         fputs ((bits & 4) != 0 ? "r" : "", stream);
22636       }
22637       return;
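
    /* A worked example of the encoding above: bits == 3 (a float) makes
       'T', 'F' and 't' all print 'f'; bits == 2 (polynomial) makes 'T'
       and 'F' print 'p' while 't' prints 'u'; bits == 5 (signed integer
       with rounding) makes 'T' print 's' and 'O' print 'r'.  */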
22638 
22639     /* Memory operand for vld1/vst1 instruction.  */
22640     case 'A':
22641       {
22642 	rtx addr;
22643 	bool postinc = FALSE;
22644 	rtx postinc_reg = NULL;
22645 	unsigned align, memsize, align_bits;
22646 
22647 	gcc_assert (MEM_P (x));
22648 	addr = XEXP (x, 0);
22649 	if (GET_CODE (addr) == POST_INC)
22650 	  {
22651 	    postinc = 1;
22652 	    addr = XEXP (addr, 0);
22653 	  }
22654 	if (GET_CODE (addr) == POST_MODIFY)
22655 	  {
22656 	    postinc_reg = XEXP (XEXP (addr, 1), 1);
22657 	    addr = XEXP (addr, 0);
22658 	  }
22659 	asm_fprintf (stream, "[%r", REGNO (addr));
22660 
22661 	/* We know the alignment of this access, so we can emit a hint in the
22662 	   instruction (for some alignments) as an aid to the memory subsystem
22663 	   of the target.  */
22664 	align = MEM_ALIGN (x) >> 3;
22665 	memsize = MEM_SIZE (x);
22666 
22667 	/* Only certain alignment specifiers are supported by the hardware.  */
22668 	if (memsize == 32 && (align % 32) == 0)
22669 	  align_bits = 256;
22670 	else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22671 	  align_bits = 128;
22672 	else if (memsize >= 8 && (align % 8) == 0)
22673 	  align_bits = 64;
22674 	else
22675 	  align_bits = 0;
22676 
22677 	if (align_bits != 0)
22678 	  asm_fprintf (stream, ":%d", align_bits);
22679 
22680 	asm_fprintf (stream, "]");
22681 
22682 	if (postinc)
22683 	  fputs("!", stream);
22684 	if (postinc_reg)
22685 	  asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22686       }
22687       return;
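
    /* For example, a 16-byte vld1/vst1 access through r0 known to be
       16-byte aligned is printed by the 'A' code as "[r0:128]", with a
       trailing "!" or ", <reg>" appended for post-increment or
       post-modify addressing.  */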
22688 
22689     case 'C':
22690       {
22691 	rtx addr;
22692 
22693 	gcc_assert (MEM_P (x));
22694 	addr = XEXP (x, 0);
22695 	gcc_assert (REG_P (addr));
22696 	asm_fprintf (stream, "[%r]", REGNO (addr));
22697       }
22698       return;
22699 
22700     /* Translate an S register number into a D register number and element index.  */
22701     case 'y':
22702       {
22703         machine_mode mode = GET_MODE (x);
22704         int regno;
22705 
22706         if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22707           {
22708 	    output_operand_lossage ("invalid operand for code '%c'", code);
22709 	    return;
22710           }
22711 
22712         regno = REGNO (x);
22713         if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22714           {
22715 	    output_operand_lossage ("invalid operand for code '%c'", code);
22716 	    return;
22717           }
22718 
22719 	regno = regno - FIRST_VFP_REGNUM;
22720 	fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22721       }
22722       return;
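
    /* For example, the 'y' code prints an SFmode value held in s7 as
       "d3[1]".  */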
22723 
22724     case 'v':
22725 	gcc_assert (CONST_DOUBLE_P (x));
22726 	int result;
22727 	result = vfp3_const_double_for_fract_bits (x);
22728 	if (result == 0)
22729 	  result = vfp3_const_double_for_bits (x);
22730 	fprintf (stream, "#%d", result);
22731 	return;
22732 
22733     /* Register specifier for vld1.16/vst1.16.  Translate the S register
22734        number into a D register number and element index.  */
22735     case 'z':
22736       {
22737         machine_mode mode = GET_MODE (x);
22738         int regno;
22739 
22740         if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22741           {
22742 	    output_operand_lossage ("invalid operand for code '%c'", code);
22743 	    return;
22744           }
22745 
22746         regno = REGNO (x);
22747         if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22748           {
22749 	    output_operand_lossage ("invalid operand for code '%c'", code);
22750 	    return;
22751           }
22752 
22753 	regno = regno - FIRST_VFP_REGNUM;
22754 	fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22755       }
22756       return;
22757 
22758     default:
22759       if (x == 0)
22760 	{
22761 	  output_operand_lossage ("missing operand");
22762 	  return;
22763 	}
22764 
22765       switch (GET_CODE (x))
22766 	{
22767 	case REG:
22768 	  asm_fprintf (stream, "%r", REGNO (x));
22769 	  break;
22770 
22771 	case MEM:
22772 	  output_address (GET_MODE (x), XEXP (x, 0));
22773 	  break;
22774 
22775 	case CONST_DOUBLE:
22776 	  {
22777             char fpstr[20];
22778             real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22779 			      sizeof (fpstr), 0, 1);
22780             fprintf (stream, "#%s", fpstr);
22781 	  }
22782 	  break;
22783 
22784 	default:
22785 	  gcc_assert (GET_CODE (x) != NEG);
22786 	  fputc ('#', stream);
22787 	  if (GET_CODE (x) == HIGH)
22788 	    {
22789 	      fputs (":lower16:", stream);
22790 	      x = XEXP (x, 0);
22791 	    }
22792 
22793 	  output_addr_const (stream, x);
22794 	  break;
22795 	}
22796     }
22797 }
22798 
22799 /* Target hook for printing a memory address.  */
22800 static void
22801 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22802 {
22803   if (TARGET_32BIT)
22804     {
22805       int is_minus = GET_CODE (x) == MINUS;
22806 
22807       if (REG_P (x))
22808 	asm_fprintf (stream, "[%r]", REGNO (x));
22809       else if (GET_CODE (x) == PLUS || is_minus)
22810 	{
22811 	  rtx base = XEXP (x, 0);
22812 	  rtx index = XEXP (x, 1);
22813 	  HOST_WIDE_INT offset = 0;
22814 	  if (!REG_P (base)
22815 	      || (REG_P (index) && REGNO (index) == SP_REGNUM))
22816 	    {
22817 	      /* Ensure that BASE is a register.  */
22818 	      /* (one of them must be).  */
22819 	      /* Also ensure the SP is not used as an index register.  */
22820 	      std::swap (base, index);
22821 	    }
22822 	  switch (GET_CODE (index))
22823 	    {
22824 	    case CONST_INT:
22825 	      offset = INTVAL (index);
22826 	      if (is_minus)
22827 		offset = -offset;
22828 	      asm_fprintf (stream, "[%r, #%wd]",
22829 			   REGNO (base), offset);
22830 	      break;
22831 
22832 	    case REG:
22833 	      asm_fprintf (stream, "[%r, %s%r]",
22834 			   REGNO (base), is_minus ? "-" : "",
22835 			   REGNO (index));
22836 	      break;
22837 
22838 	    case MULT:
22839 	    case ASHIFTRT:
22840 	    case LSHIFTRT:
22841 	    case ASHIFT:
22842 	    case ROTATERT:
22843 	      {
22844 		asm_fprintf (stream, "[%r, %s%r",
22845 			     REGNO (base), is_minus ? "-" : "",
22846 			     REGNO (XEXP (index, 0)));
22847 		arm_print_operand (stream, index, 'S');
22848 		fputs ("]", stream);
22849 		break;
22850 	      }
22851 
22852 	    default:
22853 	      gcc_unreachable ();
22854 	    }
22855 	}
22856       else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22857 	       || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22858 	{
22859 	  gcc_assert (REG_P (XEXP (x, 0)));
22860 
22861 	  if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22862 	    asm_fprintf (stream, "[%r, #%s%d]!",
22863 			 REGNO (XEXP (x, 0)),
22864 			 GET_CODE (x) == PRE_DEC ? "-" : "",
22865 			 GET_MODE_SIZE (mode));
22866 	  else
22867 	    asm_fprintf (stream, "[%r], #%s%d",
22868 			 REGNO (XEXP (x, 0)),
22869 			 GET_CODE (x) == POST_DEC ? "-" : "",
22870 			 GET_MODE_SIZE (mode));
22871 	}
22872       else if (GET_CODE (x) == PRE_MODIFY)
22873 	{
22874 	  asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22875 	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22876 	    asm_fprintf (stream, "#%wd]!",
22877 			 INTVAL (XEXP (XEXP (x, 1), 1)));
22878 	  else
22879 	    asm_fprintf (stream, "%r]!",
22880 			 REGNO (XEXP (XEXP (x, 1), 1)));
22881 	}
22882       else if (GET_CODE (x) == POST_MODIFY)
22883 	{
22884 	  asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22885 	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22886 	    asm_fprintf (stream, "#%wd",
22887 			 INTVAL (XEXP (XEXP (x, 1), 1)));
22888 	  else
22889 	    asm_fprintf (stream, "%r",
22890 			 REGNO (XEXP (XEXP (x, 1), 1)));
22891 	}
22892       else output_addr_const (stream, x);
22893     }
22894   else
22895     {
22896       if (REG_P (x))
22897 	asm_fprintf (stream, "[%r]", REGNO (x));
22898       else if (GET_CODE (x) == POST_INC)
22899 	asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22900       else if (GET_CODE (x) == PLUS)
22901 	{
22902 	  gcc_assert (REG_P (XEXP (x, 0)));
22903 	  if (CONST_INT_P (XEXP (x, 1)))
22904 	    asm_fprintf (stream, "[%r, #%wd]",
22905 			 REGNO (XEXP (x, 0)),
22906 			 INTVAL (XEXP (x, 1)));
22907 	  else
22908 	    asm_fprintf (stream, "[%r, %r]",
22909 			 REGNO (XEXP (x, 0)),
22910 			 REGNO (XEXP (x, 1)));
22911 	}
22912       else
22913 	output_addr_const (stream, x);
22914     }
22915 }
22916 
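/* Some examples of the 32-bit address forms printed above, assuming r3 as
   the base register and SImode accesses: a plain register prints as
   "[r3]", (plus (reg r3) (const_int 8)) as "[r3, #8]", (pre_dec (reg r3))
   as "[r3, #-4]!" and (post_inc (reg r3)) as "[r3], #4".  */
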
22917 /* Target hook for indicating whether a punctuation character for
22918    TARGET_PRINT_OPERAND is valid.  */
22919 static bool
22920 arm_print_operand_punct_valid_p (unsigned char code)
22921 {
22922   return (code == '@' || code == '|' || code == '.'
22923 	  || code == '(' || code == ')' || code == '#'
22924 	  || (TARGET_32BIT && (code == '?'))
22925 	  || (TARGET_THUMB2 && (code == '!'))
22926 	  || (TARGET_THUMB && (code == '_')));
22927 }
22928 
22929 /* Target hook for assembling integer objects.  The ARM version needs to
22930    handle word-sized values specially.  */
22931 static bool
22932 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22933 {
22934   machine_mode mode;
22935 
22936   if (size == UNITS_PER_WORD && aligned_p)
22937     {
22938       fputs ("\t.word\t", asm_out_file);
22939       output_addr_const (asm_out_file, x);
22940 
22941       /* Mark symbols as position independent.  We only do this in the
22942 	 .text segment, not in the .data segment.  */
22943       if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22944 	  (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22945 	{
22946 	  /* See legitimize_pic_address for an explanation of the
22947 	     TARGET_VXWORKS_RTP check.  */
22948 	  /* References to weak symbols cannot be resolved locally:
22949 	     they may be overridden by a non-weak definition at link
22950 	     time.  */
22951 	  if (!arm_pic_data_is_text_relative
22952 	      || (GET_CODE (x) == SYMBOL_REF
22953 		  && (!SYMBOL_REF_LOCAL_P (x)
22954 		      || (SYMBOL_REF_DECL (x)
22955 			  ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22956 	    fputs ("(GOT)", asm_out_file);
22957 	  else
22958 	    fputs ("(GOTOFF)", asm_out_file);
22959 	}
22960       fputc ('\n', asm_out_file);
22961       return true;
22962     }
22963 
22964   mode = GET_MODE (x);
22965 
22966   if (arm_vector_mode_supported_p (mode))
22967     {
22968       int i, units;
22969 
22970       gcc_assert (GET_CODE (x) == CONST_VECTOR);
22971 
22972       units = CONST_VECTOR_NUNITS (x);
22973       size = GET_MODE_UNIT_SIZE (mode);
22974 
22975       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22976         for (i = 0; i < units; i++)
22977 	  {
22978 	    rtx elt = CONST_VECTOR_ELT (x, i);
22979 	    assemble_integer
22980 	      (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22981 	  }
22982       else
22983         for (i = 0; i < units; i++)
22984           {
22985             rtx elt = CONST_VECTOR_ELT (x, i);
22986 	    assemble_real
22987 	      (*CONST_DOUBLE_REAL_VALUE (elt),
22988 	       as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22989 	       i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22990           }
22991 
22992       return true;
22993     }
22994 
22995   return default_assemble_integer (x, size, aligned_p);
22996 }
22997 
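/* As an informal example of the above: a word-sized SYMBOL_REF assembled
   while building the constant table of a PIC function comes out as
   ".word sym(GOTOFF)" when the symbol is local, non-weak and data is text
   relative, and as ".word sym(GOT)" otherwise, while a CONST_VECTOR in a
   supported vector mode is broken up into one assemble_integer or
   assemble_real call per element.  */
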
22998 static void
22999 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
23000 {
23001   section *s;
23002 
23003   if (!TARGET_AAPCS_BASED)
23004     {
23005       (is_ctor ?
23006        default_named_section_asm_out_constructor
23007        : default_named_section_asm_out_destructor) (symbol, priority);
23008       return;
23009     }
23010 
23011   /* Put these in the .init_array section, using a special relocation.  */
23012   if (priority != DEFAULT_INIT_PRIORITY)
23013     {
23014       char buf[18];
23015       sprintf (buf, "%s.%.5u",
23016 	       is_ctor ? ".init_array" : ".fini_array",
23017 	       priority);
23018       s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
23019     }
23020   else if (is_ctor)
23021     s = ctors_section;
23022   else
23023     s = dtors_section;
23024 
23025   switch_to_section (s);
23026   assemble_align (POINTER_SIZE);
23027   fputs ("\t.word\t", asm_out_file);
23028   output_addr_const (asm_out_file, symbol);
23029   fputs ("(target1)\n", asm_out_file);
23030 }
23031 
23032 /* Add a function to the list of static constructors.  */
23033 
23034 static void
23035 arm_elf_asm_constructor (rtx symbol, int priority)
23036 {
23037   arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
23038 }
23039 
23040 /* Add a function to the list of static destructors.  */
23041 
23042 static void
23043 arm_elf_asm_destructor (rtx symbol, int priority)
23044 {
23045   arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
23046 }
23047 
23048 /* A finite state machine takes care of noticing whether or not instructions
23049    can be conditionally executed, and thus decrease execution time and code
23050    size by deleting branch instructions.  The fsm is controlled by
23051    final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */
23052 
23053 /* The states of the fsm controlling condition codes are:
23054    0: normal, do nothing special
23055    1: make ASM_OUTPUT_OPCODE not output this instruction
23056    2: make ASM_OUTPUT_OPCODE not output this instruction
23057    3: make instructions conditional
23058    4: make instructions conditional
23059 
23060    State transitions (state->state by whom under condition):
23061    0 -> 1 final_prescan_insn if the `target' is a label
23062    0 -> 2 final_prescan_insn if the `target' is an unconditional branch
23063    1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
23064    2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
23065    3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
23066           (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
23067    4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
23068           (the target insn is arm_target_insn).
23069 
23070    If the jump clobbers the conditions then we use states 2 and 4.
23071 
23072    A similar thing can be done with conditional return insns.
23073 
23074    XXX In case the `target' is an unconditional branch, this conditionalising
23075    of the instructions always reduces code size, but not always execution
23076    time.  But then, I want to reduce the code size to somewhere near what
23077    /bin/cc produces.  */
23078 
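/* For illustration, given the FSM above a sequence such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
     .L1:

   is output as

	cmp	r0, #0
	addne	r1, r1, #1

   because the branch over the ADD is not emitted (state 0 -> 1 -> 3) and
   the skipped instruction is given the inverse of the branch condition.  */
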
23079 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
23080    instructions.  When a COND_EXEC instruction is seen the subsequent
23081    instructions are scanned so that multiple conditional instructions can be
23082    combined into a single IT block.  arm_condexec_count and arm_condexec_mask
23083    specify the length and true/false mask for the IT block.  These will be
23084    decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
23085 
23086 /* Returns the index of the ARM condition code string in
23087    `arm_condition_codes', or ARM_NV if the comparison is invalid.
23088    COMPARISON should be an rtx like `(eq (...) (...))'.  */
23089 
23090 enum arm_cond_code
23091 maybe_get_arm_condition_code (rtx comparison)
23092 {
23093   machine_mode mode = GET_MODE (XEXP (comparison, 0));
23094   enum arm_cond_code code;
23095   enum rtx_code comp_code = GET_CODE (comparison);
23096 
23097   if (GET_MODE_CLASS (mode) != MODE_CC)
23098     mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
23099 			   XEXP (comparison, 1));
23100 
23101   switch (mode)
23102     {
23103     case E_CC_DNEmode: code = ARM_NE; goto dominance;
23104     case E_CC_DEQmode: code = ARM_EQ; goto dominance;
23105     case E_CC_DGEmode: code = ARM_GE; goto dominance;
23106     case E_CC_DGTmode: code = ARM_GT; goto dominance;
23107     case E_CC_DLEmode: code = ARM_LE; goto dominance;
23108     case E_CC_DLTmode: code = ARM_LT; goto dominance;
23109     case E_CC_DGEUmode: code = ARM_CS; goto dominance;
23110     case E_CC_DGTUmode: code = ARM_HI; goto dominance;
23111     case E_CC_DLEUmode: code = ARM_LS; goto dominance;
23112     case E_CC_DLTUmode: code = ARM_CC;
23113 
23114     dominance:
23115       if (comp_code == EQ)
23116 	return ARM_INVERSE_CONDITION_CODE (code);
23117       if (comp_code == NE)
23118 	return code;
23119       return ARM_NV;
23120 
23121     case E_CC_NOOVmode:
23122       switch (comp_code)
23123 	{
23124 	case NE: return ARM_NE;
23125 	case EQ: return ARM_EQ;
23126 	case GE: return ARM_PL;
23127 	case LT: return ARM_MI;
23128 	default: return ARM_NV;
23129 	}
23130 
23131     case E_CC_Zmode:
23132       switch (comp_code)
23133 	{
23134 	case NE: return ARM_NE;
23135 	case EQ: return ARM_EQ;
23136 	default: return ARM_NV;
23137 	}
23138 
23139     case E_CC_Nmode:
23140       switch (comp_code)
23141 	{
23142 	case NE: return ARM_MI;
23143 	case EQ: return ARM_PL;
23144 	default: return ARM_NV;
23145 	}
23146 
23147     case E_CCFPEmode:
23148     case E_CCFPmode:
23149       /* We can handle all cases except UNEQ and LTGT.  */
23150       switch (comp_code)
23151 	{
23152 	case GE: return ARM_GE;
23153 	case GT: return ARM_GT;
23154 	case LE: return ARM_LS;
23155 	case LT: return ARM_MI;
23156 	case NE: return ARM_NE;
23157 	case EQ: return ARM_EQ;
23158 	case ORDERED: return ARM_VC;
23159 	case UNORDERED: return ARM_VS;
23160 	case UNLT: return ARM_LT;
23161 	case UNLE: return ARM_LE;
23162 	case UNGT: return ARM_HI;
23163 	case UNGE: return ARM_PL;
23164 	  /* UNEQ and LTGT do not have a representation.  */
23165 	case UNEQ: /* Fall through.  */
23166 	case LTGT: /* Fall through.  */
23167 	default: return ARM_NV;
23168 	}
23169 
23170     case E_CC_SWPmode:
23171       switch (comp_code)
23172 	{
23173 	case NE: return ARM_NE;
23174 	case EQ: return ARM_EQ;
23175 	case GE: return ARM_LE;
23176 	case GT: return ARM_LT;
23177 	case LE: return ARM_GE;
23178 	case LT: return ARM_GT;
23179 	case GEU: return ARM_LS;
23180 	case GTU: return ARM_CC;
23181 	case LEU: return ARM_CS;
23182 	case LTU: return ARM_HI;
23183 	default: return ARM_NV;
23184 	}
23185 
23186     case E_CC_Cmode:
23187       switch (comp_code)
23188 	{
23189 	case LTU: return ARM_CS;
23190 	case GEU: return ARM_CC;
23191 	case NE: return ARM_CS;
23192 	case EQ: return ARM_CC;
23193 	default: return ARM_NV;
23194 	}
23195 
23196     case E_CC_CZmode:
23197       switch (comp_code)
23198 	{
23199 	case NE: return ARM_NE;
23200 	case EQ: return ARM_EQ;
23201 	case GEU: return ARM_CS;
23202 	case GTU: return ARM_HI;
23203 	case LEU: return ARM_LS;
23204 	case LTU: return ARM_CC;
23205 	default: return ARM_NV;
23206 	}
23207 
23208     case E_CC_NCVmode:
23209       switch (comp_code)
23210 	{
23211 	case GE: return ARM_GE;
23212 	case LT: return ARM_LT;
23213 	case GEU: return ARM_CS;
23214 	case LTU: return ARM_CC;
23215 	default: return ARM_NV;
23216 	}
23217 
23218     case E_CC_Vmode:
23219       switch (comp_code)
23220 	{
23221 	case NE: return ARM_VS;
23222 	case EQ: return ARM_VC;
23223 	default: return ARM_NV;
23224 	}
23225 
23226     case E_CCmode:
23227       switch (comp_code)
23228 	{
23229 	case NE: return ARM_NE;
23230 	case EQ: return ARM_EQ;
23231 	case GE: return ARM_GE;
23232 	case GT: return ARM_GT;
23233 	case LE: return ARM_LE;
23234 	case LT: return ARM_LT;
23235 	case GEU: return ARM_CS;
23236 	case GTU: return ARM_HI;
23237 	case LEU: return ARM_LS;
23238 	case LTU: return ARM_CC;
23239 	default: return ARM_NV;
23240 	}
23241 
23242     default: gcc_unreachable ();
23243     }
23244 }
23245 
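/* For example, (gtu (reg:CC CC_REGNUM) (const_int 0)) in plain CCmode maps
   to ARM_HI above, while under CC_SWPmode, which is used when the
   comparison operands were swapped, GT maps to ARM_LT.  */
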
23246 /* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
23247 static enum arm_cond_code
23248 get_arm_condition_code (rtx comparison)
23249 {
23250   enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
23251   gcc_assert (code != ARM_NV);
23252   return code;
23253 }
23254 
23255 /* Implement TARGET_FIXED_CONDITION_CODE_REGS.  We only have condition
23256    code registers when not targeting Thumb1.  The VFP condition register
23257    only exists when generating hard-float code.  */
23258 static bool
23259 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
23260 {
23261   if (!TARGET_32BIT)
23262     return false;
23263 
23264   *p1 = CC_REGNUM;
23265   *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
23266   return true;
23267 }
23268 
23269 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
23270    instructions.  */
23271 void
23272 thumb2_final_prescan_insn (rtx_insn *insn)
23273 {
23274   rtx_insn *first_insn = insn;
23275   rtx body = PATTERN (insn);
23276   rtx predicate;
23277   enum arm_cond_code code;
23278   int n;
23279   int mask;
23280   int max;
23281 
23282   /* max_insns_skipped in the tune was already taken into account in the
23283      cost model of ifcvt pass when generating COND_EXEC insns.  At this stage
23284      just emit the IT blocks as large as we can.  It does not make sense to split
23285      the IT blocks.  */
23286   max = MAX_INSN_PER_IT_BLOCK;
23287 
23288   /* Remove the previous insn from the count of insns to be output.  */
23289   if (arm_condexec_count)
23290       arm_condexec_count--;
23291 
23292   /* Nothing to do if we are already inside a conditional block.  */
23293   if (arm_condexec_count)
23294     return;
23295 
23296   if (GET_CODE (body) != COND_EXEC)
23297     return;
23298 
23299   /* Conditional jumps are implemented directly.  */
23300   if (JUMP_P (insn))
23301     return;
23302 
23303   predicate = COND_EXEC_TEST (body);
23304   arm_current_cc = get_arm_condition_code (predicate);
23305 
23306   n = get_attr_ce_count (insn);
23307   arm_condexec_count = 1;
23308   arm_condexec_mask = (1 << n) - 1;
23309   arm_condexec_masklen = n;
23310   /* See if subsequent instructions can be combined into the same block.  */
23311   for (;;)
23312     {
23313       insn = next_nonnote_insn (insn);
23314 
23315       /* Jumping into the middle of an IT block is illegal, so a label or
23316          barrier terminates the block.  */
23317       if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23318 	break;
23319 
23320       body = PATTERN (insn);
23321       /* USE and CLOBBER aren't really insns, so just skip them.  */
23322       if (GET_CODE (body) == USE
23323 	  || GET_CODE (body) == CLOBBER)
23324 	continue;
23325 
23326       /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
23327       if (GET_CODE (body) != COND_EXEC)
23328 	break;
23329       /* Maximum number of conditionally executed instructions in a block.  */
23330       n = get_attr_ce_count (insn);
23331       if (arm_condexec_masklen + n > max)
23332 	break;
23333 
23334       predicate = COND_EXEC_TEST (body);
23335       code = get_arm_condition_code (predicate);
23336       mask = (1 << n) - 1;
23337       if (arm_current_cc == code)
23338 	arm_condexec_mask |= (mask << arm_condexec_masklen);
23339       else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23340 	break;
23341 
23342       arm_condexec_count++;
23343       arm_condexec_masklen += n;
23344 
23345       /* A jump must be the last instruction in a conditional block.  */
23346       if (JUMP_P (insn))
23347 	break;
23348     }
23349   /* Restore recog_data (getting the attributes of other insns can
23350      destroy this array, but final.c assumes that it remains intact
23351      across this call).  */
23352   extract_constrain_insn_cached (first_insn);
23353 }
23354 
23355 void
23356 arm_final_prescan_insn (rtx_insn *insn)
23357 {
23358   /* BODY will hold the body of INSN.  */
23359   rtx body = PATTERN (insn);
23360 
23361   /* This will be 1 if trying to repeat the trick, and things need to be
23362      reversed if it appears to fail.  */
23363   int reverse = 0;
23364 
23365   /* If we start with a return insn, we only succeed if we find another one.  */
23366   int seeking_return = 0;
23367   enum rtx_code return_code = UNKNOWN;
23368 
23369   /* START_INSN will hold the insn from where we start looking.  This is the
23370      first insn after the following code_label if REVERSE is true.  */
23371   rtx_insn *start_insn = insn;
23372 
23373   /* If in state 4, check if the target branch is reached, in order to
23374      change back to state 0.  */
23375   if (arm_ccfsm_state == 4)
23376     {
23377       if (insn == arm_target_insn)
23378 	{
23379 	  arm_target_insn = NULL;
23380 	  arm_ccfsm_state = 0;
23381 	}
23382       return;
23383     }
23384 
23385   /* If in state 3, it is possible to repeat the trick, if this insn is an
23386      unconditional branch to a label, and immediately following this branch
23387      is the previous target label which is only used once, and the label this
23388      branch jumps to is not too far off.  */
23389   if (arm_ccfsm_state == 3)
23390     {
23391       if (simplejump_p (insn))
23392 	{
23393 	  start_insn = next_nonnote_insn (start_insn);
23394 	  if (BARRIER_P (start_insn))
23395 	    {
23396 	      /* XXX Isn't this always a barrier?  */
23397 	      start_insn = next_nonnote_insn (start_insn);
23398 	    }
23399 	  if (LABEL_P (start_insn)
23400 	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23401 	      && LABEL_NUSES (start_insn) == 1)
23402 	    reverse = TRUE;
23403 	  else
23404 	    return;
23405 	}
23406       else if (ANY_RETURN_P (body))
23407         {
23408 	  start_insn = next_nonnote_insn (start_insn);
23409 	  if (BARRIER_P (start_insn))
23410 	    start_insn = next_nonnote_insn (start_insn);
23411 	  if (LABEL_P (start_insn)
23412 	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23413 	      && LABEL_NUSES (start_insn) == 1)
23414 	    {
23415 	      reverse = TRUE;
23416 	      seeking_return = 1;
23417 	      return_code = GET_CODE (body);
23418 	    }
23419 	  else
23420 	    return;
23421         }
23422       else
23423 	return;
23424     }
23425 
23426   gcc_assert (!arm_ccfsm_state || reverse);
23427   if (!JUMP_P (insn))
23428     return;
23429 
23430   /* This jump might be paralleled with a clobber of the condition codes;
23431      the jump should always come first.  */
23432   if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23433     body = XVECEXP (body, 0, 0);
23434 
23435   if (reverse
23436       || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23437 	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23438     {
23439       int insns_skipped;
23440       int fail = FALSE, succeed = FALSE;
23441       /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
23442       int then_not_else = TRUE;
23443       rtx_insn *this_insn = start_insn;
23444       rtx label = 0;
23445 
23446       /* Register the insn jumped to.  */
23447       if (reverse)
23448         {
23449 	  if (!seeking_return)
23450 	    label = XEXP (SET_SRC (body), 0);
23451         }
23452       else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23453 	label = XEXP (XEXP (SET_SRC (body), 1), 0);
23454       else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23455 	{
23456 	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
23457 	  then_not_else = FALSE;
23458 	}
23459       else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23460 	{
23461 	  seeking_return = 1;
23462 	  return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23463 	}
23464       else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23465         {
23466 	  seeking_return = 1;
23467 	  return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23468 	  then_not_else = FALSE;
23469         }
23470       else
23471 	gcc_unreachable ();
23472 
23473       /* See how many insns this branch skips, and what kind of insns.  If all
23474 	 insns are okay, and the label or unconditional branch to the same
23475 	 label is not too far away, succeed.  */
23476       for (insns_skipped = 0;
23477 	   !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23478 	{
23479 	  rtx scanbody;
23480 
23481 	  this_insn = next_nonnote_insn (this_insn);
23482 	  if (!this_insn)
23483 	    break;
23484 
23485 	  switch (GET_CODE (this_insn))
23486 	    {
23487 	    case CODE_LABEL:
23488 	      /* Succeed if it is the target label, otherwise fail since
23489 		 control falls in from somewhere else.  */
23490 	      if (this_insn == label)
23491 		{
23492 		  arm_ccfsm_state = 1;
23493 		  succeed = TRUE;
23494 		}
23495 	      else
23496 		fail = TRUE;
23497 	      break;
23498 
23499 	    case BARRIER:
23500 	      /* Succeed if the following insn is the target label.
23501 		 Otherwise fail.
23502 		 If return insns are used then the last insn in a function
23503 		 will be a barrier.  */
23504 	      this_insn = next_nonnote_insn (this_insn);
23505 	      if (this_insn && this_insn == label)
23506 		{
23507 		  arm_ccfsm_state = 1;
23508 		  succeed = TRUE;
23509 		}
23510 	      else
23511 		fail = TRUE;
23512 	      break;
23513 
23514 	    case CALL_INSN:
23515 	      /* The AAPCS says that conditional calls should not be
23516 		 used since they make interworking inefficient (the
23517 		 linker can't transform BL<cond> into BLX).  That's
23518 		 only a problem if the machine has BLX.  */
23519 	      if (arm_arch5)
23520 		{
23521 		  fail = TRUE;
23522 		  break;
23523 		}
23524 
23525 	      /* Succeed if the following insn is the target label, or
23526 		 if the following two insns are a barrier and the
23527 		 target label.  */
23528 	      this_insn = next_nonnote_insn (this_insn);
23529 	      if (this_insn && BARRIER_P (this_insn))
23530 		this_insn = next_nonnote_insn (this_insn);
23531 
23532 	      if (this_insn && this_insn == label
23533 		  && insns_skipped < max_insns_skipped)
23534 		{
23535 		  arm_ccfsm_state = 1;
23536 		  succeed = TRUE;
23537 		}
23538 	      else
23539 		fail = TRUE;
23540 	      break;
23541 
23542 	    case JUMP_INSN:
23543       	      /* If this is an unconditional branch to the same label, succeed.
23544 		 If it is to another label, do nothing.  If it is conditional,
23545 		 fail.  */
23546 	      /* XXX Probably, the tests for SET and the PC are
23547 		 unnecessary.  */
23548 
23549 	      scanbody = PATTERN (this_insn);
23550 	      if (GET_CODE (scanbody) == SET
23551 		  && GET_CODE (SET_DEST (scanbody)) == PC)
23552 		{
23553 		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23554 		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23555 		    {
23556 		      arm_ccfsm_state = 2;
23557 		      succeed = TRUE;
23558 		    }
23559 		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23560 		    fail = TRUE;
23561 		}
23562 	      /* Fail if a conditional return is undesirable (e.g. on a
23563 		 StrongARM), but still allow this if optimizing for size.  */
23564 	      else if (GET_CODE (scanbody) == return_code
23565 		       && !use_return_insn (TRUE, NULL)
23566 		       && !optimize_size)
23567 		fail = TRUE;
23568 	      else if (GET_CODE (scanbody) == return_code)
23569 	        {
23570 		  arm_ccfsm_state = 2;
23571 		  succeed = TRUE;
23572 	        }
23573 	      else if (GET_CODE (scanbody) == PARALLEL)
23574 	        {
23575 		  switch (get_attr_conds (this_insn))
23576 		    {
23577 		    case CONDS_NOCOND:
23578 		      break;
23579 		    default:
23580 		      fail = TRUE;
23581 		      break;
23582 		    }
23583 		}
23584 	      else
23585 		fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */
23586 
23587 	      break;
23588 
23589 	    case INSN:
23590 	      /* Instructions using or affecting the condition codes make it
23591 		 fail.  */
23592 	      scanbody = PATTERN (this_insn);
23593 	      if (!(GET_CODE (scanbody) == SET
23594 		    || GET_CODE (scanbody) == PARALLEL)
23595 		  || get_attr_conds (this_insn) != CONDS_NOCOND)
23596 		fail = TRUE;
23597 	      break;
23598 
23599 	    default:
23600 	      break;
23601 	    }
23602 	}
23603       if (succeed)
23604 	{
23605 	  if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23606 	    arm_target_label = CODE_LABEL_NUMBER (label);
23607 	  else
23608 	    {
23609 	      gcc_assert (seeking_return || arm_ccfsm_state == 2);
23610 
23611 	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23612 	        {
23613 		  this_insn = next_nonnote_insn (this_insn);
23614 		  gcc_assert (!this_insn
23615 			      || (!BARRIER_P (this_insn)
23616 				  && !LABEL_P (this_insn)));
23617 	        }
23618 	      if (!this_insn)
23619 	        {
23620 		  /* Oh, dear!  We ran off the end...  Give up.  */
23621 		  extract_constrain_insn_cached (insn);
23622 		  arm_ccfsm_state = 0;
23623 		  arm_target_insn = NULL;
23624 		  return;
23625 	        }
23626 	      arm_target_insn = this_insn;
23627 	    }
23628 
23629 	  /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23630 	     what it was.  */
23631 	  if (!reverse)
23632 	    arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23633 
23634 	  if (reverse || then_not_else)
23635 	    arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23636 	}
23637 
23638       /* Restore recog_data (getting the attributes of other insns can
23639 	 destroy this array, but final.c assumes that it remains intact
23640 	 across this call).  */
23641       extract_constrain_insn_cached (insn);
23642     }
23643 }
23644 
23645 /* Output IT instructions.  */
23646 void
23647 thumb2_asm_output_opcode (FILE * stream)
23648 {
23649   char buff[5];
23650   int n;
23651 
23652   if (arm_condexec_mask)
23653     {
23654       for (n = 0; n < arm_condexec_masklen; n++)
23655 	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23656       buff[n] = 0;
23657       asm_fprintf(stream, "i%s\t%s\n\t", buff,
23658 		  arm_condition_codes[arm_current_cc]);
23659       arm_condexec_mask = 0;
23660     }
23661 }
23662 
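/* As an example of the above: with arm_current_cc == ARM_EQ,
   arm_condexec_masklen == 3 and arm_condexec_mask == 5 (binary 101), the
   loop builds "tet" and the hook prints "itet eq" ahead of the first
   conditional instruction of the block.  */
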
23663 /* Implement TARGET_HARD_REGNO_NREGS.  On the ARM, core regs are
23664    UNITS_PER_WORD bytes wide.  */
23665 static unsigned int
23666 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
23667 {
23668   if (TARGET_32BIT
23669       && regno > PC_REGNUM
23670       && regno != FRAME_POINTER_REGNUM
23671       && regno != ARG_POINTER_REGNUM
23672       && !IS_VFP_REGNUM (regno))
23673     return 1;
23674 
23675   return ARM_NUM_REGS (mode);
23676 }
23677 
23678 /* Implement TARGET_HARD_REGNO_MODE_OK.  */
23679 static bool
23680 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23681 {
23682   if (GET_MODE_CLASS (mode) == MODE_CC)
23683     return (regno == CC_REGNUM
23684 	    || (TARGET_HARD_FLOAT
23685 		&& regno == VFPCC_REGNUM));
23686 
23687   if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23688     return false;
23689 
23690   if (TARGET_THUMB1)
23691     /* For the Thumb we only allow values bigger than SImode in
23692        registers 0 - 6, so that there is always a second low
23693        register available to hold the upper part of the value.
23694        We probably ought to ensure that the register is the
23695        start of an even numbered register pair.  */
23696     return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23697 
23698   if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23699     {
23700       if (mode == SFmode || mode == SImode)
23701 	return VFP_REGNO_OK_FOR_SINGLE (regno);
23702 
23703       if (mode == DFmode)
23704 	return VFP_REGNO_OK_FOR_DOUBLE (regno);
23705 
23706       if (mode == HFmode)
23707 	return VFP_REGNO_OK_FOR_SINGLE (regno);
23708 
23709       /* VFP registers can hold HImode values.  */
23710       if (mode == HImode)
23711 	return VFP_REGNO_OK_FOR_SINGLE (regno);
23712 
23713       if (TARGET_NEON)
23714         return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23715                || (VALID_NEON_QREG_MODE (mode)
23716                    && NEON_REGNO_OK_FOR_QUAD (regno))
23717 	       || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23718 	       || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23719 	       || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23720 	       || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23721 	       || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23722 
23723       return false;
23724     }
23725 
23726   if (TARGET_REALLY_IWMMXT)
23727     {
23728       if (IS_IWMMXT_GR_REGNUM (regno))
23729 	return mode == SImode;
23730 
23731       if (IS_IWMMXT_REGNUM (regno))
23732 	return VALID_IWMMXT_REG_MODE (mode);
23733     }
23734 
23735   /* We allow almost any value to be stored in the general registers.
23736      Restrict doubleword quantities to even register pairs in ARM state
23737      so that we can use ldrd.  Do not allow very large Neon structure
23738      opaque modes in general registers; they would use too many.  */
23739   if (regno <= LAST_ARM_REGNUM)
23740     {
23741       if (ARM_NUM_REGS (mode) > 4)
23742 	return false;
23743 
23744       if (TARGET_THUMB2)
23745 	return true;
23746 
23747       return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23748     }
23749 
23750   if (regno == FRAME_POINTER_REGNUM
23751       || regno == ARG_POINTER_REGNUM)
23752     /* We only allow integers in the fake hard registers.  */
23753     return GET_MODE_CLASS (mode) == MODE_INT;
23754 
23755   return false;
23756 }
23757 
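/* One consequence of the rules above: with TARGET_LDRD, a DImode value may
   live in {r0, r1} or {r2, r3} but not in {r1, r2} in ARM state, so that
   LDRD/STRD remain usable; Thumb-2 places no such even/odd restriction on
   the core registers.  */
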
23758 /* Implement TARGET_MODES_TIEABLE_P.  */
23759 
23760 static bool
23761 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23762 {
23763   if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23764     return true;
23765 
23766   /* We specifically want to allow elements of "structure" modes to
23767      be tieable to the structure.  This more general condition allows
23768      other rarer situations too.  */
23769   if (TARGET_NEON
23770       && (VALID_NEON_DREG_MODE (mode1)
23771 	  || VALID_NEON_QREG_MODE (mode1)
23772 	  || VALID_NEON_STRUCT_MODE (mode1))
23773       && (VALID_NEON_DREG_MODE (mode2)
23774 	  || VALID_NEON_QREG_MODE (mode2)
23775 	  || VALID_NEON_STRUCT_MODE (mode2)))
23776     return true;
23777 
23778   return false;
23779 }
23780 
23781 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23782    not used in arm mode.  */
23783 
23784 enum reg_class
23785 arm_regno_class (int regno)
23786 {
23787   if (regno == PC_REGNUM)
23788     return NO_REGS;
23789 
23790   if (TARGET_THUMB1)
23791     {
23792       if (regno == STACK_POINTER_REGNUM)
23793 	return STACK_REG;
23794       if (regno == CC_REGNUM)
23795 	return CC_REG;
23796       if (regno < 8)
23797 	return LO_REGS;
23798       return HI_REGS;
23799     }
23800 
23801   if (TARGET_THUMB2 && regno < 8)
23802     return LO_REGS;
23803 
23804   if (   regno <= LAST_ARM_REGNUM
23805       || regno == FRAME_POINTER_REGNUM
23806       || regno == ARG_POINTER_REGNUM)
23807     return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23808 
23809   if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23810     return TARGET_THUMB2 ? CC_REG : NO_REGS;
23811 
23812   if (IS_VFP_REGNUM (regno))
23813     {
23814       if (regno <= D7_VFP_REGNUM)
23815 	return VFP_D0_D7_REGS;
23816       else if (regno <= LAST_LO_VFP_REGNUM)
23817         return VFP_LO_REGS;
23818       else
23819         return VFP_HI_REGS;
23820     }
23821 
23822   if (IS_IWMMXT_REGNUM (regno))
23823     return IWMMXT_REGS;
23824 
23825   if (IS_IWMMXT_GR_REGNUM (regno))
23826     return IWMMXT_GR_REGS;
23827 
23828   return NO_REGS;
23829 }
23830 
23831 /* Handle a special case when computing the offset
23832    of an argument from the frame pointer.  */
23833 int
23834 arm_debugger_arg_offset (int value, rtx addr)
23835 {
23836   rtx_insn *insn;
23837 
23838   /* We are only interested if dbxout_parms() failed to compute the offset.  */
23839   if (value != 0)
23840     return 0;
23841 
23842   /* We can only cope with the case where the address is held in a register.  */
23843   if (!REG_P (addr))
23844     return 0;
23845 
23846   /* If we are using the frame pointer to point at the argument, then
23847      an offset of 0 is correct.  */
23848   if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23849     return 0;
23850 
23851   /* If we are using the stack pointer to point at the
23852      argument, then an offset of 0 is correct.  */
23853   /* ??? Check this is consistent with thumb2 frame layout.  */
23854   if ((TARGET_THUMB || !frame_pointer_needed)
23855       && REGNO (addr) == SP_REGNUM)
23856     return 0;
23857 
23858   /* Oh dear.  The argument is pointed to by a register rather
23859      than being held in a register, or being stored at a known
23860      offset from the frame pointer.  Since GDB only understands
23861      those two kinds of argument we must translate the address
23862      held in the register into an offset from the frame pointer.
23863      We do this by searching through the insns for the function
23864      looking to see where this register gets its value.  If the
23865      register is initialized from the frame pointer plus an offset
23866      then we are in luck and we can continue, otherwise we give up.
23867 
23868      This code is exercised by producing debugging information
23869      for a function with arguments like this:
23870 
23871            double func (double a, double b, int c, double d) {return d;}
23872 
23873      Without this code the stab for parameter 'd' will be set to
23874      an offset of 0 from the frame pointer, rather than 8.  */
23875 
23876   /* The if() statement says:
23877 
23878      If the insn is a normal instruction
23879      and if the insn is setting the value in a register
23880      and if the register being set is the register holding the address of the argument
23881      and if the address is computed by an addition
23882      that involves adding to a register
23883      which is the frame pointer
23884      a constant integer
23885 
23886      then...  */
23887 
23888   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23889     {
23890       if (   NONJUMP_INSN_P (insn)
23891 	  && GET_CODE (PATTERN (insn)) == SET
23892 	  && REGNO    (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23893 	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23894 	  && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23895 	  && REGNO    (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23896 	  && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23897 	     )
23898 	{
23899 	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23900 
23901 	  break;
23902 	}
23903     }
23904 
23905   if (value == 0)
23906     {
23907       debug_rtx (addr);
23908       warning (0, "unable to compute real location of stacked parameter");
23909       value = 8; /* XXX magic hack */
23910     }
23911 
23912   return value;
23913 }
23914 
23915 /* Implement TARGET_PROMOTED_TYPE.  */
23916 
23917 static tree
23918 arm_promoted_type (const_tree t)
23919 {
23920   if (SCALAR_FLOAT_TYPE_P (t)
23921       && TYPE_PRECISION (t) == 16
23922       && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23923     return float_type_node;
23924   return NULL_TREE;
23925 }
23926 
23927 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23928    This simply adds HFmode as a supported mode; even though we don't
23929    implement arithmetic on this type directly, it's supported by
23930    optabs conversions, much the way the double-word arithmetic is
23931    special-cased in the default hook.  */
23932 
23933 static bool
23934 arm_scalar_mode_supported_p (scalar_mode mode)
23935 {
23936   if (mode == HFmode)
23937     return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23938   else if (ALL_FIXED_POINT_MODE_P (mode))
23939     return true;
23940   else
23941     return default_scalar_mode_supported_p (mode);
23942 }
23943 
23944 /* Set the value of FLT_EVAL_METHOD.
23945    ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23946 
23947     0: evaluate all operations and constants, whose semantic type has at
23948        most the range and precision of type float, to the range and
23949        precision of float; evaluate all other operations and constants to
23950        the range and precision of the semantic type;
23951 
23952     N, where _FloatN is a supported interchange floating type:
23953        evaluate all operations and constants, whose semantic type has at
23954        most the range and precision of _FloatN type, to the range and
23955        precision of the _FloatN type; evaluate all other operations and
23956        constants to the range and precision of the semantic type;
23957 
23958    If we have the ARMv8.2-A extensions then we support _Float16 in native
23959    precision, so we should set this to 16.  Otherwise, we support the type,
23960    but want to evaluate expressions in float precision, so set this to
23961    0.  */
23962 
23963 static enum flt_eval_method
23964 arm_excess_precision (enum excess_precision_type type)
23965 {
23966   switch (type)
23967     {
23968       case EXCESS_PRECISION_TYPE_FAST:
23969       case EXCESS_PRECISION_TYPE_STANDARD:
23970 	/* We can calculate either in 16-bit range and precision or
23971 	   32-bit range and precision.  Make that decision based on whether
23972 	   we have native support for the ARMv8.2-A 16-bit floating-point
23973 	   instructions or not.  */
23974 	return (TARGET_VFP_FP16INST
23975 		? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23976 		: FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23977       case EXCESS_PRECISION_TYPE_IMPLICIT:
23978 	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23979       default:
23980 	gcc_unreachable ();
23981     }
23982   return FLT_EVAL_METHOD_UNPREDICTABLE;
23983 }
23984 
23985 
23986 /* Implement TARGET_FLOATN_MODE.  Make very sure that we don't provide
23987    _Float16 if we are using anything other than ieee format for 16-bit
23988    floating point.  Otherwise, punt to the default implementation.  */
23989 static opt_scalar_float_mode
23990 arm_floatn_mode (int n, bool extended)
23991 {
23992   if (!extended && n == 16)
23993     {
23994       if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23995 	return HFmode;
23996       return opt_scalar_float_mode ();
23997     }
23998 
23999   return default_floatn_mode (n, extended);
24000 }
24001 
24002 
24003 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
24004    not to early-clobber SRC registers in the process.
24005 
24006    We assume that the operands described by SRC and DEST represent a
24007    decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
24008    number of components into which the copy has been decomposed.  */
24009 void
24010 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
24011 {
24012   unsigned int i;
24013 
24014   if (!reg_overlap_mentioned_p (operands[0], operands[1])
24015       || REGNO (operands[0]) < REGNO (operands[1]))
24016     {
24017       for (i = 0; i < count; i++)
24018 	{
24019 	  operands[2 * i] = dest[i];
24020 	  operands[2 * i + 1] = src[i];
24021 	}
24022     }
24023   else
24024     {
24025       for (i = 0; i < count; i++)
24026 	{
24027 	  operands[2 * i] = dest[count - i - 1];
24028 	  operands[2 * i + 1] = src[count - i - 1];
24029 	}
24030     }
24031 }
24032 
24033 /* Split operands into moves from op[1] + op[2] into op[0].  */
24034 
24035 void
24036 neon_split_vcombine (rtx operands[3])
24037 {
24038   unsigned int dest = REGNO (operands[0]);
24039   unsigned int src1 = REGNO (operands[1]);
24040   unsigned int src2 = REGNO (operands[2]);
24041   machine_mode halfmode = GET_MODE (operands[1]);
24042   unsigned int halfregs = REG_NREGS (operands[1]);
24043   rtx destlo, desthi;
24044 
24045   if (src1 == dest && src2 == dest + halfregs)
24046     {
24047       /* No-op move.  Can't split to nothing; emit something.  */
24048       emit_note (NOTE_INSN_DELETED);
24049       return;
24050     }
24051 
24052   /* Preserve register attributes for variable tracking.  */
24053   destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
24054   desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
24055 			       GET_MODE_SIZE (halfmode));
24056 
24057   /* Special case of reversed high/low parts.  Use VSWP.  */
24058   if (src2 == dest && src1 == dest + halfregs)
24059     {
24060       rtx x = gen_rtx_SET (destlo, operands[1]);
24061       rtx y = gen_rtx_SET (desthi, operands[2]);
24062       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
24063       return;
24064     }
24065 
24066   if (!reg_overlap_mentioned_p (operands[2], destlo))
24067     {
24068       /* Try to avoid unnecessary moves if part of the result
24069 	 is in the right place already.  */
24070       if (src1 != dest)
24071 	emit_move_insn (destlo, operands[1]);
24072       if (src2 != dest + halfregs)
24073 	emit_move_insn (desthi, operands[2]);
24074     }
24075   else
24076     {
24077       if (src2 != dest + halfregs)
24078 	emit_move_insn (desthi, operands[2]);
24079       if (src1 != dest)
24080 	emit_move_insn (destlo, operands[1]);
24081     }
24082 }
24083 
24084 /* Return the number (counting from 0) of
24085    the least significant set bit in MASK.  */
24086 
24087 inline static int
24088 number_of_first_bit_set (unsigned mask)
24089 {
24090   return ctz_hwi (mask);
24091 }
24092 
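/* E.g. number_of_first_bit_set (0x30) is 4, since bit 4 is the lowest set
   bit in the mask.  */
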
24093 /* Like emit_multi_reg_push, but allowing for a different set of
24094    registers to be described as saved.  MASK is the set of registers
24095    to be saved; REAL_REGS is the set of registers to be described as
24096    saved.  If REAL_REGS is 0, only describe the stack adjustment.  */
24097 
24098 static rtx_insn *
24099 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
24100 {
24101   unsigned long regno;
24102   rtx par[10], tmp, reg;
24103   rtx_insn *insn;
24104   int i, j;
24105 
24106   /* Build the parallel of the registers actually being stored.  */
24107   for (i = 0; mask; ++i, mask &= mask - 1)
24108     {
24109       regno = ctz_hwi (mask);
24110       reg = gen_rtx_REG (SImode, regno);
24111 
24112       if (i == 0)
24113 	tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
24114       else
24115 	tmp = gen_rtx_USE (VOIDmode, reg);
24116 
24117       par[i] = tmp;
24118     }
24119 
24120   tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24121   tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
24122   tmp = gen_frame_mem (BLKmode, tmp);
24123   tmp = gen_rtx_SET (tmp, par[0]);
24124   par[0] = tmp;
24125 
24126   tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
24127   insn = emit_insn (tmp);
24128 
24129   /* Always build the stack adjustment note for unwind info.  */
24130   tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24131   tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
24132   par[0] = tmp;
24133 
24134   /* Build the parallel of the registers recorded as saved for unwind.  */
24135   for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
24136     {
24137       regno = ctz_hwi (real_regs);
24138       reg = gen_rtx_REG (SImode, regno);
24139 
24140       tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
24141       tmp = gen_frame_mem (SImode, tmp);
24142       tmp = gen_rtx_SET (tmp, reg);
24143       RTX_FRAME_RELATED_P (tmp) = 1;
24144       par[j + 1] = tmp;
24145     }
24146 
24147   if (j == 0)
24148     tmp = par[0];
24149   else
24150     {
24151       RTX_FRAME_RELATED_P (par[0]) = 1;
24152       tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
24153     }
24154 
24155   add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
24156 
24157   return insn;
24158 }
24159 
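/* A sketch of what the above produces for MASK == REAL_REGS == {r4, lr}:
   the emitted insn is a PARALLEL of
     (set (mem:BLK (pre_modify sp (plus sp -8))) (unspec [r4] UNSPEC_PUSH_MULT))
     (use lr)
   i.e. "push {r4, lr}", while the REG_FRAME_RELATED_EXPR note describes it
   for unwinding as sp = sp - 8 followed by stores of r4 at [sp] and lr at
   [sp, #4].  */
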
24160 /* Emit code to pop registers from the stack.  F is the
24161    assembly file.  MASK is the registers to pop.  */
24162 static void
24163 thumb_pop (FILE *f, unsigned long mask)
24164 {
24165   int regno;
24166   int lo_mask = mask & 0xFF;
24167 
24168   gcc_assert (mask);
24169 
24170   if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
24171     {
24172       /* Special case.  Do not generate a POP PC statement here, do it in
24173 	 thumb_exit ().  */
24174       thumb_exit (f, -1);
24175       return;
24176     }
24177 
24178   fprintf (f, "\tpop\t{");
24179 
24180   /* Look at the low registers first.  */
24181   for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
24182     {
24183       if (lo_mask & 1)
24184 	{
24185 	  asm_fprintf (f, "%r", regno);
24186 
24187 	  if ((lo_mask & ~1) != 0)
24188 	    fprintf (f, ", ");
24189 	}
24190     }
24191 
24192   if (mask & (1 << PC_REGNUM))
24193     {
24194       /* Catch popping the PC.  */
24195       if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
24196 	  || IS_CMSE_ENTRY (arm_current_func_type ()))
24197 	{
24198 	  /* The PC is never popped directly; instead
24199 	     it is popped into r3 and then BX is used.  */
24200 	  fprintf (f, "}\n");
24201 
24202 	  thumb_exit (f, -1);
24203 
24204 	  return;
24205 	}
24206       else
24207 	{
24208 	  if (mask & 0xFF)
24209 	    fprintf (f, ", ");
24210 
24211 	  asm_fprintf (f, "%r", PC_REGNUM);
24212 	}
24213     }
24214 
24215   fprintf (f, "}\n");
24216 }
24217 
24218 /* Generate code to return from a thumb function.
24219    If 'reg_containing_return_addr' is -1, then the return address is
24220    actually on the stack, at the stack pointer.
24221 
24222    Note: do not forget to update the length attribute of the corresponding insn
24223    pattern when changing the assembly output (e.g. the length attribute of
24224    epilogue_insns when updating the Armv8-M Baseline Security Extensions
24225    register clearing sequences).  */
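/* Illustrative example (assuming interworking, no backtrace structure and a
   void return type): with the return address still on the stack this emits
   roughly
	pop	{r1}
	bx	r1
   while the simplest non-interworking case below is just "pop {pc}".  */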
24226 static void
24227 thumb_exit (FILE *f, int reg_containing_return_addr)
24228 {
24229   unsigned regs_available_for_popping;
24230   unsigned regs_to_pop;
24231   int pops_needed;
24232   unsigned available;
24233   unsigned required;
24234   machine_mode mode;
24235   int size;
24236   int restore_a4 = FALSE;
24237 
24238   /* Compute the registers we need to pop.  */
24239   regs_to_pop = 0;
24240   pops_needed = 0;
24241 
24242   if (reg_containing_return_addr == -1)
24243     {
24244       regs_to_pop |= 1 << LR_REGNUM;
24245       ++pops_needed;
24246     }
24247 
24248   if (TARGET_BACKTRACE)
24249     {
24250       /* Restore the (ARM) frame pointer and stack pointer.  */
24251       regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
24252       pops_needed += 2;
24253     }
24254 
24255   /* If there is nothing to pop then just emit the BX instruction and
24256      return.  */
24257   if (pops_needed == 0)
24258     {
24259       if (crtl->calls_eh_return)
24260 	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24261 
24262       if (IS_CMSE_ENTRY (arm_current_func_type ()))
24263 	{
24264 	  asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
24265 		       reg_containing_return_addr);
24266 	  asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24267 	}
24268       else
24269 	asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24270       return;
24271     }
24272   /* Otherwise if we are not supporting interworking and we have not created
24273      a backtrace structure and the function was not entered in ARM mode then
24274      just pop the return address straight into the PC.  */
24275   else if (!TARGET_INTERWORK
24276 	   && !TARGET_BACKTRACE
24277 	   && !is_called_in_ARM_mode (current_function_decl)
24278 	   && !crtl->calls_eh_return
24279 	   && !IS_CMSE_ENTRY (arm_current_func_type ()))
24280     {
24281       asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
24282       return;
24283     }
24284 
24285   /* Find out how many of the (return) argument registers we can corrupt.  */
24286   regs_available_for_popping = 0;
24287 
24288   /* If returning via __builtin_eh_return, the bottom three registers
24289      all contain information needed for the return.  */
24290   if (crtl->calls_eh_return)
24291     size = 12;
24292   else
24293     {
24294       /* See if we can deduce the registers used from the function's
24295 	 return value.  This is more reliable than examining
24296 	 df_regs_ever_live_p () because that will be set if the register is
24297 	 ever used in the function, not just if the register is used
24298 	 to hold a return value.  */
24299 
24300       if (crtl->return_rtx != 0)
24301 	mode = GET_MODE (crtl->return_rtx);
24302       else
24303 	mode = DECL_MODE (DECL_RESULT (current_function_decl));
24304 
24305       size = GET_MODE_SIZE (mode);
24306 
24307       if (size == 0)
24308 	{
24309 	  /* In a void function we can use any argument register.
24310 	     In a function that returns a structure on the stack
24311 	     we can use the second and third argument registers.  */
24312 	  if (mode == VOIDmode)
24313 	    regs_available_for_popping =
24314 	      (1 << ARG_REGISTER (1))
24315 	      | (1 << ARG_REGISTER (2))
24316 	      | (1 << ARG_REGISTER (3));
24317 	  else
24318 	    regs_available_for_popping =
24319 	      (1 << ARG_REGISTER (2))
24320 	      | (1 << ARG_REGISTER (3));
24321 	}
24322       else if (size <= 4)
24323 	regs_available_for_popping =
24324 	  (1 << ARG_REGISTER (2))
24325 	  | (1 << ARG_REGISTER (3));
24326       else if (size <= 8)
24327 	regs_available_for_popping =
24328 	  (1 << ARG_REGISTER (3));
24329     }
24330 
24331   /* Match registers to be popped with registers into which we pop them.  */
24332   for (available = regs_available_for_popping,
24333        required  = regs_to_pop;
24334        required != 0 && available != 0;
24335        available &= ~(available & - available),
24336        required  &= ~(required  & - required))
24337     -- pops_needed;
24338 
24339   /* If we have any popping registers left over, remove them.  */
24340   if (available > 0)
24341     regs_available_for_popping &= ~available;
24342 
24343   /* Otherwise if we need another popping register we can use
24344      the fourth argument register.  */
24345   else if (pops_needed)
24346     {
24347       /* If we have not found any free argument registers and
24348 	 reg a4 contains the return address, we must move it.  */
24349       if (regs_available_for_popping == 0
24350 	  && reg_containing_return_addr == LAST_ARG_REGNUM)
24351 	{
24352 	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24353 	  reg_containing_return_addr = LR_REGNUM;
24354 	}
24355       else if (size > 12)
24356 	{
24357 	  /* Register a4 is being used to hold part of the return value,
24358 	     but we have dire need of a free, low register.  */
24359 	  restore_a4 = TRUE;
24360 
24361 	  asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
24362 	}
24363 
24364       if (reg_containing_return_addr != LAST_ARG_REGNUM)
24365 	{
24366 	  /* The fourth argument register is available.  */
24367 	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24368 
24369 	  --pops_needed;
24370 	}
24371     }
24372 
24373   /* Pop as many registers as we can.  */
24374   thumb_pop (f, regs_available_for_popping);
24375 
24376   /* Process the registers we popped.  */
24377   if (reg_containing_return_addr == -1)
24378     {
24379       /* The return address was popped into the lowest numbered register.  */
24380       regs_to_pop &= ~(1 << LR_REGNUM);
24381 
24382       reg_containing_return_addr =
24383 	number_of_first_bit_set (regs_available_for_popping);
24384 
24385       /* Remove this register from the mask of available registers, so that
24386          the return address will not be corrupted by further pops.  */
24387       regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24388     }
24389 
24390   /* If we popped other registers then handle them here.  */
24391   if (regs_available_for_popping)
24392     {
24393       int frame_pointer;
24394 
24395       /* Work out which register currently contains the frame pointer.  */
24396       frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24397 
24398       /* Move it into the correct place.  */
24399       asm_fprintf (f, "\tmov\t%r, %r\n",
24400 		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24401 
24402       /* (Temporarily) remove it from the mask of popped registers.  */
24403       regs_available_for_popping &= ~(1 << frame_pointer);
24404       regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24405 
24406       if (regs_available_for_popping)
24407 	{
24408 	  int stack_pointer;
24409 
24410 	  /* We popped the stack pointer as well,
24411 	     find the register that contains it.  */
24412 	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24413 
24414 	  /* Move it into the stack register.  */
24415 	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24416 
24417 	  /* At this point we have popped all necessary registers, so
24418 	     do not worry about restoring regs_available_for_popping
24419 	     to its correct value:
24420 
24421 	     assert (pops_needed == 0)
24422 	     assert (regs_available_for_popping == (1 << frame_pointer))
24423 	     assert (regs_to_pop == (1 << STACK_POINTER))  */
24424 	}
24425       else
24426 	{
24427 	  /* Since we have just moved the popped value into the frame
24428 	     pointer, the popping register is available for reuse, and
24429 	     we know that we still have the stack pointer left to pop.  */
24430 	  regs_available_for_popping |= (1 << frame_pointer);
24431 	}
24432     }
24433 
24434   /* If we still have registers left on the stack, but we no longer have
24435      any registers into which we can pop them, then we must move the return
24436      address into the link register and make available the register that
24437      contained it.  */
24438   if (regs_available_for_popping == 0 && pops_needed > 0)
24439     {
24440       regs_available_for_popping |= 1 << reg_containing_return_addr;
24441 
24442       asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24443 		   reg_containing_return_addr);
24444 
24445       reg_containing_return_addr = LR_REGNUM;
24446     }
24447 
24448   /* If we have registers left on the stack then pop some more.
24449      We know that at most we will want to pop FP and SP.  */
24450   if (pops_needed > 0)
24451     {
24452       int  popped_into;
24453       int  move_to;
24454 
24455       thumb_pop (f, regs_available_for_popping);
24456 
24457       /* We have popped either FP or SP.
24458 	 Move whichever one it is into the correct register.  */
24459       popped_into = number_of_first_bit_set (regs_available_for_popping);
24460       move_to     = number_of_first_bit_set (regs_to_pop);
24461 
24462       asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24463       --pops_needed;
24464     }
24465 
24466   /* If we still have not popped everything then we must have only
24467      had one register available to us and we are now popping the SP.  */
24468   if (pops_needed > 0)
24469     {
24470       int  popped_into;
24471 
24472       thumb_pop (f, regs_available_for_popping);
24473 
24474       popped_into = number_of_first_bit_set (regs_available_for_popping);
24475 
24476       asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24477       /*
24478 	assert (regs_to_pop == (1 << STACK_POINTER))
24479 	assert (pops_needed == 1)
24480       */
24481     }
24482 
24483   /* If necessary restore the a4 register.  */
24484   if (restore_a4)
24485     {
24486       if (reg_containing_return_addr != LR_REGNUM)
24487 	{
24488 	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24489 	  reg_containing_return_addr = LR_REGNUM;
24490 	}
24491 
24492       asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24493     }
24494 
24495   if (crtl->calls_eh_return)
24496     asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24497 
24498   /* Return to caller.  */
24499   if (IS_CMSE_ENTRY (arm_current_func_type ()))
24500     {
24501       /* This is for the cases where LR is not being used to contain the return
24502          address.  It may therefore contain information that we might not want
24503 	 to leak, hence it must be cleared.  The value in R0 will never be a
24504 	 secret at this point, so it is safe to use it, see the clearing code
24505 	 in 'cmse_nonsecure_entry_clear_before_return'.  */
24506       if (reg_containing_return_addr != LR_REGNUM)
24507 	asm_fprintf (f, "\tmov\tlr, r0\n");
24508 
24509       asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24510       asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24511     }
24512   else
24513     asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24514 }
24515 
24516 /* Scan INSN just before assembler is output for it.
24517    For Thumb-1, we track the status of the condition codes; this
24518    information is used in the cbranchsi4_insn pattern.  */
24519 void
24520 thumb1_final_prescan_insn (rtx_insn *insn)
24521 {
24522   if (flag_print_asm_name)
24523     asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24524 		 INSN_ADDRESSES (INSN_UID (insn)));
24525   /* Don't overwrite the previous setter when we get to a cbranch.  */
24526   if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24527     {
24528       enum attr_conds conds;
24529 
24530       if (cfun->machine->thumb1_cc_insn)
24531 	{
24532 	  if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24533 	      || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24534 	    CC_STATUS_INIT;
24535 	}
24536       conds = get_attr_conds (insn);
24537       if (conds == CONDS_SET)
24538 	{
24539 	  rtx set = single_set (insn);
24540 	  cfun->machine->thumb1_cc_insn = insn;
24541 	  cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24542 	  cfun->machine->thumb1_cc_op1 = const0_rtx;
24543 	  cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24544 	  if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24545 	    {
24546 	      rtx src1 = XEXP (SET_SRC (set), 1);
24547 	      if (src1 == const0_rtx)
24548 		cfun->machine->thumb1_cc_mode = CCmode;
24549 	    }
24550 	  else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24551 	    {
24552 	      /* Record the src register operand instead of dest because
24553 		 cprop_hardreg pass propagates src.  */
24554 	      cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24555 	    }
24556 	}
24557       else if (conds != CONDS_NOCOND)
24558 	cfun->machine->thumb1_cc_insn = NULL_RTX;
24559     }
24560 
24561     /* Check if an unexpected far jump is used.  */
24562     if (cfun->machine->lr_save_eliminated
24563         && get_attr_far_jump (insn) == FAR_JUMP_YES)
24564       internal_error ("Unexpected thumb1 far jump");
24565 }
24566 
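/* Return nonzero if VAL (viewed as a 32-bit value) is an 8-bit constant
   shifted left by 0..24 bits, i.e. something that can plausibly be built with
   a Thumb-1 move of the 8-bit part followed by a left shift.  For example
   0xFF00 qualifies (0xFF << 8), while 0x101 does not, since its set bits do
   not fit within any 8 contiguous bit positions.  */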
24567 int
24568 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24569 {
24570   unsigned HOST_WIDE_INT mask = 0xff;
24571   int i;
24572 
24573   val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24574   if (val == 0) /* XXX */
24575     return 0;
24576 
24577   for (i = 0; i < 25; i++)
24578     if ((val & (mask << i)) == val)
24579       return 1;
24580 
24581   return 0;
24582 }
24583 
24584 /* Returns nonzero if the current function contains,
24585    or might contain, a far jump.  */
24586 static int
24587 thumb_far_jump_used_p (void)
24588 {
24589   rtx_insn *insn;
24590   bool far_jump = false;
24591   unsigned int func_size = 0;
24592 
24593   /* If we have already decided that far jumps may be used,
24594      do not bother checking again, and always return true even if
24595      it turns out that they are not being used.  Once we have made
24596      the decision that far jumps are present (and that hence the link
24597      register will be pushed onto the stack) we cannot go back on it.  */
24598   if (cfun->machine->far_jump_used)
24599     return 1;
24600 
24601   /* If this function is not being called from the prologue/epilogue
24602      generation code then it must be being called from the
24603      INITIAL_ELIMINATION_OFFSET macro.  */
24604   if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24605     {
24606       /* In this case we know that we are being asked about the elimination
24607 	 of the arg pointer register.  If that register is not being used,
24608 	 then there are no arguments on the stack, and we do not have to
24609 	 worry that a far jump might force the prologue to push the link
24610 	 register, changing the stack offsets.  In this case we can just
24611 	 return false, since the presence of far jumps in the function will
24612 	 not affect stack offsets.
24613 
24614 	 If the arg pointer is live (or if it was live, but has now been
24615 	 eliminated and so set to dead) then we do have to test to see if
24616 	 the function might contain a far jump.  This test can lead to some
24617 	 false negatives, since before reload is completed, the length of
24618 	 branch instructions is not known, so gcc defaults to returning their
24619 	 longest length, which in turn sets the far jump attribute to true.
24620 
24621 	 A false negative will not result in bad code being generated, but it
24622 	 will result in a needless push and pop of the link register.  We
24623 	 hope that this does not occur too often.
24624 
24625 	 If we need doubleword stack alignment this could affect the other
24626 	 elimination offsets so we can't risk getting it wrong.  */
24627       if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24628 	cfun->machine->arg_pointer_live = 1;
24629       else if (!cfun->machine->arg_pointer_live)
24630 	return 0;
24631     }
24632 
24633   /* We should not change far_jump_used during or after reload, as there is
24634      no chance to change stack frame layout.  */
24635   if (reload_in_progress || reload_completed)
24636     return 0;
24637 
24638   /* Check to see if the function contains a branch
24639      insn with the far jump attribute set.  */
24640   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24641     {
24642       if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24643 	{
24644 	  far_jump = true;
24645 	}
24646       func_size += get_attr_length (insn);
24647     }
24648 
24649   /* Attribute far_jump will always be true for thumb1 before the
24650      shorten_branches pass.  So checking the far_jump attribute before
24651      shorten_branches isn't very useful.
24652 
24653      The following heuristic tries to estimate more accurately whether a far
24654      jump may finally be used.  The heuristic is very conservative as there is
24655      no chance to roll back the decision not to use far jumps.
24656 
24657      The Thumb-1 long branch offset range is -2048 to 2046.  The worst case is
24658      each 2-byte insn associated with a 4-byte constant pool entry.  Using
24659      function size 2048/3 as the threshold is conservative enough.  */
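  /* As a worked example of the threshold: if such a branch was seen, a
     function whose insns total 700 bytes gives 700 * 3 = 2100 >= 2048 and so
     commits to far jumps, while one of 600 bytes (1800 < 2048) does not.  */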
24660   if (far_jump)
24661     {
24662       if ((func_size * 3) >= 2048)
24663         {
24664 	  /* Record the fact that we have decided that
24665 	     the function does use far jumps.  */
24666 	  cfun->machine->far_jump_used = 1;
24667 	  return 1;
24668 	}
24669     }
24670 
24671   return 0;
24672 }
24673 
24674 /* Return nonzero if FUNC must be entered in ARM mode.  */
24675 static bool
24676 is_called_in_ARM_mode (tree func)
24677 {
24678   gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24679 
24680   /* Ignore the problem about functions whose address is taken.  */
24681   if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24682     return true;
24683 
24684 #ifdef ARM_PE
24685   return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24686 #else
24687   return false;
24688 #endif
24689 }
24690 
24691 /* Given the stack offsets and register mask in OFFSETS, decide how
24692    many additional registers to push instead of subtracting a constant
24693    from SP.  For epilogues the principle is the same except we use pop.
24694    FOR_PROLOGUE indicates which we're generating.  */
24695 static int
24696 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24697 {
24698   HOST_WIDE_INT amount;
24699   unsigned long live_regs_mask = offsets->saved_regs_mask;
24700   /* Extract a mask of the ones we can give to the Thumb's push/pop
24701      instruction.  */
24702   unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24703   /* Then count how many other high registers will need to be pushed.  */
24704   unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24705   int n_free, reg_base, size;
24706 
24707   if (!for_prologue && frame_pointer_needed)
24708     amount = offsets->locals_base - offsets->saved_regs;
24709   else
24710     amount = offsets->outgoing_args - offsets->saved_regs;
24711 
24712   /* If the stack frame size is 512 exactly, we can save one load
24713      instruction, which should make this a win even when optimizing
24714      for speed.  */
24715   if (!optimize_size && amount != 512)
24716     return 0;
24717 
24718   /* Can't do this if there are high registers to push.  */
24719   if (high_regs_pushed != 0)
24720     return 0;
24721 
24722   /* Shouldn't do it in the prologue if no registers would normally
24723      be pushed at all.  In the epilogue, also allow it if we'll have
24724      a pop insn for the PC.  */
24725   if  (l_mask == 0
24726        && (for_prologue
24727 	   || TARGET_BACKTRACE
24728 	   || (live_regs_mask & 1 << LR_REGNUM) == 0
24729 	   || TARGET_INTERWORK
24730 	   || crtl->args.pretend_args_size != 0))
24731     return 0;
24732 
24733   /* Don't do this if thumb_expand_prologue wants to emit instructions
24734      between the push and the stack frame allocation.  */
24735   if (for_prologue
24736       && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24737 	  || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24738     return 0;
24739 
24740   reg_base = 0;
24741   n_free = 0;
24742   if (!for_prologue)
24743     {
24744       size = arm_size_return_regs ();
24745       reg_base = ARM_NUM_INTS (size);
24746       live_regs_mask >>= reg_base;
24747     }
24748 
24749   while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24750 	 && (for_prologue || call_used_regs[reg_base + n_free]))
24751     {
24752       live_regs_mask >>= 1;
24753       n_free++;
24754     }
24755 
24756   if (n_free == 0)
24757     return 0;
24758   gcc_assert (amount / 4 * 4 == amount);
24759 
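  /* Worked example: with AMOUNT == 516 and two free low registers,
     516 >= 512 and 516 - 2 * 4 < 512, so we return (516 - 508) / 4 = 2;
     pushing two extra registers leaves a 508-byte adjustment, which fits a
     single Thumb-1 SP-adjusting insn.  */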
24760   if (amount >= 512 && (amount - n_free * 4) < 512)
24761     return (amount - 508) / 4;
24762   if (amount <= n_free * 4)
24763     return amount / 4;
24764   return 0;
24765 }
24766 
24767 /* The bits which aren't usefully expanded as rtl.  */
24768 const char *
24769 thumb1_unexpanded_epilogue (void)
24770 {
24771   arm_stack_offsets *offsets;
24772   int regno;
24773   unsigned long live_regs_mask = 0;
24774   int high_regs_pushed = 0;
24775   int extra_pop;
24776   int had_to_push_lr;
24777   int size;
24778 
24779   if (cfun->machine->return_used_this_function != 0)
24780     return "";
24781 
24782   if (IS_NAKED (arm_current_func_type ()))
24783     return "";
24784 
24785   offsets = arm_get_frame_offsets ();
24786   live_regs_mask = offsets->saved_regs_mask;
24787   high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24788 
24789   /* See if we can deduce the registers used from the function's return value.
24790      This is more reliable than examining df_regs_ever_live_p () because that
24791      will be set if the register is ever used in the function, not just if
24792      the register is used to hold a return value.  */
24793   size = arm_size_return_regs ();
24794 
24795   extra_pop = thumb1_extra_regs_pushed (offsets, false);
24796   if (extra_pop > 0)
24797     {
24798       unsigned long extra_mask = (1 << extra_pop) - 1;
24799       live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24800     }
24801 
24802   /* The prologue may have pushed some high registers to use as
24803      work registers.  E.g. the testsuite file:
24804      gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24805      compiles to produce:
24806 	push	{r4, r5, r6, r7, lr}
24807 	mov	r7, r9
24808 	mov	r6, r8
24809 	push	{r6, r7}
24810      as part of the prologue.  We have to undo that pushing here.  */
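  /* The undo of the example above therefore looks roughly like:
	pop	{r6, r7}
	mov	r9, r7
	mov	r8, r6
	pop	{r4, r5, r6, r7, pc}
     where the first pop reloads the saved r8/r9 values into low registers
     before moving them back up.  */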
24811 
24812   if (high_regs_pushed)
24813     {
24814       unsigned long mask = live_regs_mask & 0xff;
24815       int next_hi_reg;
24816 
24817       mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();
24818 
24819       if (mask == 0)
24820 	/* Oh dear!  We have no low registers into which we can pop
24821            high registers!  */
24822 	internal_error
24823 	  ("no low registers available for popping high registers");
24824 
24825       for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24826 	if (live_regs_mask & (1 << next_hi_reg))
24827 	  break;
24828 
24829       while (high_regs_pushed)
24830 	{
24831 	  /* Find lo register(s) into which the high register(s) can
24832              be popped.  */
24833 	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
24834 	    {
24835 	      if (mask & (1 << regno))
24836 		high_regs_pushed--;
24837 	      if (high_regs_pushed == 0)
24838 		break;
24839 	    }
24840 
24841 	  if (high_regs_pushed == 0 && regno >= 0)
24842 	    mask &= ~((1 << regno) - 1);
24843 
24844 	  /* Pop the values into the low register(s).  */
24845 	  thumb_pop (asm_out_file, mask);
24846 
24847 	  /* Move the value(s) into the high registers.  */
24848 	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
24849 	    {
24850 	      if (mask & (1 << regno))
24851 		{
24852 		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24853 			       regno);
24854 
24855 		  for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
24856 		       next_hi_reg--)
24857 		    if (live_regs_mask & (1 << next_hi_reg))
24858 		      break;
24859 		}
24860 	    }
24861 	}
24862       live_regs_mask &= ~0x0f00;
24863     }
24864 
24865   had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24866   live_regs_mask &= 0xff;
24867 
24868   if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24869     {
24870       /* Pop the return address into the PC.  */
24871       if (had_to_push_lr)
24872 	live_regs_mask |= 1 << PC_REGNUM;
24873 
24874       /* Either no argument registers were pushed or a backtrace
24875 	 structure was created which includes an adjusted stack
24876 	 pointer, so just pop everything.  */
24877       if (live_regs_mask)
24878 	thumb_pop (asm_out_file, live_regs_mask);
24879 
24880       /* We have either just popped the return address into the
24881 	 PC or it was kept in LR for the entire function.
24882 	 Note that thumb_pop has already called thumb_exit if the
24883 	 PC was in the list.  */
24884       if (!had_to_push_lr)
24885 	thumb_exit (asm_out_file, LR_REGNUM);
24886     }
24887   else
24888     {
24889       /* Pop everything but the return address.  */
24890       if (live_regs_mask)
24891 	thumb_pop (asm_out_file, live_regs_mask);
24892 
24893       if (had_to_push_lr)
24894 	{
24895 	  if (size > 12)
24896 	    {
24897 	      /* We have no free low regs, so save one.  */
24898 	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24899 			   LAST_ARG_REGNUM);
24900 	    }
24901 
24902 	  /* Get the return address into a temporary register.  */
24903 	  thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24904 
24905 	  if (size > 12)
24906 	    {
24907 	      /* Move the return address to lr.  */
24908 	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24909 			   LAST_ARG_REGNUM);
24910 	      /* Restore the low register.  */
24911 	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24912 			   IP_REGNUM);
24913 	      regno = LR_REGNUM;
24914 	    }
24915 	  else
24916 	    regno = LAST_ARG_REGNUM;
24917 	}
24918       else
24919 	regno = LR_REGNUM;
24920 
24921       /* Remove the argument registers that were pushed onto the stack.  */
24922       asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24923 		   SP_REGNUM, SP_REGNUM,
24924 		   crtl->args.pretend_args_size);
24925 
24926       thumb_exit (asm_out_file, regno);
24927     }
24928 
24929   return "";
24930 }
24931 
24932 /* Functions to save and restore machine-specific function data.  */
24933 static struct machine_function *
24934 arm_init_machine_status (void)
24935 {
24936   struct machine_function *machine;
24937   machine = ggc_cleared_alloc<machine_function> ();
24938 
24939 #if ARM_FT_UNKNOWN != 0
24940   machine->func_type = ARM_FT_UNKNOWN;
24941 #endif
24942   machine->static_chain_stack_bytes = -1;
24943   return machine;
24944 }
24945 
24946 /* Return an RTX indicating where the return address to the
24947    calling function can be found.  */
24948 rtx
24949 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24950 {
24951   if (count != 0)
24952     return NULL_RTX;
24953 
24954   return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24955 }
24956 
24957 /* Do anything needed before RTL is emitted for each function.  */
24958 void
24959 arm_init_expanders (void)
24960 {
24961   /* Arrange to initialize and mark the machine per-function status.  */
24962   init_machine_status = arm_init_machine_status;
24963 
24964   /* This is to stop the combine pass optimizing away the alignment
24965      adjustment of va_arg.  */
24966   /* ??? It is claimed that this should not be necessary.  */
24967   if (cfun)
24968     mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24969 }
24970 
24971 /* Return true if FUNC is compiled in a different instruction-set mode (ARM vs. Thumb) from the current one.  */
24972 
24973 bool
24974 arm_change_mode_p (tree func)
24975 {
24976   if (TREE_CODE (func) != FUNCTION_DECL)
24977     return false;
24978 
24979   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24980 
24981   if (!callee_tree)
24982     callee_tree = target_option_default_node;
24983 
24984   struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24985   int flags = callee_opts->x_target_flags;
24986 
24987   return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24988 }
24989 
24990 /* Like arm_compute_initial_elimination_offset.  Simpler because there
24991    isn't an ABI specified frame pointer for Thumb.  Instead, we set it
24992    to point at the base of the local variables after static stack
24993    space for a function has been allocated.  */
24994 
24995 HOST_WIDE_INT
24996 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24997 {
24998   arm_stack_offsets *offsets;
24999 
25000   offsets = arm_get_frame_offsets ();
25001 
25002   switch (from)
25003     {
25004     case ARG_POINTER_REGNUM:
25005       switch (to)
25006 	{
25007 	case STACK_POINTER_REGNUM:
25008 	  return offsets->outgoing_args - offsets->saved_args;
25009 
25010 	case FRAME_POINTER_REGNUM:
25011 	  return offsets->soft_frame - offsets->saved_args;
25012 
25013 	case ARM_HARD_FRAME_POINTER_REGNUM:
25014 	  return offsets->saved_regs - offsets->saved_args;
25015 
25016 	case THUMB_HARD_FRAME_POINTER_REGNUM:
25017 	  return offsets->locals_base - offsets->saved_args;
25018 
25019 	default:
25020 	  gcc_unreachable ();
25021 	}
25022       break;
25023 
25024     case FRAME_POINTER_REGNUM:
25025       switch (to)
25026 	{
25027 	case STACK_POINTER_REGNUM:
25028 	  return offsets->outgoing_args - offsets->soft_frame;
25029 
25030 	case ARM_HARD_FRAME_POINTER_REGNUM:
25031 	  return offsets->saved_regs - offsets->soft_frame;
25032 
25033 	case THUMB_HARD_FRAME_POINTER_REGNUM:
25034 	  return offsets->locals_base - offsets->soft_frame;
25035 
25036 	default:
25037 	  gcc_unreachable ();
25038 	}
25039       break;
25040 
25041     default:
25042       gcc_unreachable ();
25043     }
25044 }
25045 
25046 /* Generate the function's prologue.  */
25047 
25048 void
25049 thumb1_expand_prologue (void)
25050 {
25051   rtx_insn *insn;
25052 
25053   HOST_WIDE_INT amount;
25054   HOST_WIDE_INT size;
25055   arm_stack_offsets *offsets;
25056   unsigned long func_type;
25057   int regno;
25058   unsigned long live_regs_mask;
25059   unsigned long l_mask;
25060   unsigned high_regs_pushed = 0;
25061   bool lr_needs_saving;
25062 
25063   func_type = arm_current_func_type ();
25064 
25065   /* Naked functions don't have prologues.  */
25066   if (IS_NAKED (func_type))
25067     {
25068       if (flag_stack_usage_info)
25069 	current_function_static_stack_size = 0;
25070       return;
25071     }
25072 
25073   if (IS_INTERRUPT (func_type))
25074     {
25075       error ("interrupt Service Routines cannot be coded in Thumb mode");
25076       return;
25077     }
25078 
25079   if (is_called_in_ARM_mode (current_function_decl))
25080     emit_insn (gen_prologue_thumb1_interwork ());
25081 
25082   offsets = arm_get_frame_offsets ();
25083   live_regs_mask = offsets->saved_regs_mask;
25084   lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
25085 
25086   /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
25087   l_mask = live_regs_mask & 0x40ff;
25088   /* Then count how many other high registers will need to be pushed.  */
25089   high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25090 
25091   if (crtl->args.pretend_args_size)
25092     {
25093       rtx x = GEN_INT (-crtl->args.pretend_args_size);
25094 
25095       if (cfun->machine->uses_anonymous_args)
25096 	{
25097 	  int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
25098 	  unsigned long mask;
25099 
25100 	  mask = 1ul << (LAST_ARG_REGNUM + 1);
25101 	  mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
25102 
25103 	  insn = thumb1_emit_multi_reg_push (mask, 0);
25104 	}
25105       else
25106 	{
25107 	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25108 					stack_pointer_rtx, x));
25109 	}
25110       RTX_FRAME_RELATED_P (insn) = 1;
25111     }
25112 
25113   if (TARGET_BACKTRACE)
25114     {
25115       HOST_WIDE_INT offset = 0;
25116       unsigned work_register;
25117       rtx work_reg, x, arm_hfp_rtx;
25118 
25119       /* We have been asked to create a stack backtrace structure.
25120          The code looks like this:
25121 
25122 	 0   .align 2
25123 	 0   func:
25124          0     sub   SP, #16         Reserve space for 4 registers.
25125 	 2     push  {R7}            Push low registers.
25126          4     add   R7, SP, #20     Get the stack pointer before the push.
25127          6     str   R7, [SP, #8]    Store the stack pointer
25128 					(before reserving the space).
25129          8     mov   R7, PC          Get hold of the start of this code + 12.
25130         10     str   R7, [SP, #16]   Store it.
25131         12     mov   R7, FP          Get hold of the current frame pointer.
25132         14     str   R7, [SP, #4]    Store it.
25133         16     mov   R7, LR          Get hold of the current return address.
25134         18     str   R7, [SP, #12]   Store it.
25135         20     add   R7, SP, #16     Point at the start of the
25136 					backtrace structure.
25137         22     mov   FP, R7          Put this value into the frame pointer.  */
25138 
25139       work_register = thumb_find_work_register (live_regs_mask);
25140       work_reg = gen_rtx_REG (SImode, work_register);
25141       arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
25142 
25143       insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25144 				    stack_pointer_rtx, GEN_INT (-16)));
25145       RTX_FRAME_RELATED_P (insn) = 1;
25146 
25147       if (l_mask)
25148 	{
25149 	  insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
25150 	  RTX_FRAME_RELATED_P (insn) = 1;
25151 	  lr_needs_saving = false;
25152 
25153 	  offset = bit_count (l_mask) * UNITS_PER_WORD;
25154 	}
25155 
25156       x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
25157       emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25158 
25159       x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
25160       x = gen_frame_mem (SImode, x);
25161       emit_move_insn (x, work_reg);
25162 
25163       /* Make sure that the instruction fetching the PC is in the right place
25164 	 to calculate "start of backtrace creation code + 12".  */
25165       /* ??? The stores using the common WORK_REG ought to be enough to
25166 	 prevent the scheduler from doing anything weird.  Failing that
25167 	 we could always move all of the following into an UNSPEC_VOLATILE.  */
25168       if (l_mask)
25169 	{
25170 	  x = gen_rtx_REG (SImode, PC_REGNUM);
25171 	  emit_move_insn (work_reg, x);
25172 
25173 	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25174 	  x = gen_frame_mem (SImode, x);
25175 	  emit_move_insn (x, work_reg);
25176 
25177 	  emit_move_insn (work_reg, arm_hfp_rtx);
25178 
25179 	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
25180 	  x = gen_frame_mem (SImode, x);
25181 	  emit_move_insn (x, work_reg);
25182 	}
25183       else
25184 	{
25185 	  emit_move_insn (work_reg, arm_hfp_rtx);
25186 
25187 	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
25188 	  x = gen_frame_mem (SImode, x);
25189 	  emit_move_insn (x, work_reg);
25190 
25191 	  x = gen_rtx_REG (SImode, PC_REGNUM);
25192 	  emit_move_insn (work_reg, x);
25193 
25194 	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25195 	  x = gen_frame_mem (SImode, x);
25196 	  emit_move_insn (x, work_reg);
25197 	}
25198 
25199       x = gen_rtx_REG (SImode, LR_REGNUM);
25200       emit_move_insn (work_reg, x);
25201 
25202       x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
25203       x = gen_frame_mem (SImode, x);
25204       emit_move_insn (x, work_reg);
25205 
25206       x = GEN_INT (offset + 12);
25207       emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25208 
25209       emit_move_insn (arm_hfp_rtx, work_reg);
25210     }
25211   /* Optimization:  If we are not pushing any low registers but we are going
25212      to push some high registers then delay our first push.  This will just
25213      be a push of LR and we can combine it with the push of the first high
25214      register.  */
25215   else if ((l_mask & 0xff) != 0
25216 	   || (high_regs_pushed == 0 && lr_needs_saving))
25217     {
25218       unsigned long mask = l_mask;
25219       mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
25220       insn = thumb1_emit_multi_reg_push (mask, mask);
25221       RTX_FRAME_RELATED_P (insn) = 1;
25222       lr_needs_saving = false;
25223     }
25224 
25225   if (high_regs_pushed)
25226     {
25227       unsigned pushable_regs;
25228       unsigned next_hi_reg;
25229       unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
25230 						 : crtl->args.info.nregs;
25231       unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
25232 
25233       for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
25234 	if (live_regs_mask & (1 << next_hi_reg))
25235 	  break;
25236 
25237       /* Here we need to mask out registers used for passing arguments
25238 	 even if they can be pushed.  This is to avoid using them to
25239 	 stash the high registers.  Such a stash could clobber argument
25240 	 values that are still needed.  */
25241       pushable_regs = l_mask & (~arg_regs_mask);
25242       pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();
25243 
25244       /* Normally, LR can be used as a scratch register once it has been
25245 	 saved; but if the function examines its own return address then
25246 	 the value is still live and we need to avoid using it.  */
25247       bool return_addr_live
25248 	= REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
25249 			   LR_REGNUM);
25250 
25251       if (lr_needs_saving || return_addr_live)
25252 	pushable_regs &= ~(1 << LR_REGNUM);
25253 
25254       if (pushable_regs == 0)
25255 	pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
25256 
25257       while (high_regs_pushed > 0)
25258 	{
25259 	  unsigned long real_regs_mask = 0;
25260 	  unsigned long push_mask = 0;
25261 
25262 	  for (regno = LR_REGNUM; regno >= 0; regno --)
25263 	    {
25264 	      if (pushable_regs & (1 << regno))
25265 		{
25266 		  emit_move_insn (gen_rtx_REG (SImode, regno),
25267 				  gen_rtx_REG (SImode, next_hi_reg));
25268 
25269 		  high_regs_pushed --;
25270 		  real_regs_mask |= (1 << next_hi_reg);
25271 		  push_mask |= (1 << regno);
25272 
25273 		  if (high_regs_pushed)
25274 		    {
25275 		      for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
25276 			   next_hi_reg --)
25277 			if (live_regs_mask & (1 << next_hi_reg))
25278 			  break;
25279 		    }
25280 		  else
25281 		    break;
25282 		}
25283 	    }
25284 
25285 	  /* If we had to find a work register and we have not yet
25286 	     saved the LR then add it to the list of regs to push.  */
25287 	  if (lr_needs_saving)
25288 	    {
25289 	      push_mask |= 1 << LR_REGNUM;
25290 	      real_regs_mask |= 1 << LR_REGNUM;
25291 	      lr_needs_saving = false;
25292 	      /* If the return address is not live at this point, we
25293 		 can add LR to the list of registers that we can use
25294 		 for pushes.  */
25295 	      if (!return_addr_live)
25296 		pushable_regs |= 1 << LR_REGNUM;
25297 	    }
25298 
25299 	  insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
25300 	  RTX_FRAME_RELATED_P (insn) = 1;
25301 	}
25302     }
25303 
25304   /* Load the pic register before setting the frame pointer,
25305      so we can use r7 as a temporary work register.  */
25306   if (flag_pic && arm_pic_register != INVALID_REGNUM)
25307     arm_load_pic_register (live_regs_mask);
25308 
25309   if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
25310     emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
25311 		    stack_pointer_rtx);
25312 
25313   size = offsets->outgoing_args - offsets->saved_args;
25314   if (flag_stack_usage_info)
25315     current_function_static_stack_size = size;
25316 
25317   /* If we have a frame, then do stack checking.  FIXME: not implemented.  */
25318   if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
25319        || flag_stack_clash_protection)
25320       && size)
25321     sorry ("-fstack-check=specific for Thumb-1");
25322 
25323   amount = offsets->outgoing_args - offsets->saved_regs;
25324   amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
25325   if (amount)
25326     {
25327       if (amount < 512)
25328 	{
25329 	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25330 					GEN_INT (- amount)));
25331 	  RTX_FRAME_RELATED_P (insn) = 1;
25332 	}
25333       else
25334 	{
25335 	  rtx reg, dwarf;
25336 
25337 	  /* The stack decrement is too big for an immediate value in a single
25338 	     insn.  In theory we could issue multiple subtracts, but after
25339 	     three of them it becomes more space efficient to place the full
25340 	     value in the constant pool and load into a register.  (Also the
25341 	     ARM debugger really likes to see only one stack decrement per
25342 	     function).  So instead we look for a scratch register into which
25343 	     we can load the decrement, and then we subtract this from the
25344 	     stack pointer.  Unfortunately on the thumb the only available
25345 	     scratch registers are the argument registers, and we cannot use
25346 	     these as they may hold arguments to the function.  Instead we
25347 	     attempt to locate a call preserved register which is used by this
25348 	     function.  If we can find one, then we know that it will have
25349 	     been pushed at the start of the prologue and so we can corrupt
25350 	     it now.  */
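	  /* The resulting sequence is therefore roughly: load -amount into
	     the call-saved low register rN located below (typically via a
	     literal-pool load), then
		add	sp, sp, rN  */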
25351 	  for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25352 	    if (live_regs_mask & (1 << regno))
25353 	      break;
25354 
25355 	  gcc_assert (regno <= LAST_LO_REGNUM);
25356 
25357 	  reg = gen_rtx_REG (SImode, regno);
25358 
25359 	  emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25360 
25361 	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25362 					stack_pointer_rtx, reg));
25363 
25364 	  dwarf = gen_rtx_SET (stack_pointer_rtx,
25365 			       plus_constant (Pmode, stack_pointer_rtx,
25366 					      -amount));
25367 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25368 	  RTX_FRAME_RELATED_P (insn) = 1;
25369 	}
25370     }
25371 
25372   if (frame_pointer_needed)
25373     thumb_set_frame_pointer (offsets);
25374 
25375   /* If we are profiling, make sure no instructions are scheduled before
25376      the call to mcount.  Similarly if the user has requested no
25377      scheduling in the prolog.  Similarly if we want non-call exceptions
25378      using the EABI unwinder, to prevent faulting instructions from being
25379      swapped with a stack adjustment.  */
25380   if (crtl->profile || !TARGET_SCHED_PROLOG
25381       || (arm_except_unwind_info (&global_options) == UI_TARGET
25382 	  && cfun->can_throw_non_call_exceptions))
25383     emit_insn (gen_blockage ());
25384 
25385   cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25386   if (live_regs_mask & 0xff)
25387     cfun->machine->lr_save_eliminated = 0;
25388 }
25389 
25390 /* Clear caller-saved registers that are not used to pass return values, and
25391    clear leaked condition flags, before exiting a cmse_nonsecure_entry function.  */
25392 
25393 void
25394 cmse_nonsecure_entry_clear_before_return (void)
25395 {
25396   int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
25397   uint32_t padding_bits_to_clear = 0;
25398   auto_sbitmap to_clear_bitmap (maxregno + 1);
25399   rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
25400   tree result_type;
25401 
25402   bitmap_clear (to_clear_bitmap);
25403   bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
25404   bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
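  /* At this point the bitmap covers the AAPCS argument registers r0-r3 plus
     ip; the hard-float and user-configured caller-saved cases below widen it,
     and the registers actually used to return the result are removed again
     further down.  */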
25405 
25406   /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25407      registers.  */
25408   if (TARGET_HARD_FLOAT)
25409     {
25410       int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
25411 
25412       bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
25413 
25414       /* Make sure we don't clear the two scratch registers used to clear the
25415 	 relevant FPSCR bits in output_return_instruction.  */
25416       emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25417       bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
25418       emit_use (gen_rtx_REG (SImode, 4));
25419       bitmap_clear_bit (to_clear_bitmap, 4);
25420     }
25421 
25422   /* If the user has defined registers to be caller saved, these are no longer
25423      restored by the function before returning and must thus be cleared for
25424      security purposes.  */
25425   for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
25426     {
25427       /* We do not touch registers that can be used to pass arguments as per
25428 	 the AAPCS, since these should never be made callee-saved by user
25429 	 options.  */
25430       if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25431 	continue;
25432       if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25433 	continue;
25434       if (call_used_regs[regno])
25435 	bitmap_set_bit (to_clear_bitmap, regno);
25436     }
25437 
25438   /* Make sure we do not clear the registers used to return the result in.  */
25439   result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25440   if (!VOID_TYPE_P (result_type))
25441     {
25442       uint64_t to_clear_return_mask;
25443       result_rtl = arm_function_value (result_type, current_function_decl, 0);
25444 
25445       /* No need to check that we return in registers, because we don't
25446 	 support returning on stack yet.  */
25447       gcc_assert (REG_P (result_rtl));
25448       to_clear_return_mask
25449 	= compute_not_to_clear_mask (result_type, result_rtl, 0,
25450 				     &padding_bits_to_clear);
25451       if (to_clear_return_mask)
25452 	{
25453 	  gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
25454 	  for (regno = R0_REGNUM; regno <= maxregno; regno++)
25455 	    {
25456 	      if (to_clear_return_mask & (1ULL << regno))
25457 		bitmap_clear_bit (to_clear_bitmap, regno);
25458 	    }
25459 	}
25460     }
25461 
25462   if (padding_bits_to_clear != 0)
25463     {
25464       int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
25465       auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
25466 
25467       /* Padding_bits_to_clear is not 0 so we know we are dealing with
25468 	 returning a composite type, which only uses r0.  Let's make sure that
25469 	 r1-r3 is cleared too.  */
25470       bitmap_clear (to_clear_arg_regs_bitmap);
25471       bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
25472       gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
25473     }
25474 
25475   /* Clear full registers that leak before returning.  */
25476   clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
25477   r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
25478   cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
25479 			clearing_reg);
25480 }
25481 
25482 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a single
25483    POP instruction can be generated.  LR should be replaced by PC.  All
25484    the checks required are already done by USE_RETURN_INSN ().  Hence,
25485    all we really need to check here is whether a single register or
25486    multiple registers are to be popped on return.  */
25487 void
25488 thumb2_expand_return (bool simple_return)
25489 {
25490   int i, num_regs;
25491   unsigned long saved_regs_mask;
25492   arm_stack_offsets *offsets;
25493 
25494   offsets = arm_get_frame_offsets ();
25495   saved_regs_mask = offsets->saved_regs_mask;
25496 
25497   for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25498     if (saved_regs_mask & (1 << i))
25499       num_regs++;
25500 
25501   if (!simple_return && saved_regs_mask)
25502     {
25503       /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25504 	 functions or adapt code to handle according to ACLE.  This path should
25505 	 not be reachable for cmse_nonsecure_entry functions though we prefer
25506 	 to assert it for now to ensure that future code changes do not silently
25507 	 change this behavior.  */
25508       gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
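      /* For a single saved register the return is expressed below as a
	 PARALLEL of a return with a post-increment load of the PC from the
	 stack; for more than one register we instead rewrite LR to PC in the
	 mask and emit a multi-register pop.  */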
25509       if (num_regs == 1)
25510         {
25511           rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25512           rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25513           rtx addr = gen_rtx_MEM (SImode,
25514                                   gen_rtx_POST_INC (SImode,
25515                                                     stack_pointer_rtx));
25516           set_mem_alias_set (addr, get_frame_alias_set ());
25517           XVECEXP (par, 0, 0) = ret_rtx;
25518           XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25519           RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25520           emit_jump_insn (par);
25521         }
25522       else
25523         {
25524           saved_regs_mask &= ~ (1 << LR_REGNUM);
25525           saved_regs_mask |=   (1 << PC_REGNUM);
25526           arm_emit_multi_reg_pop (saved_regs_mask);
25527         }
25528     }
25529   else
25530     {
25531       if (IS_CMSE_ENTRY (arm_current_func_type ()))
25532 	cmse_nonsecure_entry_clear_before_return ();
25533       emit_jump_insn (simple_return_rtx);
25534     }
25535 }
25536 
25537 void
25538 thumb1_expand_epilogue (void)
25539 {
25540   HOST_WIDE_INT amount;
25541   arm_stack_offsets *offsets;
25542   int regno;
25543 
25544   /* Naked functions don't have prologues.  */
25545   if (IS_NAKED (arm_current_func_type ()))
25546     return;
25547 
25548   offsets = arm_get_frame_offsets ();
25549   amount = offsets->outgoing_args - offsets->saved_regs;
25550 
25551   if (frame_pointer_needed)
25552     {
25553       emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25554       amount = offsets->locals_base - offsets->saved_regs;
25555     }
25556   amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25557 
25558   gcc_assert (amount >= 0);
25559   if (amount)
25560     {
25561       emit_insn (gen_blockage ());
25562 
25563       if (amount < 512)
25564 	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25565 			       GEN_INT (amount)));
25566       else
25567 	{
25568 	  /* r3 is always free in the epilogue.  */
25569 	  rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25570 
25571 	  emit_insn (gen_movsi (reg, GEN_INT (amount)));
25572 	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25573 	}
25574     }
25575 
25576   /* Emit a USE (stack_pointer_rtx), so that
25577      the stack adjustment will not be deleted.  */
25578   emit_insn (gen_force_register_use (stack_pointer_rtx));
25579 
25580   if (crtl->profile || !TARGET_SCHED_PROLOG)
25581     emit_insn (gen_blockage ());
25582 
25583   /* Emit a clobber for each insn that will be restored in the epilogue,
25584      so that flow2 will get register lifetimes correct.  */
25585   for (regno = 0; regno < 13; regno++)
25586     if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25587       emit_clobber (gen_rtx_REG (SImode, regno));
25588 
25589   if (! df_regs_ever_live_p (LR_REGNUM))
25590     emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25591 
25592   /* Clear all caller-saved regs that are not used to return.  */
25593   if (IS_CMSE_ENTRY (arm_current_func_type ()))
25594     cmse_nonsecure_entry_clear_before_return ();
25595 }
25596 
25597 /* Epilogue code for APCS frame.  */
25598 static void
25599 arm_expand_epilogue_apcs_frame (bool really_return)
25600 {
25601   unsigned long func_type;
25602   unsigned long saved_regs_mask;
25603   int num_regs = 0;
25604   int i;
25605   int floats_from_frame = 0;
25606   arm_stack_offsets *offsets;
25607 
25608   gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25609   func_type = arm_current_func_type ();
25610 
25611   /* Get frame offsets for ARM.  */
25612   offsets = arm_get_frame_offsets ();
25613   saved_regs_mask = offsets->saved_regs_mask;
25614 
25615   /* Find the offset of the floating-point save area in the frame.  */
25616   floats_from_frame
25617     = (offsets->saved_args
25618        + arm_compute_static_chain_stack_bytes ()
25619        - offsets->frame);
25620 
25621   /* Compute how many core registers are saved and how far away the floats are.  */
25622   for (i = 0; i <= LAST_ARM_REGNUM; i++)
25623     if (saved_regs_mask & (1 << i))
25624       {
25625         num_regs++;
25626         floats_from_frame += 4;
25627       }
25628 
25629   if (TARGET_HARD_FLOAT)
25630     {
25631       int start_reg;
25632       rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25633 
25634       /* The offset is from IP_REGNUM.  */
25635       int saved_size = arm_get_vfp_saved_size ();
25636       if (saved_size > 0)
25637         {
25638 	  rtx_insn *insn;
25639           floats_from_frame += saved_size;
25640           insn = emit_insn (gen_addsi3 (ip_rtx,
25641 					hard_frame_pointer_rtx,
25642 					GEN_INT (-floats_from_frame)));
25643 	  arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25644 				       ip_rtx, hard_frame_pointer_rtx);
25645         }
25646 
25647       /* Generate VFP register multi-pop.  */
25648       start_reg = FIRST_VFP_REGNUM;
25649 
25650       for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25651         /* Look for a case where a reg does not need restoring.  */
25652         if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25653             && (!df_regs_ever_live_p (i + 1)
25654                 || call_used_regs[i + 1]))
25655           {
25656             if (start_reg != i)
25657               arm_emit_vfp_multi_reg_pop (start_reg,
25658                                           (i - start_reg) / 2,
25659                                           gen_rtx_REG (SImode,
25660                                                        IP_REGNUM));
25661             start_reg = i + 2;
25662           }
25663 
25664       /* Restore the remaining regs that we have discovered (or possibly
25665          even all of them, if the conditional in the for loop never
25666          fired).  */
25667       if (start_reg != i)
25668         arm_emit_vfp_multi_reg_pop (start_reg,
25669                                     (i - start_reg) / 2,
25670                                     gen_rtx_REG (SImode, IP_REGNUM));
25671     }
25672 
25673   if (TARGET_IWMMXT)
25674     {
25675       /* The frame pointer is guaranteed to be non-double-word aligned, as
25676          it is set to double-word-aligned old_stack_pointer - 4.  */
25677       rtx_insn *insn;
25678       int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25679 
25680       for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25681         if (df_regs_ever_live_p (i) && !call_used_regs[i])
25682           {
25683             rtx addr = gen_frame_mem (V2SImode,
25684                                  plus_constant (Pmode, hard_frame_pointer_rtx,
25685                                                 - lrm_count * 4));
25686             insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25687             REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25688                                                gen_rtx_REG (V2SImode, i),
25689                                                NULL_RTX);
25690             lrm_count += 2;
25691           }
25692     }
25693 
25694   /* saved_regs_mask should contain IP, which holds the old stack pointer
25695      saved at activation-record creation.  Since SP and IP are adjacent
25696      registers, we can pop that value directly into SP.  */
25697   gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25698   saved_regs_mask &= ~(1 << IP_REGNUM);
25699   saved_regs_mask |= (1 << SP_REGNUM);
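  /* For instance (illustrative values): with the usual APCS save set
     {fp, ip, lr, pc}, i.e. a mask of 0xd800, clearing the IP bit (r12)
     and setting the SP bit (r13) yields 0xe800, so the subsequent pop
     loads the slot that held the old stack pointer straight into SP.  */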
25700 
25701   /* There are two registers left in saved_regs_mask: LR and PC.  We only
25702      need to restore LR (the return address), but to save time we can load
25703      it directly into the PC, unless we need a special function exit
25704      sequence or we are not really returning.  */
25705   if (really_return
25706       && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25707       && !crtl->calls_eh_return)
25708     /* Delete LR from the register mask, so that the LR value saved on
25709        the stack is popped directly into the PC.  */
25710     saved_regs_mask &= ~(1 << LR_REGNUM);
25711   else
25712     saved_regs_mask &= ~(1 << PC_REGNUM);
25713 
25714   num_regs = bit_count (saved_regs_mask);
25715   if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25716     {
25717       rtx_insn *insn;
25718       emit_insn (gen_blockage ());
25719       /* Unwind the stack to just below the saved registers.  */
25720       insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25721 				    hard_frame_pointer_rtx,
25722 				    GEN_INT (- 4 * num_regs)));
25723 
25724       arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25725 				   stack_pointer_rtx, hard_frame_pointer_rtx);
25726     }
25727 
25728   arm_emit_multi_reg_pop (saved_regs_mask);
25729 
25730   if (IS_INTERRUPT (func_type))
25731     {
25732       /* Interrupt handlers will have pushed the
25733          IP onto the stack, so restore it now.  */
25734       rtx_insn *insn;
25735       rtx addr = gen_rtx_MEM (SImode,
25736                               gen_rtx_POST_INC (SImode,
25737                               stack_pointer_rtx));
25738       set_mem_alias_set (addr, get_frame_alias_set ());
25739       insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25740       REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25741                                          gen_rtx_REG (SImode, IP_REGNUM),
25742                                          NULL_RTX);
25743     }
25744 
25745   if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25746     return;
25747 
25748   if (crtl->calls_eh_return)
25749     emit_insn (gen_addsi3 (stack_pointer_rtx,
25750 			   stack_pointer_rtx,
25751 			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25752 
25753   if (IS_STACKALIGN (func_type))
25754     /* Restore the original stack pointer.  Before prologue, the stack was
25755        realigned and the original stack pointer saved in r0.  For details,
25756        see comment in arm_expand_prologue.  */
25757     emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25758 
25759   emit_jump_insn (simple_return_rtx);
25760 }
25761 
25762 /* Generate RTL to represent an ARM epilogue.  REALLY_RETURN is true if the
25763    function is not a sibcall.  */
25764 void
25765 arm_expand_epilogue (bool really_return)
25766 {
25767   unsigned long func_type;
25768   unsigned long saved_regs_mask;
25769   int num_regs = 0;
25770   int i;
25771   int amount;
25772   arm_stack_offsets *offsets;
25773 
25774   func_type = arm_current_func_type ();
25775 
25776   /* Naked functions don't have an epilogue.  Hence, generate a return pattern
25777      and let output_return_instruction take care of any instruction emission.  */
25778   if (IS_NAKED (func_type)
25779       || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25780     {
25781       if (really_return)
25782         emit_jump_insn (simple_return_rtx);
25783       return;
25784     }
25785 
25786   /* If we are throwing an exception, then we really must be doing a
25787      return, so we can't tail-call.  */
25788   gcc_assert (!crtl->calls_eh_return || really_return);
25789 
25790   if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25791     {
25792       arm_expand_epilogue_apcs_frame (really_return);
25793       return;
25794     }
25795 
25796   /* Get frame offsets for ARM.  */
25797   offsets = arm_get_frame_offsets ();
25798   saved_regs_mask = offsets->saved_regs_mask;
25799   num_regs = bit_count (saved_regs_mask);
25800 
25801   if (frame_pointer_needed)
25802     {
25803       rtx_insn *insn;
25804       /* Restore stack pointer if necessary.  */
25805       if (TARGET_ARM)
25806         {
25807           /* In ARM mode, frame pointer points to first saved register.
25808              Restore stack pointer to last saved register.  */
25809           amount = offsets->frame - offsets->saved_regs;
25810 
25811           /* Force out any pending memory operations that reference stacked data
25812              before stack de-allocation occurs.  */
25813           emit_insn (gen_blockage ());
25814 	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25815 			    hard_frame_pointer_rtx,
25816 			    GEN_INT (amount)));
25817 	  arm_add_cfa_adjust_cfa_note (insn, amount,
25818 				       stack_pointer_rtx,
25819 				       hard_frame_pointer_rtx);
25820 
25821           /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25822              deleted.  */
25823           emit_insn (gen_force_register_use (stack_pointer_rtx));
25824         }
25825       else
25826         {
25827           /* In Thumb-2 mode, the frame pointer points to the last saved
25828              register.  */
25829 	  amount = offsets->locals_base - offsets->saved_regs;
25830 	  if (amount)
25831 	    {
25832 	      insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25833 				hard_frame_pointer_rtx,
25834 				GEN_INT (amount)));
25835 	      arm_add_cfa_adjust_cfa_note (insn, amount,
25836 					   hard_frame_pointer_rtx,
25837 					   hard_frame_pointer_rtx);
25838 	    }
25839 
25840           /* Force out any pending memory operations that reference stacked data
25841              before stack de-allocation occurs.  */
25842           emit_insn (gen_blockage ());
25843 	  insn = emit_insn (gen_movsi (stack_pointer_rtx,
25844 				       hard_frame_pointer_rtx));
25845 	  arm_add_cfa_adjust_cfa_note (insn, 0,
25846 				       stack_pointer_rtx,
25847 				       hard_frame_pointer_rtx);
25848           /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25849              deleted.  */
25850           emit_insn (gen_force_register_use (stack_pointer_rtx));
25851         }
25852     }
25853   else
25854     {
25855       /* Pop off outgoing args and local frame to adjust stack pointer to
25856          last saved register.  */
25857       amount = offsets->outgoing_args - offsets->saved_regs;
25858       if (amount)
25859         {
25860 	  rtx_insn *tmp;
25861           /* Force out any pending memory operations that reference stacked data
25862              before stack de-allocation occurs.  */
25863           emit_insn (gen_blockage ());
25864 	  tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25865 				       stack_pointer_rtx,
25866 				       GEN_INT (amount)));
25867 	  arm_add_cfa_adjust_cfa_note (tmp, amount,
25868 				       stack_pointer_rtx, stack_pointer_rtx);
25869           /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25870              not deleted.  */
25871           emit_insn (gen_force_register_use (stack_pointer_rtx));
25872         }
25873     }
25874 
25875   if (TARGET_HARD_FLOAT)
25876     {
25877       /* Generate VFP register multi-pop.  */
25878       int end_reg = LAST_VFP_REGNUM + 1;
25879 
25880       /* Scan the registers in reverse order.  We need to match
25881          any groupings made in the prologue and generate matching
25882          vldm operations.  Groups must be matched because, unlike pop,
25883          vldm can only restore consecutive registers.  */
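      /* For example (illustrative), if d8, d9, d11 and d12 were saved but
         d10 was not, two separate vldm pops are generated: one for d11-d12
         and one for d8-d9.  */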
25884       for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25885         /* Look for a case where a reg does not need restoring.  */
25886         if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25887             && (!df_regs_ever_live_p (i + 1)
25888                 || call_used_regs[i + 1]))
25889           {
25890             /* Restore the regs discovered so far (from reg+2 to
25891                end_reg).  */
25892             if (end_reg > i + 2)
25893               arm_emit_vfp_multi_reg_pop (i + 2,
25894                                           (end_reg - (i + 2)) / 2,
25895                                           stack_pointer_rtx);
25896             end_reg = i;
25897           }
25898 
25899       /* Restore the remaining regs that we have discovered (or possibly
25900          even all of them, if the conditional in the for loop never
25901          fired).  */
25902       if (end_reg > i + 2)
25903         arm_emit_vfp_multi_reg_pop (i + 2,
25904                                     (end_reg - (i + 2)) / 2,
25905                                     stack_pointer_rtx);
25906     }
25907 
25908   if (TARGET_IWMMXT)
25909     for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25910       if (df_regs_ever_live_p (i) && !call_used_regs[i])
25911         {
25912           rtx_insn *insn;
25913           rtx addr = gen_rtx_MEM (V2SImode,
25914                                   gen_rtx_POST_INC (SImode,
25915                                                     stack_pointer_rtx));
25916           set_mem_alias_set (addr, get_frame_alias_set ());
25917           insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25918           REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25919                                              gen_rtx_REG (V2SImode, i),
25920                                              NULL_RTX);
25921 	  arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25922 				       stack_pointer_rtx, stack_pointer_rtx);
25923         }
25924 
25925   if (saved_regs_mask)
25926     {
25927       rtx insn;
25928       bool return_in_pc = false;
25929 
25930       if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25931           && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25932 	  && !IS_CMSE_ENTRY (func_type)
25933           && !IS_STACKALIGN (func_type)
25934           && really_return
25935           && crtl->args.pretend_args_size == 0
25936           && saved_regs_mask & (1 << LR_REGNUM)
25937           && !crtl->calls_eh_return)
25938         {
25939           saved_regs_mask &= ~(1 << LR_REGNUM);
25940           saved_regs_mask |= (1 << PC_REGNUM);
25941           return_in_pc = true;
25942         }
25943 
25944       if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25945         {
25946           for (i = 0; i <= LAST_ARM_REGNUM; i++)
25947             if (saved_regs_mask & (1 << i))
25948               {
25949                 rtx addr = gen_rtx_MEM (SImode,
25950                                         gen_rtx_POST_INC (SImode,
25951                                                           stack_pointer_rtx));
25952                 set_mem_alias_set (addr, get_frame_alias_set ());
25953 
25954                 if (i == PC_REGNUM)
25955                   {
25956                     insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25957                     XVECEXP (insn, 0, 0) = ret_rtx;
25958                     XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25959                                                         addr);
25960                     RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25961                     insn = emit_jump_insn (insn);
25962                   }
25963                 else
25964                   {
25965                     insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25966                                                  addr));
25967                     REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25968                                                        gen_rtx_REG (SImode, i),
25969                                                        NULL_RTX);
25970 		    arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25971 						 stack_pointer_rtx,
25972 						 stack_pointer_rtx);
25973                   }
25974               }
25975         }
25976       else
25977         {
25978           if (TARGET_LDRD
25979 	      && current_tune->prefer_ldrd_strd
25980               && !optimize_function_for_size_p (cfun))
25981             {
25982               if (TARGET_THUMB2)
25983                 thumb2_emit_ldrd_pop (saved_regs_mask);
25984               else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25985                 arm_emit_ldrd_pop (saved_regs_mask);
25986               else
25987                 arm_emit_multi_reg_pop (saved_regs_mask);
25988             }
25989           else
25990             arm_emit_multi_reg_pop (saved_regs_mask);
25991         }
25992 
25993       if (return_in_pc)
25994         return;
25995     }
25996 
25997   amount
25998     = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25999   if (amount)
26000     {
26001       int i, j;
26002       rtx dwarf = NULL_RTX;
26003       rtx_insn *tmp =
26004 	emit_insn (gen_addsi3 (stack_pointer_rtx,
26005 			       stack_pointer_rtx,
26006 			       GEN_INT (amount)));
26007 
26008       RTX_FRAME_RELATED_P (tmp) = 1;
26009 
26010       if (cfun->machine->uses_anonymous_args)
26011 	{
26012 	  /* Restore the pretend args.  See arm_expand_prologue for how the
26013 	     pretend args are saved on the stack.  */
26014 	  int num_regs = crtl->args.pretend_args_size / 4;
26015 	  saved_regs_mask = (0xf0 >> num_regs) & 0xf;
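	  /* For example (illustrative), pretend_args_size == 8 gives
	     num_regs == 2 and a mask of 0xc, i.e. r2 and r3.  */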
26016 	  for (j = 0, i = 0; j < num_regs; i++)
26017 	    if (saved_regs_mask & (1 << i))
26018 	      {
26019 		rtx reg = gen_rtx_REG (SImode, i);
26020 		dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
26021 		j++;
26022 	      }
26023 	  REG_NOTES (tmp) = dwarf;
26024 	}
26025       arm_add_cfa_adjust_cfa_note (tmp, amount,
26026 				   stack_pointer_rtx, stack_pointer_rtx);
26027     }
26028 
26029   /* Clear all caller-saved regs that are not used to return.  */
26030   if (IS_CMSE_ENTRY (arm_current_func_type ()))
26031     {
26032       /* CMSE_ENTRY always returns.  */
26033       gcc_assert (really_return);
26034       cmse_nonsecure_entry_clear_before_return ();
26035     }
26036 
26037   if (!really_return)
26038     return;
26039 
26040   if (crtl->calls_eh_return)
26041     emit_insn (gen_addsi3 (stack_pointer_rtx,
26042                            stack_pointer_rtx,
26043                            gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
26044 
26045   if (IS_STACKALIGN (func_type))
26046     /* Restore the original stack pointer.  Before prologue, the stack was
26047        realigned and the original stack pointer saved in r0.  For details,
26048        see comment in arm_expand_prologue.  */
26049     emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
26050 
26051   emit_jump_insn (simple_return_rtx);
26052 }
26053 
26054 /* Implementation of insn prologue_thumb1_interwork.  This is the first
26055    "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
26056 
26057 const char *
26058 thumb1_output_interwork (void)
26059 {
26060   const char * name;
26061   FILE *f = asm_out_file;
26062 
26063   gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
26064   gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
26065 	      == SYMBOL_REF);
26066   name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
26067 
26068   /* Generate code sequence to switch us into Thumb mode.  */
26069   /* The .code 32 directive has already been emitted by
26070      ASM_DECLARE_FUNCTION_NAME.  */
26071   asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
26072   asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
26073 
26074   /* Generate a label, so that the debugger will notice the
26075      change in instruction sets.  This label is also used by
26076      the assembler to bypass the ARM code when this function
26077      is called from a Thumb encoded function elsewhere in the
26078      same file.  Hence the definition of STUB_NAME here must
26079      agree with the definition in gas/config/tc-arm.c.  */
26080 
26081 #define STUB_NAME ".real_start_of"
26082 
26083   fprintf (f, "\t.code\t16\n");
26084 #ifdef ARM_PE
26085   if (arm_dllexport_name_p (name))
26086     name = arm_strip_name_encoding (name);
26087 #endif
26088   asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
26089   fprintf (f, "\t.thumb_func\n");
26090   asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
26091 
26092   return "";
26093 }
26094 
26095 /* Handle the case of a double word load into a low register from
26096    a computed memory address.  The computed address may involve a
26097    register which is overwritten by the load.  */
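/* For example (illustrative), loading a doubleword at [r0] into r0/r1:
   loading the low word first would clobber r0 before the high word's
   address could be formed, so in that case the high word is loaded first
   from [r0, #4] and the low word last from [r0].  */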
26098 const char *
26099 thumb_load_double_from_address (rtx *operands)
26100 {
26101   rtx addr;
26102   rtx base;
26103   rtx offset;
26104   rtx arg1;
26105   rtx arg2;
26106 
26107   gcc_assert (REG_P (operands[0]));
26108   gcc_assert (MEM_P (operands[1]));
26109 
26110   /* Get the memory address.  */
26111   addr = XEXP (operands[1], 0);
26112 
26113   /* Work out how the memory address is computed.  */
26114   switch (GET_CODE (addr))
26115     {
26116     case REG:
26117       operands[2] = adjust_address (operands[1], SImode, 4);
26118 
26119       if (REGNO (operands[0]) == REGNO (addr))
26120 	{
26121 	  output_asm_insn ("ldr\t%H0, %2", operands);
26122 	  output_asm_insn ("ldr\t%0, %1", operands);
26123 	}
26124       else
26125 	{
26126 	  output_asm_insn ("ldr\t%0, %1", operands);
26127 	  output_asm_insn ("ldr\t%H0, %2", operands);
26128 	}
26129       break;
26130 
26131     case CONST:
26132       /* Compute <address> + 4 for the high order load.  */
26133       operands[2] = adjust_address (operands[1], SImode, 4);
26134 
26135       output_asm_insn ("ldr\t%0, %1", operands);
26136       output_asm_insn ("ldr\t%H0, %2", operands);
26137       break;
26138 
26139     case PLUS:
26140       arg1   = XEXP (addr, 0);
26141       arg2   = XEXP (addr, 1);
26142 
26143       if (CONSTANT_P (arg1))
26144 	base = arg2, offset = arg1;
26145       else
26146 	base = arg1, offset = arg2;
26147 
26148       gcc_assert (REG_P (base));
26149 
26150       /* Catch the case of <address> = <reg> + <reg> */
26151       if (REG_P (offset))
26152 	{
26153 	  int reg_offset = REGNO (offset);
26154 	  int reg_base   = REGNO (base);
26155 	  int reg_dest   = REGNO (operands[0]);
26156 
26157 	  /* Add the base and offset registers together into the
26158              higher destination register.  */
26159 	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
26160 		       reg_dest + 1, reg_base, reg_offset);
26161 
26162 	  /* Load the lower destination register from the address in
26163              the higher destination register.  */
26164 	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
26165 		       reg_dest, reg_dest + 1);
26166 
26167 	  /* Load the higher destination register from its own address
26168              plus 4.  */
26169 	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
26170 		       reg_dest + 1, reg_dest + 1);
26171 	}
26172       else
26173 	{
26174 	  /* Compute <address> + 4 for the high order load.  */
26175 	  operands[2] = adjust_address (operands[1], SImode, 4);
26176 
26177 	  /* If the computed address is held in the low order register
26178 	     then load the high order register first, otherwise always
26179 	     load the low order register first.  */
26180 	  if (REGNO (operands[0]) == REGNO (base))
26181 	    {
26182 	      output_asm_insn ("ldr\t%H0, %2", operands);
26183 	      output_asm_insn ("ldr\t%0, %1", operands);
26184 	    }
26185 	  else
26186 	    {
26187 	      output_asm_insn ("ldr\t%0, %1", operands);
26188 	      output_asm_insn ("ldr\t%H0, %2", operands);
26189 	    }
26190 	}
26191       break;
26192 
26193     case LABEL_REF:
26194       /* With no registers to worry about we can just load the value
26195          directly.  */
26196       operands[2] = adjust_address (operands[1], SImode, 4);
26197 
26198       output_asm_insn ("ldr\t%H0, %2", operands);
26199       output_asm_insn ("ldr\t%0, %1", operands);
26200       break;
26201 
26202     default:
26203       gcc_unreachable ();
26204     }
26205 
26206   return "";
26207 }
26208 
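/* Output an ldmia/stmia pair that copies N (2 or 3) words from the address
   register in operands[1] to the address register in operands[0], using
   operands[4]..operands[6] as scratch registers.  The scratch registers are
   sorted first so that the register lists are emitted in ascending order.  */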
26209 const char *
26210 thumb_output_move_mem_multiple (int n, rtx *operands)
26211 {
26212   switch (n)
26213     {
26214     case 2:
26215       if (REGNO (operands[4]) > REGNO (operands[5]))
26216 	std::swap (operands[4], operands[5]);
26217 
26218       output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
26219       output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
26220       break;
26221 
26222     case 3:
26223       if (REGNO (operands[4]) > REGNO (operands[5]))
26224         std::swap (operands[4], operands[5]);
26225       if (REGNO (operands[5]) > REGNO (operands[6]))
26226         std::swap (operands[5], operands[6]);
26227       if (REGNO (operands[4]) > REGNO (operands[5]))
26228         std::swap (operands[4], operands[5]);
26229 
26230       output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
26231       output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
26232       break;
26233 
26234     default:
26235       gcc_unreachable ();
26236     }
26237 
26238   return "";
26239 }
26240 
26241 /* Output a call-via instruction for thumb state.  */
26242 const char *
26243 thumb_call_via_reg (rtx reg)
26244 {
26245   int regno = REGNO (reg);
26246   rtx *labelp;
26247 
26248   gcc_assert (regno < LR_REGNUM);
26249 
26250   /* If we are in the normal text section we can use a single instance
26251      per compilation unit.  If we are doing function sections, then we need
26252      an entry per section, since we can't rely on reachability.  */
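  /* For the shared text-section case the stubs themselves (a label followed
     by a "bx rN") are emitted later, by arm_file_end.  */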
26253   if (in_section == text_section)
26254     {
26255       thumb_call_reg_needed = 1;
26256 
26257       if (thumb_call_via_label[regno] == NULL)
26258 	thumb_call_via_label[regno] = gen_label_rtx ();
26259       labelp = thumb_call_via_label + regno;
26260     }
26261   else
26262     {
26263       if (cfun->machine->call_via[regno] == NULL)
26264 	cfun->machine->call_via[regno] = gen_label_rtx ();
26265       labelp = cfun->machine->call_via + regno;
26266     }
26267 
26268   output_asm_insn ("bl\t%a0", labelp);
26269   return "";
26270 }
26271 
26272 /* Routines for generating rtl.  */
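/* Expand a block copy of INTVAL (operands[2]) bytes from the address in
   operands[1] to the address in operands[0]: 12-byte and 8-byte chunks are
   copied with the movmem12b/movmem8b patterns, and the remainder with word,
   halfword and byte moves.  For example (illustrative), a 23-byte copy is
   split as 12 + 8 + 2 + 1.  */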
26273 void
26274 thumb_expand_movmemqi (rtx *operands)
26275 {
26276   rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
26277   rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
26278   HOST_WIDE_INT len = INTVAL (operands[2]);
26279   HOST_WIDE_INT offset = 0;
26280 
26281   while (len >= 12)
26282     {
26283       emit_insn (gen_movmem12b (out, in, out, in));
26284       len -= 12;
26285     }
26286 
26287   if (len >= 8)
26288     {
26289       emit_insn (gen_movmem8b (out, in, out, in));
26290       len -= 8;
26291     }
26292 
26293   if (len >= 4)
26294     {
26295       rtx reg = gen_reg_rtx (SImode);
26296       emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26297       emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26298       len -= 4;
26299       offset += 4;
26300     }
26301 
26302   if (len >= 2)
26303     {
26304       rtx reg = gen_reg_rtx (HImode);
26305       emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26306 					      plus_constant (Pmode, in,
26307 							     offset))));
26308       emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26309 								offset)),
26310 			    reg));
26311       len -= 2;
26312       offset += 2;
26313     }
26314 
26315   if (len)
26316     {
26317       rtx reg = gen_reg_rtx (QImode);
26318       emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26319 					      plus_constant (Pmode, in,
26320 							     offset))));
26321       emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26322 								offset)),
26323 			    reg));
26324     }
26325 }
26326 
26327 void
26328 thumb_reload_out_hi (rtx *operands)
26329 {
26330   emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26331 }
26332 
26333 /* Return the length of a function name prefix
26334    that starts with the character C (the argument).  */
26335 static int
26336 arm_get_strip_length (int c)
26337 {
26338   switch (c)
26339     {
26340     ARM_NAME_ENCODING_LENGTHS
26341       default: return 0;
26342     }
26343 }
26344 
26345 /* Return a pointer to a function's name with any
26346    and all prefix encodings stripped from it.  */
26347 const char *
26348 arm_strip_name_encoding (const char *name)
26349 {
26350   int skip;
26351 
26352   while ((skip = arm_get_strip_length (* name)))
26353     name += skip;
26354 
26355   return name;
26356 }
26357 
26358 /* If there is a '*' anywhere in the name's prefix, then
26359    emit the stripped name verbatim, otherwise prepend an
26360    underscore if leading underscores are being used.  */
26361 void
26362 arm_asm_output_labelref (FILE *stream, const char *name)
26363 {
26364   int skip;
26365   int verbatim = 0;
26366 
26367   while ((skip = arm_get_strip_length (* name)))
26368     {
26369       verbatim |= (*name == '*');
26370       name += skip;
26371     }
26372 
26373   if (verbatim)
26374     fputs (name, stream);
26375   else
26376     asm_fprintf (stream, "%U%s", name);
26377 }
26378 
26379 /* This function is used to emit an EABI tag and its associated value.
26380    We emit the numerical value of the tag in case the assembler does not
26381    support textual tags.  (Eg gas prior to 2.20).  If requested we include
26382    the tag name in a comment so that anyone reading the assembler output
26383    will know which tag is being set.
26384 
26385    This function is not static because arm-c.c needs it too.  */
26386 
26387 void
26388 arm_emit_eabi_attribute (const char *name, int num, int val)
26389 {
26390   asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26391   if (flag_verbose_asm || flag_debug_asm)
26392     asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26393   asm_fprintf (asm_out_file, "\n");
26394 }
26395 
26396 /* This function is used to print CPU tuning information as a comment
26397    in the assembler file.  Pointers are not printed for now.  */
26398 
26399 void
26400 arm_print_tune_info (void)
26401 {
26402   asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26403   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26404 	       current_tune->constant_limit);
26405   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26406 	       "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26407   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26408 	       "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26409   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26410 	       "prefetch.l1_cache_size:\t%d\n",
26411 	       current_tune->prefetch.l1_cache_size);
26412   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26413 	       "prefetch.l1_cache_line_size:\t%d\n",
26414 	       current_tune->prefetch.l1_cache_line_size);
26415   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26416 	       "prefer_constant_pool:\t%d\n",
26417 	       (int) current_tune->prefer_constant_pool);
26418   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26419 	       "branch_cost:\t(s:speed, p:predictable)\n");
26420   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26421   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26422 	       current_tune->branch_cost (false, false));
26423   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26424 	       current_tune->branch_cost (false, true));
26425   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26426 	       current_tune->branch_cost (true, false));
26427   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26428 	       current_tune->branch_cost (true, true));
26429   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26430 	       "prefer_ldrd_strd:\t%d\n",
26431 	       (int) current_tune->prefer_ldrd_strd);
26432   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26433 	       "logical_op_non_short_circuit:\t[%d,%d]\n",
26434 	       (int) current_tune->logical_op_non_short_circuit_thumb,
26435 	       (int) current_tune->logical_op_non_short_circuit_arm);
26436   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26437 	       "prefer_neon_for_64bits:\t%d\n",
26438 	       (int) current_tune->prefer_neon_for_64bits);
26439   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26440 	       "disparage_flag_setting_t16_encodings:\t%d\n",
26441 	       (int) current_tune->disparage_flag_setting_t16_encodings);
26442   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26443 	       "string_ops_prefer_neon:\t%d\n",
26444 	       (int) current_tune->string_ops_prefer_neon);
26445   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26446 	       "max_insns_inline_memset:\t%d\n",
26447 	       current_tune->max_insns_inline_memset);
26448   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26449 	       current_tune->fusible_ops);
26450   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26451 	       (int) current_tune->sched_autopref);
26452 }
26453 
26454 /* Print .arch and .arch_extension directives corresponding to the
26455    current architecture configuration.  */
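/* For example (illustrative), with -march=armv7-a this emits ".arch armv7-a",
   followed by an ".arch_extension" line for each optional extension that is
   enabled in the target ISA.  */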
26456 static void
26457 arm_print_asm_arch_directives ()
26458 {
26459   const arch_option *arch
26460     = arm_parse_arch_option_name (all_architectures, "-march",
26461 				  arm_active_target.arch_name);
26462   auto_sbitmap opt_bits (isa_num_bits);
26463 
26464   gcc_assert (arch);
26465 
26466   asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26467   arm_last_printed_arch_string = arm_active_target.arch_name;
26468   if (!arch->common.extensions)
26469     return;
26470 
26471   for (const struct cpu_arch_extension *opt = arch->common.extensions;
26472        opt->name != NULL;
26473        opt++)
26474     {
26475       if (!opt->remove)
26476 	{
26477 	  arm_initialize_isa (opt_bits, opt->isa_bits);
26478 
26479 	  /* If every feature bit of this option is set in the target
26480 	     ISA specification, print out the option name.  However,
26481 	     don't print anything if all the bits are part of the
26482 	     FPU specification.  */
26483 	  if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26484 	      && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26485 	    asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26486 	}
26487     }
26488 }
26489 
26490 static void
26491 arm_file_start (void)
26492 {
26493   int val;
26494 
26495   if (TARGET_BPABI)
26496     {
26497       /* We don't have a specified CPU.  Use the architecture to
26498 	 generate the tags.
26499 
26500 	 Note: it might be better to do this unconditionally, then the
26501 	 assembler would not need to know about all new CPU names as
26502 	 they are added.  */
26503       if (!arm_active_target.core_name)
26504 	{
26505 	  /* armv7ve doesn't support any extensions.  */
26506 	  if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26507 	    {
26508 	      /* Keep backward compatibility for assemblers
26509 		 which don't support armv7ve.  */
26510 	      asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26511 	      asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26512 	      asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26513 	      asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26514 	      asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26515 	      arm_last_printed_arch_string = "armv7ve";
26516 	    }
26517 	  else
26518 	    arm_print_asm_arch_directives ();
26519 	}
26520       else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26521 	{
26522 	  asm_fprintf (asm_out_file, "\t.arch %s\n",
26523 		       arm_active_target.core_name + 8);
26524 	  arm_last_printed_arch_string = arm_active_target.core_name + 8;
26525 	}
26526       else
26527 	{
26528 	  const char* truncated_name
26529 	    = arm_rewrite_selected_cpu (arm_active_target.core_name);
26530 	  asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26531 	}
26532 
26533       if (print_tune_info)
26534 	arm_print_tune_info ();
26535 
26536       if (! TARGET_SOFT_FLOAT)
26537 	{
26538 	  if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26539 	    arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26540 
26541 	  if (TARGET_HARD_FLOAT_ABI)
26542 	    arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26543 	}
26544 
26545       /* Some of these attributes only apply when the corresponding features
26546 	 are used.  However we don't have any easy way of figuring this out.
26547 	 Conservatively record the setting that would have been used.  */
26548 
26549       if (flag_rounding_math)
26550 	arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26551 
26552       if (!flag_unsafe_math_optimizations)
26553 	{
26554 	  arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26555 	  arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26556 	}
26557       if (flag_signaling_nans)
26558 	arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26559 
26560       arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26561 			   flag_finite_math_only ? 1 : 3);
26562 
26563       arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26564       arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26565       arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26566 			       flag_short_enums ? 1 : 2);
26567 
26568       /* Tag_ABI_optimization_goals.  */
26569       if (optimize_size)
26570 	val = 4;
26571       else if (optimize >= 2)
26572 	val = 2;
26573       else if (optimize)
26574 	val = 1;
26575       else
26576 	val = 6;
26577       arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26578 
26579       arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26580 			       unaligned_access);
26581 
26582       if (arm_fp16_format)
26583 	arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26584 			     (int) arm_fp16_format);
26585 
26586       if (arm_lang_output_object_attributes_hook)
26587 	arm_lang_output_object_attributes_hook();
26588     }
26589 
26590   default_file_start ();
26591 }
26592 
26593 static void
26594 arm_file_end (void)
26595 {
26596   int regno;
26597 
26598   if (NEED_INDICATE_EXEC_STACK)
26599     /* Add .note.GNU-stack.  */
26600     file_end_indicate_exec_stack ();
26601 
26602   if (! thumb_call_reg_needed)
26603     return;
26604 
26605   switch_to_section (text_section);
26606   asm_fprintf (asm_out_file, "\t.code 16\n");
26607   ASM_OUTPUT_ALIGN (asm_out_file, 1);
26608 
26609   for (regno = 0; regno < LR_REGNUM; regno++)
26610     {
26611       rtx label = thumb_call_via_label[regno];
26612 
26613       if (label != 0)
26614 	{
26615 	  targetm.asm_out.internal_label (asm_out_file, "L",
26616 					  CODE_LABEL_NUMBER (label));
26617 	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26618 	}
26619     }
26620 }
26621 
26622 #ifndef ARM_PE
26623 /* Symbols in the text segment can be accessed without indirecting via the
26624    constant pool; it may take an extra binary operation, but this is still
26625    faster than indirecting via memory.  Don't do this when not optimizing,
26626    since we won't be calculating all of the offsets necessary to do this
26627    simplification.  */
26628 
26629 static void
26630 arm_encode_section_info (tree decl, rtx rtl, int first)
26631 {
26632   if (optimize > 0 && TREE_CONSTANT (decl))
26633     SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26634 
26635   default_encode_section_info (decl, rtl, first);
26636 }
26637 #endif /* !ARM_PE */
26638 
26639 static void
26640 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26641 {
26642   if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26643       && !strcmp (prefix, "L"))
26644     {
26645       arm_ccfsm_state = 0;
26646       arm_target_insn = NULL;
26647     }
26648   default_internal_label (stream, prefix, labelno);
26649 }
26650 
26651 /* Output code to add DELTA to the first argument, and then jump
26652    to FUNCTION.  Used for C++ multiple inheritance.  */
26653 
26654 static void
26655 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26656 		     HOST_WIDE_INT, tree function)
26657 {
26658   static int thunk_label = 0;
26659   char label[256];
26660   char labelpc[256];
26661   int mi_delta = delta;
26662   const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26663   int shift = 0;
26664   int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26665                     ? 1 : 0);
26666   if (mi_delta < 0)
26667     mi_delta = - mi_delta;
26668 
26669   final_start_function (emit_barrier (), file, 1);
26670 
26671   if (TARGET_THUMB1)
26672     {
26673       int labelno = thunk_label++;
26674       ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26675       /* Thunks are entered in ARM mode when available.  */
26676       if (TARGET_THUMB1_ONLY)
26677 	{
26678 	  /* push r3 so we can use it as a temporary.  */
26679 	  /* TODO: Omit this save if r3 is not used.  */
26680 	  fputs ("\tpush {r3}\n", file);
26681 	  fputs ("\tldr\tr3, ", file);
26682 	}
26683       else
26684 	{
26685 	  fputs ("\tldr\tr12, ", file);
26686 	}
26687       assemble_name (file, label);
26688       fputc ('\n', file);
26689       if (flag_pic)
26690 	{
26691 	  /* If we are generating PIC, the ldr instruction below loads
26692 	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
26693 	     the address of the add + 8, so we have:
26694 
26695 	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26696 	         = target + 1.
26697 
26698 	     Note that we have "+ 1" because some versions of GNU ld
26699 	     don't set the low bit of the result for R_ARM_REL32
26700 	     relocations against thumb function symbols.
26701 	     On ARMv6M this is +4, not +8.  */
26702 	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26703 	  assemble_name (file, labelpc);
26704 	  fputs (":\n", file);
26705 	  if (TARGET_THUMB1_ONLY)
26706 	    {
26707 	      /* This is 2 insns after the start of the thunk, so we know it
26708 	         is 4-byte aligned.  */
26709 	      fputs ("\tadd\tr3, pc, r3\n", file);
26710 	      fputs ("\tmov r12, r3\n", file);
26711 	    }
26712 	  else
26713 	    fputs ("\tadd\tr12, pc, r12\n", file);
26714 	}
26715       else if (TARGET_THUMB1_ONLY)
26716 	fputs ("\tmov r12, r3\n", file);
26717     }
26718   if (TARGET_THUMB1_ONLY)
26719     {
26720       if (mi_delta > 255)
26721 	{
26722 	  fputs ("\tldr\tr3, ", file);
26723 	  assemble_name (file, label);
26724 	  fputs ("+4\n", file);
26725 	  asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26726 		       mi_op, this_regno, this_regno);
26727 	}
26728       else if (mi_delta != 0)
26729 	{
26730 	  /* Thumb1 unified syntax requires s suffix in instruction name when
26731 	     one of the operands is immediate.  */
26732 	  asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26733 		       mi_op, this_regno, this_regno,
26734 		       mi_delta);
26735 	}
26736     }
26737   else
26738     {
26739       /* TODO: Use movw/movt for large constants when available.  */
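      /* The delta is added one ARM-encodable immediate at a time: each step
         peels off an 8-bit chunk aligned to an even bit position.  For
         example (illustrative), a delta of 0x1234 is added as #0x234
         followed by #0x1000.  */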
26740       while (mi_delta != 0)
26741 	{
26742 	  if ((mi_delta & (3 << shift)) == 0)
26743 	    shift += 2;
26744 	  else
26745 	    {
26746 	      asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26747 			   mi_op, this_regno, this_regno,
26748 			   mi_delta & (0xff << shift));
26749 	      mi_delta &= ~(0xff << shift);
26750 	      shift += 8;
26751 	    }
26752 	}
26753     }
26754   if (TARGET_THUMB1)
26755     {
26756       if (TARGET_THUMB1_ONLY)
26757 	fputs ("\tpop\t{r3}\n", file);
26758 
26759       fprintf (file, "\tbx\tr12\n");
26760       ASM_OUTPUT_ALIGN (file, 2);
26761       assemble_name (file, label);
26762       fputs (":\n", file);
26763       if (flag_pic)
26764 	{
26765 	  /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn".  */
26766 	  rtx tem = XEXP (DECL_RTL (function), 0);
26767 	  /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26768 	     pipeline offset is four rather than eight.  Adjust the offset
26769 	     accordingly.  */
26770 	  tem = plus_constant (GET_MODE (tem), tem,
26771 			       TARGET_THUMB1_ONLY ? -3 : -7);
26772 	  tem = gen_rtx_MINUS (GET_MODE (tem),
26773 			       tem,
26774 			       gen_rtx_SYMBOL_REF (Pmode,
26775 						   ggc_strdup (labelpc)));
26776 	  assemble_integer (tem, 4, BITS_PER_WORD, 1);
26777 	}
26778       else
26779 	/* Output ".word .LTHUNKn".  */
26780 	assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26781 
26782       if (TARGET_THUMB1_ONLY && mi_delta > 255)
26783 	assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26784     }
26785   else
26786     {
26787       fputs ("\tb\t", file);
26788       assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26789       if (NEED_PLT_RELOC)
26790         fputs ("(PLT)", file);
26791       fputc ('\n', file);
26792     }
26793 
26794   final_end_function ();
26795 }
26796 
26797 /* MI thunk handling for TARGET_32BIT.  */
26798 
26799 static void
26800 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26801 		       HOST_WIDE_INT vcall_offset, tree function)
26802 {
26803   const bool long_call_p = arm_is_long_call_p (function);
26804 
26805   /* On ARM, this_regno is R0 or R1 depending on
26806      whether the function returns an aggregate or not.
26807   */
26808   int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26809 				       function)
26810 		    ? R1_REGNUM : R0_REGNUM);
26811 
26812   rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26813   rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26814   reload_completed = 1;
26815   emit_note (NOTE_INSN_PROLOGUE_END);
26816 
26817   /* Add DELTA to THIS_RTX.  */
26818   if (delta != 0)
26819     arm_split_constant (PLUS, Pmode, NULL_RTX,
26820 			delta, this_rtx, this_rtx, false);
26821 
26822   /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX.  */
26823   if (vcall_offset != 0)
26824     {
26825       /* Load *THIS_RTX.  */
26826       emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26827       /* Compute *THIS_RTX + VCALL_OFFSET.  */
26828       arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26829 			  false);
26830       /* Compute *(*THIS_RTX + VCALL_OFFSET).  */
26831       emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26832       emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26833     }
26834 
26835   /* Generate a tail call to the target function.  */
26836   if (!TREE_USED (function))
26837     {
26838       assemble_external (function);
26839       TREE_USED (function) = 1;
26840     }
26841   rtx funexp = XEXP (DECL_RTL (function), 0);
26842   if (long_call_p)
26843     {
26844       emit_move_insn (temp, funexp);
26845       funexp = temp;
26846     }
26847   funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26848   rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26849   SIBLING_CALL_P (insn) = 1;
26850   emit_barrier ();
26851 
26852   /* Indirect calls require a bit of fixup in PIC mode.  */
26853   if (long_call_p)
26854     {
26855       split_all_insns_noflow ();
26856       arm_reorg ();
26857     }
26858 
26859   insn = get_insns ();
26860   shorten_branches (insn);
26861   final_start_function (insn, file, 1);
26862   final (insn, file, 1);
26863   final_end_function ();
26864 
26865   /* Stop pretending this is a post-reload pass.  */
26866   reload_completed = 0;
26867 }
26868 
26869 /* Output code to add DELTA to the first argument, and then jump
26870    to FUNCTION.  Used for C++ multiple inheritance.  */
26871 
26872 static void
26873 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26874 		     HOST_WIDE_INT vcall_offset, tree function)
26875 {
26876   if (TARGET_32BIT)
26877     arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26878   else
26879     arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26880 }
26881 
26882 int
26883 arm_emit_vector_const (FILE *file, rtx x)
26884 {
26885   int i;
26886   const char * pattern;
26887 
26888   gcc_assert (GET_CODE (x) == CONST_VECTOR);
26889 
26890   switch (GET_MODE (x))
26891     {
26892     case E_V2SImode: pattern = "%08x"; break;
26893     case E_V4HImode: pattern = "%04x"; break;
26894     case E_V8QImode: pattern = "%02x"; break;
26895     default:       gcc_unreachable ();
26896     }
26897 
26898   fprintf (file, "0x");
26899   for (i = CONST_VECTOR_NUNITS (x); i--;)
26900     {
26901       rtx element;
26902 
26903       element = CONST_VECTOR_ELT (x, i);
26904       fprintf (file, pattern, INTVAL (element));
26905     }
26906 
26907   return 1;
26908 }
26909 
26910 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
26911    HFmode constant pool entries are actually loaded with ldr.  */
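/* For instance (illustrative), 1.0 in IEEE half precision is 0x3c00; on a
   little-endian target the two value bytes are emitted first, followed by
   two bytes of zero padding.  */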
26912 void
26913 arm_emit_fp16_const (rtx c)
26914 {
26915   long bits;
26916 
26917   bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26918   if (WORDS_BIG_ENDIAN)
26919     assemble_zeros (2);
26920   assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26921   if (!WORDS_BIG_ENDIAN)
26922     assemble_zeros (2);
26923 }
26924 
26925 const char *
26926 arm_output_load_gr (rtx *operands)
26927 {
26928   rtx reg;
26929   rtx offset;
26930   rtx wcgr;
26931   rtx sum;
26932 
26933   if (!MEM_P (operands [1])
26934       || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26935       || !REG_P (reg = XEXP (sum, 0))
26936       || !CONST_INT_P (offset = XEXP (sum, 1))
26937       || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26938     return "wldrw%?\t%0, %1";
26939 
26940   /* Fix up an out-of-range load of a GR register.  */
26941   output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26942   wcgr = operands[0];
26943   operands[0] = reg;
26944   output_asm_insn ("ldr%?\t%0, %1", operands);
26945 
26946   operands[0] = wcgr;
26947   operands[1] = reg;
26948   output_asm_insn ("tmcr%?\t%0, %1", operands);
26949   output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26950 
26951   return "";
26952 }
26953 
26954 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26955 
26956    On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26957    named arg and all anonymous args onto the stack.
26958    XXX I know the prologue shouldn't be pushing registers, but it is faster
26959    that way.  */
26960 
26961 static void
26962 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26963 			    machine_mode mode,
26964 			    tree type,
26965 			    int *pretend_size,
26966 			    int second_time ATTRIBUTE_UNUSED)
26967 {
26968   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26969   int nregs;
26970 
26971   cfun->machine->uses_anonymous_args = 1;
26972   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26973     {
26974       nregs = pcum->aapcs_ncrn;
26975       if (nregs & 1)
26976 	{
26977 	  int res = arm_needs_doubleword_align (mode, type);
26978 	  if (res < 0 && warn_psabi)
26979 	    inform (input_location, "parameter passing for argument of "
26980 		    "type %qT changed in GCC 7.1", type);
26981 	  else if (res > 0)
26982 	    nregs++;
26983 	}
26984     }
26985   else
26986     nregs = pcum->nregs;
26987 
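  /* For example (illustrative, with the four argument registers r0-r3 and
     4-byte words): if the named arguments consume only r0 and r1, then
     nregs == 2 and *pretend_size becomes 8, i.e. r2 and r3 are pushed.  */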
26988   if (nregs < NUM_ARG_REGS)
26989     *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26990 }
26991 
26992 /* We can't rely on the caller doing the proper promotion when
26993    using APCS or ATPCS.  */
26994 
26995 static bool
26996 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26997 {
26998     return !TARGET_AAPCS_BASED;
26999 }
27000 
27001 static machine_mode
27002 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
27003                            machine_mode mode,
27004                            int *punsignedp ATTRIBUTE_UNUSED,
27005                            const_tree fntype ATTRIBUTE_UNUSED,
27006                            int for_return ATTRIBUTE_UNUSED)
27007 {
27008   if (GET_MODE_CLASS (mode) == MODE_INT
27009       && GET_MODE_SIZE (mode) < 4)
27010     return SImode;
27011 
27012   return mode;
27013 }
27014 
27015 
27016 static bool
27017 arm_default_short_enums (void)
27018 {
27019   return ARM_DEFAULT_SHORT_ENUMS;
27020 }
27021 
27022 
27023 /* AAPCS requires that anonymous bitfields affect structure alignment.  */
27024 
27025 static bool
27026 arm_align_anon_bitfield (void)
27027 {
27028   return TARGET_AAPCS_BASED;
27029 }
27030 
27031 
27032 /* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */
27033 
27034 static tree
27035 arm_cxx_guard_type (void)
27036 {
27037   return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
27038 }
27039 
27040 
27041 /* The EABI says test the least significant bit of a guard variable.  */
27042 
27043 static bool
27044 arm_cxx_guard_mask_bit (void)
27045 {
27046   return TARGET_AAPCS_BASED;
27047 }
27048 
27049 
27050 /* The EABI specifies that all array cookies are 8 bytes long.  */
27051 
27052 static tree
27053 arm_get_cookie_size (tree type)
27054 {
27055   tree size;
27056 
27057   if (!TARGET_AAPCS_BASED)
27058     return default_cxx_get_cookie_size (type);
27059 
27060   size = build_int_cst (sizetype, 8);
27061   return size;
27062 }
27063 
27064 
27065 /* The EABI says that array cookies should also contain the element size.  */
27066 
27067 static bool
27068 arm_cookie_has_size (void)
27069 {
27070   return TARGET_AAPCS_BASED;
27071 }
27072 
27073 
27074 /* The EABI says constructors and destructors should return a pointer to
27075    the object constructed/destroyed.  */
27076 
27077 static bool
27078 arm_cxx_cdtor_returns_this (void)
27079 {
27080   return TARGET_AAPCS_BASED;
27081 }
27082 
27083 /* The EABI says that an inline function may never be the key
27084    method.  */
27085 
27086 static bool
27087 arm_cxx_key_method_may_be_inline (void)
27088 {
27089   return !TARGET_AAPCS_BASED;
27090 }
27091 
27092 static void
27093 arm_cxx_determine_class_data_visibility (tree decl)
27094 {
27095   if (!TARGET_AAPCS_BASED
27096       || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
27097     return;
27098 
27099   /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
27100      is exported.  However, on systems without dynamic vague linkage,
27101      \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
27102   if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
27103     DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
27104   else
27105     DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
27106   DECL_VISIBILITY_SPECIFIED (decl) = 1;
27107 }
27108 
27109 static bool
27110 arm_cxx_class_data_always_comdat (void)
27111 {
27112   /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
27113      vague linkage if the class has no key function.  */
27114   return !TARGET_AAPCS_BASED;
27115 }
27116 
27117 
27118 /* The EABI says __aeabi_atexit should be used to register static
27119    destructors.  */
27120 
27121 static bool
27122 arm_cxx_use_aeabi_atexit (void)
27123 {
27124   return TARGET_AAPCS_BASED;
27125 }
27126 
27127 
27128 void
27129 arm_set_return_address (rtx source, rtx scratch)
27130 {
27131   arm_stack_offsets *offsets;
27132   HOST_WIDE_INT delta;
27133   rtx addr, mem;
27134   unsigned long saved_regs;
27135 
27136   offsets = arm_get_frame_offsets ();
27137   saved_regs = offsets->saved_regs_mask;
27138 
27139   if ((saved_regs & (1 << LR_REGNUM)) == 0)
27140     emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27141   else
27142     {
27143       if (frame_pointer_needed)
27144 	addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
27145       else
27146 	{
27147 	  /* LR will be the first saved register.  */
27148 	  delta = offsets->outgoing_args - (offsets->frame + 4);
27149 
27150 
27151 	  if (delta >= 4096)
27152 	    {
27153 	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
27154 				     GEN_INT (delta & ~4095)));
27155 	      addr = scratch;
27156 	      delta &= 4095;
27157 	    }
27158 	  else
27159 	    addr = stack_pointer_rtx;
27160 
27161 	  addr = plus_constant (Pmode, addr, delta);
27162 	}
27163 
27164       /* The store needs to be marked to prevent DSE from deleting
27165 	 it as dead if it is based on fp.  */
27166       mem = gen_frame_mem (Pmode, addr);
27167       MEM_VOLATILE_P (mem) = true;
27168       emit_move_insn (mem, source);
27169     }
27170 }
27171 
27172 
27173 void
27174 thumb_set_return_address (rtx source, rtx scratch)
27175 {
27176   arm_stack_offsets *offsets;
27177   HOST_WIDE_INT delta;
27178   HOST_WIDE_INT limit;
27179   int reg;
27180   rtx addr, mem;
27181   unsigned long mask;
27182 
27183   emit_use (source);
27184 
27185   offsets = arm_get_frame_offsets ();
27186   mask = offsets->saved_regs_mask;
27187   if (mask & (1 << LR_REGNUM))
27188     {
27189       limit = 1024;
27190       /* Find the saved regs.  */
27191       if (frame_pointer_needed)
27192 	{
27193 	  delta = offsets->soft_frame - offsets->saved_args;
27194 	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
27195 	  if (TARGET_THUMB1)
27196 	    limit = 128;
27197 	}
27198       else
27199 	{
27200 	  delta = offsets->outgoing_args - offsets->saved_args;
27201 	  reg = SP_REGNUM;
27202 	}
27203       /* Allow for the stack frame.  */
27204       if (TARGET_THUMB1 && TARGET_BACKTRACE)
27205 	delta -= 16;
27206       /* The link register is always the first saved register.  */
27207       delta -= 4;
27208 
27209       /* Construct the address.  */
27210       addr = gen_rtx_REG (SImode, reg);
27211       if (delta > limit)
27212 	{
27213 	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
27214 	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
27215 	  addr = scratch;
27216 	}
27217       else
27218 	addr = plus_constant (Pmode, addr, delta);
27219 
27220       /* The store needs to be marked to prevent DSE from deleting
27221 	 it as dead if it is based on fp.  */
27222       mem = gen_frame_mem (Pmode, addr);
27223       MEM_VOLATILE_P (mem) = true;
27224       emit_move_insn (mem, source);
27225     }
27226   else
27227     emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27228 }
27229 
27230 /* Implements target hook vector_mode_supported_p.  */
27231 bool
27232 arm_vector_mode_supported_p (machine_mode mode)
27233 {
27234   /* Neon also supports V2SImode, etc. listed in the clause below.  */
27235   if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
27236       || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
27237       || mode == V2DImode || mode == V8HFmode))
27238     return true;
27239 
27240   if ((TARGET_NEON || TARGET_IWMMXT)
27241       && ((mode == V2SImode)
27242 	  || (mode == V4HImode)
27243 	  || (mode == V8QImode)))
27244     return true;
27245 
27246   if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
27247       || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
27248       || mode == V2HAmode))
27249     return true;
27250 
27251   return false;
27252 }
27253 
27254 /* Implements target hook array_mode_supported_p.  */
27255 
27256 static bool
27257 arm_array_mode_supported_p (machine_mode mode,
27258 			    unsigned HOST_WIDE_INT nelems)
27259 {
27260   /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
27261      for now, as the lane-swapping logic needs to be extended in the expanders.
27262      See PR target/82518.  */
27263   if (TARGET_NEON && !BYTES_BIG_ENDIAN
27264       && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
27265       && (nelems >= 2 && nelems <= 4))
27266     return true;
27267 
27268   return false;
27269 }
27270 
27271 /* Use the option -mvectorize-with-neon-double to override the use of quadword
27272    registers when autovectorizing for Neon, at least until multiple vector
27273    widths are supported properly by the middle-end.  */
27274 
27275 static machine_mode
27276 arm_preferred_simd_mode (scalar_mode mode)
27277 {
27278   if (TARGET_NEON)
27279     switch (mode)
27280       {
27281       case E_SFmode:
27282 	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
27283       case E_SImode:
27284 	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
27285       case E_HImode:
27286 	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
27287       case E_QImode:
27288 	return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
27289       case E_DImode:
27290 	if (!TARGET_NEON_VECTORIZE_DOUBLE)
27291 	  return V2DImode;
27292 	break;
27293 
27294       default:;
27295       }
27296 
27297   if (TARGET_REALLY_IWMMXT)
27298     switch (mode)
27299       {
27300       case E_SImode:
27301 	return V2SImode;
27302       case E_HImode:
27303 	return V4HImode;
27304       case E_QImode:
27305 	return V8QImode;
27306 
27307       default:;
27308       }
27309 
27310   return word_mode;
27311 }
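
/* Illustrative behaviour (sketch, not compiled): when vectorizing float code
   with Neon enabled, SFmode elements are normally widened to V4SFmode (one
   quad register); with -mvectorize-with-neon-double the preference drops to
   V2SFmode (one double register).  iWMMXt falls back to 64-bit modes such as
   V2SImode, and anything unhandled stays at word_mode.  */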
27312 
27313 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27314 
27315    We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
27316    using r0-r4 for function arguments, r7 for the stack frame, and not have
27317    enough left over to do doubleword arithmetic.  For Thumb-2 all the
27318    potentially problematic instructions accept high registers so this is not
27319    necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
27320    that require many low registers.  */
27321 static bool
27322 arm_class_likely_spilled_p (reg_class_t rclass)
27323 {
27324   if ((TARGET_THUMB1 && rclass == LO_REGS)
27325       || rclass  == CC_REG)
27326     return true;
27327 
27328   return false;
27329 }
27330 
27331 /* Implements target hook small_register_classes_for_mode_p.  */
27332 bool
27333 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27334 {
27335   return TARGET_THUMB1;
27336 }
27337 
27338 /* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
27339    ARM insns and therefore guarantee that the shift count is modulo 256.
27340    DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27341    guarantee no particular behavior for out-of-range counts.  */
27342 
27343 static unsigned HOST_WIDE_INT
27344 arm_shift_truncation_mask (machine_mode mode)
27345 {
27346   return mode == SImode ? 255 : 0;
27347 }
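
/* Worked example (illustrative, not compiled): with a truncation mask of 255
   for SImode, the middle end may fold the shift count modulo 256, e.g.
       x << 257   behaves like   x << (257 & 255)   ==   x << 1
   on this target, whereas DImode shifts (mask 0) get no such guarantee.  */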
27348 
27349 
27350 /* Map internal gcc register numbers to DWARF2 register numbers.  */
27351 
27352 unsigned int
27353 arm_dbx_register_number (unsigned int regno)
27354 {
27355   if (regno < 16)
27356     return regno;
27357 
27358   if (IS_VFP_REGNUM (regno))
27359     {
27360       /* See comment in arm_dwarf_register_span.  */
27361       if (VFP_REGNO_OK_FOR_SINGLE (regno))
27362 	return 64 + regno - FIRST_VFP_REGNUM;
27363       else
27364 	return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27365     }
27366 
27367   if (IS_IWMMXT_GR_REGNUM (regno))
27368     return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27369 
27370   if (IS_IWMMXT_REGNUM (regno))
27371     return 112 + regno - FIRST_IWMMXT_REGNUM;
27372 
27373   return DWARF_FRAME_REGISTERS;
27374 }
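
/* Worked examples (illustrative, not compiled), assuming GCC's usual VFP
   numbering where d16 starts at FIRST_VFP_REGNUM + 32: r0-r15 keep their own
   numbers 0-15, s0 maps to DWARF register 64 and s1 to 65, d16 maps to
   256 + 16 == 272, and any unhandled register collapses to
   DWARF_FRAME_REGISTERS.  */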
27375 
27376 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27377    GCC models them as 64 32-bit registers, so we need to describe this to
27378    the DWARF generation code.  Other registers can use the default.  */
27379 static rtx
27380 arm_dwarf_register_span (rtx rtl)
27381 {
27382   machine_mode mode;
27383   unsigned regno;
27384   rtx parts[16];
27385   int nregs;
27386   int i;
27387 
27388   regno = REGNO (rtl);
27389   if (!IS_VFP_REGNUM (regno))
27390     return NULL_RTX;
27391 
27392   /* XXX FIXME: The EABI defines two VFP register ranges:
27393 	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27394 	256-287: D0-D31
27395      The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27396      corresponding D register.  Until GDB supports this, we shall use the
27397      legacy encodings.  We also use these encodings for D0-D15 for
27398      compatibility with older debuggers.  */
27399   mode = GET_MODE (rtl);
27400   if (GET_MODE_SIZE (mode) < 8)
27401     return NULL_RTX;
27402 
27403   if (VFP_REGNO_OK_FOR_SINGLE (regno))
27404     {
27405       nregs = GET_MODE_SIZE (mode) / 4;
27406       for (i = 0; i < nregs; i += 2)
27407 	if (TARGET_BIG_END)
27408 	  {
27409 	    parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27410 	    parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27411 	  }
27412 	else
27413 	  {
27414 	    parts[i] = gen_rtx_REG (SImode, regno + i);
27415 	    parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27416 	  }
27417     }
27418   else
27419     {
27420       nregs = GET_MODE_SIZE (mode) / 8;
27421       for (i = 0; i < nregs; i++)
27422 	parts[i] = gen_rtx_REG (DImode, regno + i);
27423     }
27424 
27425   return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
27426 }
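
/* Illustrative example (sketch, not compiled): a DFmode value held in a VFP
   register pair that is addressable as singles, say s0/s1, is described to
   DWARF as (parallel [(reg:SI s0) (reg:SI s1)]), with the two halves swapped
   on big-endian targets, while a value living in d16-d31 is described with
   one DImode register per 64 bits.  */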
27427 
27428 #if ARM_UNWIND_INFO
27429 /* Emit unwind directives for a store-multiple instruction or stack pointer
27430    push during alignment.
27431    These should only ever be generated by the function prologue code, so
27432    expect them to have a particular form.
27433    The store-multiple instruction sometimes pushes pc as the last register,
27434    although it should not be tracked in the unwind information, or for -Os
27435    sometimes pushes some dummy registers before the first register that needs
27436    to be tracked in the unwind information; such dummy registers are there just
27437    to avoid a separate stack adjustment, and will not be restored in the
27438    epilogue.  */
27439 
27440 static void
27441 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27442 {
27443   int i;
27444   HOST_WIDE_INT offset;
27445   HOST_WIDE_INT nregs;
27446   int reg_size;
27447   unsigned reg;
27448   unsigned lastreg;
27449   unsigned padfirst = 0, padlast = 0;
27450   rtx e;
27451 
27452   e = XVECEXP (p, 0, 0);
27453   gcc_assert (GET_CODE (e) == SET);
27454 
27455   /* First insn will adjust the stack pointer.  */
27456   gcc_assert (GET_CODE (e) == SET
27457 	      && REG_P (SET_DEST (e))
27458 	      && REGNO (SET_DEST (e)) == SP_REGNUM
27459 	      && GET_CODE (SET_SRC (e)) == PLUS);
27460 
27461   offset = -INTVAL (XEXP (SET_SRC (e), 1));
27462   nregs = XVECLEN (p, 0) - 1;
27463   gcc_assert (nregs);
27464 
27465   reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27466   if (reg < 16)
27467     {
27468       /* For -Os dummy registers can be pushed at the beginning to
27469 	 avoid separate stack pointer adjustment.  */
27470       e = XVECEXP (p, 0, 1);
27471       e = XEXP (SET_DEST (e), 0);
27472       if (GET_CODE (e) == PLUS)
27473 	padfirst = INTVAL (XEXP (e, 1));
27474       gcc_assert (padfirst == 0 || optimize_size);
27475       /* The function prologue may also push pc, but not annotate it as it is
27476 	 never restored.  We turn this into a stack pointer adjustment.  */
27477       e = XVECEXP (p, 0, nregs);
27478       e = XEXP (SET_DEST (e), 0);
27479       if (GET_CODE (e) == PLUS)
27480 	padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27481       else
27482 	padlast = offset - 4;
27483       gcc_assert (padlast == 0 || padlast == 4);
27484       if (padlast == 4)
27485 	fprintf (asm_out_file, "\t.pad #4\n");
27486       reg_size = 4;
27487       fprintf (asm_out_file, "\t.save {");
27488     }
27489   else if (IS_VFP_REGNUM (reg))
27490     {
27491       reg_size = 8;
27492       fprintf (asm_out_file, "\t.vsave {");
27493     }
27494   else
27495     /* Unknown register type.  */
27496     gcc_unreachable ();
27497 
27498   /* If the stack increment doesn't match the size of the saved registers,
27499      something has gone horribly wrong.  */
27500   gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27501 
27502   offset = padfirst;
27503   lastreg = 0;
27504   /* The remaining insns will describe the stores.  */
27505   for (i = 1; i <= nregs; i++)
27506     {
27507       /* Expect (set (mem <addr>) (reg)).
27508          Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
27509       e = XVECEXP (p, 0, i);
27510       gcc_assert (GET_CODE (e) == SET
27511 		  && MEM_P (SET_DEST (e))
27512 		  && REG_P (SET_SRC (e)));
27513 
27514       reg = REGNO (SET_SRC (e));
27515       gcc_assert (reg >= lastreg);
27516 
27517       if (i != 1)
27518 	fprintf (asm_out_file, ", ");
27519       /* We can't use %r for vfp because we need to use the
27520 	 double precision register names.  */
27521       if (IS_VFP_REGNUM (reg))
27522 	asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27523       else
27524 	asm_fprintf (asm_out_file, "%r", reg);
27525 
27526       if (flag_checking)
27527 	{
27528 	  /* Check that the addresses are consecutive.  */
27529 	  e = XEXP (SET_DEST (e), 0);
27530 	  if (GET_CODE (e) == PLUS)
27531 	    gcc_assert (REG_P (XEXP (e, 0))
27532 			&& REGNO (XEXP (e, 0)) == SP_REGNUM
27533 			&& CONST_INT_P (XEXP (e, 1))
27534 			&& offset == INTVAL (XEXP (e, 1)));
27535 	  else
27536 	    gcc_assert (i == 1
27537 			&& REG_P (e)
27538 			&& REGNO (e) == SP_REGNUM);
27539 	  offset += reg_size;
27540 	}
27541     }
27542   fprintf (asm_out_file, "}\n");
27543   if (padfirst)
27544     fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27545 }
27546 
27547 /*  Emit unwind directives for a SET.  */
27548 
27549 static void
27550 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27551 {
27552   rtx e0;
27553   rtx e1;
27554   unsigned reg;
27555 
27556   e0 = XEXP (p, 0);
27557   e1 = XEXP (p, 1);
27558   switch (GET_CODE (e0))
27559     {
27560     case MEM:
27561       /* Pushing a single register.  */
27562       if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27563 	  || !REG_P (XEXP (XEXP (e0, 0), 0))
27564 	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27565 	abort ();
27566 
27567       asm_fprintf (asm_out_file, "\t.save ");
27568       if (IS_VFP_REGNUM (REGNO (e1)))
27569 	asm_fprintf(asm_out_file, "{d%d}\n",
27570 		    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27571       else
27572 	asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27573       break;
27574 
27575     case REG:
27576       if (REGNO (e0) == SP_REGNUM)
27577 	{
27578 	  /* A stack increment.  */
27579 	  if (GET_CODE (e1) != PLUS
27580 	      || !REG_P (XEXP (e1, 0))
27581 	      || REGNO (XEXP (e1, 0)) != SP_REGNUM
27582 	      || !CONST_INT_P (XEXP (e1, 1)))
27583 	    abort ();
27584 
27585 	  asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27586 		       -INTVAL (XEXP (e1, 1)));
27587 	}
27588       else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27589 	{
27590 	  HOST_WIDE_INT offset;
27591 
27592 	  if (GET_CODE (e1) == PLUS)
27593 	    {
27594 	      if (!REG_P (XEXP (e1, 0))
27595 		  || !CONST_INT_P (XEXP (e1, 1)))
27596 		abort ();
27597 	      reg = REGNO (XEXP (e1, 0));
27598 	      offset = INTVAL (XEXP (e1, 1));
27599 	      asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27600 			   HARD_FRAME_POINTER_REGNUM, reg,
27601 			   offset);
27602 	    }
27603 	  else if (REG_P (e1))
27604 	    {
27605 	      reg = REGNO (e1);
27606 	      asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27607 			   HARD_FRAME_POINTER_REGNUM, reg);
27608 	    }
27609 	  else
27610 	    abort ();
27611 	}
27612       else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27613 	{
27614 	  /* Move from sp to reg.  */
27615 	  asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27616 	}
27617       else if (GET_CODE (e1) == PLUS
27618 	      && REG_P (XEXP (e1, 0))
27619 	      && REGNO (XEXP (e1, 0)) == SP_REGNUM
27620 	      && CONST_INT_P (XEXP (e1, 1)))
27621 	{
27622 	  /* Set reg to offset from sp.  */
27623 	  asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27624 		       REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27625 	}
27626       else
27627 	abort ();
27628       break;
27629 
27630     default:
27631       abort ();
27632     }
27633 }
27634 
27635 
27636 /* Emit unwind directives for the given insn.  */
27637 
27638 static void
27639 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27640 {
27641   rtx note, pat;
27642   bool handled_one = false;
27643 
27644   if (arm_except_unwind_info (&global_options) != UI_TARGET)
27645     return;
27646 
27647   if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27648       && (TREE_NOTHROW (current_function_decl)
27649 	  || crtl->all_throwers_are_sibcalls))
27650     return;
27651 
27652   if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27653     return;
27654 
27655   for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27656     {
27657       switch (REG_NOTE_KIND (note))
27658 	{
27659 	case REG_FRAME_RELATED_EXPR:
27660 	  pat = XEXP (note, 0);
27661 	  goto found;
27662 
27663 	case REG_CFA_REGISTER:
27664 	  pat = XEXP (note, 0);
27665 	  if (pat == NULL)
27666 	    {
27667 	      pat = PATTERN (insn);
27668 	      if (GET_CODE (pat) == PARALLEL)
27669 		pat = XVECEXP (pat, 0, 0);
27670 	    }
27671 
27672 	  /* Only emitted for IS_STACKALIGN re-alignment.  */
27673 	  {
27674 	    rtx dest, src;
27675 	    unsigned reg;
27676 
27677 	    src = SET_SRC (pat);
27678 	    dest = SET_DEST (pat);
27679 
27680 	    gcc_assert (src == stack_pointer_rtx);
27681 	    reg = REGNO (dest);
27682 	    asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27683 			 reg + 0x90, reg);
27684 	  }
27685 	  handled_one = true;
27686 	  break;
27687 
27688 	/* The INSN is generated in the epilogue.  It is set as RTX_FRAME_RELATED_P
27689 	   to get correct dwarf information for shrink-wrap.  We should not
27690 	   emit unwind information for it because these are used either for
27691 	   pretend arguments or notes to adjust sp and restore registers from
27692 	   the stack.  */
27693 	case REG_CFA_DEF_CFA:
27694 	case REG_CFA_ADJUST_CFA:
27695 	case REG_CFA_RESTORE:
27696 	  return;
27697 
27698 	case REG_CFA_EXPRESSION:
27699 	case REG_CFA_OFFSET:
27700 	  /* ??? Only handling here what we actually emit.  */
27701 	  gcc_unreachable ();
27702 
27703 	default:
27704 	  break;
27705 	}
27706     }
27707   if (handled_one)
27708     return;
27709   pat = PATTERN (insn);
27710  found:
27711 
27712   switch (GET_CODE (pat))
27713     {
27714     case SET:
27715       arm_unwind_emit_set (asm_out_file, pat);
27716       break;
27717 
27718     case SEQUENCE:
27719       /* Store multiple.  */
27720       arm_unwind_emit_sequence (asm_out_file, pat);
27721       break;
27722 
27723     default:
27724       abort();
27725     }
27726 }
27727 
27728 
27729 /* Output a reference from a function exception table to the type_info
27730    object X.  The EABI specifies that the symbol should be relocated by
27731    an R_ARM_TARGET2 relocation.  */
27732 
27733 static bool
27734 arm_output_ttype (rtx x)
27735 {
27736   fputs ("\t.word\t", asm_out_file);
27737   output_addr_const (asm_out_file, x);
27738   /* Use special relocations for symbol references.  */
27739   if (!CONST_INT_P (x))
27740     fputs ("(TARGET2)", asm_out_file);
27741   fputc ('\n', asm_out_file);
27742 
27743   return TRUE;
27744 }
27745 
27746 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */
27747 
27748 static void
27749 arm_asm_emit_except_personality (rtx personality)
27750 {
27751   fputs ("\t.personality\t", asm_out_file);
27752   output_addr_const (asm_out_file, personality);
27753   fputc ('\n', asm_out_file);
27754 }
27755 #endif /* ARM_UNWIND_INFO */
27756 
27757 /* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */
27758 
27759 static void
27760 arm_asm_init_sections (void)
27761 {
27762 #if ARM_UNWIND_INFO
27763   exception_section = get_unnamed_section (0, output_section_asm_op,
27764 					   "\t.handlerdata");
27765 #endif /* ARM_UNWIND_INFO */
27766 
27767 #ifdef OBJECT_FORMAT_ELF
27768   if (target_pure_code)
27769     text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27770 #endif
27771 }
27772 
27773 /* Output unwind directives for the start/end of a function.  */
27774 
27775 void
27776 arm_output_fn_unwind (FILE * f, bool prologue)
27777 {
27778   if (arm_except_unwind_info (&global_options) != UI_TARGET)
27779     return;
27780 
27781   if (prologue)
27782     fputs ("\t.fnstart\n", f);
27783   else
27784     {
27785       /* If this function will never be unwound, then mark it as such.
27786          The same condition is used in arm_unwind_emit to suppress
27787 	 the frame annotations.  */
27788       if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27789 	  && (TREE_NOTHROW (current_function_decl)
27790 	      || crtl->all_throwers_are_sibcalls))
27791 	fputs("\t.cantunwind\n", f);
27792 
27793       fputs ("\t.fnend\n", f);
27794     }
27795 }
27796 
27797 static bool
27798 arm_emit_tls_decoration (FILE *fp, rtx x)
27799 {
27800   enum tls_reloc reloc;
27801   rtx val;
27802 
27803   val = XVECEXP (x, 0, 0);
27804   reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27805 
27806   output_addr_const (fp, val);
27807 
27808   switch (reloc)
27809     {
27810     case TLS_GD32:
27811       fputs ("(tlsgd)", fp);
27812       break;
27813     case TLS_LDM32:
27814       fputs ("(tlsldm)", fp);
27815       break;
27816     case TLS_LDO32:
27817       fputs ("(tlsldo)", fp);
27818       break;
27819     case TLS_IE32:
27820       fputs ("(gottpoff)", fp);
27821       break;
27822     case TLS_LE32:
27823       fputs ("(tpoff)", fp);
27824       break;
27825     case TLS_DESCSEQ:
27826       fputs ("(tlsdesc)", fp);
27827       break;
27828     default:
27829       gcc_unreachable ();
27830     }
27831 
27832   switch (reloc)
27833     {
27834     case TLS_GD32:
27835     case TLS_LDM32:
27836     case TLS_IE32:
27837     case TLS_DESCSEQ:
27838       fputs (" + (. - ", fp);
27839       output_addr_const (fp, XVECEXP (x, 0, 2));
27840       /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27841       /* For DESCSEQ the 3rd operand encodes thumbness, and is added.  */
27842       output_addr_const (fp, XVECEXP (x, 0, 3));
27843       fputc (')', fp);
27844       break;
27845     default:
27846       break;
27847     }
27848 
27849   return TRUE;
27850 }
27851 
27852 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */
27853 
27854 static void
27855 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27856 {
27857   gcc_assert (size == 4);
27858   fputs ("\t.word\t", file);
27859   output_addr_const (file, x);
27860   fputs ("(tlsldo)", file);
27861 }
27862 
27863 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */
27864 
27865 static bool
27866 arm_output_addr_const_extra (FILE *fp, rtx x)
27867 {
27868   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27869     return arm_emit_tls_decoration (fp, x);
27870   else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27871     {
27872       char label[256];
27873       int labelno = INTVAL (XVECEXP (x, 0, 0));
27874 
27875       ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27876       assemble_name_raw (fp, label);
27877 
27878       return TRUE;
27879     }
27880   else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27881     {
27882       assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27883       if (GOT_PCREL)
27884 	fputs ("+.", fp);
27885       fputs ("-(", fp);
27886       output_addr_const (fp, XVECEXP (x, 0, 0));
27887       fputc (')', fp);
27888       return TRUE;
27889     }
27890   else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27891     {
27892       output_addr_const (fp, XVECEXP (x, 0, 0));
27893       if (GOT_PCREL)
27894         fputs ("+.", fp);
27895       fputs ("-(", fp);
27896       output_addr_const (fp, XVECEXP (x, 0, 1));
27897       fputc (')', fp);
27898       return TRUE;
27899     }
27900   else if (GET_CODE (x) == CONST_VECTOR)
27901     return arm_emit_vector_const (fp, x);
27902 
27903   return FALSE;
27904 }
27905 
27906 /* Output assembly for a shift instruction.
27907    SET_FLAGS determines how the instruction modifies the condition codes.
27908    0 - Do not set condition codes.
27909    1 - Set condition codes.
27910    2 - Use smallest instruction.  */
27911 const char *
27912 arm_output_shift (rtx *operands, int set_flags)
27913 {
27914   char pattern[100];
27915   static const char flag_chars[3] = {'?', '.', '!'};
27916   const char *shift;
27917   HOST_WIDE_INT val;
27918   char c;
27919 
27920   c = flag_chars[set_flags];
27921   shift = shift_op(operands[3], &val);
27922   if (shift)
27923     {
27924       if (val != -1)
27925 	operands[2] = GEN_INT(val);
27926       sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27927     }
27928   else
27929     sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27930 
27931   output_asm_insn (pattern, operands);
27932   return "";
27933 }
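
/* Illustrative example (sketch, not compiled), assuming shift_op returns
   "lsl" for a left shift: with SET_FLAGS == 0 the emitted template is
   "lsl%?\t%0, %1, %2".  The '?', '.' and '!' punctuation characters are
   expanded later by the ARM operand-printing code into, respectively, the
   condition suffix, the flag-setting variant and the smallest-encoding
   variant, matching the 0/1/2 meanings documented above.  */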
27934 
27935 /* Output assembly for a WMMX immediate shift instruction.  */
27936 const char *
27937 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27938 {
27939   int shift = INTVAL (operands[2]);
27940   char templ[50];
27941   machine_mode opmode = GET_MODE (operands[0]);
27942 
27943   gcc_assert (shift >= 0);
27944 
27945   /* Handle shift values larger than the element width allows: > 63 (for the
27946      D qualifier), > 31 (for the W qualifier) or > 15 (for the H qualifier).  */
27947   if (((opmode == V4HImode) && (shift > 15))
27948 	|| ((opmode == V2SImode) && (shift > 31))
27949 	|| ((opmode == DImode) && (shift > 63)))
27950   {
27951     if (wror_or_wsra)
27952       {
27953         sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27954         output_asm_insn (templ, operands);
27955         if (opmode == DImode)
27956           {
27957 	    sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27958 	    output_asm_insn (templ, operands);
27959           }
27960       }
27961     else
27962       {
27963         /* The destination register will contain all zeros.  */
27964         sprintf (templ, "wzero\t%%0");
27965         output_asm_insn (templ, operands);
27966       }
27967     return "";
27968   }
27969 
27970   if ((opmode == DImode) && (shift > 32))
27971     {
27972       sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27973       output_asm_insn (templ, operands);
27974       sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27975       output_asm_insn (templ, operands);
27976     }
27977   else
27978     {
27979       sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27980       output_asm_insn (templ, operands);
27981     }
27982   return "";
27983 }
27984 
27985 /* Output assembly for a WMMX tinsr instruction.  */
27986 const char *
27987 arm_output_iwmmxt_tinsr (rtx *operands)
27988 {
27989   int mask = INTVAL (operands[3]);
27990   int i;
27991   char templ[50];
27992   int units = mode_nunits[GET_MODE (operands[0])];
27993   gcc_assert ((mask & (mask - 1)) == 0);
27994   for (i = 0; i < units; ++i)
27995     {
27996       if ((mask & 0x01) == 1)
27997         {
27998           break;
27999         }
28000       mask >>= 1;
28001     }
28002   gcc_assert (i < units);
28003   {
28004     switch (GET_MODE (operands[0]))
28005       {
28006       case E_V8QImode:
28007 	sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
28008 	break;
28009       case E_V4HImode:
28010 	sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
28011 	break;
28012       case E_V2SImode:
28013 	sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
28014 	break;
28015       default:
28016 	gcc_unreachable ();
28017 	break;
28018       }
28019     output_asm_insn (templ, operands);
28020   }
28021   return "";
28022 }
28023 
28024 /* Output a Thumb-1 casesi dispatch sequence.  */
28025 const char *
28026 thumb1_output_casesi (rtx *operands)
28027 {
28028   rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
28029 
28030   gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
28031 
28032   switch (GET_MODE(diff_vec))
28033     {
28034     case E_QImode:
28035       return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
28036 	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
28037     case E_HImode:
28038       return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
28039 	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
28040     case E_SImode:
28041       return "bl\t%___gnu_thumb1_case_si";
28042     default:
28043       gcc_unreachable ();
28044     }
28045 }
28046 
28047 /* Output a Thumb-2 casesi instruction.  */
28048 const char *
28049 thumb2_output_casesi (rtx *operands)
28050 {
28051   rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
28052 
28053   gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
28054 
28055   output_asm_insn ("cmp\t%0, %1", operands);
28056   output_asm_insn ("bhi\t%l3", operands);
28057   switch (GET_MODE(diff_vec))
28058     {
28059     case E_QImode:
28060       return "tbb\t[%|pc, %0]";
28061     case E_HImode:
28062       return "tbh\t[%|pc, %0, lsl #1]";
28063     case E_SImode:
28064       if (flag_pic)
28065 	{
28066 	  output_asm_insn ("adr\t%4, %l2", operands);
28067 	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
28068 	  output_asm_insn ("add\t%4, %4, %5", operands);
28069 	  return "bx\t%4";
28070 	}
28071       else
28072 	{
28073 	  output_asm_insn ("adr\t%4, %l2", operands);
28074 	  return "ldr\t%|pc, [%4, %0, lsl #2]";
28075 	}
28076     default:
28077       gcc_unreachable ();
28078     }
28079 }
28080 
28081 /* Implement TARGET_SCHED_ISSUE_RATE.  Lookup the issue rate in the
28082    per-core tuning structs.  */
28083 static int
28084 arm_issue_rate (void)
28085 {
28086   return current_tune->issue_rate;
28087 }
28088 
28089 /* Return how many instructions the scheduler should look ahead to choose the
28090    best one.  */
28091 static int
28092 arm_first_cycle_multipass_dfa_lookahead (void)
28093 {
28094   int issue_rate = arm_issue_rate ();
28095 
28096   return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
28097 }
28098 
28099 /* Enable modeling of L2 auto-prefetcher.  */
28100 static int
28101 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
28102 {
28103   return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
28104 }
28105 
28106 const char *
28107 arm_mangle_type (const_tree type)
28108 {
28109   /* The ARM ABI documents (10th October 2008) say that "__va_list"
28110      has to be mangled as if it is in the "std" namespace.  */
28111   if (TARGET_AAPCS_BASED
28112       && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
28113     return "St9__va_list";
28114 
28115   /* Half-precision float.  */
28116   if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
28117     return "Dh";
28118 
28119   /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
28120      builtin type.  */
28121   if (TYPE_NAME (type) != NULL)
28122     return arm_mangle_builtin_type (type);
28123 
28124   /* Use the default mangling.  */
28125   return NULL;
28126 }
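
/* Illustrative examples (sketch, not compiled): on an AAPCS target a
   parameter of type __builtin_va_list mangles as "St9__va_list" (i.e.
   std::__va_list), and the half-precision __fp16 type mangles as "Dh";
   other types fall through to the language's default mangling.  */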
28127 
28128 /* Order of allocation of core registers for Thumb: this allocation is
28129    written over the corresponding initial entries of the array
28130    initialized with REG_ALLOC_ORDER.  We allocate all low registers
28131    first.  Saving and restoring a low register is usually cheaper than
28132    using a call-clobbered high register.  */
28133 
28134 static const int thumb_core_reg_alloc_order[] =
28135 {
28136    3,  2,  1,  0,  4,  5,  6,  7,
28137   12, 14,  8,  9, 10, 11
28138 };
28139 
28140 /* Adjust register allocation order when compiling for Thumb.  */
28141 
28142 void
28143 arm_order_regs_for_local_alloc (void)
28144 {
28145   const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
28146   memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
28147   if (TARGET_THUMB)
28148     memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
28149             sizeof (thumb_core_reg_alloc_order));
28150 }
28151 
28152 /* Implement TARGET_FRAME_POINTER_REQUIRED.  */
28153 
28154 bool
28155 arm_frame_pointer_required (void)
28156 {
28157   if (SUBTARGET_FRAME_POINTER_REQUIRED)
28158     return true;
28159 
28160   /* If the function receives nonlocal gotos, it needs to save the frame
28161      pointer in the nonlocal_goto_save_area object.  */
28162   if (cfun->has_nonlocal_label)
28163     return true;
28164 
28165   /* The frame pointer is required for non-leaf APCS frames.  */
28166   if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
28167     return true;
28168 
28169   /* If we are probing the stack in the prologue, we will have a faulting
28170      instruction prior to the stack adjustment and this requires a frame
28171      pointer if we want to catch the exception using the EABI unwinder.  */
28172   if (!IS_INTERRUPT (arm_current_func_type ())
28173       && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
28174 	  || flag_stack_clash_protection)
28175       && arm_except_unwind_info (&global_options) == UI_TARGET
28176       && cfun->can_throw_non_call_exceptions)
28177     {
28178       HOST_WIDE_INT size = get_frame_size ();
28179 
28180       /* That's irrelevant if there is no stack adjustment.  */
28181       if (size <= 0)
28182 	return false;
28183 
28184       /* That's relevant only if there is a stack probe.  */
28185       if (crtl->is_leaf && !cfun->calls_alloca)
28186 	{
28187 	  /* We don't have the final size of the frame so adjust.  */
28188 	  size += 32 * UNITS_PER_WORD;
28189 	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
28190 	    return true;
28191 	}
28192       else
28193 	return true;
28194     }
28195 
28196   return false;
28197 }
28198 
28199 /* Thumb-1 is the only target that lacks conditional execution, so return
28200    true if the target is not Thumb-1.  */
28201 static bool
28202 arm_have_conditional_execution (void)
28203 {
28204   return !TARGET_THUMB1;
28205 }
28206 
28207 /* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
28208 static HOST_WIDE_INT
28209 arm_vector_alignment (const_tree type)
28210 {
28211   HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
28212 
28213   if (TARGET_AAPCS_BASED)
28214     align = MIN (align, 64);
28215 
28216   return align;
28217 }
28218 
28219 static void
28220 arm_autovectorize_vector_sizes (vector_sizes *sizes)
28221 {
28222   if (!TARGET_NEON_VECTORIZE_DOUBLE)
28223     {
28224       sizes->safe_push (16);
28225       sizes->safe_push (8);
28226     }
28227 }
28228 
28229 static bool
28230 arm_vector_alignment_reachable (const_tree type, bool is_packed)
28231 {
28232   /* Vectors which aren't in packed structures will not be less aligned than
28233      the natural alignment of their element type, so this is safe.  */
28234   if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28235     return !is_packed;
28236 
28237   return default_builtin_vector_alignment_reachable (type, is_packed);
28238 }
28239 
28240 static bool
28241 arm_builtin_support_vector_misalignment (machine_mode mode,
28242 					 const_tree type, int misalignment,
28243 					 bool is_packed)
28244 {
28245   if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28246     {
28247       HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
28248 
28249       if (is_packed)
28250         return align == 1;
28251 
28252       /* If the misalignment is unknown, we should be able to handle the access
28253 	 so long as it is not to a member of a packed data structure.  */
28254       if (misalignment == -1)
28255         return true;
28256 
28257       /* Return true if the misalignment is a multiple of the natural alignment
28258          of the vector's element type.  This is probably always going to be
28259 	 true in practice, since we've already established that this isn't a
28260 	 packed access.  */
28261       return ((misalignment % align) == 0);
28262     }
28263 
28264   return default_builtin_support_vector_misalignment (mode, type, misalignment,
28265 						      is_packed);
28266 }
28267 
28268 static void
28269 arm_conditional_register_usage (void)
28270 {
28271   int regno;
28272 
28273   if (TARGET_THUMB1 && optimize_size)
28274     {
28275       /* When optimizing for size on Thumb-1, it's better not
28276         to use the HI regs, because of the overhead of
28277         stacking them.  */
28278       for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
28279 	fixed_regs[regno] = call_used_regs[regno] = 1;
28280     }
28281 
28282   /* The link register can be clobbered by any branch insn,
28283      but we have no way to track that at present, so mark
28284      it as unavailable.  */
28285   if (TARGET_THUMB1)
28286     fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
28287 
28288   if (TARGET_32BIT && TARGET_HARD_FLOAT)
28289     {
28290       /* VFPv3 registers are disabled when earlier VFP
28291 	 versions are selected due to the definition of
28292 	 LAST_VFP_REGNUM.  */
28293       for (regno = FIRST_VFP_REGNUM;
28294 	   regno <= LAST_VFP_REGNUM; ++ regno)
28295 	{
28296 	  fixed_regs[regno] = 0;
28297 	  call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
28298 	    || regno >= FIRST_VFP_REGNUM + 32;
28299 	}
28300     }
28301 
28302   if (TARGET_REALLY_IWMMXT)
28303     {
28304       regno = FIRST_IWMMXT_GR_REGNUM;
28305       /* The 2002/10/09 revision of the XScale ABI has wCG0
28306          and wCG1 as call-preserved registers.  The 2002/11/21
28307          revision changed this so that all wCG registers are
28308          scratch registers.  */
28309       for (regno = FIRST_IWMMXT_GR_REGNUM;
28310 	   regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28311 	fixed_regs[regno] = 0;
28312       /* The XScale ABI has wR0 - wR9 as scratch registers,
28313 	 the rest as call-preserved registers.  */
28314       for (regno = FIRST_IWMMXT_REGNUM;
28315 	   regno <= LAST_IWMMXT_REGNUM; ++ regno)
28316 	{
28317 	  fixed_regs[regno] = 0;
28318 	  call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28319 	}
28320     }
28321 
28322   if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28323     {
28324       fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28325       call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28326     }
28327   else if (TARGET_APCS_STACK)
28328     {
28329       fixed_regs[10]     = 1;
28330       call_used_regs[10] = 1;
28331     }
28332   /* -mcaller-super-interworking reserves r11 for calls to
28333      _interwork_r11_call_via_rN().  Making the register global
28334      is an easy way of ensuring that it remains valid for all
28335      calls.  */
28336   if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28337       || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28338     {
28339       fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28340       call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28341       if (TARGET_CALLER_INTERWORKING)
28342 	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28343     }
28344   SUBTARGET_CONDITIONAL_REGISTER_USAGE
28345 }
28346 
28347 static reg_class_t
28348 arm_preferred_rename_class (reg_class_t rclass)
28349 {
28350   /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28351      using GENERAL_REGS.  During the register rename pass, we prefer LO_REGS,
28352      so code size can be reduced.  */
28353   if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28354     return LO_REGS;
28355   else
28356     return NO_REGS;
28357 }
28358 
28359 /* Compute the attribute "length" of insn "*push_multi".
28360    So this function MUST be kept in sync with that insn pattern.  */
28361 int
28362 arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
28363 {
28364   int i, regno, hi_reg;
28365   int num_saves = XVECLEN (parallel_op, 0);
28366 
28367   /* ARM mode.  */
28368   if (TARGET_ARM)
28369     return 4;
28370   /* Thumb1 mode.  */
28371   if (TARGET_THUMB1)
28372     return 2;
28373 
28374   /* Thumb2 mode.  */
28375   regno = REGNO (first_op);
28376   /* For PUSH/STM under Thumb-2, we can use the 16-bit encodings if the register
28377      list fits in 8 bits.  Normally this means all registers in the list must be
28378      LO_REGS, that is (R0-R7).  If any HI_REGS are used, then we must use the
28379      32-bit encodings.  There is one exception: for PUSH, LR in HI_REGS can be
28380      used with the 16-bit encoding.  */
28381   hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28382   for (i = 1; i < num_saves && !hi_reg; i++)
28383     {
28384       regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28385       hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28386     }
28387 
28388   if (!hi_reg)
28389     return 2;
28390   return 4;
28391 }
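
/* Illustrative lengths (sketch, not compiled) for Thumb-2:
     push {r0-r7}   -> 2 bytes (all LO_REGS, 16-bit encoding)
     push {r4, lr}  -> 2 bytes (LR is the one HI_REG allowed)
     push {r4, r8}  -> 4 bytes (r8 forces the 32-bit encoding)
   ARM mode is always 4 bytes and Thumb-1 always 2.  */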
28392 
28393 /* Compute the attribute "length" of insn.  Currently, this function is used
28394    for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28395    "*pop_multiple_with_writeback_and_return".  OPERANDS is the toplevel PARALLEL
28396    rtx, RETURN_PC is true if OPERANDS contains a return insn.  WRITE_BACK_P is
28397    true if OPERANDS contains an insn which explicitly updates the base register.  */
28398 
28399 int
28400 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28401 {
28402   /* ARM mode.  */
28403   if (TARGET_ARM)
28404     return 4;
28405   /* Thumb1 mode.  */
28406   if (TARGET_THUMB1)
28407     return 2;
28408 
28409   rtx parallel_op = operands[0];
28410   /* Initialize to the number of elements in the PARALLEL.  */
28411   unsigned indx = XVECLEN (parallel_op, 0) - 1;
28412   /* Initialize the value to the base register number.  */
28413   unsigned regno = REGNO (operands[1]);
28414   /* Skip return and write back pattern.
28415      We only need register pop pattern for later analysis.  */
28416   unsigned first_indx = 0;
28417   first_indx += return_pc ? 1 : 0;
28418   first_indx += write_back_p ? 1 : 0;
28419 
28420   /* A pop operation can be done through LDM or POP.  If the base register is SP
28421      and write back is used, then an LDM is an alias of POP.  */
28422   bool pop_p = (regno == SP_REGNUM && write_back_p);
28423   bool ldm_p = !pop_p;
28424 
28425   /* Check base register for LDM.  */
28426   if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28427     return 4;
28428 
28429   /* Check each register in the list.  */
28430   for (; indx >= first_indx; indx--)
28431     {
28432       regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28433       /* For POP, PC in HI_REGS can be used with 16-bit encoding.  See similar
28434 	 comment in arm_attr_length_push_multi.  */
28435       if (REGNO_REG_CLASS (regno) == HI_REGS
28436 	  && (regno != PC_REGNUM || ldm_p))
28437 	return 4;
28438     }
28439 
28440   return 2;
28441 }
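
/* Illustrative lengths (sketch, not compiled) for Thumb-2:
     pop {r4, pc}         -> 2 bytes (SP base with write back, PC allowed)
     ldmia r8!, {r0, r1}  -> 4 bytes (high base register forces 32 bits)
     ldmia r0, {r1, r8}   -> 4 bytes (r8 in the list forces 32 bits).  */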
28442 
28443 /* Compute the number of instructions emitted by output_move_double.  */
28444 int
28445 arm_count_output_move_double_insns (rtx *operands)
28446 {
28447   int count;
28448   rtx ops[2];
28449   /* output_move_double may modify the operands array, so call it
28450      here on a copy of the array.  */
28451   ops[0] = operands[0];
28452   ops[1] = operands[1];
28453   output_move_double (ops, false, &count);
28454   return count;
28455 }
28456 
28457 int
28458 vfp3_const_double_for_fract_bits (rtx operand)
28459 {
28460   REAL_VALUE_TYPE r0;
28461 
28462   if (!CONST_DOUBLE_P (operand))
28463     return 0;
28464 
28465   r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28466   if (exact_real_inverse (DFmode, &r0)
28467       && !REAL_VALUE_NEGATIVE (r0))
28468     {
28469       if (exact_real_truncate (DFmode, &r0))
28470 	{
28471 	  HOST_WIDE_INT value = real_to_integer (&r0);
28472 	  value = value & 0xffffffff;
28473 	  if ((value != 0) && ( (value & (value - 1)) == 0))
28474 	    {
28475 	      int ret = exact_log2 (value);
28476 	      gcc_assert (IN_RANGE (ret, 0, 31));
28477 	      return ret;
28478 	    }
28479 	}
28480     }
28481   return 0;
28482 }
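
/* Worked example (illustrative, not compiled): for the constant 1/256.0 the
   exact inverse is 256.0 == 2^8, so the function returns 8 and the operation
   can be expressed with 8 fraction bits; for a value such as 0.3, whose
   inverse is not a power of two, it returns 0 and no transformation is
   attempted.  */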
28483 
28484 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28485    log2 is in [1, 32], return that log2.  Otherwise return -1.
28486    This is used in the patterns for vcvt.s32.f32 floating-point to
28487    fixed-point conversions.  */
28488 
28489 int
28490 vfp3_const_double_for_bits (rtx x)
28491 {
28492   const REAL_VALUE_TYPE *r;
28493 
28494   if (!CONST_DOUBLE_P (x))
28495     return -1;
28496 
28497   r = CONST_DOUBLE_REAL_VALUE (x);
28498 
28499   if (REAL_VALUE_NEGATIVE (*r)
28500       || REAL_VALUE_ISNAN (*r)
28501       || REAL_VALUE_ISINF (*r)
28502       || !real_isinteger (r, SFmode))
28503     return -1;
28504 
28505   HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28506 
28507 /* The exact_log2 above will have returned -1 if this is
28508    not an exact log2.  */
28509   if (!IN_RANGE (hwint, 1, 32))
28510     return -1;
28511 
28512   return hwint;
28513 }
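
/* Worked examples (illustrative, not compiled): 16.0 yields 4 (an exact
   power of two with log2 in [1, 32]); 1.0 yields -1 because log2 == 0 is
   outside the accepted range; 3.0, or any negative, NaN or infinite value,
   also yields -1.  */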
28514 
28515 
28516 /* Emit a memory barrier around an atomic sequence according to MODEL.  */
28517 
28518 static void
28519 arm_pre_atomic_barrier (enum memmodel model)
28520 {
28521   if (need_atomic_barrier_p (model, true))
28522     emit_insn (gen_memory_barrier ());
28523 }
28524 
28525 static void
28526 arm_post_atomic_barrier (enum memmodel model)
28527 {
28528   if (need_atomic_barrier_p (model, false))
28529     emit_insn (gen_memory_barrier ());
28530 }
28531 
28532 /* Emit the load-exclusive and store-exclusive instructions.
28533    Use acquire and release versions if necessary.  */
28534 
28535 static void
28536 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28537 {
28538   rtx (*gen) (rtx, rtx);
28539 
28540   if (acq)
28541     {
28542       switch (mode)
28543         {
28544         case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28545         case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28546         case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28547         case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28548         default:
28549           gcc_unreachable ();
28550         }
28551     }
28552   else
28553     {
28554       switch (mode)
28555         {
28556         case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28557         case E_HImode: gen = gen_arm_load_exclusivehi; break;
28558         case E_SImode: gen = gen_arm_load_exclusivesi; break;
28559         case E_DImode: gen = gen_arm_load_exclusivedi; break;
28560         default:
28561           gcc_unreachable ();
28562         }
28563     }
28564 
28565   emit_insn (gen (rval, mem));
28566 }
28567 
28568 static void
28569 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28570                           rtx mem, bool rel)
28571 {
28572   rtx (*gen) (rtx, rtx, rtx);
28573 
28574   if (rel)
28575     {
28576       switch (mode)
28577         {
28578         case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28579         case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28580         case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28581         case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28582         default:
28583           gcc_unreachable ();
28584         }
28585     }
28586   else
28587     {
28588       switch (mode)
28589         {
28590         case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28591         case E_HImode: gen = gen_arm_store_exclusivehi; break;
28592         case E_SImode: gen = gen_arm_store_exclusivesi; break;
28593         case E_DImode: gen = gen_arm_store_exclusivedi; break;
28594         default:
28595           gcc_unreachable ();
28596         }
28597     }
28598 
28599   emit_insn (gen (bval, rval, mem));
28600 }
28601 
28602 /* Mark the previous jump instruction as unlikely.  */
28603 
28604 static void
28605 emit_unlikely_jump (rtx insn)
28606 {
28607   rtx_insn *jump = emit_jump_insn (insn);
28608   add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28609 }
28610 
28611 /* Expand a compare and swap pattern.  */
28612 
28613 void
28614 arm_expand_compare_and_swap (rtx operands[])
28615 {
28616   rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28617   machine_mode mode;
28618   rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28619 
28620   bval = operands[0];
28621   rval = operands[1];
28622   mem = operands[2];
28623   oldval = operands[3];
28624   newval = operands[4];
28625   is_weak = operands[5];
28626   mod_s = operands[6];
28627   mod_f = operands[7];
28628   mode = GET_MODE (mem);
28629 
28630   /* Normally the succ memory model must be stronger than fail, but in the
28631      unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28632      promote succ to ACQ_REL so that we don't lose the acquire semantics.  */
28633 
28634   if (TARGET_HAVE_LDACQ
28635       && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28636       && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28637     mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28638 
28639   switch (mode)
28640     {
28641     case E_QImode:
28642     case E_HImode:
28643       /* For narrow modes, we're going to perform the comparison in SImode,
28644 	 so do the zero-extension now.  */
28645       rval = gen_reg_rtx (SImode);
28646       oldval = convert_modes (SImode, mode, oldval, true);
28647       /* FALLTHRU */
28648 
28649     case E_SImode:
28650       /* Force the value into a register if needed.  We waited until after
28651 	 the zero-extension above to do this properly.  */
28652       if (!arm_add_operand (oldval, SImode))
28653 	oldval = force_reg (SImode, oldval);
28654       break;
28655 
28656     case E_DImode:
28657       if (!cmpdi_operand (oldval, mode))
28658 	oldval = force_reg (mode, oldval);
28659       break;
28660 
28661     default:
28662       gcc_unreachable ();
28663     }
28664 
28665   if (TARGET_THUMB1)
28666     {
28667       switch (mode)
28668 	{
28669 	case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28670 	case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28671 	case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28672 	case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28673 	default:
28674 	  gcc_unreachable ();
28675 	}
28676     }
28677   else
28678     {
28679       switch (mode)
28680 	{
28681 	case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28682 	case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28683 	case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28684 	case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28685 	default:
28686 	  gcc_unreachable ();
28687 	}
28688     }
28689 
28690   bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28691   emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28692 
28693   if (mode == QImode || mode == HImode)
28694     emit_move_insn (operands[1], gen_lowpart (mode, rval));
28695 
28696   /* In all cases, we arrange for success to be signaled by Z set.
28697      This arrangement allows for the boolean result to be used directly
28698      in a subsequent branch, post optimization.  For Thumb-1 targets, the
28699      boolean negation of the result is also stored in bval because the Thumb-1
28700      backend lacks dependency tracking for the CC flag, as flag-setting is not
28701      represented at the RTL level.  */
28702   if (TARGET_THUMB1)
28703       emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28704   else
28705     {
28706       x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28707       emit_insn (gen_rtx_SET (bval, x));
28708     }
28709 }
28710 
28711 /* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
28712    another memory store between the load-exclusive and store-exclusive can
28713    reset the monitor from Exclusive to Open state.  This means we must wait
28714    until after reload to split the pattern, lest we get a register spill in
28715    the middle of the atomic sequence.  Success of the compare and swap is
28716    indicated by the Z flag set for 32-bit targets and by neg_bval being zero
28717    for Thumb-1 targets (i.e. the negation of the boolean value returned by the
28718    atomic_compare_and_swapmode standard pattern in operand 0).  */
28719 
28720 void
28721 arm_split_compare_and_swap (rtx operands[])
28722 {
28723   rtx rval, mem, oldval, newval, neg_bval;
28724   machine_mode mode;
28725   enum memmodel mod_s, mod_f;
28726   bool is_weak;
28727   rtx_code_label *label1, *label2;
28728   rtx x, cond;
28729 
28730   rval = operands[1];
28731   mem = operands[2];
28732   oldval = operands[3];
28733   newval = operands[4];
28734   is_weak = (operands[5] != const0_rtx);
28735   mod_s = memmodel_from_int (INTVAL (operands[6]));
28736   mod_f = memmodel_from_int (INTVAL (operands[7]));
28737   neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28738   mode = GET_MODE (mem);
28739 
28740   bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28741 
28742   bool use_acquire = TARGET_HAVE_LDACQ
28743                      && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28744 			  || is_mm_release (mod_s));
28745 
28746   bool use_release = TARGET_HAVE_LDACQ
28747                      && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28748 			  || is_mm_acquire (mod_s));
28749 
28750   /* For ARMv8, the load-acquire is too weak for __sync memory orders.  Instead,
28751      a full barrier is emitted after the store-release.  */
28752   if (is_armv8_sync)
28753     use_acquire = false;
28754 
28755   /* Checks whether a barrier is needed and emits one accordingly.  */
28756   if (!(use_acquire || use_release))
28757     arm_pre_atomic_barrier (mod_s);
28758 
28759   label1 = NULL;
28760   if (!is_weak)
28761     {
28762       label1 = gen_label_rtx ();
28763       emit_label (label1);
28764     }
28765   label2 = gen_label_rtx ();
28766 
28767   arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28768 
28769   /* Z is set to 0 for 32-bit targets (resp. rval set to 1) if oldval != rval,
28770      as required to communicate with arm_expand_compare_and_swap.  */
28771   if (TARGET_32BIT)
28772     {
28773       cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28774       x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28775       x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28776 				gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28777       emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28778     }
28779   else
28780     {
28781       emit_move_insn (neg_bval, const1_rtx);
28782       cond = gen_rtx_NE (VOIDmode, rval, oldval);
28783       if (thumb1_cmpneg_operand (oldval, SImode))
28784 	emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28785 						    label2, cond));
28786       else
28787 	emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28788     }
28789 
28790   arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28791 
28792   /* Weak or strong, we want EQ to be true for success, so that we
28793      match the flags that we got from the compare above.  */
28794   if (TARGET_32BIT)
28795     {
28796       cond = gen_rtx_REG (CCmode, CC_REGNUM);
28797       x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28798       emit_insn (gen_rtx_SET (cond, x));
28799     }
28800 
28801   if (!is_weak)
28802     {
28803       /* Z is set to boolean value of !neg_bval, as required to communicate
28804 	 with arm_expand_compare_and_swap.  */
28805       x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28806       emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28807     }
28808 
28809   if (!is_mm_relaxed (mod_f))
28810     emit_label (label2);
28811 
28812   /* Checks whether a barrier is needed and emits one accordingly.  */
28813   if (is_armv8_sync
28814       || !(use_acquire || use_release))
28815     arm_post_atomic_barrier (mod_s);
28816 
28817   if (is_mm_relaxed (mod_f))
28818     emit_label (label2);
28819 }
28820 
28821 /* Split an atomic operation pattern.  Operation is given by CODE and is one
28822    of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28823    operation).  Operation is performed on the content at MEM and on VALUE
28824    following the memory model MODEL_RTX.  The content at MEM before and after
28825    the operation is returned in OLD_OUT and NEW_OUT respectively while the
28826    success of the operation is returned in COND.  Using a scratch register or
28827    an operand register for these determines what result is returned for that
28828    pattern.  */
28829 
28830 void
28831 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28832 		     rtx value, rtx model_rtx, rtx cond)
28833 {
28834   enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28835   machine_mode mode = GET_MODE (mem);
28836   machine_mode wmode = (mode == DImode ? DImode : SImode);
28837   rtx_code_label *label;
28838   bool all_low_regs, bind_old_new;
28839   rtx x;
28840 
28841   bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28842 
28843   bool use_acquire = TARGET_HAVE_LDACQ
28844                      && !(is_mm_relaxed (model) || is_mm_consume (model)
28845 			  || is_mm_release (model));
28846 
28847   bool use_release = TARGET_HAVE_LDACQ
28848                      && !(is_mm_relaxed (model) || is_mm_consume (model)
28849 			  || is_mm_acquire (model));
28850 
28851   /* For ARMv8, a load-acquire is too weak for __sync memory orders.  Instead,
28852      a full barrier is emitted after the store-release.  */
28853   if (is_armv8_sync)
28854     use_acquire = false;
28855 
28856   /* Checks whether a barrier is needed and emits one accordingly.  */
28857   if (!(use_acquire || use_release))
28858     arm_pre_atomic_barrier (model);
28859 
28860   label = gen_label_rtx ();
28861   emit_label (label);
28862 
28863   if (new_out)
28864     new_out = gen_lowpart (wmode, new_out);
28865   if (old_out)
28866     old_out = gen_lowpart (wmode, old_out);
28867   else
28868     old_out = new_out;
28869   value = simplify_gen_subreg (wmode, value, mode, 0);
28870 
28871   arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28872 
28873   /* Does the operation require destination and first operand to use the same
28874      register?  This is decided by register constraints of relevant insn
28875      patterns in thumb1.md.  */
28876   gcc_assert (!new_out || REG_P (new_out));
28877   all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28878 		 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28879 		 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28880   bind_old_new =
28881     (TARGET_THUMB1
28882      && code != SET
28883      && code != MINUS
28884      && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28885 
28886   /* We want to return the old value while putting the result of the operation
28887      in the same register as the old value so copy the old value over to the
28888      destination register and use that register for the operation.  */
28889   if (old_out && bind_old_new)
28890     {
28891       emit_move_insn (new_out, old_out);
28892       old_out = new_out;
28893     }
28894 
28895   switch (code)
28896     {
28897     case SET:
28898       new_out = value;
28899       break;
28900 
28901     case NOT:
28902       x = gen_rtx_AND (wmode, old_out, value);
28903       emit_insn (gen_rtx_SET (new_out, x));
28904       x = gen_rtx_NOT (wmode, new_out);
28905       emit_insn (gen_rtx_SET (new_out, x));
28906       break;
28907 
28908     case MINUS:
28909       if (CONST_INT_P (value))
28910 	{
28911 	  value = GEN_INT (-INTVAL (value));
28912 	  code = PLUS;
28913 	}
28914       /* FALLTHRU */
28915 
28916     case PLUS:
28917       if (mode == DImode)
28918 	{
28919 	  /* DImode plus/minus need to clobber flags.  */
28920 	  /* The adddi3 and subdi3 patterns are incorrectly written so that
28921 	     they require matching operands, even when we could easily support
28922 	     three operands.  Thankfully, this can be fixed up post-splitting,
28923 	     as the individual add+adc patterns do accept three operands and
28924 	     post-reload cprop can make these moves go away.  */
28925 	  emit_move_insn (new_out, old_out);
28926 	  if (code == PLUS)
28927 	    x = gen_adddi3 (new_out, new_out, value);
28928 	  else
28929 	    x = gen_subdi3 (new_out, new_out, value);
28930 	  emit_insn (x);
28931 	  break;
28932 	}
28933       /* FALLTHRU */
28934 
28935     default:
28936       x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28937       emit_insn (gen_rtx_SET (new_out, x));
28938       break;
28939     }
28940 
28941   arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28942                             use_release);
28943 
28944   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28945   emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28946 
28947   /* Checks whether a barrier is needed and emits one accordingly.  */
28948   if (is_armv8_sync
28949       || !(use_acquire || use_release))
28950     arm_post_atomic_barrier (model);
28951 }
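
      /* As a rough sketch (not a guaranteed code sequence), the loop built
         above for something like __atomic_fetch_add on SImode has the shape:

             1: ldrex   old, [mem]          @ ldaex when use_acquire
                add     new, old, value
                strex   cond, new, [mem]    @ stlex when use_release
                cmp     cond, #0
                bne     1b

         with explicit DMB barriers emitted before and/or after the loop when
         neither the acquire nor the release form can provide the required
         memory-model semantics.  */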
28952 
28953 #define MAX_VECT_LEN 16
28954 
28955 struct expand_vec_perm_d
28956 {
28957   rtx target, op0, op1;
28958   vec_perm_indices perm;
28959   machine_mode vmode;
28960   bool one_vector_p;
28961   bool testing_p;
28962 };
28963 
28964 /* Generate a variable permutation.  */
28965 
28966 static void
28967 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28968 {
28969   machine_mode vmode = GET_MODE (target);
28970   bool one_vector_p = rtx_equal_p (op0, op1);
28971 
28972   gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28973   gcc_checking_assert (GET_MODE (op0) == vmode);
28974   gcc_checking_assert (GET_MODE (op1) == vmode);
28975   gcc_checking_assert (GET_MODE (sel) == vmode);
28976   gcc_checking_assert (TARGET_NEON);
28977 
28978   if (one_vector_p)
28979     {
28980       if (vmode == V8QImode)
28981 	emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28982       else
28983 	emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28984     }
28985   else
28986     {
28987       rtx pair;
28988 
28989       if (vmode == V8QImode)
28990 	{
28991 	  pair = gen_reg_rtx (V16QImode);
28992 	  emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28993 	  pair = gen_lowpart (TImode, pair);
28994 	  emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28995 	}
28996       else
28997 	{
28998 	  pair = gen_reg_rtx (OImode);
28999 	  emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
29000 	  emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
29001 	}
29002     }
29003 }
29004 
29005 void
29006 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
29007 {
29008   machine_mode vmode = GET_MODE (target);
29009   unsigned int nelt = GET_MODE_NUNITS (vmode);
29010   bool one_vector_p = rtx_equal_p (op0, op1);
29011   rtx mask;
29012 
29013   /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
29014      numbering of elements for big-endian, we must reverse the order.  */
29015   gcc_checking_assert (!BYTES_BIG_ENDIAN);
29016 
29017   /* The VTBL instruction does not use a modulo index, so we must take care
29018      of that ourselves.  */
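        /* As a worked example (an illustrative sketch): for V16QImode with
           two distinct input vectors, nelt is 16, so the mask below is 31
           and a selector lane holding, say, 37 is reduced to 5, i.e. element
           5 of OP0, matching the modulo semantics of VEC_PERM_EXPR.  */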
29019   mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
29020   mask = gen_const_vec_duplicate (vmode, mask);
29021   sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
29022 
29023   arm_expand_vec_perm_1 (target, op0, op1, sel);
29024 }
29025 
29026 /* Map lane ordering between architectural lane order, and GCC lane order,
29027    taking into account ABI.  See comment above output_move_neon for details.  */
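      /* For instance (a sketch of the arithmetic only): on a big-endian
         target with V4SImode, lane 0 first becomes 3 after the in-vector
         reversal and then 3 ^ 2 = 1 after the D-register swap applied to
         16-byte modes.  */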
29028 
29029 static int
29030 neon_endian_lane_map (machine_mode mode, int lane)
29031 {
29032   if (BYTES_BIG_ENDIAN)
29033   {
29034     int nelems = GET_MODE_NUNITS (mode);
29035     /* Reverse lane order.  */
29036     lane = (nelems - 1 - lane);
29037     /* Reverse D register order, to match ABI.  */
29038     if (GET_MODE_SIZE (mode) == 16)
29039       lane = lane ^ (nelems / 2);
29040   }
29041   return lane;
29042 }
29043 
29044 /* Some permutations index into pairs of vectors, this is a helper function
29045    to map indexes into those pairs of vectors.  */
29046 
29047 static int
29048 neon_pair_endian_lane_map (machine_mode mode, int lane)
29049 {
29050   int nelem = GET_MODE_NUNITS (mode);
29051   if (BYTES_BIG_ENDIAN)
29052     lane =
29053       neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
29054   return lane;
29055 }
29056 
29057 /* Generate or test for an insn that supports a constant permutation.  */
29058 
29059 /* Recognize patterns for the VUZP insns.  */
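      /* For example (illustrative only): on a little-endian target with two
         V4SImode inputs, the even-lane selector {0, 2, 4, 6} and the
         odd-lane selector {1, 3, 5, 7} are the shapes accepted here,
         corresponding to the two outputs of a VUZP de-interleave.  */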
29060 
29061 static bool
29062 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
29063 {
29064   unsigned int i, odd, mask, nelt = d->perm.length ();
29065   rtx out0, out1, in0, in1;
29066   rtx (*gen)(rtx, rtx, rtx, rtx);
29067   int first_elem;
29068   int swap_nelt;
29069 
29070   if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29071     return false;
29072 
29073   /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
29074      big-endian pattern on 64-bit vectors, so we correct for that.  */
29075   swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
29076     && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
29077 
29078   first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
29079 
29080   if (first_elem == neon_endian_lane_map (d->vmode, 0))
29081     odd = 0;
29082   else if (first_elem == neon_endian_lane_map (d->vmode, 1))
29083     odd = 1;
29084   else
29085     return false;
29086   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29087 
29088   for (i = 0; i < nelt; i++)
29089     {
29090       unsigned elt =
29091 	(neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
29092       if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
29093 	return false;
29094     }
29095 
29096   /* Success!  */
29097   if (d->testing_p)
29098     return true;
29099 
29100   switch (d->vmode)
29101     {
29102     case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
29103     case E_V8QImode:  gen = gen_neon_vuzpv8qi_internal;  break;
29104     case E_V8HImode:  gen = gen_neon_vuzpv8hi_internal;  break;
29105     case E_V4HImode:  gen = gen_neon_vuzpv4hi_internal;  break;
29106     case E_V8HFmode:  gen = gen_neon_vuzpv8hf_internal;  break;
29107     case E_V4HFmode:  gen = gen_neon_vuzpv4hf_internal;  break;
29108     case E_V4SImode:  gen = gen_neon_vuzpv4si_internal;  break;
29109     case E_V2SImode:  gen = gen_neon_vuzpv2si_internal;  break;
29110     case E_V2SFmode:  gen = gen_neon_vuzpv2sf_internal;  break;
29111     case E_V4SFmode:  gen = gen_neon_vuzpv4sf_internal;  break;
29112     default:
29113       gcc_unreachable ();
29114     }
29115 
29116   in0 = d->op0;
29117   in1 = d->op1;
29118   if (swap_nelt != 0)
29119     std::swap (in0, in1);
29120 
29121   out0 = d->target;
29122   out1 = gen_reg_rtx (d->vmode);
29123   if (odd)
29124     std::swap (out0, out1);
29125 
29126   emit_insn (gen (out0, in0, in1, out1));
29127   return true;
29128 }
29129 
29130 /* Recognize patterns for the VZIP insns.  */
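      /* For example (illustrative only): on a little-endian target with two
         V4SImode inputs, the low-half interleave {0, 4, 1, 5} and the
         high-half interleave {2, 6, 3, 7} are the selector shapes
         recognized here.  */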
29131 
29132 static bool
29133 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
29134 {
29135   unsigned int i, high, mask, nelt = d->perm.length ();
29136   rtx out0, out1, in0, in1;
29137   rtx (*gen)(rtx, rtx, rtx, rtx);
29138   int first_elem;
29139   bool is_swapped;
29140 
29141   if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29142     return false;
29143 
29144   is_swapped = BYTES_BIG_ENDIAN;
29145 
29146   first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
29147 
29148   high = nelt / 2;
29149   if (first_elem == neon_endian_lane_map (d->vmode, high))
29150     ;
29151   else if (first_elem == neon_endian_lane_map (d->vmode, 0))
29152     high = 0;
29153   else
29154     return false;
29155   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29156 
29157   for (i = 0; i < nelt / 2; i++)
29158     {
29159       unsigned elt =
29160 	neon_pair_endian_lane_map (d->vmode, i + high) & mask;
29161       if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
29162 	  != elt)
29163 	return false;
29164       elt =
29165 	neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
29166       if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
29167 	  != elt)
29168 	return false;
29169     }
29170 
29171   /* Success!  */
29172   if (d->testing_p)
29173     return true;
29174 
29175   switch (d->vmode)
29176     {
29177     case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
29178     case E_V8QImode:  gen = gen_neon_vzipv8qi_internal;  break;
29179     case E_V8HImode:  gen = gen_neon_vzipv8hi_internal;  break;
29180     case E_V4HImode:  gen = gen_neon_vzipv4hi_internal;  break;
29181     case E_V8HFmode:  gen = gen_neon_vzipv8hf_internal;  break;
29182     case E_V4HFmode:  gen = gen_neon_vzipv4hf_internal;  break;
29183     case E_V4SImode:  gen = gen_neon_vzipv4si_internal;  break;
29184     case E_V2SImode:  gen = gen_neon_vzipv2si_internal;  break;
29185     case E_V2SFmode:  gen = gen_neon_vzipv2sf_internal;  break;
29186     case E_V4SFmode:  gen = gen_neon_vzipv4sf_internal;  break;
29187     default:
29188       gcc_unreachable ();
29189     }
29190 
29191   in0 = d->op0;
29192   in1 = d->op1;
29193   if (is_swapped)
29194     std::swap (in0, in1);
29195 
29196   out0 = d->target;
29197   out1 = gen_reg_rtx (d->vmode);
29198   if (high)
29199     std::swap (out0, out1);
29200 
29201   emit_insn (gen (out0, in0, in1, out1));
29202   return true;
29203 }
29204 
29205 /* Recognize patterns for the VREV insns.  */
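      /* The first selector element, DIFF, determines how many consecutive
         elements (DIFF + 1) are reversed within each chunk.  For example
         (a sketch): {3, 2, 1, 0, 7, 6, 5, 4} on V8QImode reverses the bytes
         within each 32-bit word and maps to a VREV32.8 instruction.  */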
29206 
29207 static bool
29208 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
29209 {
29210   unsigned int i, j, diff, nelt = d->perm.length ();
29211   rtx (*gen)(rtx, rtx);
29212 
29213   if (!d->one_vector_p)
29214     return false;
29215 
29216   diff = d->perm[0];
29217   switch (diff)
29218     {
29219     case 7:
29220       switch (d->vmode)
29221 	{
29222 	case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
29223 	case E_V8QImode:  gen = gen_neon_vrev64v8qi;  break;
29224 	default:
29225 	  return false;
29226 	}
29227       break;
29228     case 3:
29229       switch (d->vmode)
29230 	{
29231 	case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
29232 	case E_V8QImode:  gen = gen_neon_vrev32v8qi;  break;
29233 	case E_V8HImode:  gen = gen_neon_vrev64v8hi;  break;
29234 	case E_V4HImode:  gen = gen_neon_vrev64v4hi;  break;
29235 	case E_V8HFmode:  gen = gen_neon_vrev64v8hf;  break;
29236 	case E_V4HFmode:  gen = gen_neon_vrev64v4hf;  break;
29237 	default:
29238 	  return false;
29239 	}
29240       break;
29241     case 1:
29242       switch (d->vmode)
29243 	{
29244 	case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
29245 	case E_V8QImode:  gen = gen_neon_vrev16v8qi;  break;
29246 	case E_V8HImode:  gen = gen_neon_vrev32v8hi;  break;
29247 	case E_V4HImode:  gen = gen_neon_vrev32v4hi;  break;
29248 	case E_V4SImode:  gen = gen_neon_vrev64v4si;  break;
29249 	case E_V2SImode:  gen = gen_neon_vrev64v2si;  break;
29250 	case E_V4SFmode:  gen = gen_neon_vrev64v4sf;  break;
29251 	case E_V2SFmode:  gen = gen_neon_vrev64v2sf;  break;
29252 	default:
29253 	  return false;
29254 	}
29255       break;
29256     default:
29257       return false;
29258     }
29259 
29260   for (i = 0; i < nelt ; i += diff + 1)
29261     for (j = 0; j <= diff; j += 1)
29262       {
29263 	/* This is guaranteed to be true as the value of diff
29264 	   is 7, 3, 1 and we should have enough elements in the
29265 	   queue to generate this. Getting a vector mask with a
29266 	   value of diff other than these values implies that
29267 	   something is wrong by the time we get here.  */
29268 	gcc_assert (i + j < nelt);
29269 	if (d->perm[i + j] != i + diff - j)
29270 	  return false;
29271       }
29272 
29273   /* Success! */
29274   if (d->testing_p)
29275     return true;
29276 
29277   emit_insn (gen (d->target, d->op0));
29278   return true;
29279 }
29280 
29281 /* Recognize patterns for the VTRN insns.  */
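      /* For example (illustrative only): on a little-endian target with two
         V4SImode inputs, the selectors {0, 4, 2, 6} and {1, 5, 3, 7} are
         the even and odd transpose shapes matched here.  */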
29282 
29283 static bool
29284 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
29285 {
29286   unsigned int i, odd, mask, nelt = d->perm.length ();
29287   rtx out0, out1, in0, in1;
29288   rtx (*gen)(rtx, rtx, rtx, rtx);
29289 
29290   if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29291     return false;
29292 
29293   /* Note that these are little-endian tests.  Adjust for big-endian later.  */
29294   if (d->perm[0] == 0)
29295     odd = 0;
29296   else if (d->perm[0] == 1)
29297     odd = 1;
29298   else
29299     return false;
29300   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29301 
29302   for (i = 0; i < nelt; i += 2)
29303     {
29304       if (d->perm[i] != i + odd)
29305 	return false;
29306       if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29307 	return false;
29308     }
29309 
29310   /* Success!  */
29311   if (d->testing_p)
29312     return true;
29313 
29314   switch (d->vmode)
29315     {
29316     case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
29317     case E_V8QImode:  gen = gen_neon_vtrnv8qi_internal;  break;
29318     case E_V8HImode:  gen = gen_neon_vtrnv8hi_internal;  break;
29319     case E_V4HImode:  gen = gen_neon_vtrnv4hi_internal;  break;
29320     case E_V8HFmode:  gen = gen_neon_vtrnv8hf_internal;  break;
29321     case E_V4HFmode:  gen = gen_neon_vtrnv4hf_internal;  break;
29322     case E_V4SImode:  gen = gen_neon_vtrnv4si_internal;  break;
29323     case E_V2SImode:  gen = gen_neon_vtrnv2si_internal;  break;
29324     case E_V2SFmode:  gen = gen_neon_vtrnv2sf_internal;  break;
29325     case E_V4SFmode:  gen = gen_neon_vtrnv4sf_internal;  break;
29326     default:
29327       gcc_unreachable ();
29328     }
29329 
29330   in0 = d->op0;
29331   in1 = d->op1;
29332   if (BYTES_BIG_ENDIAN)
29333     {
29334       std::swap (in0, in1);
29335       odd = !odd;
29336     }
29337 
29338   out0 = d->target;
29339   out1 = gen_reg_rtx (d->vmode);
29340   if (odd)
29341     std::swap (out0, out1);
29342 
29343   emit_insn (gen (out0, in0, in1, out1));
29344   return true;
29345 }
29346 
29347 /* Recognize patterns for the VEXT insns.  */
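      /* For example (illustrative only): the V8QImode selector
         {3, 4, 5, 6, 7, 8, 9, 10} takes a run of bytes starting at index 3
         of the concatenated OP0:OP1 pair, so it can be emitted as a single
         VEXT.8 with an offset of 3.  */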
29348 
29349 static bool
29350 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29351 {
29352   unsigned int i, nelt = d->perm.length ();
29353   rtx (*gen) (rtx, rtx, rtx, rtx);
29354   rtx offset;
29355 
29356   unsigned int location;
29357 
29358   unsigned int next  = d->perm[0] + 1;
29359 
29360   /* TODO: Handle GCC's numbering of elements for big-endian.  */
29361   if (BYTES_BIG_ENDIAN)
29362     return false;
29363 
29364   /* Check if the extracted indexes are increasing by one.  */
29365   for (i = 1; i < nelt; next++, i++)
29366     {
29367       /* If we hit the most significant element of the 2nd vector in
29368 	 the previous iteration, no need to test further.  */
29369       if (next == 2 * nelt)
29370 	return false;
29371 
29372       /* If we are operating on only one vector: it could be a
29373 	 rotation.  If there are only two elements of size < 64, let
29374 	 arm_evpc_neon_vrev catch it.  */
29375       if (d->one_vector_p && (next == nelt))
29376 	{
29377 	  if ((nelt == 2) && (d->vmode != V2DImode))
29378 	    return false;
29379 	  else
29380 	    next = 0;
29381 	}
29382 
29383       if (d->perm[i] != next)
29384 	return false;
29385     }
29386 
29387   location = d->perm[0];
29388 
29389   switch (d->vmode)
29390     {
29391     case E_V16QImode: gen = gen_neon_vextv16qi; break;
29392     case E_V8QImode: gen = gen_neon_vextv8qi; break;
29393     case E_V4HImode: gen = gen_neon_vextv4hi; break;
29394     case E_V8HImode: gen = gen_neon_vextv8hi; break;
29395     case E_V2SImode: gen = gen_neon_vextv2si; break;
29396     case E_V4SImode: gen = gen_neon_vextv4si; break;
29397     case E_V4HFmode: gen = gen_neon_vextv4hf; break;
29398     case E_V8HFmode: gen = gen_neon_vextv8hf; break;
29399     case E_V2SFmode: gen = gen_neon_vextv2sf; break;
29400     case E_V4SFmode: gen = gen_neon_vextv4sf; break;
29401     case E_V2DImode: gen = gen_neon_vextv2di; break;
29402     default:
29403       return false;
29404     }
29405 
29406   /* Success! */
29407   if (d->testing_p)
29408     return true;
29409 
29410   offset = GEN_INT (location);
29411   emit_insn (gen (d->target, d->op0, d->op1, offset));
29412   return true;
29413 }
29414 
29415 /* The NEON VTBL instruction is a fully variable permutation that's even
29416    stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
29417    is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
29418    can do slightly better by expanding this as a constant where we don't
29419    have to apply a mask.  */
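      /* For example (illustrative only): a constant V8QImode selector such
         as {5, 1, 4, 0, 6, 2, 7, 3} matches none of the recognizers above,
         but since every index is already known to be in range it can simply
         be loaded into a register as a constant vector and fed to a single
         VTBL, with no run-time AND of the index operand.  */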
29420 
29421 static bool
29422 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29423 {
29424   rtx rperm[MAX_VECT_LEN], sel;
29425   machine_mode vmode = d->vmode;
29426   unsigned int i, nelt = d->perm.length ();
29427 
29428   /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
29429      numbering of elements for big-endian, we must reverse the order.  */
29430   if (BYTES_BIG_ENDIAN)
29431     return false;
29432 
29433   if (d->testing_p)
29434     return true;
29435 
29436   /* Generic code will try constant permutation twice.  Once with the
29437      original mode and again with the elements lowered to QImode.
29438      So wait and don't do the selector expansion ourselves.  */
29439   if (vmode != V8QImode && vmode != V16QImode)
29440     return false;
29441 
29442   for (i = 0; i < nelt; ++i)
29443     rperm[i] = GEN_INT (d->perm[i]);
29444   sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29445   sel = force_reg (vmode, sel);
29446 
29447   arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29448   return true;
29449 }
29450 
29451 static bool
29452 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29453 {
29454   /* Check if the input mask matches vext before reordering the
29455      operands.  */
29456   if (TARGET_NEON)
29457     if (arm_evpc_neon_vext (d))
29458       return true;
29459 
29460   /* The pattern matching functions above are written to look for a small
29461      number to begin the sequence (0, 1, N/2).  If we begin with an index
29462      from the second operand, we can swap the operands.  */
29463   unsigned int nelt = d->perm.length ();
29464   if (d->perm[0] >= nelt)
29465     {
29466       d->perm.rotate_inputs (1);
29467       std::swap (d->op0, d->op1);
29468     }
29469 
29470   if (TARGET_NEON)
29471     {
29472       if (arm_evpc_neon_vuzp (d))
29473 	return true;
29474       if (arm_evpc_neon_vzip (d))
29475 	return true;
29476       if (arm_evpc_neon_vrev (d))
29477 	return true;
29478       if (arm_evpc_neon_vtrn (d))
29479 	return true;
29480       return arm_evpc_neon_vtbl (d);
29481     }
29482   return false;
29483 }
29484 
29485 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  */
29486 
29487 static bool
29488 arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
29489 			      const vec_perm_indices &sel)
29490 {
29491   struct expand_vec_perm_d d;
29492   int i, nelt, which;
29493 
29494   if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
29495     return false;
29496 
29497   d.target = target;
29498   d.op0 = op0;
29499   d.op1 = op1;
29500 
29501   d.vmode = vmode;
29502   gcc_assert (VECTOR_MODE_P (d.vmode));
29503   d.testing_p = !target;
29504 
29505   nelt = GET_MODE_NUNITS (d.vmode);
29506   for (i = which = 0; i < nelt; ++i)
29507     {
29508       int ei = sel[i] & (2 * nelt - 1);
29509       which |= (ei < nelt ? 1 : 2);
29510     }
29511 
29512   switch (which)
29513     {
29514     default:
29515       gcc_unreachable();
29516 
29517     case 3:
29518       d.one_vector_p = false;
29519       if (d.testing_p || !rtx_equal_p (op0, op1))
29520 	break;
29521 
29522       /* The elements of PERM do not suggest that only the first operand
29523 	 is used, but both operands are identical.  Allow easier matching
29524 	 of the permutation by folding the permutation into the single
29525 	 input vector.  */
29526       /* FALLTHRU */
29527     case 2:
29528       d.op0 = op1;
29529       d.one_vector_p = true;
29530       break;
29531 
29532     case 1:
29533       d.op1 = op0;
29534       d.one_vector_p = true;
29535       break;
29536     }
29537 
29538   d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
29539 
29540   if (!d.testing_p)
29541     return arm_expand_vec_perm_const_1 (&d);
29542 
29543   d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29544   d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29545   if (!d.one_vector_p)
29546     d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29547 
29548   start_sequence ();
29549   bool ret = arm_expand_vec_perm_const_1 (&d);
29550   end_sequence ();
29551 
29552   return ret;
29553 }
29554 
29555 bool
29556 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29557 {
29558   /* If we are soft float and either have ldrd or the mode size is
29559      no larger than a word, then all auto increment forms are ok.  */
29560   if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29561     return true;
29562 
29563   switch (code)
29564     {
29565       /* Post-increment and pre-decrement are supported for all
29566 	 instruction forms except vector forms, which allow only post-increment.  */
29567     case ARM_POST_INC:
29568     case ARM_PRE_DEC:
29569       if (VECTOR_MODE_P (mode))
29570 	{
29571 	  if (code != ARM_PRE_DEC)
29572 	    return true;
29573 	  else
29574 	    return false;
29575 	}
29576 
29577       return true;
29578 
29579     case ARM_POST_DEC:
29580     case ARM_PRE_INC:
29581       /* Without LDRD, when the mode size is greater than the word
29582 	 size there is no point in auto-incrementing, because ldm and
29583 	 stm will not have these forms.  */
29584       if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29585 	return false;
29586 
29587       /* Vector and floating point modes do not support
29588 	 these auto increment forms.  */
29589       if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29590 	return false;
29591 
29592       return true;
29593 
29594     default:
29595       return false;
29596 
29597     }
29598 
29599   return false;
29600 }
29601 
29602 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29603    on ARM, since we know that shifts by negative amounts are no-ops.
29604    Additionally, the default expansion code is not available or suitable
29605    for post-reload insn splits (this can occur when the register allocator
29606    chooses not to do a shift in NEON).
29607 
29608    This function is used in both initial expand and post-reload splits, and
29609    handles all kinds of 64-bit shifts.
29610 
29611    Input requirements:
29612     - It is safe for the input and output to be the same register, but
29613       early-clobber rules apply for the shift amount and scratch registers.
29614     - Shift by register requires both scratch registers.  In all other cases
29615       the scratch registers may be NULL.
29616     - Ashiftrt by a register also clobbers the CC register.  */
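      /* As a rough sketch (register choice is up to the allocator, and the
         provisional clearing of the destination mentioned below is later
         removed): a constant left shift by 5 of the pair {low = r0,
         high = r1} into {low = r2, high = r3} boils down to

             lsl     r3, r1, #5
             orr     r3, r3, r0, lsr #27
             lsl     r2, r0, #5

         i.e. the high word is built from both inputs and the low word needs
         only a single shift.  */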
29617 void
29618 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29619 			       rtx amount, rtx scratch1, rtx scratch2)
29620 {
29621   rtx out_high = gen_highpart (SImode, out);
29622   rtx out_low = gen_lowpart (SImode, out);
29623   rtx in_high = gen_highpart (SImode, in);
29624   rtx in_low = gen_lowpart (SImode, in);
29625 
29626   /* Terminology:
29627 	in = the register pair containing the input value.
29628 	out = the destination register pair.
29629 	up = the high- or low-part of each pair.
29630 	down = the opposite part to "up".
29631      In a shift, we can consider bits to shift from "up"-stream to
29632      "down"-stream, so in a left-shift "up" is the low-part and "down"
29633      is the high-part of each register pair.  */
29634 
29635   rtx out_up   = code == ASHIFT ? out_low : out_high;
29636   rtx out_down = code == ASHIFT ? out_high : out_low;
29637   rtx in_up   = code == ASHIFT ? in_low : in_high;
29638   rtx in_down = code == ASHIFT ? in_high : in_low;
29639 
29640   gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29641   gcc_assert (out
29642 	      && (REG_P (out) || GET_CODE (out) == SUBREG)
29643 	      && GET_MODE (out) == DImode);
29644   gcc_assert (in
29645 	      && (REG_P (in) || GET_CODE (in) == SUBREG)
29646 	      && GET_MODE (in) == DImode);
29647   gcc_assert (amount
29648 	      && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29649 		   && GET_MODE (amount) == SImode)
29650 		  || CONST_INT_P (amount)));
29651   gcc_assert (scratch1 == NULL
29652 	      || (GET_CODE (scratch1) == SCRATCH)
29653 	      || (GET_MODE (scratch1) == SImode
29654 		  && REG_P (scratch1)));
29655   gcc_assert (scratch2 == NULL
29656 	      || (GET_CODE (scratch2) == SCRATCH)
29657 	      || (GET_MODE (scratch2) == SImode
29658 		  && REG_P (scratch2)));
29659   gcc_assert (!REG_P (out) || !REG_P (amount)
29660 	      || !HARD_REGISTER_P (out)
29661 	      || (REGNO (out) != REGNO (amount)
29662 		  && REGNO (out) + 1 != REGNO (amount)));
29663 
29664   /* Macros to make following code more readable.  */
29665   #define SUB_32(DEST,SRC) \
29666 	    gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29667   #define RSB_32(DEST,SRC) \
29668 	    gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29669   #define SUB_S_32(DEST,SRC) \
29670 	    gen_addsi3_compare0 ((DEST), (SRC), \
29671 				 GEN_INT (-32))
29672   #define SET(DEST,SRC) \
29673 	    gen_rtx_SET ((DEST), (SRC))
29674   #define SHIFT(CODE,SRC,AMOUNT) \
29675 	    gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29676   #define LSHIFT(CODE,SRC,AMOUNT) \
29677 	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29678 			    SImode, (SRC), (AMOUNT))
29679   #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29680 	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29681 			    SImode, (SRC), (AMOUNT))
29682   #define ORR(A,B) \
29683 	    gen_rtx_IOR (SImode, (A), (B))
29684   #define BRANCH(COND,LABEL) \
29685 	    gen_arm_cond_branch ((LABEL), \
29686 				 gen_rtx_ ## COND (CCmode, cc_reg, \
29687 						   const0_rtx), \
29688 				 cc_reg)
29689 
29690   /* Shifts by register and shifts by constant are handled separately.  */
29691   if (CONST_INT_P (amount))
29692     {
29693       /* We have a shift-by-constant.  */
29694 
29695       /* First, handle out-of-range shift amounts.
29696 	 In both cases we try to match the result that an ARM instruction in
29697 	 a shift-by-register would give.  This helps reduce execution
29698 	 differences between optimization levels, but it won't stop other
29699 	 parts of the compiler doing different things.  This is "undefined
29700 	 behavior", in any case.  */
29701       if (INTVAL (amount) <= 0)
29702 	emit_insn (gen_movdi (out, in));
29703       else if (INTVAL (amount) >= 64)
29704 	{
29705 	  if (code == ASHIFTRT)
29706 	    {
29707 	      rtx const31_rtx = GEN_INT (31);
29708 	      emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29709 	      emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29710 	    }
29711 	  else
29712 	    emit_insn (gen_movdi (out, const0_rtx));
29713 	}
29714 
29715       /* Now handle valid shifts. */
29716       else if (INTVAL (amount) < 32)
29717 	{
29718 	  /* Shifts by a constant less than 32.  */
29719 	  rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29720 
29721 	  /* Clearing the out register in DImode first avoids lots
29722 	     of spilling and results in less stack usage.
29723 	     Later this redundant insn is completely removed.
29724 	     Do that only if "in" and "out" are different registers.  */
29725 	  if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29726 	    emit_insn (SET (out, const0_rtx));
29727 	  emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29728 	  emit_insn (SET (out_down,
29729 			  ORR (REV_LSHIFT (code, in_up, reverse_amount),
29730 			       out_down)));
29731 	  emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29732 	}
29733       else
29734 	{
29735 	  /* Shifts by a constant greater than 31.  */
29736 	  rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29737 
29738 	  if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29739 	    emit_insn (SET (out, const0_rtx));
29740 	  emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29741 	  if (code == ASHIFTRT)
29742 	    emit_insn (gen_ashrsi3 (out_up, in_up,
29743 				    GEN_INT (31)));
29744 	  else
29745 	    emit_insn (SET (out_up, const0_rtx));
29746 	}
29747     }
29748   else
29749     {
29750       /* We have a shift-by-register.  */
29751       rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29752 
29753       /* This alternative requires the scratch registers.  */
29754       gcc_assert (scratch1 && REG_P (scratch1));
29755       gcc_assert (scratch2 && REG_P (scratch2));
29756 
29757       /* We will need the values "amount-32" and "32-amount" later.
29758          Swapping them around now allows the later code to be more general. */
29759       switch (code)
29760 	{
29761 	case ASHIFT:
29762 	  emit_insn (SUB_32 (scratch1, amount));
29763 	  emit_insn (RSB_32 (scratch2, amount));
29764 	  break;
29765 	case ASHIFTRT:
29766 	  emit_insn (RSB_32 (scratch1, amount));
29767 	  /* Also set CC = amount > 32.  */
29768 	  emit_insn (SUB_S_32 (scratch2, amount));
29769 	  break;
29770 	case LSHIFTRT:
29771 	  emit_insn (RSB_32 (scratch1, amount));
29772 	  emit_insn (SUB_32 (scratch2, amount));
29773 	  break;
29774 	default:
29775 	  gcc_unreachable ();
29776 	}
29777 
29778       /* Emit code like this:
29779 
29780 	 arithmetic-left:
29781 	    out_down = in_down << amount;
29782 	    out_down = (in_up << (amount - 32)) | out_down;
29783 	    out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29784 	    out_up = in_up << amount;
29785 
29786 	 arithmetic-right:
29787 	    out_down = in_down >> amount;
29788 	    out_down = (in_up << (32 - amount)) | out_down;
29789 	    if (amount >= 32)
29790 	      out_down = ((signed)in_up >> (amount - 32)) | out_down;
29791 	    out_up = (signed)in_up >> amount;
29792 
29793 	 logical-right:
29794 	    out_down = in_down >> amount;
29795 	    out_down = (in_up << (32 - amount)) | out_down;
29796 	    if (amount >= 32)
29797 	      out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29798 	    out_up = (unsigned)in_up >> amount;
29799 
29800 	  The ARM and Thumb2 variants are the same but implemented slightly
29801 	  differently.  If this were only called during expand we could just
29802 	  use the Thumb2 case and let combine do the right thing, but this
29803 	  can also be called from post-reload splitters.  */
29804 
29805       emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29806 
29807       if (!TARGET_THUMB2)
29808 	{
29809 	  /* Emit code for ARM mode.  */
29810 	  emit_insn (SET (out_down,
29811 			  ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29812 	  if (code == ASHIFTRT)
29813 	    {
29814 	      rtx_code_label *done_label = gen_label_rtx ();
29815 	      emit_jump_insn (BRANCH (LT, done_label));
29816 	      emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29817 					     out_down)));
29818 	      emit_label (done_label);
29819 	    }
29820 	  else
29821 	    emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29822 					   out_down)));
29823 	}
29824       else
29825 	{
29826 	  /* Emit code for Thumb2 mode.
29827 	     Thumb2 can't do shift and or in one insn.  */
29828 	  emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29829 	  emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29830 
29831 	  if (code == ASHIFTRT)
29832 	    {
29833 	      rtx_code_label *done_label = gen_label_rtx ();
29834 	      emit_jump_insn (BRANCH (LT, done_label));
29835 	      emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29836 	      emit_insn (SET (out_down, ORR (out_down, scratch2)));
29837 	      emit_label (done_label);
29838 	    }
29839 	  else
29840 	    {
29841 	      emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29842 	      emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29843 	    }
29844 	}
29845 
29846       emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29847     }
29848 
29849   #undef SUB_32
29850   #undef RSB_32
29851   #undef SUB_S_32
29852   #undef SET
29853   #undef SHIFT
29854   #undef LSHIFT
29855   #undef REV_LSHIFT
29856   #undef ORR
29857   #undef BRANCH
29858 }
29859 
29860 /* Returns true if the pattern is a valid symbolic address, which is either a
29861    symbol_ref or (symbol_ref + addend).
29862 
29863    According to the ARM ELF ABI, the initial addend of REL-type relocations
29864    processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29865    literal field of the instruction as a 16-bit signed value in the range
29866    -32768 <= A < 32768.  */
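      /* For example (illustrative only): both (symbol_ref "foo") and
         (const (plus (symbol_ref "foo") (const_int 40))) are accepted,
         while an addend such as 0x12345 is rejected because it cannot be
         encoded in the signed 16-bit field described above.  */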
29867 
29868 bool
29869 arm_valid_symbolic_address_p (rtx addr)
29870 {
29871   rtx xop0, xop1 = NULL_RTX;
29872   rtx tmp = addr;
29873 
29874   if (target_word_relocations)
29875     return false;
29876 
29877   if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29878     return true;
29879 
29880   /* (const (plus: symbol_ref const_int))  */
29881   if (GET_CODE (addr) == CONST)
29882     tmp = XEXP (addr, 0);
29883 
29884   if (GET_CODE (tmp) == PLUS)
29885     {
29886       xop0 = XEXP (tmp, 0);
29887       xop1 = XEXP (tmp, 1);
29888 
29889       if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29890 	  return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29891     }
29892 
29893   return false;
29894 }
29895 
29896 /* Returns true if this is a valid comparison operation, and puts
29897    the operands into a form that is valid for it.  */
29898 bool
29899 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29900 {
29901   enum rtx_code code = GET_CODE (*comparison);
29902   int code_int;
29903   machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29904     ? GET_MODE (*op2) : GET_MODE (*op1);
29905 
29906   gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29907 
29908   if (code == UNEQ || code == LTGT)
29909     return false;
29910 
29911   code_int = (int)code;
29912   arm_canonicalize_comparison (&code_int, op1, op2, 0);
29913   PUT_CODE (*comparison, (enum rtx_code)code_int);
29914 
29915   switch (mode)
29916     {
29917     case E_SImode:
29918       if (!arm_add_operand (*op1, mode))
29919 	*op1 = force_reg (mode, *op1);
29920       if (!arm_add_operand (*op2, mode))
29921 	*op2 = force_reg (mode, *op2);
29922       return true;
29923 
29924     case E_DImode:
29925       if (!cmpdi_operand (*op1, mode))
29926 	*op1 = force_reg (mode, *op1);
29927       if (!cmpdi_operand (*op2, mode))
29928 	*op2 = force_reg (mode, *op2);
29929       return true;
29930 
29931     case E_HFmode:
29932       if (!TARGET_VFP_FP16INST)
29933 	break;
29934       /* FP16 comparisons are done in SF mode.  */
29935       mode = SFmode;
29936       *op1 = convert_to_mode (mode, *op1, 1);
29937       *op2 = convert_to_mode (mode, *op2, 1);
29938       /* Fall through.  */
29939     case E_SFmode:
29940     case E_DFmode:
29941       if (!vfp_compare_operand (*op1, mode))
29942 	*op1 = force_reg (mode, *op1);
29943       if (!vfp_compare_operand (*op2, mode))
29944 	*op2 = force_reg (mode, *op2);
29945       return true;
29946     default:
29947       break;
29948     }
29949 
29950   return false;
29951 
29952 }
29953 
29954 /* Maximum number of instructions to set a block of memory.  */
29955 static int
29956 arm_block_set_max_insns (void)
29957 {
29958   if (optimize_function_for_size_p (cfun))
29959     return 4;
29960   else
29961     return current_tune->max_insns_inline_memset;
29962 }
29963 
29964 /* Return TRUE if it's profitable to set a block of memory for the
29965    non-vectorized case.  VAL is the value to set the memory
29966    with.  LENGTH is the number of bytes to set.  ALIGN is the
29967    alignment of the destination memory in bytes.  UNALIGNED_P
29968    is TRUE if we can only set the memory with instructions
29969    meeting alignment requirements.  USE_STRD_P is TRUE if we
29970    can use strd to set the memory.  */
29971 static bool
29972 arm_block_set_non_vect_profit_p (rtx val,
29973 				 unsigned HOST_WIDE_INT length,
29974 				 unsigned HOST_WIDE_INT align,
29975 				 bool unaligned_p, bool use_strd_p)
29976 {
29977   int num = 0;
29978   /* For leftovers in bytes of 0-7, we can set the memory block using
29979   /* For a leftover of 0-7 bytes, the minimum number of strb/strh/str
29980      instructions needed to store it.  */
29981 
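        /* For instance, a leftover of 7 bytes costs leftover[7] == 3
           stores: one str, one strh and one strb.  */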
29982   if (unaligned_p)
29983     {
29984       num = arm_const_inline_cost (SET, val);
29985       num += length / align + length % align;
29986     }
29987   else if (use_strd_p)
29988     {
29989       num = arm_const_double_inline_cost (val);
29990       num += (length >> 3) + leftover[length & 7];
29991     }
29992   else
29993     {
29994       num = arm_const_inline_cost (SET, val);
29995       num += (length >> 2) + leftover[length & 3];
29996     }
29997 
29998   /* We may be able to combine last pair STRH/STRB into a single STR
29999      by shifting one byte back.  */
30000   if (unaligned_access && length > 3 && (length & 3) == 3)
30001     num--;
30002 
30003   return (num <= arm_block_set_max_insns ());
30004 }
30005 
30006 /* Return TRUE if it's profitable to set a block of memory for the
30007    vectorized case.  LENGTH is the number of bytes to set.
30008    ALIGN is the alignment of destination memory in bytes.
30009    MODE is the vector mode used to set the memory.  */
30010 static bool
30011 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
30012 			     unsigned HOST_WIDE_INT align,
30013 			     machine_mode mode)
30014 {
30015   int num;
30016   bool unaligned_p = ((align & 3) != 0);
30017   unsigned int nelt = GET_MODE_NUNITS (mode);
30018 
30019   /* Instruction loading constant value.  */
30020   num = 1;
30021   /* Instructions storing the memory.  */
30022   num += (length + nelt - 1) / nelt;
30023   /* Instructions adjusting the address expression.  The address only
30024      needs adjusting when the destination is 4-byte aligned and the
30025      leftover bytes must be stored with a misaligned store instruction.  */
30026   if (!unaligned_p && (length & 3) != 0)
30027     num++;
30028 
30029   /* Store the first 16 bytes using vst1:v16qi for the aligned case.  */
30030   if (!unaligned_p && mode == V16QImode)
30031     num--;
30032 
30033   return (num <= arm_block_set_max_insns ());
30034 }
30035 
30036 /* Set a block of memory using vectorization instructions for the
30037    unaligned case.  We fill the first LENGTH bytes of the memory
30038    area starting from DSTBASE with byte constant VALUE.  ALIGN is
30039    the alignment requirement of memory.  Return TRUE if succeeded.  */
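      /* For instance (a sketch of the intent, not a guaranteed sequence):
         a 23-byte, byte-aligned memset is handled with one misaligned V16QI
         store for bytes 0-15 followed by a second, overlapping V8QI store
         covering bytes 15-22, plus the load of the duplicated byte value
         into a vector register.  */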
30040 static bool
30041 arm_block_set_unaligned_vect (rtx dstbase,
30042 			      unsigned HOST_WIDE_INT length,
30043 			      unsigned HOST_WIDE_INT value,
30044 			      unsigned HOST_WIDE_INT align)
30045 {
30046   unsigned int i, nelt_v16, nelt_v8, nelt_mode;
30047   rtx dst, mem;
30048   rtx val_vec, reg;
30049   rtx (*gen_func) (rtx, rtx);
30050   machine_mode mode;
30051   unsigned HOST_WIDE_INT v = value;
30052   unsigned int offset = 0;
30053   gcc_assert ((align & 0x3) != 0);
30054   nelt_v8 = GET_MODE_NUNITS (V8QImode);
30055   nelt_v16 = GET_MODE_NUNITS (V16QImode);
30056   if (length >= nelt_v16)
30057     {
30058       mode = V16QImode;
30059       gen_func = gen_movmisalignv16qi;
30060     }
30061   else
30062     {
30063       mode = V8QImode;
30064       gen_func = gen_movmisalignv8qi;
30065     }
30066   nelt_mode = GET_MODE_NUNITS (mode);
30067   gcc_assert (length >= nelt_mode);
30068   /* Skip if it isn't profitable.  */
30069   if (!arm_block_set_vect_profit_p (length, align, mode))
30070     return false;
30071 
30072   dst = copy_addr_to_reg (XEXP (dstbase, 0));
30073   mem = adjust_automodify_address (dstbase, mode, dst, offset);
30074 
30075   v = sext_hwi (v, BITS_PER_WORD);
30076 
30077   reg = gen_reg_rtx (mode);
30078   val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
30079   /* Emit instruction loading the constant value.  */
30080   emit_move_insn (reg, val_vec);
30081 
30082   /* Handle nelt_mode bytes in a vector.  */
30083   for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
30084     {
30085       emit_insn ((*gen_func) (mem, reg));
30086       if (i + 2 * nelt_mode <= length)
30087 	{
30088 	  emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
30089 	  offset += nelt_mode;
30090 	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
30091 	}
30092     }
30093 
30094   /* If at least nelt_v8 bytes are left over, we must be in
30095      V16QI mode.  */
30096   gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
30097 
30098   /* Handle (8, 16) bytes leftover.  */
30099   if (i + nelt_v8 < length)
30100     {
30101       emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
30102       offset += length - i;
30103       mem = adjust_automodify_address (dstbase, mode, dst, offset);
30104 
30105       /* We are shifting bytes back, set the alignment accordingly.  */
30106       if ((length & 1) != 0 && align >= 2)
30107 	set_mem_align (mem, BITS_PER_UNIT);
30108 
30109       emit_insn (gen_movmisalignv16qi (mem, reg));
30110     }
30111   /* Handle (0, 8] bytes leftover.  */
30112   else if (i < length && i + nelt_v8 >= length)
30113     {
30114       if (mode == V16QImode)
30115 	reg = gen_lowpart (V8QImode, reg);
30116 
30117       emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
30118 					      + (nelt_mode - nelt_v8))));
30119       offset += (length - i) + (nelt_mode - nelt_v8);
30120       mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
30121 
30122       /* We are shifting bytes back, set the alignment accordingly.  */
30123       if ((length & 1) != 0 && align >= 2)
30124 	set_mem_align (mem, BITS_PER_UNIT);
30125 
30126       emit_insn (gen_movmisalignv8qi (mem, reg));
30127     }
30128 
30129   return true;
30130 }
30131 
30132 /* Set a block of memory using vectorization instructions for the
30133    aligned case.  We fill the first LENGTH bytes of the memory area
30134    starting from DSTBASE with byte constant VALUE.  ALIGN is the
30135    alignment requirement of memory.  Return TRUE if succeeded.  */
30136 static bool
30137 arm_block_set_aligned_vect (rtx dstbase,
30138 			    unsigned HOST_WIDE_INT length,
30139 			    unsigned HOST_WIDE_INT value,
30140 			    unsigned HOST_WIDE_INT align)
30141 {
30142   unsigned int i, nelt_v8, nelt_v16, nelt_mode;
30143   rtx dst, addr, mem;
30144   rtx val_vec, reg;
30145   machine_mode mode;
30146   unsigned int offset = 0;
30147 
30148   gcc_assert ((align & 0x3) == 0);
30149   nelt_v8 = GET_MODE_NUNITS (V8QImode);
30150   nelt_v16 = GET_MODE_NUNITS (V16QImode);
30151   if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
30152     mode = V16QImode;
30153   else
30154     mode = V8QImode;
30155 
30156   nelt_mode = GET_MODE_NUNITS (mode);
30157   gcc_assert (length >= nelt_mode);
30158   /* Skip if it isn't profitable.  */
30159   if (!arm_block_set_vect_profit_p (length, align, mode))
30160     return false;
30161 
30162   dst = copy_addr_to_reg (XEXP (dstbase, 0));
30163 
30164   reg = gen_reg_rtx (mode);
30165   val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
30166   /* Emit instruction loading the constant value.  */
30167   emit_move_insn (reg, val_vec);
30168 
30169   i = 0;
30170   /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
30171   if (mode == V16QImode)
30172     {
30173       mem = adjust_automodify_address (dstbase, mode, dst, offset);
30174       emit_insn (gen_movmisalignv16qi (mem, reg));
30175       i += nelt_mode;
30176       /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
30177       if (i + nelt_v8 < length && i + nelt_v16 > length)
30178 	{
30179 	  emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30180 	  offset += length - nelt_mode;
30181 	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
30182 	  /* We are shifting bytes back, set the alignment accordingly.  */
30183 	  if ((length & 0x3) == 0)
30184 	    set_mem_align (mem, BITS_PER_UNIT * 4);
30185 	  else if ((length & 0x1) == 0)
30186 	    set_mem_align (mem, BITS_PER_UNIT * 2);
30187 	  else
30188 	    set_mem_align (mem, BITS_PER_UNIT);
30189 
30190 	  emit_insn (gen_movmisalignv16qi (mem, reg));
30191 	  return true;
30192 	}
30193       /* Fall through for bytes leftover.  */
30194       mode = V8QImode;
30195       nelt_mode = GET_MODE_NUNITS (mode);
30196       reg = gen_lowpart (V8QImode, reg);
30197     }
30198 
30199   /* Handle 8 bytes in a vector.  */
30200   for (; (i + nelt_mode <= length); i += nelt_mode)
30201     {
30202       addr = plus_constant (Pmode, dst, i);
30203       mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
30204       emit_move_insn (mem, reg);
30205     }
30206 
30207   /* Handle single word leftover by shifting 4 bytes back.  We can
30208      use aligned access for this case.  */
30209   if (i + UNITS_PER_WORD == length)
30210     {
30211       addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
30212       offset += i - UNITS_PER_WORD;
30213       mem = adjust_automodify_address (dstbase, mode, addr, offset);
30214       /* We are shifting 4 bytes back, set the alignment accordingly.  */
30215       if (align > UNITS_PER_WORD)
30216 	set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
30217 
30218       emit_move_insn (mem, reg);
30219     }
30220   /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
30221      We have to use unaligned access for this case.  */
30222   else if (i < length)
30223     {
30224       emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30225       offset += length - nelt_mode;
30226       mem = adjust_automodify_address (dstbase, mode, dst, offset);
30227       /* We are shifting bytes back, set the alignment accordingly.  */
30228       if ((length & 1) == 0)
30229 	set_mem_align (mem, BITS_PER_UNIT * 2);
30230       else
30231 	set_mem_align (mem, BITS_PER_UNIT);
30232 
30233       emit_insn (gen_movmisalignv8qi (mem, reg));
30234     }
30235 
30236   return true;
30237 }
30238 
30239 /* Set a block of memory using plain strh/strb instructions, only
30240    using instructions allowed by ALIGN on the processor.  We fill the
30241    first LENGTH bytes of the memory area starting from DSTBASE
30242    with byte constant VALUE.  ALIGN is the alignment requirement
30243    of memory.  */
30244 static bool
30245 arm_block_set_unaligned_non_vect (rtx dstbase,
30246 				  unsigned HOST_WIDE_INT length,
30247 				  unsigned HOST_WIDE_INT value,
30248 				  unsigned HOST_WIDE_INT align)
30249 {
30250   unsigned int i;
30251   rtx dst, addr, mem;
30252   rtx val_exp, val_reg, reg;
30253   machine_mode mode;
30254   HOST_WIDE_INT v = value;
30255 
30256   gcc_assert (align == 1 || align == 2);
30257 
30258   if (align == 2)
30259     v |= (value << BITS_PER_UNIT);
30260 
30261   v = sext_hwi (v, BITS_PER_WORD);
30262   val_exp = GEN_INT (v);
30263   /* Skip if it isn't profitable.  */
30264   if (!arm_block_set_non_vect_profit_p (val_exp, length,
30265 					align, true, false))
30266     return false;
30267 
30268   dst = copy_addr_to_reg (XEXP (dstbase, 0));
30269   mode = (align == 2 ? HImode : QImode);
30270   val_reg = force_reg (SImode, val_exp);
30271   reg = gen_lowpart (mode, val_reg);
30272 
30273   for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
30274     {
30275       addr = plus_constant (Pmode, dst, i);
30276       mem = adjust_automodify_address (dstbase, mode, addr, i);
30277       emit_move_insn (mem, reg);
30278     }
30279 
30280   /* Handle single byte leftover.  */
30281   if (i + 1 == length)
30282     {
30283       reg = gen_lowpart (QImode, val_reg);
30284       addr = plus_constant (Pmode, dst, i);
30285       mem = adjust_automodify_address (dstbase, QImode, addr, i);
30286       emit_move_insn (mem, reg);
30287       i++;
30288     }
30289 
30290   gcc_assert (i == length);
30291   return true;
30292 }
30293 
30294 /* Set a block of memory using plain strd/str/strh/strb instructions,
30295    to permit unaligned copies on processors which support unaligned
30296    semantics for those instructions.  We fill the first LENGTH bytes
30297    of the memory area starting from DSTBASE with byte constant VALUE.
30298    ALIGN is the alignment requirement of memory.  */
30299 static bool
30300 arm_block_set_aligned_non_vect (rtx dstbase,
30301 				unsigned HOST_WIDE_INT length,
30302 				unsigned HOST_WIDE_INT value,
30303 				unsigned HOST_WIDE_INT align)
30304 {
30305   unsigned int i;
30306   rtx dst, addr, mem;
30307   rtx val_exp, val_reg, reg;
30308   unsigned HOST_WIDE_INT v;
30309   bool use_strd_p;
30310 
30311   use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30312 		&& TARGET_LDRD && current_tune->prefer_ldrd_strd);
30313 
30314   v = (value | (value << 8) | (value << 16) | (value << 24));
30315   if (length < UNITS_PER_WORD)
30316     v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30317 
30318   if (use_strd_p)
30319     v |= (v << BITS_PER_WORD);
30320   else
30321     v = sext_hwi (v, BITS_PER_WORD);
30322 
30323   val_exp = GEN_INT (v);
30324   /* Skip if it isn't profitable.  */
30325   if (!arm_block_set_non_vect_profit_p (val_exp, length,
30326 					align, false, use_strd_p))
30327     {
30328       if (!use_strd_p)
30329 	return false;
30330 
30331       /* Try without strd.  */
30332       v = (v >> BITS_PER_WORD);
30333       v = sext_hwi (v, BITS_PER_WORD);
30334       val_exp = GEN_INT (v);
30335       use_strd_p = false;
30336       if (!arm_block_set_non_vect_profit_p (val_exp, length,
30337 					    align, false, use_strd_p))
30338 	return false;
30339     }
30340 
30341   i = 0;
30342   dst = copy_addr_to_reg (XEXP (dstbase, 0));
30343   /* Handle double words using strd if possible.  */
30344   if (use_strd_p)
30345     {
30346       val_reg = force_reg (DImode, val_exp);
30347       reg = val_reg;
30348       for (; (i + 8 <= length); i += 8)
30349 	{
30350 	  addr = plus_constant (Pmode, dst, i);
30351 	  mem = adjust_automodify_address (dstbase, DImode, addr, i);
30352 	  emit_move_insn (mem, reg);
30353 	}
30354     }
30355   else
30356     val_reg = force_reg (SImode, val_exp);
30357 
30358   /* Handle words.  */
30359   reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30360   for (; (i + 4 <= length); i += 4)
30361     {
30362       addr = plus_constant (Pmode, dst, i);
30363       mem = adjust_automodify_address (dstbase, SImode, addr, i);
30364       if ((align & 3) == 0)
30365 	emit_move_insn (mem, reg);
30366       else
30367 	emit_insn (gen_unaligned_storesi (mem, reg));
30368     }
30369 
30370   /* Merge last pair of STRH and STRB into a STR if possible.  */
30371   if (unaligned_access && i > 0 && (i + 3) == length)
30372     {
30373       addr = plus_constant (Pmode, dst, i - 1);
30374       mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30375       /* We are shifting one byte back, set the alignment accordingly.  */
30376       if ((align & 1) == 0)
30377 	set_mem_align (mem, BITS_PER_UNIT);
30378 
30379       /* Most likely this is an unaligned access, and we can't tell at
30380 	 compilation time.  */
30381       emit_insn (gen_unaligned_storesi (mem, reg));
30382       return true;
30383     }
30384 
30385   /* Handle half word leftover.  */
30386   if (i + 2 <= length)
30387     {
30388       reg = gen_lowpart (HImode, val_reg);
30389       addr = plus_constant (Pmode, dst, i);
30390       mem = adjust_automodify_address (dstbase, HImode, addr, i);
30391       if ((align & 1) == 0)
30392 	emit_move_insn (mem, reg);
30393       else
30394 	emit_insn (gen_unaligned_storehi (mem, reg));
30395 
30396       i += 2;
30397     }
30398 
30399   /* Handle single byte leftover.  */
30400   if (i + 1 == length)
30401     {
30402       reg = gen_lowpart (QImode, val_reg);
30403       addr = plus_constant (Pmode, dst, i);
30404       mem = adjust_automodify_address (dstbase, QImode, addr, i);
30405       emit_move_insn (mem, reg);
30406     }
30407 
30408   return true;
30409 }
30410 
30411 /* Set a block of memory using vectorization instructions for both
30412    aligned and unaligned cases.  We fill the first LENGTH bytes of
30413    the memory area starting from DSTBASE with byte constant VALUE.
30414    ALIGN is the alignment requirement of memory.  */
30415 static bool
30416 arm_block_set_vect (rtx dstbase,
30417 		    unsigned HOST_WIDE_INT length,
30418 		    unsigned HOST_WIDE_INT value,
30419 		    unsigned HOST_WIDE_INT align)
30420 {
30421   /* Check whether we need to use unaligned store instruction.  */
30422   if (((align & 3) != 0 || (length & 3) != 0)
30423       /* Check whether unaligned store instruction is available.  */
30424       && (!unaligned_access || BYTES_BIG_ENDIAN))
30425     return false;
30426 
30427   if ((align & 3) == 0)
30428     return arm_block_set_aligned_vect (dstbase, length, value, align);
30429   else
30430     return arm_block_set_unaligned_vect (dstbase, length, value, align);
30431 }
30432 
30433 /* Expand a string store operation.  First we try to do it using
30434    vectorization instructions, then with ARM unaligned access and
30435    double-word stores if profitable.  OPERANDS[0] is the destination,
30436    OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30437    initialize the memory, OPERANDS[3] is the known alignment of the
30438    destination.  */
30439 bool
30440 arm_gen_setmem (rtx *operands)
30441 {
30442   rtx dstbase = operands[0];
30443   unsigned HOST_WIDE_INT length;
30444   unsigned HOST_WIDE_INT value;
30445   unsigned HOST_WIDE_INT align;
30446 
30447   if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30448     return false;
30449 
30450   length = UINTVAL (operands[1]);
30451   if (length > 64)
30452     return false;
30453 
30454   value = (UINTVAL (operands[2]) & 0xFF);
30455   align = UINTVAL (operands[3]);
30456   if (TARGET_NEON && length >= 8
30457       && current_tune->string_ops_prefer_neon
30458       && arm_block_set_vect (dstbase, length, value, align))
30459     return true;
30460 
30461   if (!unaligned_access && (align & 3) != 0)
30462     return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30463 
30464   return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30465 }
30466 
30467 
30468 static bool
30469 arm_macro_fusion_p (void)
30470 {
30471   return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30472 }
30473 
30474 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30475    for MOVW / MOVT macro fusion.  */
30476 
30477 static bool
30478 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30479 {
30480   /* We are trying to fuse
30481      movw imm / movt imm
30482     instructions as a group that gets scheduled together.  */
30483 
30484   rtx set_dest = SET_DEST (curr_set);
30485 
30486   if (GET_MODE (set_dest) != SImode)
30487     return false;
30488 
30489   /* We are trying to match:
30490      prev (movw)  == (set (reg r0) (const_int imm16))
30491      curr (movt) == (set (zero_extract (reg r0)
30492 					(const_int 16)
30493 					(const_int 16))
30494 			  (const_int imm16_1))
30495      or
30496      prev (movw) == (set (reg r1)
30497 			  (high (symbol_ref ("SYM"))))
30498     curr (movt) == (set (reg r0)
30499 			(lo_sum (reg r1)
30500 				(symbol_ref ("SYM"))))  */
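        /* In assembly terms (a sketch), these correspond to the pair

               movw    rX, #:lower16:SYM
               movt    rX, #:upper16:SYM

           (or the equivalent pair building a 32-bit immediate in two
           halves), which fusible cores can then issue back to back.  */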
30501 
30502     if (GET_CODE (set_dest) == ZERO_EXTRACT)
30503       {
30504 	if (CONST_INT_P (SET_SRC (curr_set))
30505 	    && CONST_INT_P (SET_SRC (prev_set))
30506 	    && REG_P (XEXP (set_dest, 0))
30507 	    && REG_P (SET_DEST (prev_set))
30508 	    && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30509 	  return true;
30510 
30511       }
30512     else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30513 	     && REG_P (SET_DEST (curr_set))
30514 	     && REG_P (SET_DEST (prev_set))
30515 	     && GET_CODE (SET_SRC (prev_set)) == HIGH
30516 	     && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30517       return true;
30518 
30519   return false;
30520 }
30521 
30522 static bool
30523 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30524 {
30525   rtx prev_set = single_set (prev);
30526   rtx curr_set = single_set (curr);
30527 
30528   if (!prev_set
30529       || !curr_set)
30530     return false;
30531 
30532   if (any_condjump_p (curr))
30533     return false;
30534 
30535   if (!arm_macro_fusion_p ())
30536     return false;
30537 
30538   if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30539       && aarch_crypto_can_dual_issue (prev, curr))
30540     return true;
30541 
30542   if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30543       && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30544     return true;
30545 
30546   return false;
30547 }
30548 
30549 /* Return true iff the instruction fusion described by OP is enabled.  */
30550 bool
30551 arm_fusion_enabled_p (tune_params::fuse_ops op)
30552 {
30553   return current_tune->fusible_ops & op;
30554 }
30555 
30556 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN.  Return true if INSN can be
30557    scheduled for speculative execution.  Reject the long-running division
30558    and square-root instructions.  */
30559 
30560 static bool
30561 arm_sched_can_speculate_insn (rtx_insn *insn)
30562 {
30563   switch (get_attr_type (insn))
30564     {
30565       case TYPE_SDIV:
30566       case TYPE_UDIV:
30567       case TYPE_FDIVS:
30568       case TYPE_FDIVD:
30569       case TYPE_FSQRTS:
30570       case TYPE_FSQRTD:
30571       case TYPE_NEON_FP_SQRT_S:
30572       case TYPE_NEON_FP_SQRT_D:
30573       case TYPE_NEON_FP_SQRT_S_Q:
30574       case TYPE_NEON_FP_SQRT_D_Q:
30575       case TYPE_NEON_FP_DIV_S:
30576       case TYPE_NEON_FP_DIV_D:
30577       case TYPE_NEON_FP_DIV_S_Q:
30578       case TYPE_NEON_FP_DIV_D_Q:
30579 	return false;
30580       default:
30581 	return true;
30582     }
30583 }
30584 
30585 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
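/* With ASAN_SHADOW_SHIFT being 3, this gives the usual 1/8th shadow mapping
   on 32-bit ARM, i.e. shadow_address = (address >> 3) + 0x20000000.  */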
30586 
30587 static unsigned HOST_WIDE_INT
30588 arm_asan_shadow_offset (void)
30589 {
30590   return HOST_WIDE_INT_1U << 29;
30591 }
30592 
30593 
30594 /* This is a temporary fix for PR60655.  Ideally we need
30595    to handle most of these cases in the generic part but
30596    currently we reject minus (..) (sym_ref).  We try to
30597    ameliorate the case with minus (sym_ref1) (sym_ref2)
30598    where they are in the same section.  */
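/* For instance, a debug expression of the form
   (minus (symbol_ref "a") (symbol_ref "b")) is accepted below when both
   symbols resolve to the same section, since the difference is then a
   link-time constant.  */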
30599 
30600 static bool
30601 arm_const_not_ok_for_debug_p (rtx p)
30602 {
30603   tree decl_op0 = NULL;
30604   tree decl_op1 = NULL;
30605 
30606   if (GET_CODE (p) == UNSPEC)
30607     return true;
30608   if (GET_CODE (p) == MINUS)
30609     {
30610       if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30611 	{
30612 	  decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30613 	  if (decl_op1
30614 	      && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30615 	      && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30616 	    {
30617 	      if ((VAR_P (decl_op1)
30618 		   || TREE_CODE (decl_op1) == CONST_DECL)
30619 		  && (VAR_P (decl_op0)
30620 		      || TREE_CODE (decl_op0) == CONST_DECL))
30621 		return (get_variable_section (decl_op1, false)
30622 			!= get_variable_section (decl_op0, false));
30623 
30624 	      if (TREE_CODE (decl_op1) == LABEL_DECL
30625 		  && TREE_CODE (decl_op0) == LABEL_DECL)
30626 		return (DECL_CONTEXT (decl_op1)
30627 			!= DECL_CONTEXT (decl_op0));
30628 	    }
30629 
30630 	  return true;
30631 	}
30632     }
30633 
30634   return false;
30635 }
30636 
30637 /* Return TRUE if X is a reference to a value in a constant pool.  */
30638 extern bool
30639 arm_is_constant_pool_ref (rtx x)
30640 {
30641   return (MEM_P (x)
30642 	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30643 	  && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30644 }
30645 
30646 /* Remember the last target of arm_set_current_function.  */
30647 static GTY(()) tree arm_previous_fndecl;
30648 
30649 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.  */
30650 
30651 void
30652 save_restore_target_globals (tree new_tree)
30653 {
30654   /* If we have a previous state, use it.  */
30655   if (TREE_TARGET_GLOBALS (new_tree))
30656     restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30657   else if (new_tree == target_option_default_node)
30658     restore_target_globals (&default_target_globals);
30659   else
30660     {
30661       /* Call target_reinit and save the state for TARGET_GLOBALS.  */
30662       TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30663     }
30664 
30665   arm_option_params_internal ();
30666 }
30667 
30668 /* Invalidate arm_previous_fndecl.  */
30669 
30670 void
30671 arm_reset_previous_fndecl (void)
30672 {
30673   arm_previous_fndecl = NULL_TREE;
30674 }
30675 
30676 /* Establish appropriate back-end context for processing the function
30677    FNDECL.  The argument might be NULL to indicate processing at top
30678    level, outside of any function scope.  */
30679 
30680 static void
30681 arm_set_current_function (tree fndecl)
30682 {
30683   if (!fndecl || fndecl == arm_previous_fndecl)
30684     return;
30685 
30686   tree old_tree = (arm_previous_fndecl
30687 		   ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30688 		   : NULL_TREE);
30689 
30690   tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30691 
30692   /* If current function has no attributes but previous one did,
30693      use the default node.  */
30694   if (! new_tree && old_tree)
30695     new_tree = target_option_default_node;
30696 
30697   /* If there is nothing to do, return.  #pragma GCC reset or #pragma GCC pop to
30698      the default have been handled by save_restore_target_globals from
30699      arm_pragma_target_parse.  */
30700   if (old_tree == new_tree)
30701     return;
30702 
30703   arm_previous_fndecl = fndecl;
30704 
30705   /* First set the target options.  */
30706   cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30707 
30708   save_restore_target_globals (new_tree);
30709 }
30710 
30711 /* Implement TARGET_OPTION_PRINT.  */
30712 
30713 static void
30714 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30715 {
30716   int flags = ptr->x_target_flags;
30717   const char *fpu_name;
30718 
30719   fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30720 	      ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30721 
30722   fprintf (file, "%*sselected isa %s\n", indent, "",
30723 	   TARGET_THUMB2_P (flags) ? "thumb2" :
30724 	   TARGET_THUMB_P (flags) ? "thumb1" :
30725 	   "arm");
30726 
30727   if (ptr->x_arm_arch_string)
30728     fprintf (file, "%*sselected architecture %s\n", indent, "",
30729 	     ptr->x_arm_arch_string);
30730 
30731   if (ptr->x_arm_cpu_string)
30732     fprintf (file, "%*sselected CPU %s\n", indent, "",
30733 	     ptr->x_arm_cpu_string);
30734 
30735   if (ptr->x_arm_tune_string)
30736     fprintf (file, "%*sselected tune %s\n", indent, "",
30737 	     ptr->x_arm_tune_string);
30738 
30739   fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30740 }
30741 
30742 /* Hook to determine if one function can safely inline another.  */
30743 
30744 static bool
30745 arm_can_inline_p (tree caller, tree callee)
30746 {
30747   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30748   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30749   bool can_inline = true;
30750 
30751   struct cl_target_option *caller_opts
30752 	= TREE_TARGET_OPTION (caller_tree ? caller_tree
30753 					   : target_option_default_node);
30754 
30755   struct cl_target_option *callee_opts
30756 	= TREE_TARGET_OPTION (callee_tree ? callee_tree
30757 					   : target_option_default_node);
30758 
30759   if (callee_opts == caller_opts)
30760     return true;
30761 
30762   /* Callee's ISA features should be a subset of the caller's.  */
30763   struct arm_build_target caller_target;
30764   struct arm_build_target callee_target;
30765   caller_target.isa = sbitmap_alloc (isa_num_bits);
30766   callee_target.isa = sbitmap_alloc (isa_num_bits);
30767 
30768   arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30769 			      false);
30770   arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30771 			      false);
30772   if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30773     can_inline = false;
30774 
30775   sbitmap_free (caller_target.isa);
30776   sbitmap_free (callee_target.isa);
30777 
30778   /* OK to inline between different modes.
30779      Functions with mode-specific instructions, e.g. using asm,
30780      must be explicitly protected with noinline.  */
30781   return can_inline;
30782 }
30783 
30784 /* Hook to fix a function's alignment when it is affected by the target attribute.  */
30785 
30786 static void
30787 arm_relayout_function (tree fndecl)
30788 {
30789   if (DECL_USER_ALIGN (fndecl))
30790     return;
30791 
30792   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30793 
30794   if (!callee_tree)
30795     callee_tree = target_option_default_node;
30796 
30797   struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30798   SET_DECL_ALIGN
30799     (fndecl,
30800      FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30801 }
30802 
30803 /* Inner function to process the attribute ((target (...))); take an argument and
30804    set the current options from the argument.  If we have a list, recursively
30805    go over the list.  */
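/* For example, attribute ((target ("thumb,fpu=neon"))) reaches this function
   as one string that is split below on commas into the entries "thumb" and
   "fpu=neon"; an entry starting with '+', such as "+crc", is appended to the
   current architecture string.  */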
30806 
30807 static bool
30808 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30809 {
30810   if (TREE_CODE (args) == TREE_LIST)
30811     {
30812       bool ret = true;
30813 
30814       for (; args; args = TREE_CHAIN (args))
30815 	if (TREE_VALUE (args)
30816 	    && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30817 	  ret = false;
30818       return ret;
30819     }
30820 
30821   else if (TREE_CODE (args) != STRING_CST)
30822     {
30823       error ("attribute %<target%> argument not a string");
30824       return false;
30825     }
30826 
30827   char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30828   char *q;
30829 
30830   while ((q = strtok (argstr, ",")) != NULL)
30831     {
30832       while (ISSPACE (*q)) ++q;
30833 
30834       argstr = NULL;
30835       if (!strncmp (q, "thumb", 5))
30836 	  opts->x_target_flags |= MASK_THUMB;
30837 
30838       else if (!strncmp (q, "arm", 3))
30839 	  opts->x_target_flags &= ~MASK_THUMB;
30840 
30841       else if (!strncmp (q, "fpu=", 4))
30842 	{
30843 	  int fpu_index;
30844 	  if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30845 				       &fpu_index, CL_TARGET))
30846 	    {
30847 	      error ("invalid fpu for target attribute or pragma %qs", q);
30848 	      return false;
30849 	    }
30850 	  if (fpu_index == TARGET_FPU_auto)
30851 	    {
30852 	      /* This doesn't really make sense until we support
30853 		 general dynamic selection of the architecture and all
30854 		 sub-features.  */
30855 	      sorry ("auto fpu selection not currently permitted here");
30856 	      return false;
30857 	    }
30858 	  opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30859 	}
30860       else if (!strncmp (q, "arch=", 5))
30861 	{
30862 	  char* arch = q+5;
30863 	  const arch_option *arm_selected_arch
30864 	     = arm_parse_arch_option_name (all_architectures, "arch", arch);
30865 
30866 	  if (!arm_selected_arch)
30867 	    {
30868 	      error ("invalid architecture for target attribute or pragma %qs",
30869 		     q);
30870 	      return false;
30871 	    }
30872 
30873 	  opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
30874 	}
30875       else if (q[0] == '+')
30876 	{
30877 	  opts->x_arm_arch_string
30878 	    = xasprintf ("%s%s", opts->x_arm_arch_string, q);
30879 	}
30880       else
30881 	{
30882 	  error ("unknown target attribute or pragma %qs", q);
30883 	  return false;
30884 	}
30885     }
30886 
30887   return true;
30888 }
30889 
30890 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */
30891 
30892 tree
30893 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30894 				 struct gcc_options *opts_set)
30895 {
30896   struct cl_target_option cl_opts;
30897 
30898   if (!arm_valid_target_attribute_rec (args, opts))
30899     return NULL_TREE;
30900 
30901   cl_target_option_save (&cl_opts, opts);
30902   arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30903   arm_option_check_internal (opts);
30904   /* Do any overrides, such as global options arch=xxx.
30905      We do this since arm_active_target was overridden.  */
30906   arm_option_reconfigure_globals ();
30907   arm_options_perform_arch_sanity_checks ();
30908   arm_option_override_internal (opts, opts_set);
30909 
30910   return build_target_option_node (opts);
30911 }
30912 
30913 static void
30914 add_attribute  (const char * mode, tree *attributes)
30915 {
30916   size_t len = strlen (mode);
30917   tree value = build_string (len, mode);
30918 
30919   TREE_TYPE (value) = build_array_type (char_type_node,
30920 					build_index_type (size_int (len)));
30921 
30922   *attributes = tree_cons (get_identifier ("target"),
30923 			   build_tree_list (NULL_TREE, value),
30924 			   *attributes);
30925 }
30926 
30927 /* For testing.  Insert thumb or arm modes alternately on functions.  */
30928 
30929 static void
30930 arm_insert_attributes (tree fndecl, tree * attributes)
30931 {
30932   const char *mode;
30933 
30934   if (! TARGET_FLIP_THUMB)
30935     return;
30936 
30937   if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30938       || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30939    return;
30940 
30941   /* Nested definitions must inherit mode.  */
30942   if (current_function_decl)
30943    {
30944      mode = TARGET_THUMB ? "thumb" : "arm";
30945      add_attribute (mode, attributes);
30946      return;
30947    }
30948 
30949   /* If there is already a setting don't change it.  */
30950   if (lookup_attribute ("target", *attributes) != NULL)
30951     return;
30952 
30953   mode = thumb_flipper ? "thumb" : "arm";
30954   add_attribute (mode, attributes);
30955 
30956   thumb_flipper = !thumb_flipper;
30957 }
30958 
30959 /* Hook to validate attribute((target("string"))).  */
30960 
30961 static bool
30962 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30963 			      tree args, int ARG_UNUSED (flags))
30964 {
30965   bool ret = true;
30966   struct gcc_options func_options;
30967   tree cur_tree, new_optimize;
30968   gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30969 
30970   /* Get the optimization options of the current function.  */
30971   tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30972 
30973   /* If the function changed the optimization levels as well as setting target
30974      options, start with the optimizations specified.  */
30975   if (!func_optimize)
30976     func_optimize = optimization_default_node;
30977 
30978   /* Init func_options.  */
30979   memset (&func_options, 0, sizeof (func_options));
30980   init_options_struct (&func_options, NULL);
30981   lang_hooks.init_options_struct (&func_options);
30982 
30983   /* Initialize func_options to the defaults.  */
30984   cl_optimization_restore (&func_options,
30985 			   TREE_OPTIMIZATION (func_optimize));
30986 
30987   cl_target_option_restore (&func_options,
30988 			    TREE_TARGET_OPTION (target_option_default_node));
30989 
30990   /* Set func_options flags with new target mode.  */
30991   cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30992 					      &global_options_set);
30993 
30994   if (cur_tree == NULL_TREE)
30995     ret = false;
30996 
30997   new_optimize = build_optimization_node (&func_options);
30998 
30999   DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
31000 
31001   DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
31002 
31003   finalize_options_struct (&func_options);
31004 
31005   return ret;
31006 }
31007 
31008 /* Match an ISA feature bitmap to a named FPU.  We always use the
31009    first entry that exactly matches the feature set, so that we
31010    effectively canonicalize the FPU name for the assembler.  */
31011 static const char*
31012 arm_identify_fpu_from_isa (sbitmap isa)
31013 {
31014   auto_sbitmap fpubits (isa_num_bits);
31015   auto_sbitmap cand_fpubits (isa_num_bits);
31016 
31017   bitmap_and (fpubits, isa, isa_all_fpubits);
31018 
31019   /* If there are no ISA feature bits relating to the FPU, we must be
31020      doing soft-float.  */
31021   if (bitmap_empty_p (fpubits))
31022     return "softvfp";
31023 
31024   for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31025     {
31026       arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
31027       if (bitmap_equal_p (fpubits, cand_fpubits))
31028 	return all_fpus[i].name;
31029     }
31030   /* We must find an entry, or things have gone wrong.  */
31031   gcc_unreachable ();
31032 }
31033 
31034 /* Implement ASM_DECLARE_FUNCTION_NAME.  Output the ISA features used
31035    by the function fndecl.  */
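/* For a Thumb-2 function compiled for, say, armv7-a with Neon enabled this
   emits roughly:
	.arch armv7-a
	.syntax unified
	.thumb
	.thumb_func
	.fpu neon
   with .arch_extension lines added for any extensions enabled on top of the
   base architecture, and .arch/.fpu only re-emitted when they differ from
   the last values printed.  */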
31036 void
31037 arm_declare_function_name (FILE *stream, const char *name, tree decl)
31038 {
31039   tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
31040 
31041   struct cl_target_option *targ_options;
31042   if (target_parts)
31043     targ_options = TREE_TARGET_OPTION (target_parts);
31044   else
31045     targ_options = TREE_TARGET_OPTION (target_option_current_node);
31046   gcc_assert (targ_options);
31047 
31048   /* Only update the assembler .arch string if it is distinct from the last
31049      such string we printed.  arch_to_print is set conditionally in case
31050      targ_options->x_arm_arch_string is NULL, which can be the case
31051      when cc1 is invoked directly without passing the -march option.  */
31052   std::string arch_to_print;
31053   if (targ_options->x_arm_arch_string)
31054     arch_to_print = targ_options->x_arm_arch_string;
31055 
31056   if (arch_to_print != arm_last_printed_arch_string)
31057     {
31058       std::string arch_name
31059 	= arch_to_print.substr (0, arch_to_print.find ("+"));
31060       asm_fprintf (asm_out_file, "\t.arch %s\n", arch_name.c_str ());
31061       const arch_option *arch
31062 	= arm_parse_arch_option_name (all_architectures, "-march",
31063 				      targ_options->x_arm_arch_string);
31064       auto_sbitmap opt_bits (isa_num_bits);
31065 
31066       gcc_assert (arch);
31067       if (arch->common.extensions)
31068 	{
31069 	  for (const struct cpu_arch_extension *opt = arch->common.extensions;
31070 	       opt->name != NULL;
31071 	       opt++)
31072 	    {
31073 	      if (!opt->remove)
31074 		{
31075 		  arm_initialize_isa (opt_bits, opt->isa_bits);
31076 		  if (bitmap_subset_p (opt_bits, arm_active_target.isa)
31077 		      && !bitmap_subset_p (opt_bits, isa_all_fpubits))
31078 		    asm_fprintf (asm_out_file, "\t.arch_extension %s\n",
31079 				 opt->name);
31080 		}
31081 	    }
31082 	}
31083 
31084       arm_last_printed_arch_string = arch_to_print;
31085     }
31086 
31087   fprintf (stream, "\t.syntax unified\n");
31088 
31089   if (TARGET_THUMB)
31090     {
31091       if (is_called_in_ARM_mode (decl)
31092 	  || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
31093 	      && cfun->is_thunk))
31094 	fprintf (stream, "\t.code 32\n");
31095       else if (TARGET_THUMB1)
31096 	fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
31097       else
31098 	fprintf (stream, "\t.thumb\n\t.thumb_func\n");
31099     }
31100   else
31101     fprintf (stream, "\t.arm\n");
31102 
31103   std::string fpu_to_print
31104     = TARGET_SOFT_FLOAT
31105 	? "softvfp" : arm_identify_fpu_from_isa (arm_active_target.isa);
31106 
31107   if (fpu_to_print != arm_last_printed_fpu_string)
31108     {
31109       asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_to_print.c_str ());
31110       arm_last_printed_fpu_string = fpu_to_print;
31111     }
31112 
31113   if (TARGET_POKE_FUNCTION_NAME)
31114     arm_poke_function_name (stream, (const char *) name);
31115 }
31116 
31117 /* If MEM is in the form of [base+offset], extract the two parts of the
31118    address into BASE and OFFSET and return true; otherwise return false
31119    after clearing BASE and OFFSET.  */
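/* For example, (mem (plus (reg r1) (const_int 8))) gives BASE = (reg r1) and
   OFFSET = (const_int 8), while a plain (mem (reg r1)) gives BASE = (reg r1)
   and OFFSET = const0_rtx.  */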
31120 
31121 static bool
31122 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
31123 {
31124   rtx addr;
31125 
31126   gcc_assert (MEM_P (mem));
31127 
31128   addr = XEXP (mem, 0);
31129 
31130   /* Strip off const from addresses like (const (addr)).  */
31131   if (GET_CODE (addr) == CONST)
31132     addr = XEXP (addr, 0);
31133 
31134   if (GET_CODE (addr) == REG)
31135     {
31136       *base = addr;
31137       *offset = const0_rtx;
31138       return true;
31139     }
31140 
31141   if (GET_CODE (addr) == PLUS
31142       && GET_CODE (XEXP (addr, 0)) == REG
31143       && CONST_INT_P (XEXP (addr, 1)))
31144     {
31145       *base = XEXP (addr, 0);
31146       *offset = XEXP (addr, 1);
31147       return true;
31148     }
31149 
31150   *base = NULL_RTX;
31151   *offset = NULL_RTX;
31152 
31153   return false;
31154 }
31155 
31156 /* If INSN is a load or store using an address of the form [base+offset],
31157    extract the two parts into BASE and OFFSET.  IS_LOAD is set
31158    to TRUE if it is a load.  Return TRUE if INSN is such an instruction,
31159    otherwise return FALSE.  */
31160 
31161 static bool
31162 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
31163 {
31164   rtx x, dest, src;
31165 
31166   gcc_assert (INSN_P (insn));
31167   x = PATTERN (insn);
31168   if (GET_CODE (x) != SET)
31169     return false;
31170 
31171   src = SET_SRC (x);
31172   dest = SET_DEST (x);
31173   if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
31174     {
31175       *is_load = false;
31176       extract_base_offset_in_addr (dest, base, offset);
31177     }
31178   else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
31179     {
31180       *is_load = true;
31181       extract_base_offset_in_addr (src, base, offset);
31182     }
31183   else
31184     return false;
31185 
31186   return (*base != NULL_RTX && *offset != NULL_RTX);
31187 }
31188 
31189 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
31190 
31191    Currently we only support fusing ldr and str instructions, so FUSION_PRI
31192    and PRI are only calculated for these instructions.  For other instructions,
31193    FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds
31194    of instruction fusion can be supported by returning different priorities.
31195 
31196    It's important that irrelevant instructions get the largest FUSION_PRI.  */
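/* For example, the loads  ldr r4, [r0, #4]  and  ldr r5, [r0, #8]  receive the
   same FUSION_PRI (same base register, both loads) and PRI values that place
   the #4 access ahead of the #8 one, so the scheduler keeps them adjacent
   where a later pass can combine them into an LDRD or LDM.  */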
31197 
31198 static void
31199 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
31200 			   int *fusion_pri, int *pri)
31201 {
31202   int tmp, off_val;
31203   bool is_load;
31204   rtx base, offset;
31205 
31206   gcc_assert (INSN_P (insn));
31207 
31208   tmp = max_pri - 1;
31209   if (!fusion_load_store (insn, &base, &offset, &is_load))
31210     {
31211       *pri = tmp;
31212       *fusion_pri = tmp;
31213       return;
31214     }
31215 
31216   /* Load goes first.  */
31217   if (is_load)
31218     *fusion_pri = tmp - 1;
31219   else
31220     *fusion_pri = tmp - 2;
31221 
31222   tmp /= 2;
31223 
31224   /* INSN with smaller base register goes first.  */
31225   tmp -= ((REGNO (base) & 0xff) << 20);
31226 
31227   /* INSN with smaller offset goes first.  */
31228   off_val = (int)(INTVAL (offset));
31229   if (off_val >= 0)
31230     tmp -= (off_val & 0xfffff);
31231   else
31232     tmp += ((- off_val) & 0xfffff);
31233 
31234   *pri = tmp;
31235   return;
31236 }
31237 
31238 
31239 /* Construct and return a PARALLEL RTX vector with elements numbering the
31240    lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
31241    the vector - from the perspective of the architecture.  This does not
31242    line up with GCC's perspective on lane numbers, so we end up with
31243    different masks depending on our target endian-ness.  The diagram
31244    below may help.  We must draw the distinction when building masks
31245    which select one half of the vector.  An instruction selecting
31246    architectural low-lanes for a big-endian target, must be described using
31247    a mask selecting GCC high-lanes.
31248 
31249                  Big-Endian             Little-Endian
31250 
31251 GCC             0   1   2   3           3   2   1   0
31252               | x | x | x | x |       | x | x | x | x |
31253 Architecture    3   2   1   0           3   2   1   0
31254 
31255 Low Mask:         { 2, 3 }                { 0, 1 }
31256 High Mask:        { 0, 1 }                { 2, 3 }
31257 */
31258 
31259 rtx
31260 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
31261 {
31262   int nunits = GET_MODE_NUNITS (mode);
31263   rtvec v = rtvec_alloc (nunits / 2);
31264   int high_base = nunits / 2;
31265   int low_base = 0;
31266   int base;
31267   rtx t1;
31268   int i;
31269 
31270   if (BYTES_BIG_ENDIAN)
31271     base = high ? low_base : high_base;
31272   else
31273     base = high ? high_base : low_base;
31274 
31275   for (i = 0; i < nunits / 2; i++)
31276     RTVEC_ELT (v, i) = GEN_INT (base + i);
31277 
31278   t1 = gen_rtx_PARALLEL (mode, v);
31279   return t1;
31280 }
31281 
31282 /* Check OP for validity as a PARALLEL RTX vector with elements
31283    numbering the lanes of either the high (HIGH == TRUE) or low lanes,
31284    from the perspective of the architecture.  See the diagram above
31285    arm_simd_vect_par_cnst_half for more details.  */
31286 
31287 bool
31288 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
31289 				       bool high)
31290 {
31291   rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
31292   HOST_WIDE_INT count_op = XVECLEN (op, 0);
31293   HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
31294   int i = 0;
31295 
31296   if (!VECTOR_MODE_P (mode))
31297     return false;
31298 
31299   if (count_op != count_ideal)
31300     return false;
31301 
31302   for (i = 0; i < count_ideal; i++)
31303     {
31304       rtx elt_op = XVECEXP (op, 0, i);
31305       rtx elt_ideal = XVECEXP (ideal, 0, i);
31306 
31307       if (!CONST_INT_P (elt_op)
31308 	  || INTVAL (elt_ideal) != INTVAL (elt_op))
31309 	return false;
31310     }
31311   return true;
31312 }
31313 
31314 /* Can output mi_thunk for all cases except for non-zero vcall_offset
31315    in Thumb1.  */
31316 static bool
31317 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
31318 			 const_tree)
31319 {
31320   /* For now, we punt and do not handle this for TARGET_THUMB1.  */
31321   if (vcall_offset && TARGET_THUMB1)
31322     return false;
31323 
31324   /* Otherwise ok.  */
31325   return true;
31326 }
31327 
31328 /* Generate RTL for a conditional branch with rtx comparison CODE in
31329    mode CC_MODE. The destination of the unlikely conditional branch
31330    is LABEL_REF.  */
31331 
31332 void
31333 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
31334 			  rtx label_ref)
31335 {
31336   rtx x;
31337   x = gen_rtx_fmt_ee (code, VOIDmode,
31338 		      gen_rtx_REG (cc_mode, CC_REGNUM),
31339 		      const0_rtx);
31340 
31341   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31342 			    gen_rtx_LABEL_REF (VOIDmode, label_ref),
31343 			    pc_rtx);
31344   emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31345 }
31346 
31347 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31348 
31349    For pure-code sections there is no letter code for this attribute, so
31350    output all the section flags numerically when this is needed.  */
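/* The values used below correspond to the ELF section flags:
   0x1 SHF_WRITE, 0x2 SHF_ALLOC, 0x4 SHF_EXECINSTR, 0x10 SHF_MERGE,
   0x20 SHF_STRINGS, 0x200 SHF_GROUP, 0x400 SHF_TLS, 0x80000000 SHF_EXCLUDE
   and 0x20000000 SHF_ARM_PURECODE.  */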
31351 
31352 static bool
31353 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
31354 {
31355 
31356   if (flags & SECTION_ARM_PURECODE)
31357     {
31358       *num = 0x20000000;
31359 
31360       if (!(flags & SECTION_DEBUG))
31361 	*num |= 0x2;
31362       if (flags & SECTION_EXCLUDE)
31363 	*num |= 0x80000000;
31364       if (flags & SECTION_WRITE)
31365 	*num |= 0x1;
31366       if (flags & SECTION_CODE)
31367 	*num |= 0x4;
31368       if (flags & SECTION_MERGE)
31369 	*num |= 0x10;
31370       if (flags & SECTION_STRINGS)
31371 	*num |= 0x20;
31372       if (flags & SECTION_TLS)
31373 	*num |= 0x400;
31374       if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31375 	*num |= 0x200;
31376 
31377       return true;
31378     }
31379 
31380   return false;
31381 }
31382 
31383 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31384 
31385    If pure-code is passed as an option, make sure all functions are in
31386    sections that have the SHF_ARM_PURECODE attribute.  */
31387 
31388 static section *
31389 arm_function_section (tree decl, enum node_frequency freq,
31390 		      bool startup, bool exit)
31391 {
31392   const char * section_name;
31393   section * sec;
31394 
31395   if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31396     return default_function_section (decl, freq, startup, exit);
31397 
31398   if (!target_pure_code)
31399     return default_function_section (decl, freq, startup, exit);
31400 
31401 
31402   section_name = DECL_SECTION_NAME (decl);
31403 
31404   /* If a function is not in a named section then it falls under the 'default'
31405      text section, also known as '.text'.  We can preserve previous behavior as
31406      the default text section already has the SHF_ARM_PURECODE section
31407      attribute.  */
31408   if (!section_name)
31409     {
31410       section *default_sec = default_function_section (decl, freq, startup,
31411 						       exit);
31412 
31413       /* If default_sec is not null, then it must be a special section like for
31414 	 example .text.startup.  We set the pure-code attribute and return the
31415 	 same section to preserve existing behavior.  */
31416       if (default_sec)
31417 	  default_sec->common.flags |= SECTION_ARM_PURECODE;
31418       return default_sec;
31419     }
31420 
31421   /* Otherwise look whether a section has already been created with
31422      'section_name'.  */
31423   sec = get_named_section (decl, section_name, 0);
31424   if (!sec)
31425     /* If that is not the case passing NULL as the section's name to
31426        'get_named_section' will create a section with the declaration's
31427        section name.  */
31428     sec = get_named_section (decl, NULL, 0);
31429 
31430   /* Set the SHF_ARM_PURECODE attribute.  */
31431   sec->common.flags |= SECTION_ARM_PURECODE;
31432 
31433   return sec;
31434 }
31435 
31436 /* Implements the TARGET_SECTION_FLAGS hook.
31437 
31438    If DECL is a function declaration and pure-code is passed as an option
31439    then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
31440    section's name and RELOC indicates whether the declaration's initializer may
31441    contain runtime relocations.  */
31442 
31443 static unsigned int
31444 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31445 {
31446   unsigned int flags = default_section_type_flags (decl, name, reloc);
31447 
31448   if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31449     flags |= SECTION_ARM_PURECODE;
31450 
31451   return flags;
31452 }
31453 
31454 /* Generate call to __aeabi_[mode]divmod (op0, op1).  */
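/* For SImode this expands to a call to __aeabi_idivmod or __aeabi_uidivmod,
   which under the AEABI return the quotient in r0 and the remainder in r1;
   the double-width libcall value built below is then split back into its two
   halves with subregs.  */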
31455 
31456 static void
31457 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31458 			   rtx op0, rtx op1,
31459 			   rtx *quot_p, rtx *rem_p)
31460 {
31461   if (mode == SImode)
31462     gcc_assert (!TARGET_IDIV);
31463 
31464   scalar_int_mode libval_mode
31465     = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31466 
31467   rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31468 					libval_mode,
31469 					op0, GET_MODE (op0),
31470 					op1, GET_MODE (op1));
31471 
31472   rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31473   rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31474 				       GET_MODE_SIZE (mode));
31475 
31476   gcc_assert (quotient);
31477   gcc_assert (remainder);
31478 
31479   *quot_p = quotient;
31480   *rem_p = remainder;
31481 }
31482 
31483 /* This function checks for the availability of the coprocessor builtin passed
31484    in BUILTIN for the current target.  Returns true if it is available and
31485    false otherwise.  If a BUILTIN is passed for which this function has not
31486    been implemented, it will trigger gcc_unreachable ().  */
31487 
31488 bool
31489 arm_coproc_builtin_available (enum unspecv builtin)
31490 {
31491   /* None of these builtins are available in Thumb mode if the target only
31492      supports Thumb-1.  */
31493   if (TARGET_THUMB1)
31494     return false;
31495 
31496   switch (builtin)
31497     {
31498       case VUNSPEC_CDP:
31499       case VUNSPEC_LDC:
31500       case VUNSPEC_LDCL:
31501       case VUNSPEC_STC:
31502       case VUNSPEC_STCL:
31503       case VUNSPEC_MCR:
31504       case VUNSPEC_MRC:
31505 	if (arm_arch4)
31506 	  return true;
31507 	break;
31508       case VUNSPEC_CDP2:
31509       case VUNSPEC_LDC2:
31510       case VUNSPEC_LDC2L:
31511       case VUNSPEC_STC2:
31512       case VUNSPEC_STC2L:
31513       case VUNSPEC_MCR2:
31514       case VUNSPEC_MRC2:
31515 	/* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31516 	   ARMv8-{A,M}.  */
31517 	if (arm_arch5)
31518 	  return true;
31519 	break;
31520       case VUNSPEC_MCRR:
31521       case VUNSPEC_MRRC:
31522 	/* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31523 	   ARMv8-{A,M}.  */
31524 	if (arm_arch6 || arm_arch5te)
31525 	  return true;
31526 	break;
31527       case VUNSPEC_MCRR2:
31528       case VUNSPEC_MRRC2:
31529 	if (arm_arch6)
31530 	  return true;
31531 	break;
31532       default:
31533 	gcc_unreachable ();
31534     }
31535   return false;
31536 }
31537 
31538 /* This function returns true if OP is a valid memory operand for the ldc and
31539    stc coprocessor instructions and false otherwise.  */
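/* For instance [r2], [r2, #-1020] and [r2, #1020] are accepted below, as are
   the auto-increment and auto-decrement forms, while [r2, #2] is rejected
   because the offset is not a multiple of 4.  */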
31540 
31541 bool
31542 arm_coproc_ldc_stc_legitimate_address (rtx op)
31543 {
31544   HOST_WIDE_INT range;
31545   /* Has to be a memory operand.  */
31546   if (!MEM_P (op))
31547     return false;
31548 
31549   op = XEXP (op, 0);
31550 
31551   /* We accept registers.  */
31552   if (REG_P (op))
31553     return true;
31554 
31555   switch (GET_CODE (op))
31556     {
31557       case PLUS:
31558 	{
31559 	  /* Or registers with an offset.  */
31560 	  if (!REG_P (XEXP (op, 0)))
31561 	    return false;
31562 
31563 	  op = XEXP (op, 1);
31564 
31565 	  /* The offset must be an immediate though.  */
31566 	  if (!CONST_INT_P (op))
31567 	    return false;
31568 
31569 	  range = INTVAL (op);
31570 
31571 	  /* Within the range of [-1020,1020].  */
31572 	  if (!IN_RANGE (range, -1020, 1020))
31573 	    return false;
31574 
31575 	  /* And a multiple of 4.  */
31576 	  return (range % 4) == 0;
31577 	}
31578       case PRE_INC:
31579       case POST_INC:
31580       case PRE_DEC:
31581       case POST_DEC:
31582 	return REG_P (XEXP (op, 0));
31583       default:
31584 	gcc_unreachable ();
31585     }
31586   return false;
31587 }
31588 
31589 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
31590 
31591    In VFPv1, VFP registers could only be accessed in the mode they were
31592    set, so subregs would be invalid there.  However, we don't support
31593    VFPv1 at the moment, and the restriction was lifted in VFPv2.
31594 
31595    In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
31596    VFP registers in little-endian order.  We can't describe that accurately to
31597    GCC, so avoid taking subregs of such values.
31598 
31599    The only exception is going from a 128-bit to a 64-bit type.  In that
31600    case the data layout happens to be consistent for big-endian, so we
31601    explicitly allow that case.  */
31602 
31603 static bool
31604 arm_can_change_mode_class (machine_mode from, machine_mode to,
31605 			   reg_class_t rclass)
31606 {
31607   if (TARGET_BIG_END
31608       && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
31609       && (GET_MODE_SIZE (from) > UNITS_PER_WORD
31610 	  || GET_MODE_SIZE (to) > UNITS_PER_WORD)
31611       && reg_classes_intersect_p (VFP_REGS, rclass))
31612     return false;
31613   return true;
31614 }
31615 
31616 /* Implement TARGET_CONSTANT_ALIGNMENT.  Make strings word-aligned so
31617    strcpy from constants will be faster.  */
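/* For example, a string constant is given at least word (32-bit) alignment
   here, or doubled to 64 bits when tuning for XScale in ARM state, unless we
   are optimizing for size.  */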
31618 
31619 static HOST_WIDE_INT
31620 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
31621 {
31622   unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
31623   if (TREE_CODE (exp) == STRING_CST && !optimize_size)
31624     return MAX (align, BITS_PER_WORD * factor);
31625   return align;
31626 }
31627 
31628 #if CHECKING_P
31629 namespace selftest {
31630 
31631 /* Scan the static data tables generated by parsecpu.awk looking for
31632    potential issues with the data.  We primarily check for
31633    inconsistencies in the option extensions at present (extensions
31634    that duplicate others but aren't marked as aliases).  Furthermore,
31635    for correct canonicalization, later options must never be a subset
31636    of an earlier option.  Any extension should also only specify other
31637    feature bits and never an architecture bit.  The architecture is inferred
31638    from the declaration of the extension.  */
31639 static void
31640 arm_test_cpu_arch_data (void)
31641 {
31642   const arch_option *arch;
31643   const cpu_option *cpu;
31644   auto_sbitmap target_isa (isa_num_bits);
31645   auto_sbitmap isa1 (isa_num_bits);
31646   auto_sbitmap isa2 (isa_num_bits);
31647 
31648   for (arch = all_architectures; arch->common.name != NULL; ++arch)
31649     {
31650       const cpu_arch_extension *ext1, *ext2;
31651 
31652       if (arch->common.extensions == NULL)
31653 	continue;
31654 
31655       arm_initialize_isa (target_isa, arch->common.isa_bits);
31656 
31657       for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31658 	{
31659 	  if (ext1->alias)
31660 	    continue;
31661 
31662 	  arm_initialize_isa (isa1, ext1->isa_bits);
31663 	  for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31664 	    {
31665 	      if (ext2->alias || ext1->remove != ext2->remove)
31666 		continue;
31667 
31668 	      arm_initialize_isa (isa2, ext2->isa_bits);
31669 	      /* If the option is a subset of the parent option, it doesn't
31670 		 add anything and so isn't useful.  */
31671 	      ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31672 
31673 	      /* If the extension specifies any architectural bits then
31674 		 disallow it.  Extensions should only specify feature bits.  */
31675 	      ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31676 	    }
31677 	}
31678     }
31679 
31680   for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31681     {
31682       const cpu_arch_extension *ext1, *ext2;
31683 
31684       if (cpu->common.extensions == NULL)
31685 	continue;
31686 
31687       arm_initialize_isa (target_isa, arch->common.isa_bits);
31688 
31689       for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31690 	{
31691 	  if (ext1->alias)
31692 	    continue;
31693 
31694 	  arm_initialize_isa (isa1, ext1->isa_bits);
31695 	  for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31696 	    {
31697 	      if (ext2->alias || ext1->remove != ext2->remove)
31698 		continue;
31699 
31700 	      arm_initialize_isa (isa2, ext2->isa_bits);
31701 	      /* If the option is a subset of the parent option, it doesn't
31702 		 add anything and so isn't useful.  */
31703 	      ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31704 
31705 	      /* If the extension specifies any architectural bits then
31706 		 disallow it.  Extensions should only specify feature bits.  */
31707 	      ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31708 	    }
31709 	}
31710     }
31711 }
31712 
31713 /* Scan the static data tables generated by parsecpu.awk looking for
31714    potential issues with the data.  Here we check for consistency between the
31715    fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
31716    a feature bit that is not defined by any FPU flag.  */
31717 static void
31718 arm_test_fpu_data (void)
31719 {
31720   auto_sbitmap isa_all_fpubits (isa_num_bits);
31721   auto_sbitmap fpubits (isa_num_bits);
31722   auto_sbitmap tmpset (isa_num_bits);
31723 
31724   static const enum isa_feature fpu_bitlist[]
31725     = { ISA_ALL_FPU_INTERNAL, isa_nobit };
31726   arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
31727 
31728   for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31729   {
31730     arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
31731     bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
31732     bitmap_clear (isa_all_fpubits);
31733     bitmap_copy (isa_all_fpubits, tmpset);
31734   }
31735 
31736   if (!bitmap_empty_p (isa_all_fpubits))
31737     {
31738 	fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
31739 			 " group that are not defined by any FPU.\n"
31740 			 "       Check your arm-cpus.in.\n");
31741 	ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
31742     }
31743 }
31744 
31745 static void
31746 arm_run_selftests (void)
31747 {
31748   arm_test_cpu_arch_data ();
31749   arm_test_fpu_data ();
31750 }
31751 } /* Namespace selftest.  */
31752 
31753 #undef TARGET_RUN_TARGET_SELFTESTS
31754 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31755 #endif /* CHECKING_P */
31756 
31757 struct gcc_target targetm = TARGET_INITIALIZER;
31758 
31759 #include "gt-arm.h"
31760