xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/config/arm/arm.c (revision 8feb0f0b7eaff0608f8350bbfa3098827b4bb91b)
1 /* Output routines for GCC for ARM.
2    Copyright (C) 1991-2020 Free Software Foundation, Inc.
3    Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4    and Martin Simmons (@harleqn.co.uk).
5    More major hacks by Richard Earnshaw (rearnsha@arm.com).
6 
7    This file is part of GCC.
8 
9    GCC is free software; you can redistribute it and/or modify it
10    under the terms of the GNU General Public License as published
11    by the Free Software Foundation; either version 3, or (at your
12    option) any later version.
13 
14    GCC is distributed in the hope that it will be useful, but WITHOUT
15    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
17    License for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with GCC; see the file COPYING3.  If not see
21    <http://www.gnu.org/licenses/>.  */
22 
23 #define IN_TARGET_CODE 1
24 
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "df.h"
36 #include "tm_p.h"
37 #include "stringpool.h"
38 #include "attribs.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "cgraph.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "output.h"
51 #include "insn-attr.h"
52 #include "flags.h"
53 #include "reload.h"
54 #include "explow.h"
55 #include "expr.h"
56 #include "cfgrtl.h"
57 #include "sched-int.h"
58 #include "common/common-target.h"
59 #include "langhooks.h"
60 #include "intl.h"
61 #include "libfuncs.h"
62 #include "opts.h"
63 #include "dumpfile.h"
64 #include "target-globals.h"
65 #include "builtins.h"
66 #include "tm-constrs.h"
67 #include "rtl-iter.h"
68 #include "optabs-libfuncs.h"
69 #include "gimplify.h"
70 #include "gimple.h"
71 #include "selftest.h"
72 
73 /* This file should be included last.  */
74 #include "target-def.h"
75 
76 /* Forward definitions of types.  */
77 typedef struct minipool_node    Mnode;
78 typedef struct minipool_fixup   Mfix;
79 
80 void (*arm_lang_output_object_attributes_hook)(void);
81 
82 struct four_ints
83 {
84   int i[4];
85 };
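
/* A four_ints holds the up-to-four immediates from which
   optimal_immediate_sequence (declared below) builds a 32-bit constant.
   Illustrative sketch, with an arbitrarily chosen constant: every
   byte-aligned 8-bit chunk is a valid ARM rotated immediate, so

     0x12345678 = 0x12000000 + 0x00340000 + 0x00005600 + 0x00000078

   can always be synthesised with one MOV plus three ADDs/ORRs; the search
   below merely looks for something shorter.  */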
86 
87 /* Forward function declarations.  */
88 static bool arm_const_not_ok_for_debug_p (rtx);
89 static int arm_needs_doubleword_align (machine_mode, const_tree);
90 static int arm_compute_static_chain_stack_bytes (void);
91 static arm_stack_offsets *arm_get_frame_offsets (void);
92 static void arm_compute_frame_layout (void);
93 static void arm_add_gc_roots (void);
94 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
95 			     unsigned HOST_WIDE_INT, rtx, rtx, int, int);
96 static unsigned bit_count (unsigned long);
97 static unsigned bitmap_popcount (const sbitmap);
98 static int arm_address_register_rtx_p (rtx, int);
99 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
100 static bool is_called_in_ARM_mode (tree);
101 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
102 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
103 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
104 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
105 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
106 inline static int thumb1_index_register_rtx_p (rtx, int);
107 static int thumb_far_jump_used_p (void);
108 static bool thumb_force_lr_save (void);
109 static unsigned arm_size_return_regs (void);
110 static bool arm_assemble_integer (rtx, unsigned int, int);
111 static void arm_print_operand (FILE *, rtx, int);
112 static void arm_print_operand_address (FILE *, machine_mode, rtx);
113 static bool arm_print_operand_punct_valid_p (unsigned char code);
114 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
115 static arm_cc get_arm_condition_code (rtx);
116 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
117 static const char *output_multi_immediate (rtx *, const char *, const char *,
118 					   int, HOST_WIDE_INT);
119 static const char *shift_op (rtx, HOST_WIDE_INT *);
120 static struct machine_function *arm_init_machine_status (void);
121 static void thumb_exit (FILE *, int);
122 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
123 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
124 static Mnode *add_minipool_forward_ref (Mfix *);
125 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
126 static Mnode *add_minipool_backward_ref (Mfix *);
127 static void assign_minipool_offsets (Mfix *);
128 static void arm_print_value (FILE *, rtx);
129 static void dump_minipool (rtx_insn *);
130 static int arm_barrier_cost (rtx_insn *);
131 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
132 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
133 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
134 			       machine_mode, rtx);
135 static void arm_reorg (void);
136 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
137 static unsigned long arm_compute_save_reg0_reg12_mask (void);
138 static unsigned long arm_compute_save_core_reg_mask (void);
139 static unsigned long arm_isr_value (tree);
140 static unsigned long arm_compute_func_type (void);
141 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
142 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
143 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
144 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
145 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
146 #endif
147 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
148 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
149 static void arm_output_function_epilogue (FILE *);
150 static void arm_output_function_prologue (FILE *);
151 static int arm_comp_type_attributes (const_tree, const_tree);
152 static void arm_set_default_type_attributes (tree);
153 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
154 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
155 static int optimal_immediate_sequence (enum rtx_code code,
156 				       unsigned HOST_WIDE_INT val,
157 				       struct four_ints *return_sequence);
158 static int optimal_immediate_sequence_1 (enum rtx_code code,
159 					 unsigned HOST_WIDE_INT val,
160 					 struct four_ints *return_sequence,
161 					 int i);
162 static int arm_get_strip_length (int);
163 static bool arm_function_ok_for_sibcall (tree, tree);
164 static machine_mode arm_promote_function_mode (const_tree,
165 						    machine_mode, int *,
166 						    const_tree, int);
167 static bool arm_return_in_memory (const_tree, const_tree);
168 static rtx arm_function_value (const_tree, const_tree, bool);
169 static rtx arm_libcall_value_1 (machine_mode);
170 static rtx arm_libcall_value (machine_mode, const_rtx);
171 static bool arm_function_value_regno_p (const unsigned int);
172 static void arm_internal_label (FILE *, const char *, unsigned long);
173 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
174 				 tree);
175 static bool arm_have_conditional_execution (void);
176 static bool arm_cannot_force_const_mem (machine_mode, rtx);
177 static bool arm_legitimate_constant_p (machine_mode, rtx);
178 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
179 static int arm_insn_cost (rtx_insn *, bool);
180 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
181 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
182 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
183 static void emit_constant_insn (rtx cond, rtx pattern);
184 static rtx_insn *emit_set_insn (rtx, rtx);
185 static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx);
186 static rtx emit_multi_reg_push (unsigned long, unsigned long);
187 static void arm_emit_multi_reg_pop (unsigned long);
188 static int vfp_emit_fstmd (int, int);
189 static void arm_emit_vfp_multi_reg_pop (int, int, rtx);
190 static int arm_arg_partial_bytes (cumulative_args_t,
191 				  const function_arg_info &);
192 static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
193 static void arm_function_arg_advance (cumulative_args_t,
194 				      const function_arg_info &);
195 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
196 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
197 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
198 				      const_tree);
199 static rtx aapcs_libcall_value (machine_mode);
200 static int aapcs_select_return_coproc (const_tree, const_tree);
201 
202 #ifdef OBJECT_FORMAT_ELF
203 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
204 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
205 #endif
206 #ifndef ARM_PE
207 static void arm_encode_section_info (tree, rtx, int);
208 #endif
209 
210 static void arm_file_end (void);
211 static void arm_file_start (void);
212 static void arm_insert_attributes (tree, tree *);
213 
214 static void arm_setup_incoming_varargs (cumulative_args_t,
215 					const function_arg_info &, int *, int);
216 static bool arm_pass_by_reference (cumulative_args_t,
217 				   const function_arg_info &);
218 static bool arm_promote_prototypes (const_tree);
219 static bool arm_default_short_enums (void);
220 static bool arm_align_anon_bitfield (void);
221 static bool arm_return_in_msb (const_tree);
222 static bool arm_must_pass_in_stack (const function_arg_info &);
223 static bool arm_return_in_memory (const_tree, const_tree);
224 #if ARM_UNWIND_INFO
225 static void arm_unwind_emit (FILE *, rtx_insn *);
226 static bool arm_output_ttype (rtx);
227 static void arm_asm_emit_except_personality (rtx);
228 #endif
229 static void arm_asm_init_sections (void);
230 static rtx arm_dwarf_register_span (rtx);
231 
232 static tree arm_cxx_guard_type (void);
233 static bool arm_cxx_guard_mask_bit (void);
234 static tree arm_get_cookie_size (tree);
235 static bool arm_cookie_has_size (void);
236 static bool arm_cxx_cdtor_returns_this (void);
237 static bool arm_cxx_key_method_may_be_inline (void);
238 static void arm_cxx_determine_class_data_visibility (tree);
239 static bool arm_cxx_class_data_always_comdat (void);
240 static bool arm_cxx_use_aeabi_atexit (void);
241 static void arm_init_libfuncs (void);
242 static tree arm_build_builtin_va_list (void);
243 static void arm_expand_builtin_va_start (tree, rtx);
244 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
245 static void arm_option_override (void);
246 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
247 static void arm_option_restore (struct gcc_options *,
248 				struct cl_target_option *);
249 static void arm_override_options_after_change (void);
250 static void arm_option_print (FILE *, int, struct cl_target_option *);
251 static void arm_set_current_function (tree);
252 static bool arm_can_inline_p (tree, tree);
253 static void arm_relayout_function (tree);
254 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
255 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
256 static bool arm_sched_can_speculate_insn (rtx_insn *);
257 static bool arm_macro_fusion_p (void);
258 static bool arm_cannot_copy_insn_p (rtx_insn *);
259 static int arm_issue_rate (void);
260 static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
261 static int arm_first_cycle_multipass_dfa_lookahead (void);
262 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
263 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
264 static bool arm_output_addr_const_extra (FILE *, rtx);
265 static bool arm_allocate_stack_slots_for_args (void);
266 static bool arm_warn_func_return (tree);
267 static tree arm_promoted_type (const_tree t);
268 static bool arm_scalar_mode_supported_p (scalar_mode);
269 static bool arm_frame_pointer_required (void);
270 static bool arm_can_eliminate (const int, const int);
271 static void arm_asm_trampoline_template (FILE *);
272 static void arm_trampoline_init (rtx, tree, rtx);
273 static rtx arm_trampoline_adjust_address (rtx);
274 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
275 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
276 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
277 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
278 static bool arm_array_mode_supported_p (machine_mode,
279 					unsigned HOST_WIDE_INT);
280 static machine_mode arm_preferred_simd_mode (scalar_mode);
281 static bool arm_class_likely_spilled_p (reg_class_t);
282 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
283 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
284 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
285 						     const_tree type,
286 						     int misalignment,
287 						     bool is_packed);
288 static void arm_conditional_register_usage (void);
289 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
290 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
291 static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
292 static int arm_default_branch_cost (bool, bool);
293 static int arm_cortex_a5_branch_cost (bool, bool);
294 static int arm_cortex_m_branch_cost (bool, bool);
295 static int arm_cortex_m7_branch_cost (bool, bool);
296 
297 static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
298 					  const vec_perm_indices &);
299 
300 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
301 
302 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
303 					   tree vectype,
304 					   int misalign ATTRIBUTE_UNUSED);
305 static unsigned arm_add_stmt_cost (void *data, int count,
306 				   enum vect_cost_for_stmt kind,
307 				   struct _stmt_vec_info *stmt_info,
308 				   int misalign,
309 				   enum vect_cost_model_location where);
310 
311 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
312 					 bool op0_preserve_value);
313 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
314 
315 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
316 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
317 				     const_tree);
318 static section *arm_function_section (tree, enum node_frequency, bool, bool);
319 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
320 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
321 						int reloc);
322 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
323 static opt_scalar_float_mode arm_floatn_mode (int, bool);
324 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
325 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
326 static bool arm_modes_tieable_p (machine_mode, machine_mode);
327 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
328 static rtx_insn * thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
329 					vec<const char *> &, vec<rtx> &,
330 					HARD_REG_SET &);
331 static const char *arm_identify_fpu_from_isa (sbitmap);
332 
333 /* Table of machine attributes.  */
334 static const struct attribute_spec arm_attribute_table[] =
335 {
336   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
337        affects_type_identity, handler, exclude } */
338   /* Function calls made to this symbol must be done indirectly, because
339      it may lie outside of the 26 bit addressing range of a normal function
340      call.  */
341   { "long_call",    0, 0, false, true,  true,  false, NULL, NULL },
342   /* Whereas these functions are always known to reside within the 26 bit
343      addressing range.  */
344   { "short_call",   0, 0, false, true,  true,  false, NULL, NULL },
345   /* Specify the procedure call conventions for a function.  */
346   { "pcs",          1, 1, false, true,  true,  false, arm_handle_pcs_attribute,
347     NULL },
348   /* Interrupt Service Routines have special prologue and epilogue requirements.  */
349   { "isr",          0, 1, false, false, false, false, arm_handle_isr_attribute,
350     NULL },
351   { "interrupt",    0, 1, false, false, false, false, arm_handle_isr_attribute,
352     NULL },
353   { "naked",        0, 0, true,  false, false, false,
354     arm_handle_fndecl_attribute, NULL },
355 #ifdef ARM_PE
356   /* ARM/PE has three new attributes:
357      interfacearm - ?
358      dllexport - for exporting a function/variable that will live in a dll
359      dllimport - for importing a function/variable from a dll
360 
361      Microsoft allows multiple declspecs in one __declspec, separating
362      them with spaces.  We do NOT support this.  Instead, use __declspec
363      multiple times.
364   */
365   { "dllimport",    0, 0, true,  false, false, false, NULL, NULL },
366   { "dllexport",    0, 0, true,  false, false, false, NULL, NULL },
367   { "interfacearm", 0, 0, true,  false, false, false,
368     arm_handle_fndecl_attribute, NULL },
369 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
370   { "dllimport",    0, 0, false, false, false, false, handle_dll_attribute,
371     NULL },
372   { "dllexport",    0, 0, false, false, false, false, handle_dll_attribute,
373     NULL },
374   { "notshared",    0, 0, false, true, false, false,
375     arm_handle_notshared_attribute, NULL },
376 #endif
377   /* ARMv8-M Security Extensions support.  */
378   { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
379     arm_handle_cmse_nonsecure_entry, NULL },
380   { "cmse_nonsecure_call", 0, 0, true, false, false, true,
381     arm_handle_cmse_nonsecure_call, NULL },
382   { "Advanced SIMD type", 0, 0, false, true, false, true, NULL, NULL },
383   { NULL, 0, 0, false, false, false, false, NULL, NULL }
384 };
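
/* Illustrative only (not referenced elsewhere in this file): typical
   source-level spellings of the attributes handled above.

     void far_away (void) __attribute__ ((long_call));
     void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));
     double vsum (double, double) __attribute__ ((pcs ("aapcs-vfp")));
     int ns_entry (int) __attribute__ ((cmse_nonsecure_entry));

   The handler functions named in the table validate such uses and, where
   needed, reject them with a diagnostic.  */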
385 
386 /* Initialize the GCC target structure.  */
387 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
388 #undef  TARGET_MERGE_DECL_ATTRIBUTES
389 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
390 #endif
391 
392 #undef TARGET_CHECK_BUILTIN_CALL
393 #define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call
394 
395 #undef TARGET_LEGITIMIZE_ADDRESS
396 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
397 
398 #undef  TARGET_ATTRIBUTE_TABLE
399 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
400 
401 #undef  TARGET_INSERT_ATTRIBUTES
402 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
403 
404 #undef TARGET_ASM_FILE_START
405 #define TARGET_ASM_FILE_START arm_file_start
406 #undef TARGET_ASM_FILE_END
407 #define TARGET_ASM_FILE_END arm_file_end
408 
409 #undef  TARGET_ASM_ALIGNED_SI_OP
410 #define TARGET_ASM_ALIGNED_SI_OP NULL
411 #undef  TARGET_ASM_INTEGER
412 #define TARGET_ASM_INTEGER arm_assemble_integer
413 
414 #undef TARGET_PRINT_OPERAND
415 #define TARGET_PRINT_OPERAND arm_print_operand
416 #undef TARGET_PRINT_OPERAND_ADDRESS
417 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
418 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
419 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
420 
421 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
422 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
423 
424 #undef  TARGET_ASM_FUNCTION_PROLOGUE
425 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
426 
427 #undef  TARGET_ASM_FUNCTION_EPILOGUE
428 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
429 
430 #undef TARGET_CAN_INLINE_P
431 #define TARGET_CAN_INLINE_P arm_can_inline_p
432 
433 #undef TARGET_RELAYOUT_FUNCTION
434 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
435 
436 #undef  TARGET_OPTION_OVERRIDE
437 #define TARGET_OPTION_OVERRIDE arm_option_override
438 
439 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
440 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
441 
442 #undef TARGET_OPTION_SAVE
443 #define TARGET_OPTION_SAVE arm_option_save
444 
445 #undef TARGET_OPTION_RESTORE
446 #define TARGET_OPTION_RESTORE arm_option_restore
447 
448 #undef TARGET_OPTION_PRINT
449 #define TARGET_OPTION_PRINT arm_option_print
450 
451 #undef  TARGET_COMP_TYPE_ATTRIBUTES
452 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
453 
454 #undef TARGET_SCHED_CAN_SPECULATE_INSN
455 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
456 
457 #undef TARGET_SCHED_MACRO_FUSION_P
458 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
459 
460 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
461 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
462 
463 #undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
464 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
465 
466 #undef  TARGET_SCHED_ADJUST_COST
467 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
468 
469 #undef TARGET_SET_CURRENT_FUNCTION
470 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
471 
472 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
473 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
474 
475 #undef TARGET_SCHED_REORDER
476 #define TARGET_SCHED_REORDER arm_sched_reorder
477 
478 #undef TARGET_REGISTER_MOVE_COST
479 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
480 
481 #undef TARGET_MEMORY_MOVE_COST
482 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
483 
484 #undef TARGET_ENCODE_SECTION_INFO
485 #ifdef ARM_PE
486 #define TARGET_ENCODE_SECTION_INFO  arm_pe_encode_section_info
487 #else
488 #define TARGET_ENCODE_SECTION_INFO  arm_encode_section_info
489 #endif
490 
491 #undef  TARGET_STRIP_NAME_ENCODING
492 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
493 
494 #undef  TARGET_ASM_INTERNAL_LABEL
495 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
496 
497 #undef TARGET_FLOATN_MODE
498 #define TARGET_FLOATN_MODE arm_floatn_mode
499 
500 #undef  TARGET_FUNCTION_OK_FOR_SIBCALL
501 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
502 
503 #undef  TARGET_FUNCTION_VALUE
504 #define TARGET_FUNCTION_VALUE arm_function_value
505 
506 #undef  TARGET_LIBCALL_VALUE
507 #define TARGET_LIBCALL_VALUE arm_libcall_value
508 
509 #undef TARGET_FUNCTION_VALUE_REGNO_P
510 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
511 
512 #undef  TARGET_ASM_OUTPUT_MI_THUNK
513 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
514 #undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
515 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
516 
517 #undef  TARGET_RTX_COSTS
518 #define TARGET_RTX_COSTS arm_rtx_costs
519 #undef  TARGET_ADDRESS_COST
520 #define TARGET_ADDRESS_COST arm_address_cost
521 #undef TARGET_INSN_COST
522 #define TARGET_INSN_COST arm_insn_cost
523 
524 #undef TARGET_SHIFT_TRUNCATION_MASK
525 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
526 #undef TARGET_VECTOR_MODE_SUPPORTED_P
527 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
528 #undef TARGET_ARRAY_MODE_SUPPORTED_P
529 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
530 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
531 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
532 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
533 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
534   arm_autovectorize_vector_modes
535 
536 #undef  TARGET_MACHINE_DEPENDENT_REORG
537 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
538 
539 #undef  TARGET_INIT_BUILTINS
540 #define TARGET_INIT_BUILTINS  arm_init_builtins
541 #undef  TARGET_EXPAND_BUILTIN
542 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
543 #undef  TARGET_BUILTIN_DECL
544 #define TARGET_BUILTIN_DECL arm_builtin_decl
545 
546 #undef TARGET_INIT_LIBFUNCS
547 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
548 
549 #undef TARGET_PROMOTE_FUNCTION_MODE
550 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
551 #undef TARGET_PROMOTE_PROTOTYPES
552 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
553 #undef TARGET_PASS_BY_REFERENCE
554 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
555 #undef TARGET_ARG_PARTIAL_BYTES
556 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
557 #undef TARGET_FUNCTION_ARG
558 #define TARGET_FUNCTION_ARG arm_function_arg
559 #undef TARGET_FUNCTION_ARG_ADVANCE
560 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
561 #undef TARGET_FUNCTION_ARG_PADDING
562 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
563 #undef TARGET_FUNCTION_ARG_BOUNDARY
564 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
565 
566 #undef  TARGET_SETUP_INCOMING_VARARGS
567 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
568 
569 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
570 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
571 
572 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
573 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
574 #undef TARGET_TRAMPOLINE_INIT
575 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
576 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
577 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
578 
579 #undef TARGET_WARN_FUNC_RETURN
580 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
581 
582 #undef TARGET_DEFAULT_SHORT_ENUMS
583 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
584 
585 #undef TARGET_ALIGN_ANON_BITFIELD
586 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
587 
588 #undef TARGET_NARROW_VOLATILE_BITFIELD
589 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
590 
591 #undef TARGET_CXX_GUARD_TYPE
592 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
593 
594 #undef TARGET_CXX_GUARD_MASK_BIT
595 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
596 
597 #undef TARGET_CXX_GET_COOKIE_SIZE
598 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
599 
600 #undef TARGET_CXX_COOKIE_HAS_SIZE
601 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
602 
603 #undef TARGET_CXX_CDTOR_RETURNS_THIS
604 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
605 
606 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
607 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
608 
609 #undef TARGET_CXX_USE_AEABI_ATEXIT
610 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
611 
612 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
613 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
614   arm_cxx_determine_class_data_visibility
615 
616 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
617 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
618 
619 #undef TARGET_RETURN_IN_MSB
620 #define TARGET_RETURN_IN_MSB arm_return_in_msb
621 
622 #undef TARGET_RETURN_IN_MEMORY
623 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
624 
625 #undef TARGET_MUST_PASS_IN_STACK
626 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
627 
628 #if ARM_UNWIND_INFO
629 #undef TARGET_ASM_UNWIND_EMIT
630 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
631 
632 /* EABI unwinding tables use a different format for the typeinfo tables.  */
633 #undef TARGET_ASM_TTYPE
634 #define TARGET_ASM_TTYPE arm_output_ttype
635 
636 #undef TARGET_ARM_EABI_UNWINDER
637 #define TARGET_ARM_EABI_UNWINDER true
638 
639 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
640 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
641 
642 #endif /* ARM_UNWIND_INFO */
643 
644 #undef TARGET_ASM_INIT_SECTIONS
645 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
646 
647 #undef TARGET_DWARF_REGISTER_SPAN
648 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
649 
650 #undef  TARGET_CANNOT_COPY_INSN_P
651 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
652 
653 #ifdef HAVE_AS_TLS
654 #undef TARGET_HAVE_TLS
655 #define TARGET_HAVE_TLS true
656 #endif
657 
658 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
659 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
660 
661 #undef TARGET_LEGITIMATE_CONSTANT_P
662 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
663 
664 #undef TARGET_CANNOT_FORCE_CONST_MEM
665 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
666 
667 #undef TARGET_MAX_ANCHOR_OFFSET
668 #define TARGET_MAX_ANCHOR_OFFSET 4095
669 
670 /* The minimum is set such that the total size of the block
671    for a particular anchor is -4088 + 1 + 4095 bytes, which is
672    divisible by eight, ensuring natural spacing of anchors.  */
673 #undef TARGET_MIN_ANCHOR_OFFSET
674 #define TARGET_MIN_ANCHOR_OFFSET -4088
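
/* Spelling the arithmetic out: offsets -4088 .. +4095 inclusive give
   4088 + 1 + 4095 = 8184 bytes per anchor, and 8184 = 8 * 1023, hence
   the "divisible by eight" spacing claimed above.  */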
675 
676 #undef TARGET_SCHED_ISSUE_RATE
677 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
678 
679 #undef TARGET_SCHED_VARIABLE_ISSUE
680 #define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue
681 
682 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
683 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
684   arm_first_cycle_multipass_dfa_lookahead
685 
686 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
687 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
688   arm_first_cycle_multipass_dfa_lookahead_guard
689 
690 #undef TARGET_MANGLE_TYPE
691 #define TARGET_MANGLE_TYPE arm_mangle_type
692 
693 #undef TARGET_INVALID_CONVERSION
694 #define TARGET_INVALID_CONVERSION arm_invalid_conversion
695 
696 #undef TARGET_INVALID_UNARY_OP
697 #define TARGET_INVALID_UNARY_OP arm_invalid_unary_op
698 
699 #undef TARGET_INVALID_BINARY_OP
700 #define TARGET_INVALID_BINARY_OP arm_invalid_binary_op
701 
702 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
703 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
704 
705 #undef TARGET_BUILD_BUILTIN_VA_LIST
706 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
707 #undef TARGET_EXPAND_BUILTIN_VA_START
708 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
709 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
710 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
711 
712 #ifdef HAVE_AS_TLS
713 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
714 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
715 #endif
716 
717 #undef TARGET_LEGITIMATE_ADDRESS_P
718 #define TARGET_LEGITIMATE_ADDRESS_P	arm_legitimate_address_p
719 
720 #undef TARGET_PREFERRED_RELOAD_CLASS
721 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
722 
723 #undef TARGET_PROMOTED_TYPE
724 #define TARGET_PROMOTED_TYPE arm_promoted_type
725 
726 #undef TARGET_SCALAR_MODE_SUPPORTED_P
727 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
728 
729 #undef TARGET_COMPUTE_FRAME_LAYOUT
730 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
731 
732 #undef TARGET_FRAME_POINTER_REQUIRED
733 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
734 
735 #undef TARGET_CAN_ELIMINATE
736 #define TARGET_CAN_ELIMINATE arm_can_eliminate
737 
738 #undef TARGET_CONDITIONAL_REGISTER_USAGE
739 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
740 
741 #undef TARGET_CLASS_LIKELY_SPILLED_P
742 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
743 
744 #undef TARGET_VECTORIZE_BUILTINS
745 #define TARGET_VECTORIZE_BUILTINS
746 
747 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
748 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
749   arm_builtin_vectorized_function
750 
751 #undef TARGET_VECTOR_ALIGNMENT
752 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
753 
754 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
755 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
756   arm_vector_alignment_reachable
757 
758 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
759 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
760   arm_builtin_support_vector_misalignment
761 
762 #undef TARGET_PREFERRED_RENAME_CLASS
763 #define TARGET_PREFERRED_RENAME_CLASS \
764   arm_preferred_rename_class
765 
766 #undef TARGET_VECTORIZE_VEC_PERM_CONST
767 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
768 
769 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
770 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
771   arm_builtin_vectorization_cost
772 #undef TARGET_VECTORIZE_ADD_STMT_COST
773 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
774 
775 #undef TARGET_CANONICALIZE_COMPARISON
776 #define TARGET_CANONICALIZE_COMPARISON \
777   arm_canonicalize_comparison
778 
779 #undef TARGET_ASAN_SHADOW_OFFSET
780 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
781 
782 #undef MAX_INSN_PER_IT_BLOCK
783 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
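
/* A Thumb-2 IT instruction can predicate up to four following insns;
   the -mrestrict-it option (which arm_restrict_it reflects) follows the
   ARMv8 advice of at most one insn per IT block.  */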
784 
785 #undef TARGET_CAN_USE_DOLOOP_P
786 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
787 
788 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
789 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
790 
791 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
792 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
793 
794 #undef TARGET_SCHED_FUSION_PRIORITY
795 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
796 
797 #undef  TARGET_ASM_FUNCTION_SECTION
798 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
799 
800 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
801 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
802 
803 #undef TARGET_SECTION_TYPE_FLAGS
804 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
805 
806 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
807 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
808 
809 #undef TARGET_C_EXCESS_PRECISION
810 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
811 
812 /* Although the architecture reserves bits 0 and 1, only the former is
813    used for ARM/Thumb ISA selection in v7 and earlier versions.  */
814 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
815 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
816 
817 #undef TARGET_FIXED_CONDITION_CODE_REGS
818 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
819 
820 #undef TARGET_HARD_REGNO_NREGS
821 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
822 #undef TARGET_HARD_REGNO_MODE_OK
823 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
824 
825 #undef TARGET_MODES_TIEABLE_P
826 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
827 
828 #undef TARGET_CAN_CHANGE_MODE_CLASS
829 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
830 
831 #undef TARGET_CONSTANT_ALIGNMENT
832 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
833 
834 #undef TARGET_MD_ASM_ADJUST
835 #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
836 
837 /* Obstack for minipool constant handling.  */
838 static struct obstack minipool_obstack;
839 static char *         minipool_startobj;
840 
841 /* The maximum number of insns that a conditional branch may skip over
842    and still have those insns conditionalised, if possible.  */
843 static int max_insns_skipped = 5;
844 
845 extern FILE * asm_out_file;
846 
847 /* True if we are currently building a constant table.  */
848 int making_const_table;
849 
850 /* The processor for which instructions should be scheduled.  */
851 enum processor_type arm_tune = TARGET_CPU_arm_none;
852 
853 /* The current tuning set.  */
854 const struct tune_params *current_tune;
855 
856 /* Which floating point hardware to schedule for.  */
857 int arm_fpu_attr;
858 
859 /* Used for Thumb call_via trampolines.  */
860 rtx thumb_call_via_label[14];
861 static int thumb_call_reg_needed;
862 
863 /* The bits in this mask specify which instruction scheduling options should
864    be used.  */
865 unsigned int tune_flags = 0;
866 
867 /* The highest ARM architecture version supported by the
868    target.  */
869 enum base_architecture arm_base_arch = BASE_ARCH_0;
870 
871 /* Active target architecture and tuning.  */
872 
873 struct arm_build_target arm_active_target;
874 
875 /* The following are used in the arm.md file as equivalents to bits
876    in the above two flag variables.  */
877 
878 /* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
879 int arm_arch4 = 0;
880 
881 /* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
882 int arm_arch4t = 0;
883 
884 /* Nonzero if this chip supports the ARM Architecture 5T extensions.  */
885 int arm_arch5t = 0;
886 
887 /* Nonzero if this chip supports the ARM Architecture 5TE extensions.  */
888 int arm_arch5te = 0;
889 
890 /* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
891 int arm_arch6 = 0;
892 
893 /* Nonzero if this chip supports the ARM 6K extensions.  */
894 int arm_arch6k = 0;
895 
896 /* Nonzero if this chip supports the ARM 6KZ extensions.  */
897 int arm_arch6kz = 0;
898 
899 /* Nonzero if instructions present in ARMv6-M can be used.  */
900 int arm_arch6m = 0;
901 
902 /* Nonzero if this chip supports the ARM 7 extensions.  */
903 int arm_arch7 = 0;
904 
905 /* Nonzero if this chip supports the Large Physical Address Extension.  */
906 int arm_arch_lpae = 0;
907 
908 /* Nonzero if instructions not present in the 'M' profile can be used.  */
909 int arm_arch_notm = 0;
910 
911 /* Nonzero if instructions present in ARMv7E-M can be used.  */
912 int arm_arch7em = 0;
913 
914 /* Nonzero if instructions present in ARMv8 can be used.  */
915 int arm_arch8 = 0;
916 
917 /* Nonzero if this chip supports the ARMv8.1 extensions.  */
918 int arm_arch8_1 = 0;
919 
920 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions.  */
921 int arm_arch8_2 = 0;
922 
923 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions.  */
924 int arm_arch8_3 = 0;
925 
926 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions.  */
927 int arm_arch8_4 = 0;
928 /* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
929    extensions.  */
930 int arm_arch8_1m_main = 0;
931 
932 /* Nonzero if this chip supports the FP16 instructions extension of ARM
933    Architecture 8.2.  */
934 int arm_fp16_inst = 0;
935 
936 /* Nonzero if this chip can benefit from load scheduling.  */
937 int arm_ld_sched = 0;
938 
939 /* Nonzero if this chip is a StrongARM.  */
940 int arm_tune_strongarm = 0;
941 
942 /* Nonzero if this chip supports Intel Wireless MMX technology.  */
943 int arm_arch_iwmmxt = 0;
944 
945 /* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
946 int arm_arch_iwmmxt2 = 0;
947 
948 /* Nonzero if this chip is an XScale.  */
949 int arm_arch_xscale = 0;
950 
951 /* Nonzero if tuning for XScale.  */
952 int arm_tune_xscale = 0;
953 
954 /* Nonzero if we want to tune for stores that access the write-buffer.
955    This typically means an ARM6 or ARM7 with MMU or MPU.  */
956 int arm_tune_wbuf = 0;
957 
958 /* Nonzero if tuning for Cortex-A9.  */
959 int arm_tune_cortex_a9 = 0;
960 
961 /* Nonzero if we should define __THUMB_INTERWORK__ in the
962    preprocessor.
963    XXX This is a bit of a hack; it's intended to help work around
964    problems in GLD, which doesn't understand that armv5t code is
965    interworking clean.  */
966 int arm_cpp_interwork = 0;
967 
968 /* Nonzero if chip supports Thumb 1.  */
969 int arm_arch_thumb1;
970 
971 /* Nonzero if chip supports Thumb 2.  */
972 int arm_arch_thumb2;
973 
974 /* Nonzero if chip supports integer division instruction.  */
975 int arm_arch_arm_hwdiv;
976 int arm_arch_thumb_hwdiv;
977 
978 /* Nonzero if chip disallows volatile memory access in IT block.  */
979 int arm_arch_no_volatile_ce;
980 
981 /* Nonzero if we shouldn't use literal pools.  */
982 bool arm_disable_literal_pool = false;
983 
984 /* The register number to be used for the PIC offset register.  */
985 unsigned arm_pic_register = INVALID_REGNUM;
986 
987 enum arm_pcs arm_pcs_default;
988 
989 /* For an explanation of these variables, see final_prescan_insn below.  */
990 int arm_ccfsm_state;
991 /* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
992 enum arm_cond_code arm_current_cc;
993 
994 rtx arm_target_insn;
995 int arm_target_label;
996 /* The number of conditionally executed insns, including the current insn.  */
997 int arm_condexec_count = 0;
998 /* A bitmask specifying the patterns for the IT block.
999    Zero means do not output an IT block before this insn. */
1000 int arm_condexec_mask = 0;
1001 /* The number of bits used in arm_condexec_mask.  */
1002 int arm_condexec_masklen = 0;
1003 
1004 /* Nonzero if chip supports the ARMv8 CRC instructions.  */
1005 int arm_arch_crc = 0;
1006 
1007 /* Nonzero if chip supports the AdvSIMD Dot Product instructions.  */
1008 int arm_arch_dotprod = 0;
1009 
1010 /* Nonzero if chip supports the ARMv8-M security extensions.  */
1011 int arm_arch_cmse = 0;
1012 
1013 /* Nonzero if the core has a very small, high-latency multiply unit.  */
1014 int arm_m_profile_small_mul = 0;
1015 
1016 /* Nonzero if chip supports the AdvSIMD I8MM instructions.  */
1017 int arm_arch_i8mm = 0;
1018 
1019 /* Nonzero if chip supports the BFloat16 instructions.  */
1020 int arm_arch_bf16 = 0;
1021 
1022 /* Nonzero if chip supports the Custom Datapath Extension.  */
1023 int arm_arch_cde = 0;
1024 int arm_arch_cde_coproc = 0;
1025 const int arm_arch_cde_coproc_bits[] = {
1026   0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
1027 };
1028 
1029 /* The condition codes of the ARM, and the inverse function.  */
1030 static const char * const arm_condition_codes[] =
1031 {
1032   "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
1033   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
1034 };
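
/* The codes are laid out in inverse pairs (eq/ne, cs/cc, mi/pl, ...), so
   with the usual ARM_INVERSE_CONDITION_CODE definition inverting a code is
   just an XOR of its index with 1.  */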
1035 
1036 /* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
1037 int arm_regs_in_sequence[] =
1038 {
1039   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1040 };
1041 
1042 #define DEF_FP_SYSREG(reg) #reg,
1043 const char *fp_sysreg_names[NB_FP_SYSREGS] = {
1044   FP_SYSREGS
1045 };
1046 #undef DEF_FP_SYSREG
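
/* For reference, assuming FP_SYSREGS is the usual X-macro list of
   DEF_FP_SYSREG (<reg>) entries defined in the headers, each entry expands
   to its quoted name here, e.g. DEF_FP_SYSREG (FPSCR) -> "FPSCR", giving
   one printable name per FP system register.  */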
1047 
1048 #define ARM_LSL_NAME "lsl"
1049 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1050 
1051 #define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1052 				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
1053 				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
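
/* I.e. the low registers r0-r7, minus the Thumb hard frame pointer and,
   should any of them fall in that range, the stack pointer, program counter
   and PIC register.  */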
1054 
1055 /* Initialization code.  */
1056 
1057 struct cpu_tune
1058 {
1059   enum processor_type scheduler;
1060   unsigned int tune_flags;
1061   const struct tune_params *tune;
1062 };
1063 
1064 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1065 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1066   {								\
1067     num_slots,							\
1068     l1_size,							\
1069     l1_line_size						\
1070   }
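
/* Illustrative expansion (the figures are invented, not a real core's):

     ARM_PREFETCH_NOT_BENEFICIAL          -> { 0, -1, -1 }
     ARM_PREFETCH_BENEFICIAL (4, -1, 64)  -> { 4, -1, 64 }

   i.e. number of prefetch slots, L1 cache size and L1 line size, with -1
   meaning "unknown / not applicable"; tune_params initializers later in
   this file use these to fill their prefetch fields.  */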
1071 
1072 /* arm generic vectorizer costs.  */
1073 static const
1074 struct cpu_vec_costs arm_default_vec_cost = {
1075   1,					/* scalar_stmt_cost.  */
1076   1,					/* scalar load_cost.  */
1077   1,					/* scalar_store_cost.  */
1078   1,					/* vec_stmt_cost.  */
1079   1,					/* vec_to_scalar_cost.  */
1080   1,					/* scalar_to_vec_cost.  */
1081   1,					/* vec_align_load_cost.  */
1082   1,					/* vec_unalign_load_cost.  */
1083   1,					/* vec_unalign_store_cost.  */
1084   1,					/* vec_store_cost.  */
1085   3,					/* cond_taken_branch_cost.  */
1086   1,					/* cond_not_taken_branch_cost.  */
1087 };
1088 
1089 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
1090 #include "aarch-cost-tables.h"
1091 
1092 
1093 
1094 const struct cpu_cost_table cortexa9_extra_costs =
1095 {
1096   /* ALU */
1097   {
1098     0,			/* arith.  */
1099     0,			/* logical.  */
1100     0,			/* shift.  */
1101     COSTS_N_INSNS (1),	/* shift_reg.  */
1102     COSTS_N_INSNS (1),	/* arith_shift.  */
1103     COSTS_N_INSNS (2),	/* arith_shift_reg.  */
1104     0,			/* log_shift.  */
1105     COSTS_N_INSNS (1),	/* log_shift_reg.  */
1106     COSTS_N_INSNS (1),	/* extend.  */
1107     COSTS_N_INSNS (2),	/* extend_arith.  */
1108     COSTS_N_INSNS (1),	/* bfi.  */
1109     COSTS_N_INSNS (1),	/* bfx.  */
1110     0,			/* clz.  */
1111     0,			/* rev.  */
1112     0,			/* non_exec.  */
1113     true		/* non_exec_costs_exec.  */
1114   },
1115   {
1116     /* MULT SImode */
1117     {
1118       COSTS_N_INSNS (3),	/* simple.  */
1119       COSTS_N_INSNS (3),	/* flag_setting.  */
1120       COSTS_N_INSNS (2),	/* extend.  */
1121       COSTS_N_INSNS (3),	/* add.  */
1122       COSTS_N_INSNS (2),	/* extend_add.  */
1123       COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A9.  */
1124     },
1125     /* MULT DImode */
1126     {
1127       0,			/* simple (N/A).  */
1128       0,			/* flag_setting (N/A).  */
1129       COSTS_N_INSNS (4),	/* extend.  */
1130       0,			/* add (N/A).  */
1131       COSTS_N_INSNS (4),	/* extend_add.  */
1132       0				/* idiv (N/A).  */
1133     }
1134   },
1135   /* LD/ST */
1136   {
1137     COSTS_N_INSNS (2),	/* load.  */
1138     COSTS_N_INSNS (2),	/* load_sign_extend.  */
1139     COSTS_N_INSNS (2),	/* ldrd.  */
1140     COSTS_N_INSNS (2),	/* ldm_1st.  */
1141     1,			/* ldm_regs_per_insn_1st.  */
1142     2,			/* ldm_regs_per_insn_subsequent.  */
1143     COSTS_N_INSNS (5),	/* loadf.  */
1144     COSTS_N_INSNS (5),	/* loadd.  */
1145     COSTS_N_INSNS (1),  /* load_unaligned.  */
1146     COSTS_N_INSNS (2),	/* store.  */
1147     COSTS_N_INSNS (2),	/* strd.  */
1148     COSTS_N_INSNS (2),	/* stm_1st.  */
1149     1,			/* stm_regs_per_insn_1st.  */
1150     2,			/* stm_regs_per_insn_subsequent.  */
1151     COSTS_N_INSNS (1),	/* storef.  */
1152     COSTS_N_INSNS (1),	/* stored.  */
1153     COSTS_N_INSNS (1),	/* store_unaligned.  */
1154     COSTS_N_INSNS (1),	/* loadv.  */
1155     COSTS_N_INSNS (1)	/* storev.  */
1156   },
1157   {
1158     /* FP SFmode */
1159     {
1160       COSTS_N_INSNS (14),	/* div.  */
1161       COSTS_N_INSNS (4),	/* mult.  */
1162       COSTS_N_INSNS (7),	/* mult_addsub. */
1163       COSTS_N_INSNS (30),	/* fma.  */
1164       COSTS_N_INSNS (3),	/* addsub.  */
1165       COSTS_N_INSNS (1),	/* fpconst.  */
1166       COSTS_N_INSNS (1),	/* neg.  */
1167       COSTS_N_INSNS (3),	/* compare.  */
1168       COSTS_N_INSNS (3),	/* widen.  */
1169       COSTS_N_INSNS (3),	/* narrow.  */
1170       COSTS_N_INSNS (3),	/* toint.  */
1171       COSTS_N_INSNS (3),	/* fromint.  */
1172       COSTS_N_INSNS (3)		/* roundint.  */
1173     },
1174     /* FP DFmode */
1175     {
1176       COSTS_N_INSNS (24),	/* div.  */
1177       COSTS_N_INSNS (5),	/* mult.  */
1178       COSTS_N_INSNS (8),	/* mult_addsub.  */
1179       COSTS_N_INSNS (30),	/* fma.  */
1180       COSTS_N_INSNS (3),	/* addsub.  */
1181       COSTS_N_INSNS (1),	/* fpconst.  */
1182       COSTS_N_INSNS (1),	/* neg.  */
1183       COSTS_N_INSNS (3),	/* compare.  */
1184       COSTS_N_INSNS (3),	/* widen.  */
1185       COSTS_N_INSNS (3),	/* narrow.  */
1186       COSTS_N_INSNS (3),	/* toint.  */
1187       COSTS_N_INSNS (3),	/* fromint.  */
1188       COSTS_N_INSNS (3)		/* roundint.  */
1189     }
1190   },
1191   /* Vector */
1192   {
1193     COSTS_N_INSNS (1)	/* alu.  */
1194   }
1195 };
1196 
1197 const struct cpu_cost_table cortexa8_extra_costs =
1198 {
1199   /* ALU */
1200   {
1201     0,			/* arith.  */
1202     0,			/* logical.  */
1203     COSTS_N_INSNS (1),	/* shift.  */
1204     0,			/* shift_reg.  */
1205     COSTS_N_INSNS (1),	/* arith_shift.  */
1206     0,			/* arith_shift_reg.  */
1207     COSTS_N_INSNS (1),	/* log_shift.  */
1208     0,			/* log_shift_reg.  */
1209     0,			/* extend.  */
1210     0,			/* extend_arith.  */
1211     0,			/* bfi.  */
1212     0,			/* bfx.  */
1213     0,			/* clz.  */
1214     0,			/* rev.  */
1215     0,			/* non_exec.  */
1216     true		/* non_exec_costs_exec.  */
1217   },
1218   {
1219     /* MULT SImode */
1220     {
1221       COSTS_N_INSNS (1),	/* simple.  */
1222       COSTS_N_INSNS (1),	/* flag_setting.  */
1223       COSTS_N_INSNS (1),	/* extend.  */
1224       COSTS_N_INSNS (1),	/* add.  */
1225       COSTS_N_INSNS (1),	/* extend_add.  */
1226       COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A8.  */
1227     },
1228     /* MULT DImode */
1229     {
1230       0,			/* simple (N/A).  */
1231       0,			/* flag_setting (N/A).  */
1232       COSTS_N_INSNS (2),	/* extend.  */
1233       0,			/* add (N/A).  */
1234       COSTS_N_INSNS (2),	/* extend_add.  */
1235       0				/* idiv (N/A).  */
1236     }
1237   },
1238   /* LD/ST */
1239   {
1240     COSTS_N_INSNS (1),	/* load.  */
1241     COSTS_N_INSNS (1),	/* load_sign_extend.  */
1242     COSTS_N_INSNS (1),	/* ldrd.  */
1243     COSTS_N_INSNS (1),	/* ldm_1st.  */
1244     1,			/* ldm_regs_per_insn_1st.  */
1245     2,			/* ldm_regs_per_insn_subsequent.  */
1246     COSTS_N_INSNS (1),	/* loadf.  */
1247     COSTS_N_INSNS (1),	/* loadd.  */
1248     COSTS_N_INSNS (1),  /* load_unaligned.  */
1249     COSTS_N_INSNS (1),	/* store.  */
1250     COSTS_N_INSNS (1),	/* strd.  */
1251     COSTS_N_INSNS (1),	/* stm_1st.  */
1252     1,			/* stm_regs_per_insn_1st.  */
1253     2,			/* stm_regs_per_insn_subsequent.  */
1254     COSTS_N_INSNS (1),	/* storef.  */
1255     COSTS_N_INSNS (1),	/* stored.  */
1256     COSTS_N_INSNS (1),	/* store_unaligned.  */
1257     COSTS_N_INSNS (1),	/* loadv.  */
1258     COSTS_N_INSNS (1)	/* storev.  */
1259   },
1260   {
1261     /* FP SFmode */
1262     {
1263       COSTS_N_INSNS (36),	/* div.  */
1264       COSTS_N_INSNS (11),	/* mult.  */
1265       COSTS_N_INSNS (20),	/* mult_addsub. */
1266       COSTS_N_INSNS (30),	/* fma.  */
1267       COSTS_N_INSNS (9),	/* addsub.  */
1268       COSTS_N_INSNS (3),	/* fpconst.  */
1269       COSTS_N_INSNS (3),	/* neg.  */
1270       COSTS_N_INSNS (6),	/* compare.  */
1271       COSTS_N_INSNS (4),	/* widen.  */
1272       COSTS_N_INSNS (4),	/* narrow.  */
1273       COSTS_N_INSNS (8),	/* toint.  */
1274       COSTS_N_INSNS (8),	/* fromint.  */
1275       COSTS_N_INSNS (8)		/* roundint.  */
1276     },
1277     /* FP DFmode */
1278     {
1279       COSTS_N_INSNS (64),	/* div.  */
1280       COSTS_N_INSNS (16),	/* mult.  */
1281       COSTS_N_INSNS (25),	/* mult_addsub.  */
1282       COSTS_N_INSNS (30),	/* fma.  */
1283       COSTS_N_INSNS (9),	/* addsub.  */
1284       COSTS_N_INSNS (3),	/* fpconst.  */
1285       COSTS_N_INSNS (3),	/* neg.  */
1286       COSTS_N_INSNS (6),	/* compare.  */
1287       COSTS_N_INSNS (6),	/* widen.  */
1288       COSTS_N_INSNS (6),	/* narrow.  */
1289       COSTS_N_INSNS (8),	/* toint.  */
1290       COSTS_N_INSNS (8),	/* fromint.  */
1291       COSTS_N_INSNS (8)		/* roundint.  */
1292     }
1293   },
1294   /* Vector */
1295   {
1296     COSTS_N_INSNS (1)	/* alu.  */
1297   }
1298 };
1299 
1300 const struct cpu_cost_table cortexa5_extra_costs =
1301 {
1302   /* ALU */
1303   {
1304     0,			/* arith.  */
1305     0,			/* logical.  */
1306     COSTS_N_INSNS (1),	/* shift.  */
1307     COSTS_N_INSNS (1),	/* shift_reg.  */
1308     COSTS_N_INSNS (1),	/* arith_shift.  */
1309     COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1310     COSTS_N_INSNS (1),	/* log_shift.  */
1311     COSTS_N_INSNS (1),	/* log_shift_reg.  */
1312     COSTS_N_INSNS (1),	/* extend.  */
1313     COSTS_N_INSNS (1),	/* extend_arith.  */
1314     COSTS_N_INSNS (1),	/* bfi.  */
1315     COSTS_N_INSNS (1),	/* bfx.  */
1316     COSTS_N_INSNS (1),	/* clz.  */
1317     COSTS_N_INSNS (1),	/* rev.  */
1318     0,			/* non_exec.  */
1319     true		/* non_exec_costs_exec.  */
1320   },
1321 
1322   {
1323     /* MULT SImode */
1324     {
1325       0,			/* simple.  */
1326       COSTS_N_INSNS (1),	/* flag_setting.  */
1327       COSTS_N_INSNS (1),	/* extend.  */
1328       COSTS_N_INSNS (1),	/* add.  */
1329       COSTS_N_INSNS (1),	/* extend_add.  */
1330       COSTS_N_INSNS (7)		/* idiv.  */
1331     },
1332     /* MULT DImode */
1333     {
1334       0,			/* simple (N/A).  */
1335       0,			/* flag_setting (N/A).  */
1336       COSTS_N_INSNS (1),	/* extend.  */
1337       0,			/* add.  */
1338       COSTS_N_INSNS (2),	/* extend_add.  */
1339       0				/* idiv (N/A).  */
1340     }
1341   },
1342   /* LD/ST */
1343   {
1344     COSTS_N_INSNS (1),	/* load.  */
1345     COSTS_N_INSNS (1),	/* load_sign_extend.  */
1346     COSTS_N_INSNS (6),	/* ldrd.  */
1347     COSTS_N_INSNS (1),	/* ldm_1st.  */
1348     1,			/* ldm_regs_per_insn_1st.  */
1349     2,			/* ldm_regs_per_insn_subsequent.  */
1350     COSTS_N_INSNS (2),	/* loadf.  */
1351     COSTS_N_INSNS (4),	/* loadd.  */
1352     COSTS_N_INSNS (1),	/* load_unaligned.  */
1353     COSTS_N_INSNS (1),	/* store.  */
1354     COSTS_N_INSNS (3),	/* strd.  */
1355     COSTS_N_INSNS (1),	/* stm_1st.  */
1356     1,			/* stm_regs_per_insn_1st.  */
1357     2,			/* stm_regs_per_insn_subsequent.  */
1358     COSTS_N_INSNS (2),	/* storef.  */
1359     COSTS_N_INSNS (2),	/* stored.  */
1360     COSTS_N_INSNS (1),	/* store_unaligned.  */
1361     COSTS_N_INSNS (1),	/* loadv.  */
1362     COSTS_N_INSNS (1)	/* storev.  */
1363   },
1364   {
1365     /* FP SFmode */
1366     {
1367       COSTS_N_INSNS (15),	/* div.  */
1368       COSTS_N_INSNS (3),	/* mult.  */
1369       COSTS_N_INSNS (7),	/* mult_addsub. */
1370       COSTS_N_INSNS (7),	/* fma.  */
1371       COSTS_N_INSNS (3),	/* addsub.  */
1372       COSTS_N_INSNS (3),	/* fpconst.  */
1373       COSTS_N_INSNS (3),	/* neg.  */
1374       COSTS_N_INSNS (3),	/* compare.  */
1375       COSTS_N_INSNS (3),	/* widen.  */
1376       COSTS_N_INSNS (3),	/* narrow.  */
1377       COSTS_N_INSNS (3),	/* toint.  */
1378       COSTS_N_INSNS (3),	/* fromint.  */
1379       COSTS_N_INSNS (3)		/* roundint.  */
1380     },
1381     /* FP DFmode */
1382     {
1383       COSTS_N_INSNS (30),	/* div.  */
1384       COSTS_N_INSNS (6),	/* mult.  */
1385       COSTS_N_INSNS (10),	/* mult_addsub.  */
1386       COSTS_N_INSNS (7),	/* fma.  */
1387       COSTS_N_INSNS (3),	/* addsub.  */
1388       COSTS_N_INSNS (3),	/* fpconst.  */
1389       COSTS_N_INSNS (3),	/* neg.  */
1390       COSTS_N_INSNS (3),	/* compare.  */
1391       COSTS_N_INSNS (3),	/* widen.  */
1392       COSTS_N_INSNS (3),	/* narrow.  */
1393       COSTS_N_INSNS (3),	/* toint.  */
1394       COSTS_N_INSNS (3),	/* fromint.  */
1395       COSTS_N_INSNS (3)		/* roundint.  */
1396     }
1397   },
1398   /* Vector */
1399   {
1400     COSTS_N_INSNS (1)	/* alu.  */
1401   }
1402 };
1403 
1404 
1405 const struct cpu_cost_table cortexa7_extra_costs =
1406 {
1407   /* ALU */
1408   {
1409     0,			/* arith.  */
1410     0,			/* logical.  */
1411     COSTS_N_INSNS (1),	/* shift.  */
1412     COSTS_N_INSNS (1),	/* shift_reg.  */
1413     COSTS_N_INSNS (1),	/* arith_shift.  */
1414     COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1415     COSTS_N_INSNS (1),	/* log_shift.  */
1416     COSTS_N_INSNS (1),	/* log_shift_reg.  */
1417     COSTS_N_INSNS (1),	/* extend.  */
1418     COSTS_N_INSNS (1),	/* extend_arith.  */
1419     COSTS_N_INSNS (1),	/* bfi.  */
1420     COSTS_N_INSNS (1),	/* bfx.  */
1421     COSTS_N_INSNS (1),	/* clz.  */
1422     COSTS_N_INSNS (1),	/* rev.  */
1423     0,			/* non_exec.  */
1424     true		/* non_exec_costs_exec.  */
1425   },
1426 
1427   {
1428     /* MULT SImode */
1429     {
1430       0,			/* simple.  */
1431       COSTS_N_INSNS (1),	/* flag_setting.  */
1432       COSTS_N_INSNS (1),	/* extend.  */
1433       COSTS_N_INSNS (1),	/* add.  */
1434       COSTS_N_INSNS (1),	/* extend_add.  */
1435       COSTS_N_INSNS (7)		/* idiv.  */
1436     },
1437     /* MULT DImode */
1438     {
1439       0,			/* simple (N/A).  */
1440       0,			/* flag_setting (N/A).  */
1441       COSTS_N_INSNS (1),	/* extend.  */
1442       0,			/* add.  */
1443       COSTS_N_INSNS (2),	/* extend_add.  */
1444       0				/* idiv (N/A).  */
1445     }
1446   },
1447   /* LD/ST */
1448   {
1449     COSTS_N_INSNS (1),	/* load.  */
1450     COSTS_N_INSNS (1),	/* load_sign_extend.  */
1451     COSTS_N_INSNS (3),	/* ldrd.  */
1452     COSTS_N_INSNS (1),	/* ldm_1st.  */
1453     1,			/* ldm_regs_per_insn_1st.  */
1454     2,			/* ldm_regs_per_insn_subsequent.  */
1455     COSTS_N_INSNS (2),	/* loadf.  */
1456     COSTS_N_INSNS (2),	/* loadd.  */
1457     COSTS_N_INSNS (1),	/* load_unaligned.  */
1458     COSTS_N_INSNS (1),	/* store.  */
1459     COSTS_N_INSNS (3),	/* strd.  */
1460     COSTS_N_INSNS (1),	/* stm_1st.  */
1461     1,			/* stm_regs_per_insn_1st.  */
1462     2,			/* stm_regs_per_insn_subsequent.  */
1463     COSTS_N_INSNS (2),	/* storef.  */
1464     COSTS_N_INSNS (2),	/* stored.  */
1465     COSTS_N_INSNS (1),	/* store_unaligned.  */
1466     COSTS_N_INSNS (1),	/* loadv.  */
1467     COSTS_N_INSNS (1)	/* storev.  */
1468   },
1469   {
1470     /* FP SFmode */
1471     {
1472       COSTS_N_INSNS (15),	/* div.  */
1473       COSTS_N_INSNS (3),	/* mult.  */
1474       COSTS_N_INSNS (7),	/* mult_addsub.  */
1475       COSTS_N_INSNS (7),	/* fma.  */
1476       COSTS_N_INSNS (3),	/* addsub.  */
1477       COSTS_N_INSNS (3),	/* fpconst.  */
1478       COSTS_N_INSNS (3),	/* neg.  */
1479       COSTS_N_INSNS (3),	/* compare.  */
1480       COSTS_N_INSNS (3),	/* widen.  */
1481       COSTS_N_INSNS (3),	/* narrow.  */
1482       COSTS_N_INSNS (3),	/* toint.  */
1483       COSTS_N_INSNS (3),	/* fromint.  */
1484       COSTS_N_INSNS (3)		/* roundint.  */
1485     },
1486     /* FP DFmode */
1487     {
1488       COSTS_N_INSNS (30),	/* div.  */
1489       COSTS_N_INSNS (6),	/* mult.  */
1490       COSTS_N_INSNS (10),	/* mult_addsub.  */
1491       COSTS_N_INSNS (7),	/* fma.  */
1492       COSTS_N_INSNS (3),	/* addsub.  */
1493       COSTS_N_INSNS (3),	/* fpconst.  */
1494       COSTS_N_INSNS (3),	/* neg.  */
1495       COSTS_N_INSNS (3),	/* compare.  */
1496       COSTS_N_INSNS (3),	/* widen.  */
1497       COSTS_N_INSNS (3),	/* narrow.  */
1498       COSTS_N_INSNS (3),	/* toint.  */
1499       COSTS_N_INSNS (3),	/* fromint.  */
1500       COSTS_N_INSNS (3)		/* roundint.  */
1501     }
1502   },
1503   /* Vector */
1504   {
1505     COSTS_N_INSNS (1)	/* alu.  */
1506   }
1507 };
1508 
1509 const struct cpu_cost_table cortexa12_extra_costs =
1510 {
1511   /* ALU */
1512   {
1513     0,			/* arith.  */
1514     0,			/* logical.  */
1515     0,			/* shift.  */
1516     COSTS_N_INSNS (1),	/* shift_reg.  */
1517     COSTS_N_INSNS (1),	/* arith_shift.  */
1518     COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1519     COSTS_N_INSNS (1),	/* log_shift.  */
1520     COSTS_N_INSNS (1),	/* log_shift_reg.  */
1521     0,			/* extend.  */
1522     COSTS_N_INSNS (1),	/* extend_arith.  */
1523     0,			/* bfi.  */
1524     COSTS_N_INSNS (1),	/* bfx.  */
1525     COSTS_N_INSNS (1),	/* clz.  */
1526     COSTS_N_INSNS (1),	/* rev.  */
1527     0,			/* non_exec.  */
1528     true		/* non_exec_costs_exec.  */
1529   },
1530   /* MULT SImode */
1531   {
1532     {
1533       COSTS_N_INSNS (2),	/* simple.  */
1534       COSTS_N_INSNS (3),	/* flag_setting.  */
1535       COSTS_N_INSNS (2),	/* extend.  */
1536       COSTS_N_INSNS (3),	/* add.  */
1537       COSTS_N_INSNS (2),	/* extend_add.  */
1538       COSTS_N_INSNS (18)	/* idiv.  */
1539     },
1540     /* MULT DImode */
1541     {
1542       0,			/* simple (N/A).  */
1543       0,			/* flag_setting (N/A).  */
1544       COSTS_N_INSNS (3),	/* extend.  */
1545       0,			/* add (N/A).  */
1546       COSTS_N_INSNS (3),	/* extend_add.  */
1547       0				/* idiv (N/A).  */
1548     }
1549   },
1550   /* LD/ST */
1551   {
1552     COSTS_N_INSNS (3),	/* load.  */
1553     COSTS_N_INSNS (3),	/* load_sign_extend.  */
1554     COSTS_N_INSNS (3),	/* ldrd.  */
1555     COSTS_N_INSNS (3),	/* ldm_1st.  */
1556     1,			/* ldm_regs_per_insn_1st.  */
1557     2,			/* ldm_regs_per_insn_subsequent.  */
1558     COSTS_N_INSNS (3),	/* loadf.  */
1559     COSTS_N_INSNS (3),	/* loadd.  */
1560     0,			/* load_unaligned.  */
1561     0,			/* store.  */
1562     0,			/* strd.  */
1563     0,			/* stm_1st.  */
1564     1,			/* stm_regs_per_insn_1st.  */
1565     2,			/* stm_regs_per_insn_subsequent.  */
1566     COSTS_N_INSNS (2),	/* storef.  */
1567     COSTS_N_INSNS (2),	/* stored.  */
1568     0,			/* store_unaligned.  */
1569     COSTS_N_INSNS (1),	/* loadv.  */
1570     COSTS_N_INSNS (1)	/* storev.  */
1571   },
1572   {
1573     /* FP SFmode */
1574     {
1575       COSTS_N_INSNS (17),	/* div.  */
1576       COSTS_N_INSNS (4),	/* mult.  */
1577       COSTS_N_INSNS (8),	/* mult_addsub.  */
1578       COSTS_N_INSNS (8),	/* fma.  */
1579       COSTS_N_INSNS (4),	/* addsub.  */
1580       COSTS_N_INSNS (2),	/* fpconst.  */
1581       COSTS_N_INSNS (2),	/* neg.  */
1582       COSTS_N_INSNS (2),	/* compare.  */
1583       COSTS_N_INSNS (4),	/* widen.  */
1584       COSTS_N_INSNS (4),	/* narrow.  */
1585       COSTS_N_INSNS (4),	/* toint.  */
1586       COSTS_N_INSNS (4),	/* fromint.  */
1587       COSTS_N_INSNS (4)		/* roundint.  */
1588     },
1589     /* FP DFmode */
1590     {
1591       COSTS_N_INSNS (31),	/* div.  */
1592       COSTS_N_INSNS (4),	/* mult.  */
1593       COSTS_N_INSNS (8),	/* mult_addsub.  */
1594       COSTS_N_INSNS (8),	/* fma.  */
1595       COSTS_N_INSNS (4),	/* addsub.  */
1596       COSTS_N_INSNS (2),	/* fpconst.  */
1597       COSTS_N_INSNS (2),	/* neg.  */
1598       COSTS_N_INSNS (2),	/* compare.  */
1599       COSTS_N_INSNS (4),	/* widen.  */
1600       COSTS_N_INSNS (4),	/* narrow.  */
1601       COSTS_N_INSNS (4),	/* toint.  */
1602       COSTS_N_INSNS (4),	/* fromint.  */
1603       COSTS_N_INSNS (4)		/* roundint.  */
1604     }
1605   },
1606   /* Vector */
1607   {
1608     COSTS_N_INSNS (1)	/* alu.  */
1609   }
1610 };
1611 
1612 const struct cpu_cost_table cortexa15_extra_costs =
1613 {
1614   /* ALU */
1615   {
1616     0,			/* arith.  */
1617     0,			/* logical.  */
1618     0,			/* shift.  */
1619     0,			/* shift_reg.  */
1620     COSTS_N_INSNS (1),	/* arith_shift.  */
1621     COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1622     COSTS_N_INSNS (1),	/* log_shift.  */
1623     COSTS_N_INSNS (1),	/* log_shift_reg.  */
1624     0,			/* extend.  */
1625     COSTS_N_INSNS (1),	/* extend_arith.  */
1626     COSTS_N_INSNS (1),	/* bfi.  */
1627     0,			/* bfx.  */
1628     0,			/* clz.  */
1629     0,			/* rev.  */
1630     0,			/* non_exec.  */
1631     true		/* non_exec_costs_exec.  */
1632   },
1633   /* MULT SImode */
1634   {
1635     {
1636       COSTS_N_INSNS (2),	/* simple.  */
1637       COSTS_N_INSNS (3),	/* flag_setting.  */
1638       COSTS_N_INSNS (2),	/* extend.  */
1639       COSTS_N_INSNS (2),	/* add.  */
1640       COSTS_N_INSNS (2),	/* extend_add.  */
1641       COSTS_N_INSNS (18)	/* idiv.  */
1642     },
1643     /* MULT DImode */
1644     {
1645       0,			/* simple (N/A).  */
1646       0,			/* flag_setting (N/A).  */
1647       COSTS_N_INSNS (3),	/* extend.  */
1648       0,			/* add (N/A).  */
1649       COSTS_N_INSNS (3),	/* extend_add.  */
1650       0				/* idiv (N/A).  */
1651     }
1652   },
1653   /* LD/ST */
1654   {
1655     COSTS_N_INSNS (3),	/* load.  */
1656     COSTS_N_INSNS (3),	/* load_sign_extend.  */
1657     COSTS_N_INSNS (3),	/* ldrd.  */
1658     COSTS_N_INSNS (4),	/* ldm_1st.  */
1659     1,			/* ldm_regs_per_insn_1st.  */
1660     2,			/* ldm_regs_per_insn_subsequent.  */
1661     COSTS_N_INSNS (4),	/* loadf.  */
1662     COSTS_N_INSNS (4),	/* loadd.  */
1663     0,			/* load_unaligned.  */
1664     0,			/* store.  */
1665     0,			/* strd.  */
1666     COSTS_N_INSNS (1),	/* stm_1st.  */
1667     1,			/* stm_regs_per_insn_1st.  */
1668     2,			/* stm_regs_per_insn_subsequent.  */
1669     0,			/* storef.  */
1670     0,			/* stored.  */
1671     0,			/* store_unaligned.  */
1672     COSTS_N_INSNS (1),	/* loadv.  */
1673     COSTS_N_INSNS (1)	/* storev.  */
1674   },
1675   {
1676     /* FP SFmode */
1677     {
1678       COSTS_N_INSNS (17),	/* div.  */
1679       COSTS_N_INSNS (4),	/* mult.  */
1680       COSTS_N_INSNS (8),	/* mult_addsub.  */
1681       COSTS_N_INSNS (8),	/* fma.  */
1682       COSTS_N_INSNS (4),	/* addsub.  */
1683       COSTS_N_INSNS (2),	/* fpconst.  */
1684       COSTS_N_INSNS (2),	/* neg.  */
1685       COSTS_N_INSNS (5),	/* compare.  */
1686       COSTS_N_INSNS (4),	/* widen.  */
1687       COSTS_N_INSNS (4),	/* narrow.  */
1688       COSTS_N_INSNS (4),	/* toint.  */
1689       COSTS_N_INSNS (4),	/* fromint.  */
1690       COSTS_N_INSNS (4)		/* roundint.  */
1691     },
1692     /* FP DFmode */
1693     {
1694       COSTS_N_INSNS (31),	/* div.  */
1695       COSTS_N_INSNS (4),	/* mult.  */
1696       COSTS_N_INSNS (8),	/* mult_addsub.  */
1697       COSTS_N_INSNS (8),	/* fma.  */
1698       COSTS_N_INSNS (4),	/* addsub.  */
1699       COSTS_N_INSNS (2),	/* fpconst.  */
1700       COSTS_N_INSNS (2),	/* neg.  */
1701       COSTS_N_INSNS (2),	/* compare.  */
1702       COSTS_N_INSNS (4),	/* widen.  */
1703       COSTS_N_INSNS (4),	/* narrow.  */
1704       COSTS_N_INSNS (4),	/* toint.  */
1705       COSTS_N_INSNS (4),	/* fromint.  */
1706       COSTS_N_INSNS (4)		/* roundint.  */
1707     }
1708   },
1709   /* Vector */
1710   {
1711     COSTS_N_INSNS (1)	/* alu.  */
1712   }
1713 };
1714 
1715 const struct cpu_cost_table v7m_extra_costs =
1716 {
1717   /* ALU */
1718   {
1719     0,			/* arith.  */
1720     0,			/* logical.  */
1721     0,			/* shift.  */
1722     0,			/* shift_reg.  */
1723     0,			/* arith_shift.  */
1724     COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1725     0,			/* log_shift.  */
1726     COSTS_N_INSNS (1),	/* log_shift_reg.  */
1727     0,			/* extend.  */
1728     COSTS_N_INSNS (1),	/* extend_arith.  */
1729     0,			/* bfi.  */
1730     0,			/* bfx.  */
1731     0,			/* clz.  */
1732     0,			/* rev.  */
1733     COSTS_N_INSNS (1),	/* non_exec.  */
1734     false		/* non_exec_costs_exec.  */
1735   },
1736   {
1737     /* MULT SImode */
1738     {
1739       COSTS_N_INSNS (1),	/* simple.  */
1740       COSTS_N_INSNS (1),	/* flag_setting.  */
1741       COSTS_N_INSNS (2),	/* extend.  */
1742       COSTS_N_INSNS (1),	/* add.  */
1743       COSTS_N_INSNS (3),	/* extend_add.  */
1744       COSTS_N_INSNS (8)		/* idiv.  */
1745     },
1746     /* MULT DImode */
1747     {
1748       0,			/* simple (N/A).  */
1749       0,			/* flag_setting (N/A).  */
1750       COSTS_N_INSNS (2),	/* extend.  */
1751       0,			/* add (N/A).  */
1752       COSTS_N_INSNS (3),	/* extend_add.  */
1753       0				/* idiv (N/A).  */
1754     }
1755   },
1756   /* LD/ST */
1757   {
1758     COSTS_N_INSNS (2),	/* load.  */
1759     0,			/* load_sign_extend.  */
1760     COSTS_N_INSNS (3),	/* ldrd.  */
1761     COSTS_N_INSNS (2),	/* ldm_1st.  */
1762     1,			/* ldm_regs_per_insn_1st.  */
1763     1,			/* ldm_regs_per_insn_subsequent.  */
1764     COSTS_N_INSNS (2),	/* loadf.  */
1765     COSTS_N_INSNS (3),	/* loadd.  */
1766     COSTS_N_INSNS (1),  /* load_unaligned.  */
1767     COSTS_N_INSNS (2),	/* store.  */
1768     COSTS_N_INSNS (3),	/* strd.  */
1769     COSTS_N_INSNS (2),	/* stm_1st.  */
1770     1,			/* stm_regs_per_insn_1st.  */
1771     1,			/* stm_regs_per_insn_subsequent.  */
1772     COSTS_N_INSNS (2),	/* storef.  */
1773     COSTS_N_INSNS (3),	/* stored.  */
1774     COSTS_N_INSNS (1),	/* store_unaligned.  */
1775     COSTS_N_INSNS (1),	/* loadv.  */
1776     COSTS_N_INSNS (1)	/* storev.  */
1777   },
1778   {
1779     /* FP SFmode */
1780     {
1781       COSTS_N_INSNS (7),	/* div.  */
1782       COSTS_N_INSNS (2),	/* mult.  */
1783       COSTS_N_INSNS (5),	/* mult_addsub.  */
1784       COSTS_N_INSNS (3),	/* fma.  */
1785       COSTS_N_INSNS (1),	/* addsub.  */
1786       0,			/* fpconst.  */
1787       0,			/* neg.  */
1788       0,			/* compare.  */
1789       0,			/* widen.  */
1790       0,			/* narrow.  */
1791       0,			/* toint.  */
1792       0,			/* fromint.  */
1793       0				/* roundint.  */
1794     },
1795     /* FP DFmode */
1796     {
1797       COSTS_N_INSNS (15),	/* div.  */
1798       COSTS_N_INSNS (5),	/* mult.  */
1799       COSTS_N_INSNS (7),	/* mult_addsub.  */
1800       COSTS_N_INSNS (7),	/* fma.  */
1801       COSTS_N_INSNS (3),	/* addsub.  */
1802       0,			/* fpconst.  */
1803       0,			/* neg.  */
1804       0,			/* compare.  */
1805       0,			/* widen.  */
1806       0,			/* narrow.  */
1807       0,			/* toint.  */
1808       0,			/* fromint.  */
1809       0				/* roundint.  */
1810     }
1811   },
1812   /* Vector */
1813   {
1814     COSTS_N_INSNS (1)	/* alu.  */
1815   }
1816 };
1817 
1818 const struct addr_mode_cost_table generic_addr_mode_costs =
1819 {
1820   /* int.  */
1821   {
1822     COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
1823     COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
1824     COSTS_N_INSNS (0)	/* AMO_WB.  */
1825   },
1826   /* float.  */
1827   {
1828     COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
1829     COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
1830     COSTS_N_INSNS (0)	/* AMO_WB.  */
1831   },
1832   /* vector.  */
1833   {
1834     COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
1835     COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
1836     COSTS_N_INSNS (0)	/* AMO_WB.  */
1837   }
1838 };
1839 
1840 const struct tune_params arm_slowmul_tune =
1841 {
1842   &generic_extra_costs,			/* Insn extra costs.  */
1843   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1844   NULL,					/* Sched adj cost.  */
1845   arm_default_branch_cost,
1846   &arm_default_vec_cost,
1847   3,						/* Constant limit.  */
1848   5,						/* Max cond insns.  */
1849   8,						/* Memset max inline.  */
1850   1,						/* Issue rate.  */
1851   ARM_PREFETCH_NOT_BENEFICIAL,
1852   tune_params::PREF_CONST_POOL_TRUE,
1853   tune_params::PREF_LDRD_FALSE,
1854   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1855   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1856   tune_params::DISPARAGE_FLAGS_NEITHER,
1857   tune_params::PREF_NEON_STRINGOPS_FALSE,
1858   tune_params::FUSE_NOTHING,
1859   tune_params::SCHED_AUTOPREF_OFF
1860 };
1861 
1862 const struct tune_params arm_fastmul_tune =
1863 {
1864   &generic_extra_costs,			/* Insn extra costs.  */
1865   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1866   NULL,					/* Sched adj cost.  */
1867   arm_default_branch_cost,
1868   &arm_default_vec_cost,
1869   1,						/* Constant limit.  */
1870   5,						/* Max cond insns.  */
1871   8,						/* Memset max inline.  */
1872   1,						/* Issue rate.  */
1873   ARM_PREFETCH_NOT_BENEFICIAL,
1874   tune_params::PREF_CONST_POOL_TRUE,
1875   tune_params::PREF_LDRD_FALSE,
1876   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1877   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1878   tune_params::DISPARAGE_FLAGS_NEITHER,
1879   tune_params::PREF_NEON_STRINGOPS_FALSE,
1880   tune_params::FUSE_NOTHING,
1881   tune_params::SCHED_AUTOPREF_OFF
1882 };
1883 
1884 /* StrongARM has early execution of branches, so a sequence that is worth
1885    skipping is shorter.  Set max_insns_skipped to a lower value.  */
1886 
1887 const struct tune_params arm_strongarm_tune =
1888 {
1889   &generic_extra_costs,			/* Insn extra costs.  */
1890   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1891   NULL,					/* Sched adj cost.  */
1892   arm_default_branch_cost,
1893   &arm_default_vec_cost,
1894   1,						/* Constant limit.  */
1895   3,						/* Max cond insns.  */
1896   8,						/* Memset max inline.  */
1897   1,						/* Issue rate.  */
1898   ARM_PREFETCH_NOT_BENEFICIAL,
1899   tune_params::PREF_CONST_POOL_TRUE,
1900   tune_params::PREF_LDRD_FALSE,
1901   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1902   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1903   tune_params::DISPARAGE_FLAGS_NEITHER,
1904   tune_params::PREF_NEON_STRINGOPS_FALSE,
1905   tune_params::FUSE_NOTHING,
1906   tune_params::SCHED_AUTOPREF_OFF
1907 };
1908 
1909 const struct tune_params arm_xscale_tune =
1910 {
1911   &generic_extra_costs,			/* Insn extra costs.  */
1912   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1913   xscale_sched_adjust_cost,
1914   arm_default_branch_cost,
1915   &arm_default_vec_cost,
1916   2,						/* Constant limit.  */
1917   3,						/* Max cond insns.  */
1918   8,						/* Memset max inline.  */
1919   1,						/* Issue rate.  */
1920   ARM_PREFETCH_NOT_BENEFICIAL,
1921   tune_params::PREF_CONST_POOL_TRUE,
1922   tune_params::PREF_LDRD_FALSE,
1923   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1924   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1925   tune_params::DISPARAGE_FLAGS_NEITHER,
1926   tune_params::PREF_NEON_STRINGOPS_FALSE,
1927   tune_params::FUSE_NOTHING,
1928   tune_params::SCHED_AUTOPREF_OFF
1929 };
1930 
1931 const struct tune_params arm_9e_tune =
1932 {
1933   &generic_extra_costs,			/* Insn extra costs.  */
1934   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1935   NULL,					/* Sched adj cost.  */
1936   arm_default_branch_cost,
1937   &arm_default_vec_cost,
1938   1,						/* Constant limit.  */
1939   5,						/* Max cond insns.  */
1940   8,						/* Memset max inline.  */
1941   1,						/* Issue rate.  */
1942   ARM_PREFETCH_NOT_BENEFICIAL,
1943   tune_params::PREF_CONST_POOL_TRUE,
1944   tune_params::PREF_LDRD_FALSE,
1945   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1946   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1947   tune_params::DISPARAGE_FLAGS_NEITHER,
1948   tune_params::PREF_NEON_STRINGOPS_FALSE,
1949   tune_params::FUSE_NOTHING,
1950   tune_params::SCHED_AUTOPREF_OFF
1951 };
1952 
1953 const struct tune_params arm_marvell_pj4_tune =
1954 {
1955   &generic_extra_costs,			/* Insn extra costs.  */
1956   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1957   NULL,					/* Sched adj cost.  */
1958   arm_default_branch_cost,
1959   &arm_default_vec_cost,
1960   1,						/* Constant limit.  */
1961   5,						/* Max cond insns.  */
1962   8,						/* Memset max inline.  */
1963   2,						/* Issue rate.  */
1964   ARM_PREFETCH_NOT_BENEFICIAL,
1965   tune_params::PREF_CONST_POOL_TRUE,
1966   tune_params::PREF_LDRD_FALSE,
1967   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1968   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1969   tune_params::DISPARAGE_FLAGS_NEITHER,
1970   tune_params::PREF_NEON_STRINGOPS_FALSE,
1971   tune_params::FUSE_NOTHING,
1972   tune_params::SCHED_AUTOPREF_OFF
1973 };
1974 
1975 const struct tune_params arm_v6t2_tune =
1976 {
1977   &generic_extra_costs,			/* Insn extra costs.  */
1978   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1979   NULL,					/* Sched adj cost.  */
1980   arm_default_branch_cost,
1981   &arm_default_vec_cost,
1982   1,						/* Constant limit.  */
1983   5,						/* Max cond insns.  */
1984   8,						/* Memset max inline.  */
1985   1,						/* Issue rate.  */
1986   ARM_PREFETCH_NOT_BENEFICIAL,
1987   tune_params::PREF_CONST_POOL_FALSE,
1988   tune_params::PREF_LDRD_FALSE,
1989   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1990   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1991   tune_params::DISPARAGE_FLAGS_NEITHER,
1992   tune_params::PREF_NEON_STRINGOPS_FALSE,
1993   tune_params::FUSE_NOTHING,
1994   tune_params::SCHED_AUTOPREF_OFF
1995 };
1996 
1997 
1998 /* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
1999 const struct tune_params arm_cortex_tune =
2000 {
2001   &generic_extra_costs,
2002   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2003   NULL,					/* Sched adj cost.  */
2004   arm_default_branch_cost,
2005   &arm_default_vec_cost,
2006   1,						/* Constant limit.  */
2007   5,						/* Max cond insns.  */
2008   8,						/* Memset max inline.  */
2009   2,						/* Issue rate.  */
2010   ARM_PREFETCH_NOT_BENEFICIAL,
2011   tune_params::PREF_CONST_POOL_FALSE,
2012   tune_params::PREF_LDRD_FALSE,
2013   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2014   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2015   tune_params::DISPARAGE_FLAGS_NEITHER,
2016   tune_params::PREF_NEON_STRINGOPS_FALSE,
2017   tune_params::FUSE_NOTHING,
2018   tune_params::SCHED_AUTOPREF_OFF
2019 };
2020 
2021 const struct tune_params arm_cortex_a8_tune =
2022 {
2023   &cortexa8_extra_costs,
2024   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2025   NULL,					/* Sched adj cost.  */
2026   arm_default_branch_cost,
2027   &arm_default_vec_cost,
2028   1,						/* Constant limit.  */
2029   5,						/* Max cond insns.  */
2030   8,						/* Memset max inline.  */
2031   2,						/* Issue rate.  */
2032   ARM_PREFETCH_NOT_BENEFICIAL,
2033   tune_params::PREF_CONST_POOL_FALSE,
2034   tune_params::PREF_LDRD_FALSE,
2035   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2036   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2037   tune_params::DISPARAGE_FLAGS_NEITHER,
2038   tune_params::PREF_NEON_STRINGOPS_TRUE,
2039   tune_params::FUSE_NOTHING,
2040   tune_params::SCHED_AUTOPREF_OFF
2041 };
2042 
2043 const struct tune_params arm_cortex_a7_tune =
2044 {
2045   &cortexa7_extra_costs,
2046   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2047   NULL,					/* Sched adj cost.  */
2048   arm_default_branch_cost,
2049   &arm_default_vec_cost,
2050   1,						/* Constant limit.  */
2051   5,						/* Max cond insns.  */
2052   8,						/* Memset max inline.  */
2053   2,						/* Issue rate.  */
2054   ARM_PREFETCH_NOT_BENEFICIAL,
2055   tune_params::PREF_CONST_POOL_FALSE,
2056   tune_params::PREF_LDRD_FALSE,
2057   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2058   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2059   tune_params::DISPARAGE_FLAGS_NEITHER,
2060   tune_params::PREF_NEON_STRINGOPS_TRUE,
2061   tune_params::FUSE_NOTHING,
2062   tune_params::SCHED_AUTOPREF_OFF
2063 };
2064 
2065 const struct tune_params arm_cortex_a15_tune =
2066 {
2067   &cortexa15_extra_costs,
2068   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2069   NULL,					/* Sched adj cost.  */
2070   arm_default_branch_cost,
2071   &arm_default_vec_cost,
2072   1,						/* Constant limit.  */
2073   2,						/* Max cond insns.  */
2074   8,						/* Memset max inline.  */
2075   3,						/* Issue rate.  */
2076   ARM_PREFETCH_NOT_BENEFICIAL,
2077   tune_params::PREF_CONST_POOL_FALSE,
2078   tune_params::PREF_LDRD_TRUE,
2079   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2080   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2081   tune_params::DISPARAGE_FLAGS_ALL,
2082   tune_params::PREF_NEON_STRINGOPS_TRUE,
2083   tune_params::FUSE_NOTHING,
2084   tune_params::SCHED_AUTOPREF_FULL
2085 };
2086 
2087 const struct tune_params arm_cortex_a35_tune =
2088 {
2089   &cortexa53_extra_costs,
2090   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2091   NULL,					/* Sched adj cost.  */
2092   arm_default_branch_cost,
2093   &arm_default_vec_cost,
2094   1,						/* Constant limit.  */
2095   5,						/* Max cond insns.  */
2096   8,						/* Memset max inline.  */
2097   1,						/* Issue rate.  */
2098   ARM_PREFETCH_NOT_BENEFICIAL,
2099   tune_params::PREF_CONST_POOL_FALSE,
2100   tune_params::PREF_LDRD_FALSE,
2101   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2102   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2103   tune_params::DISPARAGE_FLAGS_NEITHER,
2104   tune_params::PREF_NEON_STRINGOPS_TRUE,
2105   FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2106   tune_params::SCHED_AUTOPREF_OFF
2107 };
2108 
2109 const struct tune_params arm_cortex_a53_tune =
2110 {
2111   &cortexa53_extra_costs,
2112   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2113   NULL,					/* Sched adj cost.  */
2114   arm_default_branch_cost,
2115   &arm_default_vec_cost,
2116   1,						/* Constant limit.  */
2117   5,						/* Max cond insns.  */
2118   8,						/* Memset max inline.  */
2119   2,						/* Issue rate.  */
2120   ARM_PREFETCH_NOT_BENEFICIAL,
2121   tune_params::PREF_CONST_POOL_FALSE,
2122   tune_params::PREF_LDRD_FALSE,
2123   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2124   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2125   tune_params::DISPARAGE_FLAGS_NEITHER,
2126   tune_params::PREF_NEON_STRINGOPS_TRUE,
2127   FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2128   tune_params::SCHED_AUTOPREF_OFF
2129 };
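/* FUSE_MOVW_MOVT asks the scheduler to keep a constant-building pair such as

     movw r0, #:lower16:sym
     movt r0, #:upper16:sym

   adjacent, so that cores which macro-fuse the pair can benefit;
   FUSE_AES_AESMC does the same for AES/AESMC instruction pairs.  */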
2130 
2131 const struct tune_params arm_cortex_a57_tune =
2132 {
2133   &cortexa57_extra_costs,
2134   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2135   NULL,					/* Sched adj cost.  */
2136   arm_default_branch_cost,
2137   &arm_default_vec_cost,
2138   1,						/* Constant limit.  */
2139   2,						/* Max cond insns.  */
2140   8,						/* Memset max inline.  */
2141   3,						/* Issue rate.  */
2142   ARM_PREFETCH_NOT_BENEFICIAL,
2143   tune_params::PREF_CONST_POOL_FALSE,
2144   tune_params::PREF_LDRD_TRUE,
2145   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2146   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2147   tune_params::DISPARAGE_FLAGS_ALL,
2148   tune_params::PREF_NEON_STRINGOPS_TRUE,
2149   FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2150   tune_params::SCHED_AUTOPREF_FULL
2151 };
2152 
2153 const struct tune_params arm_exynosm1_tune =
2154 {
2155   &exynosm1_extra_costs,
2156   &generic_addr_mode_costs,			/* Addressing mode costs.  */
2157   NULL,						/* Sched adj cost.  */
2158   arm_default_branch_cost,
2159   &arm_default_vec_cost,
2160   1,						/* Constant limit.  */
2161   2,						/* Max cond insns.  */
2162   8,						/* Memset max inline.  */
2163   3,						/* Issue rate.  */
2164   ARM_PREFETCH_NOT_BENEFICIAL,
2165   tune_params::PREF_CONST_POOL_FALSE,
2166   tune_params::PREF_LDRD_TRUE,
2167   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
2168   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
2169   tune_params::DISPARAGE_FLAGS_ALL,
2170   tune_params::PREF_NEON_STRINGOPS_TRUE,
2171   tune_params::FUSE_NOTHING,
2172   tune_params::SCHED_AUTOPREF_OFF
2173 };
2174 
2175 const struct tune_params arm_xgene1_tune =
2176 {
2177   &xgene1_extra_costs,
2178   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2179   NULL,					/* Sched adj cost.  */
2180   arm_default_branch_cost,
2181   &arm_default_vec_cost,
2182   1,						/* Constant limit.  */
2183   2,						/* Max cond insns.  */
2184   32,						/* Memset max inline.  */
2185   4,						/* Issue rate.  */
2186   ARM_PREFETCH_NOT_BENEFICIAL,
2187   tune_params::PREF_CONST_POOL_FALSE,
2188   tune_params::PREF_LDRD_TRUE,
2189   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2190   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2191   tune_params::DISPARAGE_FLAGS_ALL,
2192   tune_params::PREF_NEON_STRINGOPS_FALSE,
2193   tune_params::FUSE_NOTHING,
2194   tune_params::SCHED_AUTOPREF_OFF
2195 };
2196 
2197 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2198    less appealing.  Set max_insns_skipped to a low value.  */
2199 
2200 const struct tune_params arm_cortex_a5_tune =
2201 {
2202   &cortexa5_extra_costs,
2203   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2204   NULL,					/* Sched adj cost.  */
2205   arm_cortex_a5_branch_cost,
2206   &arm_default_vec_cost,
2207   1,						/* Constant limit.  */
2208   1,						/* Max cond insns.  */
2209   8,						/* Memset max inline.  */
2210   2,						/* Issue rate.  */
2211   ARM_PREFETCH_NOT_BENEFICIAL,
2212   tune_params::PREF_CONST_POOL_FALSE,
2213   tune_params::PREF_LDRD_FALSE,
2214   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* Thumb.  */
2215   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* ARM.  */
2216   tune_params::DISPARAGE_FLAGS_NEITHER,
2217   tune_params::PREF_NEON_STRINGOPS_TRUE,
2218   tune_params::FUSE_NOTHING,
2219   tune_params::SCHED_AUTOPREF_OFF
2220 };
2221 
2222 const struct tune_params arm_cortex_a9_tune =
2223 {
2224   &cortexa9_extra_costs,
2225   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2226   cortex_a9_sched_adjust_cost,
2227   arm_default_branch_cost,
2228   &arm_default_vec_cost,
2229   1,						/* Constant limit.  */
2230   5,						/* Max cond insns.  */
2231   8,						/* Memset max inline.  */
2232   2,						/* Issue rate.  */
2233   ARM_PREFETCH_BENEFICIAL(4,32,32),
2234   tune_params::PREF_CONST_POOL_FALSE,
2235   tune_params::PREF_LDRD_FALSE,
2236   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2237   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2238   tune_params::DISPARAGE_FLAGS_NEITHER,
2239   tune_params::PREF_NEON_STRINGOPS_FALSE,
2240   tune_params::FUSE_NOTHING,
2241   tune_params::SCHED_AUTOPREF_OFF
2242 };
2243 
2244 const struct tune_params arm_cortex_a12_tune =
2245 {
2246   &cortexa12_extra_costs,
2247   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2248   NULL,					/* Sched adj cost.  */
2249   arm_default_branch_cost,
2250   &arm_default_vec_cost,                        /* Vectorizer costs.  */
2251   1,						/* Constant limit.  */
2252   2,						/* Max cond insns.  */
2253   8,						/* Memset max inline.  */
2254   2,						/* Issue rate.  */
2255   ARM_PREFETCH_NOT_BENEFICIAL,
2256   tune_params::PREF_CONST_POOL_FALSE,
2257   tune_params::PREF_LDRD_TRUE,
2258   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2259   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2260   tune_params::DISPARAGE_FLAGS_ALL,
2261   tune_params::PREF_NEON_STRINGOPS_TRUE,
2262   FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2263   tune_params::SCHED_AUTOPREF_OFF
2264 };
2265 
2266 const struct tune_params arm_cortex_a73_tune =
2267 {
2268   &cortexa57_extra_costs,
2269   &generic_addr_mode_costs,			/* Addressing mode costs.  */
2270   NULL,						/* Sched adj cost.  */
2271   arm_default_branch_cost,
2272   &arm_default_vec_cost,			/* Vectorizer costs.  */
2273   1,						/* Constant limit.  */
2274   2,						/* Max cond insns.  */
2275   8,						/* Memset max inline.  */
2276   2,						/* Issue rate.  */
2277   ARM_PREFETCH_NOT_BENEFICIAL,
2278   tune_params::PREF_CONST_POOL_FALSE,
2279   tune_params::PREF_LDRD_TRUE,
2280   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2281   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2282   tune_params::DISPARAGE_FLAGS_ALL,
2283   tune_params::PREF_NEON_STRINGOPS_TRUE,
2284   FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2285   tune_params::SCHED_AUTOPREF_FULL
2286 };
2287 
2288 /* armv7m tuning.  On Cortex-M4 cores, for example, MOVW and MOVT each take a
2289    single cycle, so materialising a 32-bit constant in a register costs two
2290    cycles.  An LDR from the constant pool likewise takes two cycles to execute,
2291    but mildly increases pipelining opportunity (consecutive loads/stores can be
2292    pipelined together, saving one cycle), and may also improve icache
2293    utilisation.  Hence we prefer the constant pool for such processors.  */
2294 
2295 const struct tune_params arm_v7m_tune =
2296 {
2297   &v7m_extra_costs,
2298   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2299   NULL,					/* Sched adj cost.  */
2300   arm_cortex_m_branch_cost,
2301   &arm_default_vec_cost,
2302   1,						/* Constant limit.  */
2303   2,						/* Max cond insns.  */
2304   8,						/* Memset max inline.  */
2305   1,						/* Issue rate.  */
2306   ARM_PREFETCH_NOT_BENEFICIAL,
2307   tune_params::PREF_CONST_POOL_TRUE,
2308   tune_params::PREF_LDRD_FALSE,
2309   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* Thumb.  */
2310   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* ARM.  */
2311   tune_params::DISPARAGE_FLAGS_NEITHER,
2312   tune_params::PREF_NEON_STRINGOPS_FALSE,
2313   tune_params::FUSE_NOTHING,
2314   tune_params::SCHED_AUTOPREF_OFF
2315 };
2316 
2317 /* Cortex-M7 tuning.  */
2318 
2319 const struct tune_params arm_cortex_m7_tune =
2320 {
2321   &v7m_extra_costs,
2322   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2323   NULL,					/* Sched adj cost.  */
2324   arm_cortex_m7_branch_cost,
2325   &arm_default_vec_cost,
2326   0,						/* Constant limit.  */
2327   1,						/* Max cond insns.  */
2328   8,						/* Memset max inline.  */
2329   2,						/* Issue rate.  */
2330   ARM_PREFETCH_NOT_BENEFICIAL,
2331   tune_params::PREF_CONST_POOL_TRUE,
2332   tune_params::PREF_LDRD_FALSE,
2333   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2334   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2335   tune_params::DISPARAGE_FLAGS_NEITHER,
2336   tune_params::PREF_NEON_STRINGOPS_FALSE,
2337   tune_params::FUSE_NOTHING,
2338   tune_params::SCHED_AUTOPREF_OFF
2339 };
2340 
2341 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2342    arm_v6t2_tune.  It is used for cortex-m0, cortex-m1, cortex-m0plus and
2343    cortex-m23.  */
2344 const struct tune_params arm_v6m_tune =
2345 {
2346   &generic_extra_costs,			/* Insn extra costs.  */
2347   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2348   NULL,					/* Sched adj cost.  */
2349   arm_default_branch_cost,
2350   &arm_default_vec_cost,                        /* Vectorizer costs.  */
2351   1,						/* Constant limit.  */
2352   5,						/* Max cond insns.  */
2353   8,						/* Memset max inline.  */
2354   1,						/* Issue rate.  */
2355   ARM_PREFETCH_NOT_BENEFICIAL,
2356   tune_params::PREF_CONST_POOL_FALSE,
2357   tune_params::PREF_LDRD_FALSE,
2358   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* Thumb.  */
2359   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* ARM.  */
2360   tune_params::DISPARAGE_FLAGS_NEITHER,
2361   tune_params::PREF_NEON_STRINGOPS_FALSE,
2362   tune_params::FUSE_NOTHING,
2363   tune_params::SCHED_AUTOPREF_OFF
2364 };
2365 
2366 const struct tune_params arm_fa726te_tune =
2367 {
2368   &generic_extra_costs,				/* Insn extra costs.  */
2369   &generic_addr_mode_costs,			/* Addressing mode costs.  */
2370   fa726te_sched_adjust_cost,
2371   arm_default_branch_cost,
2372   &arm_default_vec_cost,
2373   1,						/* Constant limit.  */
2374   5,						/* Max cond insns.  */
2375   8,						/* Memset max inline.  */
2376   2,						/* Issue rate.  */
2377   ARM_PREFETCH_NOT_BENEFICIAL,
2378   tune_params::PREF_CONST_POOL_TRUE,
2379   tune_params::PREF_LDRD_FALSE,
2380   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2381   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2382   tune_params::DISPARAGE_FLAGS_NEITHER,
2383   tune_params::PREF_NEON_STRINGOPS_FALSE,
2384   tune_params::FUSE_NOTHING,
2385   tune_params::SCHED_AUTOPREF_OFF
2386 };
2387 
2388 /* Auto-generated CPU, FPU and architecture tables.  */
2389 #include "arm-cpu-data.h"
2390 
2391 /* The name of the preprocessor macro to define for this architecture.  PROFILE
2392    is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2393    is thus chosen to be big enough to hold the longest architecture name.  */
2394 
2395 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2396 
2397 /* Supported TLS relocations.  */
2398 
2399 enum tls_reloc {
2400   TLS_GD32,
2401   TLS_GD32_FDPIC,
2402   TLS_LDM32,
2403   TLS_LDM32_FDPIC,
2404   TLS_LDO32,
2405   TLS_IE32,
2406   TLS_IE32_FDPIC,
2407   TLS_LE32,
2408   TLS_DESCSEQ	/* GNU scheme */
2409 };
2410 
2411 /* The maximum number of insns to be used when loading a constant.  */
2412 inline static int
2413 arm_constant_limit (bool size_p)
2414 {
2415   return size_p ? 1 : current_tune->constant_limit;
2416 }
2417 
2418 /* Emit an insn that's a simple single-set.  Both the operands must be known
2419    to be valid.  */
2420 inline static rtx_insn *
2421 emit_set_insn (rtx x, rtx y)
2422 {
2423   return emit_insn (gen_rtx_SET (x, y));
2424 }
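/* As a usage sketch (the operands here are hypothetical, and must already be
   valid for the target):

     emit_set_insn (dest_reg, GEN_INT (42));

   emits the single insn (set (reg) (const_int 42)).  */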
2425 
2426 /* Return the number of bits set in VALUE.  */
2427 static unsigned
2428 bit_count (unsigned long value)
2429 {
2430   unsigned long count = 0;
2431 
2432   while (value)
2433     {
2434       count++;
2435       value &= value - 1;  /* Clear the least-significant set bit.  */
2436     }
2437 
2438   return count;
2439 }
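/* The "value &= value - 1" step clears the least-significant set bit, so the
   loop runs once per set bit.  For example, value = 0b101100 goes
   0b101100 -> 0b101000 -> 0b100000 -> 0, giving count = 3.  */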
2440 
2441 /* Return the number of bits set in BMAP.  */
2442 static unsigned
2443 bitmap_popcount (const sbitmap bmap)
2444 {
2445   unsigned int count = 0;
2446   unsigned int n = 0;
2447   sbitmap_iterator sbi;
2448 
2449   EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2450     count++;
2451   return count;
2452 }
2453 
2454 typedef struct
2455 {
2456   machine_mode mode;
2457   const char *name;
2458 } arm_fixed_mode_set;
2459 
2460 /* A small helper for setting fixed-point library libfuncs.  */
2461 
2462 static void
2463 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2464 			     const char *funcname, const char *modename,
2465 			     int num_suffix)
2466 {
2467   char buffer[50];
2468 
2469   if (num_suffix == 0)
2470     sprintf (buffer, "__gnu_%s%s", funcname, modename);
2471   else
2472     sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2473 
2474   set_optab_libfunc (optable, mode, buffer);
2475 }
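/* For example, with the mode tables set up further below, the call

     arm_set_fixed_optab_libfunc (add_optab, E_SQmode, "add", "sq", 3);

   registers the libcall name "__gnu_addsq3" for SQmode addition; a
   num_suffix of 0 would simply omit the trailing digit.  */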
2476 
2477 static void
2478 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2479 			    machine_mode from, const char *funcname,
2480 			    const char *toname, const char *fromname)
2481 {
2482   char buffer[50];
2483   const char *maybe_suffix_2 = "";
2484 
2485   /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
2486   if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2487       && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2488       && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2489     maybe_suffix_2 = "2";
2490 
2491   sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2492 	   maybe_suffix_2);
2493 
2494   set_conv_libfunc (optable, to, from, buffer);
2495 }
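/* For example, a fract conversion between two signed fract modes picks up
   the "2" suffix:

     arm_set_fixed_conv_libfunc (fract_optab, E_DQmode, E_SQmode,
				 "fract", "dq", "sq");

   registers "__gnu_fractsqdq2", whereas a conversion from SFmode to SQmode
   is registered as "__gnu_fractsfsq" with no suffix.  */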
2496 
2497 static GTY(()) rtx speculation_barrier_libfunc;
2498 
2499 /* Record that we have no arithmetic or comparison libfuncs for
2500    machine mode MODE.  */
2501 
2502 static void
2503 arm_block_arith_comp_libfuncs_for_mode (machine_mode mode)
2504 {
2505   /* Arithmetic.  */
2506   set_optab_libfunc (add_optab, mode, NULL);
2507   set_optab_libfunc (sdiv_optab, mode, NULL);
2508   set_optab_libfunc (smul_optab, mode, NULL);
2509   set_optab_libfunc (neg_optab, mode, NULL);
2510   set_optab_libfunc (sub_optab, mode, NULL);
2511 
2512   /* Comparisons.  */
2513   set_optab_libfunc (eq_optab, mode, NULL);
2514   set_optab_libfunc (ne_optab, mode, NULL);
2515   set_optab_libfunc (lt_optab, mode, NULL);
2516   set_optab_libfunc (le_optab, mode, NULL);
2517   set_optab_libfunc (ge_optab, mode, NULL);
2518   set_optab_libfunc (gt_optab, mode, NULL);
2519   set_optab_libfunc (unord_optab, mode, NULL);
2520 }
2521 
2522 /* Set up library functions unique to ARM.  */
2523 static void
2524 arm_init_libfuncs (void)
2525 {
2526   machine_mode mode_iter;
2527 
2528   /* For Linux, we have access to kernel support for atomic operations.  */
2529   if (arm_abi == ARM_ABI_AAPCS_LINUX)
2530     init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2531 
2532   /* There are no special library functions unless we are using the
2533      ARM BPABI.  */
2534   if (!TARGET_BPABI)
2535     return;
2536 
2537   /* The functions below are described in Section 4 of the "Run-Time
2538      ABI for the ARM architecture", Version 1.0.  */
2539 
2540   /* Double-precision floating-point arithmetic.  Table 2.  */
2541   set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2542   set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2543   set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2544   set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2545   set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2546 
2547   /* Double-precision comparisons.  Table 3.  */
2548   set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2549   set_optab_libfunc (ne_optab, DFmode, NULL);
2550   set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2551   set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2552   set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2553   set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2554   set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2555 
2556   /* Single-precision floating-point arithmetic.  Table 4.  */
2557   set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2558   set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2559   set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2560   set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2561   set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2562 
2563   /* Single-precision comparisons.  Table 5.  */
2564   set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2565   set_optab_libfunc (ne_optab, SFmode, NULL);
2566   set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2567   set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2568   set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2569   set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2570   set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2571 
2572   /* Floating-point to integer conversions.  Table 6.  */
2573   set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2574   set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2575   set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2576   set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2577   set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2578   set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2579   set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2580   set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2581 
2582   /* Conversions between floating types.  Table 7.  */
2583   set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2584   set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2585 
2586   /* Integer to floating-point conversions.  Table 8.  */
2587   set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2588   set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2589   set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2590   set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2591   set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2592   set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2593   set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2594   set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2595 
2596   /* Long long.  Table 9.  */
2597   set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2598   set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2599   set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2600   set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2601   set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2602   set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2603   set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2604   set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2605 
2606   /* Integer (32/32->32) division.  \S 4.3.1.  */
2607   set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2608   set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2609 
2610   /* The divmod functions are designed so that they can be used for
2611      plain division, even though they return both the quotient and the
2612      remainder.  The quotient is returned in the usual location (i.e.,
2613      r0 for SImode, {r0, r1} for DImode), just as would be expected
2614      for an ordinary division routine.  Because the AAPCS calling
2615      conventions specify that all of { r0, r1, r2, r3 } are
2616      call-clobbered registers, there is no need to tell the compiler
2617      explicitly that those registers are clobbered by these
2618      routines.  */
2619   set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2620   set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
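/* Given these registrations, a plain C division such as

     long long quot = a / b;

   can be lowered to a single call to __aeabi_ldivmod: the quotient comes
   back in {r0, r1}, exactly where a DImode result is expected, and the
   remainder left in {r2, r3} is simply ignored.  */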
2621 
2622   /* For SImode division the ABI provides div-without-mod routines,
2623      which are faster.  */
2624   set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2625   set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2626 
2627   /* We don't have mod libcalls.  Fortunately gcc knows how to use the
2628      divmod libcalls instead.  */
2629   set_optab_libfunc (smod_optab, DImode, NULL);
2630   set_optab_libfunc (umod_optab, DImode, NULL);
2631   set_optab_libfunc (smod_optab, SImode, NULL);
2632   set_optab_libfunc (umod_optab, SImode, NULL);
2633 
2634   /* Half-precision float operations.  The compiler handles all operations
2635      with NULL libfuncs by converting to SFmode.  */
2636   switch (arm_fp16_format)
2637     {
2638     case ARM_FP16_FORMAT_IEEE:
2639     case ARM_FP16_FORMAT_ALTERNATIVE:
2640 
2641       /* Conversions.  */
2642       set_conv_libfunc (trunc_optab, HFmode, SFmode,
2643 			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
2644 			 ? "__gnu_f2h_ieee"
2645 			 : "__gnu_f2h_alternative"));
2646       set_conv_libfunc (sext_optab, SFmode, HFmode,
2647 			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
2648 			 ? "__gnu_h2f_ieee"
2649 			 : "__gnu_h2f_alternative"));
2650 
2651       set_conv_libfunc (trunc_optab, HFmode, DFmode,
2652 			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
2653 			 ? "__gnu_d2h_ieee"
2654 			 : "__gnu_d2h_alternative"));
2655 
2656       arm_block_arith_comp_libfuncs_for_mode (HFmode);
2657       break;
2658 
2659     default:
2660       break;
2661     }
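/* Since only the conversions above get named libfuncs, __fp16 arithmetic is
   handled by promotion.  As a sketch, on a soft-float target using the IEEE
   format,

     __fp16 a, b;
     float sum = a + b;

   is compiled roughly as __gnu_h2f_ieee (a) + __gnu_h2f_ieee (b), with a
   truncation through __gnu_f2h_ieee only when a result is stored back into
   an __fp16 object.  */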
2662 
2663   /* For all possible libcalls in BFmode, record NULL.  */
2664   FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
2665     {
2666       set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
2667       set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
2668       set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
2669       set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
2670     }
2671   arm_block_arith_comp_libfuncs_for_mode (BFmode);
2672 
2673   /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
2674   {
2675     const arm_fixed_mode_set fixed_arith_modes[] =
2676       {
2677 	{ E_QQmode, "qq" },
2678 	{ E_UQQmode, "uqq" },
2679 	{ E_HQmode, "hq" },
2680 	{ E_UHQmode, "uhq" },
2681 	{ E_SQmode, "sq" },
2682 	{ E_USQmode, "usq" },
2683 	{ E_DQmode, "dq" },
2684 	{ E_UDQmode, "udq" },
2685 	{ E_TQmode, "tq" },
2686 	{ E_UTQmode, "utq" },
2687 	{ E_HAmode, "ha" },
2688 	{ E_UHAmode, "uha" },
2689 	{ E_SAmode, "sa" },
2690 	{ E_USAmode, "usa" },
2691 	{ E_DAmode, "da" },
2692 	{ E_UDAmode, "uda" },
2693 	{ E_TAmode, "ta" },
2694 	{ E_UTAmode, "uta" }
2695       };
2696     const arm_fixed_mode_set fixed_conv_modes[] =
2697       {
2698 	{ E_QQmode, "qq" },
2699 	{ E_UQQmode, "uqq" },
2700 	{ E_HQmode, "hq" },
2701 	{ E_UHQmode, "uhq" },
2702 	{ E_SQmode, "sq" },
2703 	{ E_USQmode, "usq" },
2704 	{ E_DQmode, "dq" },
2705 	{ E_UDQmode, "udq" },
2706 	{ E_TQmode, "tq" },
2707 	{ E_UTQmode, "utq" },
2708 	{ E_HAmode, "ha" },
2709 	{ E_UHAmode, "uha" },
2710 	{ E_SAmode, "sa" },
2711 	{ E_USAmode, "usa" },
2712 	{ E_DAmode, "da" },
2713 	{ E_UDAmode, "uda" },
2714 	{ E_TAmode, "ta" },
2715 	{ E_UTAmode, "uta" },
2716 	{ E_QImode, "qi" },
2717 	{ E_HImode, "hi" },
2718 	{ E_SImode, "si" },
2719 	{ E_DImode, "di" },
2720 	{ E_TImode, "ti" },
2721 	{ E_SFmode, "sf" },
2722 	{ E_DFmode, "df" }
2723       };
2724     unsigned int i, j;
2725 
2726     for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2727       {
2728 	arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2729 				     "add", fixed_arith_modes[i].name, 3);
2730 	arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2731 				     "ssadd", fixed_arith_modes[i].name, 3);
2732 	arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2733 				     "usadd", fixed_arith_modes[i].name, 3);
2734 	arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2735 				     "sub", fixed_arith_modes[i].name, 3);
2736 	arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2737 				     "sssub", fixed_arith_modes[i].name, 3);
2738 	arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2739 				     "ussub", fixed_arith_modes[i].name, 3);
2740 	arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2741 				     "mul", fixed_arith_modes[i].name, 3);
2742 	arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2743 				     "ssmul", fixed_arith_modes[i].name, 3);
2744 	arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2745 				     "usmul", fixed_arith_modes[i].name, 3);
2746 	arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2747 				     "div", fixed_arith_modes[i].name, 3);
2748 	arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2749 				     "udiv", fixed_arith_modes[i].name, 3);
2750 	arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2751 				     "ssdiv", fixed_arith_modes[i].name, 3);
2752 	arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2753 				     "usdiv", fixed_arith_modes[i].name, 3);
2754 	arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2755 				     "neg", fixed_arith_modes[i].name, 2);
2756 	arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2757 				     "ssneg", fixed_arith_modes[i].name, 2);
2758 	arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2759 				     "usneg", fixed_arith_modes[i].name, 2);
2760 	arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2761 				     "ashl", fixed_arith_modes[i].name, 3);
2762 	arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2763 				     "ashr", fixed_arith_modes[i].name, 3);
2764 	arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2765 				     "lshr", fixed_arith_modes[i].name, 3);
2766 	arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2767 				     "ssashl", fixed_arith_modes[i].name, 3);
2768 	arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2769 				     "usashl", fixed_arith_modes[i].name, 3);
2770 	arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2771 				     "cmp", fixed_arith_modes[i].name, 2);
2772       }
2773 
2774     for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2775       for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2776 	{
2777 	  if (i == j
2778 	      || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2779 		  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2780 	    continue;
2781 
2782 	  arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2783 				      fixed_conv_modes[j].mode, "fract",
2784 				      fixed_conv_modes[i].name,
2785 				      fixed_conv_modes[j].name);
2786 	  arm_set_fixed_conv_libfunc (satfract_optab,
2787 				      fixed_conv_modes[i].mode,
2788 				      fixed_conv_modes[j].mode, "satfract",
2789 				      fixed_conv_modes[i].name,
2790 				      fixed_conv_modes[j].name);
2791 	  arm_set_fixed_conv_libfunc (fractuns_optab,
2792 				      fixed_conv_modes[i].mode,
2793 				      fixed_conv_modes[j].mode, "fractuns",
2794 				      fixed_conv_modes[i].name,
2795 				      fixed_conv_modes[j].name);
2796 	  arm_set_fixed_conv_libfunc (satfractuns_optab,
2797 				      fixed_conv_modes[i].mode,
2798 				      fixed_conv_modes[j].mode, "satfractuns",
2799 				      fixed_conv_modes[i].name,
2800 				      fixed_conv_modes[j].name);
2801 	}
2802   }
2803 
2804   if (TARGET_AAPCS_BASED)
2805     synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2806 
2807   speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2808 }
2809 
2810 /* On AAPCS systems, this is the "struct __va_list".  */
2811 static GTY(()) tree va_list_type;
2812 
2813 /* Return the type to use as __builtin_va_list.  */
2814 static tree
2815 arm_build_builtin_va_list (void)
2816 {
2817   tree va_list_name;
2818   tree ap_field;
2819 
2820   if (!TARGET_AAPCS_BASED)
2821     return std_build_builtin_va_list ();
2822 
2823   /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2824      defined as:
2825 
2826        struct __va_list
2827        {
2828 	 void *__ap;
2829        };
2830 
2831      The C Library ABI further reinforces this definition in \S
2832      4.1.
2833 
2834      We must follow this definition exactly.  The structure tag
2835      name is visible in C++ mangled names, and thus forms a part
2836      of the ABI.  The field name may be used by people who
2837      #include <stdarg.h>.  */
2838   /* Create the type.  */
2839   va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2840   /* Give it the required name.  */
2841   va_list_name = build_decl (BUILTINS_LOCATION,
2842 			     TYPE_DECL,
2843 			     get_identifier ("__va_list"),
2844 			     va_list_type);
2845   DECL_ARTIFICIAL (va_list_name) = 1;
2846   TYPE_NAME (va_list_type) = va_list_name;
2847   TYPE_STUB_DECL (va_list_type) = va_list_name;
2848   /* Create the __ap field.  */
2849   ap_field = build_decl (BUILTINS_LOCATION,
2850 			 FIELD_DECL,
2851 			 get_identifier ("__ap"),
2852 			 ptr_type_node);
2853   DECL_ARTIFICIAL (ap_field) = 1;
2854   DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2855   TYPE_FIELDS (va_list_type) = ap_field;
2856   /* Compute its layout.  */
2857   layout_type (va_list_type);
2858 
2859   return va_list_type;
2860 }
2861 
2862 /* Return an expression of type "void *" pointing to the next
2863    available argument in a variable-argument list.  VALIST is the
2864    user-level va_list object, of type __builtin_va_list.  */
2865 static tree
2866 arm_extract_valist_ptr (tree valist)
2867 {
2868   if (TREE_TYPE (valist) == error_mark_node)
2869     return error_mark_node;
2870 
2871   /* On an AAPCS target, the pointer is stored within "struct
2872      va_list".  */
2873   if (TARGET_AAPCS_BASED)
2874     {
2875       tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2876       valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2877 		       valist, ap_field, NULL_TREE);
2878     }
2879 
2880   return valist;
2881 }
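/* On an AAPCS target the COMPONENT_REF built above is the tree equivalent of
   writing "valist.__ap" in C; on other targets VALIST is already the bare
   pointer, so it is returned unchanged.  */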
2882 
2883 /* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
2884 static void
2885 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2886 {
2887   valist = arm_extract_valist_ptr (valist);
2888   std_expand_builtin_va_start (valist, nextarg);
2889 }
2890 
2891 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
2892 static tree
2893 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2894 			  gimple_seq *post_p)
2895 {
2896   valist = arm_extract_valist_ptr (valist);
2897   return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2898 }
2899 
2900 /* Check any incompatible options that the user has specified.  */
2901 static void
2902 arm_option_check_internal (struct gcc_options *opts)
2903 {
2904   int flags = opts->x_target_flags;
2905 
2906   /* iWMMXt and NEON are incompatible.  */
2907   if (TARGET_IWMMXT
2908       && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2909     error ("iWMMXt and NEON are incompatible");
2910 
2911   /* Make sure that the processor choice does not conflict with any of the
2912      other command line choices.  */
2913   if (TARGET_ARM_P (flags)
2914       && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2915     error ("target CPU does not support ARM mode");
2916 
2917   /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet.  */
2918   if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2919     warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2920 
2921   if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2922     warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2923 
2924   /* If this target is normally configured to use APCS frames, warn if they
2925      are turned off and debugging is turned on.  */
2926   if (TARGET_ARM_P (flags)
2927       && write_symbols != NO_DEBUG
2928       && !TARGET_APCS_FRAME
2929       && (TARGET_DEFAULT & MASK_APCS_FRAME))
2930     warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
2931 	     "debugging");
2932 
2933   /* iWMMXt unsupported under Thumb mode.  */
2934   if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2935     error ("iWMMXt unsupported under Thumb mode");
2936 
2937   if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2938     error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
2939 
2940   if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2941     {
2942       error ("RTP PIC is incompatible with Thumb");
2943       flag_pic = 0;
2944     }
2945 
2946   if (target_pure_code || target_slow_flash_data)
2947     {
2948       const char *flag = (target_pure_code ? "-mpure-code" :
2949 					     "-mslow-flash-data");
2950       bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;
2951 
2952       /* We only support -mslow-flash-data on M-profile targets with
2953 	 MOVT.  */
2954       if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes))
2955 	error ("%s only supports non-pic code on M-profile targets with the "
2956 	       "MOVT instruction", flag);
2957 
2958       /* We only support -mpure-code on M-profile targets.  */
2959       if (target_pure_code && common_unsupported_modes)
2960 	error ("%s only supports non-pic code on M-profile targets", flag);
2961 
2962       /* Cannot load addresses: -mslow-flash-data forbids literal pool and
2963 	 -mword-relocations forbids relocation of MOVT/MOVW.  */
2964       if (target_word_relocations)
2965 	error ("%s incompatible with %<-mword-relocations%>", flag);
2966     }
2967 }
2968 
2969 /* Recompute the global settings depending on target attribute options.  */
2970 
2971 static void
2972 arm_option_params_internal (void)
2973 {
2974   /* If we are not using the default (ARM mode) section anchor offset
2975      ranges, then set the correct ranges now.  */
2976   if (TARGET_THUMB1)
2977     {
2978       /* Thumb-1 LDR instructions cannot have negative offsets.
2979          Permissible positive offset ranges are 5-bit (for byte loads),
2980          6-bit (for halfword loads), or 7-bit (for word loads).
2981          Empirical results suggest a 7-bit anchor range gives the best
2982          overall code size.  */
2983       targetm.min_anchor_offset = 0;
2984       targetm.max_anchor_offset = 127;
2985     }
2986   else if (TARGET_THUMB2)
2987     {
2988       /* The minimum is set such that the total size of the block
2989          for a particular anchor is 248 + 1 + 4095 bytes, which is
2990          divisible by eight, ensuring natural spacing of anchors.  */
2991       targetm.min_anchor_offset = -248;
2992       targetm.max_anchor_offset = 4095;
2993     }
2994   else
2995     {
2996       targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2997       targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2998     }
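  /* Worked example (illustrative, not from the original sources): the
     Thumb-2 anchor block above spans offsets [-248, 4095], i.e.
     248 + 1 + 4095 = 4344 bytes, and 4344 / 8 = 543, so the block size is
     indeed a multiple of eight; the Thumb-1 limit of 127 is the top of the
     7-bit byte-offset range (0..127) used by word loads.  */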
2999 
3000   /* Increase the number of conditional instructions with -Os.  */
3001   max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
3002 
3003   /* For THUMB2, we limit the conditional sequence to one IT block.  */
3004   if (TARGET_THUMB2)
3005     max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
3006 
3007   if (TARGET_THUMB1)
3008     targetm.md_asm_adjust = thumb1_md_asm_adjust;
3009   else
3010     targetm.md_asm_adjust = arm_md_asm_adjust;
3011 }
3012 
3013 /* True if -mflip-thumb should next add an attribute for the default
3014    mode, false if it should next add an attribute for the opposite mode.  */
3015 static GTY(()) bool thumb_flipper;
3016 
3017 /* Options after initial target override.  */
3018 static GTY(()) tree init_optimize;
3019 
3020 static void
3021 arm_override_options_after_change_1 (struct gcc_options *opts)
3022 {
3023   /* -falign-functions without argument: supply one.  */
3024   if (opts->x_flag_align_functions && !opts->x_str_align_functions)
3025     opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
3026       && opts->x_optimize_size ? "2" : "4";
3027 }
3028 
3029 /* Implement targetm.override_options_after_change.  */
3030 
3031 static void
3032 arm_override_options_after_change (void)
3033 {
3034   arm_configure_build_target (&arm_active_target,
3035 			      TREE_TARGET_OPTION (target_option_default_node),
3036 			      false);
3037 
3038   arm_override_options_after_change_1 (&global_options);
3039 }
3040 
3041 /* Implement TARGET_OPTION_SAVE.  */
3042 static void
3043 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
3044 {
3045   ptr->x_arm_arch_string = opts->x_arm_arch_string;
3046   ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
3047   ptr->x_arm_tune_string = opts->x_arm_tune_string;
3048 }
3049 
3050 /* Implement TARGET_OPTION_RESTORE.  */
3051 static void
3052 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
3053 {
3054   opts->x_arm_arch_string = ptr->x_arm_arch_string;
3055   opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
3056   opts->x_arm_tune_string = ptr->x_arm_tune_string;
3057   arm_configure_build_target (&arm_active_target, ptr, false);
3058   arm_option_reconfigure_globals ();
3059 }
3060 
3061 /* Reset options between modes that the user has specified.  */
3062 static void
3063 arm_option_override_internal (struct gcc_options *opts,
3064 			      struct gcc_options *opts_set)
3065 {
3066   arm_override_options_after_change_1 (opts);
3067 
3068   if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3069     {
3070       /* The default is to enable interworking, so this warning message would
3071 	 be confusing to users who have just compiled with
3072 	 e.g. -march=armv4.  */
3073       /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3074       opts->x_target_flags &= ~MASK_INTERWORK;
3075     }
3076 
3077   if (TARGET_THUMB_P (opts->x_target_flags)
3078       && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3079     {
3080       warning (0, "target CPU does not support THUMB instructions");
3081       opts->x_target_flags &= ~MASK_THUMB;
3082     }
3083 
3084   if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3085     {
3086       /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3087       opts->x_target_flags &= ~MASK_APCS_FRAME;
3088     }
3089 
3090   /* Callee super interworking implies thumb interworking.  Adding
3091      this to the flags here simplifies the logic elsewhere.  */
3092   if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3093     opts->x_target_flags |= MASK_INTERWORK;
3094 
3095   /* Need to remember initial values so combinations of options like
3096      -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
3097   cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3098 
3099   if (! opts_set->x_arm_restrict_it)
3100     opts->x_arm_restrict_it = arm_arch8;
3101 
3102   /* ARM execution state and M profile don't have [restrict] IT.  */
3103   if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3104     opts->x_arm_restrict_it = 0;
3105 
3106   /* Use the IT size from CPU specific tuning unless -mrestrict-it is used.  */
3107   if (!opts_set->x_arm_restrict_it
3108       && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
3109     opts->x_arm_restrict_it = 0;
3110 
3111   /* Enable -munaligned-access by default for
3112      - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3113      i.e. Thumb2 and ARM state only.
3114      - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3115      - ARMv8 architecture-based processors.
3116 
3117      Disable -munaligned-access by default for
3118      - all pre-ARMv6 architecture-based processors
3119      - ARMv6-M architecture-based processors
3120      - ARMv8-M Baseline processors.  */
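  /* For instance (illustrative, assuming the usual architecture flags):
     -mcpu=cortex-m0 (ARMv6-M) falls in the "disable" set below, while
     -mcpu=cortex-m4 (ARMv7E-M) satisfies arm_arch6 && arm_arch7 and so
     defaults to -munaligned-access.  */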
3121 
3122   if (! opts_set->x_unaligned_access)
3123     {
3124       opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3125 			  && arm_arch6 && (arm_arch_notm || arm_arch7));
3126     }
3127   else if (opts->x_unaligned_access == 1
3128 	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3129     {
3130       warning (0, "target CPU does not support unaligned accesses");
3131       opts->x_unaligned_access = 0;
3132     }
3133 
3134   /* Don't warn since it's on by default in -O2.  */
3135   if (TARGET_THUMB1_P (opts->x_target_flags))
3136     opts->x_flag_schedule_insns = 0;
3137   else
3138     opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3139 
3140   /* Disable shrink-wrap when optimizing function for size, since it tends to
3141      generate additional returns.  */
3142   if (optimize_function_for_size_p (cfun)
3143       && TARGET_THUMB2_P (opts->x_target_flags))
3144     opts->x_flag_shrink_wrap = false;
3145   else
3146     opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3147 
3148   /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3149      - epilogue_insns - does not accurately model the corresponding insns
3150      emitted in the asm file.  In particular, see the comment in thumb_exit
3151      'Find out how many of the (return) argument registers we can corrupt'.
3152      As a consequence, the epilogue may clobber registers without fipa-ra
3153      finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
3154      TODO: Accurately model clobbers for epilogue_insns and reenable
3155      fipa-ra.  */
3156   if (TARGET_THUMB1_P (opts->x_target_flags))
3157     opts->x_flag_ipa_ra = 0;
3158   else
3159     opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3160 
3161   /* Thumb2 inline assembly code should always use unified syntax.
3162      This will apply to ARM and Thumb1 eventually.  */
3163   if (TARGET_THUMB2_P (opts->x_target_flags))
3164     opts->x_inline_asm_unified = true;
3165 
3166 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3167   SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3168 #endif
3169 }
3170 
3171 static sbitmap isa_all_fpubits_internal;
3172 static sbitmap isa_all_fpbits;
3173 static sbitmap isa_quirkbits;
3174 
3175 /* Configure a build target TARGET from the user-specified options OPTS and
3176    OPTS_SET.  If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3177    architecture have been specified, but the two are not identical.  */
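/* Illustrative example (not from the original sources): with
   "-mcpu=cortex-m3 -march=armv7-a" the two ISA bit sets differ in more than
   quirk and FPU bits, so when WARN_COMPATIBLE is true the routine warns and
   lets -march win for code generation while -mcpu supplies the default
   tuning.  */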
3178 void
3179 arm_configure_build_target (struct arm_build_target *target,
3180 			    struct cl_target_option *opts,
3181 			    bool warn_compatible)
3182 {
3183   const cpu_option *arm_selected_tune = NULL;
3184   const arch_option *arm_selected_arch = NULL;
3185   const cpu_option *arm_selected_cpu = NULL;
3186   const arm_fpu_desc *arm_selected_fpu = NULL;
3187   const char *tune_opts = NULL;
3188   const char *arch_opts = NULL;
3189   const char *cpu_opts = NULL;
3190 
3191   bitmap_clear (target->isa);
3192   target->core_name = NULL;
3193   target->arch_name = NULL;
3194 
3195   if (opts->x_arm_arch_string)
3196     {
3197       arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3198 						      "-march",
3199 						      opts->x_arm_arch_string);
3200       arch_opts = strchr (opts->x_arm_arch_string, '+');
3201     }
3202 
3203   if (opts->x_arm_cpu_string)
3204     {
3205       arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3206 						    opts->x_arm_cpu_string);
3207       cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3208       arm_selected_tune = arm_selected_cpu;
3209       /* If taking the tuning from -mcpu, we don't need to rescan the
3210 	 options for tuning.  */
3211     }
3212 
3213   if (opts->x_arm_tune_string)
3214     {
3215       arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3216 						     opts->x_arm_tune_string);
3217       tune_opts = strchr (opts->x_arm_tune_string, '+');
3218     }
3219 
3220   if (arm_selected_arch)
3221     {
3222       arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3223       arm_parse_option_features (target->isa, &arm_selected_arch->common,
3224 				 arch_opts);
3225 
3226       if (arm_selected_cpu)
3227 	{
3228 	  auto_sbitmap cpu_isa (isa_num_bits);
3229 	  auto_sbitmap isa_delta (isa_num_bits);
3230 
3231 	  arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3232 	  arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3233 				     cpu_opts);
3234 	  bitmap_xor (isa_delta, cpu_isa, target->isa);
3235 	  /* Ignore any bits that are quirk bits.  */
3236 	  bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3237 	  /* Ignore (for now) any bits that might be set by -mfpu.  */
3238 	  bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits_internal);
3239 
3240 	  /* And if the target ISA lacks floating point, ignore any
3241 	     extensions that depend on that.  */
3242 	  if (!bitmap_bit_p (target->isa, isa_bit_vfpv2))
3243 	    bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);
3244 
3245 	  if (!bitmap_empty_p (isa_delta))
3246 	    {
3247 	      if (warn_compatible)
3248 		warning (0, "switch %<-mcpu=%s%> conflicts "
3249 			 "with %<-march=%s%> switch",
3250 			 arm_selected_cpu->common.name,
3251 			 arm_selected_arch->common.name);
3252 	      /* -march wins for code generation.
3253 		 -mcpu wins for default tuning.  */
3254 	      if (!arm_selected_tune)
3255 		arm_selected_tune = arm_selected_cpu;
3256 
3257 	      arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3258 	      target->arch_name = arm_selected_arch->common.name;
3259 	    }
3260 	  else
3261 	    {
3262 	      /* Architecture and CPU are essentially the same.
3263 		 Prefer the CPU setting.  */
3264 	      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3265 	      target->core_name = arm_selected_cpu->common.name;
3266 	      /* Copy the CPU's capabilities, so that we inherit the
3267 		 appropriate extensions and quirks.  */
3268 	      bitmap_copy (target->isa, cpu_isa);
3269 	    }
3270 	}
3271       else
3272 	{
3273 	  /* Pick a CPU based on the architecture.  */
3274 	  arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3275 	  target->arch_name = arm_selected_arch->common.name;
3276 	  /* Note: target->core_name is left unset in this path.  */
3277 	}
3278     }
3279   else if (arm_selected_cpu)
3280     {
3281       target->core_name = arm_selected_cpu->common.name;
3282       arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3283       arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3284 				 cpu_opts);
3285       arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3286     }
3287   /* If the user did not specify a processor or architecture, choose
3288      one for them.  */
3289   else
3290     {
3291       const cpu_option *sel;
3292       auto_sbitmap sought_isa (isa_num_bits);
3293       bitmap_clear (sought_isa);
3294       auto_sbitmap default_isa (isa_num_bits);
3295 
3296       arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3297 						    TARGET_CPU_DEFAULT);
3298       cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3299       gcc_assert (arm_selected_cpu->common.name);
3300 
3301       /* RWE: All of the selection logic below (to the end of this
3302 	 'if' clause) looks somewhat suspect.  It appears to be mostly
3303 	 there to support forcing thumb support when the default CPU
3304 	 does not have thumb (somewhat dubious in terms of what the
3305 	 user might be expecting).  I think it should be removed once
3306 	 support for the pre-thumb era cores is removed.  */
3307       sel = arm_selected_cpu;
3308       arm_initialize_isa (default_isa, sel->common.isa_bits);
3309       arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3310 				 cpu_opts);
3311 
3312       /* Now check to see if the user has specified any command line
3313 	 switches that require certain abilities from the cpu.  */
3314 
3315       if (TARGET_INTERWORK || TARGET_THUMB)
3316 	bitmap_set_bit (sought_isa, isa_bit_thumb);
3317 
3318       /* If there are such requirements and the default CPU does not
3319 	 satisfy them, we need to run over the complete list of
3320 	 cores looking for one that is satisfactory.  */
3321       if (!bitmap_empty_p (sought_isa)
3322 	  && !bitmap_subset_p (sought_isa, default_isa))
3323 	{
3324 	  auto_sbitmap candidate_isa (isa_num_bits);
3325 	  /* We're only interested in a CPU with at least the
3326 	     capabilities of the default CPU and the required
3327 	     additional features.  */
3328 	  bitmap_ior (default_isa, default_isa, sought_isa);
3329 
3330 	  /* Try to locate a CPU type that supports all of the abilities
3331 	     of the default CPU, plus the extra abilities requested by
3332 	     the user.  */
3333 	  for (sel = all_cores; sel->common.name != NULL; sel++)
3334 	    {
3335 	      arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3336 	      /* An exact match?  */
3337 	      if (bitmap_equal_p (default_isa, candidate_isa))
3338 		break;
3339 	    }
3340 
3341 	  if (sel->common.name == NULL)
3342 	    {
3343 	      unsigned current_bit_count = isa_num_bits;
3344 	      const cpu_option *best_fit = NULL;
3345 
3346 	      /* Ideally we would like to issue an error message here
3347 		 saying that it was not possible to find a CPU compatible
3348 		 with the default CPU, but which also supports the command
3349 		 line options specified by the programmer, and so they
3350 		 ought to use the -mcpu=<name> command line option to
3351 		 override the default CPU type.
3352 
3353 		 If we cannot find a CPU that has exactly the
3354 		 characteristics of the default CPU and the given
3355 		 command line options we scan the array again looking
3356 		 for a best match.  The best match must have at least
3357 		 the capabilities of the perfect match.  */
3358 	      for (sel = all_cores; sel->common.name != NULL; sel++)
3359 		{
3360 		  arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3361 
3362 		  if (bitmap_subset_p (default_isa, candidate_isa))
3363 		    {
3364 		      unsigned count;
3365 
3366 		      bitmap_and_compl (candidate_isa, candidate_isa,
3367 					default_isa);
3368 		      count = bitmap_popcount (candidate_isa);
3369 
3370 		      if (count < current_bit_count)
3371 			{
3372 			  best_fit = sel;
3373 			  current_bit_count = count;
3374 			}
3375 		    }
3376 
3377 		  gcc_assert (best_fit);
3378 		  sel = best_fit;
3379 		}
3380 	    }
3381 	  arm_selected_cpu = sel;
3382 	}
3383 
3384       /* Now we know the CPU, we can finally initialize the target
3385 	 structure.  */
3386       target->core_name = arm_selected_cpu->common.name;
3387       arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3388       arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3389 				 cpu_opts);
3390       arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3391     }
3392 
3393   gcc_assert (arm_selected_cpu);
3394   gcc_assert (arm_selected_arch);
3395 
3396   if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3397     {
3398       arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3399       auto_sbitmap fpu_bits (isa_num_bits);
3400 
3401       arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3402       bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
3403       bitmap_ior (target->isa, target->isa, fpu_bits);
3404     }
3405 
3406   /* If we have the soft-float ABI, clear any feature bits relating to use of
3407      floating-point operations.  They'll just confuse things later on.  */
3408   if (arm_float_abi == ARM_FLOAT_ABI_SOFT)
3409     bitmap_and_compl (target->isa, target->isa, isa_all_fpbits);
3410 
3411   /* There may be implied bits which we still need to enable. These are
3412      non-named features which are needed to complete other sets of features,
3413      but cannot be enabled from arm-cpus.in due to being shared between
3414      multiple fgroups. Each entry in all_implied_fbits is of the form
3415      ante -> cons, meaning that if the feature "ante" is enabled, we should
3416      implicitly enable "cons".  */
3417   const struct fbit_implication *impl = all_implied_fbits;
3418   while (impl->ante)
3419     {
3420       if (bitmap_bit_p (target->isa, impl->ante))
3421 	bitmap_set_bit (target->isa, impl->cons);
3422       impl++;
3423     }
3424 
3425   if (!arm_selected_tune)
3426     arm_selected_tune = arm_selected_cpu;
3427   else /* Validate the features passed to -mtune.  */
3428     arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3429 
3430   const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3431 
3432   /* Finish initializing the target structure.  */
3433   if (!target->arch_name)
3434     target->arch_name = arm_selected_arch->common.name;
3435   target->arch_pp_name = arm_selected_arch->arch;
3436   target->base_arch = arm_selected_arch->base_arch;
3437   target->profile = arm_selected_arch->profile;
3438 
3439   target->tune_flags = tune_data->tune_flags;
3440   target->tune = tune_data->tune;
3441   target->tune_core = tune_data->scheduler;
3442 }
3443 
3444 /* Fix up any incompatible options that the user has specified.  */
3445 static void
3446 arm_option_override (void)
3447 {
3448   static const enum isa_feature fpu_bitlist_internal[]
3449     = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3450   /* isa_bit_mve_float is also part of FP bit list for arch v8.1-m.main.  */
3451   static const enum isa_feature fp_bitlist[]
3452     = { ISA_ALL_FP, isa_bit_mve_float, isa_nobit };
3453   static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3454   cl_target_option opts;
3455 
3456   isa_quirkbits = sbitmap_alloc (isa_num_bits);
3457   arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3458 
3459   isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
3460   isa_all_fpbits = sbitmap_alloc (isa_num_bits);
3461   arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
3462   arm_initialize_isa (isa_all_fpbits, fp_bitlist);
3463 
3464   arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3465 
3466   if (!global_options_set.x_arm_fpu_index)
3467     {
3468       bool ok;
3469       int fpu_index;
3470 
3471       ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3472 				  CL_TARGET);
3473       gcc_assert (ok);
3474       arm_fpu_index = (enum fpu_type) fpu_index;
3475     }
3476 
3477   cl_target_option_save (&opts, &global_options);
3478   arm_configure_build_target (&arm_active_target, &opts, true);
3479 
3480 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3481   SUBTARGET_OVERRIDE_OPTIONS;
3482 #endif
3483 
3484   /* Initialize boolean versions of the architectural flags, for use
3485      in the arm.md file and for enabling feature flags.  */
3486   arm_option_reconfigure_globals ();
3487 
3488   arm_tune = arm_active_target.tune_core;
3489   tune_flags = arm_active_target.tune_flags;
3490   current_tune = arm_active_target.tune;
3491 
3492   /* TBD: Dwarf info for apcs frame is not handled yet.  */
3493   if (TARGET_APCS_FRAME)
3494     flag_shrink_wrap = false;
3495 
3496   if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3497     {
3498       warning (0, "%<-mapcs-stack-check%> incompatible with "
3499 	       "%<-mno-apcs-frame%>");
3500       target_flags |= MASK_APCS_FRAME;
3501     }
3502 
3503   if (TARGET_POKE_FUNCTION_NAME)
3504     target_flags |= MASK_APCS_FRAME;
3505 
3506   if (TARGET_APCS_REENT && flag_pic)
3507     error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3508 
3509   if (TARGET_APCS_REENT)
3510     warning (0, "APCS reentrant code not supported.  Ignored");
3511 
3512   /* Set up some tuning parameters.  */
3513   arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3514   arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3515   arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3516   arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3517   arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3518   arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3519 
3520   /* For arm2/3 there is no need to do any scheduling if we are doing
3521      software floating-point.  */
3522   if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3523     flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3524 
3525   /* Override the default structure alignment for AAPCS ABI.  */
3526   if (!global_options_set.x_arm_structure_size_boundary)
3527     {
3528       if (TARGET_AAPCS_BASED)
3529 	arm_structure_size_boundary = 8;
3530     }
3531   else
3532     {
3533       warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3534 
3535       if (arm_structure_size_boundary != 8
3536 	  && arm_structure_size_boundary != 32
3537 	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3538 	{
3539 	  if (ARM_DOUBLEWORD_ALIGN)
3540 	    warning (0,
3541 		     "structure size boundary can only be set to 8, 32 or 64");
3542 	  else
3543 	    warning (0, "structure size boundary can only be set to 8 or 32");
3544 	  arm_structure_size_boundary
3545 	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3546 	}
3547     }
3548 
3549   if (TARGET_VXWORKS_RTP)
3550     {
3551       if (!global_options_set.x_arm_pic_data_is_text_relative)
3552 	arm_pic_data_is_text_relative = 0;
3553     }
3554   else if (flag_pic
3555 	   && !arm_pic_data_is_text_relative
3556 	   && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3557     /* When text & data segments don't have a fixed displacement, the
3558        intended use is with a single, read only, pic base register.
3559        Unless the user explicitly requested not to do that, set
3560        it.  */
3561     target_flags |= MASK_SINGLE_PIC_BASE;
3562 
3563   /* If stack checking is disabled, we can use r10 as the PIC register,
3564      which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
3565   if (flag_pic && TARGET_SINGLE_PIC_BASE)
3566     {
3567       if (TARGET_VXWORKS_RTP)
3568 	warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3569       arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3570     }
3571 
3572   if (flag_pic && TARGET_VXWORKS_RTP)
3573     arm_pic_register = 9;
3574 
3575   /* If in FDPIC mode then force arm_pic_register to be r9.  */
3576   if (TARGET_FDPIC)
3577     {
3578       arm_pic_register = FDPIC_REGNUM;
3579       if (TARGET_THUMB1)
3580 	sorry ("FDPIC mode is not supported in Thumb-1 mode");
3581     }
3582 
3583   if (arm_pic_register_string != NULL)
3584     {
3585       int pic_register = decode_reg_name (arm_pic_register_string);
3586 
3587       if (!flag_pic)
3588 	warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3589 
3590       /* Prevent the user from choosing an obviously stupid PIC register.  */
3591       else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
3592 	       || pic_register == HARD_FRAME_POINTER_REGNUM
3593 	       || pic_register == STACK_POINTER_REGNUM
3594 	       || pic_register >= PC_REGNUM
3595 	       || (TARGET_VXWORKS_RTP
3596 		   && (unsigned int) pic_register != arm_pic_register))
3597 	error ("unable to use %qs for PIC register", arm_pic_register_string);
3598       else
3599 	arm_pic_register = pic_register;
3600     }
3601 
3602   if (flag_pic)
3603     target_word_relocations = 1;
3604 
3605   /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
3606   if (fix_cm3_ldrd == 2)
3607     {
3608       if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3609 	fix_cm3_ldrd = 1;
3610       else
3611 	fix_cm3_ldrd = 0;
3612     }
3613 
3614   /* Enable fix_vlldm by default if required.  */
3615   if (fix_vlldm == 2)
3616     {
3617       if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_vlldm))
3618 	fix_vlldm = 1;
3619       else
3620 	fix_vlldm = 0;
3621     }
3622 
3623   /* Hot/Cold partitioning is not currently supported, since we can't
3624      handle literal pool placement in that case.  */
3625   if (flag_reorder_blocks_and_partition)
3626     {
3627       inform (input_location,
3628 	      "%<-freorder-blocks-and-partition%> not supported "
3629 	      "on this architecture");
3630       flag_reorder_blocks_and_partition = 0;
3631       flag_reorder_blocks = 1;
3632     }
3633 
3634   if (flag_pic)
3635     /* Hoisting PIC address calculations more aggressively provides a small,
3636        but measurable, size reduction for PIC code.  Therefore, we decrease
3637        the bar for unrestricted expression hoisting to the cost of PIC address
3638        calculation, which is 2 instructions.  */
3639     SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3640 			 param_gcse_unrestricted_cost, 2);
3641 
3642   /* ARM EABI defaults to strict volatile bitfields.  */
3643   if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3644       && abi_version_at_least(2))
3645     flag_strict_volatile_bitfields = 1;
3646 
3647   /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
3648      have deemed it beneficial (signified by setting
3649      prefetch.num_slots to 1 or more).  */
3650   if (flag_prefetch_loop_arrays < 0
3651       && HAVE_prefetch
3652       && optimize >= 3
3653       && current_tune->prefetch.num_slots > 0)
3654     flag_prefetch_loop_arrays = 1;
3655 
3656   /* Set up parameters to be used in prefetching algorithm.  Do not
3657      override the defaults unless we are tuning for a core we have
3658      researched values for.  */
3659   if (current_tune->prefetch.num_slots > 0)
3660     SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3661 			 param_simultaneous_prefetches,
3662 			 current_tune->prefetch.num_slots);
3663   if (current_tune->prefetch.l1_cache_line_size >= 0)
3664     SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3665 			 param_l1_cache_line_size,
3666 			 current_tune->prefetch.l1_cache_line_size);
3667   if (current_tune->prefetch.l1_cache_size >= 0)
3668     SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3669 			 param_l1_cache_size,
3670 			 current_tune->prefetch.l1_cache_size);
3671 
3672   /* Look through ready list and all of queue for instructions
3673      relevant for L2 auto-prefetcher.  */
3674   int sched_autopref_queue_depth;
3675 
3676   switch (current_tune->sched_autopref)
3677     {
3678     case tune_params::SCHED_AUTOPREF_OFF:
3679       sched_autopref_queue_depth = -1;
3680       break;
3681 
3682     case tune_params::SCHED_AUTOPREF_RANK:
3683       sched_autopref_queue_depth = 0;
3684       break;
3685 
3686     case tune_params::SCHED_AUTOPREF_FULL:
3687       sched_autopref_queue_depth = max_insn_queue_index + 1;
3688       break;
3689 
3690     default:
3691       gcc_unreachable ();
3692     }
3693 
3694   SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3695 		       param_sched_autopref_queue_depth,
3696 		       sched_autopref_queue_depth);
3697 
3698   /* Currently, for slow flash data, we just disable literal pools.  We also
3699      disable it for pure-code.  */
3700   if (target_slow_flash_data || target_pure_code)
3701     arm_disable_literal_pool = true;
3702 
3703   /* Disable scheduling fusion by default if the target is not an armv7
3704      processor or does not prefer ldrd/strd.  */
3705   if (flag_schedule_fusion == 2
3706       && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3707     flag_schedule_fusion = 0;
3708 
3709   /* Need to remember initial options before they are overridden.  */
3710   init_optimize = build_optimization_node (&global_options);
3711 
3712   arm_options_perform_arch_sanity_checks ();
3713   arm_option_override_internal (&global_options, &global_options_set);
3714   arm_option_check_internal (&global_options);
3715   arm_option_params_internal ();
3716 
3717   /* Create the default target_options structure.  */
3718   target_option_default_node = target_option_current_node
3719     = build_target_option_node (&global_options);
3720 
3721   /* Register global variables with the garbage collector.  */
3722   arm_add_gc_roots ();
3723 
3724   /* Init initial mode for testing.  */
3725   thumb_flipper = TARGET_THUMB;
3726 }
3727 
3728 
3729 /* Reconfigure global status flags from the active_target.isa.  */
3730 void
3731 arm_option_reconfigure_globals (void)
3732 {
3733   sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3734   arm_base_arch = arm_active_target.base_arch;
3735 
3736   /* Initialize boolean versions of the architectural flags, for use
3737      in the arm.md file.  */
3738   arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3739   arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3740   arm_arch5t =  bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3741   arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3742   arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3743   arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3744   arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3745   arm_arch6m = arm_arch6 && !arm_arch_notm;
3746   arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3747   arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3748   arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3749   arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3750   arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3751   arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
3752   arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
3753   arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
3754 				    isa_bit_armv8_1m_main);
3755   arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3756   arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3757   arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3758   arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3759   arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3760   arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3761   arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3762   arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3763   arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3764   arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3765   arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
3766   arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);
3767 
3768   arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3769   if (arm_fp16_inst)
3770     {
3771       if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3772 	error ("selected fp16 options are incompatible");
3773       arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3774     }
3775 
3776   arm_arch_cde = 0;
3777   arm_arch_cde_coproc = 0;
3778   int cde_bits[] = {isa_bit_cdecp0, isa_bit_cdecp1, isa_bit_cdecp2,
3779 		    isa_bit_cdecp3, isa_bit_cdecp4, isa_bit_cdecp5,
3780 		    isa_bit_cdecp6, isa_bit_cdecp7};
3781   for (int i = 0, e = ARRAY_SIZE (cde_bits); i < e; i++)
3782     {
3783       int cde_bit = bitmap_bit_p (arm_active_target.isa, cde_bits[i]);
3784       if (cde_bit)
3785 	{
3786 	  arm_arch_cde |= cde_bit;
3787 	  arm_arch_cde_coproc |= arm_arch_cde_coproc_bits[i];
3788 	}
3789     }
3790 
3791   /* And finally, set up some quirks.  */
3792   arm_arch_no_volatile_ce
3793     = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3794   arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3795 					    isa_bit_quirk_armv6kz);
3796 
3797   /* Use the cp15 method if it is available.  */
3798   if (target_thread_pointer == TP_AUTO)
3799     {
3800       if (arm_arch6k && !TARGET_THUMB1)
3801 	target_thread_pointer = TP_CP15;
3802       else
3803 	target_thread_pointer = TP_SOFT;
3804     }
3805 }
3806 
3807 /* Perform some validation between the desired architecture and the rest of the
3808    options.  */
3809 void
3810 arm_options_perform_arch_sanity_checks (void)
3811 {
3812   /* V5T code we generate is completely interworking capable, so we turn off
3813      TARGET_INTERWORK here to avoid many tests later on.  */
3814 
3815   /* XXX However, we must pass the right pre-processor defines to CPP
3816      or GLD can get confused.  This is a hack.  */
3817   if (TARGET_INTERWORK)
3818     arm_cpp_interwork = 1;
3819 
3820   if (arm_arch5t)
3821     target_flags &= ~MASK_INTERWORK;
3822 
3823   if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3824     error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3825 
3826   if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3827     error ("iwmmxt abi requires an iwmmxt capable cpu");
3828 
3829   /* BPABI targets use linker tricks to allow interworking on cores
3830      without thumb support.  */
3831   if (TARGET_INTERWORK
3832       && !TARGET_BPABI
3833       && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3834     {
3835       warning (0, "target CPU does not support interworking" );
3836       target_flags &= ~MASK_INTERWORK;
3837     }
3838 
3839   /* If soft-float is specified then don't use FPU.  */
3840   if (TARGET_SOFT_FLOAT)
3841     arm_fpu_attr = FPU_NONE;
3842   else
3843     arm_fpu_attr = FPU_VFP;
3844 
3845   if (TARGET_AAPCS_BASED)
3846     {
3847       if (TARGET_CALLER_INTERWORKING)
3848 	error ("AAPCS does not support %<-mcaller-super-interworking%>");
3849       else
3850 	if (TARGET_CALLEE_INTERWORKING)
3851 	  error ("AAPCS does not support %<-mcallee-super-interworking%>");
3852     }
3853 
3854   /* __fp16 support currently assumes the core has ldrh.  */
3855   if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3856     sorry ("__fp16 and no ldrh");
3857 
3858   if (use_cmse && !arm_arch_cmse)
3859     error ("target CPU does not support ARMv8-M Security Extensions");
3860 
3861   /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3862      and ARMv8-M Baseline and Mainline do not allow such configuration.  */
3863   if (use_cmse && TARGET_HARD_FLOAT && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3864     error ("ARMv8-M Security Extensions incompatible with selected FPU");
3865 
3866 
3867   if (TARGET_AAPCS_BASED)
3868     {
3869       if (arm_abi == ARM_ABI_IWMMXT)
3870 	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3871       else if (TARGET_HARD_FLOAT_ABI)
3872 	{
3873 	  arm_pcs_default = ARM_PCS_AAPCS_VFP;
3874 	  if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
3875 	      && !bitmap_bit_p (arm_active_target.isa, isa_bit_mve))
3876 	    error ("%<-mfloat-abi=hard%>: selected processor lacks an FPU");
3877 	}
3878       else
3879 	arm_pcs_default = ARM_PCS_AAPCS;
3880     }
3881   else
3882     {
3883       if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3884 	sorry ("%<-mfloat-abi=hard%> and VFP");
3885 
3886       if (arm_abi == ARM_ABI_APCS)
3887 	arm_pcs_default = ARM_PCS_APCS;
3888       else
3889 	arm_pcs_default = ARM_PCS_ATPCS;
3890     }
3891 }
3892 
3893 /* Test whether a local function descriptor is canonical, i.e.,
3894    whether we can use GOTOFFFUNCDESC to compute the address of the
3895    function.  */
3896 static bool
3897 arm_fdpic_local_funcdesc_p (rtx fnx)
3898 {
3899   tree fn;
3900   enum symbol_visibility vis;
3901   bool ret;
3902 
3903   if (!TARGET_FDPIC)
3904     return true;
3905 
3906   if (! SYMBOL_REF_LOCAL_P (fnx))
3907     return false;
3908 
3909   fn = SYMBOL_REF_DECL (fnx);
3910 
3911   if (! fn)
3912     return false;
3913 
3914   vis = DECL_VISIBILITY (fn);
3915 
3916   if (vis == VISIBILITY_PROTECTED)
3917     /* Private function descriptors for protected functions are not
3918        canonical.  Temporarily change the visibility to global so that
3919        we can ensure uniqueness of funcdesc pointers.  */
3920     DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;
3921 
3922   ret = default_binds_local_p_1 (fn, flag_pic);
3923 
3924   DECL_VISIBILITY (fn) = vis;
3925 
3926   return ret;
3927 }
3928 
3929 static void
3930 arm_add_gc_roots (void)
3931 {
3932   gcc_obstack_init(&minipool_obstack);
3933   minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3934 }
3935 
3936 /* A table of known ARM exception types.
3937    For use with the interrupt function attribute.  */
3938 
3939 typedef struct
3940 {
3941   const char *const arg;
3942   const unsigned long return_value;
3943 }
3944 isr_attribute_arg;
3945 
3946 static const isr_attribute_arg isr_attribute_args [] =
3947 {
3948   { "IRQ",   ARM_FT_ISR },
3949   { "irq",   ARM_FT_ISR },
3950   { "FIQ",   ARM_FT_FIQ },
3951   { "fiq",   ARM_FT_FIQ },
3952   { "ABORT", ARM_FT_ISR },
3953   { "abort", ARM_FT_ISR },
3954   { "UNDEF", ARM_FT_EXCEPTION },
3955   { "undef", ARM_FT_EXCEPTION },
3956   { "SWI",   ARM_FT_EXCEPTION },
3957   { "swi",   ARM_FT_EXCEPTION },
3958   { NULL,    ARM_FT_NORMAL }
3959 };
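/* Example usage (illustrative, not from the original sources): a handler
   declared as
     void my_irq_handler (void) __attribute__ ((interrupt ("IRQ")));
   has its attribute argument looked up in the table above by arm_isr_value
   below; unlisted strings yield ARM_FT_UNKNOWN.  */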
3960 
3961 /* Returns the (interrupt) function type of the current
3962    function, or ARM_FT_UNKNOWN if the type cannot be determined.  */
3963 
3964 static unsigned long
3965 arm_isr_value (tree argument)
3966 {
3967   const isr_attribute_arg * ptr;
3968   const char *              arg;
3969 
3970   if (!arm_arch_notm)
3971     return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3972 
3973   /* No argument - default to IRQ.  */
3974   if (argument == NULL_TREE)
3975     return ARM_FT_ISR;
3976 
3977   /* Get the value of the argument.  */
3978   if (TREE_VALUE (argument) == NULL_TREE
3979       || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3980     return ARM_FT_UNKNOWN;
3981 
3982   arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3983 
3984   /* Check it against the list of known arguments.  */
3985   for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3986     if (streq (arg, ptr->arg))
3987       return ptr->return_value;
3988 
3989   /* An unrecognized interrupt type.  */
3990   return ARM_FT_UNKNOWN;
3991 }
3992 
3993 /* Computes the type of the current function.  */
3994 
3995 static unsigned long
3996 arm_compute_func_type (void)
3997 {
3998   unsigned long type = ARM_FT_UNKNOWN;
3999   tree a;
4000   tree attr;
4001 
4002   gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
4003 
4004   /* Decide if the current function is volatile.  Such functions
4005      never return, and many memory cycles can be saved by not storing
4006      register values that will never be needed again.  This optimization
4007      was added to speed up context switching in a kernel application.  */
4008   if (optimize > 0
4009       && (TREE_NOTHROW (current_function_decl)
4010           || !(flag_unwind_tables
4011                || (flag_exceptions
4012 		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
4013       && TREE_THIS_VOLATILE (current_function_decl))
4014     type |= ARM_FT_VOLATILE;
4015 
4016   if (cfun->static_chain_decl != NULL)
4017     type |= ARM_FT_NESTED;
4018 
4019   attr = DECL_ATTRIBUTES (current_function_decl);
4020 
4021   a = lookup_attribute ("naked", attr);
4022   if (a != NULL_TREE)
4023     type |= ARM_FT_NAKED;
4024 
4025   a = lookup_attribute ("isr", attr);
4026   if (a == NULL_TREE)
4027     a = lookup_attribute ("interrupt", attr);
4028 
4029   if (a == NULL_TREE)
4030     type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
4031   else
4032     type |= arm_isr_value (TREE_VALUE (a));
4033 
4034   if (lookup_attribute ("cmse_nonsecure_entry", attr))
4035     type |= ARM_FT_CMSE_ENTRY;
4036 
4037   return type;
4038 }
4039 
4040 /* Returns the type of the current function.  */
4041 
4042 unsigned long
4043 arm_current_func_type (void)
4044 {
4045   if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
4046     cfun->machine->func_type = arm_compute_func_type ();
4047 
4048   return cfun->machine->func_type;
4049 }
4050 
4051 bool
4052 arm_allocate_stack_slots_for_args (void)
4053 {
4054   /* Naked functions should not allocate stack slots for arguments.  */
4055   return !IS_NAKED (arm_current_func_type ());
4056 }
4057 
4058 static bool
4059 arm_warn_func_return (tree decl)
4060 {
4061   /* Naked functions are implemented entirely in assembly, including the
4062      return sequence, so suppress warnings about this.  */
4063   return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
4064 }
4065 
4066 
4067 /* Output assembler code for a block containing the constant parts
4068    of a trampoline, leaving space for the variable parts.
4069 
4070    On the ARM, (if r8 is the static chain regnum, and remembering that
4071    referencing pc adds an offset of 8) the trampoline looks like:
4072 	   ldr 		r8, [pc, #0]
4073 	   ldr		pc, [pc]
4074 	   .word	static chain value
4075 	   .word	function's address
4076    XXX FIXME: When the trampoline returns, r8 will be clobbered.
4077 
4078    In FDPIC mode, the trampoline looks like:
4079 	   .word	trampoline address
4080 	   .word	trampoline GOT address
4081 	   ldr 		r12, [pc, #8] ; #4 for Arm mode
4082 	   ldr 		r9,  [pc, #8] ; #4 for Arm mode
4083 	   ldr		pc,  [pc, #8] ; #4 for Arm mode
4084 	   .word	static chain value
4085 	   .word	GOT address
4086 	   .word	function's address
4087 */
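/* Byte-offset sketch of the FDPIC layout above (illustrative, derived from
   arm_asm_trampoline_template and arm_trampoline_init below): words 0 and 4
   hold the trampoline's own descriptor (address of the code at offset 8,
   plus the GOT pointer), words 8-16 are the three ldr instructions, word 20
   is the static chain value, word 24 the GOT address and word 28 the target
   function's address; arm_trampoline_init fills in offsets 0, 4, 20, 24 and
   28 at run time.  */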
4088 
4089 static void
4090 arm_asm_trampoline_template (FILE *f)
4091 {
4092   fprintf (f, "\t.syntax unified\n");
4093 
4094   if (TARGET_FDPIC)
4095     {
4096       /* The first two words are a function descriptor pointing to the
4097 	 trampoline code just below.  */
4098       if (TARGET_ARM)
4099 	fprintf (f, "\t.arm\n");
4100       else if (TARGET_THUMB2)
4101 	fprintf (f, "\t.thumb\n");
4102       else
4103 	/* Only ARM and Thumb-2 are supported.  */
4104 	gcc_unreachable ();
4105 
4106       assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4107       assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4108       /* Trampoline code which sets the static chain register but also
4109 	 PIC register before jumping into real code.  */
4110       asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4111 		   STATIC_CHAIN_REGNUM, PC_REGNUM,
4112 		   TARGET_THUMB2 ? 8 : 4);
4113       asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4114 		   PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
4115 		   TARGET_THUMB2 ? 8 : 4);
4116       asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4117 		   PC_REGNUM, PC_REGNUM,
4118 		   TARGET_THUMB2 ? 8 : 4);
4119       assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4120     }
4121   else if (TARGET_ARM)
4122     {
4123       fprintf (f, "\t.arm\n");
4124       asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
4125       asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
4126     }
4127   else if (TARGET_THUMB2)
4128     {
4129       fprintf (f, "\t.thumb\n");
4130       /* The Thumb-2 trampoline is similar to the arm implementation.
4131 	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
4132       asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
4133 		   STATIC_CHAIN_REGNUM, PC_REGNUM);
4134       asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
4135     }
4136   else
4137     {
4138       ASM_OUTPUT_ALIGN (f, 2);
4139       fprintf (f, "\t.code\t16\n");
4140       fprintf (f, ".Ltrampoline_start:\n");
4141       asm_fprintf (f, "\tpush\t{r0, r1}\n");
4142       asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4143       asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
4144       asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4145       asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
4146       asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
4147     }
4148   assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4149   assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4150 }
4151 
4152 /* Emit RTL insns to initialize the variable parts of a trampoline.  */
4153 
4154 static void
4155 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4156 {
4157   rtx fnaddr, mem, a_tramp;
4158 
4159   emit_block_move (m_tramp, assemble_trampoline_template (),
4160 		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
4161 
4162   if (TARGET_FDPIC)
4163     {
4164       rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
4165       rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
4166       rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
4167       /* The function start address is at offset 8, but in Thumb mode
4168 	 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
4169 	 below.  */
4170       rtx trampoline_code_start
4171 	= plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);
4172 
4173       /* Write initial funcdesc which points to the trampoline.  */
4174       mem = adjust_address (m_tramp, SImode, 0);
4175       emit_move_insn (mem, trampoline_code_start);
4176       mem = adjust_address (m_tramp, SImode, 4);
4177       emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
4178       /* Setup static chain.  */
4179       mem = adjust_address (m_tramp, SImode, 20);
4180       emit_move_insn (mem, chain_value);
4181       /* GOT + real function entry point.  */
4182       mem = adjust_address (m_tramp, SImode, 24);
4183       emit_move_insn (mem, gotaddr);
4184       mem = adjust_address (m_tramp, SImode, 28);
4185       emit_move_insn (mem, fnaddr);
4186     }
4187   else
4188     {
4189       mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
4190       emit_move_insn (mem, chain_value);
4191 
4192       mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
4193       fnaddr = XEXP (DECL_RTL (fndecl), 0);
4194       emit_move_insn (mem, fnaddr);
4195     }
4196 
4197   a_tramp = XEXP (m_tramp, 0);
4198   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4199 		     LCT_NORMAL, VOIDmode, a_tramp, Pmode,
4200 		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
4201 }
4202 
4203 /* Thumb trampolines should be entered in thumb mode, so set
4204    the bottom bit of the address.  */
4205 
4206 static rtx
4207 arm_trampoline_adjust_address (rtx addr)
4208 {
4209   /* For FDPIC don't fix trampoline address since it's a function
4210      descriptor and not a function address.  */
4211   if (TARGET_THUMB && !TARGET_FDPIC)
4212     addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
4213 				NULL, 0, OPTAB_LIB_WIDEN);
4214   return addr;
4215 }
4216 
4217 /* Return 1 if it is possible to return using a single instruction.
4218    If SIBLING is non-null, this is a test for a return before a sibling
4219    call.  SIBLING is the call insn, so we can examine its register usage.  */
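/* Illustrative note (not from the original sources): a typical single
   instruction return is "ldmfd sp!, {..., pc}", which restores the saved
   registers and loads PC in one go; the checks below reject the cases where
   such a pop cannot be used (stack adjustments, interworking, CMSE entry
   functions, VFP/iWMMXt saves, and so on).  */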
4220 
4221 int
4222 use_return_insn (int iscond, rtx sibling)
4223 {
4224   int regno;
4225   unsigned int func_type;
4226   unsigned long saved_int_regs;
4227   unsigned HOST_WIDE_INT stack_adjust;
4228   arm_stack_offsets *offsets;
4229 
4230   /* Never use a return instruction before reload has run.  */
4231   if (!reload_completed)
4232     return 0;
4233 
4234   func_type = arm_current_func_type ();
4235 
4236   /* Naked, volatile and stack alignment functions need special
4237      consideration.  */
4238   if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4239     return 0;
4240 
4241   /* So do interrupt functions that use the frame pointer and Thumb
4242      interrupt functions.  */
4243   if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4244     return 0;
4245 
4246   if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4247       && !optimize_function_for_size_p (cfun))
4248     return 0;
4249 
4250   offsets = arm_get_frame_offsets ();
4251   stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4252 
4253   /* As do variadic functions.  */
4254   if (crtl->args.pretend_args_size
4255       || cfun->machine->uses_anonymous_args
4256       /* Or if the function calls __builtin_eh_return () */
4257       || crtl->calls_eh_return
4258       /* Or if the function calls alloca */
4259       || cfun->calls_alloca
4260       /* Or if there is a stack adjustment.  However, if the stack pointer
4261 	 is saved on the stack, we can use a pre-incrementing stack load.  */
4262       || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4263 				 && stack_adjust == 4))
4264       /* Or if the static chain register was saved above the frame, under the
4265 	 assumption that the stack pointer isn't saved on the stack.  */
4266       || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4267           && arm_compute_static_chain_stack_bytes() != 0))
4268     return 0;
4269 
4270   saved_int_regs = offsets->saved_regs_mask;
4271 
4272   /* Unfortunately, the insn
4273 
4274        ldmib sp, {..., sp, ...}
4275 
4276      triggers a bug on most SA-110 based devices, such that the stack
4277      pointer won't be correctly restored if the instruction takes a
4278      page fault.  We work around this problem by popping r3 along with
4279      the other registers, since that is never slower than executing
4280      another instruction.
4281 
4282      We test for !arm_arch5t here, because code for any architecture
4283      less than this could potentially be run on one of the buggy
4284      chips.  */
4285   if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4286     {
4287       /* Validate that r3 is a call-clobbered register (always true in
4288 	 the default abi) ...  */
4289       if (!call_used_or_fixed_reg_p (3))
4290 	return 0;
4291 
4292       /* ... that it isn't being used for a return value ... */
4293       if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4294 	return 0;
4295 
4296       /* ... or for a tail-call argument ...  */
4297       if (sibling)
4298 	{
4299 	  gcc_assert (CALL_P (sibling));
4300 
4301 	  if (find_regno_fusage (sibling, USE, 3))
4302 	    return 0;
4303 	}
4304 
4305       /* ... and that there are no call-saved registers in r0-r2
4306 	 (always true in the default ABI).  */
4307       if (saved_int_regs & 0x7)
4308 	return 0;
4309     }
4310 
4311   /* Can't be done if interworking with Thumb, and any registers have been
4312      stacked.  */
4313   if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4314     return 0;
4315 
4316   /* On StrongARM, conditional returns are expensive if they aren't
4317      taken and multiple registers have been stacked.  */
4318   if (iscond && arm_tune_strongarm)
4319     {
4320       /* Conditional return when just the LR is stored is a simple
4321 	 conditional-load instruction, that's not expensive.  */
4322       if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4323 	return 0;
4324 
4325       if (flag_pic
4326 	  && arm_pic_register != INVALID_REGNUM
4327 	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4328 	return 0;
4329     }
4330 
4331   /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4332      several instructions if anything needs to be popped.  Armv8.1-M Mainline
4333      also needs several instructions to save and restore FP context.  */
4334   if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
4335     return 0;
4336 
4337   /* If there are saved registers but the LR isn't saved, then we need
4338      two instructions for the return.  */
4339   if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4340     return 0;
4341 
4342   /* Can't be done if any of the VFP regs are pushed,
4343      since this also requires an insn.  */
4344   if (TARGET_VFP_BASE)
4345     for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4346       if (df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
4347 	return 0;
4348 
4349   if (TARGET_REALLY_IWMMXT)
4350     for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4351       if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno))
4352 	return 0;
4353 
4354   return 1;
4355 }
4356 
4357 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4358    shrink-wrapping if possible.  This is the case if we need to emit a
4359    prologue, which we can test by looking at the offsets.  */
4360 bool
4361 use_simple_return_p (void)
4362 {
4363   arm_stack_offsets *offsets;
4364 
4365   /* Note this function can be called before or after reload.  */
4366   if (!reload_completed)
4367     arm_compute_frame_layout ();
4368 
4369   offsets = arm_get_frame_offsets ();
4370   return offsets->outgoing_args != 0;
4371 }
4372 
4373 /* Return TRUE if int I is a valid immediate ARM constant.  */
4374 
4375 int
4376 const_ok_for_arm (HOST_WIDE_INT i)
4377 {
4378   int lowbit;
4379 
4380   /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4381      be all zero, or all one.  */
4382   if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4383       && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4384 	  != ((~(unsigned HOST_WIDE_INT) 0)
4385 	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4386     return FALSE;
4387 
4388   i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4389 
4390   /* Fast return for 0 and small values.  We must do this for zero, since
4391      the code below can't handle that one case.  */
4392   if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4393     return TRUE;
4394 
4395   /* Get the number of trailing zeros.  */
4396   lowbit = ffs((int) i) - 1;
4397 
4398   /* Only even shifts are allowed in ARM mode so round down to the
4399      nearest even number.  */
4400   if (TARGET_ARM)
4401     lowbit &= ~1;
4402 
4403   if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4404     return TRUE;
4405 
4406   if (TARGET_ARM)
4407     {
4408       /* Allow rotated constants in ARM mode.  */
4409       if (lowbit <= 4
4410 	   && ((i & ~0xc000003f) == 0
4411 	       || (i & ~0xf000000f) == 0
4412 	       || (i & ~0xfc000003) == 0))
4413 	return TRUE;
4414     }
4415   else if (TARGET_THUMB2)
4416     {
4417       HOST_WIDE_INT v;
4418 
4419       /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
4420       v = i & 0xff;
4421       v |= v << 16;
4422       if (i == v || i == (v | (v << 8)))
4423 	return TRUE;
4424 
4425       /* Allow repeated pattern 0xXY00XY00.  */
4426       v = i & 0xff00;
4427       v |= v << 16;
4428       if (i == v)
4429 	return TRUE;
4430     }
4431   else if (TARGET_HAVE_MOVT)
4432     {
4433       /* Thumb-1 Targets with MOVT.  */
4434       if (i > 0xffff)
4435 	return FALSE;
4436       else
4437 	return TRUE;
4438     }
4439 
4440   return FALSE;
4441 }
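/* For illustration: in ARM mode the test above accepts any value that is
   an 8-bit byte rotated right by an even amount, e.g. 0x000000ff,
   0x00ff0000 or 0xf000000f (0xff rotated right by 4), but rejects
   0x00010001, whose set bits span more than eight bits.  In Thumb-2 that
   last value is accepted as the replicated pattern 0x00XY00XY with
   XY == 0x01.  */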
4442 
4443 /* Return true if I is a valid constant for the operation CODE.  */
4444 int
4445 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4446 {
4447   if (const_ok_for_arm (i))
4448     return 1;
4449 
4450   switch (code)
4451     {
4452     case SET:
4453       /* See if we can use movw.  */
4454       if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4455 	return 1;
4456       else
4457 	/* Otherwise, try mvn.  */
4458 	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4459 
4460     case PLUS:
4461       /* See if we can use addw or subw.  */
4462       if (TARGET_THUMB2
4463 	  && ((i & 0xfffff000) == 0
4464 	      || ((-i) & 0xfffff000) == 0))
4465 	return 1;
4466       /* Fall through.  */
4467     case COMPARE:
4468     case EQ:
4469     case NE:
4470     case GT:
4471     case LE:
4472     case LT:
4473     case GE:
4474     case GEU:
4475     case LTU:
4476     case GTU:
4477     case LEU:
4478     case UNORDERED:
4479     case ORDERED:
4480     case UNEQ:
4481     case UNGE:
4482     case UNLT:
4483     case UNGT:
4484     case UNLE:
4485       return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4486 
4487     case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
4488     case XOR:
4489       return 0;
4490 
4491     case IOR:
4492       if (TARGET_THUMB2)
4493 	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4494       return 0;
4495 
4496     case AND:
4497       return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4498 
4499     default:
4500       gcc_unreachable ();
4501     }
4502 }
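/* For illustration: a PLUS of -256 is accepted above even though
   0xffffff00 is not itself a valid immediate, because the negated value
   0x100 is (the insn is then emitted as a SUB); similarly an AND with
   0xffffff00 succeeds via const_ok_for_arm (~i) and typically matches
   the BIC pattern.  */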
4503 
4504 /* Return true if I is a valid di mode constant for the operation CODE.  */
4505 int
4506 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4507 {
4508   HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4509   HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4510   rtx hi = GEN_INT (hi_val);
4511   rtx lo = GEN_INT (lo_val);
4512 
4513   if (TARGET_THUMB1)
4514     return 0;
4515 
4516   switch (code)
4517     {
4518     case AND:
4519     case IOR:
4520     case XOR:
4521       return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
4522 	     || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;
4523     case PLUS:
4524       return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4525 
4526     default:
4527       return 0;
4528     }
4529 }
4530 
4531 /* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
4532    Avoid generating useless code when one of the bytes is zero.  */
4533 void
4534 thumb1_gen_const_int (rtx op0, HOST_WIDE_INT op1)
4535 {
4536   bool mov_done_p = false;
4537   int i;
4538 
4539   /* Emit upper 3 bytes if needed.  */
4540   for (i = 0; i < 3; i++)
4541     {
4542       int byte = (op1 >> (8 * (3 - i))) & 0xff;
4543 
4544       if (byte)
4545 	{
4546 	  emit_set_insn (op0, mov_done_p
4547 			 ? gen_rtx_PLUS (SImode, op0, GEN_INT (byte))
4548 			 : GEN_INT (byte));
4549 	  mov_done_p = true;
4550 	}
4551 
4552       if (mov_done_p)
4553 	emit_set_insn (op0, gen_rtx_ASHIFT (SImode, op0, GEN_INT (8)));
4554     }
4555 
4556   /* Emit lower byte if needed.  */
4557   if (!mov_done_p)
4558     emit_set_insn (op0, GEN_INT (op1 & 0xff));
4559   else if (op1 & 0xff)
4560     emit_set_insn (op0, gen_rtx_PLUS (SImode, op0, GEN_INT (op1 & 0xff)));
4561 }
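/* For illustration (rD is a placeholder for the target register): for
   OP1 == 0x00120034 the loop above emits roughly

	movs  rD, #0x12
	lsls  rD, rD, #8
	lsls  rD, rD, #8
	adds  rD, rD, #0x34

   i.e. four insns, with no add emitted for the zero bytes.  */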
4562 
4563 /* Emit a sequence of insns to handle a large constant.
4564    CODE is the code of the operation required, it can be any of SET, PLUS,
4565    IOR, AND, XOR, MINUS;
4566    MODE is the mode in which the operation is being performed;
4567    VAL is the integer to operate on;
4568    SOURCE is the other operand (a register, or a null-pointer for SET);
4569    SUBTARGETS means it is safe to create scratch registers if that will
4570    either produce a simpler sequence, or if we will want to CSE the values.
4571    Return value is the number of insns emitted.  */
4572 
4573 /* ??? Tweak this for thumb2.  */
4574 int
4575 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4576 		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4577 {
4578   rtx cond;
4579 
4580   if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4581     cond = COND_EXEC_TEST (PATTERN (insn));
4582   else
4583     cond = NULL_RTX;
4584 
4585   if (subtargets || code == SET
4586       || (REG_P (target) && REG_P (source)
4587 	  && REGNO (target) != REGNO (source)))
4588     {
4589       /* After arm_reorg has been called, we can't fix up expensive
4590 	 constants by pushing them into memory so we must synthesize
4591 	 them in-line, regardless of the cost.  This is only likely to
4592 	 be more costly on chips that have load delay slots and we are
4593 	 compiling without running the scheduler (so no splitting
4594 	 occurred before the final instruction emission).
4595 
4596 	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4597       */
4598       if (!cfun->machine->after_arm_reorg
4599 	  && !cond
4600 	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4601 				1, 0)
4602 	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
4603 		 + (code != SET))))
4604 	{
4605 	  if (code == SET)
4606 	    {
4607 	      /* Currently SET is the only monadic value for CODE, all
4608 		 the rest are dyadic.  */
4609 	      if (TARGET_USE_MOVT)
4610 		arm_emit_movpair (target, GEN_INT (val));
4611 	      else
4612 		emit_set_insn (target, GEN_INT (val));
4613 
4614 	      return 1;
4615 	    }
4616 	  else
4617 	    {
4618 	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4619 
4620 	      if (TARGET_USE_MOVT)
4621 		arm_emit_movpair (temp, GEN_INT (val));
4622 	      else
4623 		emit_set_insn (temp, GEN_INT (val));
4624 
4625 	      /* For MINUS, the value is subtracted from, since we never
4626 		 have subtraction of a constant.  */
4627 	      if (code == MINUS)
4628 		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4629 	      else
4630 		emit_set_insn (target,
4631 			       gen_rtx_fmt_ee (code, mode, source, temp));
4632 	      return 2;
4633 	    }
4634 	}
4635     }
4636 
4637   return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4638 			   1);
4639 }
4640 
4641 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4642    ARM/THUMB2 immediates and add up to VAL.
4643    The function return value gives the number of insns required.  */
4644 static int
4645 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4646 			    struct four_ints *return_sequence)
4647 {
4648   int best_consecutive_zeros = 0;
4649   int i;
4650   int best_start = 0;
4651   int insns1, insns2;
4652   struct four_ints tmp_sequence;
4653 
4654   /* If we aren't targeting ARM, the best place to start is always at
4655      the bottom; otherwise look more closely.  */
4656   if (TARGET_ARM)
4657     {
4658       for (i = 0; i < 32; i += 2)
4659 	{
4660 	  int consecutive_zeros = 0;
4661 
4662 	  if (!(val & (3 << i)))
4663 	    {
4664 	      while ((i < 32) && !(val & (3 << i)))
4665 		{
4666 		  consecutive_zeros += 2;
4667 		  i += 2;
4668 		}
4669 	      if (consecutive_zeros > best_consecutive_zeros)
4670 		{
4671 		  best_consecutive_zeros = consecutive_zeros;
4672 		  best_start = i - consecutive_zeros;
4673 		}
4674 	      i -= 2;
4675 	    }
4676 	}
4677     }
4678 
4679   /* So long as it won't require any more insns to do so, it's
4680      desirable to emit a small constant (in bits 0...9) in the last
4681      insn.  This way there is more chance that it can be combined with
4682      a later addressing insn to form a pre-indexed load or store
4683      operation.  Consider:
4684 
4685 	   *((volatile int *)0xe0000100) = 1;
4686 	   *((volatile int *)0xe0000110) = 2;
4687 
4688      We want this to wind up as:
4689 
4690 	    mov rA, #0xe0000000
4691 	    mov rB, #1
4692 	    str rB, [rA, #0x100]
4693 	    mov rB, #2
4694 	    str rB, [rA, #0x110]
4695 
4696      rather than having to synthesize both large constants from scratch.
4697 
4698      Therefore, we calculate how many insns would be required to emit
4699      the constant starting from `best_start', and also starting from
4700      zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
4701      yield a shorter sequence, we may as well use zero.  */
4702   insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4703   if (best_start != 0
4704       && ((HOST_WIDE_INT_1U << best_start) < val))
4705     {
4706       insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4707       if (insns2 <= insns1)
4708 	{
4709 	  *return_sequence = tmp_sequence;
4710 	  insns1 = insns2;
4711 	}
4712     }
4713 
4714   return insns1;
4715 }
4716 
4717 /* As for optimal_immediate_sequence, but starting at bit-position I.  */
4718 static int
4719 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4720 			     struct four_ints *return_sequence, int i)
4721 {
4722   int remainder = val & 0xffffffff;
4723   int insns = 0;
4724 
4725   /* Try and find a way of doing the job in either two or three
4726      instructions.
4727 
4728      In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4729      location.  We start at position I.  This may be the MSB, or
4730      optimal_immediate_sequence may have positioned it at the largest block
4731      of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4732      wrapping around to the top of the word when we drop off the bottom.
4733      In the worst case this code should produce no more than four insns.
4734 
4735      In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4736      constants, shifted to any arbitrary location.  We should always start
4737      at the MSB.  */
4738   do
4739     {
4740       int end;
4741       unsigned int b1, b2, b3, b4;
4742       unsigned HOST_WIDE_INT result;
4743       int loc;
4744 
4745       gcc_assert (insns < 4);
4746 
4747       if (i <= 0)
4748 	i += 32;
4749 
4750       /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
4751       if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4752 	{
4753 	  loc = i;
4754 	  if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4755 	    /* We can use addw/subw for the last 12 bits.  */
4756 	    result = remainder;
4757 	  else
4758 	    {
4759 	      /* Use an 8-bit shifted/rotated immediate.  */
4760 	      end = i - 8;
4761 	      if (end < 0)
4762 		end += 32;
4763 	      result = remainder & ((0x0ff << end)
4764 				   | ((i < end) ? (0xff >> (32 - end))
4765 						: 0));
4766 	      i -= 8;
4767 	    }
4768 	}
4769       else
4770 	{
4771 	  /* Arm allows rotates by a multiple of two. Thumb-2 allows
4772 	     arbitrary shifts.  */
4773 	  i -= TARGET_ARM ? 2 : 1;
4774 	  continue;
4775 	}
4776 
4777       /* Next, see if we can do a better job with a thumb2 replicated
4778 	 constant.
4779 
4780          We do it this way around to catch the cases like 0x01F001E0 where
4781 	 two 8-bit immediates would work, but a replicated constant would
4782 	 make it worse.
4783 
4784          TODO: 16-bit constants that don't clear all the bits, but still win.
4785          TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
4786       if (TARGET_THUMB2)
4787 	{
4788 	  b1 = (remainder & 0xff000000) >> 24;
4789 	  b2 = (remainder & 0x00ff0000) >> 16;
4790 	  b3 = (remainder & 0x0000ff00) >> 8;
4791 	  b4 = remainder & 0xff;
4792 
4793 	  if (loc > 24)
4794 	    {
4795 	      /* The 8-bit immediate already found clears b1 (and maybe b2),
4796 		 but must leave b3 and b4 alone.  */
4797 
4798 	      /* First try to find a 32-bit replicated constant that clears
4799 		 almost everything.  We can assume that we can't do it in one,
4800 		 or else we wouldn't be here.  */
4801 	      unsigned int tmp = b1 & b2 & b3 & b4;
4802 	      unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4803 				  + (tmp << 24);
4804 	      unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4805 					    + (tmp == b3) + (tmp == b4);
4806 	      if (tmp
4807 		  && (matching_bytes >= 3
4808 		      || (matching_bytes == 2
4809 			  && const_ok_for_op (remainder & ~tmp2, code))))
4810 		{
4811 		  /* At least 3 of the bytes match, and the fourth has at
4812 		     least as many bits set, or two of the bytes match
4813 		     and it will only require one more insn to finish.  */
4814 		  result = tmp2;
4815 		  i = tmp != b1 ? 32
4816 		      : tmp != b2 ? 24
4817 		      : tmp != b3 ? 16
4818 		      : 8;
4819 		}
4820 
4821 	      /* Second, try to find a 16-bit replicated constant that can
4822 		 leave three of the bytes clear.  If b2 or b4 is already
4823 		 zero, then we can.  If the 8-bit from above would not
4824 		 clear b2 anyway, then we still win.  */
4825 	      else if (b1 == b3 && (!b2 || !b4
4826 			       || (remainder & 0x00ff0000 & ~result)))
4827 		{
4828 		  result = remainder & 0xff00ff00;
4829 		  i = 24;
4830 		}
4831 	    }
4832 	  else if (loc > 16)
4833 	    {
4834 	      /* The 8-bit immediate already found clears b2 (and maybe b3)
4835 		 and we don't get here unless b1 is already clear, but it will
4836 		 leave b4 unchanged.  */
4837 
4838 	      /* If we can clear b2 and b4 at once, then we win, since the
4839 		 8-bits couldn't possibly reach that far.  */
4840 	      if (b2 == b4)
4841 		{
4842 		  result = remainder & 0x00ff00ff;
4843 		  i = 16;
4844 		}
4845 	    }
4846 	}
4847 
4848       return_sequence->i[insns++] = result;
4849       remainder &= ~result;
4850 
4851       if (code == SET || code == MINUS)
4852 	code = PLUS;
4853     }
4854   while (remainder);
4855 
4856   return insns;
4857 }
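/* For illustration: in ARM mode a value such as 0x00ffff00 is not a
   single rotated 8-bit immediate, so the routine above returns the
   two-element sequence { 0x00ff0000, 0x0000ff00 }, each of which is
   valid on its own; the caller then materializes the constant from that
   sequence (e.g. a MOV followed by an ORR or ADD).  */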
4858 
4859 /* Emit an instruction with the indicated PATTERN.  If COND is
4860    non-NULL, conditionalize the execution of the instruction on COND
4861    being true.  */
4862 
4863 static void
4864 emit_constant_insn (rtx cond, rtx pattern)
4865 {
4866   if (cond)
4867     pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4868   emit_insn (pattern);
4869 }
4870 
4871 /* As above, but extra parameter GENERATE which, if clear, suppresses
4872    RTL generation.  */
4873 
4874 static int
4875 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4876 		  unsigned HOST_WIDE_INT val, rtx target, rtx source,
4877 		  int subtargets, int generate)
4878 {
4879   int can_invert = 0;
4880   int can_negate = 0;
4881   int final_invert = 0;
4882   int i;
4883   int set_sign_bit_copies = 0;
4884   int clear_sign_bit_copies = 0;
4885   int clear_zero_bit_copies = 0;
4886   int set_zero_bit_copies = 0;
4887   int insns = 0, neg_insns, inv_insns;
4888   unsigned HOST_WIDE_INT temp1, temp2;
4889   unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4890   struct four_ints *immediates;
4891   struct four_ints pos_immediates, neg_immediates, inv_immediates;
4892 
4893   /* Find out which operations are safe for a given CODE.  Also do a quick
4894      check for degenerate cases; these can occur when DImode operations
4895      are split.  */
4896   switch (code)
4897     {
4898     case SET:
4899       can_invert = 1;
4900       break;
4901 
4902     case PLUS:
4903       can_negate = 1;
4904       break;
4905 
4906     case IOR:
4907       if (remainder == 0xffffffff)
4908 	{
4909 	  if (generate)
4910 	    emit_constant_insn (cond,
4911 				gen_rtx_SET (target,
4912 					     GEN_INT (ARM_SIGN_EXTEND (val))));
4913 	  return 1;
4914 	}
4915 
4916       if (remainder == 0)
4917 	{
4918 	  if (reload_completed && rtx_equal_p (target, source))
4919 	    return 0;
4920 
4921 	  if (generate)
4922 	    emit_constant_insn (cond, gen_rtx_SET (target, source));
4923 	  return 1;
4924 	}
4925       break;
4926 
4927     case AND:
4928       if (remainder == 0)
4929 	{
4930 	  if (generate)
4931 	    emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4932 	  return 1;
4933 	}
4934       if (remainder == 0xffffffff)
4935 	{
4936 	  if (reload_completed && rtx_equal_p (target, source))
4937 	    return 0;
4938 	  if (generate)
4939 	    emit_constant_insn (cond, gen_rtx_SET (target, source));
4940 	  return 1;
4941 	}
4942       can_invert = 1;
4943       break;
4944 
4945     case XOR:
4946       if (remainder == 0)
4947 	{
4948 	  if (reload_completed && rtx_equal_p (target, source))
4949 	    return 0;
4950 	  if (generate)
4951 	    emit_constant_insn (cond, gen_rtx_SET (target, source));
4952 	  return 1;
4953 	}
4954 
4955       if (remainder == 0xffffffff)
4956 	{
4957 	  if (generate)
4958 	    emit_constant_insn (cond,
4959 				gen_rtx_SET (target,
4960 					     gen_rtx_NOT (mode, source)));
4961 	  return 1;
4962 	}
4963       final_invert = 1;
4964       break;
4965 
4966     case MINUS:
4967       /* We treat MINUS as (val - source), since (source - val) is always
4968 	 passed as (source + (-val)).  */
4969       if (remainder == 0)
4970 	{
4971 	  if (generate)
4972 	    emit_constant_insn (cond,
4973 				gen_rtx_SET (target,
4974 					     gen_rtx_NEG (mode, source)));
4975 	  return 1;
4976 	}
4977       if (const_ok_for_arm (val))
4978 	{
4979 	  if (generate)
4980 	    emit_constant_insn (cond,
4981 				gen_rtx_SET (target,
4982 					     gen_rtx_MINUS (mode, GEN_INT (val),
4983 							    source)));
4984 	  return 1;
4985 	}
4986 
4987       break;
4988 
4989     default:
4990       gcc_unreachable ();
4991     }
4992 
4993   /* If we can do it in one insn get out quickly.  */
4994   if (const_ok_for_op (val, code))
4995     {
4996       if (generate)
4997 	emit_constant_insn (cond,
4998 			    gen_rtx_SET (target,
4999 					 (source
5000 					  ? gen_rtx_fmt_ee (code, mode, source,
5001 							    GEN_INT (val))
5002 					  : GEN_INT (val))));
5003       return 1;
5004     }
5005 
5006   /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
5007      insn.  */
5008   if (code == AND && (i = exact_log2 (remainder + 1)) > 0
5009       && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
5010     {
5011       if (generate)
5012 	{
5013 	  if (mode == SImode && i == 16)
5014 	    /* Use UXTH in preference to UBFX, since on Thumb2 it's a
5015 	       smaller insn.  */
5016 	    emit_constant_insn (cond,
5017 				gen_zero_extendhisi2
5018 				(target, gen_lowpart (HImode, source)));
5019 	  else
5020 	    /* Extz only supports SImode, but we can coerce the operands
5021 	       into that mode.  */
5022 	    emit_constant_insn (cond,
5023 				gen_extzv_t2 (gen_lowpart (SImode, target),
5024 					      gen_lowpart (SImode, source),
5025 					      GEN_INT (i), const0_rtx));
5026 	}
5027 
5028       return 1;
5029     }
5030 
5031   /* Calculate a few attributes that may be useful for specific
5032      optimizations.  */
5033   /* Count number of leading zeros.  */
5034   for (i = 31; i >= 0; i--)
5035     {
5036       if ((remainder & (1 << i)) == 0)
5037 	clear_sign_bit_copies++;
5038       else
5039 	break;
5040     }
5041 
5042   /* Count number of leading 1's.  */
5043   for (i = 31; i >= 0; i--)
5044     {
5045       if ((remainder & (1 << i)) != 0)
5046 	set_sign_bit_copies++;
5047       else
5048 	break;
5049     }
5050 
5051   /* Count number of trailing zero's.  */
5052   for (i = 0; i <= 31; i++)
5053     {
5054       if ((remainder & (1 << i)) == 0)
5055 	clear_zero_bit_copies++;
5056       else
5057 	break;
5058     }
5059 
5060   /* Count number of trailing 1's.  */
5061   for (i = 0; i <= 31; i++)
5062     {
5063       if ((remainder & (1 << i)) != 0)
5064 	set_zero_bit_copies++;
5065       else
5066 	break;
5067     }
5068 
5069   switch (code)
5070     {
5071     case SET:
5072       /* See if we can do this by sign_extending a constant that is known
5073 	 to be negative.  This is a good way of doing it, since the shift
5074 	 may well merge into a subsequent insn.  */
5075       if (set_sign_bit_copies > 1)
5076 	{
5077 	  if (const_ok_for_arm
5078 	      (temp1 = ARM_SIGN_EXTEND (remainder
5079 					<< (set_sign_bit_copies - 1))))
5080 	    {
5081 	      if (generate)
5082 		{
5083 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5084 		  emit_constant_insn (cond,
5085 				      gen_rtx_SET (new_src, GEN_INT (temp1)));
5086 		  emit_constant_insn (cond,
5087 				      gen_ashrsi3 (target, new_src,
5088 						   GEN_INT (set_sign_bit_copies - 1)));
5089 		}
5090 	      return 2;
5091 	    }
5092 	  /* For an inverted constant, we will need to set the low bits;
5093 	     these will be shifted out of harm's way.  */
5094 	  temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
5095 	  if (const_ok_for_arm (~temp1))
5096 	    {
5097 	      if (generate)
5098 		{
5099 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5100 		  emit_constant_insn (cond,
5101 				      gen_rtx_SET (new_src, GEN_INT (temp1)));
5102 		  emit_constant_insn (cond,
5103 				      gen_ashrsi3 (target, new_src,
5104 						   GEN_INT (set_sign_bit_copies - 1)));
5105 		}
5106 	      return 2;
5107 	    }
5108 	}
5109 
5110       /* See if we can calculate the value as the difference between two
5111 	 valid immediates.  */
5112       if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
5113 	{
5114 	  int topshift = clear_sign_bit_copies & ~1;
5115 
5116 	  temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
5117 				   & (0xff000000 >> topshift));
5118 
5119 	  /* If temp1 is zero, then that means the 9 most significant
5120 	     bits of remainder were 1 and we've caused it to overflow.
5121 	     When topshift is 0 we don't need to do anything since we
5122 	     can borrow from 'bit 32'.  */
5123 	  if (temp1 == 0 && topshift != 0)
5124 	    temp1 = 0x80000000 >> (topshift - 1);
5125 
5126 	  temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
5127 
5128 	  if (const_ok_for_arm (temp2))
5129 	    {
5130 	      if (generate)
5131 		{
5132 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5133 		  emit_constant_insn (cond,
5134 				      gen_rtx_SET (new_src, GEN_INT (temp1)));
5135 		  emit_constant_insn (cond,
5136 				      gen_addsi3 (target, new_src,
5137 						  GEN_INT (-temp2)));
5138 		}
5139 
5140 	      return 2;
5141 	    }
5142 	}
5143 
5144       /* See if we can generate this by setting the bottom (or the top)
5145 	 16 bits, and then shifting these into the other half of the
5146 	 word.  We only look for the simplest cases, to do more would cost
5147 	 too much.  Be careful, however, not to generate this when the
5148 	 alternative would take fewer insns.  */
5149       if (val & 0xffff0000)
5150 	{
5151 	  temp1 = remainder & 0xffff0000;
5152 	  temp2 = remainder & 0x0000ffff;
5153 
5154 	  /* Overlaps outside this range are best done using other methods.  */
5155 	  for (i = 9; i < 24; i++)
5156 	    {
5157 	      if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
5158 		  && !const_ok_for_arm (temp2))
5159 		{
5160 		  rtx new_src = (subtargets
5161 				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5162 				 : target);
5163 		  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
5164 					    source, subtargets, generate);
5165 		  source = new_src;
5166 		  if (generate)
5167 		    emit_constant_insn
5168 		      (cond,
5169 		       gen_rtx_SET
5170 		       (target,
5171 			gen_rtx_IOR (mode,
5172 				     gen_rtx_ASHIFT (mode, source,
5173 						     GEN_INT (i)),
5174 				     source)));
5175 		  return insns + 1;
5176 		}
5177 	    }
5178 
5179 	  /* Don't duplicate cases already considered.  */
5180 	  for (i = 17; i < 24; i++)
5181 	    {
5182 	      if (((temp1 | (temp1 >> i)) == remainder)
5183 		  && !const_ok_for_arm (temp1))
5184 		{
5185 		  rtx new_src = (subtargets
5186 				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5187 				 : target);
5188 		  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
5189 					    source, subtargets, generate);
5190 		  source = new_src;
5191 		  if (generate)
5192 		    emit_constant_insn
5193 		      (cond,
5194 		       gen_rtx_SET (target,
5195 				    gen_rtx_IOR
5196 				    (mode,
5197 				     gen_rtx_LSHIFTRT (mode, source,
5198 						       GEN_INT (i)),
5199 				     source)));
5200 		  return insns + 1;
5201 		}
5202 	    }
5203 	}
5204       break;
5205 
5206     case IOR:
5207     case XOR:
5208       /* If we have IOR or XOR, and the constant can be loaded in a
5209 	 single instruction, and we can find a temporary to put it in,
5210 	 then this can be done in two instructions instead of 3-4.  */
5211       if (subtargets
5212 	  /* TARGET can't be NULL if SUBTARGETS is 0.  */
5213 	  || (reload_completed && !reg_mentioned_p (target, source)))
5214 	{
5215 	  if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
5216 	    {
5217 	      if (generate)
5218 		{
5219 		  rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5220 
5221 		  emit_constant_insn (cond,
5222 				      gen_rtx_SET (sub, GEN_INT (val)));
5223 		  emit_constant_insn (cond,
5224 				      gen_rtx_SET (target,
5225 						   gen_rtx_fmt_ee (code, mode,
5226 								   source, sub)));
5227 		}
5228 	      return 2;
5229 	    }
5230 	}
5231 
5232       if (code == XOR)
5233 	break;
5234 
5235       /* Convert
5236 	  x = y | constant (which is composed of set_sign_bit_copies leading 1s
5237 	                    followed by 0s, e.g. 0xfff00000)
5238 	  x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
5239 
5240 	  This can be done in 2 instructions by using shifts with mov or mvn.
5241 	  e.g. for
5242 	  x = x | 0xfff00000;
5243 	  we generate.
5244 	  mvn	r0, r0, asl #12
5245 	  mvn	r0, r0, lsr #12  */
5246       if (set_sign_bit_copies > 8
5247 	  && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
5248 	{
5249 	  if (generate)
5250 	    {
5251 	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5252 	      rtx shift = GEN_INT (set_sign_bit_copies);
5253 
5254 	      emit_constant_insn
5255 		(cond,
5256 		 gen_rtx_SET (sub,
5257 			      gen_rtx_NOT (mode,
5258 					   gen_rtx_ASHIFT (mode,
5259 							   source,
5260 							   shift))));
5261 	      emit_constant_insn
5262 		(cond,
5263 		 gen_rtx_SET (target,
5264 			      gen_rtx_NOT (mode,
5265 					   gen_rtx_LSHIFTRT (mode, sub,
5266 							     shift))));
5267 	    }
5268 	  return 2;
5269 	}
5270 
5271       /* Convert
5272 	  x = y | constant (which has set_zero_bit_copies trailing ones)
5273 	   to
5274 	  x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5275 
5276 	  E.g. for r0 = r0 | 0xfff
5277 	       mvn	r0, r0, lsr #12
5278 	       mvn	r0, r0, asl #12
5279 
5280       */
5281       if (set_zero_bit_copies > 8
5282 	  && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5283 	{
5284 	  if (generate)
5285 	    {
5286 	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5287 	      rtx shift = GEN_INT (set_zero_bit_copies);
5288 
5289 	      emit_constant_insn
5290 		(cond,
5291 		 gen_rtx_SET (sub,
5292 			      gen_rtx_NOT (mode,
5293 					   gen_rtx_LSHIFTRT (mode,
5294 							     source,
5295 							     shift))));
5296 	      emit_constant_insn
5297 		(cond,
5298 		 gen_rtx_SET (target,
5299 			      gen_rtx_NOT (mode,
5300 					   gen_rtx_ASHIFT (mode, sub,
5301 							   shift))));
5302 	    }
5303 	  return 2;
5304 	}
5305 
5306       /* This will never be reached for Thumb2 because orn is a valid
5307 	 instruction. This is for Thumb1 and the ARM 32 bit cases.
5308 
5309 	 x = y | constant (such that ~constant is a valid constant)
5310 	 Transform this to
5311 	 x = ~(~y & ~constant).
5312       */
5313       if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5314 	{
5315 	  if (generate)
5316 	    {
5317 	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5318 	      emit_constant_insn (cond,
5319 				  gen_rtx_SET (sub,
5320 					       gen_rtx_NOT (mode, source)));
5321 	      source = sub;
5322 	      if (subtargets)
5323 		sub = gen_reg_rtx (mode);
5324 	      emit_constant_insn (cond,
5325 				  gen_rtx_SET (sub,
5326 					       gen_rtx_AND (mode, source,
5327 							    GEN_INT (temp1))));
5328 	      emit_constant_insn (cond,
5329 				  gen_rtx_SET (target,
5330 					       gen_rtx_NOT (mode, sub)));
5331 	    }
5332 	  return 3;
5333 	}
5334       break;
5335 
5336     case AND:
5337       /* See if two shifts will do 2 or more insn's worth of work.  */
5338       if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5339 	{
5340 	  HOST_WIDE_INT shift_mask = ((0xffffffff
5341 				       << (32 - clear_sign_bit_copies))
5342 				      & 0xffffffff);
5343 
5344 	  if ((remainder | shift_mask) != 0xffffffff)
5345 	    {
5346 	      HOST_WIDE_INT new_val
5347 	        = ARM_SIGN_EXTEND (remainder | shift_mask);
5348 
5349 	      if (generate)
5350 		{
5351 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5352 		  insns = arm_gen_constant (AND, SImode, cond, new_val,
5353 					    new_src, source, subtargets, 1);
5354 		  source = new_src;
5355 		}
5356 	      else
5357 		{
5358 		  rtx targ = subtargets ? NULL_RTX : target;
5359 		  insns = arm_gen_constant (AND, mode, cond, new_val,
5360 					    targ, source, subtargets, 0);
5361 		}
5362 	    }
5363 
5364 	  if (generate)
5365 	    {
5366 	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5367 	      rtx shift = GEN_INT (clear_sign_bit_copies);
5368 
5369 	      emit_insn (gen_ashlsi3 (new_src, source, shift));
5370 	      emit_insn (gen_lshrsi3 (target, new_src, shift));
5371 	    }
5372 
5373 	  return insns + 2;
5374 	}
5375 
5376       if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5377 	{
5378 	  HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5379 
5380 	  if ((remainder | shift_mask) != 0xffffffff)
5381 	    {
5382 	      HOST_WIDE_INT new_val
5383 	        = ARM_SIGN_EXTEND (remainder | shift_mask);
5384 	      if (generate)
5385 		{
5386 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5387 
5388 		  insns = arm_gen_constant (AND, mode, cond, new_val,
5389 					    new_src, source, subtargets, 1);
5390 		  source = new_src;
5391 		}
5392 	      else
5393 		{
5394 		  rtx targ = subtargets ? NULL_RTX : target;
5395 
5396 		  insns = arm_gen_constant (AND, mode, cond, new_val,
5397 					    targ, source, subtargets, 0);
5398 		}
5399 	    }
5400 
5401 	  if (generate)
5402 	    {
5403 	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5404 	      rtx shift = GEN_INT (clear_zero_bit_copies);
5405 
5406 	      emit_insn (gen_lshrsi3 (new_src, source, shift));
5407 	      emit_insn (gen_ashlsi3 (target, new_src, shift));
5408 	    }
5409 
5410 	  return insns + 2;
5411 	}
5412 
5413       break;
5414 
5415     default:
5416       break;
5417     }
5418 
5419   /* Calculate what the instruction sequences would be if we generated it
5420      normally, negated, or inverted.  */
5421   if (code == AND)
5422     /* AND cannot be split into multiple insns, so invert and use BIC.  */
5423     insns = 99;
5424   else
5425     insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5426 
5427   if (can_negate)
5428     neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5429 					    &neg_immediates);
5430   else
5431     neg_insns = 99;
5432 
5433   if (can_invert || final_invert)
5434     inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5435 					    &inv_immediates);
5436   else
5437     inv_insns = 99;
5438 
5439   immediates = &pos_immediates;
5440 
5441   /* Is the negated immediate sequence more efficient?  */
5442   if (neg_insns < insns && neg_insns <= inv_insns)
5443     {
5444       insns = neg_insns;
5445       immediates = &neg_immediates;
5446     }
5447   else
5448     can_negate = 0;
5449 
5450   /* Is the inverted immediate sequence more efficient?
5451      We must allow for an extra NOT instruction for XOR operations, although
5452      there is some chance that the final 'mvn' will get optimized later.  */
5453   if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5454     {
5455       insns = inv_insns;
5456       immediates = &inv_immediates;
5457     }
5458   else
5459     {
5460       can_invert = 0;
5461       final_invert = 0;
5462     }
5463 
5464   /* Now output the chosen sequence as instructions.  */
5465   if (generate)
5466     {
5467       for (i = 0; i < insns; i++)
5468 	{
5469 	  rtx new_src, temp1_rtx;
5470 
5471 	  temp1 = immediates->i[i];
5472 
5473 	  if (code == SET || code == MINUS)
5474 	    new_src = (subtargets ? gen_reg_rtx (mode) : target);
5475 	  else if ((final_invert || i < (insns - 1)) && subtargets)
5476 	    new_src = gen_reg_rtx (mode);
5477 	  else
5478 	    new_src = target;
5479 
5480 	  if (can_invert)
5481 	    temp1 = ~temp1;
5482 	  else if (can_negate)
5483 	    temp1 = -temp1;
5484 
5485 	  temp1 = trunc_int_for_mode (temp1, mode);
5486 	  temp1_rtx = GEN_INT (temp1);
5487 
5488 	  if (code == SET)
5489 	    ;
5490 	  else if (code == MINUS)
5491 	    temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5492 	  else
5493 	    temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5494 
5495 	  emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5496 	  source = new_src;
5497 
5498 	  if (code == SET)
5499 	    {
5500 	      can_negate = can_invert;
5501 	      can_invert = 0;
5502 	      code = PLUS;
5503 	    }
5504 	  else if (code == MINUS)
5505 	    code = PLUS;
5506 	}
5507     }
5508 
5509   if (final_invert)
5510     {
5511       if (generate)
5512 	emit_constant_insn (cond, gen_rtx_SET (target,
5513 					       gen_rtx_NOT (mode, source)));
5514       insns++;
5515     }
5516 
5517   return insns;
5518 }
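/* For illustration (rD is a placeholder register): a SET of 0x0000ffff
   on a core without MOVW cannot be done in one insn, so the code above
   uses the positive sequence { 0xff00, 0xff } and emits roughly
   "mov rD, #0xff00" followed by "add rD, rD, #0xff".  An AND with
   0xffffff00, by contrast, is caught by the const_ok_for_op early exit
   and usually ends up as a single BIC.  */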
5519 
5520 /* Return TRUE if op is a constant where both the low and top words are
5521    suitable for RSB/RSC instructions.  This is never true for Thumb, since
5522    we do not have RSC in that case.  */
5523 static bool
5524 arm_const_double_prefer_rsbs_rsc (rtx op)
5525 {
5526   /* Thumb lacks RSC, so we never prefer that sequence.  */
5527   if (TARGET_THUMB || !CONST_INT_P (op))
5528     return false;
5529   HOST_WIDE_INT hi, lo;
5530   lo = UINTVAL (op) & 0xffffffffULL;
5531   hi = UINTVAL (op) >> 32;
5532   return const_ok_for_arm (lo) && const_ok_for_arm (hi);
5533 }
5534 
5535 /* Canonicalize a comparison so that we are more likely to recognize it.
5536    This can be done for a few constant compares, where we can make the
5537    immediate value easier to load.  */
5538 
5539 static void
5540 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5541 			     bool op0_preserve_value)
5542 {
5543   machine_mode mode;
5544   unsigned HOST_WIDE_INT i, maxval;
5545 
5546   mode = GET_MODE (*op0);
5547   if (mode == VOIDmode)
5548     mode = GET_MODE (*op1);
5549 
5550   maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5551 
5552   /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc).  In
5553      ARM mode we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be
5554      either reversed or (for constant OP1) adjusted to GE/LT.
5555      Similarly for GTU/LEU in Thumb mode.  */
5556   if (mode == DImode)
5557     {
5558 
5559       if (*code == GT || *code == LE
5560 	  || *code == GTU || *code == LEU)
5561 	{
5562 	  /* Missing comparison.  First try to use an available
5563 	     comparison.  */
5564 	  if (CONST_INT_P (*op1))
5565 	    {
5566 	      i = INTVAL (*op1);
5567 	      switch (*code)
5568 		{
5569 		case GT:
5570 		case LE:
5571 		  if (i != maxval)
5572 		    {
5573 		      /* Try to convert to GE/LT, unless that would be more
5574 			 expensive.  */
5575 		      if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5576 			  && arm_const_double_prefer_rsbs_rsc (*op1))
5577 			return;
5578 		      *op1 = GEN_INT (i + 1);
5579 		      *code = *code == GT ? GE : LT;
5580 		    }
5581 		  else
5582 		    {
5583 		      /* GT maxval is always false, LE maxval is always true.
5584 			 We can't fold that away here as we must make a
5585 			 comparison, but we can fold them to comparisons
5586 			 with the same result that can be handled:
5587 			   op0 GT maxval -> op0 LT minval
5588 			   op0 LE maxval -> op0 GE minval
5589 			 where minval = (-maxval - 1).  */
5590 		      *op1 = GEN_INT (-maxval - 1);
5591 		      *code = *code == GT ? LT : GE;
5592 		    }
5593 		  return;
5594 
5595 		case GTU:
5596 		case LEU:
5597 		  if (i != ~((unsigned HOST_WIDE_INT) 0))
5598 		    {
5599 		      /* Try to convert to GEU/LTU, unless that would
5600 			 be more expensive.  */
5601 		      if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5602 			  && arm_const_double_prefer_rsbs_rsc (*op1))
5603 			return;
5604 		      *op1 = GEN_INT (i + 1);
5605 		      *code = *code == GTU ? GEU : LTU;
5606 		    }
5607 		  else
5608 		    {
5609 		      /* GTU ~0 is always false, LEU ~0 is always true.
5610 			 We can't fold that away here as we must make a
5611 			 comparison, but we can fold them to comparisons
5612 			 with the same result that can be handled:
5613 			   op0 GTU ~0 -> op0 LTU 0
5614 			   op0 LEU ~0 -> op0 GEU 0.  */
5615 		      *op1 = const0_rtx;
5616 		      *code = *code == GTU ? LTU : GEU;
5617 		    }
5618 		  return;
5619 
5620 		default:
5621 		  gcc_unreachable ();
5622 		}
5623 	    }
5624 
5625 	  if (!op0_preserve_value)
5626 	    {
5627 	      std::swap (*op0, *op1);
5628 	      *code = (int)swap_condition ((enum rtx_code)*code);
5629 	    }
5630 	}
5631       return;
5632     }
5633 
5634   /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5635      with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5636      to facilitate possible combining with a cmp into 'ands'.  */
5637   if (mode == SImode
5638       && GET_CODE (*op0) == ZERO_EXTEND
5639       && GET_CODE (XEXP (*op0, 0)) == SUBREG
5640       && GET_MODE (XEXP (*op0, 0)) == QImode
5641       && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5642       && subreg_lowpart_p (XEXP (*op0, 0))
5643       && *op1 == const0_rtx)
5644     *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5645 			GEN_INT (255));
5646 
5647   /* Comparisons smaller than DImode.  Only adjust comparisons against
5648      an out-of-range constant.  */
5649   if (!CONST_INT_P (*op1)
5650       || const_ok_for_arm (INTVAL (*op1))
5651       || const_ok_for_arm (- INTVAL (*op1)))
5652     return;
5653 
5654   i = INTVAL (*op1);
5655 
5656   switch (*code)
5657     {
5658     case EQ:
5659     case NE:
5660       return;
5661 
5662     case GT:
5663     case LE:
5664       if (i != maxval
5665 	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5666 	{
5667 	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5668 	  *code = *code == GT ? GE : LT;
5669 	  return;
5670 	}
5671       break;
5672 
5673     case GE:
5674     case LT:
5675       if (i != ~maxval
5676 	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5677 	{
5678 	  *op1 = GEN_INT (i - 1);
5679 	  *code = *code == GE ? GT : LE;
5680 	  return;
5681 	}
5682       break;
5683 
5684     case GTU:
5685     case LEU:
5686       if (i != ~((unsigned HOST_WIDE_INT) 0)
5687 	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5688 	{
5689 	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5690 	  *code = *code == GTU ? GEU : LTU;
5691 	  return;
5692 	}
5693       break;
5694 
5695     case GEU:
5696     case LTU:
5697       if (i != 0
5698 	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5699 	{
5700 	  *op1 = GEN_INT (i - 1);
5701 	  *code = *code == GEU ? GTU : LEU;
5702 	  return;
5703 	}
5704       break;
5705 
5706     default:
5707       gcc_unreachable ();
5708     }
5709 }
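/* For illustration: for an SImode comparison such as x > 0x1fff, neither
   0x1fff nor -0x1fff is a valid immediate, so the GT/LE case above
   rewrites it as x >= 0x2000, which needs only a single CMP with a valid
   immediate.  */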
5710 
5711 
5712 /* Define how to find the value returned by a function.  */
5713 
5714 static rtx
5715 arm_function_value (const_tree type, const_tree func,
5716 		   bool outgoing ATTRIBUTE_UNUSED)
5717 {
5718   machine_mode mode;
5719   int unsignedp ATTRIBUTE_UNUSED;
5720   rtx r ATTRIBUTE_UNUSED;
5721 
5722   mode = TYPE_MODE (type);
5723 
5724   if (TARGET_AAPCS_BASED)
5725     return aapcs_allocate_return_reg (mode, type, func);
5726 
5727   /* Promote integer types.  */
5728   if (INTEGRAL_TYPE_P (type))
5729     mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5730 
5731   /* Promote small structs returned in a register to full-word size
5732      for big-endian AAPCS.  */
5733   if (arm_return_in_msb (type))
5734     {
5735       HOST_WIDE_INT size = int_size_in_bytes (type);
5736       if (size % UNITS_PER_WORD != 0)
5737 	{
5738 	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5739 	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5740 	}
5741     }
5742 
5743   return arm_libcall_value_1 (mode);
5744 }
5745 
5746 /* libcall hashtable helpers.  */
5747 
5748 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5749 {
5750   static inline hashval_t hash (const rtx_def *);
5751   static inline bool equal (const rtx_def *, const rtx_def *);
5752   static inline void remove (rtx_def *);
5753 };
5754 
5755 inline bool
5756 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5757 {
5758   return rtx_equal_p (p1, p2);
5759 }
5760 
5761 inline hashval_t
5762 libcall_hasher::hash (const rtx_def *p1)
5763 {
5764   return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5765 }
5766 
5767 typedef hash_table<libcall_hasher> libcall_table_type;
5768 
5769 static void
5770 add_libcall (libcall_table_type *htab, rtx libcall)
5771 {
5772   *htab->find_slot (libcall, INSERT) = libcall;
5773 }
5774 
5775 static bool
5776 arm_libcall_uses_aapcs_base (const_rtx libcall)
5777 {
5778   static bool init_done = false;
5779   static libcall_table_type *libcall_htab = NULL;
5780 
5781   if (!init_done)
5782     {
5783       init_done = true;
5784 
5785       libcall_htab = new libcall_table_type (31);
5786       add_libcall (libcall_htab,
5787 		   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5788       add_libcall (libcall_htab,
5789 		   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5790       add_libcall (libcall_htab,
5791 		   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5792       add_libcall (libcall_htab,
5793 		   convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5794 
5795       add_libcall (libcall_htab,
5796 		   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5797       add_libcall (libcall_htab,
5798 		   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5799       add_libcall (libcall_htab,
5800 		   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5801       add_libcall (libcall_htab,
5802 		   convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5803 
5804       add_libcall (libcall_htab,
5805 		   convert_optab_libfunc (sext_optab, SFmode, HFmode));
5806       add_libcall (libcall_htab,
5807 		   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5808       add_libcall (libcall_htab,
5809 		   convert_optab_libfunc (sfix_optab, SImode, DFmode));
5810       add_libcall (libcall_htab,
5811 		   convert_optab_libfunc (ufix_optab, SImode, DFmode));
5812       add_libcall (libcall_htab,
5813 		   convert_optab_libfunc (sfix_optab, DImode, DFmode));
5814       add_libcall (libcall_htab,
5815 		   convert_optab_libfunc (ufix_optab, DImode, DFmode));
5816       add_libcall (libcall_htab,
5817 		   convert_optab_libfunc (sfix_optab, DImode, SFmode));
5818       add_libcall (libcall_htab,
5819 		   convert_optab_libfunc (ufix_optab, DImode, SFmode));
5820       add_libcall (libcall_htab,
5821 		   convert_optab_libfunc (sfix_optab, SImode, SFmode));
5822       add_libcall (libcall_htab,
5823 		   convert_optab_libfunc (ufix_optab, SImode, SFmode));
5824 
5825       /* Values from double-precision helper functions are returned in core
5826 	 registers if the selected core only supports single-precision
5827 	 arithmetic, even if we are using the hard-float ABI.  The same is
5828 	 true for single-precision helpers except in case of MVE, because in
5829 	 MVE we will be using the hard-float ABI on a CPU which doesn't support
5830 	 single-precision operations in hardware.  In MVE the following check
5831 	 enables use of emulation for the single-precision arithmetic
5832 	 operations.  */
5833       if (TARGET_HAVE_MVE)
5834 	{
5835 	  add_libcall (libcall_htab, optab_libfunc (add_optab, SFmode));
5836 	  add_libcall (libcall_htab, optab_libfunc (sdiv_optab, SFmode));
5837 	  add_libcall (libcall_htab, optab_libfunc (smul_optab, SFmode));
5838 	  add_libcall (libcall_htab, optab_libfunc (neg_optab, SFmode));
5839 	  add_libcall (libcall_htab, optab_libfunc (sub_optab, SFmode));
5840 	  add_libcall (libcall_htab, optab_libfunc (eq_optab, SFmode));
5841 	  add_libcall (libcall_htab, optab_libfunc (lt_optab, SFmode));
5842 	  add_libcall (libcall_htab, optab_libfunc (le_optab, SFmode));
5843 	  add_libcall (libcall_htab, optab_libfunc (ge_optab, SFmode));
5844 	  add_libcall (libcall_htab, optab_libfunc (gt_optab, SFmode));
5845 	  add_libcall (libcall_htab, optab_libfunc (unord_optab, SFmode));
5846 	}
5847       add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5848       add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5849       add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5850       add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5851       add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5852       add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5853       add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5854       add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5855       add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5856       add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5857       add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5858       add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5859 							SFmode));
5860       add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5861 							DFmode));
5862       add_libcall (libcall_htab,
5863 		   convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5864     }
5865 
5866   return libcall && libcall_htab->find (libcall) != NULL;
5867 }
5868 
5869 static rtx
5870 arm_libcall_value_1 (machine_mode mode)
5871 {
5872   if (TARGET_AAPCS_BASED)
5873     return aapcs_libcall_value (mode);
5874   else if (TARGET_IWMMXT_ABI
5875 	   && arm_vector_mode_supported_p (mode))
5876     return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5877   else
5878     return gen_rtx_REG (mode, ARG_REGISTER (1));
5879 }
5880 
5881 /* Define how to find the value returned by a library function
5882    assuming the value has mode MODE.  */
5883 
5884 static rtx
5885 arm_libcall_value (machine_mode mode, const_rtx libcall)
5886 {
5887   if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5888       && GET_MODE_CLASS (mode) == MODE_FLOAT)
5889     {
5890       /* The following libcalls return their result in integer registers,
5891 	 even though they return a floating point value.  */
5892       if (arm_libcall_uses_aapcs_base (libcall))
5893 	return gen_rtx_REG (mode, ARG_REGISTER(1));
5894 
5895     }
5896 
5897   return arm_libcall_value_1 (mode);
5898 }
5899 
5900 /* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */
5901 
5902 static bool
5903 arm_function_value_regno_p (const unsigned int regno)
5904 {
5905   if (regno == ARG_REGISTER (1)
5906       || (TARGET_32BIT
5907 	  && TARGET_AAPCS_BASED
5908 	  && TARGET_HARD_FLOAT
5909 	  && regno == FIRST_VFP_REGNUM)
5910       || (TARGET_IWMMXT_ABI
5911 	  && regno == FIRST_IWMMXT_REGNUM))
5912     return true;
5913 
5914   return false;
5915 }
5916 
5917 /* Determine the amount of memory needed to store the possible return
5918    registers of an untyped call.  */
5919 int
5920 arm_apply_result_size (void)
5921 {
5922   int size = 16;
5923 
5924   if (TARGET_32BIT)
5925     {
5926       if (TARGET_HARD_FLOAT_ABI)
5927 	size += 32;
5928       if (TARGET_IWMMXT_ABI)
5929 	size += 8;
5930     }
5931 
5932   return size;
5933 }
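/* For illustration: the base 16 bytes cover a result spread over the four
   core registers r0-r3; the extra 32 bytes allow for VFP results (up to
   d0-d3 under the hard-float ABI) and the extra 8 bytes for an iWMMXt
   result register.  */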
5934 
5935 /* Decide whether TYPE should be returned in memory (true)
5936    or in a register (false).  FNTYPE is the type of the function making
5937    the call.  */
5938 static bool
5939 arm_return_in_memory (const_tree type, const_tree fntype)
5940 {
5941   HOST_WIDE_INT size;
5942 
5943   size = int_size_in_bytes (type);  /* Negative if not fixed size.  */
5944 
5945   if (TARGET_AAPCS_BASED)
5946     {
5947       /* Simple, non-aggregate types (i.e. not including vectors and
5948 	 complex) are always returned in a register (or registers).
5949 	 We don't care about which register here, so we can short-cut
5950 	 some of the detail.  */
5951       if (!AGGREGATE_TYPE_P (type)
5952 	  && TREE_CODE (type) != VECTOR_TYPE
5953 	  && TREE_CODE (type) != COMPLEX_TYPE)
5954 	return false;
5955 
5956       /* Any return value that is no larger than one word can be
5957 	 returned in r0.  */
5958       if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5959 	return false;
5960 
5961       /* Check any available co-processors to see if they accept the
5962 	 type as a register candidate (VFP, for example, can return
5963 	 some aggregates in consecutive registers).  These aren't
5964 	 available if the call is variadic.  */
5965       if (aapcs_select_return_coproc (type, fntype) >= 0)
5966 	return false;
5967 
5968       /* Vector values should be returned using ARM registers, not
5969 	 memory (unless they're over 16 bytes, which will break since
5970 	 we only have four call-clobbered registers to play with).  */
5971       if (TREE_CODE (type) == VECTOR_TYPE)
5972 	return (size < 0 || size > (4 * UNITS_PER_WORD));
5973 
5974       /* The rest go in memory.  */
5975       return true;
5976     }
5977 
5978   if (TREE_CODE (type) == VECTOR_TYPE)
5979     return (size < 0 || size > (4 * UNITS_PER_WORD));
5980 
5981   if (!AGGREGATE_TYPE_P (type)
5982       && TREE_CODE (type) != VECTOR_TYPE)
5983     /* All simple types are returned in registers.  */
5984     return false;
5985 
5986   if (arm_abi != ARM_ABI_APCS)
5987     {
5988       /* ATPCS and later return aggregate types in memory only if they are
5989 	 larger than a word (or are variable size).  */
5990       return (size < 0 || size > UNITS_PER_WORD);
5991     }
5992 
5993   /* For the arm-wince targets we choose to be compatible with Microsoft's
5994      ARM and Thumb compilers, which always return aggregates in memory.  */
5995 #ifndef ARM_WINCE
5996   /* All structures/unions bigger than one word are returned in memory.
5997      Also catch the case where int_size_in_bytes returns -1.  In this case
5998      the aggregate is either huge or of variable size, and in either case
5999      we will want to return it via memory and not in a register.  */
6000   if (size < 0 || size > UNITS_PER_WORD)
6001     return true;
6002 
6003   if (TREE_CODE (type) == RECORD_TYPE)
6004     {
6005       tree field;
6006 
6007       /* For a struct the APCS says that we only return in a register
6008 	 if the type is 'integer like' and every addressable element
6009 	 has an offset of zero.  For practical purposes this means
6010 	 that the structure can have at most one non bit-field element
6011 	 and that this element must be the first one in the structure.  */
6012 
6013       /* Find the first field, ignoring non FIELD_DECL things which will
6014 	 have been created by C++.  */
6015       /* NOTE: This code is deprecated and has not been updated to handle
6016 	 DECL_FIELD_ABI_IGNORED.  */
6017       for (field = TYPE_FIELDS (type);
6018 	   field && TREE_CODE (field) != FIELD_DECL;
6019 	   field = DECL_CHAIN (field))
6020 	continue;
6021 
6022       if (field == NULL)
6023 	return false; /* An empty structure.  Allowed by an extension to ANSI C.  */
6024 
6025       /* Check that the first field is valid for returning in a register.  */
6026 
6027       /* ... Floats are not allowed */
6028       if (FLOAT_TYPE_P (TREE_TYPE (field)))
6029 	return true;
6030 
6031       /* ... Aggregates that are not themselves valid for returning in
6032 	 a register are not allowed.  */
6033       if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6034 	return true;
6035 
6036       /* Now check the remaining fields, if any.  Only bitfields are allowed,
6037 	 since they are not addressable.  */
6038       for (field = DECL_CHAIN (field);
6039 	   field;
6040 	   field = DECL_CHAIN (field))
6041 	{
6042 	  if (TREE_CODE (field) != FIELD_DECL)
6043 	    continue;
6044 
6045 	  if (!DECL_BIT_FIELD_TYPE (field))
6046 	    return true;
6047 	}
6048 
6049       return false;
6050     }
6051 
6052   if (TREE_CODE (type) == UNION_TYPE)
6053     {
6054       tree field;
6055 
6056       /* Unions can be returned in registers if every element is
6057 	 integral, or can be returned in an integer register.  */
6058       for (field = TYPE_FIELDS (type);
6059 	   field;
6060 	   field = DECL_CHAIN (field))
6061 	{
6062 	  if (TREE_CODE (field) != FIELD_DECL)
6063 	    continue;
6064 
6065 	  if (FLOAT_TYPE_P (TREE_TYPE (field)))
6066 	    return true;
6067 
6068 	  if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6069 	    return true;
6070 	}
6071 
6072       return false;
6073     }
6074 #endif /* not ARM_WINCE */
6075 
6076   /* Return all other types in memory.  */
6077   return true;
6078 }
6079 
6080 const struct pcs_attribute_arg
6081 {
6082   const char *arg;
6083   enum arm_pcs value;
6084 } pcs_attribute_args[] =
6085   {
6086     {"aapcs", ARM_PCS_AAPCS},
6087     {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
6088 #if 0
6089     /* We could recognize these, but changes would be needed elsewhere
6090      * to implement them.  */
6091     {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
6092     {"atpcs", ARM_PCS_ATPCS},
6093     {"apcs", ARM_PCS_APCS},
6094 #endif
6095     {NULL, ARM_PCS_UNKNOWN}
6096   };
6097 
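/* Illustrative example, not part of the compiler: a user selects one of the
   variants listed above through the "pcs" type attribute, e.g.

       double f (double) __attribute__ ((pcs ("aapcs-vfp")));

   arm_pcs_from_attribute below maps the attribute's string argument to the
   corresponding arm_pcs value, returning ARM_PCS_UNKNOWN for any string not
   in the table.  */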
6098 static enum arm_pcs
6099 arm_pcs_from_attribute (tree attr)
6100 {
6101   const struct pcs_attribute_arg *ptr;
6102   const char *arg;
6103 
6104   /* Get the value of the argument.  */
6105   if (TREE_VALUE (attr) == NULL_TREE
6106       || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
6107     return ARM_PCS_UNKNOWN;
6108 
6109   arg = TREE_STRING_POINTER (TREE_VALUE (attr));
6110 
6111   /* Check it against the list of known arguments.  */
6112   for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
6113     if (streq (arg, ptr->arg))
6114       return ptr->value;
6115 
6116   /* An unrecognized PCS variant.  */
6117   return ARM_PCS_UNKNOWN;
6118 }
6119 
6120 /* Get the PCS variant to use for this call.  TYPE is the function's type
6121    specification, DECL is the specific declaration.  DECL may be null if
6122    the call could be indirect or if this is a library call.  */
6123 static enum arm_pcs
6124 arm_get_pcs_model (const_tree type, const_tree decl)
6125 {
6126   bool user_convention = false;
6127   enum arm_pcs user_pcs = arm_pcs_default;
6128   tree attr;
6129 
6130   gcc_assert (type);
6131 
6132   attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
6133   if (attr)
6134     {
6135       user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
6136       user_convention = true;
6137     }
6138 
6139   if (TARGET_AAPCS_BASED)
6140     {
6141       /* Detect varargs functions.  These always use the base rules
6142 	 (no argument is ever a candidate for a co-processor
6143 	 register).  */
6144       bool base_rules = stdarg_p (type);
6145 
6146       if (user_convention)
6147 	{
6148 	  if (user_pcs > ARM_PCS_AAPCS_LOCAL)
6149 	    sorry ("non-AAPCS derived PCS variant");
6150 	  else if (base_rules && user_pcs != ARM_PCS_AAPCS)
6151 	    error ("variadic functions must use the base AAPCS variant");
6152 	}
6153 
6154       if (base_rules)
6155 	return ARM_PCS_AAPCS;
6156       else if (user_convention)
6157 	return user_pcs;
6158       else if (decl && flag_unit_at_a_time)
6159 	{
6160 	  /* Local functions never leak outside this compilation unit,
6161 	     so we are free to use whatever conventions are
6162 	     appropriate.  */
6163 	  /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
6164 	  cgraph_node *local_info_node
6165 	    = cgraph_node::local_info_node (CONST_CAST_TREE (decl));
6166 	  if (local_info_node && local_info_node->local)
6167 	    return ARM_PCS_AAPCS_LOCAL;
6168 	}
6169     }
6170   else if (user_convention && user_pcs != arm_pcs_default)
6171     sorry ("PCS variant");
6172 
6173   /* For everything else we use the target's default.  */
6174   return arm_pcs_default;
6175 }
6176 
6177 
6178 static void
6179 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
6180 		    const_tree fntype ATTRIBUTE_UNUSED,
6181 		    rtx libcall ATTRIBUTE_UNUSED,
6182 		    const_tree fndecl ATTRIBUTE_UNUSED)
6183 {
6184   /* Record the unallocated VFP registers.  */
6185   pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
6186   pcum->aapcs_vfp_reg_alloc = 0;
6187 }
6188 
6189 /* Bitmasks that indicate whether earlier versions of GCC would have
6190    taken a different path through the ABI logic.  This should result in
6191    a -Wpsabi warning if the earlier path led to a different ABI decision.
6192 
6193    WARN_PSABI_EMPTY_CXX17_BASE
6194       Indicates that the type includes an artificial empty C++17 base field
6195       that, prior to GCC 10.1, would prevent the type from being treated as
6196       a HFA or HVA.  See PR94711 for details.
6197 
6198    WARN_PSABI_NO_UNIQUE_ADDRESS
6199       Indicates that the type includes an empty [[no_unique_address]] field
6200       that, prior to GCC 10.1, would prevent the type from being treated as
6201       a HFA or HVA.  */
6202 const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
6203 const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
6204 
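/* Purely illustrative example (an assumed typical input, not compiler
   logic): a homogeneous floating-point aggregate such as

       struct hfa { float x, y, z; };

   is walked by aapcs_vfp_sub_candidate below, which sets *MODEP to SFmode
   and returns a count of 3, making the type a candidate for VFP argument
   and return registers.  */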
6205 /* Walk down the type tree of TYPE counting consecutive base elements.
6206    If *MODEP is VOIDmode, then set it to the first valid floating point
6207    type.  If a non-floating point type is found, or if a floating point
6208    type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6209    otherwise return the count in the sub-tree.
6210 
6211    The WARN_PSABI_FLAGS argument allows the caller to check whether this
6212    function has changed its behavior relative to earlier versions of GCC.
6213    Normally the argument should be nonnull and point to a zero-initialized
6214    variable.  The function then records whether the ABI decision might
6215    be affected by a known fix to the ABI logic, setting the associated
6216    WARN_PSABI_* bits if so.
6217 
6218    When the argument is instead a null pointer, the function tries to
6219    simulate the behavior of GCC before all such ABI fixes were made.
6220    This is useful to check whether the function returns something
6221    different after the ABI fixes.  */
6222 static int
6223 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
6224 			 unsigned int *warn_psabi_flags)
6225 {
6226   machine_mode mode;
6227   HOST_WIDE_INT size;
6228 
6229   switch (TREE_CODE (type))
6230     {
6231     case REAL_TYPE:
6232       mode = TYPE_MODE (type);
6233       if (mode != DFmode && mode != SFmode && mode != HFmode && mode != BFmode)
6234 	return -1;
6235 
6236       if (*modep == VOIDmode)
6237 	*modep = mode;
6238 
6239       if (*modep == mode)
6240 	return 1;
6241 
6242       break;
6243 
6244     case COMPLEX_TYPE:
6245       mode = TYPE_MODE (TREE_TYPE (type));
6246       if (mode != DFmode && mode != SFmode)
6247 	return -1;
6248 
6249       if (*modep == VOIDmode)
6250 	*modep = mode;
6251 
6252       if (*modep == mode)
6253 	return 2;
6254 
6255       break;
6256 
6257     case VECTOR_TYPE:
6258       /* Use V2SImode and V4SImode as representatives of all 64-bit
6259 	 and 128-bit vector types, whether or not those modes are
6260 	 supported with the present options.  */
6261       size = int_size_in_bytes (type);
6262       switch (size)
6263 	{
6264 	case 8:
6265 	  mode = V2SImode;
6266 	  break;
6267 	case 16:
6268 	  mode = V4SImode;
6269 	  break;
6270 	default:
6271 	  return -1;
6272 	}
6273 
6274       if (*modep == VOIDmode)
6275 	*modep = mode;
6276 
6277       /* Vector modes are considered to be opaque: two vectors are
6278 	 equivalent for the purposes of being homogeneous aggregates
6279 	 if they are the same size.  */
6280       if (*modep == mode)
6281 	return 1;
6282 
6283       break;
6284 
6285     case ARRAY_TYPE:
6286       {
6287 	int count;
6288 	tree index = TYPE_DOMAIN (type);
6289 
6290 	/* Can't handle incomplete types nor sizes that are not
6291 	   fixed.  */
6292 	if (!COMPLETE_TYPE_P (type)
6293 	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6294 	  return -1;
6295 
6296 	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep,
6297 					 warn_psabi_flags);
6298 	if (count == -1
6299 	    || !index
6300 	    || !TYPE_MAX_VALUE (index)
6301 	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6302 	    || !TYPE_MIN_VALUE (index)
6303 	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6304 	    || count < 0)
6305 	  return -1;
6306 
6307 	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6308 		      - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6309 
6310 	/* There must be no padding.  */
6311 	if (wi::to_wide (TYPE_SIZE (type))
6312 	    != count * GET_MODE_BITSIZE (*modep))
6313 	  return -1;
6314 
6315 	return count;
6316       }
6317 
6318     case RECORD_TYPE:
6319       {
6320 	int count = 0;
6321 	int sub_count;
6322 	tree field;
6323 
6324 	/* Can't handle incomplete types nor sizes that are not
6325 	   fixed.  */
6326 	if (!COMPLETE_TYPE_P (type)
6327 	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6328 	  return -1;
6329 
6330 	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6331 	  {
6332 	    if (TREE_CODE (field) != FIELD_DECL)
6333 	      continue;
6334 
6335 	    if (DECL_FIELD_ABI_IGNORED (field))
6336 	      {
6337 		/* See whether this is something that earlier versions of
6338 		   GCC failed to ignore.  */
6339 		unsigned int flag;
6340 		if (lookup_attribute ("no_unique_address",
6341 				      DECL_ATTRIBUTES (field)))
6342 		  flag = WARN_PSABI_NO_UNIQUE_ADDRESS;
6343 		else if (cxx17_empty_base_field_p (field))
6344 		  flag = WARN_PSABI_EMPTY_CXX17_BASE;
6345 		else
6346 		  /* No compatibility problem.  */
6347 		  continue;
6348 
6349 		/* Simulate the old behavior when WARN_PSABI_FLAGS is null.  */
6350 		if (warn_psabi_flags)
6351 		  {
6352 		    *warn_psabi_flags |= flag;
6353 		    continue;
6354 		  }
6355 	      }
6356 
6357 	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6358 						 warn_psabi_flags);
6359 	    if (sub_count < 0)
6360 	      return -1;
6361 	    count += sub_count;
6362 	  }
6363 
6364 	/* There must be no padding.  */
6365 	if (wi::to_wide (TYPE_SIZE (type))
6366 	    != count * GET_MODE_BITSIZE (*modep))
6367 	  return -1;
6368 
6369 	return count;
6370       }
6371 
6372     case UNION_TYPE:
6373     case QUAL_UNION_TYPE:
6374       {
6375 	/* These aren't very interesting except in a degenerate case.  */
6376 	int count = 0;
6377 	int sub_count;
6378 	tree field;
6379 
6380 	/* Can't handle incomplete types nor sizes that are not
6381 	   fixed.  */
6382 	if (!COMPLETE_TYPE_P (type)
6383 	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6384 	  return -1;
6385 
6386 	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6387 	  {
6388 	    if (TREE_CODE (field) != FIELD_DECL)
6389 	      continue;
6390 
6391 	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6392 						 warn_psabi_flags);
6393 	    if (sub_count < 0)
6394 	      return -1;
6395 	    count = count > sub_count ? count : sub_count;
6396 	  }
6397 
6398 	/* There must be no padding.  */
6399 	if (wi::to_wide (TYPE_SIZE (type))
6400 	    != count * GET_MODE_BITSIZE (*modep))
6401 	  return -1;
6402 
6403 	return count;
6404       }
6405 
6406     default:
6407       break;
6408     }
6409 
6410   return -1;
6411 }
6412 
6413 /* Return true if PCS_VARIANT should use VFP registers.  */
6414 static bool
6415 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6416 {
6417   if (pcs_variant == ARM_PCS_AAPCS_VFP)
6418     {
6419       static bool seen_thumb1_vfp = false;
6420 
6421       if (TARGET_THUMB1 && !seen_thumb1_vfp)
6422 	{
6423 	  sorry ("Thumb-1 hard-float VFP ABI");
6424 	  /* sorry() is not immediately fatal, so only display this once.  */
6425 	  seen_thumb1_vfp = true;
6426 	}
6427 
6428       return true;
6429     }
6430 
6431   if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6432     return false;
6433 
6434   return (TARGET_32BIT && TARGET_HARD_FLOAT &&
6435 	 (TARGET_VFP_DOUBLE || !is_double));
6436 }
6437 
6438 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6439    suitable for passing or returning in VFP registers for the PCS
6440    variant selected.  If it is, then *BASE_MODE is updated to contain
6441    a machine mode describing each element of the argument's type and
6442    *COUNT to hold the number of such elements.  */
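/* Illustrative example only: for a type such as

       struct pair { double d[2]; };

   this function would return true with *BASE_MODE set to DFmode and *COUNT
   set to 2, provided use_vfp_abi accepts the selected PCS variant.  */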
6443 static bool
6444 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6445 				       machine_mode mode, const_tree type,
6446 				       machine_mode *base_mode, int *count)
6447 {
6448   machine_mode new_mode = VOIDmode;
6449 
6450   /* If we have the type information, prefer that to working things
6451      out from the mode.  */
6452   if (type)
6453     {
6454       unsigned int warn_psabi_flags = 0;
6455       int ag_count = aapcs_vfp_sub_candidate (type, &new_mode,
6456 					      &warn_psabi_flags);
6457       if (ag_count > 0 && ag_count <= 4)
6458 	{
6459 	  static unsigned last_reported_type_uid;
6460 	  unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
6461 	  int alt;
6462 	  if (warn_psabi
6463 	      && warn_psabi_flags
6464 	      && uid != last_reported_type_uid
6465 	      && ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL))
6466 		  != ag_count))
6467 	    {
6468 	      const char *url
6469 		= CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
6470 	      gcc_assert (alt == -1);
6471 	      last_reported_type_uid = uid;
6472 	      /* Use TYPE_MAIN_VARIANT to strip any redundant const
6473 		 qualification.  */
6474 	      if (warn_psabi_flags & WARN_PSABI_NO_UNIQUE_ADDRESS)
6475 		inform (input_location, "parameter passing for argument of "
6476 			"type %qT with %<[[no_unique_address]]%> members "
6477 			"changed %{in GCC 10.1%}",
6478 			TYPE_MAIN_VARIANT (type), url);
6479 	      else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE)
6480 		inform (input_location, "parameter passing for argument of "
6481 			"type %qT when C++17 is enabled changed to match "
6482 			"C++14 %{in GCC 10.1%}",
6483 			TYPE_MAIN_VARIANT (type), url);
6484 	    }
6485 	  *count = ag_count;
6486 	}
6487       else
6488 	return false;
6489     }
6490   else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6491 	   || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6492 	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6493     {
6494       *count = 1;
6495       new_mode = mode;
6496     }
6497   else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6498     {
6499       *count = 2;
6500       new_mode = (mode == DCmode ? DFmode : SFmode);
6501     }
6502   else
6503     return false;
6504 
6505 
6506   if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6507     return false;
6508 
6509   *base_mode = new_mode;
6510 
6511   if (TARGET_GENERAL_REGS_ONLY)
6512     error ("argument of type %qT not permitted with -mgeneral-regs-only",
6513 	   type);
6514 
6515   return true;
6516 }
6517 
6518 static bool
6519 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6520 			       machine_mode mode, const_tree type)
6521 {
6522   int count ATTRIBUTE_UNUSED;
6523   machine_mode ag_mode ATTRIBUTE_UNUSED;
6524 
6525   if (!use_vfp_abi (pcs_variant, false))
6526     return false;
6527   return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6528 						&ag_mode, &count);
6529 }
6530 
6531 static bool
6532 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6533 			     const_tree type)
6534 {
6535   if (!use_vfp_abi (pcum->pcs_variant, false))
6536     return false;
6537 
6538   return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6539 						&pcum->aapcs_vfp_rmode,
6540 						&pcum->aapcs_vfp_rcount);
6541 }
6542 
6543 /* Implement the allocate field in aapcs_cp_arg_layout.  See the comment there
6544    for the behaviour of this function.  */
6545 
6546 static bool
6547 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6548 		    const_tree type  ATTRIBUTE_UNUSED)
6549 {
6550   int rmode_size
6551     = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6552   int shift = rmode_size / GET_MODE_SIZE (SFmode);
6553   unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6554   int regno;
6555 
6556   for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6557     if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6558       {
6559 	pcum->aapcs_vfp_reg_alloc = mask << regno;
6560 	if (mode == BLKmode
6561 	    || (mode == TImode && ! (TARGET_NEON || TARGET_HAVE_MVE))
6562 	    || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6563 	  {
6564 	    int i;
6565 	    int rcount = pcum->aapcs_vfp_rcount;
6566 	    int rshift = shift;
6567 	    machine_mode rmode = pcum->aapcs_vfp_rmode;
6568 	    rtx par;
6569 	    if (!(TARGET_NEON || TARGET_HAVE_MVE))
6570 	      {
6571 		/* Avoid using unsupported vector modes.  */
6572 		if (rmode == V2SImode)
6573 		  rmode = DImode;
6574 		else if (rmode == V4SImode)
6575 		  {
6576 		    rmode = DImode;
6577 		    rcount *= 2;
6578 		    rshift /= 2;
6579 		  }
6580 	      }
6581 	    par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6582 	    for (i = 0; i < rcount; i++)
6583 	      {
6584 		rtx tmp = gen_rtx_REG (rmode,
6585 				       FIRST_VFP_REGNUM + regno + i * rshift);
6586 		tmp = gen_rtx_EXPR_LIST
6587 		  (VOIDmode, tmp,
6588 		   GEN_INT (i * GET_MODE_SIZE (rmode)));
6589 		XVECEXP (par, 0, i) = tmp;
6590 	      }
6591 
6592 	    pcum->aapcs_reg = par;
6593 	  }
6594 	else
6595 	  pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6596 	return true;
6597       }
6598   return false;
6599 }
6600 
6601 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout.  See the
6602    comment there for the behaviour of this function.  */
6603 
6604 static rtx
6605 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6606 			       machine_mode mode,
6607 			       const_tree type ATTRIBUTE_UNUSED)
6608 {
6609   if (!use_vfp_abi (pcs_variant, false))
6610     return NULL;
6611 
6612   if (mode == BLKmode
6613       || (GET_MODE_CLASS (mode) == MODE_INT
6614 	  && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6615 	  && !(TARGET_NEON || TARGET_HAVE_MVE)))
6616     {
6617       int count;
6618       machine_mode ag_mode;
6619       int i;
6620       rtx par;
6621       int shift;
6622 
6623       aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6624 					     &ag_mode, &count);
6625 
6626       if (!(TARGET_NEON || TARGET_HAVE_MVE))
6627 	{
6628 	  if (ag_mode == V2SImode)
6629 	    ag_mode = DImode;
6630 	  else if (ag_mode == V4SImode)
6631 	    {
6632 	      ag_mode = DImode;
6633 	      count *= 2;
6634 	    }
6635 	}
6636       shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
6637       par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6638       for (i = 0; i < count; i++)
6639 	{
6640 	  rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6641 	  tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6642 				   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6643 	  XVECEXP (par, 0, i) = tmp;
6644 	}
6645 
6646       return par;
6647     }
6648 
6649   return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6650 }
6651 
6652 static void
6653 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
6654 		   machine_mode mode  ATTRIBUTE_UNUSED,
6655 		   const_tree type  ATTRIBUTE_UNUSED)
6656 {
6657   pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6658   pcum->aapcs_vfp_reg_alloc = 0;
6659   return;
6660 }
6661 
6662 #define AAPCS_CP(X)				\
6663   {						\
6664     aapcs_ ## X ## _cum_init,			\
6665     aapcs_ ## X ## _is_call_candidate,		\
6666     aapcs_ ## X ## _allocate,			\
6667     aapcs_ ## X ## _is_return_candidate,	\
6668     aapcs_ ## X ## _allocate_return_reg,	\
6669     aapcs_ ## X ## _advance			\
6670   }
6671 
6672 /* Table of co-processors that can be used to pass arguments in
6673    registers.  Ideally no argument should be a candidate for more than
6674    one co-processor table entry, but the table is processed in order
6675    and stops after the first match.  If that entry then fails to put
6676    the argument into a co-processor register, the argument will go on
6677    the stack.  */
6678 static struct
6679 {
6680   /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
6681   void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6682 
6683   /* Return true if an argument of mode MODE (or type TYPE if MODE is
6684      BLKmode) is a candidate for this co-processor's registers; this
6685      function should ignore any position-dependent state in
6686      CUMULATIVE_ARGS and only use call-type dependent information.  */
6687   bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6688 
6689   /* Return true if the argument does get a co-processor register; it
6690      should set aapcs_reg to an RTX of the register allocated as is
6691      required for a return from FUNCTION_ARG.  */
6692   bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6693 
6694   /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6695      be returned in this co-processor's registers.  */
6696   bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6697 
6698   /* Allocate and return an RTX element to hold the return value of a call.  This
6699      routine must not fail and will only be called if is_return_candidate
6700      returned true with the same parameters.  */
6701   rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6702 
6703   /* Finish processing this argument and prepare to start processing
6704      the next one.  */
6705   void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6706 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6707   {
6708     AAPCS_CP(vfp)
6709   };
6710 
6711 #undef AAPCS_CP
6712 
6713 static int
6714 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6715 			  const_tree type)
6716 {
6717   int i;
6718 
6719   for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6720     if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6721       return i;
6722 
6723   return -1;
6724 }
6725 
6726 static int
6727 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6728 {
6729   /* We aren't passed a decl, so we can't check that a call is local.
6730      However, it isn't clear that that would be a win anyway, since it
6731      might limit some tail-calling opportunities.  */
6732   enum arm_pcs pcs_variant;
6733 
6734   if (fntype)
6735     {
6736       const_tree fndecl = NULL_TREE;
6737 
6738       if (TREE_CODE (fntype) == FUNCTION_DECL)
6739 	{
6740 	  fndecl = fntype;
6741 	  fntype = TREE_TYPE (fntype);
6742 	}
6743 
6744       pcs_variant = arm_get_pcs_model (fntype, fndecl);
6745     }
6746   else
6747     pcs_variant = arm_pcs_default;
6748 
6749   if (pcs_variant != ARM_PCS_AAPCS)
6750     {
6751       int i;
6752 
6753       for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6754 	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6755 							TYPE_MODE (type),
6756 							type))
6757 	  return i;
6758     }
6759   return -1;
6760 }
6761 
6762 static rtx
6763 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6764 			   const_tree fntype)
6765 {
6766   /* We aren't passed a decl, so we can't check that a call is local.
6767      However, it isn't clear that that would be a win anyway, since it
6768      might limit some tail-calling opportunities.  */
6769   enum arm_pcs pcs_variant;
6770   int unsignedp ATTRIBUTE_UNUSED;
6771 
6772   if (fntype)
6773     {
6774       const_tree fndecl = NULL_TREE;
6775 
6776       if (TREE_CODE (fntype) == FUNCTION_DECL)
6777 	{
6778 	  fndecl = fntype;
6779 	  fntype = TREE_TYPE (fntype);
6780 	}
6781 
6782       pcs_variant = arm_get_pcs_model (fntype, fndecl);
6783     }
6784   else
6785     pcs_variant = arm_pcs_default;
6786 
6787   /* Promote integer types.  */
6788   if (type && INTEGRAL_TYPE_P (type))
6789     mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6790 
6791   if (pcs_variant != ARM_PCS_AAPCS)
6792     {
6793       int i;
6794 
6795       for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6796 	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6797 							type))
6798 	  return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6799 							     mode, type);
6800     }
6801 
6802   /* Promotes small structs returned in a register to full-word size
6803      for big-endian AAPCS.  */
6804   if (type && arm_return_in_msb (type))
6805     {
6806       HOST_WIDE_INT size = int_size_in_bytes (type);
6807       if (size % UNITS_PER_WORD != 0)
6808 	{
6809 	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6810 	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6811 	}
6812     }
6813 
6814   return gen_rtx_REG (mode, R0_REGNUM);
6815 }
6816 
6817 static rtx
6818 aapcs_libcall_value (machine_mode mode)
6819 {
6820   if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6821       && GET_MODE_SIZE (mode) <= 4)
6822     mode = SImode;
6823 
6824   return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6825 }
6826 
6827 /* Lay out a function argument using the AAPCS rules.  The rule
6828    numbers referred to here are those in the AAPCS.  */
6829 static void
6830 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6831 		  const_tree type, bool named)
6832 {
6833   int nregs, nregs2;
6834   int ncrn;
6835 
6836   /* We only need to do this once per argument.  */
6837   if (pcum->aapcs_arg_processed)
6838     return;
6839 
6840   pcum->aapcs_arg_processed = true;
6841 
6842   /* Special case: if named is false then we are handling an incoming
6843      anonymous argument which is on the stack.  */
6844   if (!named)
6845     return;
6846 
6847   /* Is this a potential co-processor register candidate?  */
6848   if (pcum->pcs_variant != ARM_PCS_AAPCS)
6849     {
6850       int slot = aapcs_select_call_coproc (pcum, mode, type);
6851       pcum->aapcs_cprc_slot = slot;
6852 
6853       /* We don't have to apply any of the rules from part B of the
6854 	 preparation phase, these are handled elsewhere in the
6855 	 compiler.  */
6856 
6857       if (slot >= 0)
6858 	{
6859 	  /* A Co-processor register candidate goes either in its own
6860 	     class of registers or on the stack.  */
6861 	  if (!pcum->aapcs_cprc_failed[slot])
6862 	    {
6863 	      /* C1.cp - Try to allocate the argument to co-processor
6864 		 registers.  */
6865 	      if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6866 		return;
6867 
6868 	      /* C2.cp - Put the argument on the stack and note that we
6869 		 can't assign any more candidates in this slot.  We also
6870 		 need to note that we have allocated stack space, so that
6871 		 we won't later try to split a non-cprc candidate between
6872 		 core registers and the stack.  */
6873 	      pcum->aapcs_cprc_failed[slot] = true;
6874 	      pcum->can_split = false;
6875 	    }
6876 
6877 	  /* We didn't get a register, so this argument goes on the
6878 	     stack.  */
6879 	  gcc_assert (pcum->can_split == false);
6880 	  return;
6881 	}
6882     }
6883 
6884   /* C3 - For double-word aligned arguments, round the NCRN up to the
6885      next even number.  */
6886   ncrn = pcum->aapcs_ncrn;
6887   if (ncrn & 1)
6888     {
6889       int res = arm_needs_doubleword_align (mode, type);
6890       /* Only warn during RTL expansion of call stmts, otherwise we would
6891 	 warn e.g. during gimplification even on functions that will be
6892 	 always inlined, and we'd warn multiple times.  Don't warn when
6893 	 called in expand_function_start either, as we warn instead in
6894 	 arm_function_arg_boundary in that case.  */
6895       if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6896 	inform (input_location, "parameter passing for argument of type "
6897 		"%qT changed in GCC 7.1", type);
6898       else if (res > 0)
6899 	ncrn++;
6900     }
6901 
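  /* Illustrative example, not compiler logic: for a call such as
     f (int, long long) the int occupies r0, leaving NCRN == 1; the long long
     requires double-word alignment, so NCRN is rounded up to 2 above and the
     value is passed in r2/r3, leaving r1 unused.  */
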
6902   nregs = ARM_NUM_REGS2(mode, type);
6903 
6904   /* Sigh, this test should really assert that nregs > 0, but a GCC
6905      extension allows empty structs and then gives them empty size; it
6906      then allows such a structure to be passed by value.  For some of
6907      the code below we have to pretend that such an argument has
6908      non-zero size so that we 'locate' it correctly either in
6909      registers or on the stack.  */
6910   gcc_assert (nregs >= 0);
6911 
6912   nregs2 = nregs ? nregs : 1;
6913 
6914   /* C4 - Argument fits entirely in core registers.  */
6915   if (ncrn + nregs2 <= NUM_ARG_REGS)
6916     {
6917       pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6918       pcum->aapcs_next_ncrn = ncrn + nregs;
6919       return;
6920     }
6921 
6922   /* C5 - Some core registers left and there are no arguments already
6923      on the stack: split this argument between the remaining core
6924      registers and the stack.  */
6925   if (ncrn < NUM_ARG_REGS && pcum->can_split)
6926     {
6927       pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6928       pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6929       pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6930       return;
6931     }
6932 
6933   /* C6 - NCRN is set to 4.  */
6934   pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6935 
6936   /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
6937   return;
6938 }
6939 
6940 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6941    for a call to a function whose data type is FNTYPE.
6942    For a library call, FNTYPE is NULL.  */
6943 void
6944 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6945 			  rtx libname,
6946 			  tree fndecl ATTRIBUTE_UNUSED)
6947 {
6948   /* Long call handling.  */
6949   if (fntype)
6950     pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6951   else
6952     pcum->pcs_variant = arm_pcs_default;
6953 
6954   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6955     {
6956       if (arm_libcall_uses_aapcs_base (libname))
6957 	pcum->pcs_variant = ARM_PCS_AAPCS;
6958 
6959       pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6960       pcum->aapcs_reg = NULL_RTX;
6961       pcum->aapcs_partial = 0;
6962       pcum->aapcs_arg_processed = false;
6963       pcum->aapcs_cprc_slot = -1;
6964       pcum->can_split = true;
6965 
6966       if (pcum->pcs_variant != ARM_PCS_AAPCS)
6967 	{
6968 	  int i;
6969 
6970 	  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6971 	    {
6972 	      pcum->aapcs_cprc_failed[i] = false;
6973 	      aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6974 	    }
6975 	}
6976       return;
6977     }
6978 
6979   /* Legacy ABIs */
6980 
6981   /* On the ARM, the offset starts at 0.  */
6982   pcum->nregs = 0;
6983   pcum->iwmmxt_nregs = 0;
6984   pcum->can_split = true;
6985 
6986   /* Varargs vectors are treated the same as long long.
6987      named_count avoids having to change the way arm handles 'named' */
6988   pcum->named_count = 0;
6989   pcum->nargs = 0;
6990 
6991   if (TARGET_REALLY_IWMMXT && fntype)
6992     {
6993       tree fn_arg;
6994 
6995       for (fn_arg = TYPE_ARG_TYPES (fntype);
6996 	   fn_arg;
6997 	   fn_arg = TREE_CHAIN (fn_arg))
6998 	pcum->named_count += 1;
6999 
7000       if (! pcum->named_count)
7001 	pcum->named_count = INT_MAX;
7002     }
7003 }
7004 
7005 /* Return 2 if double word alignment is required for argument passing,
7006    but wasn't required before the fix for PR88469.
7007    Return 1 if double word alignment is required for argument passing.
7008    Return -1 if double word alignment used to be required for argument
7009    passing before PR77728 ABI fix, but is not required anymore.
7010    Return 0 if double word alignment is not required and wasn't required
7011    before either.  */
7012 static int
7013 arm_needs_doubleword_align (machine_mode mode, const_tree type)
7014 {
7015   if (!type)
7016     return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
7017 
7018   /* Scalar and vector types: Use natural alignment, i.e. of base type.  */
7019   if (!AGGREGATE_TYPE_P (type))
7020     return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
7021 
7022   /* Array types: Use member alignment of element type.  */
7023   if (TREE_CODE (type) == ARRAY_TYPE)
7024     return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
7025 
7026   int ret = 0;
7027   int ret2 = 0;
7028   /* Record/aggregate types: Use greatest member alignment of any member.
7029 
7030      Note that we explicitly consider zero-sized fields here, even though
7031      they don't map to AAPCS machine types.  For example, in:
7032 
7033 	 struct __attribute__((aligned(8))) empty {};
7034 
7035 	 struct s {
7036 	   [[no_unique_address]] empty e;
7037 	   int x;
7038 	 };
7039 
7040      "s" contains only one Fundamental Data Type (the int field)
7041      but gains 8-byte alignment and size thanks to "e".  */
7042   for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7043     if (DECL_ALIGN (field) > PARM_BOUNDARY)
7044       {
7045 	if (TREE_CODE (field) == FIELD_DECL)
7046 	  return 1;
7047 	else
7048 	  /* Before PR77728 fix, we were incorrectly considering also
7049 	     other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
7050 	     Make sure we can warn about that with -Wpsabi.  */
7051 	  ret = -1;
7052       }
7053     else if (TREE_CODE (field) == FIELD_DECL
7054 	     && DECL_BIT_FIELD_TYPE (field)
7055 	     && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
7056       ret2 = 1;
7057 
7058   if (ret2)
7059     return 2;
7060 
7061   return ret;
7062 }
7063 
7064 
7065 /* Determine where to put an argument to a function.
7066    Value is zero to push the argument on the stack,
7067    or a hard register in which to store the argument.
7068 
7069    CUM is a variable of type CUMULATIVE_ARGS which gives info about
7070     the preceding args and about the function being called.
7071    ARG is a description of the argument.
7072 
7073    On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
7074    other arguments are passed on the stack.  If (NAMED == 0) (which happens
7075    only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
7076    defined), say it is passed in the stack (function_prologue will
7077    indeed make it pass in the stack if necessary).  */
7078 
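/* Illustrative example of the rules described above (not part of the
   implementation): for f (int a, int b, long long c) under the default
   AAPCS, A is passed in r0, B in r1 and C in the register pair r2/r3; any
   further argument would go on the stack.  */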
7079 static rtx
7080 arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
7081 {
7082   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7083   int nregs;
7084 
7085   /* Handle the special case quickly.  Pick an arbitrary value for op2 of
7086      a call insn (op3 of a call_value insn).  */
7087   if (arg.end_marker_p ())
7088     return const0_rtx;
7089 
7090   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7091     {
7092       aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7093       return pcum->aapcs_reg;
7094     }
7095 
7096   /* Varargs vectors are treated the same as long long.
7097      named_count avoids having to change the way arm handles 'named' */
7098   if (TARGET_IWMMXT_ABI
7099       && arm_vector_mode_supported_p (arg.mode)
7100       && pcum->named_count > pcum->nargs + 1)
7101     {
7102       if (pcum->iwmmxt_nregs <= 9)
7103 	return gen_rtx_REG (arg.mode,
7104 			    pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
7105       else
7106 	{
7107 	  pcum->can_split = false;
7108 	  return NULL_RTX;
7109 	}
7110     }
7111 
7112   /* Put doubleword aligned quantities in even register pairs.  */
7113   if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
7114     {
7115       int res = arm_needs_doubleword_align (arg.mode, arg.type);
7116       if (res < 0 && warn_psabi)
7117 	inform (input_location, "parameter passing for argument of type "
7118 		"%qT changed in GCC 7.1", arg.type);
7119       else if (res > 0)
7120 	{
7121 	  pcum->nregs++;
7122 	  if (res > 1 && warn_psabi)
7123 	    inform (input_location, "parameter passing for argument of type "
7124 		    "%qT changed in GCC 9.1", arg.type);
7125 	}
7126     }
7127 
7128   /* Only allow splitting an arg between regs and memory if all preceding
7129      args were allocated to regs.  For args passed by reference we only count
7130      the reference pointer.  */
7131   if (pcum->can_split)
7132     nregs = 1;
7133   else
7134     nregs = ARM_NUM_REGS2 (arg.mode, arg.type);
7135 
7136   if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
7137     return NULL_RTX;
7138 
7139   return gen_rtx_REG (arg.mode, pcum->nregs);
7140 }
7141 
7142 static unsigned int
7143 arm_function_arg_boundary (machine_mode mode, const_tree type)
7144 {
7145   if (!ARM_DOUBLEWORD_ALIGN)
7146     return PARM_BOUNDARY;
7147 
7148   int res = arm_needs_doubleword_align (mode, type);
7149   if (res < 0 && warn_psabi)
7150     inform (input_location, "parameter passing for argument of type %qT "
7151 	    "changed in GCC 7.1", type);
7152   if (res > 1 && warn_psabi)
7153     inform (input_location, "parameter passing for argument of type "
7154 	    "%qT changed in GCC 9.1", type);
7155 
7156   return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
7157 }
7158 
7159 static int
7160 arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
7161 {
7162   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7163   int nregs = pcum->nregs;
7164 
7165   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7166     {
7167       aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7168       return pcum->aapcs_partial;
7169     }
7170 
7171   if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
7172     return 0;
7173 
7174   if (NUM_ARG_REGS > nregs
7175       && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
7176       && pcum->can_split)
7177     return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
7178 
7179   return 0;
7180 }
7181 
7182 /* Update the data in PCUM to advance over argument ARG.  */
7183 
7184 static void
7185 arm_function_arg_advance (cumulative_args_t pcum_v,
7186 			  const function_arg_info &arg)
7187 {
7188   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7189 
7190   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7191     {
7192       aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7193 
7194       if (pcum->aapcs_cprc_slot >= 0)
7195 	{
7196 	  aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
7197 							      arg.type);
7198 	  pcum->aapcs_cprc_slot = -1;
7199 	}
7200 
7201       /* Generic stuff.  */
7202       pcum->aapcs_arg_processed = false;
7203       pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
7204       pcum->aapcs_reg = NULL_RTX;
7205       pcum->aapcs_partial = 0;
7206     }
7207   else
7208     {
7209       pcum->nargs += 1;
7210       if (arm_vector_mode_supported_p (arg.mode)
7211 	  && pcum->named_count > pcum->nargs
7212 	  && TARGET_IWMMXT_ABI)
7213 	pcum->iwmmxt_nregs += 1;
7214       else
7215 	pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
7216     }
7217 }
7218 
7219 /* Variable sized types are passed by reference.  This is a GCC
7220    extension to the ARM ABI.  */
7221 
7222 static bool
7223 arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
7224 {
7225   return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
7226 }
7227 
7228 /* Encode the current state of the #pragma [no_]long_calls.  */
7229 typedef enum
7230 {
7231   OFF,		/* No #pragma [no_]long_calls is in effect.  */
7232   LONG,		/* #pragma long_calls is in effect.  */
7233   SHORT		/* #pragma no_long_calls is in effect.  */
7234 } arm_pragma_enum;
7235 
7236 static arm_pragma_enum arm_pragma_long_calls = OFF;
7237 
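/* Illustrative usage in user source code (not compiler logic):

       #pragma long_calls
       void far_away (void);      -- calls to far_away use a long call
       #pragma long_calls_off

   The pragma handlers below simply record the current state in
   arm_pragma_long_calls.  */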
7238 void
7239 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7240 {
7241   arm_pragma_long_calls = LONG;
7242 }
7243 
7244 void
7245 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7246 {
7247   arm_pragma_long_calls = SHORT;
7248 }
7249 
7250 void
7251 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7252 {
7253   arm_pragma_long_calls = OFF;
7254 }
7255 
7256 /* Handle an attribute requiring a FUNCTION_DECL;
7257    arguments as in struct attribute_spec.handler.  */
7258 static tree
7259 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
7260 			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7261 {
7262   if (TREE_CODE (*node) != FUNCTION_DECL)
7263     {
7264       warning (OPT_Wattributes, "%qE attribute only applies to functions",
7265 	       name);
7266       *no_add_attrs = true;
7267     }
7268 
7269   return NULL_TREE;
7270 }
7271 
7272 /* Handle an "interrupt" or "isr" attribute;
7273    arguments as in struct attribute_spec.handler.  */
7274 static tree
7275 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
7276 			  bool *no_add_attrs)
7277 {
7278   if (DECL_P (*node))
7279     {
7280       if (TREE_CODE (*node) != FUNCTION_DECL)
7281 	{
7282 	  warning (OPT_Wattributes, "%qE attribute only applies to functions",
7283 		   name);
7284 	  *no_add_attrs = true;
7285 	}
7286       /* FIXME: the argument if any is checked for type attributes;
7287 	 should it be checked for decl ones?  */
7288     }
7289   else
7290     {
7291       if (TREE_CODE (*node) == FUNCTION_TYPE
7292 	  || TREE_CODE (*node) == METHOD_TYPE)
7293 	{
7294 	  if (arm_isr_value (args) == ARM_FT_UNKNOWN)
7295 	    {
7296 	      warning (OPT_Wattributes, "%qE attribute ignored",
7297 		       name);
7298 	      *no_add_attrs = true;
7299 	    }
7300 	}
7301       else if (TREE_CODE (*node) == POINTER_TYPE
7302 	       && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
7303 		   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
7304 	       && arm_isr_value (args) != ARM_FT_UNKNOWN)
7305 	{
7306 	  *node = build_variant_type_copy (*node);
7307 	  TREE_TYPE (*node) = build_type_attribute_variant
7308 	    (TREE_TYPE (*node),
7309 	     tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
7310 	  *no_add_attrs = true;
7311 	}
7312       else
7313 	{
7314 	  /* Possibly pass this attribute on from the type to a decl.  */
7315 	  if (flags & ((int) ATTR_FLAG_DECL_NEXT
7316 		       | (int) ATTR_FLAG_FUNCTION_NEXT
7317 		       | (int) ATTR_FLAG_ARRAY_NEXT))
7318 	    {
7319 	      *no_add_attrs = true;
7320 	      return tree_cons (name, args, NULL_TREE);
7321 	    }
7322 	  else
7323 	    {
7324 	      warning (OPT_Wattributes, "%qE attribute ignored",
7325 		       name);
7326 	    }
7327 	}
7328     }
7329 
7330   return NULL_TREE;
7331 }
7332 
7333 /* Handle a "pcs" attribute; arguments as in struct
7334    attribute_spec.handler.  */
7335 static tree
7336 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
7337 			  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7338 {
7339   if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
7340     {
7341       warning (OPT_Wattributes, "%qE attribute ignored", name);
7342       *no_add_attrs = true;
7343     }
7344   return NULL_TREE;
7345 }
7346 
7347 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
7348 /* Handle the "notshared" attribute.  This attribute is another way of
7349    requesting hidden visibility.  ARM's compiler supports
7350    "__declspec(notshared)"; we support the same thing via an
7351    attribute.  */
7352 
7353 static tree
7354 arm_handle_notshared_attribute (tree *node,
7355 				tree name ATTRIBUTE_UNUSED,
7356 				tree args ATTRIBUTE_UNUSED,
7357 				int flags ATTRIBUTE_UNUSED,
7358 				bool *no_add_attrs)
7359 {
7360   tree decl = TYPE_NAME (*node);
7361 
7362   if (decl)
7363     {
7364       DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
7365       DECL_VISIBILITY_SPECIFIED (decl) = 1;
7366       *no_add_attrs = false;
7367     }
7368   return NULL_TREE;
7369 }
7370 #endif
7371 
7372 /* This function returns true if a function with declaration FNDECL and type
7373    FNTYPE uses the stack to pass arguments or return variables and false
7374    otherwise.  This is used for functions with the attributes
7375    'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7376    diagnostic messages if the stack is used.  NAME is the name of the attribute
7377    used.  */
7378 
7379 static bool
7380 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
7381 {
7382   function_args_iterator args_iter;
7383   CUMULATIVE_ARGS args_so_far_v;
7384   cumulative_args_t args_so_far;
7385   bool first_param = true;
7386   tree arg_type, prev_arg_type = NULL_TREE, ret_type;
7387 
7388   /* Error out if any argument is passed on the stack.  */
7389   arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
7390   args_so_far = pack_cumulative_args (&args_so_far_v);
7391   FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
7392     {
7393       rtx arg_rtx;
7394 
7395       prev_arg_type = arg_type;
7396       if (VOID_TYPE_P (arg_type))
7397 	continue;
7398 
7399       function_arg_info arg (arg_type, /*named=*/true);
7400       if (!first_param)
7401 	/* ??? We should advance after processing the argument and pass
7402 	   the argument we're advancing past.  */
7403 	arm_function_arg_advance (args_so_far, arg);
7404       arg_rtx = arm_function_arg (args_so_far, arg);
7405       if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
7406 	{
7407 	  error ("%qE attribute not available to functions with arguments "
7408 		 "passed on the stack", name);
7409 	  return true;
7410 	}
7411       first_param = false;
7412     }
7413 
7414   /* Error out for variadic functions since we cannot control how many
7415      arguments will be passed and thus stack could be used.  stdarg_p () is not
7416      used for the checking to avoid browsing arguments twice.  */
7417   if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
7418     {
7419       error ("%qE attribute not available to functions with variable number "
7420 	     "of arguments", name);
7421       return true;
7422     }
7423 
7424   /* Error out if return value is passed on the stack.  */
7425   ret_type = TREE_TYPE (fntype);
7426   if (arm_return_in_memory (ret_type, fntype))
7427     {
7428       error ("%qE attribute not available to functions that return value on "
7429 	     "the stack", name);
7430       return true;
7431     }
7432   return false;
7433 }
7434 
7435 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7436    function will check whether the attribute is allowed here and will add the
7437    attribute to the function declaration tree or otherwise issue a warning.  */
7438 
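/* Illustrative usage, assuming a target compiled with -mcmse:

       int __attribute__ ((cmse_nonsecure_entry)) secure_entry (int x);

   declares an ARMv8-M secure entry function that may be called from
   non-secure state.  */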
7439 static tree
7440 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7441 				 tree /* args */,
7442 				 int /* flags */,
7443 				 bool *no_add_attrs)
7444 {
7445   tree fndecl;
7446 
7447   if (!use_cmse)
7448     {
7449       *no_add_attrs = true;
7450       warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7451 	       "option.", name);
7452       return NULL_TREE;
7453     }
7454 
7455   /* Ignore attribute for function types.  */
7456   if (TREE_CODE (*node) != FUNCTION_DECL)
7457     {
7458       warning (OPT_Wattributes, "%qE attribute only applies to functions",
7459 	       name);
7460       *no_add_attrs = true;
7461       return NULL_TREE;
7462     }
7463 
7464   fndecl = *node;
7465 
7466   /* Warn for static linkage functions.  */
7467   if (!TREE_PUBLIC (fndecl))
7468     {
7469       warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7470 	       "with static linkage", name);
7471       *no_add_attrs = true;
7472       return NULL_TREE;
7473     }
7474 
7475   *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7476 						TREE_TYPE (fndecl));
7477   return NULL_TREE;
7478 }
7479 
7480 
7481 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7482    function will check whether the attribute is allowed here and will add the
7483    attribute to the function type tree or otherwise issue a diagnostic.  The
7484    reason we check this at declaration time is to only allow the use of the
7485    attribute with declarations of function pointers and not function
7486    declarations.  This function checks NODE is of the expected type and issues
7487    diagnostics otherwise using NAME.  If it is not of the expected type
7488    *NO_ADD_ATTRS will be set to true.  */
7489 
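/* Illustrative usage, assuming -mcmse (the attribute applies to function
   pointer types, not to function declarations):

       typedef void __attribute__ ((cmse_nonsecure_call)) ns_fn_t (void);
       ns_fn_t *callback;

   Calls made through CALLBACK then use the non-secure call sequence.  */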
7490 static tree
7491 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7492 				 tree /* args */,
7493 				 int /* flags */,
7494 				 bool *no_add_attrs)
7495 {
7496   tree decl = NULL_TREE, fntype = NULL_TREE;
7497   tree type;
7498 
7499   if (!use_cmse)
7500     {
7501       *no_add_attrs = true;
7502       warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7503 	       "option.", name);
7504       return NULL_TREE;
7505     }
7506 
7507   if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7508     {
7509       decl = *node;
7510       fntype = TREE_TYPE (decl);
7511     }
7512 
7513   while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7514     fntype = TREE_TYPE (fntype);
7515 
7516   if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7517     {
7518 	warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7519 		 "function pointer", name);
7520 	*no_add_attrs = true;
7521 	return NULL_TREE;
7522     }
7523 
7524   *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7525 
7526   if (*no_add_attrs)
7527     return NULL_TREE;
7528 
7529   /* Prevent trees being shared among function types with and without
7530      cmse_nonsecure_call attribute.  */
7531   type = TREE_TYPE (decl);
7532 
7533   type = build_distinct_type_copy (type);
7534   TREE_TYPE (decl) = type;
7535   fntype = type;
7536 
7537   while (TREE_CODE (fntype) != FUNCTION_TYPE)
7538     {
7539       type = fntype;
7540       fntype = TREE_TYPE (fntype);
7541       fntype = build_distinct_type_copy (fntype);
7542       TREE_TYPE (type) = fntype;
7543     }
7544 
7545   /* Construct a type attribute and add it to the function type.  */
7546   tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7547 			  TYPE_ATTRIBUTES (fntype));
7548   TYPE_ATTRIBUTES (fntype) = attrs;
7549   return NULL_TREE;
7550 }
7551 
7552 /* Return 0 if the attributes for two types are incompatible, 1 if they
7553    are compatible, and 2 if they are nearly compatible (which causes a
7554    warning to be generated).  */
7555 static int
7556 arm_comp_type_attributes (const_tree type1, const_tree type2)
7557 {
7558   int l1, l2, s1, s2;
7559 
7560   /* Check for mismatch of non-default calling convention.  */
7561   if (TREE_CODE (type1) != FUNCTION_TYPE)
7562     return 1;
7563 
7564   /* Check for mismatched call attributes.  */
7565   l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7566   l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7567   s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7568   s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7569 
7570   /* Only bother to check if an attribute is defined.  */
7571   if (l1 | l2 | s1 | s2)
7572     {
7573       /* If one type has an attribute, the other must have the same attribute.  */
7574       if ((l1 != l2) || (s1 != s2))
7575 	return 0;
7576 
7577       /* Disallow mixed attributes.  */
7578       if ((l1 & s2) || (l2 & s1))
7579 	return 0;
7580     }
7581 
7582   /* Check for mismatched ISR attribute.  */
7583   l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7584   if (! l1)
7585     l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7586   l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7587   if (! l2)
7588     l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7589   if (l1 != l2)
7590     return 0;
7591 
7592   l1 = lookup_attribute ("cmse_nonsecure_call",
7593 			 TYPE_ATTRIBUTES (type1)) != NULL;
7594   l2 = lookup_attribute ("cmse_nonsecure_call",
7595 			 TYPE_ATTRIBUTES (type2)) != NULL;
7596 
7597   if (l1 != l2)
7598     return 0;
7599 
7600   return 1;
7601 }
7602 
7603 /*  Assigns default attributes to a newly defined type.  This is used to
7604     set short_call/long_call attributes for function types of
7605     functions defined inside corresponding #pragma scopes.  */
7606 static void
7607 arm_set_default_type_attributes (tree type)
7608 {
7609   /* Add __attribute__ ((long_call)) to all functions, when
7610      inside #pragma long_calls or __attribute__ ((short_call)),
7611      when inside #pragma no_long_calls.  */
7612   if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7613     {
7614       tree type_attr_list, attr_name;
7615       type_attr_list = TYPE_ATTRIBUTES (type);
7616 
7617       if (arm_pragma_long_calls == LONG)
7618  	attr_name = get_identifier ("long_call");
7619       else if (arm_pragma_long_calls == SHORT)
7620  	attr_name = get_identifier ("short_call");
7621       else
7622  	return;
7623 
7624       type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7625       TYPE_ATTRIBUTES (type) = type_attr_list;
7626     }
7627 }
7628 
7629 /* Return true if DECL is known to be linked into section SECTION.  */
7630 
7631 static bool
7632 arm_function_in_section_p (tree decl, section *section)
7633 {
7634   /* We can only be certain about the prevailing symbol definition.  */
7635   if (!decl_binds_to_current_def_p (decl))
7636     return false;
7637 
7638   /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
7639   if (!DECL_SECTION_NAME (decl))
7640     {
7641       /* Make sure that we will not create a unique section for DECL.  */
7642       if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7643 	return false;
7644     }
7645 
7646   return function_section (decl) == section;
7647 }
7648 
7649 /* Return nonzero if a 32-bit "long_call" should be generated for
7650    a call from the current function to DECL.  We generate a long_call
7651    if the function:
7652 
7653         a.  has an __attribute__ ((long_call))
7654      or b.  is within the scope of a #pragma long_calls
7655      or c.  the -mlong-calls command line switch has been specified
7656 
7657    However we do not generate a long call if the function:
7658 
7659         d.  has an __attribute__ ((short_call))
7660      or e.  is inside the scope of a #pragma no_long_calls
7661      or f.  is defined in the same section as the current function.  */
7662 
7663 bool
7664 arm_is_long_call_p (tree decl)
7665 {
7666   tree attrs;
7667 
7668   if (!decl)
7669     return TARGET_LONG_CALLS;
7670 
7671   attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7672   if (lookup_attribute ("short_call", attrs))
7673     return false;
7674 
7675   /* For "f", be conservative, and only cater for cases in which the
7676      whole of the current function is placed in the same section.  */
7677   if (!flag_reorder_blocks_and_partition
7678       && TREE_CODE (decl) == FUNCTION_DECL
7679       && arm_function_in_section_p (decl, current_function_section ()))
7680     return false;
7681 
7682   if (lookup_attribute ("long_call", attrs))
7683     return true;
7684 
7685   return TARGET_LONG_CALLS;
7686 }
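
/* Illustrative sketch (assumed usage, not taken from this file) of the
   rules above.  With -mlong-calls (rule c) calls default to the long form,
   but a call to a function known to end up in the caller's own section
   (rule f) can still be emitted as a short call:

     static void helper (void) { }
     void caller (void) { helper (); }   // typically a short call even
                                         // under -mlong-calls

   whereas an external declaration such as

     extern void far_handler (void) __attribute__ ((long_call));

   requests the long form (rule a) even without -mlong-calls.  */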
7687 
7688 /* Return nonzero if it is ok to make a tail-call to DECL.  */
7689 static bool
7690 arm_function_ok_for_sibcall (tree decl, tree exp)
7691 {
7692   unsigned long func_type;
7693 
7694   if (cfun->machine->sibcall_blocked)
7695     return false;
7696 
7697   if (TARGET_FDPIC)
7698     {
7699       /* In FDPIC, never tailcall something for which we have no decl:
7700 	 the target function could be in a different module, requiring
7701 	 a different FDPIC register value.  */
7702       if (decl == NULL)
7703 	return false;
7704     }
7705 
7706   /* Never tailcall something if we are generating code for Thumb-1.  */
7707   if (TARGET_THUMB1)
7708     return false;
7709 
7710   /* The PIC register is live on entry to VxWorks PLT entries, so we
7711      must make the call before restoring the PIC register.  */
7712   if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7713     return false;
7714 
7715   /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7716      may be used both as target of the call and base register for restoring
7717      the VFP registers.  */
7718   if (TARGET_APCS_FRAME && TARGET_ARM
7719       && TARGET_HARD_FLOAT
7720       && decl && arm_is_long_call_p (decl))
7721     return false;
7722 
7723   /* If we are interworking and the function is not declared static
7724      then we can't tail-call it unless we know that it exists in this
7725      compilation unit (since it might be a Thumb routine).  */
7726   if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7727       && !TREE_ASM_WRITTEN (decl))
7728     return false;
7729 
7730   func_type = arm_current_func_type ();
7731   /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
7732   if (IS_INTERRUPT (func_type))
7733     return false;
7734 
7735   /* ARMv8-M non-secure entry functions need to return with bxns which is only
7736      generated for entry functions themselves.  */
7737   if (IS_CMSE_ENTRY (arm_current_func_type ()))
7738     return false;
7739 
7740   /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7741      this would complicate matters for later code generation.  */
7742   if (TREE_CODE (exp) == CALL_EXPR)
7743     {
7744       tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7745       if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7746 	return false;
7747     }
7748 
7749   if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7750     {
7751       /* Check that the return value locations are the same.  For
7752 	 example that we aren't returning a value from the sibling in
7753 	 a VFP register but then need to transfer it to a core
7754 	 register.  */
7755       rtx a, b;
7756       tree decl_or_type = decl;
7757 
7758       /* If it is an indirect function pointer, get the function type.  */
7759       if (!decl)
7760 	decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7761 
7762       a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7763       b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7764 			      cfun->decl, false);
7765       if (!rtx_equal_p (a, b))
7766 	return false;
7767     }
7768 
7769   /* Never tailcall if function may be called with a misaligned SP.  */
7770   if (IS_STACKALIGN (func_type))
7771     return false;
7772 
7773   /* The AAPCS says that, on bare-metal, calls to unresolved weak
7774      references should become a NOP.  Don't convert such calls into
7775      sibling calls.  */
7776   if (TARGET_AAPCS_BASED
7777       && arm_abi == ARM_ABI_AAPCS
7778       && decl
7779       && DECL_WEAK (decl))
7780     return false;
7781 
7782   /* We cannot do a tailcall for an indirect call by descriptor if all the
7783      argument registers are used because the only register left to load the
7784      address is IP and it will already contain the static chain.  */
7785   if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7786     {
7787       tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7788       CUMULATIVE_ARGS cum;
7789       cumulative_args_t cum_v;
7790 
7791       arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7792       cum_v = pack_cumulative_args (&cum);
7793 
7794       for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7795 	{
7796 	  tree type = TREE_VALUE (t);
7797 	  if (!VOID_TYPE_P (type))
7798 	    {
7799 	      function_arg_info arg (type, /*named=*/true);
7800 	      arm_function_arg_advance (cum_v, arg);
7801 	    }
7802 	}
7803 
7804       function_arg_info arg (integer_type_node, /*named=*/true);
7805       if (!arm_function_arg (cum_v, arg))
7806 	return false;
7807     }
7808 
7809   /* Everything else is ok.  */
7810   return true;
7811 }
7812 
7813 
7814 /* Addressing mode support functions.  */
7815 
7816 /* Return nonzero if X is a legitimate immediate operand when compiling
7817    for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
7818 int
7819 legitimate_pic_operand_p (rtx x)
7820 {
7821   if (GET_CODE (x) == SYMBOL_REF
7822       || (GET_CODE (x) == CONST
7823 	  && GET_CODE (XEXP (x, 0)) == PLUS
7824 	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7825     return 0;
7826 
7827   return 1;
7828 }
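
/* Illustrative examples (read off the checks above, not exhaustive): when
   compiling with -fPIC, (symbol_ref "some_global") and
   (const (plus (symbol_ref "some_global") (const_int 4))) are rejected as
   immediate operands and must instead go through the GOT, while a plain
   (const_int 42) or a label-difference constant is still accepted.  */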
7829 
7830 /* Record that the current function needs a PIC register.  If PIC_REG is null,
7831    a new pseudo is allocated as PIC register, otherwise PIC_REG is used.  In
7832    both cases cfun->machine->pic_reg is initialized if we have not already done
7833    so.  COMPUTE_NOW decides whether and where to set the PIC register.  If true,
7834    the PIC register is reloaded at the current position in the instruction stream
7835    regardless of whether it was loaded before.  Otherwise, it is only loaded
7836    if not already done so (crtl->uses_pic_offset_table is null).  Note that
7837    nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
7838    is only supported iff COMPUTE_NOW is false.  */
7839 
7840 static void
7841 require_pic_register (rtx pic_reg, bool compute_now)
7842 {
7843   gcc_assert (compute_now == (pic_reg != NULL_RTX));
7844 
7845   /* A lot of the logic here is made obscure by the fact that this
7846      routine gets called as part of the rtx cost estimation process.
7847      We don't want those calls to affect any assumptions about the real
7848      function; and further, we can't call entry_of_function() until we
7849      start the real expansion process.  */
7850   if (!crtl->uses_pic_offset_table || compute_now)
7851     {
7852       gcc_assert (can_create_pseudo_p ()
7853 		  || (pic_reg != NULL_RTX
7854 		      && REG_P (pic_reg)
7855 		      && GET_MODE (pic_reg) == Pmode));
7856       if (arm_pic_register != INVALID_REGNUM
7857 	  && !compute_now
7858 	  && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7859 	{
7860 	  if (!cfun->machine->pic_reg)
7861 	    cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7862 
7863 	  /* Play games to avoid marking the function as needing pic
7864 	     if we are being called as part of the cost-estimation
7865 	     process.  */
7866 	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7867 	    crtl->uses_pic_offset_table = 1;
7868 	}
7869       else
7870 	{
7871 	  rtx_insn *seq, *insn;
7872 
7873 	  if (pic_reg == NULL_RTX)
7874 	    pic_reg = gen_reg_rtx (Pmode);
7875 	  if (!cfun->machine->pic_reg)
7876 	    cfun->machine->pic_reg = pic_reg;
7877 
7878 	  /* Play games to avoid marking the function as needing pic
7879 	     if we are being called as part of the cost-estimation
7880 	     process.  */
7881 	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7882 	    {
7883 	      crtl->uses_pic_offset_table = 1;
7884 	      start_sequence ();
7885 
7886 	      if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7887 		  && arm_pic_register > LAST_LO_REGNUM
7888 		  && !compute_now)
7889 		emit_move_insn (cfun->machine->pic_reg,
7890 				gen_rtx_REG (Pmode, arm_pic_register));
7891 	      else
7892 		arm_load_pic_register (0UL, pic_reg);
7893 
7894 	      seq = get_insns ();
7895 	      end_sequence ();
7896 
7897 	      for (insn = seq; insn; insn = NEXT_INSN (insn))
7898 		if (INSN_P (insn))
7899 		  INSN_LOCATION (insn) = prologue_location;
7900 
7901 	      /* We can be called during expansion of PHI nodes, where
7902 	         we can't yet emit instructions directly in the final
7903 		 insn stream.  Queue the insns on the entry edge, they will
7904 		 be committed after everything else is expanded.  */
7905 	      if (currently_expanding_to_rtl)
7906 		insert_insn_on_edge (seq,
7907 				     single_succ_edge
7908 				     (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7909 	      else
7910 		emit_insn (seq);
7911 	    }
7912 	}
7913     }
7914 }
7915 
7916 /* Generate insns to calculate the address of ORIG in pic mode.  */
7917 static rtx_insn *
7918 calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
7919 {
7920   rtx pat;
7921   rtx mem;
7922 
7923   pat = gen_calculate_pic_address (reg, pic_reg, orig);
7924 
7925   /* Make the MEM as close to a constant as possible.  */
7926   mem = SET_SRC (pat);
7927   gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7928   MEM_READONLY_P (mem) = 1;
7929   MEM_NOTRAP_P (mem) = 1;
7930 
7931   return emit_insn (pat);
7932 }
7933 
7934 /* Legitimize PIC load to ORIG into REG.  If REG is NULL, a new pseudo is
7935    created to hold the result of the load.  If not NULL, PIC_REG indicates
7936    which register to use as PIC register, otherwise it is decided by register
7937    allocator.  COMPUTE_NOW forces the PIC register to be loaded at the current
7938    location in the instruction stream, regardless of whether it was loaded
7939    previously.  Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
7940    true and null PIC_REG is only supported iff COMPUTE_NOW is false.
7941 
7942    Returns the register REG into which the PIC load is performed.  */
7943 
7944 rtx
7945 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
7946 			bool compute_now)
7947 {
7948   gcc_assert (compute_now == (pic_reg != NULL_RTX));
7949 
7950   if (GET_CODE (orig) == SYMBOL_REF
7951       || GET_CODE (orig) == LABEL_REF)
7952     {
7953       if (reg == 0)
7954 	{
7955 	  gcc_assert (can_create_pseudo_p ());
7956 	  reg = gen_reg_rtx (Pmode);
7957 	}
7958 
7959       /* VxWorks does not impose a fixed gap between segments; the run-time
7960 	 gap can be different from the object-file gap.  We therefore can't
7961 	 use GOTOFF unless we are absolutely sure that the symbol is in the
7962 	 same segment as the GOT.  Unfortunately, the flexibility of linker
7963 	 scripts means that we can't be sure of that in general, so assume
7964 	 that GOTOFF is never valid on VxWorks.  */
7965       /* References to weak symbols cannot be resolved locally: they
7966 	 may be overridden by a non-weak definition at link time.  */
7967       rtx_insn *insn;
7968       if ((GET_CODE (orig) == LABEL_REF
7969 	   || (GET_CODE (orig) == SYMBOL_REF
7970 	       && SYMBOL_REF_LOCAL_P (orig)
7971 	       && (SYMBOL_REF_DECL (orig)
7972 		   ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
7973 	       && (!SYMBOL_REF_FUNCTION_P (orig)
7974 		   || arm_fdpic_local_funcdesc_p (orig))))
7975 	  && NEED_GOT_RELOC
7976 	  && arm_pic_data_is_text_relative)
7977 	insn = arm_pic_static_addr (orig, reg);
7978       else
7979 	{
7980 	  /* If this function doesn't have a pic register, create one now.  */
7981 	  require_pic_register (pic_reg, compute_now);
7982 
7983 	  if (pic_reg == NULL_RTX)
7984 	    pic_reg = cfun->machine->pic_reg;
7985 
7986 	  insn = calculate_pic_address_constant (reg, pic_reg, orig);
7987 	}
7988 
7989       /* Put a REG_EQUAL note on this insn, so that it can be optimized
7990 	 by loop.  */
7991       set_unique_reg_note (insn, REG_EQUAL, orig);
7992 
7993       return reg;
7994     }
7995   else if (GET_CODE (orig) == CONST)
7996     {
7997       rtx base, offset;
7998 
7999       if (GET_CODE (XEXP (orig, 0)) == PLUS
8000 	  && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
8001 	return orig;
8002 
8003       /* Handle the case where we have: const (UNSPEC_TLS).  */
8004       if (GET_CODE (XEXP (orig, 0)) == UNSPEC
8005 	  && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
8006 	return orig;
8007 
8008       /* Handle the case where we have:
8009          const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
8010          CONST_INT.  */
8011       if (GET_CODE (XEXP (orig, 0)) == PLUS
8012           && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
8013           && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
8014         {
8015 	  gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
8016 	  return orig;
8017 	}
8018 
8019       if (reg == 0)
8020 	{
8021 	  gcc_assert (can_create_pseudo_p ());
8022 	  reg = gen_reg_rtx (Pmode);
8023 	}
8024 
8025       gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
8026 
8027       base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
8028 				     pic_reg, compute_now);
8029       offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
8030 				       base == reg ? 0 : reg, pic_reg,
8031 				       compute_now);
8032 
8033       if (CONST_INT_P (offset))
8034 	{
8035 	  /* The base register doesn't really matter, we only want to
8036 	     test the index for the appropriate mode.  */
8037 	  if (!arm_legitimate_index_p (mode, offset, SET, 0))
8038 	    {
8039 	      gcc_assert (can_create_pseudo_p ());
8040 	      offset = force_reg (Pmode, offset);
8041 	    }
8042 
8043 	  if (CONST_INT_P (offset))
8044 	    return plus_constant (Pmode, base, INTVAL (offset));
8045 	}
8046 
8047       if (GET_MODE_SIZE (mode) > 4
8048 	  && (GET_MODE_CLASS (mode) == MODE_INT
8049 	      || TARGET_SOFT_FLOAT))
8050 	{
8051 	  emit_insn (gen_addsi3 (reg, base, offset));
8052 	  return reg;
8053 	}
8054 
8055       return gen_rtx_PLUS (Pmode, base, offset);
8056     }
8057 
8058   return orig;
8059 }
8060 
8061 
8062 /* Whether a register is callee saved or not.  This is necessary because high
8063    registers are marked as caller saved when optimizing for size on Thumb-1
8064    targets despite being callee saved in order to avoid using them.  */
8065 #define callee_saved_reg_p(reg) \
8066   (!call_used_or_fixed_reg_p (reg) \
8067    || (TARGET_THUMB1 && optimize_size \
8068        && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
8069 
8070 /* Return a mask for the call-clobbered low registers that are unused
8071    at the end of the prologue.  */
8072 static unsigned long
8073 thumb1_prologue_unused_call_clobbered_lo_regs (void)
8074 {
8075   unsigned long mask = 0;
8076   bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
8077 
8078   for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8079     if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
8080       mask |= 1 << (reg - FIRST_LO_REGNUM);
8081   return mask;
8082 }
8083 
8084 /* Similarly for the start of the epilogue.  */
8085 static unsigned long
8086 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
8087 {
8088   unsigned long mask = 0;
8089   bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
8090 
8091   for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8092     if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
8093       mask |= 1 << (reg - FIRST_LO_REGNUM);
8094   return mask;
8095 }
8096 
8097 /* Find a spare register to use during the prolog of a function.  */
8098 
8099 static int
8100 thumb_find_work_register (unsigned long pushed_regs_mask)
8101 {
8102   int reg;
8103 
8104   unsigned long unused_regs
8105     = thumb1_prologue_unused_call_clobbered_lo_regs ();
8106 
8107   /* Check the argument registers first as these are call-used.  The
8108      register allocation order means that sometimes r3 might be used
8109      but earlier argument registers might not, so check them all.  */
8110   for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
8111     if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
8112       return reg;
8113 
8114   /* Otherwise look for a call-saved register that is going to be pushed.  */
8115   for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
8116     if (pushed_regs_mask & (1 << reg))
8117       return reg;
8118 
8119   if (TARGET_THUMB2)
8120     {
8121       /* Thumb-2 can use high regs.  */
8122       for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
8123 	if (pushed_regs_mask & (1 << reg))
8124 	  return reg;
8125     }
8126   /* Something went wrong - thumb_compute_save_reg_mask()
8127      should have arranged for a suitable register to be pushed.  */
8128   gcc_unreachable ();
8129 }
8130 
8131 static GTY(()) int pic_labelno;
8132 
8133 /* Generate code to load the PIC register.  In thumb mode SCRATCH is a
8134    low register.  */
8135 
8136 void
8137 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
8138 {
8139   rtx l1, labelno, pic_tmp, pic_rtx;
8140 
8141   if (crtl->uses_pic_offset_table == 0
8142       || TARGET_SINGLE_PIC_BASE
8143       || TARGET_FDPIC)
8144     return;
8145 
8146   gcc_assert (flag_pic);
8147 
8148   if (pic_reg == NULL_RTX)
8149     pic_reg = cfun->machine->pic_reg;
8150   if (TARGET_VXWORKS_RTP)
8151     {
8152       pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
8153       pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8154       emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
8155 
8156       emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
8157 
8158       pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8159       emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
8160     }
8161   else
8162     {
8163       /* We use an UNSPEC rather than a LABEL_REF because this label
8164 	 never appears in the code stream.  */
8165 
8166       labelno = GEN_INT (pic_labelno++);
8167       l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8168       l1 = gen_rtx_CONST (VOIDmode, l1);
8169 
8170       /* On the ARM the PC register contains 'dot + 8' at the time of the
8171 	 addition, on the Thumb it is 'dot + 4'.  */
8172       pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8173       pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
8174 				UNSPEC_GOTSYM_OFF);
8175       pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8176 
8177       if (TARGET_32BIT)
8178 	{
8179 	  emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8180 	}
8181       else /* TARGET_THUMB1 */
8182 	{
8183 	  if (arm_pic_register != INVALID_REGNUM
8184 	      && REGNO (pic_reg) > LAST_LO_REGNUM)
8185 	    {
8186 	      /* We will have pushed the pic register, so we should always be
8187 		 able to find a work register.  */
8188 	      pic_tmp = gen_rtx_REG (SImode,
8189 				     thumb_find_work_register (saved_regs));
8190 	      emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
8191 	      emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
8192 	      emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
8193 	    }
8194 	  else if (arm_pic_register != INVALID_REGNUM
8195 		   && arm_pic_register > LAST_LO_REGNUM
8196 		   && REGNO (pic_reg) <= LAST_LO_REGNUM)
8197 	    {
8198 	      emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8199 	      emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
8200 	      emit_use (gen_rtx_REG (Pmode, arm_pic_register));
8201 	    }
8202 	  else
8203 	    emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8204 	}
8205     }
8206 
8207   /* Need to emit this whether or not we obey regdecls,
8208      since setjmp/longjmp can cause life info to screw up.  */
8209   emit_use (pic_reg);
8210 }
8211 
8212 /* Try to determine whether an object, referenced via ORIG, will be
8213    placed in the text or data segment.  This is used in FDPIC mode, to
8214    decide which relocations to use when accessing ORIG.  *IS_READONLY
8215    is set to true if ORIG is a read-only location, false otherwise.
8216    Return true if we could determine the location of ORIG, false
8217    otherwise.  *IS_READONLY is valid only when we return true.  */
8218 static bool
8219 arm_is_segment_info_known (rtx orig, bool *is_readonly)
8220 {
8221   *is_readonly = false;
8222 
8223   if (GET_CODE (orig) == LABEL_REF)
8224     {
8225       *is_readonly = true;
8226       return true;
8227     }
8228 
8229   if (SYMBOL_REF_P (orig))
8230     {
8231       if (CONSTANT_POOL_ADDRESS_P (orig))
8232 	{
8233 	  *is_readonly = true;
8234 	  return true;
8235 	}
8236       if (SYMBOL_REF_LOCAL_P (orig)
8237 	  && !SYMBOL_REF_EXTERNAL_P (orig)
8238 	  && SYMBOL_REF_DECL (orig)
8239 	  && (!DECL_P (SYMBOL_REF_DECL (orig))
8240 	      || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
8241 	{
8242 	  tree decl = SYMBOL_REF_DECL (orig);
8243 	  tree init = (TREE_CODE (decl) == VAR_DECL)
8244 	    ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
8245 	    ? decl : 0;
8246 	  int reloc = 0;
8247 	  bool named_section, readonly;
8248 
8249 	  if (init && init != error_mark_node)
8250 	    reloc = compute_reloc_for_constant (init);
8251 
8252 	  named_section = TREE_CODE (decl) == VAR_DECL
8253 	    && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
8254 	  readonly = decl_readonly_section (decl, reloc);
8255 
8256 	  /* We don't know where the link script will put a named
8257 	     section, so return false in such a case.  */
8258 	  if (named_section)
8259 	    return false;
8260 
8261 	  *is_readonly = readonly;
8262 	  return true;
8263 	}
8264 
8265       /* We don't know.  */
8266       return false;
8267     }
8268 
8269   gcc_unreachable ();
8270 }
8271 
8272 /* Generate code to load the address of a static var when flag_pic is set.  */
8273 static rtx_insn *
8274 arm_pic_static_addr (rtx orig, rtx reg)
8275 {
8276   rtx l1, labelno, offset_rtx;
8277   rtx_insn *insn;
8278 
8279   gcc_assert (flag_pic);
8280 
8281   bool is_readonly = false;
8282   bool info_known = false;
8283 
8284   if (TARGET_FDPIC
8285       && SYMBOL_REF_P (orig)
8286       && !SYMBOL_REF_FUNCTION_P (orig))
8287     info_known = arm_is_segment_info_known (orig, &is_readonly);
8288 
8289   if (TARGET_FDPIC
8290       && SYMBOL_REF_P (orig)
8291       && !SYMBOL_REF_FUNCTION_P (orig)
8292       && !info_known)
8293     {
8294       /* We don't know where orig is stored, so we have to be
8295 	 pessimistic and use a GOT relocation.  */
8296       rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8297 
8298       insn = calculate_pic_address_constant (reg, pic_reg, orig);
8299     }
8300   else if (TARGET_FDPIC
8301 	   && SYMBOL_REF_P (orig)
8302 	   && (SYMBOL_REF_FUNCTION_P (orig)
8303 	       || !is_readonly))
8304     {
8305       /* We use the GOTOFF relocation.  */
8306       rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8307 
8308       rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
8309       emit_insn (gen_movsi (reg, l1));
8310       insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
8311     }
8312   else
8313     {
8314       /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
8315 	 PC-relative access.  */
8316       /* We use an UNSPEC rather than a LABEL_REF because this label
8317 	 never appears in the code stream.  */
8318       labelno = GEN_INT (pic_labelno++);
8319       l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8320       l1 = gen_rtx_CONST (VOIDmode, l1);
8321 
8322       /* On the ARM the PC register contains 'dot + 8' at the time of the
8323 	 addition, on the Thumb it is 'dot + 4'.  */
8324       offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8325       offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
8326 				   UNSPEC_SYMBOL_OFFSET);
8327       offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
8328 
8329       insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
8330 						   labelno));
8331     }
8332 
8333   return insn;
8334 }
8335 
8336 /* Return nonzero if X is valid as an ARM state addressing register.  */
8337 static int
8338 arm_address_register_rtx_p (rtx x, int strict_p)
8339 {
8340   int regno;
8341 
8342   if (!REG_P (x))
8343     return 0;
8344 
8345   regno = REGNO (x);
8346 
8347   if (strict_p)
8348     return ARM_REGNO_OK_FOR_BASE_P (regno);
8349 
8350   return (regno <= LAST_ARM_REGNUM
8351 	  || regno >= FIRST_PSEUDO_REGISTER
8352 	  || regno == FRAME_POINTER_REGNUM
8353 	  || regno == ARG_POINTER_REGNUM);
8354 }
8355 
8356 /* Return TRUE if this rtx is the difference of a symbol and a label,
8357    and will reduce to a PC-relative relocation in the object file.
8358    Expressions like this can be left alone when generating PIC, rather
8359    than forced through the GOT.  */
8360 static int
8361 pcrel_constant_p (rtx x)
8362 {
8363   if (GET_CODE (x) == MINUS)
8364     return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
8365 
8366   return FALSE;
8367 }
8368 
8369 /* Return true if X will surely end up in an index register after next
8370    splitting pass.  */
8371 static bool
8372 will_be_in_index_register (const_rtx x)
8373 {
8374   /* arm.md: calculate_pic_address will split this into a register.  */
8375   return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
8376 }
8377 
8378 /* Return nonzero if X is a valid ARM state address operand.  */
8379 int
8380 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
8381 			        int strict_p)
8382 {
8383   bool use_ldrd;
8384   enum rtx_code code = GET_CODE (x);
8385 
8386   if (arm_address_register_rtx_p (x, strict_p))
8387     return 1;
8388 
8389   use_ldrd = (TARGET_LDRD
8390 	      && (mode == DImode || mode == DFmode));
8391 
8392   if (code == POST_INC || code == PRE_DEC
8393       || ((code == PRE_INC || code == POST_DEC)
8394 	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8395     return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8396 
8397   else if ((code == POST_MODIFY || code == PRE_MODIFY)
8398 	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8399 	   && GET_CODE (XEXP (x, 1)) == PLUS
8400 	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8401     {
8402       rtx addend = XEXP (XEXP (x, 1), 1);
8403 
8404       /* Don't allow ldrd post increment by register because it's hard
8405 	 to fixup invalid register choices.  */
8406       if (use_ldrd
8407 	  && GET_CODE (x) == POST_MODIFY
8408 	  && REG_P (addend))
8409 	return 0;
8410 
8411       return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
8412 	      && arm_legitimate_index_p (mode, addend, outer, strict_p));
8413     }
8414 
8415   /* After reload constants split into minipools will have addresses
8416      from a LABEL_REF.  */
8417   else if (reload_completed
8418 	   && (code == LABEL_REF
8419 	       || (code == CONST
8420 		   && GET_CODE (XEXP (x, 0)) == PLUS
8421 		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8422 		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8423     return 1;
8424 
8425   else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
8426     return 0;
8427 
8428   else if (code == PLUS)
8429     {
8430       rtx xop0 = XEXP (x, 0);
8431       rtx xop1 = XEXP (x, 1);
8432 
8433       return ((arm_address_register_rtx_p (xop0, strict_p)
8434 	       && ((CONST_INT_P (xop1)
8435 		    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
8436 		   || (!strict_p && will_be_in_index_register (xop1))))
8437 	      || (arm_address_register_rtx_p (xop1, strict_p)
8438 		  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
8439     }
8440 
8441 #if 0
8442   /* Reload currently can't handle MINUS, so disable this for now */
8443   else if (GET_CODE (x) == MINUS)
8444     {
8445       rtx xop0 = XEXP (x, 0);
8446       rtx xop1 = XEXP (x, 1);
8447 
8448       return (arm_address_register_rtx_p (xop0, strict_p)
8449 	      && arm_legitimate_index_p (mode, xop1, outer, strict_p));
8450     }
8451 #endif
8452 
8453   else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8454 	   && code == SYMBOL_REF
8455 	   && CONSTANT_POOL_ADDRESS_P (x)
8456 	   && ! (flag_pic
8457 		 && symbol_mentioned_p (get_pool_constant (x))
8458 		 && ! pcrel_constant_p (get_pool_constant (x))))
8459     return 1;
8460 
8461   return 0;
8462 }
8463 
8464 /* Return true if we can avoid creating a constant pool entry for x.  */
8465 static bool
8466 can_avoid_literal_pool_for_label_p (rtx x)
8467 {
8468   /* Normally we can assign constant values to target registers without
8469      the help of constant pool.  But there are cases we have to use constant
8470      pool like:
8471      1) assign a label to register.
8472      2) sign-extend a 8bit value to 32bit and then assign to register.
8473 
8474      Constant pool access in format:
8475      (set (reg r0) (mem (symbol_ref (".LC0"))))
8476      will cause the use of literal pool (later in function arm_reorg).
8477      So here we mark such format as an invalid format, then the compiler
8478      will adjust it into:
8479      (set (reg r0) (symbol_ref (".LC0")))
8480      (set (reg r0) (mem (reg r0))).
8481      No extra register is required, and (mem (reg r0)) won't cause the use
8482      of literal pools.  */
8483   if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
8484       && CONSTANT_POOL_ADDRESS_P (x))
8485     return 1;
8486   return 0;
8487 }
8488 
8489 
8490 /* Return nonzero if X is a valid Thumb-2 address operand.  */
8491 static int
8492 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8493 {
8494   bool use_ldrd;
8495   enum rtx_code code = GET_CODE (x);
8496 
8497   if (TARGET_HAVE_MVE
8498       && (mode == V8QImode || mode == E_V4QImode || mode == V4HImode))
8499     return mve_vector_mem_operand (mode, x, strict_p);
8500 
8501   if (arm_address_register_rtx_p (x, strict_p))
8502     return 1;
8503 
8504   use_ldrd = (TARGET_LDRD
8505 	      && (mode == DImode || mode == DFmode));
8506 
8507   if (code == POST_INC || code == PRE_DEC
8508       || ((code == PRE_INC || code == POST_DEC)
8509 	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8510     return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8511 
8512   else if ((code == POST_MODIFY || code == PRE_MODIFY)
8513 	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8514 	   && GET_CODE (XEXP (x, 1)) == PLUS
8515 	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8516     {
8517       /* Thumb-2 only has autoincrement by constant.  */
8518       rtx addend = XEXP (XEXP (x, 1), 1);
8519       HOST_WIDE_INT offset;
8520 
8521       if (!CONST_INT_P (addend))
8522 	return 0;
8523 
8524       offset = INTVAL(addend);
8525       if (GET_MODE_SIZE (mode) <= 4)
8526 	return (offset > -256 && offset < 256);
8527 
8528       return (use_ldrd && offset > -1024 && offset < 1024
8529 	      && (offset & 3) == 0);
8530     }
8531 
8532   /* After reload constants split into minipools will have addresses
8533      from a LABEL_REF.  */
8534   else if (reload_completed
8535 	   && (code == LABEL_REF
8536 	       || (code == CONST
8537 		   && GET_CODE (XEXP (x, 0)) == PLUS
8538 		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8539 		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8540     return 1;
8541 
8542   else if (mode == TImode
8543 	   || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8544 	   || (TARGET_HAVE_MVE && VALID_MVE_STRUCT_MODE (mode)))
8545     return 0;
8546 
8547   else if (code == PLUS)
8548     {
8549       rtx xop0 = XEXP (x, 0);
8550       rtx xop1 = XEXP (x, 1);
8551 
8552       return ((arm_address_register_rtx_p (xop0, strict_p)
8553 	       && (thumb2_legitimate_index_p (mode, xop1, strict_p)
8554 		   || (!strict_p && will_be_in_index_register (xop1))))
8555 	      || (arm_address_register_rtx_p (xop1, strict_p)
8556 		  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
8557     }
8558 
8559   else if (can_avoid_literal_pool_for_label_p (x))
8560     return 0;
8561 
8562   else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8563 	   && code == SYMBOL_REF
8564 	   && CONSTANT_POOL_ADDRESS_P (x)
8565 	   && ! (flag_pic
8566 		 && symbol_mentioned_p (get_pool_constant (x))
8567 		 && ! pcrel_constant_p (get_pool_constant (x))))
8568     return 1;
8569 
8570   return 0;
8571 }
8572 
8573 /* Return nonzero if INDEX is valid for an address index operand in
8574    ARM state.  */
8575 static int
8576 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
8577 			int strict_p)
8578 {
8579   HOST_WIDE_INT range;
8580   enum rtx_code code = GET_CODE (index);
8581 
8582   /* Standard coprocessor addressing modes.  */
8583   if (TARGET_HARD_FLOAT
8584       && (mode == SFmode || mode == DFmode))
8585     return (code == CONST_INT && INTVAL (index) < 1024
8586 	    && INTVAL (index) > -1024
8587 	    && (INTVAL (index) & 3) == 0);
8588 
8589   /* For quad modes, we restrict the constant offset to be slightly less
8590      than what the instruction format permits.  We do this because for
8591      quad mode moves, we will actually decompose them into two separate
8592      double-mode reads or writes.  INDEX must therefore be a valid
8593      (double-mode) offset and so should INDEX+8.  */
8594   if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8595     return (code == CONST_INT
8596 	    && INTVAL (index) < 1016
8597 	    && INTVAL (index) > -1024
8598 	    && (INTVAL (index) & 3) == 0);
8599 
8600   /* We have no such constraint on double mode offsets, so we permit the
8601      full range of the instruction format.  */
8602   if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8603     return (code == CONST_INT
8604 	    && INTVAL (index) < 1024
8605 	    && INTVAL (index) > -1024
8606 	    && (INTVAL (index) & 3) == 0);
8607 
8608   if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8609     return (code == CONST_INT
8610 	    && INTVAL (index) < 1024
8611 	    && INTVAL (index) > -1024
8612 	    && (INTVAL (index) & 3) == 0);
8613 
8614   if (arm_address_register_rtx_p (index, strict_p)
8615       && (GET_MODE_SIZE (mode) <= 4))
8616     return 1;
8617 
8618   if (mode == DImode || mode == DFmode)
8619     {
8620       if (code == CONST_INT)
8621 	{
8622 	  HOST_WIDE_INT val = INTVAL (index);
8623 
8624 	  /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8625 	     If vldr is selected it uses arm_coproc_mem_operand.  */
8626 	  if (TARGET_LDRD)
8627 	    return val > -256 && val < 256;
8628 	  else
8629 	    return val > -4096 && val < 4092;
8630 	}
8631 
8632       return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8633     }
8634 
8635   if (GET_MODE_SIZE (mode) <= 4
8636       && ! (arm_arch4
8637 	    && (mode == HImode
8638 		|| mode == HFmode
8639 		|| (mode == QImode && outer == SIGN_EXTEND))))
8640     {
8641       if (code == MULT)
8642 	{
8643 	  rtx xiop0 = XEXP (index, 0);
8644 	  rtx xiop1 = XEXP (index, 1);
8645 
8646 	  return ((arm_address_register_rtx_p (xiop0, strict_p)
8647 		   && power_of_two_operand (xiop1, SImode))
8648 		  || (arm_address_register_rtx_p (xiop1, strict_p)
8649 		      && power_of_two_operand (xiop0, SImode)));
8650 	}
8651       else if (code == LSHIFTRT || code == ASHIFTRT
8652 	       || code == ASHIFT || code == ROTATERT)
8653 	{
8654 	  rtx op = XEXP (index, 1);
8655 
8656 	  return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8657 		  && CONST_INT_P (op)
8658 		  && INTVAL (op) > 0
8659 		  && INTVAL (op) <= 31);
8660 	}
8661     }
8662 
8663   /* For ARM v4 we may be doing a sign-extend operation during the
8664      load.  */
8665   if (arm_arch4)
8666     {
8667       if (mode == HImode
8668 	  || mode == HFmode
8669 	  || (outer == SIGN_EXTEND && mode == QImode))
8670 	range = 256;
8671       else
8672 	range = 4096;
8673     }
8674   else
8675     range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8676 
8677   return (code == CONST_INT
8678 	  && INTVAL (index) < range
8679 	  && INTVAL (index) > -range);
8680 }
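
/* Illustrative summary of the index forms accepted above, shown as
   assembly-style addresses; the concrete bounds are a reading of this code,
   not of the architecture manual:

     SFmode/DFmode (VFP)  : [rN, #imm]        imm in (-1024, 1024), imm % 4 == 0
     SImode, ARMv4+       : [rN, #imm]        imm in (-4096, 4096)
                            [rN, rM, lsl #s]  s a power-of-two factor or
                                              shift amount 1..31
     HImode, ARMv4+       : [rN, #imm]        imm in (-256, 256)
     DImode with LDRD     : [rN, #imm]        imm in (-256, 256)  */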
8681 
8682 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8683    index operand.  i.e. 1, 2, 4 or 8.  */
8684 static bool
8685 thumb2_index_mul_operand (rtx op)
8686 {
8687   HOST_WIDE_INT val;
8688 
8689   if (!CONST_INT_P (op))
8690     return false;
8691 
8692   val = INTVAL(op);
8693   return (val == 1 || val == 2 || val == 4 || val == 8);
8694 }
8695 
8696 /* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
8697 static int
8698 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8699 {
8700   enum rtx_code code = GET_CODE (index);
8701 
8702   /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
8703   /* Standard coprocessor addressing modes.  */
8704   if (TARGET_VFP_BASE
8705       && (mode == SFmode || mode == DFmode))
8706     return (code == CONST_INT && INTVAL (index) < 1024
8707 	    /* Thumb-2 allows only > -256 index range for its core register
8708 	       load/stores. Since we allow SF/DF in core registers, we have
8709 	       to use the intersection between -256~4096 (core) and -1024~1024
8710 	       (coprocessor).  */
8711 	    && INTVAL (index) > -256
8712 	    && (INTVAL (index) & 3) == 0);
8713 
8714   if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8715     {
8716       /* For DImode assume values will usually live in core regs
8717 	 and only allow LDRD addressing modes.  */
8718       if (!TARGET_LDRD || mode != DImode)
8719 	return (code == CONST_INT
8720 		&& INTVAL (index) < 1024
8721 		&& INTVAL (index) > -1024
8722 		&& (INTVAL (index) & 3) == 0);
8723     }
8724 
8725   /* For quad modes, we restrict the constant offset to be slightly less
8726      than what the instruction format permits.  We do this because for
8727      quad mode moves, we will actually decompose them into two separate
8728      double-mode reads or writes.  INDEX must therefore be a valid
8729      (double-mode) offset and so should INDEX+8.  */
8730   if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8731     return (code == CONST_INT
8732 	    && INTVAL (index) < 1016
8733 	    && INTVAL (index) > -1024
8734 	    && (INTVAL (index) & 3) == 0);
8735 
8736   /* We have no such constraint on double mode offsets, so we permit the
8737      full range of the instruction format.  */
8738   if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8739     return (code == CONST_INT
8740 	    && INTVAL (index) < 1024
8741 	    && INTVAL (index) > -1024
8742 	    && (INTVAL (index) & 3) == 0);
8743 
8744   if (arm_address_register_rtx_p (index, strict_p)
8745       && (GET_MODE_SIZE (mode) <= 4))
8746     return 1;
8747 
8748   if (mode == DImode || mode == DFmode)
8749     {
8750       if (code == CONST_INT)
8751 	{
8752 	  HOST_WIDE_INT val = INTVAL (index);
8753 	  /* Thumb-2 ldrd only has reg+const addressing modes.
8754 	     Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8755 	     If vldr is selected it uses arm_coproc_mem_operand.  */
8756 	  if (TARGET_LDRD)
8757 	    return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8758 	  else
8759 	    return IN_RANGE (val, -255, 4095 - 4);
8760 	}
8761       else
8762 	return 0;
8763     }
8764 
8765   if (code == MULT)
8766     {
8767       rtx xiop0 = XEXP (index, 0);
8768       rtx xiop1 = XEXP (index, 1);
8769 
8770       return ((arm_address_register_rtx_p (xiop0, strict_p)
8771 	       && thumb2_index_mul_operand (xiop1))
8772 	      || (arm_address_register_rtx_p (xiop1, strict_p)
8773 		  && thumb2_index_mul_operand (xiop0)));
8774     }
8775   else if (code == ASHIFT)
8776     {
8777       rtx op = XEXP (index, 1);
8778 
8779       return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8780 	      && CONST_INT_P (op)
8781 	      && INTVAL (op) > 0
8782 	      && INTVAL (op) <= 3);
8783     }
8784 
8785   return (code == CONST_INT
8786 	  && INTVAL (index) < 4096
8787 	  && INTVAL (index) > -256);
8788 }
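
/* Illustrative summary of the Thumb-2 index forms accepted above (read off
   the conditions in this function, not of the ARM ARM):

     SImode                : [rN, #imm]        imm in (-256, 4096)
                             [rN, rM, lsl #s]  s in 0..3
     SFmode/DFmode (VFP)   : [rN, #imm]        imm in (-256, 1024), imm % 4 == 0
     DImode with LDRD      : [rN, #imm]        imm in [-1020, 1020], imm % 4 == 0  */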
8789 
8790 /* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
8791 static int
8792 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8793 {
8794   int regno;
8795 
8796   if (!REG_P (x))
8797     return 0;
8798 
8799   regno = REGNO (x);
8800 
8801   if (strict_p)
8802     return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8803 
8804   return (regno <= LAST_LO_REGNUM
8805 	  || regno > LAST_VIRTUAL_REGISTER
8806 	  || regno == FRAME_POINTER_REGNUM
8807 	  || (GET_MODE_SIZE (mode) >= 4
8808 	      && (regno == STACK_POINTER_REGNUM
8809 		  || regno >= FIRST_PSEUDO_REGISTER
8810 		  || x == hard_frame_pointer_rtx
8811 		  || x == arg_pointer_rtx)));
8812 }
8813 
8814 /* Return nonzero if x is a legitimate index register.  This is the case
8815    for any base register that can access a QImode object.  */
8816 inline static int
8817 thumb1_index_register_rtx_p (rtx x, int strict_p)
8818 {
8819   return thumb1_base_register_rtx_p (x, QImode, strict_p);
8820 }
8821 
8822 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8823 
8824    The AP may be eliminated to either the SP or the FP, so we use the
8825    least common denominator, e.g. SImode, and offsets from 0 to 64.
8826 
8827    ??? Verify whether the above is the right approach.
8828 
8829    ??? Also, the FP may be eliminated to the SP, so perhaps that
8830    needs special handling also.
8831 
8832    ??? Look at how the mips16 port solves this problem.  It probably uses
8833    better ways to solve some of these problems.
8834 
8835    Although it is not incorrect, we don't accept QImode and HImode
8836    addresses based on the frame pointer or arg pointer until the
8837    reload pass starts.  This is so that eliminating such addresses
8838    into stack based ones won't produce impossible code.  */
8839 int
8840 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8841 {
8842   if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8843     return 0;
8844 
8845   /* ??? Not clear if this is right.  Experiment.  */
8846   if (GET_MODE_SIZE (mode) < 4
8847       && !(reload_in_progress || reload_completed)
8848       && (reg_mentioned_p (frame_pointer_rtx, x)
8849 	  || reg_mentioned_p (arg_pointer_rtx, x)
8850 	  || reg_mentioned_p (virtual_incoming_args_rtx, x)
8851 	  || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8852 	  || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8853 	  || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8854     return 0;
8855 
8856   /* Accept any base register.  SP only in SImode or larger.  */
8857   else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8858     return 1;
8859 
8860   /* This is PC relative data before arm_reorg runs.  */
8861   else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8862 	   && GET_CODE (x) == SYMBOL_REF
8863 	   && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
8864 	   && !arm_disable_literal_pool)
8865     return 1;
8866 
8867   /* This is PC relative data after arm_reorg runs.  */
8868   else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8869 	   && reload_completed
8870 	   && (GET_CODE (x) == LABEL_REF
8871 	       || (GET_CODE (x) == CONST
8872 		   && GET_CODE (XEXP (x, 0)) == PLUS
8873 		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8874 		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8875     return 1;
8876 
8877   /* Post-inc indexing only supported for SImode and larger.  */
8878   else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8879 	   && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8880     return 1;
8881 
8882   else if (GET_CODE (x) == PLUS)
8883     {
8884       /* REG+REG address can be any two index registers.  */
8885       /* We disallow FRAME+REG addressing since we know that FRAME
8886 	 will be replaced with STACK, and SP relative addressing only
8887 	 permits SP+OFFSET.  */
8888       if (GET_MODE_SIZE (mode) <= 4
8889 	  && XEXP (x, 0) != frame_pointer_rtx
8890 	  && XEXP (x, 1) != frame_pointer_rtx
8891 	  && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8892 	  && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8893 	      || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8894 	return 1;
8895 
8896       /* REG+const has 5-7 bit offset for non-SP registers.  */
8897       else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8898 		|| XEXP (x, 0) == arg_pointer_rtx)
8899 	       && CONST_INT_P (XEXP (x, 1))
8900 	       && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8901 	return 1;
8902 
8903       /* REG+const has 10-bit offset for SP, but only SImode and
8904 	 larger is supported.  */
8905       /* ??? Should probably check for DI/DFmode overflow here
8906 	 just like GO_IF_LEGITIMATE_OFFSET does.  */
8907       else if (REG_P (XEXP (x, 0))
8908 	       && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8909 	       && GET_MODE_SIZE (mode) >= 4
8910 	       && CONST_INT_P (XEXP (x, 1))
8911 	       && INTVAL (XEXP (x, 1)) >= 0
8912 	       && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8913 	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
8914 	return 1;
8915 
8916       else if (REG_P (XEXP (x, 0))
8917 	       && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8918 		   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8919 		   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8920 		       && REGNO (XEXP (x, 0))
8921 			  <= LAST_VIRTUAL_POINTER_REGISTER))
8922 	       && GET_MODE_SIZE (mode) >= 4
8923 	       && CONST_INT_P (XEXP (x, 1))
8924 	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
8925 	return 1;
8926     }
8927 
8928   else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8929 	   && GET_MODE_SIZE (mode) == 4
8930 	   && GET_CODE (x) == SYMBOL_REF
8931 	   && CONSTANT_POOL_ADDRESS_P (x)
8932 	   && !arm_disable_literal_pool
8933 	   && ! (flag_pic
8934 		 && symbol_mentioned_p (get_pool_constant (x))
8935 		 && ! pcrel_constant_p (get_pool_constant (x))))
8936     return 1;
8937 
8938   return 0;
8939 }
8940 
8941 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8942    instruction of mode MODE.  */
8943 int
8944 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8945 {
8946   switch (GET_MODE_SIZE (mode))
8947     {
8948     case 1:
8949       return val >= 0 && val < 32;
8950 
8951     case 2:
8952       return val >= 0 && val < 64 && (val & 1) == 0;
8953 
8954     default:
8955       return (val >= 0
8956 	      && (val + GET_MODE_SIZE (mode)) <= 128
8957 	      && (val & 3) == 0);
8958     }
8959 }
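
/* Illustrative worked example, reading the ranges off the switch above:

     ldrb r0, [r1, #31]    @ QImode: 0 <= val < 32
     ldrh r0, [r1, #62]    @ HImode: 0 <= val < 64, val even
     ldr  r0, [r1, #124]   @ SImode: val + 4 <= 128, val a multiple of 4  */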
8960 
8961 bool
8962 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8963 {
8964   if (TARGET_ARM)
8965     return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8966   else if (TARGET_THUMB2)
8967     return thumb2_legitimate_address_p (mode, x, strict_p);
8968   else /* if (TARGET_THUMB1) */
8969     return thumb1_legitimate_address_p (mode, x, strict_p);
8970 }
8971 
8972 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8973 
8974    Given an rtx X being reloaded into a reg required to be
8975    in class CLASS, return the class of reg to actually use.
8976    In general this is just CLASS, but for the Thumb core registers and
8977    immediate constants we prefer a LO_REGS class or a subset.  */
8978 
8979 static reg_class_t
8980 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8981 {
8982   if (TARGET_32BIT)
8983     return rclass;
8984   else
8985     {
8986       if (rclass == GENERAL_REGS)
8987 	return LO_REGS;
8988       else
8989 	return rclass;
8990     }
8991 }
8992 
8993 /* Build the SYMBOL_REF for __tls_get_addr.  */
8994 
8995 static GTY(()) rtx tls_get_addr_libfunc;
8996 
8997 static rtx
8998 get_tls_get_addr (void)
8999 {
9000   if (!tls_get_addr_libfunc)
9001     tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
9002   return tls_get_addr_libfunc;
9003 }
9004 
9005 rtx
9006 arm_load_tp (rtx target)
9007 {
9008   if (!target)
9009     target = gen_reg_rtx (SImode);
9010 
9011   if (TARGET_HARD_TP)
9012     {
9013       /* Can return in any reg.  */
9014       emit_insn (gen_load_tp_hard (target));
9015     }
9016   else
9017     {
9018       /* Always returned in r0.  Immediately copy the result into a pseudo,
9019 	 otherwise other uses of r0 (e.g. setting up function arguments) may
9020 	 clobber the value.  */
9021 
9022       rtx tmp;
9023 
9024       if (TARGET_FDPIC)
9025 	{
9026 	  rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
9027 	  rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);
9028 
9029 	  emit_insn (gen_load_tp_soft_fdpic ());
9030 
9031 	  /* Restore r9.  */
9032 	  emit_insn (gen_restore_pic_register_after_call(fdpic_reg, initial_fdpic_reg));
9033 	}
9034       else
9035 	emit_insn (gen_load_tp_soft ());
9036 
9037       tmp = gen_rtx_REG (SImode, R0_REGNUM);
9038       emit_move_insn (target, tmp);
9039     }
9040   return target;
9041 }
9042 
9043 static rtx
9044 load_tls_operand (rtx x, rtx reg)
9045 {
9046   rtx tmp;
9047 
9048   if (reg == NULL_RTX)
9049     reg = gen_reg_rtx (SImode);
9050 
9051   tmp = gen_rtx_CONST (SImode, x);
9052 
9053   emit_move_insn (reg, tmp);
9054 
9055   return reg;
9056 }
9057 
9058 static rtx_insn *
9059 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
9060 {
9061   rtx label, labelno = NULL_RTX, sum;
9062 
9063   gcc_assert (reloc != TLS_DESCSEQ);
9064   start_sequence ();
9065 
9066   if (TARGET_FDPIC)
9067     {
9068       sum = gen_rtx_UNSPEC (Pmode,
9069 			    gen_rtvec (2, x, GEN_INT (reloc)),
9070 			    UNSPEC_TLS);
9071     }
9072   else
9073     {
9074       labelno = GEN_INT (pic_labelno++);
9075       label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9076       label = gen_rtx_CONST (VOIDmode, label);
9077 
9078       sum = gen_rtx_UNSPEC (Pmode,
9079 			    gen_rtvec (4, x, GEN_INT (reloc), label,
9080 				       GEN_INT (TARGET_ARM ? 8 : 4)),
9081 			    UNSPEC_TLS);
9082     }
9083   reg = load_tls_operand (sum, reg);
9084 
9085   if (TARGET_FDPIC)
9086       emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9087   else if (TARGET_ARM)
9088     emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
9089   else
9090     emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9091 
9092   *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
9093 				     LCT_PURE, /* LCT_CONST?  */
9094 				     Pmode, reg, Pmode);
9095 
9096   rtx_insn *insns = get_insns ();
9097   end_sequence ();
9098 
9099   return insns;
9100 }
9101 
9102 static rtx
9103 arm_tls_descseq_addr (rtx x, rtx reg)
9104 {
9105   rtx labelno = GEN_INT (pic_labelno++);
9106   rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9107   rtx sum = gen_rtx_UNSPEC (Pmode,
9108 			    gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
9109 				       gen_rtx_CONST (VOIDmode, label),
9110 				       GEN_INT (!TARGET_ARM)),
9111 			    UNSPEC_TLS);
9112   rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
9113 
9114   emit_insn (gen_tlscall (x, labelno));
9115   if (!reg)
9116     reg = gen_reg_rtx (SImode);
9117   else
9118     gcc_assert (REGNO (reg) != R0_REGNUM);
9119 
9120   emit_move_insn (reg, reg0);
9121 
9122   return reg;
9123 }
9124 
9125 
9126 rtx
9127 legitimize_tls_address (rtx x, rtx reg)
9128 {
9129   rtx dest, tp, label, labelno, sum, ret, eqv, addend;
9130   rtx_insn *insns;
9131   unsigned int model = SYMBOL_REF_TLS_MODEL (x);
9132 
9133   switch (model)
9134     {
9135     case TLS_MODEL_GLOBAL_DYNAMIC:
9136       if (TARGET_GNU2_TLS)
9137 	{
9138 	  gcc_assert (!TARGET_FDPIC);
9139 
9140 	  reg = arm_tls_descseq_addr (x, reg);
9141 
9142 	  tp = arm_load_tp (NULL_RTX);
9143 
9144 	  dest = gen_rtx_PLUS (Pmode, tp, reg);
9145 	}
9146       else
9147 	{
9148 	  /* Original scheme */
9149 	  if (TARGET_FDPIC)
9150 	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
9151 	  else
9152 	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
9153 	  dest = gen_reg_rtx (Pmode);
9154 	  emit_libcall_block (insns, dest, ret, x);
9155 	}
9156       return dest;
9157 
9158     case TLS_MODEL_LOCAL_DYNAMIC:
9159       if (TARGET_GNU2_TLS)
9160 	{
9161 	  gcc_assert (!TARGET_FDPIC);
9162 
9163 	  reg = arm_tls_descseq_addr (x, reg);
9164 
9165 	  tp = arm_load_tp (NULL_RTX);
9166 
9167 	  dest = gen_rtx_PLUS (Pmode, tp, reg);
9168 	}
9169       else
9170 	{
9171 	  if (TARGET_FDPIC)
9172 	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
9173 	  else
9174 	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
9175 
9176 	  /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
9177 	     share the LDM result with other LD model accesses.  */
9178 	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
9179 				UNSPEC_TLS);
9180 	  dest = gen_reg_rtx (Pmode);
9181 	  emit_libcall_block (insns, dest, ret, eqv);
9182 
9183 	  /* Load the addend.  */
9184 	  addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
9185 						     GEN_INT (TLS_LDO32)),
9186 				   UNSPEC_TLS);
9187 	  addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
9188 	  dest = gen_rtx_PLUS (Pmode, dest, addend);
9189 	}
9190       return dest;
9191 
9192     case TLS_MODEL_INITIAL_EXEC:
9193       if (TARGET_FDPIC)
9194 	{
9195 	  sum = gen_rtx_UNSPEC (Pmode,
9196 				gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
9197 				UNSPEC_TLS);
9198 	  reg = load_tls_operand (sum, reg);
9199 	  emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9200 	  emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
9201 	}
9202       else
9203 	{
9204 	  labelno = GEN_INT (pic_labelno++);
9205 	  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9206 	  label = gen_rtx_CONST (VOIDmode, label);
9207 	  sum = gen_rtx_UNSPEC (Pmode,
9208 				gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
9209 					   GEN_INT (TARGET_ARM ? 8 : 4)),
9210 				UNSPEC_TLS);
9211 	  reg = load_tls_operand (sum, reg);
9212 
9213 	  if (TARGET_ARM)
9214 	    emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
9215 	  else if (TARGET_THUMB2)
9216 	    emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
9217 	  else
9218 	    {
9219 	      emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9220 	      emit_move_insn (reg, gen_const_mem (SImode, reg));
9221 	    }
9222 	}
9223 
9224       tp = arm_load_tp (NULL_RTX);
9225 
9226       return gen_rtx_PLUS (Pmode, tp, reg);
9227 
9228     case TLS_MODEL_LOCAL_EXEC:
9229       tp = arm_load_tp (NULL_RTX);
9230 
9231       reg = gen_rtx_UNSPEC (Pmode,
9232 			    gen_rtvec (2, x, GEN_INT (TLS_LE32)),
9233 			    UNSPEC_TLS);
9234       reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
9235 
9236       return gen_rtx_PLUS (Pmode, tp, reg);
9237 
9238     default:
9239       abort ();
9240     }
9241 }
9242 
9243 /* Try machine-dependent ways of modifying an illegitimate address
9244    to be legitimate.  If we find one, return the new, valid address.  */
9245 rtx
9246 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9247 {
9248   if (arm_tls_referenced_p (x))
9249     {
9250       rtx addend = NULL;
9251 
9252       if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
9253 	{
9254 	  addend = XEXP (XEXP (x, 0), 1);
9255 	  x = XEXP (XEXP (x, 0), 0);
9256 	}
9257 
9258       if (GET_CODE (x) != SYMBOL_REF)
9259 	return x;
9260 
9261       gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
9262 
9263       x = legitimize_tls_address (x, NULL_RTX);
9264 
9265       if (addend)
9266 	{
9267 	  x = gen_rtx_PLUS (SImode, x, addend);
9268 	  orig_x = x;
9269 	}
9270       else
9271 	return x;
9272     }
9273 
9274   if (TARGET_THUMB1)
9275     return thumb_legitimize_address (x, orig_x, mode);
9276 
9277   if (GET_CODE (x) == PLUS)
9278     {
9279       rtx xop0 = XEXP (x, 0);
9280       rtx xop1 = XEXP (x, 1);
9281 
9282       if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
9283 	xop0 = force_reg (SImode, xop0);
9284 
9285       if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
9286 	  && !symbol_mentioned_p (xop1))
9287 	xop1 = force_reg (SImode, xop1);
9288 
9289       if (ARM_BASE_REGISTER_RTX_P (xop0)
9290 	  && CONST_INT_P (xop1))
9291 	{
9292 	  HOST_WIDE_INT n, low_n;
9293 	  rtx base_reg, val;
9294 	  n = INTVAL (xop1);
9295 
9296 	  /* VFP addressing modes actually allow greater offsets, but for
9297 	     now we just stick with the lowest common denominator.  */
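	  /* For example (illustrative value), a DFmode offset of 1021 (0x3fd)
	     is rewritten below as a base adjustment of 1024 plus a residual
	     offset of -3.  */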
9298 	  if (mode == DImode || mode == DFmode)
9299 	    {
9300 	      low_n = n & 0x0f;
9301 	      n &= ~0x0f;
9302 	      if (low_n > 4)
9303 		{
9304 		  n += 16;
9305 		  low_n -= 16;
9306 		}
9307 	    }
9308 	  else
9309 	    {
9310 	      low_n = ((mode) == TImode ? 0
9311 		       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
9312 	      n -= low_n;
9313 	    }
9314 
9315 	  base_reg = gen_reg_rtx (SImode);
9316 	  val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
9317 	  emit_move_insn (base_reg, val);
9318 	  x = plus_constant (Pmode, base_reg, low_n);
9319 	}
9320       else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9321 	x = gen_rtx_PLUS (SImode, xop0, xop1);
9322     }
9323 
9324   /* XXX We don't allow MINUS any more -- see comment in
9325      arm_legitimate_address_outer_p ().  */
9326   else if (GET_CODE (x) == MINUS)
9327     {
9328       rtx xop0 = XEXP (x, 0);
9329       rtx xop1 = XEXP (x, 1);
9330 
9331       if (CONSTANT_P (xop0))
9332 	xop0 = force_reg (SImode, xop0);
9333 
9334       if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
9335 	xop1 = force_reg (SImode, xop1);
9336 
9337       if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9338 	x = gen_rtx_MINUS (SImode, xop0, xop1);
9339     }
9340 
9341   /* Make sure to take full advantage of the pre-indexed addressing mode
9342      with absolute addresses which often allows for the base register to
9343      be factorized for multiple adjacent memory references, and it might
9344      even allow the minipool to be avoided entirely.  */
9345   else if (CONST_INT_P (x) && optimize > 0)
9346     {
9347       unsigned int bits;
9348       HOST_WIDE_INT mask, base, index;
9349       rtx base_reg;
9350 
9351       /* LDR and LDRB can use a 12-bit index, LDRSB and the rest can
9352 	 only use an 8-bit index.  So let's use a 12-bit index for
9353 	 SImode only and hope that arm_gen_constant will enable LDRB
9354 	 to use more bits. */
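      /* For example, an SImode access to the absolute address 0x12345 is
	 handled below as base 0x12000 (forced into a register) plus
	 index 0x345.  */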
9355       bits = (mode == SImode) ? 12 : 8;
9356       mask = (1 << bits) - 1;
9357       base = INTVAL (x) & ~mask;
9358       index = INTVAL (x) & mask;
9359       if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
9360 	{
9361 	  /* It'll most probably be more efficient to generate the
9362 	     base with more bits set and use a negative index instead.
9363 	     Don't do this for Thumb as negative offsets are much more
9364 	     limited.  */
9365 	  base |= mask;
9366 	  index -= mask;
9367 	}
9368       base_reg = force_reg (SImode, GEN_INT (base));
9369       x = plus_constant (Pmode, base_reg, index);
9370     }
9371 
9372   if (flag_pic)
9373     {
9374       /* We need to find and carefully transform any SYMBOL and LABEL
9375 	 references; so go back to the original address expression.  */
9376       rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9377 					  false /*compute_now*/);
9378 
9379       if (new_x != orig_x)
9380 	x = new_x;
9381     }
9382 
9383   return x;
9384 }
9385 
9386 
9387 /* Try machine-dependent ways of modifying an illegitimate Thumb address
9388    to be legitimate.  If we find one, return the new, valid address.  */
9389 rtx
9390 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9391 {
9392   if (GET_CODE (x) == PLUS
9393       && CONST_INT_P (XEXP (x, 1))
9394       && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
9395 	  || INTVAL (XEXP (x, 1)) < 0))
9396     {
9397       rtx xop0 = XEXP (x, 0);
9398       rtx xop1 = XEXP (x, 1);
9399       HOST_WIDE_INT offset = INTVAL (xop1);
9400 
9401       /* Try and fold the offset into a biasing of the base register and
9402 	 then offsetting that.  Don't do this when optimizing for space
9403 	 since it can cause too many CSEs.  */
9404       if (optimize_size && offset >= 0
9405 	  && offset < 256 + 31 * GET_MODE_SIZE (mode))
9406 	{
9407 	  HOST_WIDE_INT delta;
9408 
9409 	  if (offset >= 256)
9410 	    delta = offset - (256 - GET_MODE_SIZE (mode));
9411 	  else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
9412 	    delta = 31 * GET_MODE_SIZE (mode);
9413 	  else
9414 	    delta = offset & (~31 * GET_MODE_SIZE (mode));
9415 
9416 	  xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
9417 				NULL_RTX);
9418 	  x = plus_constant (Pmode, xop0, delta);
9419 	}
9420       else if (offset < 0 && offset > -256)
9421 	/* Small negative offsets are best done with a subtract before the
9422 	   dereference; forcing these into a register normally takes two
9423 	   instructions.  */
9424 	x = force_operand (x, NULL_RTX);
9425       else
9426 	{
9427 	  /* For the remaining cases, force the constant into a register.  */
9428 	  xop1 = force_reg (SImode, xop1);
9429 	  x = gen_rtx_PLUS (SImode, xop0, xop1);
9430 	}
9431     }
9432   else if (GET_CODE (x) == PLUS
9433 	   && s_register_operand (XEXP (x, 1), SImode)
9434 	   && !s_register_operand (XEXP (x, 0), SImode))
9435     {
9436       rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
9437 
9438       x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
9439     }
9440 
9441   if (flag_pic)
9442     {
9443       /* We need to find and carefully transform any SYMBOL and LABEL
9444 	 references; so go back to the original address expression.  */
9445       rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9446 					  false /*compute_now*/);
9447 
9448       if (new_x != orig_x)
9449 	x = new_x;
9450     }
9451 
9452   return x;
9453 }
9454 
9455 /* Return TRUE if X contains any TLS symbol references.  */
9456 
9457 bool
9458 arm_tls_referenced_p (rtx x)
9459 {
9460   if (! TARGET_HAVE_TLS)
9461     return false;
9462 
9463   subrtx_iterator::array_type array;
9464   FOR_EACH_SUBRTX (iter, array, x, ALL)
9465     {
9466       const_rtx x = *iter;
9467       if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
9468 	{
9469 	  /* ARM currently does not provide relocations to encode TLS variables
9470 	     into AArch32 instructions, only data, so there is currently no way
9471 	     to implement these if the literal pool is disabled.  */
9472 	  if (arm_disable_literal_pool)
9473 	    sorry ("accessing thread-local storage is not currently supported "
9474 		   "with %<-mpure-code%> or %<-mslow-flash-data%>");
9475 
9476 	  return true;
9477 	}
9478 
9479       /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
9480 	 TLS offsets, not real symbol references.  */
9481       if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9482 	iter.skip_subrtxes ();
9483     }
9484   return false;
9485 }
9486 
9487 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
9488 
9489    On the ARM, allow any integer (invalid ones are removed later by insn
9490    patterns), nice doubles and symbol_refs which refer to the function's
9491    constant pool XXX.
9492 
9493    When generating pic allow anything.  */
9494 
9495 static bool
9496 arm_legitimate_constant_p_1 (machine_mode, rtx x)
9497 {
9498   if (GET_CODE (x) == CONST_VECTOR && !neon_make_constant (x, false))
9499     return false;
9500 
9501   return flag_pic || !label_mentioned_p (x);
9502 }
9503 
9504 static bool
9505 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9506 {
9507   /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
9508      RTXs.  These must therefore be allowed for Thumb-1 so that, when run
9509      for ARMv8-M Baseline or later, the result is valid.  */
9510   if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
9511     x = XEXP (x, 0);
9512 
9513   return (CONST_INT_P (x)
9514 	  || CONST_DOUBLE_P (x)
9515 	  || CONSTANT_ADDRESS_P (x)
9516 	  || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
9517 	  || flag_pic);
9518 }
9519 
9520 static bool
9521 arm_legitimate_constant_p (machine_mode mode, rtx x)
9522 {
9523   return (!arm_cannot_force_const_mem (mode, x)
9524 	  && (TARGET_32BIT
9525 	      ? arm_legitimate_constant_p_1 (mode, x)
9526 	      : thumb_legitimate_constant_p (mode, x)));
9527 }
9528 
9529 /* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
9530 
9531 static bool
9532 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9533 {
9534   rtx base, offset;
9535   split_const (x, &base, &offset);
9536 
9537   if (SYMBOL_REF_P (base))
9538     {
9539       /* Function symbols cannot have an offset due to the Thumb bit.  */
9540       if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
9541 	  && INTVAL (offset) != 0)
9542 	return true;
9543 
9544       if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
9545 	  && !offset_within_block_p (base, INTVAL (offset)))
9546 	return true;
9547     }
9548   return arm_tls_referenced_p (x);
9549 }
9550 
9551 #define REG_OR_SUBREG_REG(X)						\
9552   (REG_P (X)							\
9553    || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
9554 
9555 #define REG_OR_SUBREG_RTX(X)			\
9556    (REG_P (X) ? (X) : SUBREG_REG (X))
9557 
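/* Worker used when computing speed costs for Thumb-1: return an estimate
   of the cost of expression X with code CODE, appearing as an operand of
   an expression with code OUTER.  */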
9558 static inline int
9559 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9560 {
9561   machine_mode mode = GET_MODE (x);
9562   int total, words;
9563 
9564   switch (code)
9565     {
9566     case ASHIFT:
9567     case ASHIFTRT:
9568     case LSHIFTRT:
9569     case ROTATERT:
9570       return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9571 
9572     case PLUS:
9573     case MINUS:
9574     case COMPARE:
9575     case NEG:
9576     case NOT:
9577       return COSTS_N_INSNS (1);
9578 
9579     case MULT:
9580       if (arm_arch6m && arm_m_profile_small_mul)
9581 	return COSTS_N_INSNS (32);
9582 
9583       if (CONST_INT_P (XEXP (x, 1)))
9584 	{
9585 	  int cycles = 0;
9586 	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
9587 
9588 	  while (i)
9589 	    {
9590 	      i >>= 2;
9591 	      cycles++;
9592 	    }
9593 	  return COSTS_N_INSNS (2) + cycles;
9594 	}
9595       return COSTS_N_INSNS (1) + 16;
9596 
9597     case SET:
9598       /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9599 	 the mode.  */
9600       words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9601       return (COSTS_N_INSNS (words)
9602 	      + 4 * ((MEM_P (SET_SRC (x)))
9603 		     + MEM_P (SET_DEST (x))));
9604 
9605     case CONST_INT:
9606       if (outer == SET)
9607 	{
9608 	  if (UINTVAL (x) < 256
9609 	      /* 16-bit constant.  */
9610 	      || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
9611 	    return 0;
9612 	  if (thumb_shiftable_const (INTVAL (x)))
9613 	    return COSTS_N_INSNS (2);
9614 	  return arm_disable_literal_pool
9615 	    ? COSTS_N_INSNS (8)
9616 	    : COSTS_N_INSNS (3);
9617 	}
9618       else if ((outer == PLUS || outer == COMPARE)
9619 	       && INTVAL (x) < 256 && INTVAL (x) > -256)
9620 	return 0;
9621       else if ((outer == IOR || outer == XOR || outer == AND)
9622 	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
9623 	return COSTS_N_INSNS (1);
9624       else if (outer == AND)
9625 	{
9626 	  int i;
9627 	  /* This duplicates the tests in the andsi3 expander.  */
9628 	  for (i = 9; i <= 31; i++)
9629 	    if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9630 		|| (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9631 	      return COSTS_N_INSNS (2);
9632 	}
9633       else if (outer == ASHIFT || outer == ASHIFTRT
9634 	       || outer == LSHIFTRT)
9635 	return 0;
9636       return COSTS_N_INSNS (2);
9637 
9638     case CONST:
9639     case CONST_DOUBLE:
9640     case LABEL_REF:
9641     case SYMBOL_REF:
9642       return COSTS_N_INSNS (3);
9643 
9644     case UDIV:
9645     case UMOD:
9646     case DIV:
9647     case MOD:
9648       return 100;
9649 
9650     case TRUNCATE:
9651       return 99;
9652 
9653     case AND:
9654     case XOR:
9655     case IOR:
9656       /* XXX guess.  */
9657       return 8;
9658 
9659     case MEM:
9660       /* XXX another guess.  */
9661       /* Memory costs quite a lot for the first word, but subsequent words
9662 	 load at the equivalent of a single insn each.  */
9663       return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9664 	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9665 		 ? 4 : 0));
9666 
9667     case IF_THEN_ELSE:
9668       /* XXX a guess.  */
9669       if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9670 	return 14;
9671       return 2;
9672 
9673     case SIGN_EXTEND:
9674     case ZERO_EXTEND:
9675       total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9676       total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9677 
9678       if (mode == SImode)
9679 	return total;
9680 
9681       if (arm_arch6)
9682 	return total + COSTS_N_INSNS (1);
9683 
9684       /* Assume a two-shift sequence.  Increase the cost slightly so
9685 	 we prefer actual shifts over an extend operation.  */
9686       return total + 1 + COSTS_N_INSNS (2);
9687 
9688     default:
9689       return 99;
9690     }
9691 }
9692 
9693 /* Estimate the size cost of Thumb-1 instructions.
9694    For now most of the code is copied from thumb1_rtx_costs.  We need more
9695    fine-grained tuning when we have more related test cases.  */
9696 static inline int
9697 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9698 {
9699   machine_mode mode = GET_MODE (x);
9700   int words, cost;
9701 
9702   switch (code)
9703     {
9704     case ASHIFT:
9705     case ASHIFTRT:
9706     case LSHIFTRT:
9707     case ROTATERT:
9708       return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9709 
9710     case PLUS:
9711     case MINUS:
9712       /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
9713 	 patterns defined by RTL expansion, especially for the expansion of
9714 	 multiplication.  */
9715       if ((GET_CODE (XEXP (x, 0)) == MULT
9716 	   && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
9717 	  || (GET_CODE (XEXP (x, 1)) == MULT
9718 	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9719 	return COSTS_N_INSNS (2);
9720       /* Fall through.  */
9721     case COMPARE:
9722     case NEG:
9723     case NOT:
9724       return COSTS_N_INSNS (1);
9725 
9726     case MULT:
9727       if (CONST_INT_P (XEXP (x, 1)))
9728         {
9729           /* The Thumb-1 mul instruction can't operate on a constant.  We must
9730              load it into a register first.  */
9731           int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9732 	  /* For targets that have a very small and high-latency multiply
9733 	     unit, we prefer to synthesize the mult with up to 5 instructions,
9734 	     giving a good balance between size and performance.  */
9735 	  if (arm_arch6m && arm_m_profile_small_mul)
9736 	    return COSTS_N_INSNS (5);
9737 	  else
9738 	    return COSTS_N_INSNS (1) + const_size;
9739         }
9740       return COSTS_N_INSNS (1);
9741 
9742     case SET:
9743       /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9744 	 the mode.  */
9745       words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9746       cost = COSTS_N_INSNS (words);
9747       if (satisfies_constraint_J (SET_SRC (x))
9748 	  || satisfies_constraint_K (SET_SRC (x))
9749 	     /* Too big an immediate for a 2-byte mov, using MOVT.  */
9750 	  || (CONST_INT_P (SET_SRC (x))
9751 	      && UINTVAL (SET_SRC (x)) >= 256
9752 	      && TARGET_HAVE_MOVT
9753 	      && satisfies_constraint_j (SET_SRC (x)))
9754 	     /* thumb1_movdi_insn.  */
9755 	  || ((words > 1) && MEM_P (SET_SRC (x))))
9756 	cost += COSTS_N_INSNS (1);
9757       return cost;
9758 
9759     case CONST_INT:
9760       if (outer == SET)
9761         {
9762           if (UINTVAL (x) < 256)
9763             return COSTS_N_INSNS (1);
9764 	  /* movw is 4 bytes long.  */
9765 	  if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9766 	    return COSTS_N_INSNS (2);
9767 	  /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
9768 	  if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9769             return COSTS_N_INSNS (2);
9770 	  /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
9771           if (thumb_shiftable_const (INTVAL (x)))
9772             return COSTS_N_INSNS (2);
9773 	  return arm_disable_literal_pool
9774 	    ? COSTS_N_INSNS (8)
9775 	    : COSTS_N_INSNS (3);
9776         }
9777       else if ((outer == PLUS || outer == COMPARE)
9778                && INTVAL (x) < 256 && INTVAL (x) > -256)
9779         return 0;
9780       else if ((outer == IOR || outer == XOR || outer == AND)
9781                && INTVAL (x) < 256 && INTVAL (x) >= -256)
9782         return COSTS_N_INSNS (1);
9783       else if (outer == AND)
9784         {
9785           int i;
9786           /* This duplicates the tests in the andsi3 expander.  */
9787           for (i = 9; i <= 31; i++)
9788             if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9789                 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9790               return COSTS_N_INSNS (2);
9791         }
9792       else if (outer == ASHIFT || outer == ASHIFTRT
9793                || outer == LSHIFTRT)
9794         return 0;
9795       return COSTS_N_INSNS (2);
9796 
9797     case CONST:
9798     case CONST_DOUBLE:
9799     case LABEL_REF:
9800     case SYMBOL_REF:
9801       return COSTS_N_INSNS (3);
9802 
9803     case UDIV:
9804     case UMOD:
9805     case DIV:
9806     case MOD:
9807       return 100;
9808 
9809     case TRUNCATE:
9810       return 99;
9811 
9812     case AND:
9813     case XOR:
9814     case IOR:
9815       return COSTS_N_INSNS (1);
9816 
9817     case MEM:
9818       return (COSTS_N_INSNS (1)
9819 	      + COSTS_N_INSNS (1)
9820 		* ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9821               + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9822                  ? COSTS_N_INSNS (1) : 0));
9823 
9824     case IF_THEN_ELSE:
9825       /* XXX a guess.  */
9826       if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9827         return 14;
9828       return 2;
9829 
9830     case ZERO_EXTEND:
9831       /* XXX still guessing.  */
9832       switch (GET_MODE (XEXP (x, 0)))
9833         {
9834           case E_QImode:
9835             return (1 + (mode == DImode ? 4 : 0)
9836                     + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9837 
9838           case E_HImode:
9839             return (4 + (mode == DImode ? 4 : 0)
9840                     + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9841 
9842           case E_SImode:
9843             return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9844 
9845           default:
9846             return 99;
9847         }
9848 
9849     default:
9850       return 99;
9851     }
9852 }
9853 
9854 /* Helper function for arm_rtx_costs.  If one operand of OP, a PLUS,
9855    is the carry flag, then return the other operand.  If
9856    neither is a carry, return OP unchanged.  */
9857 static rtx
9858 strip_carry_operation (rtx op)
9859 {
9860   gcc_assert (GET_CODE (op) == PLUS);
9861   if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
9862     return XEXP (op, 1);
9863   else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
9864     return XEXP (op, 0);
9865   return op;
9866 }
9867 
9868 /* Helper function for arm_rtx_costs.  If the operand is a valid shift
9869    operand, then return the operand that is being shifted.  If the shift
9870    is not by a constant, then set SHIFT_REG to point to the operand.
9871    Return NULL if OP is not a shifter operand.  */
9872 static rtx
9873 shifter_op_p (rtx op, rtx *shift_reg)
9874 {
9875   enum rtx_code code = GET_CODE (op);
9876 
9877   if (code == MULT && CONST_INT_P (XEXP (op, 1))
9878       && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9879     return XEXP (op, 0);
9880   else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9881     return XEXP (op, 0);
9882   else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9883 	   || code == ASHIFTRT)
9884     {
9885       if (!CONST_INT_P (XEXP (op, 1)))
9886 	*shift_reg = XEXP (op, 1);
9887       return XEXP (op, 0);
9888     }
9889 
9890   return NULL;
9891 }
9892 
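/* Helper for arm_rtx_costs_internal: cost an UNSPEC or UNSPEC_VOLATILE
   expression X.  The cost is stored in *COST; the return value is TRUE,
   meaning the caller need not recurse into the operands.  */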
9893 static bool
9894 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9895 {
9896   const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9897   rtx_code code = GET_CODE (x);
9898   gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9899 
9900   switch (XINT (x, 1))
9901     {
9902     case UNSPEC_UNALIGNED_LOAD:
9903       /* We can only do unaligned loads into the integer unit, and we can't
9904 	 use LDM or LDRD.  */
9905       *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9906       if (speed_p)
9907 	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9908 		  + extra_cost->ldst.load_unaligned);
9909 
9910 #ifdef NOT_YET
9911       *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9912 				 ADDR_SPACE_GENERIC, speed_p);
9913 #endif
9914       return true;
9915 
9916     case UNSPEC_UNALIGNED_STORE:
9917       *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9918       if (speed_p)
9919 	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9920 		  + extra_cost->ldst.store_unaligned);
9921 
9922       *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9923 #ifdef NOT_YET
9924       *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9925 				 ADDR_SPACE_GENERIC, speed_p);
9926 #endif
9927       return true;
9928 
9929     case UNSPEC_VRINTZ:
9930     case UNSPEC_VRINTP:
9931     case UNSPEC_VRINTM:
9932     case UNSPEC_VRINTR:
9933     case UNSPEC_VRINTX:
9934     case UNSPEC_VRINTA:
9935       if (speed_p)
9936         *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9937 
9938       return true;
9939     default:
9940       *cost = COSTS_N_INSNS (2);
9941       break;
9942     }
9943   return true;
9944 }
9945 
9946 /* Cost of a libcall.  We assume one insn per argument, an amount for the
9947    call (one insn for -Os) and then one for processing the result.  */
9948 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
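/* For example, a two-argument libcall costs LIBCALL_COST (2)
   = COSTS_N_INSNS (20) when optimizing for speed and COSTS_N_INSNS (4)
   when optimizing for size.  */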
9949 
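/* Helper used when costing narrow-mode (QImode/HImode) PLUS and MINUS in
   arm_rtx_costs_internal: if operand IDX of X is a left shift by a
   constant or by a register, add the cost of a shifted arithmetic
   operation plus the costs of both operands to *COST and return TRUE from
   the enclosing function.  Relies on SHIFT_OP, SHIFT_REG, EXTRA_COST and
   SPEED_P from the surrounding scope.  */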
9950 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
9951 	do								\
9952 	  {								\
9953 	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
9954 	    if (shift_op != NULL					\
9955 	        && arm_rtx_shift_left_p (XEXP (x, IDX)))		\
9956 	      {								\
9957 	        if (shift_reg)						\
9958 		  {							\
9959 		    if (speed_p)					\
9960 		      *cost += extra_cost->alu.arith_shift_reg;		\
9961 		    *cost += rtx_cost (shift_reg, GET_MODE (shift_reg),	\
9962 				       ASHIFT, 1, speed_p);		\
9963 		  }							\
9964 	        else if (speed_p)					\
9965 		  *cost += extra_cost->alu.arith_shift;			\
9966 									\
9967 		*cost += (rtx_cost (shift_op, GET_MODE (shift_op),	\
9968 				    ASHIFT, 0, speed_p)			\
9969 			  + rtx_cost (XEXP (x, 1 - IDX),		\
9970 				      GET_MODE (shift_op),		\
9971 			              OP, 1, speed_p));			\
9972 	        return true;						\
9973 	      }								\
9974 	  }								\
9975 	while (0)
9976 
9977 /* Helper function for arm_rtx_costs_internal.  Calculates the cost of a MEM,
9978    considering the costs of the addressing mode and memory access
9979    separately.  */
9980 static bool
9981 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
9982 	       int *cost, bool speed_p)
9983 {
9984   machine_mode mode = GET_MODE (x);
9985 
9986   *cost = COSTS_N_INSNS (1);
9987 
9988   if (flag_pic
9989       && GET_CODE (XEXP (x, 0)) == PLUS
9990       && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9991     /* This will be split into two instructions.  Add the cost of the
9992        additional instruction here.  The cost of the memory access is computed
9993        below.  See arm.md:calculate_pic_address.  */
9994     *cost += COSTS_N_INSNS (1);
9995 
9996   /* Calculate cost of the addressing mode.  */
9997   if (speed_p)
9998     {
9999       arm_addr_mode_op op_type;
10000       switch (GET_CODE (XEXP (x, 0)))
10001 	{
10002 	default:
10003 	case REG:
10004 	  op_type = AMO_DEFAULT;
10005 	  break;
10006 	case MINUS:
10007 	  /* MINUS does not appear in RTL, but the architecture supports it,
10008 	     so handle this case defensively.  */
10009 	  /* fall through */
10010 	case PLUS:
10011 	  op_type = AMO_NO_WB;
10012 	  break;
10013 	case PRE_INC:
10014 	case PRE_DEC:
10015 	case POST_INC:
10016 	case POST_DEC:
10017 	case PRE_MODIFY:
10018 	case POST_MODIFY:
10019 	  op_type = AMO_WB;
10020 	  break;
10021 	}
10022 
10023       if (VECTOR_MODE_P (mode))
10024 	  *cost += current_tune->addr_mode_costs->vector[op_type];
10025       else if (FLOAT_MODE_P (mode))
10026 	  *cost += current_tune->addr_mode_costs->fp[op_type];
10027       else
10028 	  *cost += current_tune->addr_mode_costs->integer[op_type];
10029     }
10030 
10031   /* Calculate cost of memory access.  */
10032   if (speed_p)
10033     {
10034       if (FLOAT_MODE_P (mode))
10035 	{
10036 	  if (GET_MODE_SIZE (mode) == 8)
10037 	    *cost += extra_cost->ldst.loadd;
10038 	  else
10039 	    *cost += extra_cost->ldst.loadf;
10040 	}
10041       else if (VECTOR_MODE_P (mode))
10042 	*cost += extra_cost->ldst.loadv;
10043       else
10044 	{
10045 	  /* Integer modes */
10046 	  if (GET_MODE_SIZE (mode) == 8)
10047 	    *cost += extra_cost->ldst.ldrd;
10048 	  else
10049 	    *cost += extra_cost->ldst.load;
10050 	}
10051     }
10052 
10053   return true;
10054 }
10055 
10056 /* RTX costs.  Make an estimate of the cost of executing the operation
10057    X, which is contained within an operation with code OUTER_CODE.
10058    SPEED_P indicates whether the cost desired is the performance cost,
10059    or the size cost.  The estimate is stored in COST and the return
10060    value is TRUE if the cost calculation is final, or FALSE if the
10061    caller should recurse through the operands of X to add additional
10062    costs.
10063 
10064    We currently make no attempt to model the size savings of Thumb-2
10065    16-bit instructions.  At the normal points in compilation where
10066    this code is called we have no measure of whether the condition
10067    flags are live or not, and thus no realistic way to determine what
10068    the size will eventually be.  */
10069 static bool
10070 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
10071 		   const struct cpu_cost_table *extra_cost,
10072 		   int *cost, bool speed_p)
10073 {
10074   machine_mode mode = GET_MODE (x);
10075 
10076   *cost = COSTS_N_INSNS (1);
10077 
10078   if (TARGET_THUMB1)
10079     {
10080       if (speed_p)
10081 	*cost = thumb1_rtx_costs (x, code, outer_code);
10082       else
10083 	*cost = thumb1_size_rtx_costs (x, code, outer_code);
10084       return true;
10085     }
10086 
10087   switch (code)
10088     {
10089     case SET:
10090       *cost = 0;
10091       /* SET RTXs don't have a mode so we get it from the destination.  */
10092       mode = GET_MODE (SET_DEST (x));
10093 
10094       if (REG_P (SET_SRC (x))
10095 	  && REG_P (SET_DEST (x)))
10096 	{
10097 	  /* Assume that most copies can be done with a single insn,
10098 	     unless we don't have HW FP, in which case everything
10099 	     larger than word mode will require two insns.  */
10100 	  *cost = COSTS_N_INSNS (((!TARGET_VFP_BASE
10101 				   && GET_MODE_SIZE (mode) > 4)
10102 				  || mode == DImode)
10103 				 ? 2 : 1);
10104 	  /* Conditional register moves can be encoded
10105 	     in 16 bits in Thumb mode.  */
10106 	  if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
10107 	    *cost >>= 1;
10108 
10109 	  return true;
10110 	}
10111 
10112       if (CONST_INT_P (SET_SRC (x)))
10113 	{
10114 	  /* Handle CONST_INT here, since the value doesn't have a mode
10115 	     and we would otherwise be unable to work out the true cost.  */
10116 	  *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
10117 			    0, speed_p);
10118 	  outer_code = SET;
10119 	  /* Slightly lower the cost of setting a core reg to a constant.
10120 	     This helps break up chains and allows for better scheduling.  */
10121 	  if (REG_P (SET_DEST (x))
10122 	      && REGNO (SET_DEST (x)) <= LR_REGNUM)
10123 	    *cost -= 1;
10124 	  x = SET_SRC (x);
10125 	  /* Immediate moves with an immediate in the range [0, 255] can be
10126 	     encoded in 16 bits in Thumb mode.  */
10127 	  if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
10128 	      && INTVAL (x) >= 0 && INTVAL (x) <= 255)
10129 	    *cost >>= 1;
10130 	  goto const_int_cost;
10131 	}
10132 
10133       return false;
10134 
10135     case MEM:
10136       return arm_mem_costs (x, extra_cost, cost, speed_p);
10137 
10138     case PARALLEL:
10139     {
10140    /* Calculations of LDM costs are complex.  We assume an initial cost
10141    (ldm_1st) which will load the number of registers mentioned in
10142    ldm_regs_per_insn_1st registers; then each additional
10143    ldm_regs_per_insn_subsequent registers cost one more insn.  The
10144    formula for N regs is thus:
10145 
10146    ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
10147 			     + ldm_regs_per_insn_subsequent - 1)
10148 			    / ldm_regs_per_insn_subsequent).
10149 
10150    Additional costs may also be added for addressing.  A similar
10151    formula is used for STM.  */
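   /* For instance (illustrative tuning values): with ldm_regs_per_insn_1st
      == 3 and ldm_regs_per_insn_subsequent == 2, loading 7 registers costs
      ldm_1st + COSTS_N_INSNS ((4 + 2 - 1) / 2) = ldm_1st + COSTS_N_INSNS (2).  */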
10152 
10153       bool is_ldm = load_multiple_operation (x, SImode);
10154       bool is_stm = store_multiple_operation (x, SImode);
10155 
10156       if (is_ldm || is_stm)
10157         {
10158 	  if (speed_p)
10159 	    {
10160 	      HOST_WIDE_INT nregs = XVECLEN (x, 0);
10161 	      HOST_WIDE_INT regs_per_insn_1st = is_ldm
10162 	                              ? extra_cost->ldst.ldm_regs_per_insn_1st
10163 	                              : extra_cost->ldst.stm_regs_per_insn_1st;
10164 	      HOST_WIDE_INT regs_per_insn_sub = is_ldm
10165 	                       ? extra_cost->ldst.ldm_regs_per_insn_subsequent
10166 	                       : extra_cost->ldst.stm_regs_per_insn_subsequent;
10167 
10168 	      *cost += regs_per_insn_1st
10169 	               + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
10170 					    + regs_per_insn_sub - 1)
10171 					  / regs_per_insn_sub);
10172 	      return true;
10173 	    }
10174 
10175         }
10176       return false;
10177     }
10178     case DIV:
10179     case UDIV:
10180       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10181 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
10182 	*cost += COSTS_N_INSNS (speed_p
10183 			       ? extra_cost->fp[mode != SFmode].div : 0);
10184       else if (mode == SImode && TARGET_IDIV)
10185 	*cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
10186       else
10187 	*cost = LIBCALL_COST (2);
10188 
10189       /* Make the cost of sdiv more expensive so that when both sdiv and udiv
10190 	 are possible, udiv is preferred.  */
10191       *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
10192       return false;	/* All arguments must be in registers.  */
10193 
10194     case MOD:
10195       /* MOD by a power of 2 can be expanded as:
10196 	 rsbs    r1, r0, #0
10197 	 and     r0, r0, #(n - 1)
10198 	 and     r1, r1, #(n - 1)
10199 	 rsbpl   r0, r1, #0.  */
10200       if (CONST_INT_P (XEXP (x, 1))
10201 	  && exact_log2 (INTVAL (XEXP (x, 1))) > 0
10202 	  && mode == SImode)
10203 	{
10204 	  *cost += COSTS_N_INSNS (3);
10205 
10206 	  if (speed_p)
10207 	    *cost += 2 * extra_cost->alu.logical
10208 		     + extra_cost->alu.arith;
10209 	  return true;
10210 	}
10211 
10212     /* Fall-through.  */
10213     case UMOD:
10214       /* Make the cost of sdiv more expensive so that when both sdiv and udiv
10215 	 are possible, udiv is preferred.  */
10216       *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
10217       return false;	/* All arguments must be in registers.  */
10218 
10219     case ROTATE:
10220       if (mode == SImode && REG_P (XEXP (x, 1)))
10221 	{
10222 	  *cost += (COSTS_N_INSNS (1)
10223 		   + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10224 	  if (speed_p)
10225 	    *cost += extra_cost->alu.shift_reg;
10226 	  return true;
10227 	}
10228       /* Fall through */
10229     case ROTATERT:
10230     case ASHIFT:
10231     case LSHIFTRT:
10232     case ASHIFTRT:
10233       if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
10234 	{
10235 	  *cost += (COSTS_N_INSNS (2)
10236 		   + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10237 	  if (speed_p)
10238 	    *cost += 2 * extra_cost->alu.shift;
10239 	  /* Slightly disparage left shift by 1 so we prefer adddi3.  */
10240 	  if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
10241 	    *cost += 1;
10242 	  return true;
10243 	}
10244       else if (mode == SImode)
10245 	{
10246 	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10247 	  /* Slightly disparage register shifts at -Os, but not by much.  */
10248 	  if (!CONST_INT_P (XEXP (x, 1)))
10249 	    *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10250 		      + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10251 	  return true;
10252 	}
10253       else if (GET_MODE_CLASS (mode) == MODE_INT
10254 	       && GET_MODE_SIZE (mode) < 4)
10255 	{
10256 	  if (code == ASHIFT)
10257 	    {
10258 	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10259 	      /* Slightly disparage register shifts at -Os, but not by
10260 	         much.  */
10261 	      if (!CONST_INT_P (XEXP (x, 1)))
10262 		*cost += (speed_p ? extra_cost->alu.shift_reg : 1
10263 			  + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10264 	    }
10265 	  else if (code == LSHIFTRT || code == ASHIFTRT)
10266 	    {
10267 	      if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
10268 		{
10269 		  /* Can use SBFX/UBFX.  */
10270 		  if (speed_p)
10271 		    *cost += extra_cost->alu.bfx;
10272 		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10273 		}
10274 	      else
10275 		{
10276 		  *cost += COSTS_N_INSNS (1);
10277 		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10278 		  if (speed_p)
10279 		    {
10280 		      if (CONST_INT_P (XEXP (x, 1)))
10281 			*cost += 2 * extra_cost->alu.shift;
10282 		      else
10283 			*cost += (extra_cost->alu.shift
10284 				  + extra_cost->alu.shift_reg);
10285 		    }
10286 		  else
10287 		    /* Slightly disparage register shifts.  */
10288 		    *cost += !CONST_INT_P (XEXP (x, 1));
10289 		}
10290 	    }
10291 	  else /* Rotates.  */
10292 	    {
10293 	      *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
10294 	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10295 	      if (speed_p)
10296 		{
10297 		  if (CONST_INT_P (XEXP (x, 1)))
10298 		    *cost += (2 * extra_cost->alu.shift
10299 			      + extra_cost->alu.log_shift);
10300 		  else
10301 		    *cost += (extra_cost->alu.shift
10302 			      + extra_cost->alu.shift_reg
10303 			      + extra_cost->alu.log_shift_reg);
10304 		}
10305 	    }
10306 	  return true;
10307 	}
10308 
10309       *cost = LIBCALL_COST (2);
10310       return false;
10311 
10312     case BSWAP:
10313       if (arm_arch6)
10314         {
10315           if (mode == SImode)
10316             {
10317               if (speed_p)
10318                 *cost += extra_cost->alu.rev;
10319 
10320               return false;
10321             }
10322         }
10323       else
10324         {
10325         /* No rev instruction available.  Look at arm_legacy_rev
10326            and thumb_legacy_rev for the form of RTL used then.  */
10327           if (TARGET_THUMB)
10328             {
10329               *cost += COSTS_N_INSNS (9);
10330 
10331               if (speed_p)
10332                 {
10333                   *cost += 6 * extra_cost->alu.shift;
10334                   *cost += 3 * extra_cost->alu.logical;
10335                 }
10336             }
10337           else
10338             {
10339               *cost += COSTS_N_INSNS (4);
10340 
10341               if (speed_p)
10342                 {
10343                   *cost += 2 * extra_cost->alu.shift;
10344                   *cost += extra_cost->alu.arith_shift;
10345                   *cost += 2 * extra_cost->alu.logical;
10346                 }
10347             }
10348           return true;
10349         }
10350       return false;
10351 
10352     case MINUS:
10353       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10354 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
10355 	{
10356 	  if (GET_CODE (XEXP (x, 0)) == MULT
10357 	      || GET_CODE (XEXP (x, 1)) == MULT)
10358 	    {
10359 	      rtx mul_op0, mul_op1, sub_op;
10360 
10361 	      if (speed_p)
10362 		*cost += extra_cost->fp[mode != SFmode].mult_addsub;
10363 
10364 	      if (GET_CODE (XEXP (x, 0)) == MULT)
10365 		{
10366 		  mul_op0 = XEXP (XEXP (x, 0), 0);
10367 		  mul_op1 = XEXP (XEXP (x, 0), 1);
10368 		  sub_op = XEXP (x, 1);
10369 		}
10370 	      else
10371 		{
10372 		  mul_op0 = XEXP (XEXP (x, 1), 0);
10373 		  mul_op1 = XEXP (XEXP (x, 1), 1);
10374 		  sub_op = XEXP (x, 0);
10375 		}
10376 
10377 	      /* The first operand of the multiply may be optionally
10378 		 negated.  */
10379 	      if (GET_CODE (mul_op0) == NEG)
10380 		mul_op0 = XEXP (mul_op0, 0);
10381 
10382 	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10383 			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
10384 			+ rtx_cost (sub_op, mode, code, 0, speed_p));
10385 
10386 	      return true;
10387 	    }
10388 
10389 	  if (speed_p)
10390 	    *cost += extra_cost->fp[mode != SFmode].addsub;
10391 	  return false;
10392 	}
10393 
10394       if (mode == SImode)
10395 	{
10396 	  rtx shift_by_reg = NULL;
10397 	  rtx shift_op;
10398 	  rtx non_shift_op;
10399 	  rtx op0 = XEXP (x, 0);
10400 	  rtx op1 = XEXP (x, 1);
10401 
10402 	  /* Factor out any borrow operation.  There's more than one way
10403 	     of expressing this; try to recognize them all.  */
10404 	  if (GET_CODE (op0) == MINUS)
10405 	    {
10406 	      if (arm_borrow_operation (op1, SImode))
10407 		{
10408 		  op1 = XEXP (op0, 1);
10409 		  op0 = XEXP (op0, 0);
10410 		}
10411 	      else if (arm_borrow_operation (XEXP (op0, 1), SImode))
10412 		op0 = XEXP (op0, 0);
10413 	    }
10414 	  else if (GET_CODE (op1) == PLUS
10415 		   && arm_borrow_operation (XEXP (op1, 0), SImode))
10416 	    op1 = XEXP (op1, 0);
10417 	  else if (GET_CODE (op0) == NEG
10418 		   && arm_borrow_operation (op1, SImode))
10419 	    {
10420 	      /* Negate with carry-in.  For Thumb2 this is done with
10421 		 SBC R, X, X lsl #1 (ie X - 2X - C) as Thumb lacks the
10422 		 RSC instruction that exists in Arm mode.  */
10423 	      if (speed_p)
10424 		*cost += (TARGET_THUMB2
10425 			  ? extra_cost->alu.arith_shift
10426 			  : extra_cost->alu.arith);
10427 	      *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
10428 	      return true;
10429 	    }
10430 	  /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
10431 	     Note we do mean ~borrow here.  */
10432 	  else if (TARGET_ARM && arm_carry_operation (op0, SImode))
10433 	    {
10434 	      *cost += rtx_cost (op1, mode, code, 1, speed_p);
10435 	      return true;
10436 	    }
10437 
10438 	  shift_op = shifter_op_p (op0, &shift_by_reg);
10439 	  if (shift_op == NULL)
10440 	    {
10441 	      shift_op = shifter_op_p (op1, &shift_by_reg);
10442 	      non_shift_op = op0;
10443 	    }
10444 	  else
10445 	    non_shift_op = op1;
10446 
10447 	  if (shift_op != NULL)
10448 	    {
10449 	      if (shift_by_reg != NULL)
10450 		{
10451 		  if (speed_p)
10452 		    *cost += extra_cost->alu.arith_shift_reg;
10453 		  *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
10454 		}
10455 	      else if (speed_p)
10456 		*cost += extra_cost->alu.arith_shift;
10457 
10458 	      *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
10459 	      *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
10460 	      return true;
10461 	    }
10462 
10463 	  if (arm_arch_thumb2
10464 	      && GET_CODE (XEXP (x, 1)) == MULT)
10465 	    {
10466 	      /* MLS.  */
10467 	      if (speed_p)
10468 		*cost += extra_cost->mult[0].add;
10469 	      *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
10470 	      *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
10471 	      *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
10472 	      return true;
10473 	    }
10474 
10475 	  if (CONST_INT_P (op0))
10476 	    {
10477 	      int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
10478 					    INTVAL (op0), NULL_RTX,
10479 					    NULL_RTX, 1, 0);
10480 	      *cost = COSTS_N_INSNS (insns);
10481 	      if (speed_p)
10482 		*cost += insns * extra_cost->alu.arith;
10483 	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10484 	      return true;
10485 	    }
10486 	  else if (speed_p)
10487 	    *cost += extra_cost->alu.arith;
10488 
10489 	  /* Don't recurse as we don't want to cost any borrow that
10490 	     we've stripped.  */
10491 	  *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
10492 	  *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10493 	  return true;
10494 	}
10495 
10496       if (GET_MODE_CLASS (mode) == MODE_INT
10497 	  && GET_MODE_SIZE (mode) < 4)
10498 	{
10499 	  rtx shift_op, shift_reg;
10500 	  shift_reg = NULL;
10501 
10502 	  /* We check both sides of the MINUS for shifter operands since,
10503 	     unlike PLUS, it's not commutative.  */
10504 
10505 	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
10506 	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
10507 
10508 	  /* Slightly disparage, as we might need to widen the result.  */
10509 	  *cost += 1;
10510 	  if (speed_p)
10511 	    *cost += extra_cost->alu.arith;
10512 
10513 	  if (CONST_INT_P (XEXP (x, 0)))
10514 	    {
10515 	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10516 	      return true;
10517 	    }
10518 
10519 	  return false;
10520 	}
10521 
10522       if (mode == DImode)
10523 	{
10524 	  *cost += COSTS_N_INSNS (1);
10525 
10526 	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
10527 	    {
10528 	      rtx op1 = XEXP (x, 1);
10529 
10530 	      if (speed_p)
10531 		*cost += 2 * extra_cost->alu.arith;
10532 
10533 	      if (GET_CODE (op1) == ZERO_EXTEND)
10534 		*cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
10535 				   0, speed_p);
10536 	      else
10537 		*cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10538 	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10539 				 0, speed_p);
10540 	      return true;
10541 	    }
10542 	  else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10543 	    {
10544 	      if (speed_p)
10545 		*cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
10546 	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
10547 				  0, speed_p)
10548 			+ rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
10549 	      return true;
10550 	    }
10551 	  else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10552 		   || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
10553 	    {
10554 	      if (speed_p)
10555 		*cost += (extra_cost->alu.arith
10556 			  + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10557 			     ? extra_cost->alu.arith
10558 			     : extra_cost->alu.arith_shift));
10559 	      *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
10560 			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10561 				    GET_CODE (XEXP (x, 1)), 0, speed_p));
10562 	      return true;
10563 	    }
10564 
10565 	  if (speed_p)
10566 	    *cost += 2 * extra_cost->alu.arith;
10567 	  return false;
10568 	}
10569 
10570       /* Vector mode?  */
10571 
10572       *cost = LIBCALL_COST (2);
10573       return false;
10574 
10575     case PLUS:
10576       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10577 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
10578 	{
10579 	  if (GET_CODE (XEXP (x, 0)) == MULT)
10580 	    {
10581 	      rtx mul_op0, mul_op1, add_op;
10582 
10583 	      if (speed_p)
10584 		*cost += extra_cost->fp[mode != SFmode].mult_addsub;
10585 
10586 	      mul_op0 = XEXP (XEXP (x, 0), 0);
10587 	      mul_op1 = XEXP (XEXP (x, 0), 1);
10588 	      add_op = XEXP (x, 1);
10589 
10590 	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10591 			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
10592 			+ rtx_cost (add_op, mode, code, 0, speed_p));
10593 
10594 	      return true;
10595 	    }
10596 
10597 	  if (speed_p)
10598 	    *cost += extra_cost->fp[mode != SFmode].addsub;
10599 	  return false;
10600 	}
10601       else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10602 	{
10603 	  *cost = LIBCALL_COST (2);
10604 	  return false;
10605 	}
10606 
10607 	/* Narrow modes can be synthesized in SImode, but the range
10608 	   of useful sub-operations is limited.  Check for shift operations
10609 	   on one of the operands.  Only left shifts can be used in the
10610 	   narrow modes.  */
10611       if (GET_MODE_CLASS (mode) == MODE_INT
10612 	  && GET_MODE_SIZE (mode) < 4)
10613 	{
10614 	  rtx shift_op, shift_reg;
10615 	  shift_reg = NULL;
10616 
10617 	  HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
10618 
10619 	  if (CONST_INT_P (XEXP (x, 1)))
10620 	    {
10621 	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10622 					    INTVAL (XEXP (x, 1)), NULL_RTX,
10623 					    NULL_RTX, 1, 0);
10624 	      *cost = COSTS_N_INSNS (insns);
10625 	      if (speed_p)
10626 		*cost += insns * extra_cost->alu.arith;
10627 	      /* Slightly penalize a narrow operation as the result may
10628 		 need widening.  */
10629 	      *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10630 	      return true;
10631 	    }
10632 
10633 	  /* Slightly penalize a narrow operation as the result may
10634 	     need widening.  */
10635 	  *cost += 1;
10636 	  if (speed_p)
10637 	    *cost += extra_cost->alu.arith;
10638 
10639 	  return false;
10640 	}
10641 
10642       if (mode == SImode)
10643 	{
10644 	  rtx shift_op, shift_reg;
10645 
10646 	  if (TARGET_INT_SIMD
10647 	      && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10648 		  || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10649 	    {
10650 	      /* UXTA[BH] or SXTA[BH].  */
10651 	      if (speed_p)
10652 		*cost += extra_cost->alu.extend_arith;
10653 	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10654 				  0, speed_p)
10655 			+ rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10656 	      return true;
10657 	    }
10658 
10659 	  rtx op0 = XEXP (x, 0);
10660 	  rtx op1 = XEXP (x, 1);
10661 
10662 	  /* Handle a side effect of adding in the carry to an addition.  */
10663 	  if (GET_CODE (op0) == PLUS
10664 	      && arm_carry_operation (op1, mode))
10665 	    {
10666 	      op1 = XEXP (op0, 1);
10667 	      op0 = XEXP (op0, 0);
10668 	    }
10669 	  else if (GET_CODE (op1) == PLUS
10670 		   && arm_carry_operation (op0, mode))
10671 	    {
10672 	      op0 = XEXP (op1, 0);
10673 	      op1 = XEXP (op1, 1);
10674 	    }
10675 	  else if (GET_CODE (op0) == PLUS)
10676 	    {
10677 	      op0 = strip_carry_operation (op0);
10678 	      if (swap_commutative_operands_p (op0, op1))
10679 		std::swap (op0, op1);
10680 	    }
10681 
10682 	  if (arm_carry_operation (op0, mode))
10683 	    {
10684 	      /* Adding the carry to a register is a canonicalization of
10685 		 adding 0 to the register plus the carry.  */
10686 	      if (speed_p)
10687 		*cost += extra_cost->alu.arith;
10688 	      *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10689 	      return true;
10690 	    }
10691 
10692 	  shift_reg = NULL;
10693 	  shift_op = shifter_op_p (op0, &shift_reg);
10694 	  if (shift_op != NULL)
10695 	    {
10696 	      if (shift_reg)
10697 		{
10698 		  if (speed_p)
10699 		    *cost += extra_cost->alu.arith_shift_reg;
10700 		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10701 		}
10702 	      else if (speed_p)
10703 		*cost += extra_cost->alu.arith_shift;
10704 
10705 	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10706 			+ rtx_cost (op1, mode, PLUS, 1, speed_p));
10707 	      return true;
10708 	    }
10709 
10710 	  if (GET_CODE (op0) == MULT)
10711 	    {
10712 	      rtx mul_op = op0;
10713 
10714 	      if (TARGET_DSP_MULTIPLY
10715 		  && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10716 		       && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10717 			   || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10718 			       && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10719 			       && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10720 		      || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10721 			  && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10722 			  && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10723 			  && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10724 			      || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10725 				  && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10726 				  && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10727 				      == 16))))))
10728 		{
10729 		  /* SMLA[BT][BT].  */
10730 		  if (speed_p)
10731 		    *cost += extra_cost->mult[0].extend_add;
10732 		  *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10733 				      SIGN_EXTEND, 0, speed_p)
10734 			    + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10735 					SIGN_EXTEND, 0, speed_p)
10736 			    + rtx_cost (op1, mode, PLUS, 1, speed_p));
10737 		  return true;
10738 		}
10739 
10740 	      if (speed_p)
10741 		*cost += extra_cost->mult[0].add;
10742 	      *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10743 			+ rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10744 			+ rtx_cost (op1, mode, PLUS, 1, speed_p));
10745 	      return true;
10746 	    }
10747 
10748 	  if (CONST_INT_P (op1))
10749 	    {
10750 	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10751 					    INTVAL (op1), NULL_RTX,
10752 					    NULL_RTX, 1, 0);
10753 	      *cost = COSTS_N_INSNS (insns);
10754 	      if (speed_p)
10755 		*cost += insns * extra_cost->alu.arith;
10756 	      *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
10757 	      return true;
10758 	    }
10759 
10760 	  if (speed_p)
10761 	    *cost += extra_cost->alu.arith;
10762 
10763 	  /* Don't recurse here because we want to test the operands
10764 	     without any carry operation.  */
10765 	  *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
10766 	  *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10767 	  return true;
10768 	}
10769 
10770       if (mode == DImode)
10771 	{
10772 	  if (GET_CODE (XEXP (x, 0)) == MULT
10773 	      && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10774 		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10775 		  || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10776 		      && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10777 	    {
10778 	      if (speed_p)
10779 		*cost += extra_cost->mult[1].extend_add;
10780 	      *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10781 				  ZERO_EXTEND, 0, speed_p)
10782 			+ rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10783 				    ZERO_EXTEND, 0, speed_p)
10784 			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10785 	      return true;
10786 	    }
10787 
10788 	  *cost += COSTS_N_INSNS (1);
10789 
10790 	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10791 	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10792 	    {
10793 	      if (speed_p)
10794 		*cost += (extra_cost->alu.arith
10795 			  + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10796 			     ? extra_cost->alu.arith
10797 			     : extra_cost->alu.arith_shift));
10798 
10799 	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10800 				  0, speed_p)
10801 			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10802 	      return true;
10803 	    }
10804 
10805 	  if (speed_p)
10806 	    *cost += 2 * extra_cost->alu.arith;
10807 	  return false;
10808 	}
10809 
10810       /* Vector mode?  */
10811       *cost = LIBCALL_COST (2);
10812       return false;
10813     case IOR:
10814       if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10815         {
10816           if (speed_p)
10817             *cost += extra_cost->alu.rev;
10818 
10819           return true;
10820         }
10821     /* Fall through.  */
10822     case AND: case XOR:
10823       if (mode == SImode)
10824 	{
10825 	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10826 	  rtx op0 = XEXP (x, 0);
10827 	  rtx shift_op, shift_reg;
10828 
10829 	  if (subcode == NOT
10830 	      && (code == AND
10831 		  || (code == IOR && TARGET_THUMB2)))
10832 	    op0 = XEXP (op0, 0);
10833 
10834 	  shift_reg = NULL;
10835 	  shift_op = shifter_op_p (op0, &shift_reg);
10836 	  if (shift_op != NULL)
10837 	    {
10838 	      if (shift_reg)
10839 		{
10840 		  if (speed_p)
10841 		    *cost += extra_cost->alu.log_shift_reg;
10842 		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10843 		}
10844 	      else if (speed_p)
10845 		*cost += extra_cost->alu.log_shift;
10846 
10847 	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10848 			+ rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10849 	      return true;
10850 	    }
10851 
10852 	  if (CONST_INT_P (XEXP (x, 1)))
10853 	    {
10854 	      int insns = arm_gen_constant (code, SImode, NULL_RTX,
10855 					    INTVAL (XEXP (x, 1)), NULL_RTX,
10856 					    NULL_RTX, 1, 0);
10857 
10858 	      *cost = COSTS_N_INSNS (insns);
10859 	      if (speed_p)
10860 		*cost += insns * extra_cost->alu.logical;
10861 	      *cost += rtx_cost (op0, mode, code, 0, speed_p);
10862 	      return true;
10863 	    }
10864 
10865 	  if (speed_p)
10866 	    *cost += extra_cost->alu.logical;
10867 	  *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10868 		    + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10869 	  return true;
10870 	}
10871 
10872       if (mode == DImode)
10873 	{
10874 	  rtx op0 = XEXP (x, 0);
10875 	  enum rtx_code subcode = GET_CODE (op0);
10876 
10877 	  *cost += COSTS_N_INSNS (1);
10878 
10879 	  if (subcode == NOT
10880 	      && (code == AND
10881 		  || (code == IOR && TARGET_THUMB2)))
10882 	    op0 = XEXP (op0, 0);
10883 
10884 	  if (GET_CODE (op0) == ZERO_EXTEND)
10885 	    {
10886 	      if (speed_p)
10887 		*cost += 2 * extra_cost->alu.logical;
10888 
10889 	      *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10890 				  0, speed_p)
10891 			+ rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10892 	      return true;
10893 	    }
10894 	  else if (GET_CODE (op0) == SIGN_EXTEND)
10895 	    {
10896 	      if (speed_p)
10897 		*cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10898 
10899 	      *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10900 				  0, speed_p)
10901 			+ rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10902 	      return true;
10903 	    }
10904 
10905 	  if (speed_p)
10906 	    *cost += 2 * extra_cost->alu.logical;
10907 
10908 	  return true;
10909 	}
10910       /* Vector mode?  */
10911 
10912       *cost = LIBCALL_COST (2);
10913       return false;
10914 
10915     case MULT:
10916       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10917 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
10918 	{
10919 	  rtx op0 = XEXP (x, 0);
10920 
10921 	  if (GET_CODE (op0) == NEG && !flag_rounding_math)
10922 	    op0 = XEXP (op0, 0);
10923 
10924 	  if (speed_p)
10925 	    *cost += extra_cost->fp[mode != SFmode].mult;
10926 
10927 	  *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10928 		    + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10929 	  return true;
10930 	}
10931       else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10932 	{
10933 	  *cost = LIBCALL_COST (2);
10934 	  return false;
10935 	}
10936 
10937       if (mode == SImode)
10938 	{
10939 	  if (TARGET_DSP_MULTIPLY
10940 	      && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10941 		   && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10942 		       || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10943 			   && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10944 			   && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10945 		  || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10946 		      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10947 		      && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10948 		      && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10949 			  || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10950 			      && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10951 			      && (INTVAL (XEXP (XEXP (x, 1), 1))
10952 				  == 16))))))
10953 	    {
10954 	      /* SMUL[TB][TB].  */
10955 	      if (speed_p)
10956 		*cost += extra_cost->mult[0].extend;
10957 	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10958 				 SIGN_EXTEND, 0, speed_p);
10959 	      *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10960 				 SIGN_EXTEND, 1, speed_p);
10961 	      return true;
10962 	    }
10963 	  if (speed_p)
10964 	    *cost += extra_cost->mult[0].simple;
10965 	  return false;
10966 	}
10967 
10968       if (mode == DImode)
10969 	{
10970 	  if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10971 		&& GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10972 	       || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10973 		   && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
10974 	    {
10975 	      if (speed_p)
10976 		*cost += extra_cost->mult[1].extend;
10977 	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10978 				  ZERO_EXTEND, 0, speed_p)
10979 			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10980 				    ZERO_EXTEND, 0, speed_p));
10981 	      return true;
10982 	    }
10983 
10984 	  *cost = LIBCALL_COST (2);
10985 	  return false;
10986 	}
10987 
10988       /* Vector mode?  */
10989       *cost = LIBCALL_COST (2);
10990       return false;
10991 
10992     case NEG:
10993       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10994 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
10995 	{
10996 	  if (GET_CODE (XEXP (x, 0)) == MULT)
10997 	    {
10998 	      /* VNMUL.  */
10999 	      *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
11000 	      return true;
11001 	    }
11002 
11003 	  if (speed_p)
11004 	    *cost += extra_cost->fp[mode != SFmode].neg;
11005 
11006 	  return false;
11007 	}
11008       else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11009 	{
11010 	  *cost = LIBCALL_COST (1);
11011 	  return false;
11012 	}
11013 
11014       if (mode == SImode)
11015 	{
11016 	  if (GET_CODE (XEXP (x, 0)) == ABS)
11017 	    {
11018 	      *cost += COSTS_N_INSNS (1);
11019 	      /* Assume the non-flag-changing variant.  */
11020 	      if (speed_p)
11021 		*cost += (extra_cost->alu.log_shift
11022 			  + extra_cost->alu.arith_shift);
11023 	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
11024 	      return true;
11025 	    }
11026 
11027 	  if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
11028 	      || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
11029 	    {
11030 	      *cost += COSTS_N_INSNS (1);
11031 	      /* No extra cost for MOV imm and MVN imm.  */
11032 	      /* If the comparison op is using the flags, there's no further
11033 		 cost, otherwise we need to add the cost of the comparison.  */
11034 	      if (!(REG_P (XEXP (XEXP (x, 0), 0))
11035 		    && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
11036 		    && XEXP (XEXP (x, 0), 1) == const0_rtx))
11037 		{
11038 		  mode = GET_MODE (XEXP (XEXP (x, 0), 0));
11039 		  *cost += (COSTS_N_INSNS (1)
11040 			    + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
11041 					0, speed_p)
11042 			    + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
11043 					1, speed_p));
11044 		  if (speed_p)
11045 		    *cost += extra_cost->alu.arith;
11046 		}
11047 	      return true;
11048 	    }
11049 
11050 	  if (speed_p)
11051 	    *cost += extra_cost->alu.arith;
11052 	  return false;
11053 	}
11054 
11055       if (GET_MODE_CLASS (mode) == MODE_INT
11056 	  && GET_MODE_SIZE (mode) < 4)
11057 	{
11058 	  /* Slightly disparage, as we might need an extend operation.  */
11059 	  *cost += 1;
11060 	  if (speed_p)
11061 	    *cost += extra_cost->alu.arith;
11062 	  return false;
11063 	}
11064 
11065       if (mode == DImode)
11066 	{
11067 	  *cost += COSTS_N_INSNS (1);
11068 	  if (speed_p)
11069 	    *cost += 2 * extra_cost->alu.arith;
11070 	  return false;
11071 	}
11072 
11073       /* Vector mode?  */
11074       *cost = LIBCALL_COST (1);
11075       return false;
11076 
11077     case NOT:
11078       if (mode == SImode)
11079 	{
11080 	  rtx shift_op;
11081 	  rtx shift_reg = NULL;
11082 
11083 	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11084 
11085 	  if (shift_op)
11086 	    {
11087 	      if (shift_reg != NULL)
11088 		{
11089 		  if (speed_p)
11090 		    *cost += extra_cost->alu.log_shift_reg;
11091 		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11092 		}
11093 	      else if (speed_p)
11094 		*cost += extra_cost->alu.log_shift;
11095 	      *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
11096 	      return true;
11097 	    }
11098 
11099 	  if (speed_p)
11100 	    *cost += extra_cost->alu.logical;
11101 	  return false;
11102 	}
11103       if (mode == DImode)
11104 	{
11105 	  *cost += COSTS_N_INSNS (1);
11106 	  return false;
11107 	}
11108 
11109       /* Vector mode?  */
11110 
11111       *cost += LIBCALL_COST (1);
11112       return false;
11113 
11114     case IF_THEN_ELSE:
11115       {
11116         if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
11117 	  {
11118 	    *cost += COSTS_N_INSNS (3);
11119 	    return true;
11120 	  }
11121 	int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
11122 	int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
11123 
11124 	*cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
11125 	/* Assume that if one arm of the if_then_else is a register,
11126 	   it will be tied with the result, eliminating the
11127 	   conditional insn.  */
11128 	if (REG_P (XEXP (x, 1)))
11129 	  *cost += op2cost;
11130 	else if (REG_P (XEXP (x, 2)))
11131 	  *cost += op1cost;
11132 	else
11133 	  {
11134 	    if (speed_p)
11135 	      {
11136 		if (extra_cost->alu.non_exec_costs_exec)
11137 		  *cost += op1cost + op2cost + extra_cost->alu.non_exec;
11138 		else
11139 		  *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
11140 	      }
11141 	    else
11142 	      *cost += op1cost + op2cost;
11143 	  }
11144       }
11145       return true;
11146 
11147     case COMPARE:
11148       if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
11149 	*cost = 0;
11150       else
11151 	{
11152 	  machine_mode op0mode;
11153 	  /* We'll mostly assume that the cost of a compare is the cost of the
11154 	     LHS.  However, there are some notable exceptions.  */
11155 
11156 	  /* Floating point compares are never done as side-effects.  */
11157 	  op0mode = GET_MODE (XEXP (x, 0));
11158 	  if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
11159 	      && (op0mode == SFmode || !TARGET_VFP_SINGLE))
11160 	    {
11161 	      if (speed_p)
11162 		*cost += extra_cost->fp[op0mode != SFmode].compare;
11163 
11164 	      if (XEXP (x, 1) == CONST0_RTX (op0mode))
11165 		{
11166 		  *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
11167 		  return true;
11168 		}
11169 
11170 	      return false;
11171 	    }
11172 	  else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
11173 	    {
11174 	      *cost = LIBCALL_COST (2);
11175 	      return false;
11176 	    }
11177 
11178 	  /* DImode compares normally take two insns.  */
11179 	  if (op0mode == DImode)
11180 	    {
11181 	      *cost += COSTS_N_INSNS (1);
11182 	      if (speed_p)
11183 		*cost += 2 * extra_cost->alu.arith;
11184 	      return false;
11185 	    }
11186 
11187 	  if (op0mode == SImode)
11188 	    {
11189 	      rtx shift_op;
11190 	      rtx shift_reg;
11191 
11192 	      if (XEXP (x, 1) == const0_rtx
11193 		  && !(REG_P (XEXP (x, 0))
11194 		       || (GET_CODE (XEXP (x, 0)) == SUBREG
11195 			   && REG_P (SUBREG_REG (XEXP (x, 0))))))
11196 		{
11197 		  *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11198 
11199 		  /* Multiply operations that set the flags are often
11200 		     significantly more expensive.  */
11201 		  if (speed_p
11202 		      && GET_CODE (XEXP (x, 0)) == MULT
11203 		      && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
11204 		    *cost += extra_cost->mult[0].flag_setting;
11205 
11206 		  if (speed_p
11207 		      && GET_CODE (XEXP (x, 0)) == PLUS
11208 		      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11209 		      && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
11210 							    0), 1), mode))
11211 		    *cost += extra_cost->mult[0].flag_setting;
11212 		  return true;
11213 		}
11214 
11215 	      shift_reg = NULL;
11216 	      shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11217 	      if (shift_op != NULL)
11218 		{
11219 		  if (shift_reg != NULL)
11220 		    {
11221 		      *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
11222 					 1, speed_p);
11223 		      if (speed_p)
11224 			*cost += extra_cost->alu.arith_shift_reg;
11225 		    }
11226 		  else if (speed_p)
11227 		    *cost += extra_cost->alu.arith_shift;
11228 		  *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
11229 		  *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
11230 		  return true;
11231 		}
11232 
11233 	      if (speed_p)
11234 		*cost += extra_cost->alu.arith;
11235 	      if (CONST_INT_P (XEXP (x, 1))
11236 		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11237 		{
11238 		  *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11239 		  return true;
11240 		}
11241 	      return false;
11242 	    }
11243 
11244 	  /* Vector mode?  */
11245 
11246 	  *cost = LIBCALL_COST (2);
11247 	  return false;
11248 	}
11249       return true;
11250 
11251     case EQ:
11252     case NE:
11253     case LT:
11254     case LE:
11255     case GT:
11256     case GE:
11257     case LTU:
11258     case LEU:
11259     case GEU:
11260     case GTU:
11261     case ORDERED:
11262     case UNORDERED:
11263     case UNEQ:
11264     case UNLE:
11265     case UNLT:
11266     case UNGE:
11267     case UNGT:
11268     case LTGT:
11269       if (outer_code == SET)
11270 	{
11271 	  /* Is it a store-flag operation?  */
11272 	  if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11273 	      && XEXP (x, 1) == const0_rtx)
11274 	    {
11275 	      /* Thumb also needs an IT insn.  */
11276 	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
11277 	      return true;
11278 	    }
11279 	  if (XEXP (x, 1) == const0_rtx)
11280 	    {
11281 	      switch (code)
11282 		{
11283 		case LT:
11284 		  /* LSR Rd, Rn, #31.  */
11285 		  if (speed_p)
11286 		    *cost += extra_cost->alu.shift;
11287 		  break;
11288 
11289 		case EQ:
11290 		  /* RSBS T1, Rn, #0
11291 		     ADC  Rd, Rn, T1.  */
11292 
11293 		case NE:
11294 		  /* SUBS T1, Rn, #1
11295 		     SBC  Rd, Rn, T1.  */
11296 		  *cost += COSTS_N_INSNS (1);
11297 		  break;
11298 
11299 		case LE:
11300 		  /* RSBS T1, Rn, Rn, LSR #31
11301 		     ADC  Rd, Rn, T1. */
11302 		  *cost += COSTS_N_INSNS (1);
11303 		  if (speed_p)
11304 		    *cost += extra_cost->alu.arith_shift;
11305 		  break;
11306 
11307 		case GT:
11308 		  /* RSB  Rd, Rn, Rn, ASR #1
11309 		     LSR  Rd, Rd, #31.  */
11310 		  *cost += COSTS_N_INSNS (1);
11311 		  if (speed_p)
11312 		    *cost += (extra_cost->alu.arith_shift
11313 			      + extra_cost->alu.shift);
11314 		  break;
11315 
11316 		case GE:
11317 		  /* ASR  Rd, Rn, #31
11318 		     ADD  Rd, Rn, #1.  */
11319 		  *cost += COSTS_N_INSNS (1);
11320 		  if (speed_p)
11321 		    *cost += extra_cost->alu.shift;
11322 		  break;
11323 
11324 		default:
11325 		  /* Remaining cases are either meaningless or would take
11326 		     three insns anyway.  */
11327 		  *cost = COSTS_N_INSNS (3);
11328 		  break;
11329 		}
11330 	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11331 	      return true;
11332 	    }
11333 	  else
11334 	    {
11335 	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
11336 	      if (CONST_INT_P (XEXP (x, 1))
11337 		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11338 		{
11339 		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11340 		  return true;
11341 		}
11342 
11343 	      return false;
11344 	    }
11345 	}
11346       /* Not directly inside a set.  If it involves the condition code
11347 	 register it must be the condition for a branch, cond_exec or
11348 	 I_T_E operation.  Since the comparison is performed elsewhere
11349 	 this is just the control part which has no additional
11350 	 cost.  */
11351       else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11352 	       && XEXP (x, 1) == const0_rtx)
11353 	{
11354 	  *cost = 0;
11355 	  return true;
11356 	}
11357       return false;
11358 
11359     case ABS:
11360       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11361 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
11362 	{
11363 	  if (speed_p)
11364 	    *cost += extra_cost->fp[mode != SFmode].neg;
11365 
11366 	  return false;
11367 	}
11368       else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11369 	{
11370 	  *cost = LIBCALL_COST (1);
11371 	  return false;
11372 	}
11373 
11374       if (mode == SImode)
11375 	{
11376 	  if (speed_p)
11377 	    *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
11378 	  return false;
11379 	}
11380       /* Vector mode?  */
11381       *cost = LIBCALL_COST (1);
11382       return false;
11383 
11384     case SIGN_EXTEND:
11385       if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
11386 	  && MEM_P (XEXP (x, 0)))
11387 	{
11388 	  if (mode == DImode)
11389 	    *cost += COSTS_N_INSNS (1);
11390 
11391 	  if (!speed_p)
11392 	    return true;
11393 
11394 	  if (GET_MODE (XEXP (x, 0)) == SImode)
11395 	    *cost += extra_cost->ldst.load;
11396 	  else
11397 	    *cost += extra_cost->ldst.load_sign_extend;
11398 
11399 	  if (mode == DImode)
11400 	    *cost += extra_cost->alu.shift;
11401 
11402 	  return true;
11403 	}
11404 
11405       /* Widening from less than 32-bits requires an extend operation.  */
11406       if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11407 	{
11408 	  /* We have SXTB/SXTH.  */
11409 	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11410 	  if (speed_p)
11411 	    *cost += extra_cost->alu.extend;
11412 	}
11413       else if (GET_MODE (XEXP (x, 0)) != SImode)
11414 	{
11415 	  /* Needs two shifts.  */
11416 	  *cost += COSTS_N_INSNS (1);
11417 	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11418 	  if (speed_p)
11419 	    *cost += 2 * extra_cost->alu.shift;
11420 	}
11421 
11422       /* Widening beyond 32-bits requires one more insn.  */
11423       if (mode == DImode)
11424 	{
11425 	  *cost += COSTS_N_INSNS (1);
11426 	  if (speed_p)
11427 	    *cost += extra_cost->alu.shift;
11428 	}
11429 
11430       return true;
11431 
11432     case ZERO_EXTEND:
11433       if ((arm_arch4
11434 	   || GET_MODE (XEXP (x, 0)) == SImode
11435 	   || GET_MODE (XEXP (x, 0)) == QImode)
11436 	  && MEM_P (XEXP (x, 0)))
11437 	{
11438 	  *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11439 
11440 	  if (mode == DImode)
11441 	    *cost += COSTS_N_INSNS (1);  /* No speed penalty.  */
11442 
11443 	  return true;
11444 	}
11445 
11446       /* Widening from less than 32-bits requires an extend operation.  */
11447       if (GET_MODE (XEXP (x, 0)) == QImode)
11448 	{
11449 	  /* UXTB can be a shorter instruction in Thumb2, but it might
11450 	     be slower than the AND Rd, Rn, #255 alternative.  When
11451 	     optimizing for speed it should never be slower to use
11452 	     AND, and we don't really model 16-bit vs 32-bit insns
11453 	     here.  */
11454 	  if (speed_p)
11455 	    *cost += extra_cost->alu.logical;
11456 	}
11457       else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11458 	{
11459 	  /* We have UXTB/UXTH.  */
11460 	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11461 	  if (speed_p)
11462 	    *cost += extra_cost->alu.extend;
11463 	}
11464       else if (GET_MODE (XEXP (x, 0)) != SImode)
11465 	{
11466 	  /* Needs two shifts.  It's marginally preferable to use
11467 	     shifts rather than two BIC instructions as the second
11468 	     shift may merge with a subsequent insn as a shifter
11469 	     op.  */
11470 	  *cost = COSTS_N_INSNS (2);
11471 	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11472 	  if (speed_p)
11473 	    *cost += 2 * extra_cost->alu.shift;
11474 	}
11475 
11476       /* Widening beyond 32-bits requires one more insn.  */
11477       if (mode == DImode)
11478 	{
11479 	  *cost += COSTS_N_INSNS (1);	/* No speed penalty.  */
11480 	}
11481 
11482       return true;
11483 
11484     case CONST_INT:
11485       *cost = 0;
11486       /* CONST_INT has no mode, so we cannot tell for sure how many
11487 	 insns are really going to be needed.  The best we can do is
11488 	 look at the value passed.  If it fits in SImode, then assume
11489 	 that's the mode it will be used for.  Otherwise assume it
11490 	 will be used in DImode.  */
11491       if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
11492 	mode = SImode;
11493       else
11494 	mode = DImode;
11495 
11496       /* Avoid blowing up in arm_gen_constant ().  */
11497       if (!(outer_code == PLUS
11498 	    || outer_code == AND
11499 	    || outer_code == IOR
11500 	    || outer_code == XOR
11501 	    || outer_code == MINUS))
11502 	outer_code = SET;
11503 
11504     const_int_cost:
11505       if (mode == SImode)
11506 	{
11507 	  *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
11508 						    INTVAL (x), NULL, NULL,
11509 						    0, 0));
11510 	  /* Extra costs?  */
11511 	}
11512       else
11513 	{
11514 	  *cost += COSTS_N_INSNS (arm_gen_constant
11515 				  (outer_code, SImode, NULL,
11516 				   trunc_int_for_mode (INTVAL (x), SImode),
11517 				   NULL, NULL, 0, 0)
11518 				  + arm_gen_constant (outer_code, SImode, NULL,
11519 						      INTVAL (x) >> 32, NULL,
11520 						      NULL, 0, 0));
11521 	  /* Extra costs?  */
11522 	}
11523 
11524       return true;
11525 
11526     case CONST:
11527     case LABEL_REF:
11528     case SYMBOL_REF:
11529       if (speed_p)
11530 	{
11531 	  if (arm_arch_thumb2 && !flag_pic)
11532 	    *cost += COSTS_N_INSNS (1);
11533 	  else
11534 	    *cost += extra_cost->ldst.load;
11535 	}
11536       else
11537 	*cost += COSTS_N_INSNS (1);
11538 
11539       if (flag_pic)
11540 	{
11541 	  *cost += COSTS_N_INSNS (1);
11542 	  if (speed_p)
11543 	    *cost += extra_cost->alu.arith;
11544 	}
11545 
11546       return true;
11547 
11548     case CONST_FIXED:
11549       *cost = COSTS_N_INSNS (4);
11550       /* Fixme.  */
11551       return true;
11552 
11553     case CONST_DOUBLE:
11554       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11555 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
11556 	{
11557 	  if (vfp3_const_double_rtx (x))
11558 	    {
11559 	      if (speed_p)
11560 		*cost += extra_cost->fp[mode == DFmode].fpconst;
11561 	      return true;
11562 	    }
11563 
11564 	  if (speed_p)
11565 	    {
11566 	      if (mode == DFmode)
11567 		*cost += extra_cost->ldst.loadd;
11568 	      else
11569 		*cost += extra_cost->ldst.loadf;
11570 	    }
11571 	  else
11572 	    *cost += COSTS_N_INSNS (1 + (mode == DFmode));
11573 
11574 	  return true;
11575 	}
11576       *cost = COSTS_N_INSNS (4);
11577       return true;
11578 
11579     case CONST_VECTOR:
11580       /* Fixme.  */
11581       if (((TARGET_NEON && TARGET_HARD_FLOAT
11582 	    && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
11583 	   || TARGET_HAVE_MVE)
11584 	  && simd_immediate_valid_for_move (x, mode, NULL, NULL))
11585 	*cost = COSTS_N_INSNS (1);
11586       else
11587 	*cost = COSTS_N_INSNS (4);
11588       return true;
11589 
11590     case HIGH:
11591     case LO_SUM:
11592       /* When optimizing for size, we prefer constant pool entries to
11593 	 MOVW/MOVT pairs, so bump the cost of these slightly.  */
11594       if (!speed_p)
11595 	*cost += 1;
11596       return true;
11597 
11598     case CLZ:
11599       if (speed_p)
11600 	*cost += extra_cost->alu.clz;
11601       return false;
11602 
11603     case SMIN:
11604       if (XEXP (x, 1) == const0_rtx)
11605 	{
11606 	  if (speed_p)
11607 	    *cost += extra_cost->alu.log_shift;
11608 	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11609 	  return true;
11610 	}
11611       /* Fall through.  */
11612     case SMAX:
11613     case UMIN:
11614     case UMAX:
11615       *cost += COSTS_N_INSNS (1);
11616       return false;
11617 
11618     case TRUNCATE:
11619       if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11620 	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11621 	  && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
11622 	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11623 	  && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11624 	       && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
11625 	      || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11626 		  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
11627 		      == ZERO_EXTEND))))
11628 	{
11629 	  if (speed_p)
11630 	    *cost += extra_cost->mult[1].extend;
11631 	  *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
11632 			      ZERO_EXTEND, 0, speed_p)
11633 		    + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
11634 				ZERO_EXTEND, 0, speed_p));
11635 	  return true;
11636 	}
11637       *cost = LIBCALL_COST (1);
11638       return false;
11639 
11640     case UNSPEC_VOLATILE:
11641     case UNSPEC:
11642       return arm_unspec_cost (x, outer_code, speed_p, cost);
11643 
11644     case PC:
11645       /* Reading the PC is like reading any other register.  Writing it
11646 	 is more expensive, but we take that into account elsewhere.  */
11647       *cost = 0;
11648       return true;
11649 
11650     case ZERO_EXTRACT:
11651       /* TODO: Simple zero_extract of bottom bits using AND.  */
11652       /* Fall through.  */
11653     case SIGN_EXTRACT:
11654       if (arm_arch6
11655 	  && mode == SImode
11656 	  && CONST_INT_P (XEXP (x, 1))
11657 	  && CONST_INT_P (XEXP (x, 2)))
11658 	{
11659 	  if (speed_p)
11660 	    *cost += extra_cost->alu.bfx;
11661 	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11662 	  return true;
11663 	}
11664       /* Without UBFX/SBFX, need to resort to shift operations.  */
11665       *cost += COSTS_N_INSNS (1);
11666       if (speed_p)
11667 	*cost += 2 * extra_cost->alu.shift;
11668       *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
11669       return true;
11670 
11671     case FLOAT_EXTEND:
11672       if (TARGET_HARD_FLOAT)
11673 	{
11674 	  if (speed_p)
11675 	    *cost += extra_cost->fp[mode == DFmode].widen;
11676 	  if (!TARGET_VFP5
11677 	      && GET_MODE (XEXP (x, 0)) == HFmode)
11678 	    {
11679 	      /* Pre v8, widening HF->DF is a two-step process, first
11680 	         widening to SFmode.  */
11681 	      *cost += COSTS_N_INSNS (1);
11682 	      if (speed_p)
11683 		*cost += extra_cost->fp[0].widen;
11684 	    }
11685 	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11686 	  return true;
11687 	}
11688 
11689       *cost = LIBCALL_COST (1);
11690       return false;
11691 
11692     case FLOAT_TRUNCATE:
11693       if (TARGET_HARD_FLOAT)
11694 	{
11695 	  if (speed_p)
11696 	    *cost += extra_cost->fp[mode == DFmode].narrow;
11697 	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11698 	  return true;
11699 	  /* Vector modes?  */
11700 	}
11701       *cost = LIBCALL_COST (1);
11702       return false;
11703 
11704     case FMA:
11705       if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11706         {
11707           rtx op0 = XEXP (x, 0);
11708           rtx op1 = XEXP (x, 1);
11709           rtx op2 = XEXP (x, 2);
11710 
11711 
11712           /* vfms or vfnma.  */
11713           if (GET_CODE (op0) == NEG)
11714             op0 = XEXP (op0, 0);
11715 
11716           /* vfnms or vfnma.  */
11717           if (GET_CODE (op2) == NEG)
11718             op2 = XEXP (op2, 0);
11719 
11720           *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
11721           *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
11722           *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
11723 
11724           if (speed_p)
11725             *cost += extra_cost->fp[mode ==DFmode].fma;
11726             *cost += extra_cost->fp[mode == DFmode].fma;
11727           return true;
11728         }
11729 
11730       *cost = LIBCALL_COST (3);
11731       return false;
11732 
11733     case FIX:
11734     case UNSIGNED_FIX:
11735       if (TARGET_HARD_FLOAT)
11736 	{
11737 	  /* The *combine_vcvtf2i reduces a vmul+vcvt into
11738 	     a vcvt fixed-point conversion.  */
11739 	  if (code == FIX && mode == SImode
11740 	      && GET_CODE (XEXP (x, 0)) == FIX
11741 	      && GET_MODE (XEXP (x, 0)) == SFmode
11742 	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11743 	      && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
11744 		 > 0)
11745 	    {
11746 	      if (speed_p)
11747 		*cost += extra_cost->fp[0].toint;
11748 
11749 	      *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11750 				 code, 0, speed_p);
11751 	      return true;
11752 	    }
11753 
11754 	  if (GET_MODE_CLASS (mode) == MODE_INT)
11755 	    {
11756 	      mode = GET_MODE (XEXP (x, 0));
11757 	      if (speed_p)
11758 		*cost += extra_cost->fp[mode == DFmode].toint;
11759 	      /* Strip of the 'cost' of rounding towards zero.  */
11760 	      /* Strip off the 'cost' of rounding towards zero.  */
11761 		*cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11762 				   0, speed_p);
11763 	      else
11764 		*cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11765 	      /* ??? Increase the cost to deal with transferring from
11766 		 FP -> CORE registers?  */
11767 	      return true;
11768 	    }
11769 	  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11770 		   && TARGET_VFP5)
11771 	    {
11772 	      if (speed_p)
11773 		*cost += extra_cost->fp[mode == DFmode].roundint;
11774 	      return false;
11775 	    }
11776 	  /* Vector costs? */
11777 	}
11778       *cost = LIBCALL_COST (1);
11779       return false;
11780 
11781     case FLOAT:
11782     case UNSIGNED_FLOAT:
11783       if (TARGET_HARD_FLOAT)
11784 	{
11785 	  /* ??? Increase the cost to deal with transferring from CORE
11786 	     -> FP registers?  */
11787 	  if (speed_p)
11788 	    *cost += extra_cost->fp[mode == DFmode].fromint;
11789 	  return false;
11790 	}
11791       *cost = LIBCALL_COST (1);
11792       return false;
11793 
11794     case CALL:
11795       return true;
11796 
11797     case ASM_OPERANDS:
11798       {
11799       /* Just a guess.  Guess number of instructions in the asm
11800          plus one insn per input.  Always a minimum of COSTS_N_INSNS (1)
11801          though (see PR60663).  */
11802         int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11803         int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11804 
11805         *cost = COSTS_N_INSNS (asm_length + num_operands);
11806         return true;
11807       }
11808     default:
11809       if (mode != VOIDmode)
11810 	*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11811       else
11812 	*cost = COSTS_N_INSNS (4); /* Who knows?  */
11813       return false;
11814     }
11815 }
11816 
11817 #undef HANDLE_NARROW_SHIFT_ARITH
11818 
11819 /* RTX costs entry point.  */
11820 
11821 static bool
11822 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11823 	       int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11824 {
11825   bool result;
11826   int code = GET_CODE (x);
11827   gcc_assert (current_tune->insn_extra_cost);
11828 
11829   result =  arm_rtx_costs_internal (x, (enum rtx_code) code,
11830 				(enum rtx_code) outer_code,
11831 				current_tune->insn_extra_cost,
11832 				total, speed);
11833 
11834   if (dump_file && arm_verbose_cost)
11835     {
11836       print_rtl_single (dump_file, x);
11837       fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11838 	       *total, result ? "final" : "partial");
11839     }
11840   return result;
11841 }
11842 
11843 static int
11844 arm_insn_cost (rtx_insn *insn, bool speed)
11845 {
11846   int cost;
11847 
11848   /* Don't cost a simple reg-reg move at a full insn cost: such moves
11849      will likely disappear during register allocation.  */
11850   if (!reload_completed
11851       && GET_CODE (PATTERN (insn)) == SET
11852       && REG_P (SET_DEST (PATTERN (insn)))
11853       && REG_P (SET_SRC (PATTERN (insn))))
11854     return 2;
11855   cost = pattern_cost (PATTERN (insn), speed);
11856   /* If the cost is zero, then it's likely a complex insn.  We don't want the
11857      cost of these to be less than something we know about.  */
11858   return cost ? cost : COSTS_N_INSNS (2);
11859 }
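/* Editorial note (a hedged sketch, not part of the original source): since
   COSTS_N_INSNS (1) expands to 4, returning 2 for a pre-RA reg-to-reg copy
   makes such a move look cheaper than any real instruction, matching the
   expectation that it will be coalesced away, while the COSTS_N_INSNS (2)
   floor keeps unrecognized, complex patterns from looking artificially
   cheap.  */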
11860 
11861 /* All address computations that can be done are free, but rtx cost returns
11862    the same for practically all of them.  So we weight the different types
11863    of address here in the order (most pref first):
11864    PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
11865 static inline int
11866 arm_arm_address_cost (rtx x)
11867 {
11868   enum rtx_code c  = GET_CODE (x);
11869 
11870   if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11871     return 0;
11872   if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11873     return 10;
11874 
11875   if (c == PLUS)
11876     {
11877       if (CONST_INT_P (XEXP (x, 1)))
11878 	return 2;
11879 
11880       if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11881 	return 3;
11882 
11883       return 4;
11884     }
11885 
11886   return 6;
11887 }
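/* Editorial note -- an illustrative reading of the weights above, not part
   of the original source:

     [r0], #4            POST_INC                  -> 0   (most preferred)
     [r0, #8]            PLUS reg, const_int       -> 2
     [r0, r1, lsl #2]    PLUS with arithmetic op   -> 3
     [r0, r1]            PLUS reg, reg             -> 4
     [r0]                plain REG                 -> 6
     constant-pool ref   MEM/LABEL_REF/SYMBOL_REF  -> 10  (least preferred)  */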
11888 
11889 static inline int
11890 arm_thumb_address_cost (rtx x)
11891 {
11892   enum rtx_code c  = GET_CODE (x);
11893 
11894   if (c == REG)
11895     return 1;
11896   if (c == PLUS
11897       && REG_P (XEXP (x, 0))
11898       && CONST_INT_P (XEXP (x, 1)))
11899     return 1;
11900 
11901   return 2;
11902 }
11903 
11904 static int
11905 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11906 		  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11907 {
11908   return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11909 }
11910 
11911 /* Adjust cost hook for XScale.  */
11912 static bool
11913 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11914 			  int * cost)
11915 {
11916   /* Some true dependencies can have a higher cost depending
11917      on precisely how certain input operands are used.  */
11918   if (dep_type == 0
11919       && recog_memoized (insn) >= 0
11920       && recog_memoized (dep) >= 0)
11921     {
11922       int shift_opnum = get_attr_shift (insn);
11923       enum attr_type attr_type = get_attr_type (dep);
11924 
11925       /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11926 	 operand for INSN.  If we have a shifted input operand and the
11927 	 instruction we depend on is another ALU instruction, then we may
11928 	 have to account for an additional stall.  */
11929       if (shift_opnum != 0
11930 	  && (attr_type == TYPE_ALU_SHIFT_IMM
11931 	      || attr_type == TYPE_ALUS_SHIFT_IMM
11932 	      || attr_type == TYPE_LOGIC_SHIFT_IMM
11933 	      || attr_type == TYPE_LOGICS_SHIFT_IMM
11934 	      || attr_type == TYPE_ALU_SHIFT_REG
11935 	      || attr_type == TYPE_ALUS_SHIFT_REG
11936 	      || attr_type == TYPE_LOGIC_SHIFT_REG
11937 	      || attr_type == TYPE_LOGICS_SHIFT_REG
11938 	      || attr_type == TYPE_MOV_SHIFT
11939 	      || attr_type == TYPE_MVN_SHIFT
11940 	      || attr_type == TYPE_MOV_SHIFT_REG
11941 	      || attr_type == TYPE_MVN_SHIFT_REG))
11942 	{
11943 	  rtx shifted_operand;
11944 	  int opno;
11945 
11946 	  /* Get the shifted operand.  */
11947 	  extract_insn (insn);
11948 	  shifted_operand = recog_data.operand[shift_opnum];
11949 
11950 	  /* Iterate over all the operands in DEP.  If we write an operand
11951 	     that overlaps with SHIFTED_OPERAND, then we have increase the
11952 	     that overlaps with SHIFTED_OPERAND, then we have to increase the
11953 	  extract_insn (dep);
11954 	  preprocess_constraints (dep);
11955 	  for (opno = 0; opno < recog_data.n_operands; opno++)
11956 	    {
11957 	      /* We can ignore strict inputs.  */
11958 	      if (recog_data.operand_type[opno] == OP_IN)
11959 		continue;
11960 
11961 	      if (reg_overlap_mentioned_p (recog_data.operand[opno],
11962 					   shifted_operand))
11963 		{
11964 		  *cost = 2;
11965 		  return false;
11966 		}
11967 	    }
11968 	}
11969     }
11970   return true;
11971 }
11972 
11973 /* Adjust cost hook for Cortex A9.  */
11974 static bool
11975 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11976 			     int * cost)
11977 {
11978   switch (dep_type)
11979     {
11980     case REG_DEP_ANTI:
11981       *cost = 0;
11982       return false;
11983 
11984     case REG_DEP_TRUE:
11985     case REG_DEP_OUTPUT:
11986 	if (recog_memoized (insn) >= 0
11987 	    && recog_memoized (dep) >= 0)
11988 	  {
11989 	    if (GET_CODE (PATTERN (insn)) == SET)
11990 	      {
11991 		if (GET_MODE_CLASS
11992 		    (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11993 		  || GET_MODE_CLASS
11994 		    (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11995 		  {
11996 		    enum attr_type attr_type_insn = get_attr_type (insn);
11997 		    enum attr_type attr_type_dep = get_attr_type (dep);
11998 
11999 		    /* By default all dependencies of the form
12000 		       s0 = s0 <op> s1
12001 		       s0 = s0 <op> s2
12002 		       have an extra latency of 1 cycle because
12003 		       of the input and output dependency in this
12004 		       case.  However, this gets modeled as a true
12005 		       dependency and hence all these checks.  */
12006 		    if (REG_P (SET_DEST (PATTERN (insn)))
12007 			&& reg_set_p (SET_DEST (PATTERN (insn)), dep))
12008 		      {
12009 			/* FMACS is a special case where the dependent
12010 			   instruction can be issued 3 cycles before
12011 			   the normal latency in case of an output
12012 			   dependency.  */
12013 			if ((attr_type_insn == TYPE_FMACS
12014 			     || attr_type_insn == TYPE_FMACD)
12015 			    && (attr_type_dep == TYPE_FMACS
12016 				|| attr_type_dep == TYPE_FMACD))
12017 			  {
12018 			    if (dep_type == REG_DEP_OUTPUT)
12019 			      *cost = insn_default_latency (dep) - 3;
12020 			    else
12021 			      *cost = insn_default_latency (dep);
12022 			    return false;
12023 			  }
12024 			else
12025 			  {
12026 			    if (dep_type == REG_DEP_OUTPUT)
12027 			      *cost = insn_default_latency (dep) + 1;
12028 			    else
12029 			      *cost = insn_default_latency (dep);
12030 			  }
12031 			return false;
12032 		      }
12033 		  }
12034 	      }
12035 	  }
12036 	break;
12037 
12038     default:
12039       gcc_unreachable ();
12040     }
12041 
12042   return true;
12043 }
12044 
12045 /* Adjust cost hook for FA726TE.  */
12046 static bool
12047 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12048 			   int * cost)
12049 {
12050   /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
12051      have penalty of 3.  */
12052   if (dep_type == REG_DEP_TRUE
12053       && recog_memoized (insn) >= 0
12054       && recog_memoized (dep) >= 0
12055       && get_attr_conds (dep) == CONDS_SET)
12056     {
12057       /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
12058       if (get_attr_conds (insn) == CONDS_USE
12059           && get_attr_type (insn) != TYPE_BRANCH)
12060         {
12061           *cost = 3;
12062           return false;
12063         }
12064 
12065       if (GET_CODE (PATTERN (insn)) == COND_EXEC
12066           || get_attr_conds (insn) == CONDS_USE)
12067         {
12068           *cost = 0;
12069           return false;
12070         }
12071     }
12072 
12073   return true;
12074 }
12075 
12076 /* Implement TARGET_REGISTER_MOVE_COST.
12077 
12078    Moves between VFP_REGS and GENERAL_REGS are a single insn, but
12079    it is typically more expensive than a single memory access.  We set
12080    the cost to less than two memory accesses so that floating
12081    point to integer conversion does not go through memory.  */
12082 
12083 int
12084 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12085 			reg_class_t from, reg_class_t to)
12086 {
12087   if (TARGET_32BIT)
12088     {
12089       if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
12090 	  || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
12091 	return 15;
12092       else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
12093 	       || (from != IWMMXT_REGS && to == IWMMXT_REGS))
12094 	return 4;
12095       else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
12096 	return 20;
12097       else
12098 	return 2;
12099     }
12100   else
12101     {
12102       if (from == HI_REGS || to == HI_REGS)
12103 	return 4;
12104       else
12105 	return 2;
12106     }
12107 }
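/* Editorial note (illustrative, not part of the original source): with the
   TARGET_32BIT memory move cost of 10 (see arm_memory_move_cost below), the
   VFP<->core figure of 15 sits between one memory access (10) and two (20),
   so float-to-integer conversions stay in registers instead of bouncing
   through the stack, as the comment above intends.  */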
12108 
12109 /* Implement TARGET_MEMORY_MOVE_COST.  */
12110 
12111 int
12112 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
12113 		      bool in ATTRIBUTE_UNUSED)
12114 {
12115   if (TARGET_32BIT)
12116     return 10;
12117   else
12118     {
12119       if (GET_MODE_SIZE (mode) < 4)
12120 	return 8;
12121       else
12122 	return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
12123     }
12124 }
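/* Editorial note (illustrative, not part of the original source): for
   Thumb-1 an SImode memory move costs 2 * 4 * 1 = 8 for LO_REGS but
   2 * 4 * 2 = 16 for any other class, and sub-word modes are pinned at 8;
   on 32-bit targets every memory move is a flat 10.  */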
12125 
12126 /* Vectorizer cost model implementation.  */
12127 
12128 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
12129 static int
12130 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
12131 				tree vectype,
12132 				int misalign ATTRIBUTE_UNUSED)
12133 {
12134   unsigned elements;
12135 
12136   switch (type_of_cost)
12137     {
12138       case scalar_stmt:
12139         return current_tune->vec_costs->scalar_stmt_cost;
12140 
12141       case scalar_load:
12142         return current_tune->vec_costs->scalar_load_cost;
12143 
12144       case scalar_store:
12145         return current_tune->vec_costs->scalar_store_cost;
12146 
12147       case vector_stmt:
12148         return current_tune->vec_costs->vec_stmt_cost;
12149 
12150       case vector_load:
12151         return current_tune->vec_costs->vec_align_load_cost;
12152 
12153       case vector_store:
12154         return current_tune->vec_costs->vec_store_cost;
12155 
12156       case vec_to_scalar:
12157         return current_tune->vec_costs->vec_to_scalar_cost;
12158 
12159       case scalar_to_vec:
12160         return current_tune->vec_costs->scalar_to_vec_cost;
12161 
12162       case unaligned_load:
12163       case vector_gather_load:
12164         return current_tune->vec_costs->vec_unalign_load_cost;
12165 
12166       case unaligned_store:
12167       case vector_scatter_store:
12168         return current_tune->vec_costs->vec_unalign_store_cost;
12169 
12170       case cond_branch_taken:
12171         return current_tune->vec_costs->cond_taken_branch_cost;
12172 
12173       case cond_branch_not_taken:
12174         return current_tune->vec_costs->cond_not_taken_branch_cost;
12175 
12176       case vec_perm:
12177       case vec_promote_demote:
12178         return current_tune->vec_costs->vec_stmt_cost;
12179 
12180       case vec_construct:
12181 	elements = TYPE_VECTOR_SUBPARTS (vectype);
12182 	return elements / 2 + 1;
12183 
12184       default:
12185         gcc_unreachable ();
12186     }
12187 }
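/* Editorial note (illustrative, not part of the original source): only
   vec_construct is computed inline and scales with the vector width, e.g.
   a 4-element vector costs 4/2 + 1 = 3 and a 2-element vector 2/2 + 1 = 2;
   every other kind simply forwards to the per-CPU vec_costs table.  */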
12188 
12189 /* Implement targetm.vectorize.add_stmt_cost.  */
12190 
12191 static unsigned
12192 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
12193 		   struct _stmt_vec_info *stmt_info, int misalign,
12194 		   enum vect_cost_model_location where)
12195 {
12196   unsigned *cost = (unsigned *) data;
12197   unsigned retval = 0;
12198 
12199   if (flag_vect_cost_model)
12200     {
12201       tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
12202       int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
12203 
12204       /* Statements in an inner loop relative to the loop being
12205 	 vectorized are weighted more heavily.  The value here is
12206 	 arbitrary and could potentially be improved with analysis.  */
12207       if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
12208 	count *= 50;  /* FIXME.  */
12209 
12210       retval = (unsigned) (count * stmt_cost);
12211       cost[where] += retval;
12212     }
12213 
12214   return retval;
12215 }
12216 
12217 /* Return true if and only if this insn can dual-issue only as older.  */
12218 static bool
12219 cortexa7_older_only (rtx_insn *insn)
12220 {
12221   if (recog_memoized (insn) < 0)
12222     return false;
12223 
12224   switch (get_attr_type (insn))
12225     {
12226     case TYPE_ALU_DSP_REG:
12227     case TYPE_ALU_SREG:
12228     case TYPE_ALUS_SREG:
12229     case TYPE_LOGIC_REG:
12230     case TYPE_LOGICS_REG:
12231     case TYPE_ADC_REG:
12232     case TYPE_ADCS_REG:
12233     case TYPE_ADR:
12234     case TYPE_BFM:
12235     case TYPE_REV:
12236     case TYPE_MVN_REG:
12237     case TYPE_SHIFT_IMM:
12238     case TYPE_SHIFT_REG:
12239     case TYPE_LOAD_BYTE:
12240     case TYPE_LOAD_4:
12241     case TYPE_STORE_4:
12242     case TYPE_FFARITHS:
12243     case TYPE_FADDS:
12244     case TYPE_FFARITHD:
12245     case TYPE_FADDD:
12246     case TYPE_FMOV:
12247     case TYPE_F_CVT:
12248     case TYPE_FCMPS:
12249     case TYPE_FCMPD:
12250     case TYPE_FCONSTS:
12251     case TYPE_FCONSTD:
12252     case TYPE_FMULS:
12253     case TYPE_FMACS:
12254     case TYPE_FMULD:
12255     case TYPE_FMACD:
12256     case TYPE_FDIVS:
12257     case TYPE_FDIVD:
12258     case TYPE_F_MRC:
12259     case TYPE_F_MRRC:
12260     case TYPE_F_FLAG:
12261     case TYPE_F_LOADS:
12262     case TYPE_F_STORES:
12263       return true;
12264     default:
12265       return false;
12266     }
12267 }
12268 
12269 /* Return true if and only if this insn can dual-issue as younger.  */
12270 static bool
12271 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
12272 {
12273   if (recog_memoized (insn) < 0)
12274     {
12275       if (verbose > 5)
12276         fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
12277       return false;
12278     }
12279 
12280   switch (get_attr_type (insn))
12281     {
12282     case TYPE_ALU_IMM:
12283     case TYPE_ALUS_IMM:
12284     case TYPE_LOGIC_IMM:
12285     case TYPE_LOGICS_IMM:
12286     case TYPE_EXTEND:
12287     case TYPE_MVN_IMM:
12288     case TYPE_MOV_IMM:
12289     case TYPE_MOV_REG:
12290     case TYPE_MOV_SHIFT:
12291     case TYPE_MOV_SHIFT_REG:
12292     case TYPE_BRANCH:
12293     case TYPE_CALL:
12294       return true;
12295     default:
12296       return false;
12297     }
12298 }
12299 
12300 
12301 /* Look for an instruction that can dual issue only as an older
12302    instruction, and move it in front of any instructions that can
12303    dual-issue as younger, while preserving the relative order of all
12304    other instructions in the ready list.  This is a heuristic to help
12305    dual-issue in later cycles, by postponing issue of more flexible
12306    instructions.  This heuristic may affect dual issue opportunities
12307    in the current cycle.  */
12308 static void
12309 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
12310 			int *n_readyp, int clock)
12311 {
12312   int i;
12313   int first_older_only = -1, first_younger = -1;
12314 
12315   if (verbose > 5)
12316     fprintf (file,
12317              ";; sched_reorder for cycle %d with %d insns in ready list\n",
12318              clock,
12319              *n_readyp);
12320 
12321   /* Traverse the ready list from the head (the instruction to issue
12322      first), looking for the first instruction that can issue as
12323      younger and the first instruction that can dual-issue only as
12324      older.  */
12325   for (i = *n_readyp - 1; i >= 0; i--)
12326     {
12327       rtx_insn *insn = ready[i];
12328       if (cortexa7_older_only (insn))
12329         {
12330           first_older_only = i;
12331           if (verbose > 5)
12332             fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
12333           break;
12334         }
12335       else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
12336         first_younger = i;
12337     }
12338 
12339   /* Nothing to reorder because either no younger insn found or insn
12340      that can dual-issue only as older appears before any insn that
12341      can dual-issue as younger.  */
12342   if (first_younger == -1)
12343     {
12344       if (verbose > 5)
12345         fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
12346       return;
12347     }
12348 
12349   /* Nothing to reorder because no older-only insn in the ready list.  */
12350   if (first_older_only == -1)
12351     {
12352       if (verbose > 5)
12353         fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
12354       return;
12355     }
12356 
12357   /* Move first_older_only insn before first_younger.  */
12358   if (verbose > 5)
12359     fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12360              INSN_UID(ready [first_older_only]),
12361              INSN_UID(ready [first_younger]));
12362   rtx_insn *first_older_only_insn = ready [first_older_only];
12363   for (i = first_older_only; i < first_younger; i++)
12364     {
12365       ready[i] = ready[i+1];
12366     }
12367 
12368   ready[i] = first_older_only_insn;
12369   return;
12370 }
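/* Editorial note (illustrative, not part of the original source): if the
   ready list holds, from the head downwards, an ALU-immediate insn
   (younger-capable) followed by a load (older-only), the scan above records
   the ALU insn as first_younger and the load as first_older_only; the final
   shuffle then moves the load in front of the ALU insn so the older-only
   insn issues first, keeping the more flexible insn available for
   dual-issue in a later cycle.  */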
12371 
12372 /* Implement TARGET_SCHED_REORDER. */
12373 static int
12374 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12375                    int clock)
12376 {
12377   switch (arm_tune)
12378     {
12379     case TARGET_CPU_cortexa7:
12380       cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12381       break;
12382     default:
12383       /* Do nothing for other cores.  */
12384       break;
12385     }
12386 
12387   return arm_issue_rate ();
12388 }
12389 
12390 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12391    It corrects the value of COST based on the relationship between
12392    INSN and DEP through the dependence LINK.  It returns the new
12393    value. There is a per-core adjust_cost hook to adjust scheduler costs
12394    and the per-core hook can choose to completely override the generic
12395    adjust_cost function. Only put bits of code into arm_adjust_cost that
12396    are common across all cores.  */
12397 static int
12398 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
12399 		 unsigned int)
12400 {
12401   rtx i_pat, d_pat;
12402 
12403  /* When generating Thumb-1 code, we want to place flag-setting operations
12404     close to a conditional branch which depends on them, so that we can
12405     omit the comparison. */
12406   if (TARGET_THUMB1
12407       && dep_type == 0
12408       && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12409       && recog_memoized (dep) >= 0
12410       && get_attr_conds (dep) == CONDS_SET)
12411     return 0;
12412 
12413   if (current_tune->sched_adjust_cost != NULL)
12414     {
12415       if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
12416 	return cost;
12417     }
12418 
12419   /* XXX Is this strictly true?  */
12420   if (dep_type == REG_DEP_ANTI
12421       || dep_type == REG_DEP_OUTPUT)
12422     return 0;
12423 
12424   /* Call insns don't incur a stall, even if they follow a load.  */
12425   if (dep_type == 0
12426       && CALL_P (insn))
12427     return 1;
12428 
12429   if ((i_pat = single_set (insn)) != NULL
12430       && MEM_P (SET_SRC (i_pat))
12431       && (d_pat = single_set (dep)) != NULL
12432       && MEM_P (SET_DEST (d_pat)))
12433     {
12434       rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12435       /* This is a load after a store; there is no conflict if the load reads
12436 	 from a cached area.  Assume that loads from the stack, and from the
12437 	 constant pool are cached, and that others will miss.  This is a
12438 	 hack.  */
12439 
12440       if ((GET_CODE (src_mem) == SYMBOL_REF
12441 	   && CONSTANT_POOL_ADDRESS_P (src_mem))
12442 	  || reg_mentioned_p (stack_pointer_rtx, src_mem)
12443 	  || reg_mentioned_p (frame_pointer_rtx, src_mem)
12444 	  || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12445 	return 1;
12446     }
12447 
12448   return cost;
12449 }
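/* Editorial note (illustrative, not part of the original source): the final
   test above means a load issued after a store is charged only 1 cycle when
   its address is a constant-pool symbol or mentions the stack or frame
   pointer (assumed cache hits); any other load after a store keeps the
   scheduler's default latency.  */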
12450 
12451 int
12452 arm_max_conditional_execute (void)
12453 {
12454   return max_insns_skipped;
12455 }
12456 
12457 static int
12458 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12459 {
12460   if (TARGET_32BIT)
12461     return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12462   else
12463     return (optimize > 0) ? 2 : 0;
12464 }
12465 
12466 static int
12467 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12468 {
12469   return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12470 }
12471 
12472 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12473    on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12474    sequences of non-executed instructions in IT blocks probably take the same
12475    amount of time as executed instructions (and the IT instruction itself takes
12476    space in icache).  This function was experimentally determined to give good
12477    results on a popular embedded benchmark.  */
12478 
12479 static int
12480 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12481 {
12482   return (TARGET_32BIT && speed_p) ? 1
12483          : arm_default_branch_cost (speed_p, predictable_p);
12484 }
12485 
12486 static int
12487 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12488 {
12489   return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12490 }
12491 
12492 static bool fp_consts_inited = false;
12493 
12494 static REAL_VALUE_TYPE value_fp0;
12495 
12496 static void
12497 init_fp_table (void)
12498 {
12499   REAL_VALUE_TYPE r;
12500 
12501   r = REAL_VALUE_ATOF ("0", DFmode);
12502   value_fp0 = r;
12503   fp_consts_inited = true;
12504 }
12505 
12506 /* Return TRUE if rtx X is a valid immediate FP constant.  */
12507 int
12508 arm_const_double_rtx (rtx x)
12509 {
12510   const REAL_VALUE_TYPE *r;
12511 
12512   if (!fp_consts_inited)
12513     init_fp_table ();
12514 
12515   r = CONST_DOUBLE_REAL_VALUE (x);
12516   if (REAL_VALUE_MINUS_ZERO (*r))
12517     return 0;
12518 
12519   if (real_equal (r, &value_fp0))
12520     return 1;
12521 
12522   return 0;
12523 }
12524 
12525 /* VFPv3 has a fairly wide range of representable immediates, formed from
12526    "quarter-precision" floating-point values. These can be evaluated using this
12527    formula (with ^ for exponentiation):
12528 
12529      -1^s * n * 2^-r
12530 
12531    Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12532    16 <= n <= 31 and 0 <= r <= 7.
12533 
12534    These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12535 
12536      - A (most-significant) is the sign bit.
12537      - BCD are the exponent (encoded as r XOR 3).
12538      - EFGH are the mantissa (encoded as n - 16).
12539 */
12540 
12541 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12542    fconst[sd] instruction, or -1 if X isn't suitable.  */
12543 static int
12544 vfp3_const_double_index (rtx x)
12545 {
12546   REAL_VALUE_TYPE r, m;
12547   int sign, exponent;
12548   unsigned HOST_WIDE_INT mantissa, mant_hi;
12549   unsigned HOST_WIDE_INT mask;
12550   int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12551   bool fail;
12552 
12553   if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12554     return -1;
12555 
12556   r = *CONST_DOUBLE_REAL_VALUE (x);
12557 
12558   /* We can't represent these things, so detect them first.  */
12559   if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12560     return -1;
12561 
12562   /* Extract sign, exponent and mantissa.  */
12563   sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12564   r = real_value_abs (&r);
12565   exponent = REAL_EXP (&r);
12566   /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12567      highest (sign) bit, with a fixed binary point at bit point_pos.
12568      WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12569      bits for the mantissa, this may fail (low bits would be lost).  */
12570   real_ldexp (&m, &r, point_pos - exponent);
12571   wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12572   mantissa = w.elt (0);
12573   mant_hi = w.elt (1);
12574 
12575   /* If there are bits set in the low part of the mantissa, we can't
12576      represent this value.  */
12577   if (mantissa != 0)
12578     return -1;
12579 
12580   /* Now make it so that mantissa contains the most-significant bits, and move
12581      the point_pos to indicate that the least-significant bits have been
12582      discarded.  */
12583   point_pos -= HOST_BITS_PER_WIDE_INT;
12584   mantissa = mant_hi;
12585 
12586   /* We can permit four significant bits of mantissa only, plus a high bit
12587      which is always 1.  */
12588   mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
12589   if ((mantissa & mask) != 0)
12590     return -1;
12591 
12592   /* Now we know the mantissa is in range, chop off the unneeded bits.  */
12593   mantissa >>= point_pos - 5;
12594 
12595   /* The mantissa may be zero. Disallow that case. (It's possible to load the
12596      floating-point immediate zero with Neon using an integer-zero load, but
12597      that case is handled elsewhere.)  */
12598   if (mantissa == 0)
12599     return -1;
12600 
12601   gcc_assert (mantissa >= 16 && mantissa <= 31);
12602 
12603   /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12604      normalized significands are in the range [1, 2). (Our mantissa is shifted
12605      left 4 places at this point relative to normalized IEEE754 values).  GCC
12606      internally uses [0.5, 1) (see real.c), so the exponent returned from
12607      REAL_EXP must be altered.  */
12608   exponent = 5 - exponent;
12609 
12610   if (exponent < 0 || exponent > 7)
12611     return -1;
12612 
12613   /* Sign, mantissa and exponent are now in the correct form to plug into the
12614      formula described in the comment above.  */
12615   return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12616 }
12617 
12618 /* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
12619 int
12620 vfp3_const_double_rtx (rtx x)
12621 {
12622   if (!TARGET_VFP3)
12623     return 0;
12624 
12625   return vfp3_const_double_index (x) != -1;
12626 }
12627 
12628 /* Recognize immediates which can be used in various Neon and MVE instructions.
12629    Legal immediates are described by the following table (for VMVN variants, the
12630    bitwise inverse of the constant shown is recognized. In either case, VMOV
12631    is output and the correct instruction to use for a given constant is chosen
12632    by the assembler). The constant shown is replicated across all elements of
12633    the destination vector.
12634 
12635    insn elems variant constant (binary)
12636    ---- ----- ------- -----------------
12637    vmov  i32     0    00000000 00000000 00000000 abcdefgh
12638    vmov  i32     1    00000000 00000000 abcdefgh 00000000
12639    vmov  i32     2    00000000 abcdefgh 00000000 00000000
12640    vmov  i32     3    abcdefgh 00000000 00000000 00000000
12641    vmov  i16     4    00000000 abcdefgh
12642    vmov  i16     5    abcdefgh 00000000
12643    vmvn  i32     6    00000000 00000000 00000000 abcdefgh
12644    vmvn  i32     7    00000000 00000000 abcdefgh 00000000
12645    vmvn  i32     8    00000000 abcdefgh 00000000 00000000
12646    vmvn  i32     9    abcdefgh 00000000 00000000 00000000
12647    vmvn  i16    10    00000000 abcdefgh
12648    vmvn  i16    11    abcdefgh 00000000
12649    vmov  i32    12    00000000 00000000 abcdefgh 11111111
12650    vmvn  i32    13    00000000 00000000 abcdefgh 11111111
12651    vmov  i32    14    00000000 abcdefgh 11111111 11111111
12652    vmvn  i32    15    00000000 abcdefgh 11111111 11111111
12653    vmov   i8    16    abcdefgh
12654    vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
12655                       eeeeeeee ffffffff gggggggg hhhhhhhh
12656    vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
12657    vmov  f32    19    00000000 00000000 00000000 00000000
12658 
12659    For case 18, B = !b. Representable values are exactly those accepted by
12660    vfp3_const_double_index, but are output as floating-point numbers rather
12661    than indices.
12662 
12663    For case 19, we will change it to vmov.i32 when assembling.
12664 
12665    Variants 0-5 (inclusive) may also be used as immediates for the second
12666    operand of VORR/VBIC instructions.
12667 
12668    The INVERSE argument causes the bitwise inverse of the given operand to be
12669    recognized instead (used for recognizing legal immediates for the VAND/VORN
12670    pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12671    *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12672    output, rather than the real insns vbic/vorr).
12673 
12674    INVERSE makes no difference to the recognition of float vectors.
12675 
12676    The return value is the variant of immediate as shown in the above table, or
12677    -1 if the given value doesn't match any of the listed patterns.
12678 */
12679 static int
12680 simd_valid_immediate (rtx op, machine_mode mode, int inverse,
12681 		      rtx *modconst, int *elementwidth)
12682 {
12683 #define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
12684   matches = 1;					\
12685   for (i = 0; i < idx; i += (STRIDE))		\
12686     if (!(TEST))				\
12687       matches = 0;				\
12688   if (matches)					\
12689     {						\
12690       immtype = (CLASS);			\
12691       elsize = (ELSIZE);			\
12692       break;					\
12693     }
12694 
12695   unsigned int i, elsize = 0, idx = 0, n_elts;
12696   unsigned int innersize;
12697   unsigned char bytes[16] = {};
12698   int immtype = -1, matches;
12699   unsigned int invmask = inverse ? 0xff : 0;
12700   bool vector = GET_CODE (op) == CONST_VECTOR;
12701 
12702   if (vector)
12703     n_elts = CONST_VECTOR_NUNITS (op);
12704   else
12705     {
12706       n_elts = 1;
12707       gcc_assert (mode != VOIDmode);
12708     }
12709 
12710   innersize = GET_MODE_UNIT_SIZE (mode);
12711 
12712   /* Only support 128-bit vectors for MVE.  */
12713   if (TARGET_HAVE_MVE && (!vector || n_elts * innersize != 16))
12714     return -1;
12715 
12716   /* Vectors of float constants.  */
12717   if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12718     {
12719       rtx el0 = CONST_VECTOR_ELT (op, 0);
12720 
12721       if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12722         return -1;
12723 
12724       /* FP16 vectors cannot be represented.  */
12725       if (GET_MODE_INNER (mode) == HFmode)
12726 	return -1;
12727 
12728       /* All elements in the vector must be the same.  Note that 0.0 and -0.0
12729 	 are distinct in this context.  */
12730       if (!const_vec_duplicate_p (op))
12731 	return -1;
12732 
12733       if (modconst)
12734         *modconst = CONST_VECTOR_ELT (op, 0);
12735 
12736       if (elementwidth)
12737         *elementwidth = 0;
12738 
12739       if (el0 == CONST0_RTX (GET_MODE (el0)))
12740 	return 19;
12741       else
12742 	return 18;
12743     }
12744 
12745   /* The tricks done in the code below apply for little-endian vector layout.
12746      For big-endian vectors only allow vectors of the form { a, a, a..., a }.
12747      FIXME: Implement logic for big-endian vectors.  */
12748   if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
12749     return -1;
12750 
12751   /* Splat vector constant out into a byte vector.  */
12752   for (i = 0; i < n_elts; i++)
12753     {
12754       rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12755       unsigned HOST_WIDE_INT elpart;
12756 
12757       gcc_assert (CONST_INT_P (el));
12758       elpart = INTVAL (el);
12759 
12760       for (unsigned int byte = 0; byte < innersize; byte++)
12761 	{
12762 	  bytes[idx++] = (elpart & 0xff) ^ invmask;
12763 	  elpart >>= BITS_PER_UNIT;
12764 	}
12765     }
12766 
12767   /* Sanity check.  */
12768   gcc_assert (idx == GET_MODE_SIZE (mode));
12769 
12770   do
12771     {
12772       CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12773 		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12774 
12775       CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12776 		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12777 
12778       CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12779 		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12780 
12781       CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12782 		       && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12783 
12784       CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12785 
12786       CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12787 
12788       CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12789 		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12790 
12791       CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12792 		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12793 
12794       CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12795 		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12796 
12797       CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12798 		       && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12799 
12800       CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12801 
12802       CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12803 
12804       CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12805 			&& bytes[i + 2] == 0 && bytes[i + 3] == 0);
12806 
12807       CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12808 			&& bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12809 
12810       CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12811 			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12812 
12813       CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12814 			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12815 
12816       CHECK (1, 8, 16, bytes[i] == bytes[0]);
12817 
12818       CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12819 			&& bytes[i] == bytes[(i + 8) % idx]);
12820     }
12821   while (0);
12822 
12823   if (immtype == -1)
12824     return -1;
12825 
12826   if (elementwidth)
12827     *elementwidth = elsize;
12828 
12829   if (modconst)
12830     {
12831       unsigned HOST_WIDE_INT imm = 0;
12832 
12833       /* Un-invert bytes of recognized vector, if necessary.  */
12834       if (invmask != 0)
12835         for (i = 0; i < idx; i++)
12836           bytes[i] ^= invmask;
12837 
12838       if (immtype == 17)
12839         {
12840           /* FIXME: Broken on 32-bit H_W_I hosts.  */
12841           gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12842 
12843           for (i = 0; i < 8; i++)
12844             imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12845                    << (i * BITS_PER_UNIT);
12846 
12847           *modconst = GEN_INT (imm);
12848         }
12849       else
12850         {
12851           unsigned HOST_WIDE_INT imm = 0;
12852 
12853           for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12854             imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12855 
12856           *modconst = GEN_INT (imm);
12857         }
12858     }
12859 
12860   return immtype;
12861 #undef CHECK
12862 }
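
/* A minimal, self-contained sketch (hypothetical helper, not part of the
   pattern matcher above) of the i32 "single significant byte" rule behind
   variants 0-3 of the table: a replicated 32-bit constant qualifies when at
   most one of its four bytes is nonzero, and the variant number is the
   index of that byte.  Returns -1 when more than one byte is set.  */
static int
vmov_i32_single_byte_variant_sketch (unsigned int val)
{
  int variant = 0, nonzero = 0;

  for (int byte = 0; byte < 4; byte++)
    if ((val >> (8 * byte)) & 0xff)
      {
	variant = byte;
	nonzero++;
      }

  return nonzero <= 1 ? variant : -1;
}
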
12863 
12864 /* Return TRUE if rtx X is legal for use as either a Neon or MVE VMOV (or,
12865    implicitly, VMVN) immediate.  Write back width per element to *ELEMENTWIDTH
12866    (or zero for float elements), and a modified constant (whatever should be
12867    output for a VMOV) in *MODCONST.  This function was renamed from
12868    "neon_immediate_valid_for_move" to "simd_immediate_valid_for_move"
12869    because it is used by both Neon and MVE.  */
12870 int
12871 simd_immediate_valid_for_move (rtx op, machine_mode mode,
12872 			       rtx *modconst, int *elementwidth)
12873 {
12874   rtx tmpconst;
12875   int tmpwidth;
12876   int retval = simd_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12877 
12878   if (retval == -1)
12879     return 0;
12880 
12881   if (modconst)
12882     *modconst = tmpconst;
12883 
12884   if (elementwidth)
12885     *elementwidth = tmpwidth;
12886 
12887   return 1;
12888 }
12889 
12890 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
12891    the immediate is valid, write a constant suitable for using as an operand
12892    to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12893    *ELEMENTWIDTH.  See simd_valid_immediate for description of INVERSE.  */
12894 
12895 int
12896 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12897 				rtx *modconst, int *elementwidth)
12898 {
12899   rtx tmpconst;
12900   int tmpwidth;
12901   int retval = simd_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12902 
12903   if (retval < 0 || retval > 5)
12904     return 0;
12905 
12906   if (modconst)
12907     *modconst = tmpconst;
12908 
12909   if (elementwidth)
12910     *elementwidth = tmpwidth;
12911 
12912   return 1;
12913 }
12914 
12915 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
12916    the immediate is valid, write a constant suitable for using as an operand
12917    to VSHR/VSHL to *MODCONST and the corresponding element width to
12918    *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
12919    because they have different limitations.  */
12920 
12921 int
12922 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12923 				rtx *modconst, int *elementwidth,
12924 				bool isleftshift)
12925 {
12926   unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12927   unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12928   unsigned HOST_WIDE_INT last_elt = 0;
12929   unsigned HOST_WIDE_INT maxshift;
12930 
12931   /* Split vector constant out into a byte vector.  */
12932   for (i = 0; i < n_elts; i++)
12933     {
12934       rtx el = CONST_VECTOR_ELT (op, i);
12935       unsigned HOST_WIDE_INT elpart;
12936 
12937       if (CONST_INT_P (el))
12938         elpart = INTVAL (el);
12939       else if (CONST_DOUBLE_P (el))
12940         return 0;
12941       else
12942         gcc_unreachable ();
12943 
12944       if (i != 0 && elpart != last_elt)
12945         return 0;
12946 
12947       last_elt = elpart;
12948     }
12949 
12950   /* Shift less than element size.  */
12951   maxshift = innersize * 8;
12952 
12953   if (isleftshift)
12954     {
12955       /* Left shift immediate value can be from 0 to <size>-1.  */
12956       if (last_elt >= maxshift)
12957         return 0;
12958     }
12959   else
12960     {
12961       /* Right shift immediate value can be from 1 to <size>.  */
12962       if (last_elt == 0 || last_elt > maxshift)
12963 	return 0;
12964     }
12965 
12966   if (elementwidth)
12967     *elementwidth = innersize * 8;
12968 
12969   if (modconst)
12970     *modconst = CONST_VECTOR_ELT (op, 0);
12971 
12972   return 1;
12973 }
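
/* A minimal sketch of the range rule the function above enforces, assuming
   ELSIZE is the element size in bits (hypothetical helper): left-shift
   immediates run from 0 to ELSIZE-1, right-shift immediates from 1 to
   ELSIZE.  */
static int
simd_shift_imm_in_range_sketch (unsigned int shift, unsigned int elsize,
				int isleftshift)
{
  return isleftshift ? shift < elsize : (shift >= 1 && shift <= elsize);
}
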
12974 
12975 /* Return a string suitable for output of Neon immediate logic operation
12976    MNEM.  */
12977 
12978 char *
12979 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12980 			     int inverse, int quad)
12981 {
12982   int width, is_valid;
12983   static char templ[40];
12984 
12985   is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12986 
12987   gcc_assert (is_valid != 0);
12988 
12989   if (quad)
12990     sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12991   else
12992     sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12993 
12994   return templ;
12995 }
12996 
12997 /* Return a string suitable for output of Neon immediate shift operation
12998    (VSHR or VSHL) MNEM.  */
12999 
13000 char *
13001 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
13002 			     machine_mode mode, int quad,
13003 			     bool isleftshift)
13004 {
13005   int width, is_valid;
13006   static char templ[40];
13007 
13008   is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
13009   gcc_assert (is_valid != 0);
13010 
13011   if (quad)
13012     sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
13013   else
13014     sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
13015 
13016   return templ;
13017 }
13018 
13019 /* Output a sequence of pairwise operations to implement a reduction.
13020    NOTE: We do "too much work" here, because pairwise operations work on two
13021    registers-worth of operands in one go. Unfortunately we can't exploit those
13022    extra calculations to do the full operation in fewer steps, I don't think.
13023    Although all vector elements of the result but the first are ignored, we
13024    actually calculate the same result in each of the elements. An alternative
13025    such as initially loading a vector with zero to use as each of the second
13026    operands would use up an additional register and take an extra instruction,
13027    for no particular gain.  */
13028 
13029 void
13030 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
13031 		      rtx (*reduc) (rtx, rtx, rtx))
13032 {
13033   unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
13034   rtx tmpsum = op1;
13035 
13036   for (i = parts / 2; i >= 1; i /= 2)
13037     {
13038       rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
13039       emit_insn (reduc (dest, tmpsum, tmpsum));
13040       tmpsum = dest;
13041     }
13042 }
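
/* A minimal scalar model of the halving-reduction idea used above, assuming
   a power-of-two element count (hypothetical helper, not the NEON pairwise
   semantics themselves): each step adds adjacent pairs in place, halving the
   live element count until a single value remains in element 0.  */
static int
pairwise_reduce_sketch (int *vals, unsigned int n)
{
  for (unsigned int parts = n; parts > 1; parts /= 2)
    for (unsigned int i = 0; i < parts / 2; i++)
      vals[i] = vals[2 * i] + vals[2 * i + 1];
  return vals[0];
}
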
13043 
13044 /* Return a non-NULL RTX iff VALS is a vector constant that can be
13045    loaded into a register using VDUP.
13046 
13047    If this is the case, and GENERATE is set, we also generate
13048    instructions to do this and return an RTX to assign to the register.  */
13049 
13050 static rtx
13051 neon_vdup_constant (rtx vals, bool generate)
13052 {
13053   machine_mode mode = GET_MODE (vals);
13054   machine_mode inner_mode = GET_MODE_INNER (mode);
13055   rtx x;
13056 
13057   if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
13058     return NULL_RTX;
13059 
13060   if (!const_vec_duplicate_p (vals, &x))
13061     /* The elements are not all the same.  We could handle repeating
13062        patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
13063        {0, C, 0, C, 0, C, 0, C} which can be loaded using
13064        vdup.i16).  */
13065     return NULL_RTX;
13066 
13067   if (!generate)
13068     return x;
13069 
13070   /* We can load this constant by using VDUP and a constant in a
13071      single ARM register.  This will be cheaper than a vector
13072      load.  */
13073 
13074   x = copy_to_mode_reg (inner_mode, x);
13075   return gen_vec_duplicate (mode, x);
13076 }
13077 
13078 /* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
13079    constants (for vec_init) or CONST_VECTOR, can be efficiently loaded
13080    into a register.
13081 
13082    If this is the case, and GENERATE is set, we also generate code to do
13083    this and return an RTX to copy into the register.  */
13084 
13085 rtx
13086 neon_make_constant (rtx vals, bool generate)
13087 {
13088   machine_mode mode = GET_MODE (vals);
13089   rtx target;
13090   rtx const_vec = NULL_RTX;
13091   int n_elts = GET_MODE_NUNITS (mode);
13092   int n_const = 0;
13093   int i;
13094 
13095   if (GET_CODE (vals) == CONST_VECTOR)
13096     const_vec = vals;
13097   else if (GET_CODE (vals) == PARALLEL)
13098     {
13099       /* A CONST_VECTOR must contain only CONST_INTs and
13100 	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
13101 	 Only store valid constants in a CONST_VECTOR.  */
13102       for (i = 0; i < n_elts; ++i)
13103 	{
13104 	  rtx x = XVECEXP (vals, 0, i);
13105 	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
13106 	    n_const++;
13107 	}
13108       if (n_const == n_elts)
13109 	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
13110     }
13111   else
13112     gcc_unreachable ();
13113 
13114   if (const_vec != NULL
13115       && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
13116     /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
13117     return const_vec;
13118   else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
13119     /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
13120        pipeline cycle; creating the constant takes one or two ARM
13121        pipeline cycles.  */
13122     return target;
13123   else if (const_vec != NULL_RTX)
13124     /* Load from constant pool.  On Cortex-A8 this takes two cycles
13125        (for either double or quad vectors).  We cannot take advantage
13126        of single-cycle VLD1 because we need a PC-relative addressing
13127        mode.  */
13128     return arm_disable_literal_pool ? NULL_RTX : const_vec;
13129   else
13130     /* A PARALLEL containing something not valid inside CONST_VECTOR.
13131        We cannot construct an initializer.  */
13132     return NULL_RTX;
13133 }
13134 
13135 /* Initialize vector TARGET to VALS.  */
13136 
13137 void
13138 neon_expand_vector_init (rtx target, rtx vals)
13139 {
13140   machine_mode mode = GET_MODE (target);
13141   machine_mode inner_mode = GET_MODE_INNER (mode);
13142   int n_elts = GET_MODE_NUNITS (mode);
13143   int n_var = 0, one_var = -1;
13144   bool all_same = true;
13145   rtx x, mem;
13146   int i;
13147 
13148   for (i = 0; i < n_elts; ++i)
13149     {
13150       x = XVECEXP (vals, 0, i);
13151       if (!CONSTANT_P (x))
13152 	++n_var, one_var = i;
13153 
13154       if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13155 	all_same = false;
13156     }
13157 
13158   if (n_var == 0)
13159     {
13160       rtx constant = neon_make_constant (vals);
13161       if (constant != NULL_RTX)
13162 	{
13163 	  emit_move_insn (target, constant);
13164 	  return;
13165 	}
13166     }
13167 
13168   /* Splat a single non-constant element if we can.  */
13169   if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
13170     {
13171       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
13172       emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
13173       return;
13174     }
13175 
13176   /* One field is non-constant.  Load constant then overwrite varying
13177      field.  This is more efficient than using the stack.  */
13178   if (n_var == 1)
13179     {
13180       rtx copy = copy_rtx (vals);
13181       rtx merge_mask = GEN_INT (1 << one_var);
13182 
13183       /* Load constant part of vector, substitute neighboring value for
13184 	 varying element.  */
13185       XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
13186       neon_expand_vector_init (target, copy);
13187 
13188       /* Insert variable.  */
13189       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
13190       emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
13191       return;
13192     }
13193 
13194   /* Construct the vector in memory one field at a time
13195      and load the whole vector.  */
13196   mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13197   for (i = 0; i < n_elts; i++)
13198     emit_move_insn (adjust_address_nv (mem, inner_mode,
13199 				    i * GET_MODE_SIZE (inner_mode)),
13200 		    XVECEXP (vals, 0, i));
13201   emit_move_insn (target, mem);
13202 }
13203 
13204 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
13205    ERR if it doesn't.  EXP indicates the source location, which includes the
13206    inlining history for intrinsics.  */
13207 
13208 static void
13209 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13210 	      const_tree exp, const char *desc)
13211 {
13212   HOST_WIDE_INT lane;
13213 
13214   gcc_assert (CONST_INT_P (operand));
13215 
13216   lane = INTVAL (operand);
13217 
13218   if (lane < low || lane >= high)
13219     {
13220       if (exp)
13221 	error ("%K%s %wd out of range %wd - %wd",
13222 	       exp, desc, lane, low, high - 1);
13223       else
13224 	error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13225     }
13226 }
13227 
13228 /* Bounds-check lanes.  */
13229 
13230 void
13231 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13232 		  const_tree exp)
13233 {
13234   bounds_check (operand, low, high, exp, "lane");
13235 }
13236 
13237 /* Bounds-check constants.  */
13238 
13239 void
13240 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
13241 {
13242   bounds_check (operand, low, high, NULL_TREE, "constant");
13243 }
13244 
13245 HOST_WIDE_INT
13246 neon_element_bits (machine_mode mode)
13247 {
13248   return GET_MODE_UNIT_BITSIZE (mode);
13249 }
13250 
13251 
13252 /* Predicates for `match_operand' and `match_operator'.  */
13253 
13254 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13255    WB level is 2 if full writeback address modes are allowed, 1
13256    if limited writeback address modes (POST_INC and PRE_DEC) are
13257    allowed and 0 if no writeback at all is supported.  */
13258 
13259 int
13260 arm_coproc_mem_operand_wb (rtx op, int wb_level)
13261 {
13262   gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);
13263   rtx ind;
13264 
13265   /* Reject eliminable registers.  */
13266   if (! (reload_in_progress || reload_completed || lra_in_progress)
13267       && (   reg_mentioned_p (frame_pointer_rtx, op)
13268 	  || reg_mentioned_p (arg_pointer_rtx, op)
13269 	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
13270 	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13271 	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13272 	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13273     return FALSE;
13274 
13275   /* Constants are converted into offsets from labels.  */
13276   if (!MEM_P (op))
13277     return FALSE;
13278 
13279   ind = XEXP (op, 0);
13280 
13281   if (reload_completed
13282       && (GET_CODE (ind) == LABEL_REF
13283 	  || (GET_CODE (ind) == CONST
13284 	      && GET_CODE (XEXP (ind, 0)) == PLUS
13285 	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13286 	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13287     return TRUE;
13288 
13289   /* Match: (mem (reg)).  */
13290   if (REG_P (ind))
13291     return arm_address_register_rtx_p (ind, 0);
13292 
13293   /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
13294      acceptable in any case (subject to verification by
13295      arm_address_register_rtx_p).  We need full writeback to accept
13296      PRE_INC and POST_DEC, and at least restricted writeback for
13297      POST_INC and PRE_DEC.  */
13298   if (wb_level > 0
13299       && (GET_CODE (ind) == POST_INC
13300 	  || GET_CODE (ind) == PRE_DEC
13301 	  || (wb_level > 1
13302 	      && (GET_CODE (ind) == PRE_INC
13303 		  || GET_CODE (ind) == POST_DEC))))
13304     return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13305 
13306   if (wb_level > 1
13307       && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
13308       && arm_address_register_rtx_p (XEXP (ind, 0), 0)
13309       && GET_CODE (XEXP (ind, 1)) == PLUS
13310       && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
13311     ind = XEXP (ind, 1);
13312 
13313   /* Match:
13314      (plus (reg)
13315 	   (const))
13316 
13317      The encoded immediate for 16-bit modes is multiplied by 2,
13318      while the encoded immediate for 32-bit and 64-bit modes is
13319      multiplied by 4.  */
13320   int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
13321   if (GET_CODE (ind) == PLUS
13322       && REG_P (XEXP (ind, 0))
13323       && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13324       && CONST_INT_P (XEXP (ind, 1))
13325       && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
13326       && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
13327     return TRUE;
13328 
13329   return FALSE;
13330 }
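
/* A minimal sketch of the (plus (reg) (const)) legality rule used above,
   assuming MODE_SIZE is the access size in bytes (hypothetical helper): the
   scale factor is min (MODE_SIZE, 4), and the offset must be a multiple of
   the factor within +/- 255 * factor.  */
static int
coproc_offset_in_range_sketch (long offset, unsigned int mode_size)
{
  long factor = mode_size < 4 ? (long) mode_size : 4;

  return (offset % factor == 0
	  && offset >= -255 * factor
	  && offset <= 255 * factor);
}
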
13331 
13332 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13333    WB is true if full writeback address modes are allowed and is false
13334    if limited writeback address modes (POST_INC and PRE_DEC) are
13335    allowed.  */
13336 
13337 int arm_coproc_mem_operand (rtx op, bool wb)
13338 {
13339   return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);
13340 }
13341 
13342 /* Return TRUE if OP is a valid coprocessor memory address pattern in a
13343    context in which no writeback address modes are allowed.  */
13344 
13345 int
13346 arm_coproc_mem_operand_no_writeback (rtx op)
13347 {
13348   return arm_coproc_mem_operand_wb (op, 0);
13349 }
13350 
13351 /* Return TRUE if MODE and OP describe a valid MVE vector memory operand.
13352 1. For narrow modes (V8QI, V4QI, V4HI), check for [Rn] and return TRUE for Rn <= LO_REGS.
13353 2. For other modes, check for [Rn] and return TRUE for Rn < R15 (except R13).  */
13354 int
13355 mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
13356 {
13357   enum rtx_code code;
13358   HOST_WIDE_INT val;
13359   int  reg_no;
13360 
13361   /* Match: (mem (reg)).  */
13362   if (REG_P (op))
13363     {
13364       int reg_no = REGNO (op);
13365       return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13366 	       ? reg_no <= LAST_LO_REGNUM
13367 	       :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
13368 	      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13369     }
13370   code = GET_CODE (op);
13371 
13372   if (code == POST_INC || code == PRE_DEC
13373       || code == PRE_INC || code == POST_DEC)
13374     {
13375       reg_no = REGNO (XEXP (op, 0));
13376       return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13377 	       ? reg_no <= LAST_LO_REGNUM
13378 	       :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
13379 	      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13380     }
13381   else if ((code == POST_MODIFY || code == PRE_MODIFY)
13382 	   && GET_CODE (XEXP (op, 1)) == PLUS && REG_P (XEXP (XEXP (op, 1), 1)))
13383     {
13384       reg_no = REGNO (XEXP (op, 0));
13385       val = INTVAL (XEXP ( XEXP (op, 1), 1));
13386       switch (mode)
13387 	{
13388 	  case E_V16QImode:
13389 	    if (abs_hwi (val))
13390 	      return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
13391 		      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13392 	    return FALSE;
13393 	  case E_V8HImode:
13394 	  case E_V8HFmode:
13395 	    if (abs (val) <= 255)
13396 	      return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
13397 		      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13398 	    return FALSE;
13399 	  case E_V8QImode:
13400 	  case E_V4QImode:
13401 	    if (abs_hwi (val))
13402 	      return (reg_no <= LAST_LO_REGNUM
13403 		      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13404 	    return FALSE;
13405 	  case E_V4HImode:
13406 	  case E_V4HFmode:
13407 	    if (val % 2 == 0 && abs (val) <= 254)
13408 	      return (reg_no <= LAST_LO_REGNUM
13409 		      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13410 	    return FALSE;
13411 	  case E_V4SImode:
13412 	  case E_V4SFmode:
13413 	    if (val % 4 == 0 && abs (val) <= 508)
13414 	      return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
13415 		      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13416 	    return FALSE;
13417 	  case E_V2DImode:
13418 	  case E_V2DFmode:
13419 	  case E_TImode:
13420 	    if (val % 4 == 0 && val >= 0 && val <= 1020)
13421 	      return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
13422 		      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13423 	    return FALSE;
13424 	  default:
13425 	    return FALSE;
13426 	}
13427     }
13428   return FALSE;
13429 }
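
/* A minimal sketch of two of the clearer POST_MODIFY/PRE_MODIFY offset
   rules from the switch above, assuming OFFSET is the constant adjustment
   in bytes and ELEM_BITS the element width (hypothetical helper): 16-bit
   elements accept |offset| <= 255, 32-bit elements accept multiples of 4
   with |offset| <= 508.  */
static int
mve_modify_offset_in_range_sketch (long offset, int elem_bits)
{
  if (elem_bits == 16)
    return offset >= -255 && offset <= 255;
  if (elem_bits == 32)
    return offset % 4 == 0 && offset >= -508 && offset <= 508;
  return 0;
}
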
13430 
13431 /* Return TRUE if OP is a memory operand which we can load or store a vector
13432    to/from. TYPE is one of the following values:
13433     0 - Vector load/store (vldr)
13434     1 - Core registers (ldm)
13435     2 - Element/structure loads (vld1)
13436  */
13437 int
13438 neon_vector_mem_operand (rtx op, int type, bool strict)
13439 {
13440   rtx ind;
13441 
13442   /* Reject eliminable registers.  */
13443   if (strict && ! (reload_in_progress || reload_completed)
13444       && (reg_mentioned_p (frame_pointer_rtx, op)
13445 	  || reg_mentioned_p (arg_pointer_rtx, op)
13446 	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
13447 	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13448 	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13449 	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13450     return FALSE;
13451 
13452   /* Constants are converted into offsets from labels.  */
13453   if (!MEM_P (op))
13454     return FALSE;
13455 
13456   ind = XEXP (op, 0);
13457 
13458   if (reload_completed
13459       && (GET_CODE (ind) == LABEL_REF
13460 	  || (GET_CODE (ind) == CONST
13461 	      && GET_CODE (XEXP (ind, 0)) == PLUS
13462 	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13463 	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13464     return TRUE;
13465 
13466   /* Match: (mem (reg)).  */
13467   if (REG_P (ind))
13468     return arm_address_register_rtx_p (ind, 0);
13469 
13470   /* Allow post-increment with Neon registers.  */
13471   if ((type != 1 && GET_CODE (ind) == POST_INC)
13472       || (type == 0 && GET_CODE (ind) == PRE_DEC))
13473     return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13474 
13475   /* Allow post-increment by register for VLDn.  */
13476   if (type == 2 && GET_CODE (ind) == POST_MODIFY
13477       && GET_CODE (XEXP (ind, 1)) == PLUS
13478       && REG_P (XEXP (XEXP (ind, 1), 1))
13479       && REG_P (XEXP (ind, 0))
13480       && rtx_equal_p (XEXP (ind, 0), XEXP (XEXP (ind, 1), 0)))
13481      return true;
13482 
13483   /* Match:
13484      (plus (reg)
13485           (const)).  */
13486   if (type == 0
13487       && GET_CODE (ind) == PLUS
13488       && REG_P (XEXP (ind, 0))
13489       && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13490       && CONST_INT_P (XEXP (ind, 1))
13491       && INTVAL (XEXP (ind, 1)) > -1024
13492       /* For quad modes, we restrict the constant offset to be slightly less
13493 	 than what the instruction format permits.  We have no such constraint
13494 	 on double mode offsets.  (This must match arm_legitimate_index_p.)  */
13495       && (INTVAL (XEXP (ind, 1))
13496 	  < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13497       && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13498     return TRUE;
13499 
13500   return FALSE;
13501 }
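
/* A minimal sketch of the constant-offset rule checked above for the
   vldr/vstr case (hypothetical helper): the offset must be a multiple of 4,
   greater than -1024, and below 1016 for quad vectors or 1024 for double
   vectors.  */
static int
neon_vldr_offset_in_range_sketch (long offset, int quad)
{
  return (offset > -1024
	  && offset < (quad ? 1016 : 1024)
	  && (offset & 3) == 0);
}
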
13502 
13503 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13504    type.  */
13505 int
13506 neon_struct_mem_operand (rtx op)
13507 {
13508   rtx ind;
13509 
13510   /* Reject eliminable registers.  */
13511   if (! (reload_in_progress || reload_completed)
13512       && (   reg_mentioned_p (frame_pointer_rtx, op)
13513 	  || reg_mentioned_p (arg_pointer_rtx, op)
13514 	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
13515 	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13516 	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13517 	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13518     return FALSE;
13519 
13520   /* Constants are converted into offsets from labels.  */
13521   if (!MEM_P (op))
13522     return FALSE;
13523 
13524   ind = XEXP (op, 0);
13525 
13526   if (reload_completed
13527       && (GET_CODE (ind) == LABEL_REF
13528 	  || (GET_CODE (ind) == CONST
13529 	      && GET_CODE (XEXP (ind, 0)) == PLUS
13530 	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13531 	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13532     return TRUE;
13533 
13534   /* Match: (mem (reg)).  */
13535   if (REG_P (ind))
13536     return arm_address_register_rtx_p (ind, 0);
13537 
13538   /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
13539   if (GET_CODE (ind) == POST_INC
13540       || GET_CODE (ind) == PRE_DEC)
13541     return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13542 
13543   return FALSE;
13544 }
13545 
13546 /* Prepares the operands for the VCMLA by lane instruction such that the right
13547    register number is selected.  This instruction is special in that it always
13548    requires a D register, however there is a choice to be made between Dn[0],
13549    Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
13550 
13551    The VCMLA by lane function always selects two values. For instance given D0
13552    and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
13553    used by the instruction.  However given V4SF then index 0 and 1 are valid as
13554    D0[0] or D1[0] are both valid.
13555 
13556    This function centralizes that information based on OPERANDS: OPERANDS[3]
13557    will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
13558    updated to contain the right index.  */
13559 
13560 rtx *
13561 neon_vcmla_lane_prepare_operands (rtx *operands)
13562 {
13563   int lane = INTVAL (operands[4]);
13564   machine_mode constmode = SImode;
13565   machine_mode mode = GET_MODE (operands[3]);
13566   int regno = REGNO (operands[3]);
13567   regno = ((regno - FIRST_VFP_REGNUM) >> 1);
13568   if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
13569     {
13570       operands[3] = gen_int_mode (regno + 1, constmode);
13571       operands[4]
13572 	= gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
13573     }
13574   else
13575     {
13576       operands[3] = gen_int_mode (regno, constmode);
13577       operands[4] = gen_int_mode (lane, constmode);
13578     }
13579   return operands;
13580 }
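
/* A minimal sketch of the lane-to-register remapping performed above,
   assuming DREG is the starting D register number and NUNITS the element
   count of the vector mode (hypothetical helper): lanes at or beyond
   NUNITS/4 select the following D register and are rebased.  */
static void
vcmla_lane_remap_sketch (int dreg, int lane, int nunits,
			 int *out_dreg, int *out_lane)
{
  int half = nunits / 4;

  if (lane > 0 && lane >= half)
    {
      *out_dreg = dreg + 1;
      *out_lane = lane - half;
    }
  else
    {
      *out_dreg = dreg;
      *out_lane = lane;
    }
}
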
13581 
13582 
13583 /* Return true if X is a register that will be eliminated later on.  */
13584 int
13585 arm_eliminable_register (rtx x)
13586 {
13587   return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13588 		       || REGNO (x) == ARG_POINTER_REGNUM
13589 		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13590 			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13591 }
13592 
13593 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
13594    coprocessor registers.  Otherwise return NO_REGS.  */
13595 
13596 enum reg_class
13597 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13598 {
13599   if (mode == HFmode)
13600     {
13601       if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
13602 	return GENERAL_REGS;
13603       if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13604 	return NO_REGS;
13605       return GENERAL_REGS;
13606     }
13607 
13608   /* The neon move patterns handle all legitimate vector and struct
13609      addresses.  */
13610   if (TARGET_NEON
13611       && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13612       && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13613 	  || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13614 	  || VALID_NEON_STRUCT_MODE (mode)))
13615     return NO_REGS;
13616 
13617   if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13618     return NO_REGS;
13619 
13620   return GENERAL_REGS;
13621 }
13622 
13623 /* Values which must be returned in the most-significant end of the return
13624    register.  */
13625 
13626 static bool
13627 arm_return_in_msb (const_tree valtype)
13628 {
13629   return (TARGET_AAPCS_BASED
13630           && BYTES_BIG_ENDIAN
13631 	  && (AGGREGATE_TYPE_P (valtype)
13632 	      || TREE_CODE (valtype) == COMPLEX_TYPE
13633 	      || FIXED_POINT_TYPE_P (valtype)));
13634 }
13635 
13636 /* Return TRUE if X references a SYMBOL_REF.  */
13637 int
13638 symbol_mentioned_p (rtx x)
13639 {
13640   const char * fmt;
13641   int i;
13642 
13643   if (GET_CODE (x) == SYMBOL_REF)
13644     return 1;
13645 
13646   /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13647      are constant offsets, not symbols.  */
13648   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13649     return 0;
13650 
13651   fmt = GET_RTX_FORMAT (GET_CODE (x));
13652 
13653   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13654     {
13655       if (fmt[i] == 'E')
13656 	{
13657 	  int j;
13658 
13659 	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13660 	    if (symbol_mentioned_p (XVECEXP (x, i, j)))
13661 	      return 1;
13662 	}
13663       else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13664 	return 1;
13665     }
13666 
13667   return 0;
13668 }
13669 
13670 /* Return TRUE if X references a LABEL_REF.  */
13671 int
13672 label_mentioned_p (rtx x)
13673 {
13674   const char * fmt;
13675   int i;
13676 
13677   if (GET_CODE (x) == LABEL_REF)
13678     return 1;
13679 
13680   /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13681      instruction, but they are constant offsets, not symbols.  */
13682   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13683     return 0;
13684 
13685   fmt = GET_RTX_FORMAT (GET_CODE (x));
13686   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13687     {
13688       if (fmt[i] == 'E')
13689 	{
13690 	  int j;
13691 
13692 	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13693 	    if (label_mentioned_p (XVECEXP (x, i, j)))
13694 	      return 1;
13695 	}
13696       else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13697 	return 1;
13698     }
13699 
13700   return 0;
13701 }
13702 
13703 int
13704 tls_mentioned_p (rtx x)
13705 {
13706   switch (GET_CODE (x))
13707     {
13708     case CONST:
13709       return tls_mentioned_p (XEXP (x, 0));
13710 
13711     case UNSPEC:
13712       if (XINT (x, 1) == UNSPEC_TLS)
13713 	return 1;
13714 
13715     /* Fall through.  */
13716     default:
13717       return 0;
13718     }
13719 }
13720 
13721 /* Must not copy any rtx that uses a pc-relative address.
13722    Also, disallow copying of load-exclusive instructions that
13723    may appear after splitting of compare-and-swap-style operations
13724    so as to prevent those loops from being transformed away from their
13725    canonical forms (see PR 69904).  */
13726 
13727 static bool
13728 arm_cannot_copy_insn_p (rtx_insn *insn)
13729 {
13730   /* The tls call insn cannot be copied, as it is paired with a data
13731      word.  */
13732   if (recog_memoized (insn) == CODE_FOR_tlscall)
13733     return true;
13734 
13735   subrtx_iterator::array_type array;
13736   FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13737     {
13738       const_rtx x = *iter;
13739       if (GET_CODE (x) == UNSPEC
13740 	  && (XINT (x, 1) == UNSPEC_PIC_BASE
13741 	      || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13742 	return true;
13743     }
13744 
13745   rtx set = single_set (insn);
13746   if (set)
13747     {
13748       rtx src = SET_SRC (set);
13749       if (GET_CODE (src) == ZERO_EXTEND)
13750 	src = XEXP (src, 0);
13751 
13752       /* Catch the load-exclusive and load-acquire operations.  */
13753       if (GET_CODE (src) == UNSPEC_VOLATILE
13754 	  && (XINT (src, 1) == VUNSPEC_LL
13755 	      || XINT (src, 1) == VUNSPEC_LAX))
13756 	return true;
13757     }
13758   return false;
13759 }
13760 
13761 enum rtx_code
13762 minmax_code (rtx x)
13763 {
13764   enum rtx_code code = GET_CODE (x);
13765 
13766   switch (code)
13767     {
13768     case SMAX:
13769       return GE;
13770     case SMIN:
13771       return LE;
13772     case UMIN:
13773       return LEU;
13774     case UMAX:
13775       return GEU;
13776     default:
13777       gcc_unreachable ();
13778     }
13779 }
13780 
13781 /* Match pair of min/max operators that can be implemented via usat/ssat.  */
13782 
13783 bool
13784 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13785 			int *mask, bool *signed_sat)
13786 {
13787   /* The high bound must be a power of two minus one.  */
13788   int log = exact_log2 (INTVAL (hi_bound) + 1);
13789   if (log == -1)
13790     return false;
13791 
13792   /* The low bound is either zero (for usat) or one less than the
13793      negation of the high bound (for ssat).  */
13794   if (INTVAL (lo_bound) == 0)
13795     {
13796       if (mask)
13797         *mask = log;
13798       if (signed_sat)
13799         *signed_sat = false;
13800 
13801       return true;
13802     }
13803 
13804   if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13805     {
13806       if (mask)
13807         *mask = log + 1;
13808       if (signed_sat)
13809         *signed_sat = true;
13810 
13811       return true;
13812     }
13813 
13814   return false;
13815 }
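
/* A minimal scalar model of the clamping that a matched usat/ssat pair
   implements, assuming BITS is the saturation width (hypothetical helper):
   usat clamps to [0, 2^BITS - 1], ssat clamps to
   [-2^(BITS-1), 2^(BITS-1) - 1].  */
static long
saturate_sketch (long val, int bits, int is_signed)
{
  long hi = is_signed ? (1L << (bits - 1)) - 1 : (1L << bits) - 1;
  long lo = is_signed ? -(1L << (bits - 1)) : 0;

  return val < lo ? lo : val > hi ? hi : val;
}
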
13816 
13817 /* Return 1 if memory locations are adjacent.  */
13818 int
13819 adjacent_mem_locations (rtx a, rtx b)
13820 {
13821   /* We don't guarantee to preserve the order of these memory refs.  */
13822   if (volatile_refs_p (a) || volatile_refs_p (b))
13823     return 0;
13824 
13825   if ((REG_P (XEXP (a, 0))
13826        || (GET_CODE (XEXP (a, 0)) == PLUS
13827 	   && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13828       && (REG_P (XEXP (b, 0))
13829 	  || (GET_CODE (XEXP (b, 0)) == PLUS
13830 	      && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13831     {
13832       HOST_WIDE_INT val0 = 0, val1 = 0;
13833       rtx reg0, reg1;
13834       int val_diff;
13835 
13836       if (GET_CODE (XEXP (a, 0)) == PLUS)
13837         {
13838 	  reg0 = XEXP (XEXP (a, 0), 0);
13839 	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13840         }
13841       else
13842 	reg0 = XEXP (a, 0);
13843 
13844       if (GET_CODE (XEXP (b, 0)) == PLUS)
13845         {
13846 	  reg1 = XEXP (XEXP (b, 0), 0);
13847 	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13848         }
13849       else
13850 	reg1 = XEXP (b, 0);
13851 
13852       /* Don't accept any offset that will require multiple
13853 	 instructions to handle, since this would cause the
13854 	 arith_adjacentmem pattern to output an overlong sequence.  */
13855       if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13856 	return 0;
13857 
13858       /* Don't allow an eliminable register: register elimination can make
13859 	 the offset too large.  */
13860       if (arm_eliminable_register (reg0))
13861 	return 0;
13862 
13863       val_diff = val1 - val0;
13864 
13865       if (arm_ld_sched)
13866 	{
13867 	  /* If the target has load delay slots, then there's no benefit
13868 	     to using an ldm instruction unless the offset is zero and
13869 	     we are optimizing for size.  */
13870 	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13871 		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13872 		  && (val_diff == 4 || val_diff == -4));
13873 	}
13874 
13875       return ((REGNO (reg0) == REGNO (reg1))
13876 	      && (val_diff == 4 || val_diff == -4));
13877     }
13878 
13879   return 0;
13880 }
13881 
13882 /* Return true if OP is a valid load or store multiple operation.  LOAD is true
13883    for load operations, false for store operations.  CONSECUTIVE is true
13884    if the register numbers in the operation must be consecutive in the register
13885    bank. RETURN_PC is true if value is to be loaded in PC.
13886    The pattern we are trying to match for load is:
13887      [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13888       (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13889        :
13890        :
13891       (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13892      ]
13893      where
13894      1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13895      2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13896      3.  If consecutive is TRUE, then for kth register being loaded,
13897          REGNO (R_dk) = REGNO (R_d0) + k.
13898    The pattern for store is similar.  */
13899 bool
13900 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13901                      bool consecutive, bool return_pc)
13902 {
13903   HOST_WIDE_INT count = XVECLEN (op, 0);
13904   rtx reg, mem, addr;
13905   unsigned regno;
13906   unsigned first_regno;
13907   HOST_WIDE_INT i = 1, base = 0, offset = 0;
13908   rtx elt;
13909   bool addr_reg_in_reglist = false;
13910   bool update = false;
13911   int reg_increment;
13912   int offset_adj;
13913   int regs_per_val;
13914 
13915   /* If not in SImode, then registers must be consecutive
13916      (e.g., VLDM instructions for DFmode).  */
13917   gcc_assert ((mode == SImode) || consecutive);
13918   /* Setting return_pc for stores is illegal.  */
13919   gcc_assert (!return_pc || load);
13920 
13921   /* Set up the increments and the regs per val based on the mode.  */
13922   reg_increment = GET_MODE_SIZE (mode);
13923   regs_per_val = reg_increment / 4;
13924   offset_adj = return_pc ? 1 : 0;
13925 
13926   if (count <= 1
13927       || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13928       || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13929     return false;
13930 
13931   /* Check if this is a write-back.  */
13932   elt = XVECEXP (op, 0, offset_adj);
13933   if (GET_CODE (SET_SRC (elt)) == PLUS)
13934     {
13935       i++;
13936       base = 1;
13937       update = true;
13938 
13939       /* The offset adjustment must be the number of registers being
13940          popped times the size of a single register.  */
13941       if (!REG_P (SET_DEST (elt))
13942           || !REG_P (XEXP (SET_SRC (elt), 0))
13943           || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13944           || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13945           || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13946              ((count - 1 - offset_adj) * reg_increment))
13947         return false;
13948     }
13949 
13950   i = i + offset_adj;
13951   base = base + offset_adj;
13952   /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13953      success depends on the type: VLDM can do just one reg,
13954      LDM must do at least two.  */
13955   if ((count <= i) && (mode == SImode))
13956       return false;
13957 
13958   elt = XVECEXP (op, 0, i - 1);
13959   if (GET_CODE (elt) != SET)
13960     return false;
13961 
13962   if (load)
13963     {
13964       reg = SET_DEST (elt);
13965       mem = SET_SRC (elt);
13966     }
13967   else
13968     {
13969       reg = SET_SRC (elt);
13970       mem = SET_DEST (elt);
13971     }
13972 
13973   if (!REG_P (reg) || !MEM_P (mem))
13974     return false;
13975 
13976   regno = REGNO (reg);
13977   first_regno = regno;
13978   addr = XEXP (mem, 0);
13979   if (GET_CODE (addr) == PLUS)
13980     {
13981       if (!CONST_INT_P (XEXP (addr, 1)))
13982 	return false;
13983 
13984       offset = INTVAL (XEXP (addr, 1));
13985       addr = XEXP (addr, 0);
13986     }
13987 
13988   if (!REG_P (addr))
13989     return false;
13990 
13991   /* Don't allow SP to be loaded unless it is also the base register. It
13992      guarantees that SP is reset correctly when an LDM instruction
13993      is interrupted. Otherwise, we might end up with a corrupt stack.  */
13994   if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13995     return false;
13996 
13997   if (regno == REGNO (addr))
13998     addr_reg_in_reglist = true;
13999 
14000   for (; i < count; i++)
14001     {
14002       elt = XVECEXP (op, 0, i);
14003       if (GET_CODE (elt) != SET)
14004         return false;
14005 
14006       if (load)
14007         {
14008           reg = SET_DEST (elt);
14009           mem = SET_SRC (elt);
14010         }
14011       else
14012         {
14013           reg = SET_SRC (elt);
14014           mem = SET_DEST (elt);
14015         }
14016 
14017       if (!REG_P (reg)
14018           || GET_MODE (reg) != mode
14019           || REGNO (reg) <= regno
14020           || (consecutive
14021               && (REGNO (reg) !=
14022                   (unsigned int) (first_regno + regs_per_val * (i - base))))
14023           /* Don't allow SP to be loaded unless it is also the base register. It
14024              guarantees that SP is reset correctly when an LDM instruction
14025              is interrupted. Otherwise, we might end up with a corrupt stack.  */
14026           || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14027           || !MEM_P (mem)
14028           || GET_MODE (mem) != mode
14029           || ((GET_CODE (XEXP (mem, 0)) != PLUS
14030 	       || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
14031 	       || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
14032 	       || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
14033                    offset + (i - base) * reg_increment))
14034 	      && (!REG_P (XEXP (mem, 0))
14035 		  || offset + (i - base) * reg_increment != 0)))
14036         return false;
14037 
14038       regno = REGNO (reg);
14039       if (regno == REGNO (addr))
14040         addr_reg_in_reglist = true;
14041     }
14042 
14043   if (load)
14044     {
14045       if (update && addr_reg_in_reglist)
14046         return false;
14047 
14048       /* For Thumb-1, address register is always modified - either by write-back
14049          or by explicit load.  If the pattern does not describe an update,
14050          then the address register must be in the list of loaded registers.  */
14051       if (TARGET_THUMB1)
14052         return update || addr_reg_in_reglist;
14053     }
14054 
14055   return true;
14056 }
14057 
14058 /* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
14059    or VSCCLRM (otherwise) insn.  To be a valid CLRM pattern, OP must have the
14060    following form:
14061 
14062    [(set (reg:SI <N>) (const_int 0))
14063     (set (reg:SI <M>) (const_int 0))
14064     ...
14065     (unspec_volatile [(const_int 0)]
14066 		     VUNSPEC_CLRM_APSR)
14067     (clobber (reg:CC CC_REGNUM))
14068    ]
14069 
14070    Any number (including 0) of set expressions is valid, the volatile unspec is
14071    optional.  All registers but SP and PC are allowed and registers must be in
14072    strict increasing order.
14073 
14074    To be a valid VSCCLRM pattern, OP must have the following form:
14075 
14076    [(unspec_volatile [(const_int 0)]
14077 		     VUNSPEC_VSCCLRM_VPR)
14078     (set (reg:SF <N>) (const_int 0))
14079     (set (reg:SF <M>) (const_int 0))
14080     ...
14081    ]
14082 
14083    As with CLRM, any number (including 0) of set expressions is valid, however
14084    the volatile unspec is mandatory here.  Any VFP single-precision register is
14085    accepted but all registers must be consecutive and in increasing order.  */
14086 
14087 bool
14088 clear_operation_p (rtx op, bool vfp)
14089 {
14090   unsigned regno;
14091   unsigned last_regno = INVALID_REGNUM;
14092   rtx elt, reg, zero;
14093   int count = XVECLEN (op, 0);
14094   int first_set = vfp ? 1 : 0;
14095   machine_mode expected_mode = vfp ? E_SFmode : E_SImode;
14096 
14097   for (int i = first_set; i < count; i++)
14098     {
14099       elt = XVECEXP (op, 0, i);
14100 
14101       if (!vfp && GET_CODE (elt) == UNSPEC_VOLATILE)
14102 	{
14103 	  if (XINT (elt, 1) != VUNSPEC_CLRM_APSR
14104 	      || XVECLEN (elt, 0) != 1
14105 	      || XVECEXP (elt, 0, 0) != CONST0_RTX (SImode)
14106 	      || i != count - 2)
14107 	    return false;
14108 
14109 	  continue;
14110 	}
14111 
14112       if (GET_CODE (elt) == CLOBBER)
14113 	continue;
14114 
14115       if (GET_CODE (elt) != SET)
14116 	return false;
14117 
14118       reg = SET_DEST (elt);
14119       zero = SET_SRC (elt);
14120 
14121       if (!REG_P (reg)
14122 	  || GET_MODE (reg) != expected_mode
14123 	  || zero != CONST0_RTX (SImode))
14124 	return false;
14125 
14126       regno = REGNO (reg);
14127 
14128       if (vfp)
14129 	{
14130 	  if (i != first_set && regno != last_regno + 1)
14131 	    return false;
14132 	}
14133       else
14134 	{
14135 	  if (regno == SP_REGNUM || regno == PC_REGNUM)
14136 	    return false;
14137 	  if (i != first_set && regno <= last_regno)
14138 	    return false;
14139 	}
14140 
14141       last_regno = regno;
14142     }
14143 
14144   return true;
14145 }
14146 
14147 /* Return true iff it would be profitable to turn a sequence of NOPS loads
14148    or stores (depending on IS_STORE) into a load-multiple or store-multiple
14149    instruction.  ADD_OFFSET is nonzero if the base address register needs
14150    to be modified with an add instruction before we can use it.  */
14151 
14152 static bool
14153 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
14154 				 int nops, HOST_WIDE_INT add_offset)
14155 {
14156   /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
14157      if the offset isn't small enough.  The reason 2 ldrs are faster
14158      is because these ARMs are able to do more than one cache access
14159      in a single cycle.  The ARM9 and StrongARM have Harvard caches,
14160      whilst the ARM8 has a double bandwidth cache.  This means that
14161      these cores can do both an instruction fetch and a data fetch in
14162      a single cycle, so the trick of calculating the address into a
14163      scratch register (one of the result regs) and then doing a load
14164      multiple actually becomes slower (and no smaller in code size).
14165      That is the transformation
14166 
14167  	ldr	rd1, [rbase + offset]
14168  	ldr	rd2, [rbase + offset + 4]
14169 
14170      to
14171 
14172  	add	rd1, rbase, offset
14173  	ldmia	rd1, {rd1, rd2}
14174 
14175      produces worse code -- '3 cycles + any stalls on rd2' instead of
14176      '2 cycles + any stalls on rd2'.  On ARMs with only one cache
14177      access per cycle, the first sequence could never complete in less
14178      than 6 cycles, whereas the ldm sequence would only take 5 and
14179      would make better use of sequential accesses if not hitting the
14180      cache.
14181 
14182      We cheat here and test 'arm_ld_sched' which we currently know to
14183      only be true for the ARM8, ARM9 and StrongARM.  If this ever
14184      changes, then the test below needs to be reworked.  */
14185   if (nops == 2 && arm_ld_sched && add_offset != 0)
14186     return false;
14187 
14188   /* XScale has load-store double instructions, but they have stricter
14189      alignment requirements than load-store multiple, so we cannot
14190      use them.
14191 
14192      For XScale ldm requires 2 + NREGS cycles to complete and blocks
14193      the pipeline until completion.
14194 
14195 	NREGS		CYCLES
14196 	  1		  3
14197 	  2		  4
14198 	  3		  5
14199 	  4		  6
14200 
14201      An ldr instruction takes 1-3 cycles, but does not block the
14202      pipeline.
14203 
14204 	NREGS		CYCLES
14205 	  1		 1-3
14206 	  2		 2-6
14207 	  3		 3-9
14208 	  4		 4-12
14209 
14210      Best case ldr will always win.  However, the more ldr instructions
14211      we issue, the less likely we are to be able to schedule them well.
14212      Using ldr instructions also increases code size.
14213 
14214      As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
14215      for counts of 3 or 4 regs.  */
14216   if (nops <= 2 && arm_tune_xscale && !optimize_size)
14217     return false;
14218   return true;
14219 }
14220 
14221 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
14222    Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
14223    an array ORDER which describes the sequence to use when accessing the
14224    offsets that produces an ascending order.  In this sequence, each
14225    offset must be larger by exactly 4 than the previous one.  ORDER[0]
14226    must have been filled in with the lowest offset by the caller.
14227    If UNSORTED_REGS is nonnull, it is an array of register numbers that
14228    we use to verify that ORDER produces an ascending order of registers.
14229    Return true if it was possible to construct such an order, false if
14230    not.  */
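/* A worked example (editorial, with assumed values): given NOPS == 4,
   UNSORTED_OFFSETS == {8, 0, 12, 4} and ORDER[0] pre-set to 1 (the lowest
   offset, 0), the loop fills ORDER as {1, 3, 0, 2}, visiting offsets
   0, 4, 8, 12.  If any gap between consecutive offsets differed from 4, or
   two entries offered the same successor offset, the function would return
   false instead.  */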
14231 
14232 static bool
14233 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
14234 		      int *unsorted_regs)
14235 {
14236   int i;
14237   for (i = 1; i < nops; i++)
14238     {
14239       int j;
14240 
14241       order[i] = order[i - 1];
14242       for (j = 0; j < nops; j++)
14243 	if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
14244 	  {
14245 	    /* We must find exactly one offset that is higher than the
14246 	       previous one by 4.  */
14247 	    if (order[i] != order[i - 1])
14248 	      return false;
14249 	    order[i] = j;
14250 	  }
14251       if (order[i] == order[i - 1])
14252 	return false;
14253       /* The register numbers must be ascending.  */
14254       if (unsorted_regs != NULL
14255 	  && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
14256 	return false;
14257     }
14258   return true;
14259 }
14260 
14261 /* Used to determine in a peephole whether a sequence of load
14262    instructions can be changed into a load-multiple instruction.
14263    NOPS is the number of separate load instructions we are examining.  The
14264    first NOPS entries in OPERANDS are the destination registers, the
14265    next NOPS entries are memory operands.  If this function is
14266    successful, *BASE is set to the common base register of the memory
14267    accesses; *LOAD_OFFSET is set to the first memory location's offset
14268    from that base register.
14269    REGS is an array filled in with the destination register numbers.
14270    SAVED_ORDER (if nonnull) is an array filled in with an order that maps
14271    insn numbers to an ascending order of loads.  If CHECK_REGS is true,
14272    the sequence of registers in REGS matches the loads from ascending memory
14273    locations, and the function verifies that the register numbers are
14274    themselves ascending.  If CHECK_REGS is false, the register numbers
14275    are stored in the order they are found in the operands.  */
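/* An illustrative sketch (editorial; the base and destination registers are
   assumed): for the insn sequence

	ldr	r1, [r5, #4]
	ldr	r2, [r5, #8]
	ldr	r3, [r5, #12]

   this function would set *BASE to 5, *LOAD_OFFSET to 4, fill REGS with
   {1, 2, 3}, and on TARGET_ARM return 2, the ldmib case.  */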
14276 static int
14277 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
14278 			int *base, HOST_WIDE_INT *load_offset, bool check_regs)
14279 {
14280   int unsorted_regs[MAX_LDM_STM_OPS];
14281   HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14282   int order[MAX_LDM_STM_OPS];
14283   int base_reg = -1;
14284   int i, ldm_case;
14285 
14286   /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14287      easily extended if required.  */
14288   gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14289 
14290   memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14291 
14292   /* Loop over the operands and check that the memory references are
14293      suitable (i.e. immediate offsets from the same base register).  At
14294      the same time, extract the target register, and the memory
14295      offsets.  */
14296   for (i = 0; i < nops; i++)
14297     {
14298       rtx reg;
14299       rtx offset;
14300 
14301       /* Convert a subreg of a mem into the mem itself.  */
14302       if (GET_CODE (operands[nops + i]) == SUBREG)
14303 	operands[nops + i] = alter_subreg (operands + (nops + i), true);
14304 
14305       gcc_assert (MEM_P (operands[nops + i]));
14306 
14307       /* Don't reorder volatile memory references; it doesn't seem worth
14308 	 looking for the case where the order is ok anyway.  */
14309       if (MEM_VOLATILE_P (operands[nops + i]))
14310 	return 0;
14311 
14312       offset = const0_rtx;
14313 
14314       if ((REG_P (reg = XEXP (operands[nops + i], 0))
14315 	   || (GET_CODE (reg) == SUBREG
14316 	       && REG_P (reg = SUBREG_REG (reg))))
14317 	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14318 	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14319 		  || (GET_CODE (reg) == SUBREG
14320 		      && REG_P (reg = SUBREG_REG (reg))))
14321 	      && (CONST_INT_P (offset
14322 		  = XEXP (XEXP (operands[nops + i], 0), 1)))))
14323 	{
14324 	  if (i == 0)
14325 	    {
14326 	      base_reg = REGNO (reg);
14327 	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14328 		return 0;
14329 	    }
14330 	  else if (base_reg != (int) REGNO (reg))
14331 	    /* Not addressed from the same base register.  */
14332 	    return 0;
14333 
14334 	  unsorted_regs[i] = (REG_P (operands[i])
14335 			      ? REGNO (operands[i])
14336 			      : REGNO (SUBREG_REG (operands[i])));
14337 
14338 	  /* If it isn't an integer register, or if it overwrites the
14339 	     base register but isn't the last insn in the list, then
14340 	     we can't do this.  */
14341 	  if (unsorted_regs[i] < 0
14342 	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14343 	      || unsorted_regs[i] > 14
14344 	      || (i != nops - 1 && unsorted_regs[i] == base_reg))
14345 	    return 0;
14346 
14347           /* Don't allow SP to be loaded unless it is also the base
14348              register.  It guarantees that SP is reset correctly when
14349              an LDM instruction is interrupted.  Otherwise, we might
14350              end up with a corrupt stack.  */
14351           if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
14352             return 0;
14353 
14354 	  unsorted_offsets[i] = INTVAL (offset);
14355 	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14356 	    order[0] = i;
14357 	}
14358       else
14359 	/* Not a suitable memory address.  */
14360 	return 0;
14361     }
14362 
14363   /* All the useful information has now been extracted from the
14364      operands into unsorted_regs and unsorted_offsets; additionally,
14365      order[0] has been set to the lowest offset in the list.  Sort
14366      the offsets into order, verifying that they are adjacent, and
14367      check that the register numbers are ascending.  */
14368   if (!compute_offset_order (nops, unsorted_offsets, order,
14369 			     check_regs ? unsorted_regs : NULL))
14370     return 0;
14371 
14372   if (saved_order)
14373     memcpy (saved_order, order, sizeof order);
14374 
14375   if (base)
14376     {
14377       *base = base_reg;
14378 
14379       for (i = 0; i < nops; i++)
14380 	regs[i] = unsorted_regs[check_regs ? order[i] : i];
14381 
14382       *load_offset = unsorted_offsets[order[0]];
14383     }
14384 
14385   if (unsorted_offsets[order[0]] == 0)
14386     ldm_case = 1; /* ldmia */
14387   else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14388     ldm_case = 2; /* ldmib */
14389   else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14390     ldm_case = 3; /* ldmda */
14391   else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14392     ldm_case = 4; /* ldmdb */
14393   else if (const_ok_for_arm (unsorted_offsets[order[0]])
14394 	   || const_ok_for_arm (-unsorted_offsets[order[0]]))
14395     ldm_case = 5;
14396   else
14397     return 0;
14398 
14399   if (!multiple_operation_profitable_p (false, nops,
14400 					ldm_case == 5
14401 					? unsorted_offsets[order[0]] : 0))
14402     return 0;
14403 
14404   return ldm_case;
14405 }
14406 
14407 /* Used to determine in a peephole whether a sequence of store instructions can
14408    be changed into a store-multiple instruction.
14409    NOPS is the number of separate store instructions we are examining.
14410    NOPS_TOTAL is the total number of instructions recognized by the peephole
14411    pattern.
14412    The first NOPS entries in OPERANDS are the source registers, the next
14413    NOPS entries are memory operands.  If this function is successful, *BASE is
14414    set to the common base register of the memory accesses; *LOAD_OFFSET is set
14415    to the first memory location's offset from that base register.  REGS is an
14416    array filled in with the source register numbers, REG_RTXS (if nonnull) is
14417    likewise filled with the corresponding rtx's.
14418    SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
14419    numbers to an ascending order of stores.
14420    If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
14421    from ascending memory locations, and the function verifies that the register
14422    numbers are themselves ascending.  If CHECK_REGS is false, the register
14423    numbers are stored in the order they are found in the operands.  */
14424 static int
14425 store_multiple_sequence (rtx *operands, int nops, int nops_total,
14426 			 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
14427 			 HOST_WIDE_INT *load_offset, bool check_regs)
14428 {
14429   int unsorted_regs[MAX_LDM_STM_OPS];
14430   rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
14431   HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14432   int order[MAX_LDM_STM_OPS];
14433   int base_reg = -1;
14434   rtx base_reg_rtx = NULL;
14435   int i, stm_case;
14436 
14437   /* Write back of base register is currently only supported for Thumb 1.  */
14438   /* Write-back of the base register is currently only supported for Thumb-1.  */
14439 
14440   /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14441      easily extended if required.  */
14442   gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14443 
14444   memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14445 
14446   /* Loop over the operands and check that the memory references are
14447      suitable (i.e. immediate offsets from the same base register).  At
14448      the same time, extract the target register, and the memory
14449      offsets.  */
14450   for (i = 0; i < nops; i++)
14451     {
14452       rtx reg;
14453       rtx offset;
14454 
14455       /* Convert a subreg of a mem into the mem itself.  */
14456       if (GET_CODE (operands[nops + i]) == SUBREG)
14457 	operands[nops + i] = alter_subreg (operands + (nops + i), true);
14458 
14459       gcc_assert (MEM_P (operands[nops + i]));
14460 
14461       /* Don't reorder volatile memory references; it doesn't seem worth
14462 	 looking for the case where the order is ok anyway.  */
14463       if (MEM_VOLATILE_P (operands[nops + i]))
14464 	return 0;
14465 
14466       offset = const0_rtx;
14467 
14468       if ((REG_P (reg = XEXP (operands[nops + i], 0))
14469 	   || (GET_CODE (reg) == SUBREG
14470 	       && REG_P (reg = SUBREG_REG (reg))))
14471 	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14472 	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14473 		  || (GET_CODE (reg) == SUBREG
14474 		      && REG_P (reg = SUBREG_REG (reg))))
14475 	      && (CONST_INT_P (offset
14476 		  = XEXP (XEXP (operands[nops + i], 0), 1)))))
14477 	{
14478 	  unsorted_reg_rtxs[i] = (REG_P (operands[i])
14479 				  ? operands[i] : SUBREG_REG (operands[i]));
14480 	  unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
14481 
14482 	  if (i == 0)
14483 	    {
14484 	      base_reg = REGNO (reg);
14485 	      base_reg_rtx = reg;
14486 	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14487 		return 0;
14488 	    }
14489 	  else if (base_reg != (int) REGNO (reg))
14490 	    /* Not addressed from the same base register.  */
14491 	    return 0;
14492 
14493 	  /* If it isn't an integer register, then we can't do this.  */
14494 	  if (unsorted_regs[i] < 0
14495 	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14496 	      /* The effects are unpredictable if the base register is
14497 		 both updated and stored.  */
14498 	      || (base_writeback && unsorted_regs[i] == base_reg)
14499 	      || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
14500 	      || unsorted_regs[i] > 14)
14501 	    return 0;
14502 
14503 	  unsorted_offsets[i] = INTVAL (offset);
14504 	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14505 	    order[0] = i;
14506 	}
14507       else
14508 	/* Not a suitable memory address.  */
14509 	return 0;
14510     }
14511 
14512   /* All the useful information has now been extracted from the
14513      operands into unsorted_regs and unsorted_offsets; additionally,
14514      order[0] has been set to the lowest offset in the list.  Sort
14515      the offsets into order, verifying that they are adjacent, and
14516      check that the register numbers are ascending.  */
14517   if (!compute_offset_order (nops, unsorted_offsets, order,
14518 			     check_regs ? unsorted_regs : NULL))
14519     return 0;
14520 
14521   if (saved_order)
14522     memcpy (saved_order, order, sizeof order);
14523 
14524   if (base)
14525     {
14526       *base = base_reg;
14527 
14528       for (i = 0; i < nops; i++)
14529 	{
14530 	  regs[i] = unsorted_regs[check_regs ? order[i] : i];
14531 	  if (reg_rtxs)
14532 	    reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
14533 	}
14534 
14535       *load_offset = unsorted_offsets[order[0]];
14536     }
14537 
14538   if (TARGET_THUMB1
14539       && !peep2_reg_dead_p (nops_total, base_reg_rtx))
14540     return 0;
14541 
14542   if (unsorted_offsets[order[0]] == 0)
14543     stm_case = 1; /* stmia */
14544   else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14545     stm_case = 2; /* stmib */
14546   else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14547     stm_case = 3; /* stmda */
14548   else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14549     stm_case = 4; /* stmdb */
14550   else
14551     return 0;
14552 
14553   if (!multiple_operation_profitable_p (false, nops, 0))
14554     return 0;
14555 
14556   return stm_case;
14557 }
14558 
14559 /* Routines for use in generating RTL.  */
14560 
14561 /* Generate a load-multiple instruction.  COUNT is the number of loads in
14562    the instruction; REGS and MEMS are arrays containing the operands.
14563    BASEREG is the base register to be used in addressing the memory operands.
14564    WBACK_OFFSET is nonzero if the instruction should update the base
14565    register.  */
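/* For illustration only (register numbers are assumed): with COUNT == 2,
   REGS == {0, 1}, BASEREG == r4 and WBACK_OFFSET == 8, and when
   multiple_operation_profitable_p allows it, the function builds roughly

   (parallel [(set (reg:SI 4) (plus:SI (reg:SI 4) (const_int 8)))
	      (set (reg:SI 0) (mem:SI (reg:SI 4)))
	      (set (reg:SI 1) (mem:SI (plus:SI (reg:SI 4) (const_int 4))))])

   i.e. an ldmia with write-back; MEMS is expected to already hold the two
   SImode memory operands at offsets 0 and 4.  */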
14566 
14567 static rtx
14568 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14569 			 HOST_WIDE_INT wback_offset)
14570 {
14571   int i = 0, j;
14572   rtx result;
14573 
14574   if (!multiple_operation_profitable_p (false, count, 0))
14575     {
14576       rtx seq;
14577 
14578       start_sequence ();
14579 
14580       for (i = 0; i < count; i++)
14581 	emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14582 
14583       if (wback_offset != 0)
14584 	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14585 
14586       seq = get_insns ();
14587       end_sequence ();
14588 
14589       return seq;
14590     }
14591 
14592   result = gen_rtx_PARALLEL (VOIDmode,
14593 			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14594   if (wback_offset != 0)
14595     {
14596       XVECEXP (result, 0, 0)
14597 	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14598       i = 1;
14599       count++;
14600     }
14601 
14602   for (j = 0; i < count; i++, j++)
14603     XVECEXP (result, 0, i)
14604       = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
14605 
14606   return result;
14607 }
14608 
14609 /* Generate a store-multiple instruction.  COUNT is the number of stores in
14610    the instruction; REGS and MEMS are arrays containing the operands.
14611    BASEREG is the base register to be used in addressing the memory operands.
14612    WBACK_OFFSET is nonzero if the instruction should update the base
14613    register.  */
14614 
14615 static rtx
14616 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14617 			  HOST_WIDE_INT wback_offset)
14618 {
14619   int i = 0, j;
14620   rtx result;
14621 
14622   if (GET_CODE (basereg) == PLUS)
14623     basereg = XEXP (basereg, 0);
14624 
14625   if (!multiple_operation_profitable_p (false, count, 0))
14626     {
14627       rtx seq;
14628 
14629       start_sequence ();
14630 
14631       for (i = 0; i < count; i++)
14632 	emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14633 
14634       if (wback_offset != 0)
14635 	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14636 
14637       seq = get_insns ();
14638       end_sequence ();
14639 
14640       return seq;
14641     }
14642 
14643   result = gen_rtx_PARALLEL (VOIDmode,
14644 			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14645   if (wback_offset != 0)
14646     {
14647       XVECEXP (result, 0, 0)
14648 	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14649       i = 1;
14650       count++;
14651     }
14652 
14653   for (j = 0; i < count; i++, j++)
14654     XVECEXP (result, 0, i)
14655       = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
14656 
14657   return result;
14658 }
14659 
14660 /* Generate either a load-multiple or a store-multiple instruction.  This
14661    function can be used in situations where we can start with a single MEM
14662    rtx and adjust its address upwards.
14663    COUNT is the number of operations in the instruction, not counting a
14664    possible update of the base register.  REGS is an array containing the
14665    register operands.
14666    BASEREG is the base register to be used in addressing the memory operands,
14667    which are constructed from BASEMEM.
14668    WRITE_BACK specifies whether the generated instruction should include an
14669    update of the base register.
14670    OFFSETP is used to pass an offset to and from this function; this offset
14671    is not used when constructing the address (instead BASEMEM should have an
14672    appropriate offset in its address); it is used only for setting
14673    MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
14674 
14675 static rtx
14676 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14677 		     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14678 {
14679   rtx mems[MAX_LDM_STM_OPS];
14680   HOST_WIDE_INT offset = *offsetp;
14681   int i;
14682 
14683   gcc_assert (count <= MAX_LDM_STM_OPS);
14684 
14685   if (GET_CODE (basereg) == PLUS)
14686     basereg = XEXP (basereg, 0);
14687 
14688   for (i = 0; i < count; i++)
14689     {
14690       rtx addr = plus_constant (Pmode, basereg, i * 4);
14691       mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14692       offset += 4;
14693     }
14694 
14695   if (write_back)
14696     *offsetp = offset;
14697 
14698   if (is_load)
14699     return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14700 				    write_back ? 4 * count : 0);
14701   else
14702     return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14703 				     write_back ? 4 * count : 0);
14704 }
14705 
14706 rtx
14707 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14708 		       rtx basemem, HOST_WIDE_INT *offsetp)
14709 {
14710   return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14711 			      offsetp);
14712 }
14713 
14714 rtx
14715 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14716 			rtx basemem, HOST_WIDE_INT *offsetp)
14717 {
14718   return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14719 			      offsetp);
14720 }
14721 
14722 /* Called from a peephole2 expander to turn a sequence of loads into an
14723    LDM instruction.  OPERANDS are the operands found by the peephole matcher;
14724    NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
14725    is true if we can reorder the registers because they are subsequently used
14726    commutatively.
14727    Returns true iff we could generate a new instruction.  */
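/* An editorial sketch of the intended transformation (register numbers are
   illustrative):

	ldr	r2, [r6]
	ldr	r3, [r6, #4]

   becomes

	ldmia	r6, {r2, r3}

   or, on Thumb-1 when r6 is dead and not in the register list,

	ldmia	r6!, {r2, r3}

   provided the destination registers ascend (or SORT_REGS permits reordering
   them).  */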
14728 
14729 bool
14730 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14731 {
14732   int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14733   rtx mems[MAX_LDM_STM_OPS];
14734   int i, j, base_reg;
14735   rtx base_reg_rtx;
14736   HOST_WIDE_INT offset;
14737   int write_back = FALSE;
14738   int ldm_case;
14739   rtx addr;
14740 
14741   ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14742 				     &base_reg, &offset, !sort_regs);
14743 
14744   if (ldm_case == 0)
14745     return false;
14746 
14747   if (sort_regs)
14748     for (i = 0; i < nops - 1; i++)
14749       for (j = i + 1; j < nops; j++)
14750 	if (regs[i] > regs[j])
14751 	  {
14752 	    int t = regs[i];
14753 	    regs[i] = regs[j];
14754 	    regs[j] = t;
14755 	  }
14756   base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14757 
14758   if (TARGET_THUMB1)
14759     {
14760       gcc_assert (ldm_case == 1 || ldm_case == 5);
14761 
14762       /* Thumb-1 ldm uses writeback except if the base is loaded.  */
14763       write_back = true;
14764       for (i = 0; i < nops; i++)
14765 	if (base_reg == regs[i])
14766 	  write_back = false;
14767 
14768       /* Ensure the base is dead if it is updated.  */
14769       if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
14770 	return false;
14771     }
14772 
14773   if (ldm_case == 5)
14774     {
14775       rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14776       emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14777       offset = 0;
14778       base_reg_rtx = newbase;
14779     }
14780 
14781   for (i = 0; i < nops; i++)
14782     {
14783       addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14784       mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14785 					      SImode, addr, 0);
14786     }
14787   emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14788 				      write_back ? offset + i * 4 : 0));
14789   return true;
14790 }
14791 
14792 /* Called from a peephole2 expander to turn a sequence of stores into an
14793    STM instruction.  OPERANDS are the operands found by the peephole matcher;
14794    NOPS indicates how many separate stores we are trying to combine.
14795    Returns true iff we could generate a new instruction.  */
14796 
14797 bool
14798 gen_stm_seq (rtx *operands, int nops)
14799 {
14800   int i;
14801   int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14802   rtx mems[MAX_LDM_STM_OPS];
14803   int base_reg;
14804   rtx base_reg_rtx;
14805   HOST_WIDE_INT offset;
14806   int write_back = FALSE;
14807   int stm_case;
14808   rtx addr;
14809   bool base_reg_dies;
14810 
14811   stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14812 				      mem_order, &base_reg, &offset, true);
14813 
14814   if (stm_case == 0)
14815     return false;
14816 
14817   base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14818 
14819   base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14820   if (TARGET_THUMB1)
14821     {
14822       gcc_assert (base_reg_dies);
14823       write_back = TRUE;
14824     }
14825 
14826   if (stm_case == 5)
14827     {
14828       gcc_assert (base_reg_dies);
14829       emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14830       offset = 0;
14831     }
14832 
14833   addr = plus_constant (Pmode, base_reg_rtx, offset);
14834 
14835   for (i = 0; i < nops; i++)
14836     {
14837       addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14838       mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14839 					      SImode, addr, 0);
14840     }
14841   emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14842 				       write_back ? offset + i * 4 : 0));
14843   return true;
14844 }
14845 
14846 /* Called from a peephole2 expander to turn a sequence of stores that are
14847    preceded by constant loads into an STM instruction.  OPERANDS are the
14848    operands found by the peephole matcher; NOPS indicates how many
14849    separate stores we are trying to combine; there are 2 * NOPS
14850    instructions in the peephole.
14851    Returns true iff we could generate a new instruction.  */
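/* An editorial sketch (registers and constants are assumed):

	mov	r1, #10
	mov	r2, #20
	str	r1, [r5]
	str	r2, [r5, #4]

   can become

	mov	r1, #10
	mov	r2, #20
	stmia	r5, {r1, r2}

   The function reorders which register receives which constant so that
   ascending register numbers match ascending addresses, and bails out if a
   register that must stay live would end up holding the wrong value.  */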
14852 
14853 bool
14854 gen_const_stm_seq (rtx *operands, int nops)
14855 {
14856   int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14857   int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14858   rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14859   rtx mems[MAX_LDM_STM_OPS];
14860   int base_reg;
14861   rtx base_reg_rtx;
14862   HOST_WIDE_INT offset;
14863   int write_back = FALSE;
14864   int stm_case;
14865   rtx addr;
14866   bool base_reg_dies;
14867   int i, j;
14868   HARD_REG_SET allocated;
14869 
14870   stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14871 				      mem_order, &base_reg, &offset, false);
14872 
14873   if (stm_case == 0)
14874     return false;
14875 
14876   memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14877 
14878   /* If the same register is used more than once, try to find a free
14879      register.  */
14880   CLEAR_HARD_REG_SET (allocated);
14881   for (i = 0; i < nops; i++)
14882     {
14883       for (j = i + 1; j < nops; j++)
14884 	if (regs[i] == regs[j])
14885 	  {
14886 	    rtx t = peep2_find_free_register (0, nops * 2,
14887 					      TARGET_THUMB1 ? "l" : "r",
14888 					      SImode, &allocated);
14889 	    if (t == NULL_RTX)
14890 	      return false;
14891 	    reg_rtxs[i] = t;
14892 	    regs[i] = REGNO (t);
14893 	  }
14894     }
14895 
14896   /* Compute an ordering that maps the register numbers to an ascending
14897      sequence.  */
14898   reg_order[0] = 0;
14899   for (i = 0; i < nops; i++)
14900     if (regs[i] < regs[reg_order[0]])
14901       reg_order[0] = i;
14902 
14903   for (i = 1; i < nops; i++)
14904     {
14905       int this_order = reg_order[i - 1];
14906       for (j = 0; j < nops; j++)
14907 	if (regs[j] > regs[reg_order[i - 1]]
14908 	    && (this_order == reg_order[i - 1]
14909 		|| regs[j] < regs[this_order]))
14910 	  this_order = j;
14911       reg_order[i] = this_order;
14912     }
14913 
14914   /* Ensure that registers that must be live after the instruction end
14915      up with the correct value.  */
14916   for (i = 0; i < nops; i++)
14917     {
14918       int this_order = reg_order[i];
14919       if ((this_order != mem_order[i]
14920 	   || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14921 	  && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14922 	return false;
14923     }
14924 
14925   /* Load the constants.  */
14926   for (i = 0; i < nops; i++)
14927     {
14928       rtx op = operands[2 * nops + mem_order[i]];
14929       sorted_regs[i] = regs[reg_order[i]];
14930       emit_move_insn (reg_rtxs[reg_order[i]], op);
14931     }
14932 
14933   base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14934 
14935   base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14936   if (TARGET_THUMB1)
14937     {
14938       gcc_assert (base_reg_dies);
14939       write_back = TRUE;
14940     }
14941 
14942   if (stm_case == 5)
14943     {
14944       gcc_assert (base_reg_dies);
14945       emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14946       offset = 0;
14947     }
14948 
14949   addr = plus_constant (Pmode, base_reg_rtx, offset);
14950 
14951   for (i = 0; i < nops; i++)
14952     {
14953       addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14954       mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14955 					      SImode, addr, 0);
14956     }
14957   emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14958 				       write_back ? offset + i * 4 : 0));
14959   return true;
14960 }
14961 
14962 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14963    unaligned copies on processors which support unaligned semantics for those
14964    instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
14965    (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14966    An interleave factor of 1 (the minimum) will perform no interleaving.
14967    Load/store multiple are used for aligned addresses where possible.  */
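/* An editorial illustration of the interleaving (pseudo assembly, register
   names are placeholders): with INTERLEAVE_FACTOR == 2 and neither buffer
   aligned, each 8-byte block is copied roughly as

	ldr	rA, [src]
	ldr	rB, [src, #4]
	str	rA, [dst]
	str	rB, [dst, #4]

   i.e. both loads are issued before either store, which can hide some load
   latency compared with a strict load/store/load/store ordering.  */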
14968 
14969 static void
14970 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14971 				   HOST_WIDE_INT length,
14972 				   unsigned int interleave_factor)
14973 {
14974   rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14975   int *regnos = XALLOCAVEC (int, interleave_factor);
14976   HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14977   HOST_WIDE_INT i, j;
14978   HOST_WIDE_INT remaining = length, words;
14979   rtx halfword_tmp = NULL, byte_tmp = NULL;
14980   rtx dst, src;
14981   bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14982   bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14983   HOST_WIDE_INT srcoffset, dstoffset;
14984   HOST_WIDE_INT src_autoinc, dst_autoinc;
14985   rtx mem, addr;
14986 
14987   gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
14988 
14989   /* Use hard registers if we have aligned source or destination so we can use
14990      load/store multiple with contiguous registers.  */
14991   if (dst_aligned || src_aligned)
14992     for (i = 0; i < interleave_factor; i++)
14993       regs[i] = gen_rtx_REG (SImode, i);
14994   else
14995     for (i = 0; i < interleave_factor; i++)
14996       regs[i] = gen_reg_rtx (SImode);
14997 
14998   dst = copy_addr_to_reg (XEXP (dstbase, 0));
14999   src = copy_addr_to_reg (XEXP (srcbase, 0));
15000 
15001   srcoffset = dstoffset = 0;
15002 
15003   /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
15004      For copying the last bytes we want to subtract this offset again.  */
15005   src_autoinc = dst_autoinc = 0;
15006 
15007   for (i = 0; i < interleave_factor; i++)
15008     regnos[i] = i;
15009 
15010   /* Copy BLOCK_SIZE_BYTES chunks.  */
15011 
15012   for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
15013     {
15014       /* Load words.  */
15015       if (src_aligned && interleave_factor > 1)
15016 	{
15017 	  emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
15018 					    TRUE, srcbase, &srcoffset));
15019 	  src_autoinc += UNITS_PER_WORD * interleave_factor;
15020 	}
15021       else
15022 	{
15023 	  for (j = 0; j < interleave_factor; j++)
15024 	    {
15025 	      addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
15026 						 - src_autoinc));
15027 	      mem = adjust_automodify_address (srcbase, SImode, addr,
15028 					       srcoffset + j * UNITS_PER_WORD);
15029 	      emit_insn (gen_unaligned_loadsi (regs[j], mem));
15030 	    }
15031 	  srcoffset += block_size_bytes;
15032 	}
15033 
15034       /* Store words.  */
15035       if (dst_aligned && interleave_factor > 1)
15036 	{
15037 	  emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
15038 					     TRUE, dstbase, &dstoffset));
15039 	  dst_autoinc += UNITS_PER_WORD * interleave_factor;
15040 	}
15041       else
15042 	{
15043 	  for (j = 0; j < interleave_factor; j++)
15044 	    {
15045 	      addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
15046 						 - dst_autoinc));
15047 	      mem = adjust_automodify_address (dstbase, SImode, addr,
15048 					       dstoffset + j * UNITS_PER_WORD);
15049 	      emit_insn (gen_unaligned_storesi (mem, regs[j]));
15050 	    }
15051 	  dstoffset += block_size_bytes;
15052 	}
15053 
15054       remaining -= block_size_bytes;
15055     }
15056 
15057   /* Copy any whole words left (note these aren't interleaved with any
15058      subsequent halfword/byte load/stores in the interests of simplicity).  */
15059 
15060   words = remaining / UNITS_PER_WORD;
15061 
15062   gcc_assert (words < interleave_factor);
15063 
15064   if (src_aligned && words > 1)
15065     {
15066       emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
15067 					&srcoffset));
15068       src_autoinc += UNITS_PER_WORD * words;
15069     }
15070   else
15071     {
15072       for (j = 0; j < words; j++)
15073 	{
15074 	  addr = plus_constant (Pmode, src,
15075 				srcoffset + j * UNITS_PER_WORD - src_autoinc);
15076 	  mem = adjust_automodify_address (srcbase, SImode, addr,
15077 					   srcoffset + j * UNITS_PER_WORD);
15078 	  if (src_aligned)
15079 	    emit_move_insn (regs[j], mem);
15080 	  else
15081 	    emit_insn (gen_unaligned_loadsi (regs[j], mem));
15082 	}
15083       srcoffset += words * UNITS_PER_WORD;
15084     }
15085 
15086   if (dst_aligned && words > 1)
15087     {
15088       emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
15089 					 &dstoffset));
15090       dst_autoinc += words * UNITS_PER_WORD;
15091     }
15092   else
15093     {
15094       for (j = 0; j < words; j++)
15095 	{
15096 	  addr = plus_constant (Pmode, dst,
15097 				dstoffset + j * UNITS_PER_WORD - dst_autoinc);
15098 	  mem = adjust_automodify_address (dstbase, SImode, addr,
15099 					   dstoffset + j * UNITS_PER_WORD);
15100 	  if (dst_aligned)
15101 	    emit_move_insn (mem, regs[j]);
15102 	  else
15103 	    emit_insn (gen_unaligned_storesi (mem, regs[j]));
15104 	}
15105       dstoffset += words * UNITS_PER_WORD;
15106     }
15107 
15108   remaining -= words * UNITS_PER_WORD;
15109 
15110   gcc_assert (remaining < 4);
15111 
15112   /* Copy a halfword if necessary.  */
15113 
15114   if (remaining >= 2)
15115     {
15116       halfword_tmp = gen_reg_rtx (SImode);
15117 
15118       addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15119       mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
15120       emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
15121 
15122       /* Either write out immediately, or delay until we've loaded the last
15123 	 byte, depending on interleave factor.  */
15124       if (interleave_factor == 1)
15125 	{
15126 	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15127 	  mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15128 	  emit_insn (gen_unaligned_storehi (mem,
15129 		       gen_lowpart (HImode, halfword_tmp)));
15130 	  halfword_tmp = NULL;
15131 	  dstoffset += 2;
15132 	}
15133 
15134       remaining -= 2;
15135       srcoffset += 2;
15136     }
15137 
15138   gcc_assert (remaining < 2);
15139 
15140   /* Copy last byte.  */
15141 
15142   if ((remaining & 1) != 0)
15143     {
15144       byte_tmp = gen_reg_rtx (SImode);
15145 
15146       addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15147       mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
15148       emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
15149 
15150       if (interleave_factor == 1)
15151 	{
15152 	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15153 	  mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15154 	  emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15155 	  byte_tmp = NULL;
15156 	  dstoffset++;
15157 	}
15158 
15159       remaining--;
15160       srcoffset++;
15161     }
15162 
15163   /* Store last halfword if we haven't done so already.  */
15164 
15165   if (halfword_tmp)
15166     {
15167       addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15168       mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15169       emit_insn (gen_unaligned_storehi (mem,
15170 		   gen_lowpart (HImode, halfword_tmp)));
15171       dstoffset += 2;
15172     }
15173 
15174   /* Likewise for last byte.  */
15175 
15176   if (byte_tmp)
15177     {
15178       addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15179       mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15180       emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15181       dstoffset++;
15182     }
15183 
15184   gcc_assert (remaining == 0 && srcoffset == dstoffset);
15185 }
15186 
15187 /* From mips_adjust_block_mem:
15188 
15189    Helper function for doing a loop-based block operation on memory
15190    reference MEM.  Each iteration of the loop will operate on LENGTH
15191    bytes of MEM.
15192 
15193    Create a new base register for use within the loop and point it to
15194    the start of MEM.  Create a new memory reference that uses this
15195    register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */
15196 
15197 static void
15198 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
15199 		      rtx *loop_mem)
15200 {
15201   *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
15202 
15203   /* Although the new mem does not refer to a known location,
15204      it does keep up to LENGTH bytes of alignment.  */
15205   *loop_mem = change_address (mem, BLKmode, *loop_reg);
15206   set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
15207 }
15208 
15209 /* From mips_block_move_loop:
15210 
15211    Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
15212    bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
15213    the memory regions do not overlap.  */
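/* An editorial sketch of the emitted structure (pseudo assembly; names are
   placeholders):

	add	final_src, src_reg, #(LENGTH - LENGTH % BYTES_PER_ITER)
   loop:
	<straight copy of BYTES_PER_ITER bytes>
	add	src_reg, src_reg, #BYTES_PER_ITER
	add	dest_reg, dest_reg, #BYTES_PER_ITER
	cmp	src_reg, final_src
	bne	loop
	<straight copy of the leftover bytes, if any>  */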
15214 
15215 static void
15216 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
15217 			       unsigned int interleave_factor,
15218 			       HOST_WIDE_INT bytes_per_iter)
15219 {
15220   rtx src_reg, dest_reg, final_src, test;
15221   HOST_WIDE_INT leftover;
15222 
15223   leftover = length % bytes_per_iter;
15224   length -= leftover;
15225 
15226   /* Create registers and memory references for use within the loop.  */
15227   arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
15228   arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
15229 
15230   /* Calculate the value that SRC_REG should have after the last iteration of
15231      the loop.  */
15232   final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
15233 				   0, 0, OPTAB_WIDEN);
15234 
15235   /* Emit the start of the loop.  */
15236   rtx_code_label *label = gen_label_rtx ();
15237   emit_label (label);
15238 
15239   /* Emit the loop body.  */
15240   arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
15241 				     interleave_factor);
15242 
15243   /* Move on to the next block.  */
15244   emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
15245   emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
15246 
15247   /* Emit the loop condition.  */
15248   test = gen_rtx_NE (VOIDmode, src_reg, final_src);
15249   emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
15250 
15251   /* Mop up any left-over bytes.  */
15252   if (leftover)
15253     arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
15254 }
15255 
15256 /* Emit a block move when either the source or destination is unaligned (not
15257    aligned to a four-byte boundary).  This may need further tuning depending on
15258    core type, optimize_size setting, etc.  */
15259 
15260 static int
15261 arm_cpymemqi_unaligned (rtx *operands)
15262 {
15263   HOST_WIDE_INT length = INTVAL (operands[2]);
15264 
15265   if (optimize_size)
15266     {
15267       bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
15268       bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
15269       /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
15270 	 size of code if optimizing for size.  We'll use ldm/stm if src_aligned
15271 	 or dst_aligned though: allow more interleaving in those cases since the
15272 	 resulting code can be smaller.  */
15273       unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
15274       HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
15275 
15276       if (length > 12)
15277 	arm_block_move_unaligned_loop (operands[0], operands[1], length,
15278 				       interleave_factor, bytes_per_iter);
15279       else
15280 	arm_block_move_unaligned_straight (operands[0], operands[1], length,
15281 					   interleave_factor);
15282     }
15283   else
15284     {
15285       /* Note that the loop created by arm_block_move_unaligned_loop may be
15286 	 subject to loop unrolling, which makes tuning this condition a little
15287 	 redundant.  */
15288       if (length > 32)
15289 	arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
15290       else
15291 	arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
15292     }
15293 
15294   return 1;
15295 }
15296 
15297 int
15298 arm_gen_cpymemqi (rtx *operands)
15299 {
15300   HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
15301   HOST_WIDE_INT srcoffset, dstoffset;
15302   rtx src, dst, srcbase, dstbase;
15303   rtx part_bytes_reg = NULL;
15304   rtx mem;
15305 
15306   if (!CONST_INT_P (operands[2])
15307       || !CONST_INT_P (operands[3])
15308       || INTVAL (operands[2]) > 64)
15309     return 0;
15310 
15311   if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
15312     return arm_cpymemqi_unaligned (operands);
15313 
15314   if (INTVAL (operands[3]) & 3)
15315     return 0;
15316 
15317   dstbase = operands[0];
15318   srcbase = operands[1];
15319 
15320   dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
15321   src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
15322 
15323   in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
15324   out_words_to_go = INTVAL (operands[2]) / 4;
15325   last_bytes = INTVAL (operands[2]) & 3;
15326   dstoffset = srcoffset = 0;
15327 
15328   if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
15329     part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
15330 
15331   while (in_words_to_go >= 2)
15332     {
15333       if (in_words_to_go > 4)
15334 	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
15335 					  TRUE, srcbase, &srcoffset));
15336       else
15337 	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
15338 					  src, FALSE, srcbase,
15339 					  &srcoffset));
15340 
15341       if (out_words_to_go)
15342 	{
15343 	  if (out_words_to_go > 4)
15344 	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
15345 					       TRUE, dstbase, &dstoffset));
15346 	  else if (out_words_to_go != 1)
15347 	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
15348 					       out_words_to_go, dst,
15349 					       (last_bytes == 0
15350 						? FALSE : TRUE),
15351 					       dstbase, &dstoffset));
15352 	  else
15353 	    {
15354 	      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15355 	      emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
15356 	      if (last_bytes != 0)
15357 		{
15358 		  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
15359 		  dstoffset += 4;
15360 		}
15361 	    }
15362 	}
15363 
15364       in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
15365       out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
15366     }
15367 
15368   /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
15369   if (out_words_to_go)
15370     {
15371       rtx sreg;
15372 
15373       mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15374       sreg = copy_to_reg (mem);
15375 
15376       mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15377       emit_move_insn (mem, sreg);
15378       in_words_to_go--;
15379 
15380       gcc_assert (!in_words_to_go);	/* Sanity check */
15381     }
15382 
15383   if (in_words_to_go)
15384     {
15385       gcc_assert (in_words_to_go > 0);
15386 
15387       mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15388       part_bytes_reg = copy_to_mode_reg (SImode, mem);
15389     }
15390 
15391   gcc_assert (!last_bytes || part_bytes_reg);
15392 
15393   if (BYTES_BIG_ENDIAN && last_bytes)
15394     {
15395       rtx tmp = gen_reg_rtx (SImode);
15396 
15397       /* The bytes we want are in the top end of the word.  */
15398       emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
15399 			      GEN_INT (8 * (4 - last_bytes))));
15400       part_bytes_reg = tmp;
15401 
15402       while (last_bytes)
15403 	{
15404 	  mem = adjust_automodify_address (dstbase, QImode,
15405 					   plus_constant (Pmode, dst,
15406 							  last_bytes - 1),
15407 					   dstoffset + last_bytes - 1);
15408 	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15409 
15410 	  if (--last_bytes)
15411 	    {
15412 	      tmp = gen_reg_rtx (SImode);
15413 	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
15414 	      part_bytes_reg = tmp;
15415 	    }
15416 	}
15417 
15418     }
15419   else
15420     {
15421       if (last_bytes > 1)
15422 	{
15423 	  mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
15424 	  emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
15425 	  last_bytes -= 2;
15426 	  if (last_bytes)
15427 	    {
15428 	      rtx tmp = gen_reg_rtx (SImode);
15429 	      emit_insn (gen_addsi3 (dst, dst, const2_rtx));
15430 	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
15431 	      part_bytes_reg = tmp;
15432 	      dstoffset += 2;
15433 	    }
15434 	}
15435 
15436       if (last_bytes)
15437 	{
15438 	  mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
15439 	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15440 	}
15441     }
15442 
15443   return 1;
15444 }
15445 
15446 /* Helper for gen_cpymem_ldrd_strd.  Increase the address of a memory rtx
15447    by its mode size.  */
15448 inline static rtx
15449 next_consecutive_mem (rtx mem)
15450 {
15451   machine_mode mode = GET_MODE (mem);
15452   HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
15453   rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
15454 
15455   return adjust_automodify_address (mem, mode, addr, offset);
15456 }
15457 
15458 /* Copy using LDRD/STRD instructions whenever possible.
15459    Returns true upon success. */
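/* Editorial note on the flow (not from the original comments): an aligned
   10-byte copy is handled as one 8-byte DImode move in the main loop followed
   by a halfword copy in the tail, while a 7-byte copy skips the loop and is
   handled as a word, then a halfword, then a byte.  */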
15460 bool
15461 gen_cpymem_ldrd_strd (rtx *operands)
15462 {
15463   unsigned HOST_WIDE_INT len;
15464   HOST_WIDE_INT align;
15465   rtx src, dst, base;
15466   rtx reg0;
15467   bool src_aligned, dst_aligned;
15468   bool src_volatile, dst_volatile;
15469 
15470   gcc_assert (CONST_INT_P (operands[2]));
15471   gcc_assert (CONST_INT_P (operands[3]));
15472 
15473   len = UINTVAL (operands[2]);
15474   if (len > 64)
15475     return false;
15476 
15477   /* Maximum alignment we can assume for both src and dst buffers.  */
15478   align = INTVAL (operands[3]);
15479 
15480   if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
15481     return false;
15482 
15483   /* Place src and dst addresses in registers
15484      and update the corresponding mem rtx.  */
15485   dst = operands[0];
15486   dst_volatile = MEM_VOLATILE_P (dst);
15487   dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
15488   base = copy_to_mode_reg (SImode, XEXP (dst, 0));
15489   dst = adjust_automodify_address (dst, VOIDmode, base, 0);
15490 
15491   src = operands[1];
15492   src_volatile = MEM_VOLATILE_P (src);
15493   src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
15494   base = copy_to_mode_reg (SImode, XEXP (src, 0));
15495   src = adjust_automodify_address (src, VOIDmode, base, 0);
15496 
15497   if (!unaligned_access && !(src_aligned && dst_aligned))
15498     return false;
15499 
15500   if (src_volatile || dst_volatile)
15501     return false;
15502 
15503   /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
15504   if (!(dst_aligned || src_aligned))
15505     return arm_gen_cpymemqi (operands);
15506 
15507   /* If either the src or dst is unaligned, we'll be accessing it as pairs
15508      of unaligned SImode accesses.  Otherwise we can generate DImode
15509      ldrd/strd instructions.  */
15510   src = adjust_address (src, src_aligned ? DImode : SImode, 0);
15511   dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
15512 
15513   while (len >= 8)
15514     {
15515       len -= 8;
15516       reg0 = gen_reg_rtx (DImode);
15517       rtx first_reg = NULL_RTX;
15518       rtx second_reg = NULL_RTX;
15519 
15520       if (!src_aligned || !dst_aligned)
15521 	{
15522 	  if (BYTES_BIG_ENDIAN)
15523 	    {
15524 	      second_reg = gen_lowpart (SImode, reg0);
15525 	      first_reg = gen_highpart_mode (SImode, DImode, reg0);
15526 	    }
15527 	  else
15528 	    {
15529 	      first_reg = gen_lowpart (SImode, reg0);
15530 	      second_reg = gen_highpart_mode (SImode, DImode, reg0);
15531 	    }
15532 	}
15533       if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
15534 	emit_move_insn (reg0, src);
15535       else if (src_aligned)
15536 	emit_insn (gen_unaligned_loaddi (reg0, src));
15537       else
15538 	{
15539 	  emit_insn (gen_unaligned_loadsi (first_reg, src));
15540 	  src = next_consecutive_mem (src);
15541 	  emit_insn (gen_unaligned_loadsi (second_reg, src));
15542 	}
15543 
15544       if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
15545 	emit_move_insn (dst, reg0);
15546       else if (dst_aligned)
15547 	emit_insn (gen_unaligned_storedi (dst, reg0));
15548       else
15549 	{
15550 	  emit_insn (gen_unaligned_storesi (dst, first_reg));
15551 	  dst = next_consecutive_mem (dst);
15552 	  emit_insn (gen_unaligned_storesi (dst, second_reg));
15553 	}
15554 
15555       src = next_consecutive_mem (src);
15556       dst = next_consecutive_mem (dst);
15557     }
15558 
15559   gcc_assert (len < 8);
15560   if (len >= 4)
15561     {
15562       /* More than a word but less than a double-word to copy.  Copy a word.  */
15563       reg0 = gen_reg_rtx (SImode);
15564       src = adjust_address (src, SImode, 0);
15565       dst = adjust_address (dst, SImode, 0);
15566       if (src_aligned)
15567         emit_move_insn (reg0, src);
15568       else
15569         emit_insn (gen_unaligned_loadsi (reg0, src));
15570 
15571       if (dst_aligned)
15572         emit_move_insn (dst, reg0);
15573       else
15574         emit_insn (gen_unaligned_storesi (dst, reg0));
15575 
15576       src = next_consecutive_mem (src);
15577       dst = next_consecutive_mem (dst);
15578       len -= 4;
15579     }
15580 
15581   if (len == 0)
15582     return true;
15583 
15584   /* Copy the remaining bytes.  */
15585   if (len >= 2)
15586     {
15587       dst = adjust_address (dst, HImode, 0);
15588       src = adjust_address (src, HImode, 0);
15589       reg0 = gen_reg_rtx (SImode);
15590       if (src_aligned)
15591         emit_insn (gen_zero_extendhisi2 (reg0, src));
15592       else
15593         emit_insn (gen_unaligned_loadhiu (reg0, src));
15594 
15595       if (dst_aligned)
15596         emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
15597       else
15598         emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
15599 
15600       src = next_consecutive_mem (src);
15601       dst = next_consecutive_mem (dst);
15602       if (len == 2)
15603         return true;
15604     }
15605 
15606   dst = adjust_address (dst, QImode, 0);
15607   src = adjust_address (src, QImode, 0);
15608   reg0 = gen_reg_rtx (QImode);
15609   emit_move_insn (reg0, src);
15610   emit_move_insn (dst, reg0);
15611   return true;
15612 }
15613 
15614 /* Decompose operands for a 64-bit binary operation in OP1 and OP2
15615    into its component 32-bit subregs.  OP2 may be an immediate
15616    constant and we want to simplify it in that case.  */
15617 void
15618 arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
15619 			rtx *lo_op2, rtx *hi_op2)
15620 {
15621   *lo_op1 = gen_lowpart (SImode, op1);
15622   *hi_op1 = gen_highpart (SImode, op1);
15623   *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
15624 				 subreg_lowpart_offset (SImode, DImode));
15625   *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
15626 				 subreg_highpart_offset (SImode, DImode));
15627 }
15628 
15629 /* Select a dominance comparison mode if possible for a test of the general
15630    form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
15631    COND_OR == DOM_CC_X_AND_Y => (X && Y)
15632    COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15633    COND_OR == DOM_CC_X_OR_Y => (X || Y)
15634    In all cases OP will be either EQ or NE, but we don't need to know which
15635    here.  If we are unable to support a dominance comparison we return
15636    CC mode.  This will then fail to match for the RTL expressions that
15637    generate this call.  */
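/* As an illustration: with COND_OR == DOM_CC_X_AND_Y and both X and Y
   being EQ comparisons, CC_DEQmode is returned, corresponding to a
   conditional-compare sequence along the lines of
	cmp	r0, #0
	cmpeq	r1, #0
   where the final EQ test only succeeds if both comparisons did.  */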
15638 machine_mode
15639 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15640 {
15641   enum rtx_code cond1, cond2;
15642   int swapped = 0;
15643 
15644   /* Currently we will probably get the wrong result if the individual
15645      comparisons are not simple.  This also ensures that it is safe to
15646      reverse a comparison if necessary.  */
15647   if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
15648        != CCmode)
15649       || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
15650 	  != CCmode))
15651     return CCmode;
15652 
15653   /* The if_then_else variant of this tests the second condition if the
15654      first passes, but is true if the first fails.  Reverse the first
15655      condition to get a true "inclusive-or" expression.  */
15656   if (cond_or == DOM_CC_NX_OR_Y)
15657     cond1 = reverse_condition (cond1);
15658 
15659   /* If the comparisons are not equal, and one doesn't dominate the other,
15660      then we can't do this.  */
15661   if (cond1 != cond2
15662       && !comparison_dominates_p (cond1, cond2)
15663       && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
15664     return CCmode;
15665 
15666   if (swapped)
15667     std::swap (cond1, cond2);
15668 
15669   switch (cond1)
15670     {
15671     case EQ:
15672       if (cond_or == DOM_CC_X_AND_Y)
15673 	return CC_DEQmode;
15674 
15675       switch (cond2)
15676 	{
15677 	case EQ: return CC_DEQmode;
15678 	case LE: return CC_DLEmode;
15679 	case LEU: return CC_DLEUmode;
15680 	case GE: return CC_DGEmode;
15681 	case GEU: return CC_DGEUmode;
15682 	default: gcc_unreachable ();
15683 	}
15684 
15685     case LT:
15686       if (cond_or == DOM_CC_X_AND_Y)
15687 	return CC_DLTmode;
15688 
15689       switch (cond2)
15690 	{
15691 	case  LT:
15692 	    return CC_DLTmode;
15693 	case LE:
15694 	  return CC_DLEmode;
15695 	case NE:
15696 	  return CC_DNEmode;
15697 	default:
15698 	  gcc_unreachable ();
15699 	}
15700 
15701     case GT:
15702       if (cond_or == DOM_CC_X_AND_Y)
15703 	return CC_DGTmode;
15704 
15705       switch (cond2)
15706 	{
15707 	case GT:
15708 	  return CC_DGTmode;
15709 	case GE:
15710 	  return CC_DGEmode;
15711 	case NE:
15712 	  return CC_DNEmode;
15713 	default:
15714 	  gcc_unreachable ();
15715 	}
15716 
15717     case LTU:
15718       if (cond_or == DOM_CC_X_AND_Y)
15719 	return CC_DLTUmode;
15720 
15721       switch (cond2)
15722 	{
15723 	case LTU:
15724 	  return CC_DLTUmode;
15725 	case LEU:
15726 	  return CC_DLEUmode;
15727 	case NE:
15728 	  return CC_DNEmode;
15729 	default:
15730 	  gcc_unreachable ();
15731 	}
15732 
15733     case GTU:
15734       if (cond_or == DOM_CC_X_AND_Y)
15735 	return CC_DGTUmode;
15736 
15737       switch (cond2)
15738 	{
15739 	case GTU:
15740 	  return CC_DGTUmode;
15741 	case GEU:
15742 	  return CC_DGEUmode;
15743 	case NE:
15744 	  return CC_DNEmode;
15745 	default:
15746 	  gcc_unreachable ();
15747 	}
15748 
15749     /* The remaining cases only occur when both comparisons are the
15750        same.  */
15751     case NE:
15752       gcc_assert (cond1 == cond2);
15753       return CC_DNEmode;
15754 
15755     case LE:
15756       gcc_assert (cond1 == cond2);
15757       return CC_DLEmode;
15758 
15759     case GE:
15760       gcc_assert (cond1 == cond2);
15761       return CC_DGEmode;
15762 
15763     case LEU:
15764       gcc_assert (cond1 == cond2);
15765       return CC_DLEUmode;
15766 
15767     case GEU:
15768       gcc_assert (cond1 == cond2);
15769       return CC_DGEUmode;
15770 
15771     default:
15772       gcc_unreachable ();
15773     }
15774 }
15775 
15776 machine_mode
15777 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15778 {
15779   /* All floating point compares return CCFP if it is an equality
15780      comparison, and CCFPE otherwise.  */
15781   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15782     {
15783       switch (op)
15784 	{
15785 	case EQ:
15786 	case NE:
15787 	case UNORDERED:
15788 	case ORDERED:
15789 	case UNLT:
15790 	case UNLE:
15791 	case UNGT:
15792 	case UNGE:
15793 	case UNEQ:
15794 	case LTGT:
15795 	  return CCFPmode;
15796 
15797 	case LT:
15798 	case LE:
15799 	case GT:
15800 	case GE:
15801 	  return CCFPEmode;
15802 
15803 	default:
15804 	  gcc_unreachable ();
15805 	}
15806     }
15807 
15808   /* A compare with a shifted operand.  Because of canonicalization, the
15809      comparison will have to be swapped when we emit the assembler.  */
15810   if (GET_MODE (y) == SImode
15811       && (REG_P (y) || (GET_CODE (y) == SUBREG))
15812       && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15813 	  || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15814 	  || GET_CODE (x) == ROTATERT))
15815     return CC_SWPmode;
15816 
15817   /* A widened compare of the sum of a value plus a carry against a
15818      constant.  This is a representation of RSC.  We want to swap the
15819      result of the comparison at output.  Not valid if the Z bit is
15820      needed.  */
15821   if (GET_MODE (x) == DImode
15822       && GET_CODE (x) == PLUS
15823       && arm_borrow_operation (XEXP (x, 1), DImode)
15824       && CONST_INT_P (y)
15825       && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
15826 	   && (op == LE || op == GT))
15827 	  || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
15828 	      && (op == LEU || op == GTU))))
15829     return CC_SWPmode;
15830 
15831   /* If X is a constant we want to use CC_RSBmode.  This is
15832      non-canonical, but arm_gen_compare_reg uses this to generate the
15833      correct canonical form.  */
15834   if (GET_MODE (y) == SImode
15835       && (REG_P (y) || GET_CODE (y) == SUBREG)
15836       && CONST_INT_P (x))
15837     return CC_RSBmode;
15838 
15839   /* This operation is performed swapped, but since we only rely on the Z
15840      flag we don't need an additional mode.  */
15841   if (GET_MODE (y) == SImode
15842       && (REG_P (y) || (GET_CODE (y) == SUBREG))
15843       && GET_CODE (x) == NEG
15844       && (op ==	EQ || op == NE))
15845     return CC_Zmode;
15846 
15847   /* This is a special case that is used by combine to allow a
15848      comparison of a shifted byte load to be split into a zero-extend
15849      followed by a comparison of the shifted integer (only valid for
15850      equalities and unsigned inequalities).  */
15851   if (GET_MODE (x) == SImode
15852       && GET_CODE (x) == ASHIFT
15853       && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15854       && GET_CODE (XEXP (x, 0)) == SUBREG
15855       && MEM_P (SUBREG_REG (XEXP (x, 0)))
15856       && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15857       && (op == EQ || op == NE
15858 	  || op == GEU || op == GTU || op == LTU || op == LEU)
15859       && CONST_INT_P (y))
15860     return CC_Zmode;
15861 
15862   /* A construct for a conditional compare, if the false arm contains
15863      0, then both conditions must be true, otherwise either condition
15864      must be true.  Not all conditions are possible, so CCmode is
15865      returned if it can't be done.  */
15866   if (GET_CODE (x) == IF_THEN_ELSE
15867       && (XEXP (x, 2) == const0_rtx
15868 	  || XEXP (x, 2) == const1_rtx)
15869       && COMPARISON_P (XEXP (x, 0))
15870       && COMPARISON_P (XEXP (x, 1)))
15871     return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15872 					 INTVAL (XEXP (x, 2)));
15873 
15874   /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
15875   if (GET_CODE (x) == AND
15876       && (op == EQ || op == NE)
15877       && COMPARISON_P (XEXP (x, 0))
15878       && COMPARISON_P (XEXP (x, 1)))
15879     return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15880 					 DOM_CC_X_AND_Y);
15881 
15882   if (GET_CODE (x) == IOR
15883       && (op == EQ || op == NE)
15884       && COMPARISON_P (XEXP (x, 0))
15885       && COMPARISON_P (XEXP (x, 1)))
15886     return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15887 					 DOM_CC_X_OR_Y);
15888 
15889   /* An operation (on Thumb) where we want to test for a single bit.
15890      This is done by shifting that bit up into the top bit of a
15891      scratch register; we can then branch on the sign bit.  */
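  /* For example, testing bit N of r0 can be done with something like
     "lsls r3, r0, #(31 - N)" followed by a branch on the sign flag
     (bmi/bpl); illustrative only.  */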
15892   if (TARGET_THUMB1
15893       && GET_MODE (x) == SImode
15894       && (op == EQ || op == NE)
15895       && GET_CODE (x) == ZERO_EXTRACT
15896       && XEXP (x, 1) == const1_rtx)
15897     return CC_Nmode;
15898 
15899   /* An operation that sets the condition codes as a side-effect, the
15900      V flag is not set correctly, so we can only use comparisons where
15901      this doesn't matter.  (For LT and GE we can use "mi" and "pl"
15902      instead.)  */
15903   /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
15904   if (GET_MODE (x) == SImode
15905       && y == const0_rtx
15906       && (op == EQ || op == NE || op == LT || op == GE)
15907       && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15908 	  || GET_CODE (x) == AND || GET_CODE (x) == IOR
15909 	  || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15910 	  || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15911 	  || GET_CODE (x) == LSHIFTRT
15912 	  || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15913 	  || GET_CODE (x) == ROTATERT
15914 	  || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15915     return CC_NZmode;
15916 
15917   /* A comparison of ~reg with a const is really a special
15918      canonicalization of compare (~const, reg), which is a reverse
15919      subtract operation.  We may not get here if CONST is 0, but that
15920      doesn't matter because ~0 isn't a valid immediate for RSB.  */
15921   if (GET_MODE (x) == SImode
15922       && GET_CODE (x) == NOT
15923       && CONST_INT_P (y))
15924     return CC_RSBmode;
15925 
15926   if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15927     return CC_Zmode;
15928 
15929   if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15930       && GET_CODE (x) == PLUS
15931       && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15932     return CC_Cmode;
15933 
15934   if (GET_MODE (x) == DImode
15935       && GET_CODE (x) == PLUS
15936       && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
15937       && CONST_INT_P (y)
15938       && UINTVAL (y) == 0x800000000
15939       && (op == GEU || op == LTU))
15940     return CC_ADCmode;
15941 
15942   if (GET_MODE (x) == DImode
15943       && (op == GE || op == LT)
15944       && GET_CODE (x) == SIGN_EXTEND
15945       && ((GET_CODE (y) == PLUS
15946 	   && arm_borrow_operation (XEXP (y, 0), DImode))
15947 	  || arm_borrow_operation (y, DImode)))
15948     return CC_NVmode;
15949 
15950   if (GET_MODE (x) == DImode
15951       && (op == GEU || op == LTU)
15952       && GET_CODE (x) == ZERO_EXTEND
15953       && ((GET_CODE (y) == PLUS
15954 	   && arm_borrow_operation (XEXP (y, 0), DImode))
15955 	  || arm_borrow_operation (y, DImode)))
15956     return CC_Bmode;
15957 
15958   if (GET_MODE (x) == DImode
15959       && (op == EQ || op == NE)
15960       && (GET_CODE (x) == PLUS
15961 	  || GET_CODE (x) == MINUS)
15962       && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
15963 	  || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
15964       && GET_CODE (y) == SIGN_EXTEND
15965       && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
15966     return CC_Vmode;
15967 
15968   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15969     return GET_MODE (x);
15970 
15971   return CCmode;
15972 }
15973 
15974 /* X and Y are two (DImode) things to compare for the condition CODE.  Emit
15975    the sequence of instructions needed to generate a suitable condition
15976    code register.  Return the CC register result.  */
15977 static rtx
15978 arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
15979 {
15980   machine_mode mode;
15981   rtx cc_reg;
15982 
15983     /* We don't currently handle DImode in thumb1, but rely on libgcc.  */
15984   gcc_assert (TARGET_32BIT);
15985   gcc_assert (!CONST_INT_P (x));
15986 
15987   rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
15988 				  subreg_lowpart_offset (SImode, DImode));
15989   rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
15990 				  subreg_highpart_offset (SImode, DImode));
15991   rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
15992 				  subreg_lowpart_offset (SImode, DImode));
15993   rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
15994 				  subreg_highpart_offset (SImode, DImode));
15995   switch (code)
15996     {
15997     case EQ:
15998     case NE:
15999       {
16000 	if (y_lo == const0_rtx || y_hi == const0_rtx)
16001 	  {
16002 	    if (y_lo != const0_rtx)
16003 	      {
16004 		rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16005 
16006 		gcc_assert (y_hi == const0_rtx);
16007 		y_lo = gen_int_mode (-INTVAL (y_lo), SImode);
16008 		if (!arm_add_operand (y_lo, SImode))
16009 		  y_lo = force_reg (SImode, y_lo);
16010 		emit_insn (gen_addsi3 (scratch2, x_lo, y_lo));
16011 		x_lo = scratch2;
16012 	      }
16013 	    else if (y_hi != const0_rtx)
16014 	      {
16015 		rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16016 
16017 		y_hi = gen_int_mode (-INTVAL (y_hi), SImode);
16018 		if (!arm_add_operand (y_hi, SImode))
16019 		  y_hi = force_reg (SImode, y_hi);
16020 		emit_insn (gen_addsi3 (scratch2, x_hi, y_hi));
16021 		x_hi = scratch2;
16022 	      }
16023 
16024 	    if (!scratch)
16025 	      {
16026 		gcc_assert (!reload_completed);
16027 		scratch = gen_rtx_SCRATCH (SImode);
16028 	      }
16029 
16030 	    rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
16031 	    cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
16032 
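	    /* The IOR of the two (possibly adjusted) halves is zero iff
	       X equals Y; with the scratch clobber this typically matches
	       a pattern assembling to a single ORRS (a sketch, not a
	       guarantee of the exact insn chosen).  */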
16033 	    rtx set
16034 	      = gen_rtx_SET (cc_reg,
16035 			     gen_rtx_COMPARE (CC_NZmode,
16036 					      gen_rtx_IOR (SImode, x_lo, x_hi),
16037 					      const0_rtx));
16038 	    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
16039 							      clobber)));
16040 	    return cc_reg;
16041 	  }
16042 
16043 	if (!arm_add_operand (y_lo, SImode))
16044 	  y_lo = force_reg (SImode, y_lo);
16045 
16046 	if (!arm_add_operand (y_hi, SImode))
16047 	  y_hi = force_reg (SImode, y_hi);
16048 
16049 	rtx cmp1 = gen_rtx_NE (SImode, x_lo, y_lo);
16050 	rtx cmp2 = gen_rtx_NE (SImode, x_hi, y_hi);
16051 	rtx conjunction = gen_rtx_IOR (SImode, cmp1, cmp2);
16052 	mode = SELECT_CC_MODE (code, conjunction, const0_rtx);
16053 	cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16054 
16055 	emit_insn (gen_rtx_SET (cc_reg,
16056 				gen_rtx_COMPARE (mode, conjunction,
16057 						 const0_rtx)));
16058 	return cc_reg;
16059       }
16060 
16061     case LT:
16062     case GE:
16063       {
16064 	if (y_lo == const0_rtx)
16065 	  {
16066 	    /* If the low word of y is 0, then this is simply a normal
16067 	       compare of the upper words.  */
16068 	    if (!arm_add_operand (y_hi, SImode))
16069 	      y_hi = force_reg (SImode, y_hi);
16070 
16071 	    return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16072 	  }
16073 
16074 	if (!arm_add_operand (y_lo, SImode))
16075 	  y_lo = force_reg (SImode, y_lo);
16076 
16077 	rtx cmp1
16078 	  = gen_rtx_LTU (DImode,
16079 			 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16080 			 const0_rtx);
16081 
16082 	if (!scratch)
16083 	  scratch = gen_rtx_SCRATCH (SImode);
16084 
16085 	if (!arm_not_operand (y_hi, SImode))
16086 	  y_hi = force_reg (SImode, y_hi);
16087 
16088 	rtx_insn *insn;
16089 	if (y_hi == const0_rtx)
16090 	  insn = emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch, x_hi,
16091 							   cmp1));
16092 	else if (CONST_INT_P (y_hi))
16093 	  insn = emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch, x_hi,
16094 							     y_hi, cmp1));
16095 	else
16096 	  insn = emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch, x_hi, y_hi,
16097 							 cmp1));
16098 	return SET_DEST (single_set (insn));
16099       }
16100 
16101     case LE:
16102     case GT:
16103       {
16104 	/* During expansion, we only expect to get here if y is a
16105 	   constant that we want to handle, otherwise we should have
16106 	   swapped the operands already.  */
16107 	gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16108 
16109 	if (!const_ok_for_arm (INTVAL (y_lo)))
16110 	  y_lo = force_reg (SImode, y_lo);
16111 
16112 	/* Perform a reverse subtract and compare.  */
16113 	rtx cmp1
16114 	  = gen_rtx_LTU (DImode,
16115 			 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16116 			 const0_rtx);
16117 	rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
16118 								 x_hi, cmp1));
16119 	return SET_DEST (single_set (insn));
16120       }
16121 
16122     case LTU:
16123     case GEU:
16124       {
16125 	if (y_lo == const0_rtx)
16126 	  {
16127 	    /* If the low word of y is 0, then this is simply a normal
16128 	       compare of the upper words.  */
16129 	    if (!arm_add_operand (y_hi, SImode))
16130 	      y_hi = force_reg (SImode, y_hi);
16131 
16132 	    return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16133 	  }
16134 
16135 	if (!arm_add_operand (y_lo, SImode))
16136 	  y_lo = force_reg (SImode, y_lo);
16137 
16138 	rtx cmp1
16139 	  = gen_rtx_LTU (DImode,
16140 			 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16141 			 const0_rtx);
16142 
16143 	if (!scratch)
16144 	  scratch = gen_rtx_SCRATCH (SImode);
16145 	if (!arm_not_operand (y_hi, SImode))
16146 	  y_hi = force_reg (SImode, y_hi);
16147 
16148 	rtx_insn *insn;
16149 	if (y_hi == const0_rtx)
16150 	  insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
16151 							  cmp1));
16152 	else if (CONST_INT_P (y_hi))
16153 	  {
16154 	    /* Constant is viewed as unsigned when zero-extended.  */
16155 	    y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
16156 	    insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
16157 							      y_hi, cmp1));
16158 	  }
16159 	else
16160 	  insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
16161 							cmp1));
16162 	return SET_DEST (single_set (insn));
16163       }
16164 
16165     case LEU:
16166     case GTU:
16167       {
16168 	/* During expansion, we only expect to get here if y is a
16169 	   constant that we want to handle, otherwise we should have
16170 	   swapped the operands already.  */
16171 	gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16172 
16173 	if (!const_ok_for_arm (INTVAL (y_lo)))
16174 	  y_lo = force_reg (SImode, y_lo);
16175 
16176 	/* Perform a reverse subtract and compare.  */
16177 	rtx cmp1
16178 	  = gen_rtx_LTU (DImode,
16179 			 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16180 			 const0_rtx);
16181 	y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
16182 	rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
16183 								x_hi, cmp1));
16184 	return SET_DEST (single_set (insn));
16185       }
16186 
16187     default:
16188       gcc_unreachable ();
16189     }
16190 }
16191 
16192 /* X and Y are two things to compare using CODE.  Emit the compare insn and
16193    return the rtx for register 0 in the proper mode.  */
16194 rtx
16195 arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16196 {
16197   if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
16198     return arm_gen_dicompare_reg (code, x, y, scratch);
16199 
16200   machine_mode mode = SELECT_CC_MODE (code, x, y);
16201   rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16202   if (mode == CC_RSBmode)
16203     {
16204       if (!scratch)
16205 	scratch = gen_rtx_SCRATCH (SImode);
16206       emit_insn (gen_rsb_imm_compare_scratch (scratch,
16207 					      GEN_INT (~UINTVAL (x)), y));
16208     }
16209   else
16210     emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
16211 
16212   return cc_reg;
16213 }
16214 
16215 /* Generate a sequence of insns that will generate the correct return
16216    address mask depending on the physical architecture that the program
16217    is running on.  */
16218 rtx
16219 arm_gen_return_addr_mask (void)
16220 {
16221   rtx reg = gen_reg_rtx (Pmode);
16222 
16223   emit_insn (gen_return_addr_mask (reg));
16224   return reg;
16225 }
16226 
16227 void
16228 arm_reload_in_hi (rtx *operands)
16229 {
16230   rtx ref = operands[1];
16231   rtx base, scratch;
16232   HOST_WIDE_INT offset = 0;
16233 
16234   if (GET_CODE (ref) == SUBREG)
16235     {
16236       offset = SUBREG_BYTE (ref);
16237       ref = SUBREG_REG (ref);
16238     }
16239 
16240   if (REG_P (ref))
16241     {
16242       /* We have a pseudo which has been spilt onto the stack; there
16243 	 are two cases here: the first where there is a simple
16244 	 stack-slot replacement and a second where the stack-slot is
16245 	 out of range, or is used as a subreg.  */
16246       if (reg_equiv_mem (REGNO (ref)))
16247 	{
16248 	  ref = reg_equiv_mem (REGNO (ref));
16249 	  base = find_replacement (&XEXP (ref, 0));
16250 	}
16251       else
16252 	/* The slot is out of range, or was dressed up in a SUBREG.  */
16253 	base = reg_equiv_address (REGNO (ref));
16254 
16255       /* PR 62554: If there is no equivalent memory location then just move
16256 	 the value as an SImode register move.  This happens when the target
16257 	 architecture variant does not have an HImode register move.  */
16258       if (base == NULL)
16259 	{
16260 	  gcc_assert (REG_P (operands[0]));
16261 	  emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
16262 				gen_rtx_SUBREG (SImode, ref, 0)));
16263 	  return;
16264 	}
16265     }
16266   else
16267     base = find_replacement (&XEXP (ref, 0));
16268 
16269   /* Handle the case where the address is too complex to be offset by 1.  */
16270   if (GET_CODE (base) == MINUS
16271       || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16272     {
16273       rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16274 
16275       emit_set_insn (base_plus, base);
16276       base = base_plus;
16277     }
16278   else if (GET_CODE (base) == PLUS)
16279     {
16280       /* The addend must be CONST_INT, or we would have dealt with it above.  */
16281       HOST_WIDE_INT hi, lo;
16282 
16283       offset += INTVAL (XEXP (base, 1));
16284       base = XEXP (base, 0);
16285 
16286       /* Rework the address into a legal sequence of insns.  */
16287       /* Valid range for lo is -4095 -> 4095 */
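      /* For example, offset 0x1004 splits into lo = 4 and hi = 0x1000,
	 while offset -10 gives lo = -10 and hi = 0.  */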
16288       lo = (offset >= 0
16289 	    ? (offset & 0xfff)
16290 	    : -((-offset) & 0xfff));
16291 
16292       /* Corner case, if lo is the max offset then we would be out of range
16293 	 once we have added the additional 1 below, so bump the msb into the
16294 	 pre-loading insn(s).  */
16295       if (lo == 4095)
16296 	lo &= 0x7ff;
16297 
16298       hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16299 	     ^ (HOST_WIDE_INT) 0x80000000)
16300 	    - (HOST_WIDE_INT) 0x80000000);
16301 
16302       gcc_assert (hi + lo == offset);
16303 
16304       if (hi != 0)
16305 	{
16306 	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16307 
16308 	  /* Get the base address; addsi3 knows how to handle constants
16309 	     that require more than one insn.  */
16310 	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16311 	  base = base_plus;
16312 	  offset = lo;
16313 	}
16314     }
16315 
16316   /* Operands[2] may overlap operands[0] (though it won't overlap
16317      operands[1]), that's why we asked for a DImode reg -- so we can
16318      use the bit that does not overlap.  */
16319   if (REGNO (operands[2]) == REGNO (operands[0]))
16320     scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16321   else
16322     scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16323 
16324   emit_insn (gen_zero_extendqisi2 (scratch,
16325 				   gen_rtx_MEM (QImode,
16326 						plus_constant (Pmode, base,
16327 							       offset))));
16328   emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
16329 				   gen_rtx_MEM (QImode,
16330 						plus_constant (Pmode, base,
16331 							       offset + 1))));
16332   if (!BYTES_BIG_ENDIAN)
16333     emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16334 		   gen_rtx_IOR (SImode,
16335 				gen_rtx_ASHIFT
16336 				(SImode,
16337 				 gen_rtx_SUBREG (SImode, operands[0], 0),
16338 				 GEN_INT (8)),
16339 				scratch));
16340   else
16341     emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16342 		   gen_rtx_IOR (SImode,
16343 				gen_rtx_ASHIFT (SImode, scratch,
16344 						GEN_INT (8)),
16345 				gen_rtx_SUBREG (SImode, operands[0], 0)));
16346 }
16347 
16348 /* Handle storing a half-word to memory during reload by synthesizing as two
16349    byte stores.  Take care not to clobber the input values until after we
16350    have moved them somewhere safe.  This code assumes that if the DImode
16351    scratch in operands[2] overlaps either the input value or output address
16352    in some way, then that value must die in this insn (we absolutely need
16353    two scratch registers for some corner cases).  */
16354 void
16355 arm_reload_out_hi (rtx *operands)
16356 {
16357   rtx ref = operands[0];
16358   rtx outval = operands[1];
16359   rtx base, scratch;
16360   HOST_WIDE_INT offset = 0;
16361 
16362   if (GET_CODE (ref) == SUBREG)
16363     {
16364       offset = SUBREG_BYTE (ref);
16365       ref = SUBREG_REG (ref);
16366     }
16367 
16368   if (REG_P (ref))
16369     {
16370       /* We have a pseudo which has been spilt onto the stack; there
16371 	 are two cases here: the first where there is a simple
16372 	 stack-slot replacement and a second where the stack-slot is
16373 	 out of range, or is used as a subreg.  */
16374       if (reg_equiv_mem (REGNO (ref)))
16375 	{
16376 	  ref = reg_equiv_mem (REGNO (ref));
16377 	  base = find_replacement (&XEXP (ref, 0));
16378 	}
16379       else
16380 	/* The slot is out of range, or was dressed up in a SUBREG.  */
16381 	base = reg_equiv_address (REGNO (ref));
16382 
16383       /* PR 62254: If there is no equivalent memory location then just move
16384 	 the value as an SImode register move.  This happens when the target
16385 	 architecture variant does not have an HImode register move.  */
16386       if (base == NULL)
16387 	{
16388 	  gcc_assert (REG_P (outval) || SUBREG_P (outval));
16389 
16390 	  if (REG_P (outval))
16391 	    {
16392 	      emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16393 				    gen_rtx_SUBREG (SImode, outval, 0)));
16394 	    }
16395 	  else /* SUBREG_P (outval)  */
16396 	    {
16397 	      if (GET_MODE (SUBREG_REG (outval)) == SImode)
16398 		emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16399 				      SUBREG_REG (outval)));
16400 	      else
16401 		/* FIXME: Handle other cases ?  */
16402 		gcc_unreachable ();
16403 	    }
16404 	  return;
16405 	}
16406     }
16407   else
16408     base = find_replacement (&XEXP (ref, 0));
16409 
16410   scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16411 
16412   /* Handle the case where the address is too complex to be offset by 1.  */
16413   if (GET_CODE (base) == MINUS
16414       || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16415     {
16416       rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16417 
16418       /* Be careful not to destroy OUTVAL.  */
16419       if (reg_overlap_mentioned_p (base_plus, outval))
16420 	{
16421 	  /* Updating base_plus might destroy outval, see if we can
16422 	     swap the scratch and base_plus.  */
16423 	  if (!reg_overlap_mentioned_p (scratch, outval))
16424 	    std::swap (scratch, base_plus);
16425 	  else
16426 	    {
16427 	      rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16428 
16429 	      /* Be conservative and copy OUTVAL into the scratch now,
16430 		 this should only be necessary if outval is a subreg
16431 		 of something larger than a word.  */
16432 	      /* XXX Might this clobber base?  I can't see how it can,
16433 		 since scratch is known to overlap with OUTVAL, and
16434 		 must be wider than a word.  */
16435 	      emit_insn (gen_movhi (scratch_hi, outval));
16436 	      outval = scratch_hi;
16437 	    }
16438 	}
16439 
16440       emit_set_insn (base_plus, base);
16441       base = base_plus;
16442     }
16443   else if (GET_CODE (base) == PLUS)
16444     {
16445       /* The addend must be CONST_INT, or we would have dealt with it above.  */
16446       HOST_WIDE_INT hi, lo;
16447 
16448       offset += INTVAL (XEXP (base, 1));
16449       base = XEXP (base, 0);
16450 
16451       /* Rework the address into a legal sequence of insns.  */
16452       /* Valid range for lo is -4095 -> 4095 */
16453       lo = (offset >= 0
16454 	    ? (offset & 0xfff)
16455 	    : -((-offset) & 0xfff));
16456 
16457       /* Corner case, if lo is the max offset then we would be out of range
16458 	 once we have added the additional 1 below, so bump the msb into the
16459 	 pre-loading insn(s).  */
16460       if (lo == 4095)
16461 	lo &= 0x7ff;
16462 
16463       hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16464 	     ^ (HOST_WIDE_INT) 0x80000000)
16465 	    - (HOST_WIDE_INT) 0x80000000);
16466 
16467       gcc_assert (hi + lo == offset);
16468 
16469       if (hi != 0)
16470 	{
16471 	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16472 
16473 	  /* Be careful not to destroy OUTVAL.  */
16474 	  if (reg_overlap_mentioned_p (base_plus, outval))
16475 	    {
16476 	      /* Updating base_plus might destroy outval, see if we
16477 		 can swap the scratch and base_plus.  */
16478 	      if (!reg_overlap_mentioned_p (scratch, outval))
16479 	        std::swap (scratch, base_plus);
16480 	      else
16481 		{
16482 		  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16483 
16484 		  /* Be conservative and copy outval into scratch now,
16485 		     this should only be necessary if outval is a
16486 		     subreg of something larger than a word.  */
16487 		  /* XXX Might this clobber base?  I can't see how it
16488 		     can, since scratch is known to overlap with
16489 		     outval.  */
16490 		  emit_insn (gen_movhi (scratch_hi, outval));
16491 		  outval = scratch_hi;
16492 		}
16493 	    }
16494 
16495 	  /* Get the base address; addsi3 knows how to handle constants
16496 	     that require more than one insn.  */
16497 	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16498 	  base = base_plus;
16499 	  offset = lo;
16500 	}
16501     }
16502 
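  /* Illustrative little-endian expansion (a sketch; the exact operands
     depend on reload):
	strb	<outval>, [<base>, #<offset>]
	lsr	<scratch>, <outval>, #8
	strb	<scratch>, [<base>, #<offset + 1>]  */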
16503   if (BYTES_BIG_ENDIAN)
16504     {
16505       emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16506 					 plus_constant (Pmode, base,
16507 							offset + 1)),
16508 			    gen_lowpart (QImode, outval)));
16509       emit_insn (gen_lshrsi3 (scratch,
16510 			      gen_rtx_SUBREG (SImode, outval, 0),
16511 			      GEN_INT (8)));
16512       emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16513 								offset)),
16514 			    gen_lowpart (QImode, scratch)));
16515     }
16516   else
16517     {
16518       emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16519 								offset)),
16520 			    gen_lowpart (QImode, outval)));
16521       emit_insn (gen_lshrsi3 (scratch,
16522 			      gen_rtx_SUBREG (SImode, outval, 0),
16523 			      GEN_INT (8)));
16524       emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16525 					 plus_constant (Pmode, base,
16526 							offset + 1)),
16527 			    gen_lowpart (QImode, scratch)));
16528     }
16529 }
16530 
16531 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
16532    (padded to the size of a word) should be passed in a register.  */
16533 
16534 static bool
16535 arm_must_pass_in_stack (const function_arg_info &arg)
16536 {
16537   if (TARGET_AAPCS_BASED)
16538     return must_pass_in_stack_var_size (arg);
16539   else
16540     return must_pass_in_stack_var_size_or_pad (arg);
16541 }
16542 
16543 
16544 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
16545    byte of a stack argument has useful data.  For legacy APCS ABIs we use
16546    the default.  For AAPCS based ABIs small aggregate types are placed
16547    in the lowest memory address.  */
16548 
16549 static pad_direction
16550 arm_function_arg_padding (machine_mode mode, const_tree type)
16551 {
16552   if (!TARGET_AAPCS_BASED)
16553     return default_function_arg_padding (mode, type);
16554 
16555   if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
16556     return PAD_DOWNWARD;
16557 
16558   return PAD_UPWARD;
16559 }
16560 
16561 
16562 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
16563    Return !BYTES_BIG_ENDIAN if the least significant byte of the
16564    register has useful data, and return the opposite if the most
16565    significant byte does.  */
16566 
16567 bool
16568 arm_pad_reg_upward (machine_mode mode,
16569                     tree type, int first ATTRIBUTE_UNUSED)
16570 {
16571   if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
16572     {
16573       /* For AAPCS, small aggregates, small fixed-point types,
16574 	 and small complex types are always padded upwards.  */
16575       if (type)
16576 	{
16577 	  if ((AGGREGATE_TYPE_P (type)
16578 	       || TREE_CODE (type) == COMPLEX_TYPE
16579 	       || FIXED_POINT_TYPE_P (type))
16580 	      && int_size_in_bytes (type) <= 4)
16581 	    return true;
16582 	}
16583       else
16584 	{
16585 	  if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
16586 	      && GET_MODE_SIZE (mode) <= 4)
16587 	    return true;
16588 	}
16589     }
16590 
16591   /* Otherwise, use default padding.  */
16592   return !BYTES_BIG_ENDIAN;
16593 }
16594 
16595 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
16596    assuming that the address in the base register is word aligned.  */
16597 bool
16598 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
16599 {
16600   HOST_WIDE_INT max_offset;
16601 
16602   /* Offset must be a multiple of 4 in Thumb mode.  */
16603   if (TARGET_THUMB2 && ((offset & 3) != 0))
16604     return false;
16605 
16606   if (TARGET_THUMB2)
16607     max_offset = 1020;
16608   else if (TARGET_ARM)
16609     max_offset = 255;
16610   else
16611     return false;
16612 
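  /* For example, an offset of 252 is accepted in both ARM and Thumb-2
     state; 256 is accepted only in Thumb-2 (a multiple of 4 within 1020);
     254 is rejected in Thumb-2 because it is not a multiple of 4.  */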
16613   return ((offset <= max_offset) && (offset >= -max_offset));
16614 }
16615 
16616 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
16617    Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
16618    Assumes that the address in the base register RN is word aligned.  Pattern
16619    guarantees that both memory accesses use the same base register,
16620    the offsets are constants within the range, and the gap between the offsets is 4.
16621    If reload is complete then check that the registers are legal.  WBACK indicates whether
16622    address is updated.  LOAD indicates whether memory access is load or store.  */
16623 bool
16624 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
16625                        bool wback, bool load)
16626 {
16627   unsigned int t, t2, n;
16628 
16629   if (!reload_completed)
16630     return true;
16631 
16632   if (!offset_ok_for_ldrd_strd (offset))
16633     return false;
16634 
16635   t = REGNO (rt);
16636   t2 = REGNO (rt2);
16637   n = REGNO (rn);
16638 
16639   if ((TARGET_THUMB2)
16640       && ((wback && (n == t || n == t2))
16641           || (t == SP_REGNUM)
16642           || (t == PC_REGNUM)
16643           || (t2 == SP_REGNUM)
16644           || (t2 == PC_REGNUM)
16645           || (!load && (n == PC_REGNUM))
16646           || (load && (t == t2))
16647           /* Triggers Cortex-M3 LDRD errata.  */
16648           || (!wback && load && fix_cm3_ldrd && (n == t))))
16649     return false;
16650 
16651   if ((TARGET_ARM)
16652       && ((wback && (n == t || n == t2))
16653           || (t2 == PC_REGNUM)
16654           || (t % 2 != 0)   /* First destination register is not even.  */
16655           || (t2 != t + 1)
16656           /* PC can be used as base register (for offset addressing only),
16657              but it is deprecated.  */
16658           || (n == PC_REGNUM)))
16659     return false;
16660 
16661   return true;
16662 }
16663 
16664 /* Return true if a 64-bit access with alignment ALIGN and with a
16665    constant offset OFFSET from the base pointer is permitted on this
16666    architecture.  */
16667 static bool
16668 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
16669 {
16670   return (unaligned_access
16671 	  ? (align >= BITS_PER_WORD && (offset & 3) == 0)
16672 	  : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
16673 }
16674 
16675 /* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
16676    operand MEM's address contains an immediate offset from the base
16677    register and has no side effects, in which case it sets BASE,
16678    OFFSET and ALIGN accordingly.  */
16679 static bool
16680 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
16681 {
16682   rtx addr;
16683 
16684   gcc_assert (base != NULL && offset != NULL);
16685 
16686   /* TODO: Handle more general memory operand patterns, such as
16687      PRE_DEC and PRE_INC.  */
16688 
16689   if (side_effects_p (mem))
16690     return false;
16691 
16692   /* Can't deal with subregs.  */
16693   if (GET_CODE (mem) == SUBREG)
16694     return false;
16695 
16696   gcc_assert (MEM_P (mem));
16697 
16698   *offset = const0_rtx;
16699   *align = MEM_ALIGN (mem);
16700 
16701   addr = XEXP (mem, 0);
16702 
16703   /* If addr isn't valid for DImode, then we can't handle it.  */
16704   if (!arm_legitimate_address_p (DImode, addr,
16705 				 reload_in_progress || reload_completed))
16706     return false;
16707 
16708   if (REG_P (addr))
16709     {
16710       *base = addr;
16711       return true;
16712     }
16713   else if (GET_CODE (addr) == PLUS)
16714     {
16715       *base = XEXP (addr, 0);
16716       *offset = XEXP (addr, 1);
16717       return (REG_P (*base) && CONST_INT_P (*offset));
16718     }
16719 
16720   return false;
16721 }
16722 
16723 /* Called from a peephole2 to replace two word-size accesses with a
16724    single LDRD/STRD instruction.  Returns true iff we can generate a
16725    new instruction sequence.  That is, both accesses use the same base
16726    register and the gap between constant offsets is 4.  This function
16727    may reorder its operands to match ldrd/strd RTL templates.
16728    OPERANDS are the operands found by the peephole matcher;
16729    OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
16730    corresponding memory operands.  LOAD indicates whether the access
16731    is load or store.  CONST_STORE indicates a store of constant
16732    integer values held in OPERANDS[4,5] and assumes that the pattern
16733    is four insns long, for the purpose of checking dead registers.
16734    COMMUTE indicates that register operands may be reordered.  */
16735 bool
16736 gen_operands_ldrd_strd (rtx *operands, bool load,
16737                         bool const_store, bool commute)
16738 {
16739   int nops = 2;
16740   HOST_WIDE_INT offsets[2], offset, align[2];
16741   rtx base = NULL_RTX;
16742   rtx cur_base, cur_offset, tmp;
16743   int i, gap;
16744   HARD_REG_SET regset;
16745 
16746   gcc_assert (!const_store || !load);
16747   /* Check that the memory references are immediate offsets from the
16748      same base register.  Extract the base register, the destination
16749      registers, and the corresponding memory offsets.  */
16750   for (i = 0; i < nops; i++)
16751     {
16752       if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
16753 				 &align[i]))
16754         return false;
16755 
16756       if (i == 0)
16757         base = cur_base;
16758       else if (REGNO (base) != REGNO (cur_base))
16759         return false;
16760 
16761       offsets[i] = INTVAL (cur_offset);
16762       if (GET_CODE (operands[i]) == SUBREG)
16763         {
16764           tmp = SUBREG_REG (operands[i]);
16765           gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
16766           operands[i] = tmp;
16767         }
16768     }
16769 
16770   /* Make sure there is no dependency between the individual loads.  */
16771   if (load && REGNO (operands[0]) == REGNO (base))
16772     return false; /* RAW */
16773 
16774   if (load && REGNO (operands[0]) == REGNO (operands[1]))
16775     return false; /* WAW */
16776 
16777   /* If the same input register is used in both stores
16778      when storing different constants, try to find a free register.
16779      For example, the code
16780 	mov r0, 0
16781 	str r0, [r2]
16782 	mov r0, 1
16783 	str r0, [r2, #4]
16784      can be transformed into
16785 	mov r1, 0
16786 	mov r0, 1
16787 	strd r1, r0, [r2]
16788      in Thumb mode assuming that r1 is free.
16789      For ARM mode do the same but only if the starting register
16790      can be made to be even.  */
16791   if (const_store
16792       && REGNO (operands[0]) == REGNO (operands[1])
16793       && INTVAL (operands[4]) != INTVAL (operands[5]))
16794     {
16795     if (TARGET_THUMB2)
16796       {
16797         CLEAR_HARD_REG_SET (regset);
16798         tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16799         if (tmp == NULL_RTX)
16800           return false;
16801 
16802         /* Use the new register in the first load to ensure that
16803            if the original input register is not dead after peephole,
16804            then it will have the correct constant value.  */
16805         operands[0] = tmp;
16806       }
16807     else if (TARGET_ARM)
16808       {
16809         int regno = REGNO (operands[0]);
16810         if (!peep2_reg_dead_p (4, operands[0]))
16811           {
16812             /* When the input register is even and is not dead after the
16813                pattern, it has to hold the second constant but we cannot
16814                form a legal STRD in ARM mode with this register as the second
16815                register.  */
16816             if (regno % 2 == 0)
16817               return false;
16818 
16819             /* Is regno-1 free? */
16820             SET_HARD_REG_SET (regset);
16821             CLEAR_HARD_REG_BIT(regset, regno - 1);
16822             tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16823             if (tmp == NULL_RTX)
16824               return false;
16825 
16826             operands[0] = tmp;
16827           }
16828         else
16829           {
16830             /* Find a DImode register.  */
16831             CLEAR_HARD_REG_SET (regset);
16832             tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16833             if (tmp != NULL_RTX)
16834               {
16835                 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16836                 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16837               }
16838             else
16839               {
16840                 /* Can we use the input register to form a DI register?  */
16841                 SET_HARD_REG_SET (regset);
16842                 CLEAR_HARD_REG_BIT(regset,
16843                                    regno % 2 == 0 ? regno + 1 : regno - 1);
16844                 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16845                 if (tmp == NULL_RTX)
16846                   return false;
16847                 operands[regno % 2 == 1 ? 0 : 1] = tmp;
16848               }
16849           }
16850 
16851         gcc_assert (operands[0] != NULL_RTX);
16852         gcc_assert (operands[1] != NULL_RTX);
16853         gcc_assert (REGNO (operands[0]) % 2 == 0);
16854         gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
16855       }
16856     }
16857 
16858   /* Make sure the instructions are ordered with lower memory access first.  */
16859   if (offsets[0] > offsets[1])
16860     {
16861       gap = offsets[0] - offsets[1];
16862       offset = offsets[1];
16863 
16864       /* Swap the instructions such that lower memory is accessed first.  */
16865       std::swap (operands[0], operands[1]);
16866       std::swap (operands[2], operands[3]);
16867       std::swap (align[0], align[1]);
16868       if (const_store)
16869         std::swap (operands[4], operands[5]);
16870     }
16871   else
16872     {
16873       gap = offsets[1] - offsets[0];
16874       offset = offsets[0];
16875     }
16876 
16877   /* Make sure accesses are to consecutive memory locations.  */
16878   if (gap != GET_MODE_SIZE (SImode))
16879     return false;
16880 
16881   if (!align_ok_ldrd_strd (align[0], offset))
16882     return false;
16883 
16884   /* Make sure we generate legal instructions.  */
16885   if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16886                              false, load))
16887     return true;
16888 
16889   /* In Thumb state, where registers are almost unconstrained, there
16890      is little hope to fix it.  */
16891   if (TARGET_THUMB2)
16892     return false;
16893 
16894   if (load && commute)
16895     {
16896       /* Try reordering registers.  */
16897       std::swap (operands[0], operands[1]);
16898       if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16899                                  false, load))
16900         return true;
16901     }
16902 
16903   if (const_store)
16904     {
16905       /* If input registers are dead after this pattern, they can be
16906          reordered or replaced by other registers that are free in the
16907          current pattern.  */
16908       if (!peep2_reg_dead_p (4, operands[0])
16909           || !peep2_reg_dead_p (4, operands[1]))
16910         return false;
16911 
16912       /* Try to reorder the input registers.  */
16913       /* For example, the code
16914            mov r0, 0
16915            mov r1, 1
16916            str r1, [r2]
16917            str r0, [r2, #4]
16918          can be transformed into
16919            mov r1, 0
16920            mov r0, 1
16921            strd r0, [r2]
16922       */
16923       if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
16924                                   false, false))
16925         {
16926           std::swap (operands[0], operands[1]);
16927           return true;
16928         }
16929 
16930       /* Try to find a free DI register.  */
16931       CLEAR_HARD_REG_SET (regset);
16932       add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
16933       add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
16934       while (true)
16935         {
16936           tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16937           if (tmp == NULL_RTX)
16938             return false;
16939 
16940           /* DREG must be an even-numbered register in DImode.
16941              Split it into SI registers.  */
16942           operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16943           operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16944           gcc_assert (operands[0] != NULL_RTX);
16945           gcc_assert (operands[1] != NULL_RTX);
16946           gcc_assert (REGNO (operands[0]) % 2 == 0);
16947           gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
16948 
16949           return (operands_ok_ldrd_strd (operands[0], operands[1],
16950                                          base, offset,
16951                                          false, load));
16952         }
16953     }
16954 
16955   return false;
16956 }
16957 
16958 
16959 /* Return true if parallel execution of the two word-size accesses provided
16960    could be satisfied with a single LDRD/STRD instruction.  Two word-size
16961    accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
16962    register operands and OPERANDS[2,3] are the corresponding memory operands.
16963    */
16964 bool
16965 valid_operands_ldrd_strd (rtx *operands, bool load)
16966 {
16967   int nops = 2;
16968   HOST_WIDE_INT offsets[2], offset, align[2];
16969   rtx base = NULL_RTX;
16970   rtx cur_base, cur_offset;
16971   int i, gap;
16972 
16973   /* Check that the memory references are immediate offsets from the
16974      same base register.  Extract the base register, the destination
16975      registers, and the corresponding memory offsets.  */
16976   for (i = 0; i < nops; i++)
16977     {
16978       if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
16979 				 &align[i]))
16980 	return false;
16981 
16982       if (i == 0)
16983 	base = cur_base;
16984       else if (REGNO (base) != REGNO (cur_base))
16985 	return false;
16986 
16987       offsets[i] = INTVAL (cur_offset);
16988       if (GET_CODE (operands[i]) == SUBREG)
16989 	return false;
16990     }
16991 
16992   if (offsets[0] > offsets[1])
16993     return false;
16994 
16995   gap = offsets[1] - offsets[0];
16996   offset = offsets[0];
16997 
16998   /* Make sure accesses are to consecutive memory locations.  */
16999   if (gap != GET_MODE_SIZE (SImode))
17000     return false;
17001 
17002   if (!align_ok_ldrd_strd (align[0], offset))
17003     return false;
17004 
17005   return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17006 				false, load);
17007 }
17008 
17009 
17010 /* Print a symbolic form of X to the debug file, F.  */
17011 static void
17012 arm_print_value (FILE *f, rtx x)
17013 {
17014   switch (GET_CODE (x))
17015     {
17016     case CONST_INT:
17017       fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
17018       return;
17019 
17020     case CONST_DOUBLE:
17021       {
17022 	char fpstr[20];
17023 	real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17024 			 sizeof (fpstr), 0, 1);
17025 	fputs (fpstr, f);
17026       }
17027       return;
17028 
17029     case CONST_VECTOR:
17030       {
17031 	int i;
17032 
17033 	fprintf (f, "<");
17034 	for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
17035 	  {
17036 	    fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
17037 	    if (i < (CONST_VECTOR_NUNITS (x) - 1))
17038 	      fputc (',', f);
17039 	  }
17040 	fprintf (f, ">");
17041       }
17042       return;
17043 
17044     case CONST_STRING:
17045       fprintf (f, "\"%s\"", XSTR (x, 0));
17046       return;
17047 
17048     case SYMBOL_REF:
17049       fprintf (f, "`%s'", XSTR (x, 0));
17050       return;
17051 
17052     case LABEL_REF:
17053       fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
17054       return;
17055 
17056     case CONST:
17057       arm_print_value (f, XEXP (x, 0));
17058       return;
17059 
17060     case PLUS:
17061       arm_print_value (f, XEXP (x, 0));
17062       fprintf (f, "+");
17063       arm_print_value (f, XEXP (x, 1));
17064       return;
17065 
17066     case PC:
17067       fprintf (f, "pc");
17068       return;
17069 
17070     default:
17071       fprintf (f, "????");
17072       return;
17073     }
17074 }
17075 
17076 /* Routines for manipulation of the constant pool.  */
17077 
17078 /* Arm instructions cannot load a large constant directly into a
17079    register; they have to come from a pc relative load.  The constant
17080    must therefore be placed in the addressable range of the pc
17081    relative load.  Depending on the precise pc relative load
17082    instruction the range is somewhere between 256 bytes and 4k.  This
17083    means that we often have to dump a constant inside a function, and
17084    generate code to branch around it.
17085 
17086    It is important to minimize this, since the branches will slow
17087    things down and make the code larger.
17088 
17089    Normally we can hide the table after an existing unconditional
17090    branch so that there is no interruption of the flow, but in the
17091    worst case the code looks like this:
17092 
17093 	ldr	rn, L1
17094 	...
17095 	b	L2
17096 	align
17097 	L1:	.long value
17098 	L2:
17099 	...
17100 
17101 	ldr	rn, L3
17102 	...
17103 	b	L4
17104 	align
17105 	L3:	.long value
17106 	L4:
17107 	...
17108 
17109    We fix this by performing a scan after scheduling, which notices
17110    which instructions need to have their operands fetched from the
17111    constant table and builds the table.
17112 
17113    The algorithm starts by building a table of all the constants that
17114    need fixing up and all the natural barriers in the function (places
17115    where a constant table can be dropped without breaking the flow).
17116    For each fixup we note how far the pc-relative replacement will be
17117    able to reach and the offset of the instruction into the function.
17118 
17119    Having built the table we then group the fixes together to form
17120    tables that are as large as possible (subject to addressing
17121    constraints) and emit each table of constants after the last
17122    barrier that is within range of all the instructions in the group.
17123    If a group does not contain a barrier, then we forcibly create one
17124    by inserting a jump instruction into the flow.  Once the table has
17125    been inserted, the insns are then modified to reference the
17126    relevant entry in the pool.
17127 
17128    Possible enhancements to the algorithm (not implemented) are:
17129 
17130    1) For some processors and object formats, there may be benefit in
17131    aligning the pools to the start of cache lines; this alignment
17132    would need to be taken into account when calculating addressability
17133    of a pool.  */
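/* Schematically, the common (good) case hides the pool behind a branch
   that is already present, so no extra jump is needed:

	...
	b	L2		@ pre-existing unconditional branch
	align
	L1:	.long value	@ pool emitted here
	L2:
	...

   (illustrative only).  */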
17134 
17135 /* These typedefs are located at the start of this file, so that
17136    they can be used in the prototypes there.  This comment is to
17137    remind readers of that fact so that the following structures
17138    can be understood more easily.
17139 
17140      typedef struct minipool_node    Mnode;
17141      typedef struct minipool_fixup   Mfix;  */
17142 
17143 struct minipool_node
17144 {
17145   /* Doubly linked chain of entries.  */
17146   Mnode * next;
17147   Mnode * prev;
17148   /* The maximum offset into the code at which this entry can be placed.  While
17149      pushing fixes for forward references, all entries are sorted in order
17150      of increasing max_address.  */
17151   HOST_WIDE_INT max_address;
17152   /* Similarly for an entry inserted for a backwards ref.  */
17153   HOST_WIDE_INT min_address;
17154   /* The number of fixes referencing this entry.  This can become zero
17155      if we "unpush" an entry.  In this case we ignore the entry when we
17156      come to emit the code.  */
17157   int refcount;
17158   /* The offset from the start of the minipool.  */
17159   HOST_WIDE_INT offset;
17160   /* The value in the table.  */
17161   rtx value;
17162   /* The mode of value.  */
17163   machine_mode mode;
17164   /* The size of the value.  With iWMMXt enabled
17165      sizes > 4 also imply an alignment of 8 bytes.  */
17166   int fix_size;
17167 };
17168 
17169 struct minipool_fixup
17170 {
17171   Mfix *            next;
17172   rtx_insn *        insn;
17173   HOST_WIDE_INT     address;
17174   rtx *             loc;
17175   machine_mode mode;
17176   int               fix_size;
17177   rtx               value;
17178   Mnode *           minipool;
17179   HOST_WIDE_INT     forwards;
17180   HOST_WIDE_INT     backwards;
17181 };
17182 
17183 /* Fixes less than a word need padding out to a word boundary.  */
17184 #define MINIPOOL_FIX_SIZE(mode) \
17185   (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
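/* For example, a QImode or HImode fix still occupies 4 bytes in the pool,
   while a DImode fix occupies 8.  */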
17186 
17187 static Mnode *	minipool_vector_head;
17188 static Mnode *	minipool_vector_tail;
17189 static rtx_code_label	*minipool_vector_label;
17190 static int	minipool_pad;
17191 
17192 /* The linked list of all minipool fixes required for this function.  */
17193 Mfix * 		minipool_fix_head;
17194 Mfix * 		minipool_fix_tail;
17195 /* The fix entry for the current minipool, once it has been placed.  */
17196 Mfix *		minipool_barrier;
17197 
17198 #ifndef JUMP_TABLES_IN_TEXT_SECTION
17199 #define JUMP_TABLES_IN_TEXT_SECTION 0
17200 #endif
17201 
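/* Return the number of bytes the jump table INSN occupies in the instruction
   stream, or zero when jump tables are emitted into a separate read-only data
   section instead.  */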
17202 static HOST_WIDE_INT
17203 get_jump_table_size (rtx_jump_table_data *insn)
17204 {
17205   /* ADDR_VECs only take room if read-only data goes into the text
17206      section.  */
17207   if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
17208     {
17209       rtx body = PATTERN (insn);
17210       int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
17211       HOST_WIDE_INT size;
17212       HOST_WIDE_INT modesize;
17213 
17214       modesize = GET_MODE_SIZE (GET_MODE (body));
17215       size = modesize * XVECLEN (body, elt);
17216       switch (modesize)
17217 	{
17218 	case 1:
17219 	  /* Round up the size of the TBB table to a halfword boundary.  */
17220 	  size = (size + 1) & ~HOST_WIDE_INT_1;
17221 	  break;
17222 	case 2:
17223 	  /* No padding necessary for TBH.  */
17224 	  break;
17225 	case 4:
17226 	  /* Add two bytes for alignment on Thumb.  */
17227 	  if (TARGET_THUMB)
17228 	    size += 2;
17229 	  break;
17230 	default:
17231 	  gcc_unreachable ();
17232 	}
17233       return size;
17234     }
17235 
17236   return 0;
17237 }
17238 
17239 /* Emit insns to load the function address from FUNCDESC (an FDPIC
17240    function descriptor) into a register and the GOT address into the
17241    FDPIC register, returning an rtx for the register holding the
17242    function address.  */
17243 
17244 rtx
17245 arm_load_function_descriptor (rtx funcdesc)
17246 {
17247   rtx fnaddr_reg = gen_reg_rtx (Pmode);
17248   rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
17249   rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
17250   rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
17251 
17252   emit_move_insn (fnaddr_reg, fnaddr);
17253 
17254   /* The ABI requires the entry point address to be loaded first, but
17255      since we cannot support lazy binding for lack of atomic load of
17256      two 32-bit values, we do not need to bother to prevent the
17257      previous load from being moved after that of the GOT address.  */
17258   emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));
17259 
17260   return fnaddr_reg;
17261 }
17262 
17263 /* Return the maximum amount of padding that will be inserted before
17264    label LABEL.  */
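/* For example, an 8-byte aligned label on Thumb (minimum insn size 2) can be
   preceded by up to 6 bytes of padding.  */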
17265 static HOST_WIDE_INT
17266 get_label_padding (rtx label)
17267 {
17268   HOST_WIDE_INT align, min_insn_size;
17269 
17270   align = 1 << label_to_alignment (label).levels[0].log;
17271   min_insn_size = TARGET_THUMB ? 2 : 4;
17272   return align > min_insn_size ? align - min_insn_size : 0;
17273 }
17274 
17275 /* Move a minipool fix MP from its current location to before MAX_MP.
17276    If MAX_MP is NULL, then MP doesn't need moving, but the addressing
17277    constraints may need updating.  */
17278 static Mnode *
17279 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
17280 			       HOST_WIDE_INT max_address)
17281 {
17282   /* The code below assumes these are different.  */
17283   gcc_assert (mp != max_mp);
17284 
17285   if (max_mp == NULL)
17286     {
17287       if (max_address < mp->max_address)
17288 	mp->max_address = max_address;
17289     }
17290   else
17291     {
17292       if (max_address > max_mp->max_address - mp->fix_size)
17293 	mp->max_address = max_mp->max_address - mp->fix_size;
17294       else
17295 	mp->max_address = max_address;
17296 
17297       /* Unlink MP from its current position.  Since max_mp is non-null,
17298 	 mp->prev must be non-null.  */
17299       mp->prev->next = mp->next;
17300       if (mp->next != NULL)
17301 	mp->next->prev = mp->prev;
17302       else
17303 	minipool_vector_tail = mp->prev;
17304 
17305       /* Re-insert it before MAX_MP.  */
17306       mp->next = max_mp;
17307       mp->prev = max_mp->prev;
17308       max_mp->prev = mp;
17309 
17310       if (mp->prev != NULL)
17311 	mp->prev->next = mp;
17312       else
17313 	minipool_vector_head = mp;
17314     }
17315 
17316   /* Save the new entry.  */
17317   max_mp = mp;
17318 
17319   /* Scan over the preceding entries and adjust their addresses as
17320      required.  */
17321   while (mp->prev != NULL
17322 	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17323     {
17324       mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17325       mp = mp->prev;
17326     }
17327 
17328   return max_mp;
17329 }
17330 
17331 /* Add a constant to the minipool for a forward reference.  Returns the
17332    node added or NULL if the constant will not fit in this pool.  */
17333 static Mnode *
17334 add_minipool_forward_ref (Mfix *fix)
17335 {
17336   /* If set, max_mp is the first pool_entry that has a lower
17337      constraint than the one we are trying to add.  */
17338   Mnode *       max_mp = NULL;
17339   HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
17340   Mnode *       mp;
17341 
17342   /* If the minipool starts before the end of FIX->INSN then this FIX
17343      cannot be placed into the current pool.  Furthermore, adding the
17344      new constant pool entry may cause the pool to start FIX_SIZE bytes
17345      earlier.  */
17346   if (minipool_vector_head &&
17347       (fix->address + get_attr_length (fix->insn)
17348        >= minipool_vector_head->max_address - fix->fix_size))
17349     return NULL;
17350 
17351   /* Scan the pool to see if a constant with the same value has
17352      already been added.  While we are doing this, also note the
17353      location where we must insert the constant if it doesn't already
17354      exist.  */
17355   for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17356     {
17357       if (GET_CODE (fix->value) == GET_CODE (mp->value)
17358 	  && fix->mode == mp->mode
17359 	  && (!LABEL_P (fix->value)
17360 	      || (CODE_LABEL_NUMBER (fix->value)
17361 		  == CODE_LABEL_NUMBER (mp->value)))
17362 	  && rtx_equal_p (fix->value, mp->value))
17363 	{
17364 	  /* More than one fix references this entry.  */
17365 	  mp->refcount++;
17366 	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);
17367 	}
17368 
17369       /* Note the insertion point if necessary.  */
17370       if (max_mp == NULL
17371 	  && mp->max_address > max_address)
17372 	max_mp = mp;
17373 
17374       /* If we are inserting an 8-byte aligned quantity and
17375 	 we have not already found an insertion point, then
17376 	 make sure that all such 8-byte aligned quantities are
17377 	 placed at the start of the pool.  */
17378       if (ARM_DOUBLEWORD_ALIGN
17379 	  && max_mp == NULL
17380 	  && fix->fix_size >= 8
17381 	  && mp->fix_size < 8)
17382 	{
17383 	  max_mp = mp;
17384 	  max_address = mp->max_address;
17385 	}
17386     }
17387 
17388   /* The value is not currently in the minipool, so we need to create
17389      a new entry for it.  If MAX_MP is NULL, the entry will be put on
17390      the end of the list since the placement is less constrained than
17391      any existing entry.  Otherwise, we insert the new fix before
17392      MAX_MP and, if necessary, adjust the constraints on the other
17393      entries.  */
17394   mp = XNEW (Mnode);
17395   mp->fix_size = fix->fix_size;
17396   mp->mode = fix->mode;
17397   mp->value = fix->value;
17398   mp->refcount = 1;
17399   /* Not yet required for a backwards ref.  */
17400   mp->min_address = -65536;
17401 
17402   if (max_mp == NULL)
17403     {
17404       mp->max_address = max_address;
17405       mp->next = NULL;
17406       mp->prev = minipool_vector_tail;
17407 
17408       if (mp->prev == NULL)
17409 	{
17410 	  minipool_vector_head = mp;
17411 	  minipool_vector_label = gen_label_rtx ();
17412 	}
17413       else
17414 	mp->prev->next = mp;
17415 
17416       minipool_vector_tail = mp;
17417     }
17418   else
17419     {
17420       if (max_address > max_mp->max_address - mp->fix_size)
17421 	mp->max_address = max_mp->max_address - mp->fix_size;
17422       else
17423 	mp->max_address = max_address;
17424 
17425       mp->next = max_mp;
17426       mp->prev = max_mp->prev;
17427       max_mp->prev = mp;
17428       if (mp->prev != NULL)
17429 	mp->prev->next = mp;
17430       else
17431 	minipool_vector_head = mp;
17432     }
17433 
17434   /* Save the new entry.  */
17435   max_mp = mp;
17436 
17437   /* Scan over the preceding entries and adjust their addresses as
17438      required.  */
17439   while (mp->prev != NULL
17440 	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17441     {
17442       mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17443       mp = mp->prev;
17444     }
17445 
17446   return max_mp;
17447 }
17448 
17449 static Mnode *
17450 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
17451 				HOST_WIDE_INT  min_address)
17452 {
17453   HOST_WIDE_INT offset;
17454 
17455   /* The code below assumes these are different.  */
17456   gcc_assert (mp != min_mp);
17457 
17458   if (min_mp == NULL)
17459     {
17460       if (min_address > mp->min_address)
17461 	mp->min_address = min_address;
17462     }
17463   else
17464     {
17465       /* We will adjust this below if it is too loose.  */
17466       mp->min_address = min_address;
17467 
17468       /* Unlink MP from its current position.  Since min_mp is non-null,
17469 	 mp->next must be non-null.  */
17470       mp->next->prev = mp->prev;
17471       if (mp->prev != NULL)
17472 	mp->prev->next = mp->next;
17473       else
17474 	minipool_vector_head = mp->next;
17475 
17476       /* Reinsert it after MIN_MP.  */
17477       mp->prev = min_mp;
17478       mp->next = min_mp->next;
17479       min_mp->next = mp;
17480       if (mp->next != NULL)
17481 	mp->next->prev = mp;
17482       else
17483 	minipool_vector_tail = mp;
17484     }
17485 
17486   min_mp = mp;
17487 
17488   offset = 0;
17489   for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17490     {
17491       mp->offset = offset;
17492       if (mp->refcount > 0)
17493 	offset += mp->fix_size;
17494 
17495       if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
17496 	mp->next->min_address = mp->min_address + mp->fix_size;
17497     }
17498 
17499   return min_mp;
17500 }
17501 
17502 /* Add a constant to the minipool for a backward reference.  Returns the
17503    node added or NULL if the constant will not fit in this pool.
17504 
17505    Note that the code for insertion for a backwards reference can be
17506    somewhat confusing because the calculated offsets for each fix do
17507    not take into account the size of the pool (which is still under
17508      construction).  */
17509 static Mnode *
17510 add_minipool_backward_ref (Mfix *fix)
17511 {
17512   /* If set, min_mp is the last pool_entry that has a lower constraint
17513      than the one we are trying to add.  */
17514   Mnode *min_mp = NULL;
17515   /* This can be negative, since it is only a constraint.  */
17516   HOST_WIDE_INT  min_address = fix->address - fix->backwards;
17517   Mnode *mp;
17518 
17519   /* If we can't reach the current pool from this insn, or if we can't
17520      insert this entry at the end of the pool without pushing other
17521      fixes out of range, then we don't try.  This ensures that we
17522      can't fail later on.  */
17523   if (min_address >= minipool_barrier->address
17524       || (minipool_vector_tail->min_address + fix->fix_size
17525 	  >= minipool_barrier->address))
17526     return NULL;
17527 
17528   /* Scan the pool to see if a constant with the same value has
17529      already been added.  While we are doing this, also note the
17530      location where we must insert the constant if it doesn't already
17531      exist.  */
17532   for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
17533     {
17534       if (GET_CODE (fix->value) == GET_CODE (mp->value)
17535 	  && fix->mode == mp->mode
17536 	  && (!LABEL_P (fix->value)
17537 	      || (CODE_LABEL_NUMBER (fix->value)
17538 		  == CODE_LABEL_NUMBER (mp->value)))
17539 	  && rtx_equal_p (fix->value, mp->value)
17540 	  /* Check that there is enough slack to move this entry to the
17541 	     end of the table (this is conservative).  */
17542 	  && (mp->max_address
17543 	      > (minipool_barrier->address
17544 		 + minipool_vector_tail->offset
17545 		 + minipool_vector_tail->fix_size)))
17546 	{
17547 	  mp->refcount++;
17548 	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);
17549 	}
17550 
17551       if (min_mp != NULL)
17552 	mp->min_address += fix->fix_size;
17553       else
17554 	{
17555 	  /* Note the insertion point if necessary.  */
17556 	  if (mp->min_address < min_address)
17557 	    {
17558 	      /* For now, we do not allow the insertion of 8-byte alignment
17559 		 requiring nodes anywhere but at the start of the pool.  */
17560 	      if (ARM_DOUBLEWORD_ALIGN
17561 		  && fix->fix_size >= 8 && mp->fix_size < 8)
17562 		return NULL;
17563 	      else
17564 		min_mp = mp;
17565 	    }
17566 	  else if (mp->max_address
17567 		   < minipool_barrier->address + mp->offset + fix->fix_size)
17568 	    {
17569 	      /* Inserting before this entry would push the fix beyond
17570 		 its maximum address (which can happen if we have
17571 		 re-located a forwards fix); force the new fix to come
17572 		 after it.  */
17573 	      if (ARM_DOUBLEWORD_ALIGN
17574 		  && fix->fix_size >= 8 && mp->fix_size < 8)
17575 		return NULL;
17576 	      else
17577 		{
17578 		  min_mp = mp;
17579 		  min_address = mp->min_address + fix->fix_size;
17580 		}
17581 	    }
17582 	  /* Do not insert a non-8-byte aligned quantity before 8-byte
17583 	     aligned quantities.  */
17584 	  else if (ARM_DOUBLEWORD_ALIGN
17585 		   && fix->fix_size < 8
17586 		   && mp->fix_size >= 8)
17587 	    {
17588 	      min_mp = mp;
17589 	      min_address = mp->min_address + fix->fix_size;
17590 	    }
17591 	}
17592     }
17593 
17594   /* We need to create a new entry.  */
17595   mp = XNEW (Mnode);
17596   mp->fix_size = fix->fix_size;
17597   mp->mode = fix->mode;
17598   mp->value = fix->value;
17599   mp->refcount = 1;
17600   mp->max_address = minipool_barrier->address + 65536;
17601 
17602   mp->min_address = min_address;
17603 
17604   if (min_mp == NULL)
17605     {
17606       mp->prev = NULL;
17607       mp->next = minipool_vector_head;
17608 
17609       if (mp->next == NULL)
17610 	{
17611 	  minipool_vector_tail = mp;
17612 	  minipool_vector_label = gen_label_rtx ();
17613 	}
17614       else
17615 	mp->next->prev = mp;
17616 
17617       minipool_vector_head = mp;
17618     }
17619   else
17620     {
17621       mp->next = min_mp->next;
17622       mp->prev = min_mp;
17623       min_mp->next = mp;
17624 
17625       if (mp->next != NULL)
17626 	mp->next->prev = mp;
17627       else
17628 	minipool_vector_tail = mp;
17629     }
17630 
17631   /* Save the new entry.  */
17632   min_mp = mp;
17633 
17634   if (mp->prev)
17635     mp = mp->prev;
17636   else
17637     mp->offset = 0;
17638 
17639   /* Scan over the following entries and adjust their offsets.  */
17640   while (mp->next != NULL)
17641     {
17642       if (mp->next->min_address < mp->min_address + mp->fix_size)
17643 	mp->next->min_address = mp->min_address + mp->fix_size;
17644 
17645       if (mp->refcount)
17646 	mp->next->offset = mp->offset + mp->fix_size;
17647       else
17648 	mp->next->offset = mp->offset;
17649 
17650       mp = mp->next;
17651     }
17652 
17653   return min_mp;
17654 }
17655 
17656 static void
17657 assign_minipool_offsets (Mfix *barrier)
17658 {
17659   HOST_WIDE_INT offset = 0;
17660   Mnode *mp;
17661 
17662   minipool_barrier = barrier;
17663 
17664   for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17665     {
17666       mp->offset = offset;
17667 
17668       if (mp->refcount > 0)
17669 	offset += mp->fix_size;
17670     }
17671 }
17672 
17673 /* Output the literal table.  */
17674 static void
17675 dump_minipool (rtx_insn *scan)
17676 {
17677   Mnode * mp;
17678   Mnode * nmp;
17679   int align64 = 0;
17680 
17681   if (ARM_DOUBLEWORD_ALIGN)
17682     for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17683       if (mp->refcount > 0 && mp->fix_size >= 8)
17684 	{
17685 	  align64 = 1;
17686 	  break;
17687 	}
17688 
17689   if (dump_file)
17690     fprintf (dump_file,
17691 	     ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
17692 	     INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
17693 
17694   scan = emit_label_after (gen_label_rtx (), scan);
17695   scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
17696   scan = emit_label_after (minipool_vector_label, scan);
17697 
17698   for (mp = minipool_vector_head; mp != NULL; mp = nmp)
17699     {
17700       if (mp->refcount > 0)
17701 	{
17702 	  if (dump_file)
17703 	    {
17704 	      fprintf (dump_file,
17705 		       ";;  Offset %u, min %ld, max %ld ",
17706 		       (unsigned) mp->offset, (unsigned long) mp->min_address,
17707 		       (unsigned long) mp->max_address);
17708 	      arm_print_value (dump_file, mp->value);
17709 	      fputc ('\n', dump_file);
17710 	    }
17711 
17712 	  rtx val = copy_rtx (mp->value);
17713 
17714 	  switch (GET_MODE_SIZE (mp->mode))
17715 	    {
17716 #ifdef HAVE_consttable_1
17717 	    case 1:
17718 	      scan = emit_insn_after (gen_consttable_1 (val), scan);
17719 	      break;
17720 
17721 #endif
17722 #ifdef HAVE_consttable_2
17723 	    case 2:
17724 	      scan = emit_insn_after (gen_consttable_2 (val), scan);
17725 	      break;
17726 
17727 #endif
17728 #ifdef HAVE_consttable_4
17729 	    case 4:
17730 	      scan = emit_insn_after (gen_consttable_4 (val), scan);
17731 	      break;
17732 
17733 #endif
17734 #ifdef HAVE_consttable_8
17735 	    case 8:
17736 	      scan = emit_insn_after (gen_consttable_8 (val), scan);
17737 	      break;
17738 
17739 #endif
17740 #ifdef HAVE_consttable_16
17741 	    case 16:
17742               scan = emit_insn_after (gen_consttable_16 (val), scan);
17743               break;
17744 
17745 #endif
17746 	    default:
17747 	      gcc_unreachable ();
17748 	    }
17749 	}
17750 
17751       nmp = mp->next;
17752       free (mp);
17753     }
17754 
17755   minipool_vector_head = minipool_vector_tail = NULL;
17756   scan = emit_insn_after (gen_consttable_end (), scan);
17757   scan = emit_barrier_after (scan);
17758 }
17759 
17760 /* Return the cost of forcibly inserting a barrier after INSN.  */
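/* A lower returned cost marks a more attractive insertion point;
   create_fix_barrier below keeps the lowest-cost position it finds.  */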
17761 static int
17762 arm_barrier_cost (rtx_insn *insn)
17763 {
17764   /* Basing the location of the pool on the loop depth is preferable,
17765      but at the moment, the basic block information seems to be
17766      corrupt by this stage of the compilation.  */
17767   int base_cost = 50;
17768   rtx_insn *next = next_nonnote_insn (insn);
17769 
17770   if (next != NULL && LABEL_P (next))
17771     base_cost -= 20;
17772 
17773   switch (GET_CODE (insn))
17774     {
17775     case CODE_LABEL:
17776       /* It will always be better to place the table before the label, rather
17777 	 than after it.  */
17778       return 50;
17779 
17780     case INSN:
17781     case CALL_INSN:
17782       return base_cost;
17783 
17784     case JUMP_INSN:
17785       return base_cost - 10;
17786 
17787     default:
17788       return base_cost + 10;
17789     }
17790 }
17791 
17792 /* Find the best place in the insn stream in the range
17793    (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
17794    Create the barrier by inserting a jump and add a new fix entry for
17795    it.  */
17796 static Mfix *
17797 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
17798 {
17799   HOST_WIDE_INT count = 0;
17800   rtx_barrier *barrier;
17801   rtx_insn *from = fix->insn;
17802   /* The instruction after which we will insert the jump.  */
17803   rtx_insn *selected = NULL;
17804   int selected_cost;
17805   /* The address at which the jump instruction will be placed.  */
17806   HOST_WIDE_INT selected_address;
17807   Mfix * new_fix;
17808   HOST_WIDE_INT max_count = max_address - fix->address;
17809   rtx_code_label *label = gen_label_rtx ();
17810 
17811   selected_cost = arm_barrier_cost (from);
17812   selected_address = fix->address;
17813 
17814   while (from && count < max_count)
17815     {
17816       rtx_jump_table_data *tmp;
17817       int new_cost;
17818 
17819       /* This code shouldn't have been called if there was a natural barrier
17820 	 within range.  */
17821       gcc_assert (!BARRIER_P (from));
17822 
17823       /* Count the length of this insn.  This must stay in sync with the
17824 	 code that pushes minipool fixes.  */
17825       if (LABEL_P (from))
17826 	count += get_label_padding (from);
17827       else
17828 	count += get_attr_length (from);
17829 
17830       /* If there is a jump table, add its length.  */
17831       if (tablejump_p (from, NULL, &tmp))
17832 	{
17833 	  count += get_jump_table_size (tmp);
17834 
17835 	  /* Jump tables aren't in a basic block, so base the cost on
17836 	     the dispatch insn.  If we select this location, we will
17837 	     still put the pool after the table.  */
17838 	  new_cost = arm_barrier_cost (from);
17839 
17840 	  if (count < max_count
17841 	      && (!selected || new_cost <= selected_cost))
17842 	    {
17843 	      selected = tmp;
17844 	      selected_cost = new_cost;
17845 	      selected_address = fix->address + count;
17846 	    }
17847 
17848 	  /* Continue after the dispatch table.  */
17849 	  from = NEXT_INSN (tmp);
17850 	  continue;
17851 	}
17852 
17853       new_cost = arm_barrier_cost (from);
17854 
17855       if (count < max_count
17856 	  && (!selected || new_cost <= selected_cost))
17857 	{
17858 	  selected = from;
17859 	  selected_cost = new_cost;
17860 	  selected_address = fix->address + count;
17861 	}
17862 
17863       from = NEXT_INSN (from);
17864     }
17865 
17866   /* Make sure that we found a place to insert the jump.  */
17867   gcc_assert (selected);
17868 
17869   /* Create a new JUMP_INSN that branches around a barrier.  */
17870   from = emit_jump_insn_after (gen_jump (label), selected);
17871   JUMP_LABEL (from) = label;
17872   barrier = emit_barrier_after (from);
17873   emit_label_after (label, barrier);
17874 
17875   /* Create a minipool barrier entry for the new barrier.  */
17876   new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
17877   new_fix->insn = barrier;
17878   new_fix->address = selected_address;
17879   new_fix->next = fix->next;
17880   fix->next = new_fix;
17881 
17882   return new_fix;
17883 }
17884 
17885 /* Record that there is a natural barrier in the insn stream at
17886    ADDRESS.  */
17887 static void
17888 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
17889 {
17890   Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
17891 
17892   fix->insn = insn;
17893   fix->address = address;
17894 
17895   fix->next = NULL;
17896   if (minipool_fix_head != NULL)
17897     minipool_fix_tail->next = fix;
17898   else
17899     minipool_fix_head = fix;
17900 
17901   minipool_fix_tail = fix;
17902 }
17903 
17904 /* Record INSN, which will need fixing up to load a value from the
17905    minipool.  ADDRESS is the offset of the insn since the start of the
17906    function; LOC is a pointer to the part of the insn which requires
17907    fixing; VALUE is the constant that must be loaded, which is of type
17908    MODE.  */
17909 static void
17910 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
17911 		   machine_mode mode, rtx value)
17912 {
17913   gcc_assert (!arm_disable_literal_pool);
17914   Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
17915 
17916   fix->insn = insn;
17917   fix->address = address;
17918   fix->loc = loc;
17919   fix->mode = mode;
17920   fix->fix_size = MINIPOOL_FIX_SIZE (mode);
17921   fix->value = value;
17922   fix->forwards = get_attr_pool_range (insn);
17923   fix->backwards = get_attr_neg_pool_range (insn);
17924   fix->minipool = NULL;
17925 
17926   /* If an insn doesn't have a range defined for it, then it isn't
17927      expecting to be reworked by this code.  Better to stop now than
17928      to generate duff assembly code.  */
17929   gcc_assert (fix->forwards || fix->backwards);
17930 
17931   /* If an entry requires 8-byte alignment then assume all constant pools
17932      require 4 bytes of padding.  Trying to do this later on a per-pool
17933      basis is awkward because existing pool entries have to be modified.  */
17934   if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
17935     minipool_pad = 4;
17936 
17937   if (dump_file)
17938     {
17939       fprintf (dump_file,
17940 	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
17941 	       GET_MODE_NAME (mode),
17942 	       INSN_UID (insn), (unsigned long) address,
17943 	       -1 * (long)fix->backwards, (long)fix->forwards);
17944       arm_print_value (dump_file, fix->value);
17945       fprintf (dump_file, "\n");
17946     }
17947 
17948   /* Add it to the chain of fixes.  */
17949   fix->next = NULL;
17950 
17951   if (minipool_fix_head != NULL)
17952     minipool_fix_tail->next = fix;
17953   else
17954     minipool_fix_head = fix;
17955 
17956   minipool_fix_tail = fix;
17957 }
17958 
17959 /* Return the maximum allowed cost (in insns) of synthesizing a 64-bit
17960    constant inline.  Constants whose arm_const_double_inline_cost exceeds
17961    this value are not synthesized inline.  */
17962 int
17963 arm_max_const_double_inline_cost ()
17964 {
17965   return ((optimize_size || arm_ld_sched) ? 3 : 4);
17966 }
17967 
17968 /* Return the cost of synthesizing a 64-bit constant VAL inline.
17969    Returns the number of insns needed, or 99 if we don't know how to
17970    do it.  */
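/* For example, a DImode constant whose halves are both 1 costs 2 insns:
   a single MOV for each 32-bit half.  */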
17971 int
17972 arm_const_double_inline_cost (rtx val)
17973 {
17974   rtx lowpart, highpart;
17975   machine_mode mode;
17976 
17977   mode = GET_MODE (val);
17978 
17979   if (mode == VOIDmode)
17980     mode = DImode;
17981 
17982   gcc_assert (GET_MODE_SIZE (mode) == 8);
17983 
17984   lowpart = gen_lowpart (SImode, val);
17985   highpart = gen_highpart_mode (SImode, mode, val);
17986 
17987   gcc_assert (CONST_INT_P (lowpart));
17988   gcc_assert (CONST_INT_P (highpart));
17989 
17990   return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
17991 			    NULL_RTX, NULL_RTX, 0, 0)
17992 	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
17993 			      NULL_RTX, NULL_RTX, 0, 0));
17994 }
17995 
17996 /* Cost of loading a SImode constant.  */
17997 static inline int
17998 arm_const_inline_cost (enum rtx_code code, rtx val)
17999 {
18000   return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
18001                            NULL_RTX, NULL_RTX, 1, 0);
18002 }
18003 
18004 /* Return true if it is worthwhile to split a 64-bit constant into two
18005    32-bit operations.  This is the case if optimizing for size, or
18006    if we have load delay slots, or if one 32-bit part can be done with
18007    a single data operation.  */
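/* For example, a 64-bit constant with high word 0x12 and low word 0 is worth
   splitting even when not optimizing for size, since 0x12 is a valid
   immediate operand.  */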
18008 bool
18009 arm_const_double_by_parts (rtx val)
18010 {
18011   machine_mode mode = GET_MODE (val);
18012   rtx part;
18013 
18014   if (optimize_size || arm_ld_sched)
18015     return true;
18016 
18017   if (mode == VOIDmode)
18018     mode = DImode;
18019 
18020   part = gen_highpart_mode (SImode, mode, val);
18021 
18022   gcc_assert (CONST_INT_P (part));
18023 
18024   if (const_ok_for_arm (INTVAL (part))
18025       || const_ok_for_arm (~INTVAL (part)))
18026     return true;
18027 
18028   part = gen_lowpart (SImode, val);
18029 
18030   gcc_assert (CONST_INT_P (part));
18031 
18032   if (const_ok_for_arm (INTVAL (part))
18033       || const_ok_for_arm (~INTVAL (part)))
18034     return true;
18035 
18036   return false;
18037 }
18038 
18039 /* Return true if it is possible to inline both the high and low parts
18040    of a 64-bit constant into 32-bit data processing instructions.  */
18041 bool
18042 arm_const_double_by_immediates (rtx val)
18043 {
18044   machine_mode mode = GET_MODE (val);
18045   rtx part;
18046 
18047   if (mode == VOIDmode)
18048     mode = DImode;
18049 
18050   part = gen_highpart_mode (SImode, mode, val);
18051 
18052   gcc_assert (CONST_INT_P (part));
18053 
18054   if (!const_ok_for_arm (INTVAL (part)))
18055     return false;
18056 
18057   part = gen_lowpart (SImode, val);
18058 
18059   gcc_assert (CONST_INT_P (part));
18060 
18061   if (!const_ok_for_arm (INTVAL (part)))
18062     return false;
18063 
18064   return true;
18065 }
18066 
18067 /* Scan INSN and note any of its operands that need fixing.
18068    If DO_PUSHES is false we do not actually push any of the fixups
18069    needed.  */
18070 static void
18071 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
18072 {
18073   int opno;
18074 
18075   extract_constrain_insn (insn);
18076 
18077   if (recog_data.n_alternatives == 0)
18078     return;
18079 
18080   /* Fill in recog_op_alt with information about the constraints of
18081      this insn.  */
18082   preprocess_constraints (insn);
18083 
18084   const operand_alternative *op_alt = which_op_alt ();
18085   for (opno = 0; opno < recog_data.n_operands; opno++)
18086     {
18087       /* Things we need to fix can only occur in inputs.  */
18088       if (recog_data.operand_type[opno] != OP_IN)
18089 	continue;
18090 
18091       /* If this alternative is a memory reference, then any mention
18092 	 of constants in this alternative is really to fool reload
18093 	 into allowing us to accept one there.  We need to fix them up
18094 	 now so that we output the right code.  */
18095       if (op_alt[opno].memory_ok)
18096 	{
18097 	  rtx op = recog_data.operand[opno];
18098 
18099 	  if (CONSTANT_P (op))
18100 	    {
18101 	      if (do_pushes)
18102 		push_minipool_fix (insn, address, recog_data.operand_loc[opno],
18103 				   recog_data.operand_mode[opno], op);
18104 	    }
18105 	  else if (MEM_P (op)
18106 		   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
18107 		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
18108 	    {
18109 	      if (do_pushes)
18110 		{
18111 		  rtx cop = avoid_constant_pool_reference (op);
18112 
18113 		  /* Casting the address of something to a mode narrower
18114 		     than a word can cause avoid_constant_pool_reference()
18115 		     to return the pool reference itself.  That's no good to
18116 		     us here.  Let's just hope that we can use the
18117 		     constant pool value directly.  */
18118 		  if (op == cop)
18119 		    cop = get_pool_constant (XEXP (op, 0));
18120 
18121 		  push_minipool_fix (insn, address,
18122 				     recog_data.operand_loc[opno],
18123 				     recog_data.operand_mode[opno], cop);
18124 		}
18125 
18126 	    }
18127 	}
18128     }
18129 
18130   return;
18131 }
18132 
18133 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
18134    and unions in the context of ARMv8-M Security Extensions.  It is used as a
18135    helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
18136    functions.  The PADDING_BITS_TO_CLEAR pointer can be the base to either one
18137    or four masks, depending on whether it is being computed for a
18138    'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
18139    respectively.  The tree for the type of the argument or a field within an
18140    argument is passed in ARG_TYPE, the current register this argument or field
18141    starts in is kept in the pointer REGNO and updated accordingly, the bit this
18142    argument or field starts at is passed in STARTING_BIT and the last used bit
18143    is kept in LAST_USED_BIT which is also updated accordingly.  */
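/* As an illustration: for an argument of type 'struct { char c; short s; }'
   starting in r0, bits 8-15 (the padding between C and S) end up set in
   padding_bits_to_clear[0].  */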
18144 
18145 static unsigned HOST_WIDE_INT
18146 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
18147 			       uint32_t * padding_bits_to_clear,
18148 			       unsigned starting_bit, int * last_used_bit)
18149 
18150 {
18151   unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
18152 
18153   if (TREE_CODE (arg_type) == RECORD_TYPE)
18154     {
18155       unsigned current_bit = starting_bit;
18156       tree field;
18157       long int offset, size;
18158 
18159 
18160       field = TYPE_FIELDS (arg_type);
18161       while (field)
18162 	{
18163 	  /* The offset within a structure is always an offset from
18164 	     the start of that structure.  Make sure we take that into account
18165 	     in the calculation of the register-based offset that we use here.  */
18166 	  offset = starting_bit;
18167 	  offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
18168 	  offset %= 32;
18169 
18170 	  /* This is the actual size of the field, for bitfields this is the
18171 	     bitfield width and not the container size.  */
18172 	  size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18173 
18174 	  if (*last_used_bit != offset)
18175 	    {
18176 	      if (offset < *last_used_bit)
18177 		{
18178 		  /* This field's offset is before the 'last_used_bit', that
18179 		     means this field goes on the next register.  So we need to
18180 		     pad the rest of the current register and increase the
18181 		     register number.  */
18182 		  uint32_t mask;
18183 		  mask  = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
18184 		  mask++;
18185 
18186 		  padding_bits_to_clear[*regno] |= mask;
18187 		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18188 		  (*regno)++;
18189 		}
18190 	      else
18191 		{
18192 		  /* Otherwise we pad the bits between the last field's end and
18193 		     the start of the new field.  */
18194 		  uint32_t mask;
18195 
18196 		  mask = ((uint32_t)-1) >> (32 - offset);
18197 		  mask -= ((uint32_t) 1 << *last_used_bit) - 1;
18198 		  padding_bits_to_clear[*regno] |= mask;
18199 		}
18200 	      current_bit = offset;
18201 	    }
18202 
18203 	  /* Calculate further padding bits for inner structs/unions too.  */
18204 	  if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
18205 	    {
18206 	      *last_used_bit = current_bit;
18207 	      not_to_clear_reg_mask
18208 		|= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
18209 						  padding_bits_to_clear, offset,
18210 						  last_used_bit);
18211 	    }
18212 	  else
18213 	    {
18214 	      /* Update 'current_bit' with this field's size.  If the
18215 		 'current_bit' lies in a subsequent register, update 'regno' and
18216 		 reset 'current_bit' to point to the current bit in that new
18217 		 register.  */
18218 	      current_bit += size;
18219 	      while (current_bit >= 32)
18220 		{
18221 		  current_bit -= 32;
18222 		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18223 		  (*regno)++;
18224 		}
18225 	      *last_used_bit = current_bit;
18226 	    }
18227 
18228 	  field = TREE_CHAIN (field);
18229 	}
18230       not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18231     }
18232   else if (TREE_CODE (arg_type) == UNION_TYPE)
18233     {
18234       tree field, field_t;
18235       int i, regno_t, field_size;
18236       int max_reg = -1;
18237       int max_bit = -1;
18238       uint32_t mask;
18239       uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
18240 	= {-1, -1, -1, -1};
18241 
18242       /* To compute the padding bits in a union we only consider bits as
18243 	 padding bits if they are always either a padding bit or fall outside a
18244 	 field's size for all fields in the union.  */
18245       field = TYPE_FIELDS (arg_type);
18246       while (field)
18247 	{
18248 	  uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
18249 	    = {0U, 0U, 0U, 0U};
18250 	  int last_used_bit_t = *last_used_bit;
18251 	  regno_t = *regno;
18252 	  field_t = TREE_TYPE (field);
18253 
18254 	  /* If the field's type is either a record or a union make sure to
18255 	     compute their padding bits too.  */
18256 	  if (RECORD_OR_UNION_TYPE_P (field_t))
18257 	    not_to_clear_reg_mask
18258 	      |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
18259 						&padding_bits_to_clear_t[0],
18260 						starting_bit, &last_used_bit_t);
18261 	  else
18262 	    {
18263 	      field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18264 	      regno_t = (field_size / 32) + *regno;
18265 	      last_used_bit_t = (starting_bit + field_size) % 32;
18266 	    }
18267 
18268 	  for (i = *regno; i < regno_t; i++)
18269 	    {
18270 	      /* For all but the last register used by this field only keep the
18271 		 padding bits that were padding bits in this field.  */
18272 	      padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
18273 	    }
18274 
18275 	  /* For the last register, keep all padding bits that were padding
18276 	     bits in this field and any padding bits that are still valid
18277 	     as padding bits but fall outside of this field's size.  */
18278 	  mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
18279 	  padding_bits_to_clear_res[regno_t]
18280 	    &= padding_bits_to_clear_t[regno_t] | mask;
18281 
18282 	  /* Update the maximum size of the fields in terms of registers used
18283 	     ('max_reg') and the 'last_used_bit' in said register.  */
18284 	  if (max_reg < regno_t)
18285 	    {
18286 	      max_reg = regno_t;
18287 	      max_bit = last_used_bit_t;
18288 	    }
18289 	  else if (max_reg == regno_t && max_bit < last_used_bit_t)
18290 	    max_bit = last_used_bit_t;
18291 
18292 	  field = TREE_CHAIN (field);
18293 	}
18294 
18295       /* Update the current padding_bits_to_clear using the intersection of the
18296 	 padding bits of all the fields.  */
18297       for (i = *regno; i < max_reg; i++)
18298 	padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
18299 
18300       /* Do not keep trailing padding bits, we do not know yet whether this
18301 	 is the end of the argument.  */
18302       mask = ((uint32_t) 1 << max_bit) - 1;
18303       padding_bits_to_clear[max_reg]
18304 	|= padding_bits_to_clear_res[max_reg] & mask;
18305 
18306       *regno = max_reg;
18307       *last_used_bit = max_bit;
18308     }
18309   else
18310     /* This function should only be used for structs and unions.  */
18311     gcc_unreachable ();
18312 
18313   return not_to_clear_reg_mask;
18314 }
18315 
18316 /* In the context of ARMv8-M Security Extensions, this function is used for both
18317    'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
18318    registers are used when returning or passing arguments, which is then
18319    returned as a mask.  It will also compute a mask to indicate padding/unused
18320    bits for each of these registers, and passes this through the
18321    PADDING_BITS_TO_CLEAR pointer.  The tree of the argument type is passed in
18322    ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
18323    the starting register used to pass this argument or return value is passed
18324    in REGNO.  It makes use of 'comp_not_to_clear_mask_str_un' to compute these
18325    for struct and union types.  */
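/* For instance, under the hard-float ABI a 'double' argument passed in d0
   yields a mask with the bits for s0 and s1 set, so neither register is
   cleared.  */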
18326 
18327 static unsigned HOST_WIDE_INT
18328 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
18329 			     uint32_t * padding_bits_to_clear)
18330 
18331 {
18332   int last_used_bit = 0;
18333   unsigned HOST_WIDE_INT not_to_clear_mask;
18334 
18335   if (RECORD_OR_UNION_TYPE_P (arg_type))
18336     {
18337       not_to_clear_mask
18338 	= comp_not_to_clear_mask_str_un (arg_type, &regno,
18339 					 padding_bits_to_clear, 0,
18340 					 &last_used_bit);
18341 
18342 
18343       /* If the 'last_used_bit' is not zero, that means we are still using a
18344 	 part of the last 'regno'.  In such cases we must clear the trailing
18345 	 bits.  Otherwise we are not using regno and we should mark it as to
18346 	 clear.  */
18347       if (last_used_bit != 0)
18348 	padding_bits_to_clear[regno]
18349 	  |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
18350       else
18351 	not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
18352     }
18353   else
18354     {
18355       not_to_clear_mask = 0;
18356       /* We are not dealing with structs nor unions.  So these arguments may be
18357 	 passed in floating point registers too.  In some cases a BLKmode is
18358 	 used when returning or passing arguments in multiple VFP registers.  */
18359       if (GET_MODE (arg_rtx) == BLKmode)
18360 	{
18361 	  int i, arg_regs;
18362 	  rtx reg;
18363 
18364 	  /* This should really only occur when dealing with the hard-float
18365 	     ABI.  */
18366 	  gcc_assert (TARGET_HARD_FLOAT_ABI);
18367 
18368 	  for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
18369 	    {
18370 	      reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
18371 	      gcc_assert (REG_P (reg));
18372 
18373 	      not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
18374 
18375 	      /* If we are dealing with DF mode, make sure we don't
18376 		 clear either of the registers it addresses.  */
18377 	      arg_regs = ARM_NUM_REGS (GET_MODE (reg));
18378 	      if (arg_regs > 1)
18379 		{
18380 		  unsigned HOST_WIDE_INT mask;
18381 		  mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
18382 		  mask -= HOST_WIDE_INT_1U << REGNO (reg);
18383 		  not_to_clear_mask |= mask;
18384 		}
18385 	    }
18386 	}
18387       else
18388 	{
18389 	  /* Otherwise we can rely on the MODE to determine how many registers
18390 	     are being used by this argument.  */
18391 	  int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
18392 	  not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18393 	  if (arg_regs > 1)
18394 	    {
18395 	      unsigned HOST_WIDE_INT
18396 	      mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
18397 	      mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18398 	      not_to_clear_mask |= mask;
18399 	    }
18400 	}
18401     }
18402 
18403   return not_to_clear_mask;
18404 }
18405 
18406 /* Clear registers holding secrets before doing a cmse_nonsecure_call or
18407    returning from a cmse_nonsecure_entry function.  TO_CLEAR_BITMAP indicates
18408    which registers are to be fully cleared, using the value in register
18409    CLEARING_REG if more efficient.  The PADDING_BITS_LEN-entry array
18410    PADDING_BITS_TO_CLEAR gives the bits that need to be cleared in caller-saved
18411    core registers, with SCRATCH_REG used as a scratch register for that clearing.
18412 
18413    NOTE: one of the three following assertions must hold:
18414    - SCRATCH_REG is a low register
18415    - CLEARING_REG is in the set of registers fully cleared (i.e. its bit is set
18416      in TO_CLEAR_BITMAP)
18417    - CLEARING_REG is a low register.  */
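/* For instance, to clear padding bits 8-15 of r0 the loop below builds
   ~0x0000ff00 (0xffff00ff) in SCRATCH_REG, in two 16-bit halves, and ANDs it
   into r0.  */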
18418 
18419 static void
18420 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
18421 		      int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
18422 {
18423   bool saved_clearing = false;
18424   rtx saved_clearing_reg = NULL_RTX;
18425   int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
18426 
18427   gcc_assert (arm_arch_cmse);
18428 
18429   if (!bitmap_empty_p (to_clear_bitmap))
18430     {
18431       minregno = bitmap_first_set_bit (to_clear_bitmap);
18432       maxregno = bitmap_last_set_bit (to_clear_bitmap);
18433     }
18434   clearing_regno = REGNO (clearing_reg);
18435 
18436   /* Clear padding bits.  */
18437   gcc_assert (padding_bits_len <= NUM_ARG_REGS);
18438   for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
18439     {
18440       uint64_t mask;
18441       rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
18442 
18443       if (padding_bits_to_clear[i] == 0)
18444 	continue;
18445 
18446       /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
18447 	 CLEARING_REG as scratch.  */
18448       if (TARGET_THUMB1
18449 	  && REGNO (scratch_reg) > LAST_LO_REGNUM)
18450 	{
18451 	  /* clearing_reg is not to be cleared, copy its value into scratch_reg
18452 	     such that we can use clearing_reg to clear the unused bits in the
18453 	     arguments.  */
18454 	  if ((clearing_regno > maxregno
18455 	       || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
18456 	      && !saved_clearing)
18457 	    {
18458 	      gcc_assert (clearing_regno <= LAST_LO_REGNUM);
18459 	      emit_move_insn (scratch_reg, clearing_reg);
18460 	      saved_clearing = true;
18461 	      saved_clearing_reg = scratch_reg;
18462 	    }
18463 	  scratch_reg = clearing_reg;
18464 	}
18465 
18466       /* Fill the lower half of the negated padding_bits_to_clear[i].  */
18467       mask = (~padding_bits_to_clear[i]) & 0xFFFF;
18468       emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
18469 
18470       /* Fill the top half of the negated padding_bits_to_clear[i].  */
18471       mask = (~padding_bits_to_clear[i]) >> 16;
18472       rtx16 = gen_int_mode (16, SImode);
18473       dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
18474       if (mask)
18475 	emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
18476 
18477       emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
18478     }
18479   if (saved_clearing)
18480     emit_move_insn (clearing_reg, saved_clearing_reg);
18481 
18482 
18483   /* Clear full registers.  */
18484 
18485   if (TARGET_HAVE_FPCXT_CMSE)
18486     {
18487       rtvec vunspec_vec;
18488       int i, j, k, nb_regs;
18489       rtx use_seq, par, reg, set, vunspec;
18490       int to_clear_bitmap_size = SBITMAP_SIZE (to_clear_bitmap);
18491       auto_sbitmap core_regs_bitmap (to_clear_bitmap_size);
18492       auto_sbitmap to_clear_core_bitmap (to_clear_bitmap_size);
18493 
18494       for (i = FIRST_VFP_REGNUM; i <= maxregno; i += nb_regs)
18495 	{
18496 	  /* Find next register to clear and exit if none.  */
18497 	  for (; i <= maxregno && !bitmap_bit_p (to_clear_bitmap, i); i++);
18498 	  if (i > maxregno)
18499 	    break;
18500 
18501 	  /* Compute number of consecutive registers to clear.  */
18502 	  for (j = i; j <= maxregno && bitmap_bit_p (to_clear_bitmap, j);
18503 	       j++);
18504 	  nb_regs = j - i;
18505 
18506 	  /* Create VSCCLRM RTX pattern.  */
18507 	  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 1));
18508 	  vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18509 	  vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18510 					     VUNSPEC_VSCCLRM_VPR);
18511 	  XVECEXP (par, 0, 0) = vunspec;
18512 
18513 	  /* Insert VFP register clearing RTX in the pattern.  */
18514 	  start_sequence ();
18515 	  for (k = 1, j = i; j <= maxregno && k < nb_regs + 1; j++)
18516 	    {
18517 	      if (!bitmap_bit_p (to_clear_bitmap, j))
18518 		continue;
18519 
18520 	      reg = gen_rtx_REG (SFmode, j);
18521 	      set = gen_rtx_SET (reg, const0_rtx);
18522 	      XVECEXP (par, 0, k++) = set;
18523 	      emit_use (reg);
18524 	    }
18525 	  use_seq = get_insns ();
18526 	  end_sequence ();
18527 
18528 	  emit_insn_after (use_seq, emit_insn (par));
18529 	}
18530 
18531       /* Get set of core registers to clear.  */
18532       bitmap_clear (core_regs_bitmap);
18533       bitmap_set_range (core_regs_bitmap, R0_REGNUM,
18534 			IP_REGNUM - R0_REGNUM + 1);
18535       bitmap_and (to_clear_core_bitmap, to_clear_bitmap,
18536 		  core_regs_bitmap);
18537       gcc_assert (!bitmap_empty_p (to_clear_core_bitmap));
18538 
18539       if (bitmap_empty_p (to_clear_core_bitmap))
18540 	return;
18541 
18542       /* Create clrm RTX pattern.  */
18543       nb_regs = bitmap_count_bits (to_clear_core_bitmap);
18544       par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 2));
18545 
18546       /* Insert core register clearing RTX in the pattern.  */
18547       start_sequence ();
18548       for (j = 0, i = minregno; j < nb_regs; i++)
18549 	{
18550 	  if (!bitmap_bit_p (to_clear_core_bitmap, i))
18551 	    continue;
18552 
18553 	  reg = gen_rtx_REG (SImode, i);
18554 	  set = gen_rtx_SET (reg, const0_rtx);
18555 	  XVECEXP (par, 0, j++) = set;
18556 	  emit_use (reg);
18557 	}
18558 
18559       /* Insert APSR register clearing RTX in the pattern
18560 	 along with clobbering CC.  */
18561       vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18562       vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18563 					 VUNSPEC_CLRM_APSR);
18564 
18565       XVECEXP (par, 0, j++) = vunspec;
18566 
18567       rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
18568       rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
18569       XVECEXP (par, 0, j) = clobber;
18570 
18571       use_seq = get_insns ();
18572       end_sequence ();
18573 
18574       emit_insn_after (use_seq, emit_insn (par));
18575     }
18576   else
18577     {
18578       /* If not marked for clearing, clearing_reg already does not contain
18579 	 any secret.  */
18580       if (clearing_regno <= maxregno
18581 	  && bitmap_bit_p (to_clear_bitmap, clearing_regno))
18582 	{
18583 	  emit_move_insn (clearing_reg, const0_rtx);
18584 	  emit_use (clearing_reg);
18585 	  bitmap_clear_bit (to_clear_bitmap, clearing_regno);
18586 	}
18587 
18588       for (regno = minregno; regno <= maxregno; regno++)
18589 	{
18590 	  if (!bitmap_bit_p (to_clear_bitmap, regno))
18591 	    continue;
18592 
18593 	  if (IS_VFP_REGNUM (regno))
18594 	    {
18595 	      /* If regno is an even vfp register and its successor is also to
18596 		 be cleared, use vmov.  */
18597 	      if (TARGET_VFP_DOUBLE
18598 		  && VFP_REGNO_OK_FOR_DOUBLE (regno)
18599 		  && bitmap_bit_p (to_clear_bitmap, regno + 1))
18600 		{
18601 		  emit_move_insn (gen_rtx_REG (DFmode, regno),
18602 				  CONST1_RTX (DFmode));
18603 		  emit_use (gen_rtx_REG (DFmode, regno));
18604 		  regno++;
18605 		}
18606 	      else
18607 		{
18608 		  emit_move_insn (gen_rtx_REG (SFmode, regno),
18609 				  CONST1_RTX (SFmode));
18610 		  emit_use (gen_rtx_REG (SFmode, regno));
18611 		}
18612 	    }
18613 	  else
18614 	    {
18615 	      emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
18616 	      emit_use (gen_rtx_REG (SImode, regno));
18617 	    }
18618 	}
18619     }
18620 }
18621 
18622 /* Clear core and caller-saved VFP registers not used to pass arguments before
18623    a cmse_nonsecure_call.  Saving, clearing and restoring of VFP callee-saved
18624    registers is done in the __gnu_cmse_nonsecure_call libcall.  See
18625    libgcc/config/arm/cmse_nonsecure_call.S.  */
18626 
18627 static void
18628 cmse_nonsecure_call_inline_register_clear (void)
18629 {
18630   basic_block bb;
18631 
18632   FOR_EACH_BB_FN (bb, cfun)
18633     {
18634       rtx_insn *insn;
18635 
18636       FOR_BB_INSNS (bb, insn)
18637 	{
18638 	  bool clear_callee_saved = TARGET_HAVE_FPCXT_CMSE;
18639 	  /* frame = VFP regs + FPSCR + VPR.  */
18640 	  unsigned lazy_store_stack_frame_size
18641 	    = (LAST_VFP_REGNUM - FIRST_VFP_REGNUM + 1 + 2) * UNITS_PER_WORD;
18642 	  unsigned long callee_saved_mask
18643 	    = ((1 << (LAST_HI_REGNUM + 1)) - 1)
18644 	    & ~((1 << (LAST_ARG_REGNUM + 1)) - 1);
18645 	  unsigned address_regnum, regno;
18646 	  unsigned max_int_regno
18647 	    = clear_callee_saved ? IP_REGNUM : LAST_ARG_REGNUM;
18648 	  unsigned max_fp_regno
18649 	    = TARGET_HAVE_FPCXT_CMSE ? LAST_VFP_REGNUM : D7_VFP_REGNUM;
18650 	  unsigned maxregno
18651 	    = TARGET_HARD_FLOAT_ABI ? max_fp_regno : max_int_regno;
18652 	  auto_sbitmap to_clear_bitmap (maxregno + 1);
18653 	  rtx_insn *seq;
18654 	  rtx pat, call, unspec, clearing_reg, ip_reg, shift;
18655 	  rtx address;
18656 	  CUMULATIVE_ARGS args_so_far_v;
18657 	  cumulative_args_t args_so_far;
18658 	  tree arg_type, fntype;
18659 	  bool first_param = true, lazy_fpclear = !TARGET_HARD_FLOAT_ABI;
18660 	  function_args_iterator args_iter;
18661 	  uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
18662 
18663 	  if (!NONDEBUG_INSN_P (insn))
18664 	    continue;
18665 
18666 	  if (!CALL_P (insn))
18667 	    continue;
18668 
18669 	  pat = PATTERN (insn);
18670 	  gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
18671 	  call = XVECEXP (pat, 0, 0);
18672 
18673 	  /* Get the real call RTX if the insn sets a value, ie. returns.  */
18674 	  if (GET_CODE (call) == SET)
18675 	      call = SET_SRC (call);
18676 
18677 	  /* Check if it is a cmse_nonsecure_call.  */
18678 	  unspec = XEXP (call, 0);
18679 	  if (GET_CODE (unspec) != UNSPEC
18680 	      || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
18681 	    continue;
18682 
18683 	  /* Mark registers that need to be cleared.  Those that hold a
18684 	     parameter are removed from the set further below.  */
18685 	  bitmap_clear (to_clear_bitmap);
18686 	  bitmap_set_range (to_clear_bitmap, R0_REGNUM,
18687 			    max_int_regno - R0_REGNUM + 1);
18688 
18689 	  /* Only look at the caller-saved floating point registers in case of
18690 	     -mfloat-abi=hard.  For -mfloat-abi=softfp we will be using the
18691 	     lazy store and loads which clear both caller- and callee-saved
18692 	     registers.  */
18693 	  if (!lazy_fpclear)
18694 	    {
18695 	      auto_sbitmap float_bitmap (maxregno + 1);
18696 
18697 	      bitmap_clear (float_bitmap);
18698 	      bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
18699 				max_fp_regno - FIRST_VFP_REGNUM + 1);
18700 	      bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
18701 	    }
18702 
18703 	  /* Make sure the register used to hold the function address is not
18704 	     cleared.  */
18705 	  address = RTVEC_ELT (XVEC (unspec, 0), 0);
18706 	  gcc_assert (MEM_P (address));
18707 	  gcc_assert (REG_P (XEXP (address, 0)));
18708 	  address_regnum = REGNO (XEXP (address, 0));
18709 	  if (address_regnum <= max_int_regno)
18710 	    bitmap_clear_bit (to_clear_bitmap, address_regnum);
18711 
18712 	  /* Set basic block of call insn so that df rescan is performed on
18713 	     insns inserted here.  */
18714 	  set_block_for_insn (insn, bb);
18715 	  df_set_flags (DF_DEFER_INSN_RESCAN);
18716 	  start_sequence ();
18717 
18718 	  /* Make sure the scheduler doesn't schedule other insns beyond
18719 	     here.  */
18720 	  emit_insn (gen_blockage ());
18721 
18722 	  /* Walk through all arguments and clear registers
18723 	     appropriately.  */
18724 	  fntype = TREE_TYPE (MEM_EXPR (address));
18725 	  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
18726 				    NULL_TREE);
18727 	  args_so_far = pack_cumulative_args (&args_so_far_v);
18728 	  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
18729 	    {
18730 	      rtx arg_rtx;
18731 	      uint64_t to_clear_args_mask;
18732 
18733 	      if (VOID_TYPE_P (arg_type))
18734 		continue;
18735 
18736 	      function_arg_info arg (arg_type, /*named=*/true);
18737 	      if (!first_param)
18738 		/* ??? We should advance after processing the argument and pass
18739 		   the argument we're advancing past.  */
18740 		arm_function_arg_advance (args_so_far, arg);
18741 
18742 	      arg_rtx = arm_function_arg (args_so_far, arg);
18743 	      gcc_assert (REG_P (arg_rtx));
18744 	      to_clear_args_mask
18745 		= compute_not_to_clear_mask (arg_type, arg_rtx,
18746 					     REGNO (arg_rtx),
18747 					     &padding_bits_to_clear[0]);
18748 	      if (to_clear_args_mask)
18749 		{
18750 		  for (regno = R0_REGNUM; regno <= maxregno; regno++)
18751 		    {
18752 		      if (to_clear_args_mask & (1ULL << regno))
18753 			bitmap_clear_bit (to_clear_bitmap, regno);
18754 		    }
18755 		}
18756 
18757 	      first_param = false;
18758 	    }
18759 
18760 	  /* We use right shift and left shift to clear the LSB of the address
18761 	     we jump to instead of using bic, to avoid having to use an extra
18762 	     register on Thumb-1.  */
18763 	  clearing_reg = XEXP (address, 0);
18764 	  shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
18765 	  emit_insn (gen_rtx_SET (clearing_reg, shift));
18766 	  shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
18767 	  emit_insn (gen_rtx_SET (clearing_reg, shift));
18768 
18769 	  if (clear_callee_saved)
18770 	    {
18771 	      rtx push_insn =
18772 		emit_multi_reg_push (callee_saved_mask, callee_saved_mask);
18773 	      /* Disable frame debug info in push because it needs to be
18774 		 disabled for pop (see below).  */
18775 	      RTX_FRAME_RELATED_P (push_insn) = 0;
18776 
18777 	      /* Lazy store multiple.  */
18778 	      if (lazy_fpclear)
18779 		{
18780 		  rtx imm;
18781 		  rtx_insn *add_insn;
18782 
18783 		  imm = gen_int_mode (- lazy_store_stack_frame_size, SImode);
18784 		  add_insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
18785 						    stack_pointer_rtx, imm));
18786 		  /* If we have the frame pointer, then it will be the
18787 		     CFA reg.  Otherwise, the stack pointer is the CFA
18788 		     reg, so we need to emit a CFA adjust.  */
18789 		  if (!frame_pointer_needed)
18790 		    arm_add_cfa_adjust_cfa_note (add_insn,
18791 						 - lazy_store_stack_frame_size,
18792 						 stack_pointer_rtx,
18793 						 stack_pointer_rtx);
18794 		  emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx));
18795 		}
18796 	      /* Save VFP callee-saved registers.  */
18797 	      else
18798 		{
18799 		  vfp_emit_fstmd (D7_VFP_REGNUM + 1,
18800 				  (max_fp_regno - D7_VFP_REGNUM) / 2);
18801 		  /* Disable frame debug info in push because it needs to be
18802 		     disabled for vpop (see below).  */
18803 		  RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
18804 		}
18805 	    }
18806 
18807 	  /* Clear caller-saved registers that could leak information before
18808 	     doing a non-secure call.  */
18809 	  ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
18810 	  cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
18811 				NUM_ARG_REGS, ip_reg, clearing_reg);
18812 
18813 	  seq = get_insns ();
18814 	  end_sequence ();
18815 	  emit_insn_before (seq, insn);
18816 
18817 	  if (TARGET_HAVE_FPCXT_CMSE)
18818 	    {
18819 	      rtx_insn *last, *pop_insn, *after = insn;
18820 
18821 	      start_sequence ();
18822 
18823 	      /* Lazy load multiple done as part of libcall in Armv8-M.  */
18824 	      if (lazy_fpclear)
18825 		{
18826 		  rtx imm = gen_int_mode (lazy_store_stack_frame_size, SImode);
18827 		  emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx));
18828 		  rtx_insn *add_insn =
18829 		    emit_insn (gen_addsi3 (stack_pointer_rtx,
18830 					   stack_pointer_rtx, imm));
18831 		  if (!frame_pointer_needed)
18832 		    arm_add_cfa_adjust_cfa_note (add_insn,
18833 						 lazy_store_stack_frame_size,
18834 						 stack_pointer_rtx,
18835 						 stack_pointer_rtx);
18836 		}
18837 	      /* Restore VFP callee-saved registers.  */
18838 	      else
18839 		{
18840 		  int nb_callee_saved_vfp_regs =
18841 		    (max_fp_regno - D7_VFP_REGNUM) / 2;
18842 		  arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM + 1,
18843 					      nb_callee_saved_vfp_regs,
18844 					      stack_pointer_rtx);
18845 		  /* Disable frame debug info in vpop because the SP adjustment
18846 		     is made using a CFA adjustment note while the CFA register used
18847 		     is sometimes R7.  This then causes an assert failure in the
18848 		     CFI note creation code.  */
18849 		  RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
18850 		}
18851 
18852 	      arm_emit_multi_reg_pop (callee_saved_mask);
18853 	      pop_insn = get_last_insn ();
18854 
18855 	      /* Disable frame debug info in the pop because it resets the state
18856 		 of popped registers to what it was at the beginning of the
18857 		 function, before the prologue.  This leads to incorrect state
18858 		 when doing the pop after the nonsecure call for registers that
18859 		 are pushed both in the prologue and before the nonsecure call.
18860 
18861 		 It also occasionally triggers an assert failure in the CFI note
18862 		 creation code when there are two code paths to the epilogue,
18863 		 one of which does not go through the nonsecure call.
18864 		 Obviously this means that debugging between the push and pop is
18865 		 not reliable.  */
18866 	      RTX_FRAME_RELATED_P (pop_insn) = 0;
18867 
18868 	      seq = get_insns ();
18869 	      last = get_last_insn ();
18870 	      end_sequence ();
18871 
18872 	      emit_insn_after (seq, after);
18873 
18874 	      /* Skip the pop we have just inserted after the nonsecure call;
18875 		 we know it does not contain a nonsecure call.  */
18876 	      insn = last;
18877 	    }
18878 	}
18879     }
18880 }
18881 
18882 /* Rewrite move insn into subtract of 0 if the condition codes will
18883    be useful in the next conditional jump insn.  */
18884 
18885 static void
18886 thumb1_reorg (void)
18887 {
18888   basic_block bb;
18889 
18890   FOR_EACH_BB_FN (bb, cfun)
18891     {
18892       rtx dest, src;
18893       rtx cmp, op0, op1, set = NULL;
18894       rtx_insn *prev, *insn = BB_END (bb);
18895       bool insn_clobbered = false;
18896 
18897       while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
18898 	insn = PREV_INSN (insn);
18899 
18900       /* Find the last cbranchsi4_insn in basic block BB.  */
18901       if (insn == BB_HEAD (bb)
18902 	  || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
18903 	continue;
18904 
18905       /* Get the register with which we are comparing.  */
18906       cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
18907       op0 = XEXP (cmp, 0);
18908       op1 = XEXP (cmp, 1);
18909 
18910       /* Check that comparison is against ZERO.  */
18911       if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
18912 	continue;
18913 
18914       /* Find the first flag setting insn before INSN in basic block BB.  */
18915       gcc_assert (insn != BB_HEAD (bb));
18916       for (prev = PREV_INSN (insn);
18917 	   (!insn_clobbered
18918 	    && prev != BB_HEAD (bb)
18919 	    && (NOTE_P (prev)
18920 		|| DEBUG_INSN_P (prev)
18921 		|| ((set = single_set (prev)) != NULL
18922 		    && get_attr_conds (prev) == CONDS_NOCOND)));
18923 	   prev = PREV_INSN (prev))
18924 	{
18925 	  if (reg_set_p (op0, prev))
18926 	    insn_clobbered = true;
18927 	}
18928 
18929       /* Skip if op0 is clobbered by an insn other than prev.  */
18930       if (insn_clobbered)
18931 	continue;
18932 
18933       if (!set)
18934 	continue;
18935 
18936       dest = SET_DEST (set);
18937       src = SET_SRC (set);
18938       if (!low_register_operand (dest, SImode)
18939 	  || !low_register_operand (src, SImode))
18940 	continue;
18941 
18942       /* Rewrite move into subtract of 0 if its operand is compared with ZERO
18943 	 in INSN.  Both src and dest of the move insn are checked.  */
18944       if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
18945 	{
18946 	  dest = copy_rtx (dest);
18947 	  src = copy_rtx (src);
18948 	  src = gen_rtx_MINUS (SImode, src, const0_rtx);
18949 	  PATTERN (prev) = gen_rtx_SET (dest, src);
18950 	  INSN_CODE (prev) = -1;
18951 	  /* Set test register in INSN to dest.  */
18952 	  XEXP (cmp, 0) = copy_rtx (dest);
18953 	  INSN_CODE (insn) = -1;
18954 	}
18955     }
18956 }
18957 
18958 /* Convert instructions to their cc-clobbering variant if possible, since
18959    that allows us to use smaller encodings.  */
18960 
18961 static void
18962 thumb2_reorg (void)
18963 {
18964   basic_block bb;
18965   regset_head live;
18966 
18967   INIT_REG_SET (&live);
18968 
18969   /* We are freeing block_for_insn in the toplev to keep compatibility
18970      with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
18971   compute_bb_for_insn ();
18972   df_analyze ();
18973 
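  /* SKIP leaves an insn unchanged, CONV adds a clobber of CC so the
     flag-setting 16-bit encoding can be used, and SWAP_CONV swaps the
     operands of a commutative operation before doing the same.  */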
18974   enum Convert_Action {SKIP, CONV, SWAP_CONV};
18975 
18976   FOR_EACH_BB_FN (bb, cfun)
18977     {
18978       if ((current_tune->disparage_flag_setting_t16_encodings
18979 	   == tune_params::DISPARAGE_FLAGS_ALL)
18980 	  && optimize_bb_for_speed_p (bb))
18981 	continue;
18982 
18983       rtx_insn *insn;
18984       Convert_Action action = SKIP;
18985       Convert_Action action_for_partial_flag_setting
18986 	= ((current_tune->disparage_flag_setting_t16_encodings
18987 	    != tune_params::DISPARAGE_FLAGS_NEITHER)
18988 	   && optimize_bb_for_speed_p (bb))
18989 	  ? SKIP : CONV;
18990 
18991       COPY_REG_SET (&live, DF_LR_OUT (bb));
18992       df_simulate_initialize_backwards (bb, &live);
18993       FOR_BB_INSNS_REVERSE (bb, insn)
18994 	{
18995 	  if (NONJUMP_INSN_P (insn)
18996 	      && !REGNO_REG_SET_P (&live, CC_REGNUM)
18997 	      && GET_CODE (PATTERN (insn)) == SET)
18998 	    {
18999 	      action = SKIP;
19000 	      rtx pat = PATTERN (insn);
19001 	      rtx dst = XEXP (pat, 0);
19002 	      rtx src = XEXP (pat, 1);
19003 	      rtx op0 = NULL_RTX, op1 = NULL_RTX;
19004 
19005 	      if (UNARY_P (src) || BINARY_P (src))
19006 		  op0 = XEXP (src, 0);
19007 
19008 	      if (BINARY_P (src))
19009 		  op1 = XEXP (src, 1);
19010 
19011 	      if (low_register_operand (dst, SImode))
19012 		{
19013 		  switch (GET_CODE (src))
19014 		    {
19015 		    case PLUS:
19016 		      /* Adding two registers and storing the result
19017 			 in the first source is already a 16-bit
19018 			 operation.  */
19019 		      if (rtx_equal_p (dst, op0)
19020 			  && register_operand (op1, SImode))
19021 			break;
19022 
19023 		      if (low_register_operand (op0, SImode))
19024 			{
19025 			  /* ADDS <Rd>,<Rn>,<Rm>  */
19026 			  if (low_register_operand (op1, SImode))
19027 			    action = CONV;
19028 			  /* ADDS <Rdn>,#<imm8>  */
19029 			  /* SUBS <Rdn>,#<imm8>  */
19030 			  else if (rtx_equal_p (dst, op0)
19031 				   && CONST_INT_P (op1)
19032 				   && IN_RANGE (INTVAL (op1), -255, 255))
19033 			    action = CONV;
19034 			  /* ADDS <Rd>,<Rn>,#<imm3>  */
19035 			  /* SUBS <Rd>,<Rn>,#<imm3>  */
19036 			  else if (CONST_INT_P (op1)
19037 				   && IN_RANGE (INTVAL (op1), -7, 7))
19038 			    action = CONV;
19039 			}
19040 		      /* ADCS <Rd>, <Rn>  */
19041 		      else if (GET_CODE (XEXP (src, 0)) == PLUS
19042 			      && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
19043 			      && low_register_operand (XEXP (XEXP (src, 0), 1),
19044 						       SImode)
19045 			      && COMPARISON_P (op1)
19046 			      && cc_register (XEXP (op1, 0), VOIDmode)
19047 			      && maybe_get_arm_condition_code (op1) == ARM_CS
19048 			      && XEXP (op1, 1) == const0_rtx)
19049 		        action = CONV;
19050 		      break;
19051 
19052 		    case MINUS:
19053 		      /* RSBS <Rd>,<Rn>,#0
19054 			 Not handled here: see NEG below.  */
19055 		      /* SUBS <Rd>,<Rn>,#<imm3>
19056 			 SUBS <Rdn>,#<imm8>
19057 			 Not handled here: see PLUS above.  */
19058 		      /* SUBS <Rd>,<Rn>,<Rm>  */
19059 		      if (low_register_operand (op0, SImode)
19060 			  && low_register_operand (op1, SImode))
19061 			    action = CONV;
19062 		      break;
19063 
19064 		    case MULT:
19065 		      /* MULS <Rdm>,<Rn>,<Rdm>
19066 			 As an exception to the rule, this is only used
19067 			 when optimizing for size since MULS is slow on all
19068 			 known implementations.  We do not even want to use
19069 			 MULS in cold code, if optimizing for speed, so we
19070 			 test the global flag here.  */
19071 		      if (!optimize_size)
19072 			break;
19073 		      /* Fall through.  */
19074 		    case AND:
19075 		    case IOR:
19076 		    case XOR:
19077 		      /* ANDS <Rdn>,<Rm>  */
19078 		      if (rtx_equal_p (dst, op0)
19079 			  && low_register_operand (op1, SImode))
19080 			action = action_for_partial_flag_setting;
19081 		      else if (rtx_equal_p (dst, op1)
19082 			       && low_register_operand (op0, SImode))
19083 			action = action_for_partial_flag_setting == SKIP
19084 				 ? SKIP : SWAP_CONV;
19085 		      break;
19086 
19087 		    case ASHIFTRT:
19088 		    case ASHIFT:
19089 		    case LSHIFTRT:
19090 		      /* ASRS <Rdn>,<Rm> */
19091 		      /* LSRS <Rdn>,<Rm> */
19092 		      /* LSLS <Rdn>,<Rm> */
19093 		      if (rtx_equal_p (dst, op0)
19094 			  && low_register_operand (op1, SImode))
19095 			action = action_for_partial_flag_setting;
19096 		      /* ASRS <Rd>,<Rm>,#<imm5> */
19097 		      /* LSRS <Rd>,<Rm>,#<imm5> */
19098 		      /* LSLS <Rd>,<Rm>,#<imm5> */
19099 		      else if (low_register_operand (op0, SImode)
19100 			       && CONST_INT_P (op1)
19101 			       && IN_RANGE (INTVAL (op1), 0, 31))
19102 			action = action_for_partial_flag_setting;
19103 		      break;
19104 
19105 		    case ROTATERT:
19106 		      /* RORS <Rdn>,<Rm>  */
19107 		      if (rtx_equal_p (dst, op0)
19108 			  && low_register_operand (op1, SImode))
19109 			action = action_for_partial_flag_setting;
19110 		      break;
19111 
19112 		    case NOT:
19113 		      /* MVNS <Rd>,<Rm>  */
19114 		      if (low_register_operand (op0, SImode))
19115 			action = action_for_partial_flag_setting;
19116 		      break;
19117 
19118 		    case NEG:
19119 		      /* NEGS <Rd>,<Rm>  (a.k.a RSBS)  */
19120 		      if (low_register_operand (op0, SImode))
19121 			action = CONV;
19122 		      break;
19123 
19124 		    case CONST_INT:
19125 		      /* MOVS <Rd>,#<imm8>  */
19126 		      if (CONST_INT_P (src)
19127 			  && IN_RANGE (INTVAL (src), 0, 255))
19128 			action = action_for_partial_flag_setting;
19129 		      break;
19130 
19131 		    case REG:
19132 		      /* MOVS and MOV<c> with registers have different
19133 			 encodings, so are not relevant here.  */
19134 		      break;
19135 
19136 		    default:
19137 		      break;
19138 		    }
19139 		}
19140 
19141 	      if (action != SKIP)
19142 		{
19143 		  rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
19144 		  rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
19145 		  rtvec vec;
19146 
19147 		  if (action == SWAP_CONV)
19148 		    {
19149 		      src = copy_rtx (src);
19150 		      XEXP (src, 0) = op1;
19151 		      XEXP (src, 1) = op0;
19152 		      pat = gen_rtx_SET (dst, src);
19153 		      vec = gen_rtvec (2, pat, clobber);
19154 		    }
19155 		  else /* action == CONV */
19156 		    vec = gen_rtvec (2, pat, clobber);
19157 
19158 		  PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
19159 		  INSN_CODE (insn) = -1;
19160 		}
19161 	    }
19162 
19163 	  if (NONDEBUG_INSN_P (insn))
19164 	    df_simulate_one_insn_backwards (bb, insn, &live);
19165 	}
19166     }
19167 
19168   CLEAR_REG_SET (&live);
19169 }
19170 
19171 /* GCC puts the pool in the wrong place for ARM, since we can only
19172    load addresses a limited distance around the pc.  We do some
19173    special munging to move the constant pool values to the correct
19174    point in the code.  */
19175 static void
19176 arm_reorg (void)
19177 {
19178   rtx_insn *insn;
19179   HOST_WIDE_INT address = 0;
19180   Mfix * fix;
19181 
19182   if (use_cmse)
19183     cmse_nonsecure_call_inline_register_clear ();
19184 
19185   /* We cannot run the Thumb passes for thunks because there is no CFG.  */
19186   if (cfun->is_thunk)
19187     ;
19188   else if (TARGET_THUMB1)
19189     thumb1_reorg ();
19190   else if (TARGET_THUMB2)
19191     thumb2_reorg ();
19192 
19193   /* Ensure all insns that must be split have been split at this point.
19194      Otherwise, the pool placement code below may compute incorrect
19195      insn lengths.  Note that when optimizing, all insns have already
19196      been split at this point.  */
19197   if (!optimize)
19198     split_all_insns_noflow ();
19199 
19200   /* Make sure we do not attempt to create a literal pool even though it should
19201      no longer be necessary to create any.  */
19202   if (arm_disable_literal_pool)
19203     return ;
19204 
19205   minipool_fix_head = minipool_fix_tail = NULL;
19206 
19207   /* The first insn must always be a note, or the code below won't
19208      scan it properly.  */
19209   insn = get_insns ();
19210   gcc_assert (NOTE_P (insn));
19211   minipool_pad = 0;
19212 
19213   /* Scan all the insns and record the operands that will need fixing.  */
19214   for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
19215     {
19216       if (BARRIER_P (insn))
19217 	push_minipool_barrier (insn, address);
19218       else if (INSN_P (insn))
19219 	{
19220 	  rtx_jump_table_data *table;
19221 
19222 	  note_invalid_constants (insn, address, true);
19223 	  address += get_attr_length (insn);
19224 
19225 	  /* If the insn is a vector jump, add the size of the table
19226 	     and skip the table.  */
19227 	  if (tablejump_p (insn, NULL, &table))
19228 	    {
19229 	      address += get_jump_table_size (table);
19230 	      insn = table;
19231 	    }
19232 	}
19233       else if (LABEL_P (insn))
19234 	/* Add the worst-case padding due to alignment.  We don't add
19235 	   the _current_ padding because the minipool insertions
19236 	   themselves might change it.  */
19237 	address += get_label_padding (insn);
19238     }
19239 
19240   fix = minipool_fix_head;
19241 
19242   /* Now scan the fixups and perform the required changes.  */
19243   while (fix)
19244     {
19245       Mfix * ftmp;
19246       Mfix * fdel;
19247       Mfix *  last_added_fix;
19248       Mfix * last_barrier = NULL;
19249       Mfix * this_fix;
19250 
19251       /* Skip any further barriers before the next fix.  */
19252       while (fix && BARRIER_P (fix->insn))
19253 	fix = fix->next;
19254 
19255       /* No more fixes.  */
19256       if (fix == NULL)
19257 	break;
19258 
19259       last_added_fix = NULL;
19260 
19261       for (ftmp = fix; ftmp; ftmp = ftmp->next)
19262 	{
19263 	  if (BARRIER_P (ftmp->insn))
19264 	    {
19265 	      if (ftmp->address >= minipool_vector_head->max_address)
19266 		break;
19267 
19268 	      last_barrier = ftmp;
19269 	    }
19270 	  else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
19271 	    break;
19272 
19273 	  last_added_fix = ftmp;  /* Keep track of the last fix added.  */
19274 	}
19275 
19276       /* If we found a barrier, drop back to that; any fixes that we
19277 	 could have reached but come after the barrier will now go in
19278 	 the next mini-pool.  */
19279       if (last_barrier != NULL)
19280 	{
19281 	  /* Reduce the refcount for those fixes that won't go into this
19282 	     pool after all.  */
19283 	  for (fdel = last_barrier->next;
19284 	       fdel && fdel != ftmp;
19285 	       fdel = fdel->next)
19286 	    {
19287 	      fdel->minipool->refcount--;
19288 	      fdel->minipool = NULL;
19289 	    }
19290 
19291 	  ftmp = last_barrier;
19292 	}
19293       else
19294         {
19295 	  /* ftmp is the first fix that we can't fit into this pool and
19296 	     there are no natural barriers that we could use.  Insert a
19297 	     new barrier in the code somewhere between the previous
19298 	     fix and this one, and arrange to jump around it.  */
19299 	  HOST_WIDE_INT max_address;
19300 
19301 	  /* The last item on the list of fixes must be a barrier, so
19302 	     we can never run off the end of the list of fixes without
19303 	     last_barrier being set.  */
19304 	  gcc_assert (ftmp);
19305 
19306 	  max_address = minipool_vector_head->max_address;
19307 	  /* Check that there isn't another fix that is in range that
19308 	     we couldn't fit into this pool because the pool was
19309 	     already too large: we need to put the pool before such an
19310 	     instruction.  The pool itself may come just after the
19311 	     fix because create_fix_barrier also allows space for a
19312 	     jump instruction.  */
19313 	  if (ftmp->address < max_address)
19314 	    max_address = ftmp->address + 1;
19315 
19316 	  last_barrier = create_fix_barrier (last_added_fix, max_address);
19317 	}
19318 
19319       assign_minipool_offsets (last_barrier);
19320 
19321       while (ftmp)
19322 	{
19323 	  if (!BARRIER_P (ftmp->insn)
19324 	      && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
19325 		  == NULL))
19326 	    break;
19327 
19328 	  ftmp = ftmp->next;
19329 	}
19330 
19331       /* Scan over the fixes we have identified for this pool, fixing them
19332 	 up and adding the constants to the pool itself.  */
19333       for (this_fix = fix; this_fix && ftmp != this_fix;
19334 	   this_fix = this_fix->next)
19335 	if (!BARRIER_P (this_fix->insn))
19336 	  {
19337 	    rtx addr
19338 	      = plus_constant (Pmode,
19339 			       gen_rtx_LABEL_REF (VOIDmode,
19340 						  minipool_vector_label),
19341 			       this_fix->minipool->offset);
19342 	    *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
19343 	  }
19344 
19345       dump_minipool (last_barrier->insn);
19346       fix = ftmp;
19347     }
19348 
19349   /* From now on we must synthesize any constants that we can't handle
19350      directly.  This can happen if the RTL gets split during final
19351      instruction generation.  */
19352   cfun->machine->after_arm_reorg = 1;
19353 
19354   /* Free the minipool memory.  */
19355   obstack_free (&minipool_obstack, minipool_startobj);
19356 }
19357 
19358 /* Routines to output assembly language.  */
19359 
19360 /* Return string representation of passed in real value.  */
19361 static const char *
19362 fp_const_from_val (REAL_VALUE_TYPE *r)
19363 {
19364   if (!fp_consts_inited)
19365     init_fp_table ();
19366 
19367   gcc_assert (real_equal (r, &value_fp0));
19368   return "0";
19369 }
19370 
19371 /* OPERANDS[0] is the entire list of insns that constitute the pop,
19372    OPERANDS[1] is the base register, RETURN_PC is true iff a return insn
19373    is in the list, UPDATE is true iff the list contains an explicit
19374    update of the base register.  */
19375 void
19376 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
19377                          bool update)
19378 {
19379   int i;
19380   char pattern[100];
19381   int offset;
19382   const char *conditional;
19383   int num_saves = XVECLEN (operands[0], 0);
19384   unsigned int regno;
19385   unsigned int regno_base = REGNO (operands[1]);
19386   bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
19387 
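  /* The first UPDATE + RETURN_PC elements of the PARALLEL are the optional
     base register update and return; OFFSET indexes the first register
     load in the list.  */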
19388   offset = 0;
19389   offset += update ? 1 : 0;
19390   offset += return_pc ? 1 : 0;
19391 
19392   /* Is the base register in the list?  */
19393   for (i = offset; i < num_saves; i++)
19394     {
19395       regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
19396       /* If SP is in the list, then the base register must be SP.  */
19397       gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
19398       /* If base register is in the list, there must be no explicit update.  */
19399       if (regno == regno_base)
19400         gcc_assert (!update);
19401     }
19402 
19403   conditional = reverse ? "%?%D0" : "%?%d0";
19404   /* Can't use POP if returning from an interrupt.  */
19405   if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
19406     sprintf (pattern, "pop%s\t{", conditional);
19407   else
19408     {
19409       /* Output ldmfd when the base register is SP, otherwise output ldmia.
19410          It's just a convention; their semantics are identical.  */
19411       if (regno_base == SP_REGNUM)
19412 	sprintf (pattern, "ldmfd%s\t", conditional);
19413       else if (update)
19414 	sprintf (pattern, "ldmia%s\t", conditional);
19415       else
19416 	sprintf (pattern, "ldm%s\t", conditional);
19417 
19418       strcat (pattern, reg_names[regno_base]);
19419       if (update)
19420         strcat (pattern, "!, {");
19421       else
19422         strcat (pattern, ", {");
19423     }
19424 
19425   /* Output the first destination register.  */
19426   strcat (pattern,
19427           reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
19428 
19429   /* Output the rest of the destination registers.  */
19430   for (i = offset + 1; i < num_saves; i++)
19431     {
19432       strcat (pattern, ", ");
19433       strcat (pattern,
19434               reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
19435     }
19436 
19437   strcat (pattern, "}");
19438 
19439   if (interrupt_p && return_pc)
19440     strcat (pattern, "^");
19441 
19442   output_asm_insn (pattern, &cond);
19443 }
19444 
19445 
19446 /* Output the assembly for a store multiple.  */
19447 
19448 const char *
19449 vfp_output_vstmd (rtx * operands)
19450 {
19451   char pattern[100];
19452   int p;
19453   int base;
19454   int i;
19455   rtx addr_reg = REG_P (XEXP (operands[0], 0))
19456 		   ? XEXP (operands[0], 0)
19457 		   : XEXP (XEXP (operands[0], 0), 0);
19458   bool push_p =  REGNO (addr_reg) == SP_REGNUM;
19459 
19460   if (push_p)
19461     strcpy (pattern, "vpush%?.64\t{%P1");
19462   else
19463     strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
19464 
19465   p = strlen (pattern);
19466 
19467   gcc_assert (REG_P (operands[1]));
19468 
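  /* Convert the VFP register number of the first data register into a
     D-register index for printing the remaining registers in the list.  */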
19469   base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
19470   for (i = 1; i < XVECLEN (operands[2], 0); i++)
19471     {
19472       p += sprintf (&pattern[p], ", d%d", base + i);
19473     }
19474   strcpy (&pattern[p], "}");
19475 
19476   output_asm_insn (pattern, operands);
19477   return "";
19478 }
19479 
19480 
19481 /* Emit RTL to save a block of VFP register pairs to the stack.  Returns the
19482    number of bytes pushed.  */
19483 
19484 static int
19485 vfp_emit_fstmd (int base_reg, int count)
19486 {
19487   rtx par;
19488   rtx dwarf;
19489   rtx tmp, reg;
19490   int i;
19491 
19492   /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
19493      register pairs are stored by a store multiple insn.  We avoid this
19494      by pushing an extra pair.  */
19495   if (count == 2 && !arm_arch6)
19496     {
19497       if (base_reg == LAST_VFP_REGNUM - 3)
19498 	base_reg -= 2;
19499       count++;
19500     }
19501 
19502   /* FSTMD may not store more than 16 doubleword registers at once.  Split
19503      larger stores into multiple parts (up to a maximum of two, in
19504      practice).  */
19505   if (count > 16)
19506     {
19507       int saved;
19508       /* NOTE: base_reg is an internal register number, so each D register
19509          counts as 2.  */
19510       saved = vfp_emit_fstmd (base_reg + 32, count - 16);
19511       saved += vfp_emit_fstmd (base_reg, 16);
19512       return saved;
19513     }
19514 
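  /* PAR is the store-multiple pattern itself; DWARF describes the same
     effect as an SP adjustment plus individual stores and is attached
     below as the frame-related (CFI) note.  */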
19515   par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
19516   dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
19517 
19518   reg = gen_rtx_REG (DFmode, base_reg);
19519   base_reg += 2;
19520 
19521   XVECEXP (par, 0, 0)
19522     = gen_rtx_SET (gen_frame_mem
19523 		   (BLKmode,
19524 		    gen_rtx_PRE_MODIFY (Pmode,
19525 					stack_pointer_rtx,
19526 					plus_constant
19527 					(Pmode, stack_pointer_rtx,
19528 					 - (count * 8)))
19529 		    ),
19530 		   gen_rtx_UNSPEC (BLKmode,
19531 				   gen_rtvec (1, reg),
19532 				   UNSPEC_PUSH_MULT));
19533 
19534   tmp = gen_rtx_SET (stack_pointer_rtx,
19535 		     plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
19536   RTX_FRAME_RELATED_P (tmp) = 1;
19537   XVECEXP (dwarf, 0, 0) = tmp;
19538 
19539   tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
19540   RTX_FRAME_RELATED_P (tmp) = 1;
19541   XVECEXP (dwarf, 0, 1) = tmp;
19542 
19543   for (i = 1; i < count; i++)
19544     {
19545       reg = gen_rtx_REG (DFmode, base_reg);
19546       base_reg += 2;
19547       XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
19548 
19549       tmp = gen_rtx_SET (gen_frame_mem (DFmode,
19550 					plus_constant (Pmode,
19551 						       stack_pointer_rtx,
19552 						       i * 8)),
19553 			 reg);
19554       RTX_FRAME_RELATED_P (tmp) = 1;
19555       XVECEXP (dwarf, 0, i + 1) = tmp;
19556     }
19557 
19558   par = emit_insn (par);
19559   add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19560   RTX_FRAME_RELATED_P (par) = 1;
19561 
19562   return count * 8;
19563 }
19564 
19565 /* Return true if -mcmse has been passed and the function pointed to by 'addr'
19566    has the cmse_nonsecure_call attribute; return false otherwise.  */
19567 
19568 bool
19569 detect_cmse_nonsecure_call (tree addr)
19570 {
19571   if (!addr)
19572     return FALSE;
19573 
19574   tree fntype = TREE_TYPE (addr);
19575   if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
19576 				    TYPE_ATTRIBUTES (fntype)))
19577     return TRUE;
19578   return FALSE;
19579 }
19580 
19581 
19582 /* Emit a call instruction with pattern PAT.  ADDR is the address of
19583    the call target.  */
19584 
19585 void
19586 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
19587 {
19588   rtx insn;
19589 
19590   insn = emit_call_insn (pat);
19591 
19592   /* The PIC register is live on entry to VxWorks PIC PLT entries.
19593      If the call might use such an entry, add a use of the PIC register
19594      to the instruction's CALL_INSN_FUNCTION_USAGE.  */
19595   if (TARGET_VXWORKS_RTP
19596       && flag_pic
19597       && !sibcall
19598       && GET_CODE (addr) == SYMBOL_REF
19599       && (SYMBOL_REF_DECL (addr)
19600 	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
19601 	  : !SYMBOL_REF_LOCAL_P (addr)))
19602     {
19603       require_pic_register (NULL_RTX, false /*compute_now*/);
19604       use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
19605     }
19606 
19607   if (TARGET_FDPIC)
19608     {
19609       rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
19610       use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
19611     }
19612 
19613   if (TARGET_AAPCS_BASED)
19614     {
19615       /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
19616 	 linker.  We need to add an IP clobber to allow setting
19617 	 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true.  A CC clobber
19618 	 is not needed since it's a fixed register.  */
19619       rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
19620       clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
19621     }
19622 }
19623 
19624 /* Output a 'call' insn.  */
19625 const char *
19626 output_call (rtx *operands)
19627 {
19628   gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly.  */
19629 
19630   /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
19631   if (REGNO (operands[0]) == LR_REGNUM)
19632     {
19633       operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
19634       output_asm_insn ("mov%?\t%0, %|lr", operands);
19635     }
19636 
19637   output_asm_insn ("mov%?\t%|lr, %|pc", operands);
19638 
19639   if (TARGET_INTERWORK || arm_arch4t)
19640     output_asm_insn ("bx%?\t%0", operands);
19641   else
19642     output_asm_insn ("mov%?\t%|pc, %0", operands);
19643 
19644   return "";
19645 }
19646 
19647 /* Output a move of a long double from ARM registers to ARM registers.
19648    OPERANDS[0] is the destination.
19649    OPERANDS[1] is the source.  */
19650 const char *
19651 output_mov_long_double_arm_from_arm (rtx *operands)
19652 {
19653   /* We have to be careful here because the two might overlap.  */
19654   int dest_start = REGNO (operands[0]);
19655   int src_start = REGNO (operands[1]);
19656   rtx ops[2];
19657   int i;
19658 
19659   if (dest_start < src_start)
19660     {
19661       for (i = 0; i < 3; i++)
19662 	{
19663 	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
19664 	  ops[1] = gen_rtx_REG (SImode, src_start + i);
19665 	  output_asm_insn ("mov%?\t%0, %1", ops);
19666 	}
19667     }
19668   else
19669     {
19670       for (i = 2; i >= 0; i--)
19671 	{
19672 	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
19673 	  ops[1] = gen_rtx_REG (SImode, src_start + i);
19674 	  output_asm_insn ("mov%?\t%0, %1", ops);
19675 	}
19676     }
19677 
19678   return "";
19679 }
19680 
19681 void
19682 arm_emit_movpair (rtx dest, rtx src)
19683  {
19684   /* If the src is an immediate, simplify it.  */
19685   if (CONST_INT_P (src))
19686     {
19687       HOST_WIDE_INT val = INTVAL (src);
19688       emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
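      /* Write the high halfword only if it is non-zero, by inserting it
	 into bits 16-31 of DEST with a ZERO_EXTRACT set (a movt-style
	 insertion).  */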
19689       if ((val >> 16) & 0x0000ffff)
19690 	{
19691 	  emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
19692 					       GEN_INT (16)),
19693 			 GEN_INT ((val >> 16) & 0x0000ffff));
19694 	  rtx_insn *insn = get_last_insn ();
19695 	  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
19696 	}
19697       return;
19698     }
19699    emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
19700    emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
19701    rtx_insn *insn = get_last_insn ();
19702    set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
19703  }
19704 
19705 /* Output a move between double words.  It must be REG<-MEM
19706    or MEM<-REG.  */
19707 const char *
19708 output_move_double (rtx *operands, bool emit, int *count)
19709 {
19710   enum rtx_code code0 = GET_CODE (operands[0]);
19711   enum rtx_code code1 = GET_CODE (operands[1]);
19712   rtx otherops[3];
19713   if (count)
19714     *count = 1;
19715 
19716   /* The only case when this might happen is when
19717      you are looking at the length of a DImode instruction
19718      that has an invalid constant in it.  */
19719   if (code0 == REG && code1 != MEM)
19720     {
19721       gcc_assert (!emit);
19722       *count = 2;
19723       return "";
19724     }
19725 
19726   if (code0 == REG)
19727     {
19728       unsigned int reg0 = REGNO (operands[0]);
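      /* LDRD in ARM state requires an even first destination register;
	 Thumb-2 LDRD has no such restriction.  */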
19729       const bool can_ldrd = TARGET_LDRD && (TARGET_THUMB2 || (reg0 % 2 == 0));
19730 
19731       otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
19732 
19733       gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */
19734 
19735       switch (GET_CODE (XEXP (operands[1], 0)))
19736 	{
19737 	case REG:
19738 
19739 	  if (emit)
19740 	    {
19741 	      if (can_ldrd
19742 		  && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
19743 		output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
19744 	      else
19745 		output_asm_insn ("ldmia%?\t%m1, %M0", operands);
19746 	    }
19747 	  break;
19748 
19749 	case PRE_INC:
19750 	  gcc_assert (can_ldrd);
19751 	  if (emit)
19752 	    output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
19753 	  break;
19754 
19755 	case PRE_DEC:
19756 	  if (emit)
19757 	    {
19758 	      if (can_ldrd)
19759 		output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
19760 	      else
19761 		output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
19762 	    }
19763 	  break;
19764 
19765 	case POST_INC:
19766 	  if (emit)
19767 	    {
19768 	      if (can_ldrd)
19769 		output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
19770 	      else
19771 		output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
19772 	    }
19773 	  break;
19774 
19775 	case POST_DEC:
19776 	  gcc_assert (can_ldrd);
19777 	  if (emit)
19778 	    output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
19779 	  break;
19780 
19781 	case PRE_MODIFY:
19782 	case POST_MODIFY:
19783 	  /* Autoincrement addressing modes should never have overlapping
19784 	     base and destination registers, and overlapping index registers
19785 	     are already prohibited, so this doesn't need to worry about
19786 	     fix_cm3_ldrd.  */
19787 	  otherops[0] = operands[0];
19788 	  otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
19789 	  otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
19790 
19791 	  if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
19792 	    {
19793 	      if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
19794 		{
19795 		  /* Registers overlap so split out the increment.  */
19796 		  if (emit)
19797 		    {
19798 		      gcc_assert (can_ldrd);
19799 		      output_asm_insn ("add%?\t%1, %1, %2", otherops);
19800 		      output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
19801 		    }
19802 		  if (count)
19803 		    *count = 2;
19804 		}
19805 	      else
19806 		{
19807 		  /* Use a single insn if we can.
19808 		     FIXME: IWMMXT allows offsets larger than ldrd can
19809 		     handle, fix these up with a pair of ldr.  */
19810 		  if (can_ldrd
19811 		      && (TARGET_THUMB2
19812 		      || !CONST_INT_P (otherops[2])
19813 		      || (INTVAL (otherops[2]) > -256
19814 			  && INTVAL (otherops[2]) < 256)))
19815 		    {
19816 		      if (emit)
19817 			output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
19818 		    }
19819 		  else
19820 		    {
19821 		      if (emit)
19822 			{
19823 			  output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
19824 			  output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
19825 			}
19826 		      if (count)
19827 			*count = 2;
19828 
19829 		    }
19830 		}
19831 	    }
19832 	  else
19833 	    {
19834 	      /* Use a single insn if we can.
19835 		 FIXME: IWMMXT allows offsets larger than ldrd can handle,
19836 		 fix these up with a pair of ldr.  */
19837 	      if (can_ldrd
19838 		  && (TARGET_THUMB2
19839 		  || !CONST_INT_P (otherops[2])
19840 		  || (INTVAL (otherops[2]) > -256
19841 		      && INTVAL (otherops[2]) < 256)))
19842 		{
19843 		  if (emit)
19844 		    output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
19845 		}
19846 	      else
19847 		{
19848 		  if (emit)
19849 		    {
19850 		      output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
19851 		      output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
19852 		    }
19853 		  if (count)
19854 		    *count = 2;
19855 		}
19856 	    }
19857 	  break;
19858 
19859 	case LABEL_REF:
19860 	case CONST:
19861 	  /* We might be able to use ldrd %0, %1 here.  However the range is
19862 	     different to ldr/adr, and it is broken on some ARMv7-M
19863 	     implementations.  */
19864 	  /* Use the second register of the pair to avoid problematic
19865 	     overlap.  */
19866 	  otherops[1] = operands[1];
19867 	  if (emit)
19868 	    output_asm_insn ("adr%?\t%0, %1", otherops);
19869 	  operands[1] = otherops[0];
19870 	  if (emit)
19871 	    {
19872 	      if (can_ldrd)
19873 		output_asm_insn ("ldrd%?\t%0, [%1]", operands);
19874 	      else
19875 		output_asm_insn ("ldmia%?\t%1, %M0", operands);
19876 	    }
19877 
19878 	  if (count)
19879 	    *count = 2;
19880 	  break;
19881 
19882 	  /* ??? This needs checking for thumb2.  */
19883 	default:
19884 	  if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
19885 			       GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
19886 	    {
19887 	      otherops[0] = operands[0];
19888 	      otherops[1] = XEXP (XEXP (operands[1], 0), 0);
19889 	      otherops[2] = XEXP (XEXP (operands[1], 0), 1);
19890 
19891 	      if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
19892 		{
19893 		  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
19894 		    {
19895 		      switch ((int) INTVAL (otherops[2]))
19896 			{
19897 			case -8:
19898 			  if (emit)
19899 			    output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
19900 			  return "";
19901 			case -4:
19902 			  if (TARGET_THUMB2)
19903 			    break;
19904 			  if (emit)
19905 			    output_asm_insn ("ldmda%?\t%1, %M0", otherops);
19906 			  return "";
19907 			case 4:
19908 			  if (TARGET_THUMB2)
19909 			    break;
19910 			  if (emit)
19911 			    output_asm_insn ("ldmib%?\t%1, %M0", otherops);
19912 			  return "";
19913 			}
19914 		    }
19915 		  otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
19916 		  operands[1] = otherops[0];
19917 		  if (can_ldrd
19918 		      && (REG_P (otherops[2])
19919 			  || TARGET_THUMB2
19920 			  || (CONST_INT_P (otherops[2])
19921 			      && INTVAL (otherops[2]) > -256
19922 			      && INTVAL (otherops[2]) < 256)))
19923 		    {
19924 		      if (reg_overlap_mentioned_p (operands[0],
19925 						   otherops[2]))
19926 			{
19927 			  /* Swap base and index registers over to
19928 			     avoid a conflict.  */
19929 			  std::swap (otherops[1], otherops[2]);
19930 			}
19931 		      /* If both registers conflict, it will usually
19932 			 have been fixed by a splitter.  */
19933 		      if (reg_overlap_mentioned_p (operands[0], otherops[2])
19934 			  || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
19935 			{
19936 			  if (emit)
19937 			    {
19938 			      output_asm_insn ("add%?\t%0, %1, %2", otherops);
19939 			      output_asm_insn ("ldrd%?\t%0, [%1]", operands);
19940 			    }
19941 			  if (count)
19942 			    *count = 2;
19943 			}
19944 		      else
19945 			{
19946 			  otherops[0] = operands[0];
19947 			  if (emit)
19948 			    output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
19949 			}
19950 		      return "";
19951 		    }
19952 
19953 		  if (CONST_INT_P (otherops[2]))
19954 		    {
19955 		      if (emit)
19956 			{
19957 			  if (!(const_ok_for_arm (INTVAL (otherops[2]))))
19958 			    output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
19959 			  else
19960 			    output_asm_insn ("add%?\t%0, %1, %2", otherops);
19961 			}
19962 		    }
19963 		  else
19964 		    {
19965 		      if (emit)
19966 			output_asm_insn ("add%?\t%0, %1, %2", otherops);
19967 		    }
19968 		}
19969 	      else
19970 		{
19971 		  if (emit)
19972 		    output_asm_insn ("sub%?\t%0, %1, %2", otherops);
19973 		}
19974 
19975 	      if (count)
19976 		*count = 2;
19977 
19978 	      if (can_ldrd)
19979 		return "ldrd%?\t%0, [%1]";
19980 
19981 	      return "ldmia%?\t%1, %M0";
19982 	    }
19983 	  else
19984 	    {
19985 	      otherops[1] = adjust_address (operands[1], SImode, 4);
19986 	      /* Take care of overlapping base/data reg.  */
19987 	      if (reg_mentioned_p (operands[0], operands[1]))
19988 		{
19989 		  if (emit)
19990 		    {
19991 		      output_asm_insn ("ldr%?\t%0, %1", otherops);
19992 		      output_asm_insn ("ldr%?\t%0, %1", operands);
19993 		    }
19994 		  if (count)
19995 		    *count = 2;
19996 
19997 		}
19998 	      else
19999 		{
20000 		  if (emit)
20001 		    {
20002 		      output_asm_insn ("ldr%?\t%0, %1", operands);
20003 		      output_asm_insn ("ldr%?\t%0, %1", otherops);
20004 		    }
20005 		  if (count)
20006 		    *count = 2;
20007 		}
20008 	    }
20009 	}
20010     }
20011   else
20012     {
20013       /* Constraints should ensure this.  */
20014       gcc_assert (code0 == MEM && code1 == REG);
20015       gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
20016                   || (TARGET_ARM && TARGET_LDRD));
20017 
20018       /* For TARGET_ARM the first source register of an STRD
20019 	 must be even.  This is usually the case for double-word
20020 	 values but user assembly constraints can force an odd
20021 	 starting register.  */
20022       bool allow_strd = TARGET_LDRD
20023 			 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
20024       switch (GET_CODE (XEXP (operands[0], 0)))
20025         {
20026 	case REG:
20027 	  if (emit)
20028 	    {
20029 	      if (allow_strd)
20030 		output_asm_insn ("strd%?\t%1, [%m0]", operands);
20031 	      else
20032 		output_asm_insn ("stm%?\t%m0, %M1", operands);
20033 	    }
20034 	  break;
20035 
20036         case PRE_INC:
20037 	  gcc_assert (allow_strd);
20038 	  if (emit)
20039 	    output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
20040 	  break;
20041 
20042         case PRE_DEC:
20043 	  if (emit)
20044 	    {
20045 	      if (allow_strd)
20046 		output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
20047 	      else
20048 		output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
20049 	    }
20050 	  break;
20051 
20052         case POST_INC:
20053 	  if (emit)
20054 	    {
20055 	      if (allow_strd)
20056 		output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
20057 	      else
20058 		output_asm_insn ("stm%?\t%m0!, %M1", operands);
20059 	    }
20060 	  break;
20061 
20062         case POST_DEC:
20063 	  gcc_assert (allow_strd);
20064 	  if (emit)
20065 	    output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
20066 	  break;
20067 
20068 	case PRE_MODIFY:
20069 	case POST_MODIFY:
20070 	  otherops[0] = operands[1];
20071 	  otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
20072 	  otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
20073 
20074 	  /* IWMMXT allows offsets larger than strd can handle,
20075 	     fix these up with a pair of str.  */
20076 	  if (!TARGET_THUMB2
20077 	      && CONST_INT_P (otherops[2])
20078 	      && (INTVAL(otherops[2]) <= -256
20079 		  || INTVAL(otherops[2]) >= 256))
20080 	    {
20081 	      if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20082 		{
20083 		  if (emit)
20084 		    {
20085 		      output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
20086 		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20087 		    }
20088 		  if (count)
20089 		    *count = 2;
20090 		}
20091 	      else
20092 		{
20093 		  if (emit)
20094 		    {
20095 		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20096 		      output_asm_insn ("str%?\t%0, [%1], %2", otherops);
20097 		    }
20098 		  if (count)
20099 		    *count = 2;
20100 		}
20101 	    }
20102 	  else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20103 	    {
20104 	      if (emit)
20105 		output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
20106 	    }
20107 	  else
20108 	    {
20109 	      if (emit)
20110 		output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
20111 	    }
20112 	  break;
20113 
20114 	case PLUS:
20115 	  otherops[2] = XEXP (XEXP (operands[0], 0), 1);
20116 	  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20117 	    {
20118 	      switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
20119 		{
20120 		case -8:
20121 		  if (emit)
20122 		    output_asm_insn ("stmdb%?\t%m0, %M1", operands);
20123 		  return "";
20124 
20125 		case -4:
20126 		  if (TARGET_THUMB2)
20127 		    break;
20128 		  if (emit)
20129 		    output_asm_insn ("stmda%?\t%m0, %M1", operands);
20130 		  return "";
20131 
20132 		case 4:
20133 		  if (TARGET_THUMB2)
20134 		    break;
20135 		  if (emit)
20136 		    output_asm_insn ("stmib%?\t%m0, %M1", operands);
20137 		  return "";
20138 		}
20139 	    }
20140 	  if (allow_strd
20141 	      && (REG_P (otherops[2])
20142 		  || TARGET_THUMB2
20143 		  || (CONST_INT_P (otherops[2])
20144 		      && INTVAL (otherops[2]) > -256
20145 		      && INTVAL (otherops[2]) < 256)))
20146 	    {
20147 	      otherops[0] = operands[1];
20148 	      otherops[1] = XEXP (XEXP (operands[0], 0), 0);
20149 	      if (emit)
20150 		output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
20151 	      return "";
20152 	    }
20153 	  /* Fall through */
20154 
20155         default:
20156 	  otherops[0] = adjust_address (operands[0], SImode, 4);
20157 	  otherops[1] = operands[1];
20158 	  if (emit)
20159 	    {
20160 	      output_asm_insn ("str%?\t%1, %0", operands);
20161 	      output_asm_insn ("str%?\t%H1, %0", otherops);
20162 	    }
20163 	  if (count)
20164 	    *count = 2;
20165 	}
20166     }
20167 
20168   return "";
20169 }
20170 
20171 /* Output a move, load or store for quad-word vectors in ARM registers.  Only
20172    handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
20173 
20174 const char *
20175 output_move_quad (rtx *operands)
20176 {
20177   if (REG_P (operands[0]))
20178     {
20179       /* Load, or reg->reg move.  */
20180 
20181       if (MEM_P (operands[1]))
20182         {
20183           switch (GET_CODE (XEXP (operands[1], 0)))
20184             {
20185             case REG:
20186               output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20187               break;
20188 
20189             case LABEL_REF:
20190             case CONST:
20191               output_asm_insn ("adr%?\t%0, %1", operands);
20192               output_asm_insn ("ldmia%?\t%0, %M0", operands);
20193               break;
20194 
20195             default:
20196               gcc_unreachable ();
20197             }
20198         }
20199       else
20200         {
20201           rtx ops[2];
20202           int dest, src, i;
20203 
20204           gcc_assert (REG_P (operands[1]));
20205 
20206           dest = REGNO (operands[0]);
20207           src = REGNO (operands[1]);
20208 
20209           /* This seems pretty dumb, but hopefully GCC won't try to do it
20210              very often.  */
20211           if (dest < src)
20212             for (i = 0; i < 4; i++)
20213               {
20214                 ops[0] = gen_rtx_REG (SImode, dest + i);
20215                 ops[1] = gen_rtx_REG (SImode, src + i);
20216                 output_asm_insn ("mov%?\t%0, %1", ops);
20217               }
20218           else
20219             for (i = 3; i >= 0; i--)
20220               {
20221                 ops[0] = gen_rtx_REG (SImode, dest + i);
20222                 ops[1] = gen_rtx_REG (SImode, src + i);
20223                 output_asm_insn ("mov%?\t%0, %1", ops);
20224               }
20225         }
20226     }
20227   else
20228     {
20229       gcc_assert (MEM_P (operands[0]));
20230       gcc_assert (REG_P (operands[1]));
20231       gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
20232 
20233       switch (GET_CODE (XEXP (operands[0], 0)))
20234         {
20235         case REG:
20236           output_asm_insn ("stm%?\t%m0, %M1", operands);
20237           break;
20238 
20239         default:
20240           gcc_unreachable ();
20241         }
20242     }
20243 
20244   return "";
20245 }
20246 
20247 /* Output a VFP load or store instruction.  */
20248 
20249 const char *
20250 output_move_vfp (rtx *operands)
20251 {
20252   rtx reg, mem, addr, ops[2];
20253   int load = REG_P (operands[0]);
20254   int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
20255   int sp = (!TARGET_VFP_FP16INST
20256 	    || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
20257   int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
20258   const char *templ;
20259   char buff[50];
20260   machine_mode mode;
20261 
20262   reg = operands[!load];
20263   mem = operands[load];
20264 
20265   mode = GET_MODE (reg);
20266 
20267   gcc_assert (REG_P (reg));
20268   gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
20269   gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
20270 	      || mode == SFmode
20271 	      || mode == DFmode
20272 	      || mode == HImode
20273 	      || mode == SImode
20274 	      || mode == DImode
20275               || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
20276   gcc_assert (MEM_P (mem));
20277 
20278   addr = XEXP (mem, 0);
20279 
20280   switch (GET_CODE (addr))
20281     {
20282     case PRE_DEC:
20283       templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
20284       ops[0] = XEXP (addr, 0);
20285       ops[1] = reg;
20286       break;
20287 
20288     case POST_INC:
20289       templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
20290       ops[0] = XEXP (addr, 0);
20291       ops[1] = reg;
20292       break;
20293 
20294     default:
20295       templ = "v%sr%%?.%s\t%%%s0, %%1%s";
20296       ops[0] = reg;
20297       ops[1] = mem;
20298       break;
20299     }
20300 
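  /* Fill in the template: load vs. store, the access width (64, 32 or 16
     bits) and, for double precision, the %P prefix that prints a D
     register.  */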
20301   sprintf (buff, templ,
20302 	   load ? "ld" : "st",
20303 	   dp ? "64" : sp ? "32" : "16",
20304 	   dp ? "P" : "",
20305 	   integer_p ? "\t%@ int" : "");
20306   output_asm_insn (buff, ops);
20307 
20308   return "";
20309 }
20310 
20311 /* Output a Neon double-word or quad-word load or store, or a load
20312    or store for larger structure modes.
20313 
20314    WARNING: The ordering of elements is weird in big-endian mode,
20315    because the EABI requires that vectors stored in memory appear
20316    as though they were stored by a VSTM instruction.
20317    GCC RTL defines element ordering based on in-memory order.
20318    This can be different from the architectural ordering of elements
20319    within a NEON register. The intrinsics defined in arm_neon.h use the
20320    NEON register element ordering, not the GCC RTL element ordering.
20321 
20322    For example, the in-memory ordering of a big-endian quadword
20323    vector with 16-bit elements when stored from register pair {d0,d1}
20324    will be (lowest address first, d0[N] is NEON register element N):
20325 
20326      [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
20327 
20328    When necessary, quadword registers (dN, dN+1) are moved to ARM
20329    registers from rN in the order:
20330 
20331      dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
20332 
20333    So that STM/LDM can be used on vectors in ARM registers, and the
20334    same memory layout will result as if VSTM/VLDM were used.
20335 
20336    Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
20337    possible, which allows use of appropriate alignment tags.
20338    Note that the choice of "64" is independent of the actual vector
20339    element size; this size simply ensures that the behavior is
20340    equivalent to VSTM/VLDM in both little-endian and big-endian mode.
20341 
20342    Due to limitations of those instructions, use of VST1.64/VLD1.64
20343    is not possible if:
20344     - the address contains PRE_DEC, or
20345     - the mode refers to more than 4 double-word registers
20346 
20347    In those cases, it would be possible to replace VSTM/VLDM by a
20348    sequence of instructions; this is not currently implemented since
20349    this is not certain to actually improve performance.  */
20350 
20351 const char *
20352 output_move_neon (rtx *operands)
20353 {
20354   rtx reg, mem, addr, ops[2];
20355   int regno, nregs, load = REG_P (operands[0]);
20356   const char *templ;
20357   char buff[50];
20358   machine_mode mode;
20359 
20360   reg = operands[!load];
20361   mem = operands[load];
20362 
20363   mode = GET_MODE (reg);
20364 
20365   gcc_assert (REG_P (reg));
20366   regno = REGNO (reg);
20367   nregs = REG_NREGS (reg) / 2;
20368   gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
20369 	      || NEON_REGNO_OK_FOR_QUAD (regno));
20370   gcc_assert (VALID_NEON_DREG_MODE (mode)
20371 	      || VALID_NEON_QREG_MODE (mode)
20372 	      || VALID_NEON_STRUCT_MODE (mode));
20373   gcc_assert (MEM_P (mem));
20374 
20375   addr = XEXP (mem, 0);
20376 
20377   /* Strip off const from addresses like (const (plus (...))).  */
20378   if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20379     addr = XEXP (addr, 0);
20380 
20381   switch (GET_CODE (addr))
20382     {
20383     case POST_INC:
20384       /* We have to use vldm / vstm for too-large modes.  */
20385       if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20386 	{
20387 	  templ = "v%smia%%?\t%%0!, %%h1";
20388 	  ops[0] = XEXP (addr, 0);
20389 	}
20390       else
20391 	{
20392 	  templ = "v%s1.64\t%%h1, %%A0";
20393 	  ops[0] = mem;
20394 	}
20395       ops[1] = reg;
20396       break;
20397 
20398     case PRE_DEC:
20399       /* We have to use vldm / vstm in this case, since there is no
20400 	 pre-decrement form of the vld1 / vst1 instructions.  */
20401       templ = "v%smdb%%?\t%%0!, %%h1";
20402       ops[0] = XEXP (addr, 0);
20403       ops[1] = reg;
20404       break;
20405 
20406     case POST_MODIFY:
20407       /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
20408       gcc_unreachable ();
20409 
20410     case REG:
20411       /* We have to use vldm / vstm for too-large modes.  */
20412       if (nregs > 1)
20413 	{
20414 	  if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20415 	    templ = "v%smia%%?\t%%m0, %%h1";
20416 	  else
20417 	    templ = "v%s1.64\t%%h1, %%A0";
20418 
20419 	  ops[0] = mem;
20420 	  ops[1] = reg;
20421 	  break;
20422 	}
20423       /* Fall through.  */
20424     case PLUS:
20425       if (GET_CODE (addr) == PLUS)
20426 	addr = XEXP (addr, 0);
20427       /* Fall through.  */
20428     case LABEL_REF:
20429       {
20430 	int i;
20431 	int overlap = -1;
20432 	for (i = 0; i < nregs; i++)
20433 	  {
20434 	    /* We're only using DImode here because it's a convenient
20435 	       size.  */
20436 	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
20437 	    ops[1] = adjust_address (mem, DImode, 8 * i);
20438 	    if (reg_overlap_mentioned_p (ops[0], mem))
20439 	      {
20440 		gcc_assert (overlap == -1);
20441 		overlap = i;
20442 	      }
20443 	    else
20444 	      {
20445 		if (TARGET_HAVE_MVE && GET_CODE (addr) == LABEL_REF)
20446 		  sprintf (buff, "v%sr.64\t%%P0, %%1", load ? "ld" : "st");
20447 		else
20448 		  sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20449 		output_asm_insn (buff, ops);
20450 	      }
20451 	  }
20452 	if (overlap != -1)
20453 	  {
20454 	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
20455 	    ops[1] = adjust_address (mem, SImode, 8 * overlap);
20456 	    if (TARGET_HAVE_MVE && GET_CODE (addr) == LABEL_REF)
20457 	      sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
20458 	    else
20459 	      sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20460 	    output_asm_insn (buff, ops);
20461 	  }
20462 
20463         return "";
20464       }
20465 
20466     default:
20467       gcc_unreachable ();
20468     }
20469 
20470   sprintf (buff, templ, load ? "ld" : "st");
20471   output_asm_insn (buff, ops);
20472 
20473   return "";
20474 }
20475 
20476 /* Compute and return the length of neon_mov<mode>, where <mode> is
20477    one of VSTRUCT modes: EI, OI, CI or XI.  */
20478 int
20479 arm_attr_length_move_neon (rtx_insn *insn)
20480 {
20481   rtx reg, mem, addr;
20482   int load;
20483   machine_mode mode;
20484 
20485   extract_insn_cached (insn);
20486 
20487   if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
20488     {
20489       mode = GET_MODE (recog_data.operand[0]);
20490       switch (mode)
20491 	{
20492 	case E_EImode:
20493 	case E_OImode:
20494 	  return 8;
20495 	case E_CImode:
20496 	  return 12;
20497 	case E_XImode:
20498 	  return 16;
20499 	default:
20500 	  gcc_unreachable ();
20501 	}
20502     }
20503 
20504   load = REG_P (recog_data.operand[0]);
20505   reg = recog_data.operand[!load];
20506   mem = recog_data.operand[load];
20507 
20508   gcc_assert (MEM_P (mem));
20509 
20510   addr = XEXP (mem, 0);
20511 
20512   /* Strip off const from addresses like (const (plus (...))).  */
20513   if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20514     addr = XEXP (addr, 0);
20515 
20516   if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
20517     {
20518       int insns = REG_NREGS (reg) / 2;
20519       return insns * 4;
20520     }
20521   else
20522     return 4;
20523 }
20524 
20525 /* Return nonzero if the offset in the address is an immediate.  Otherwise,
20526    return zero.  */
20527 
20528 int
20529 arm_address_offset_is_imm (rtx_insn *insn)
20530 {
20531   rtx mem, addr;
20532 
20533   extract_insn_cached (insn);
20534 
20535   if (REG_P (recog_data.operand[0]))
20536     return 0;
20537 
20538   mem = recog_data.operand[0];
20539 
20540   gcc_assert (MEM_P (mem));
20541 
20542   addr = XEXP (mem, 0);
20543 
20544   if (REG_P (addr)
20545       || (GET_CODE (addr) == PLUS
20546 	  && REG_P (XEXP (addr, 0))
20547 	  && CONST_INT_P (XEXP (addr, 1))))
20548     return 1;
20549   else
20550     return 0;
20551 }
20552 
20553 /* Output an ADD r, s, #n where n may be too big for one instruction.
20554    If adding zero to one register, output nothing.  */
20555 const char *
20556 output_add_immediate (rtx *operands)
20557 {
20558   HOST_WIDE_INT n = INTVAL (operands[2]);
20559 
20560   if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
20561     {
20562       if (n < 0)
20563 	output_multi_immediate (operands,
20564 				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
20565 				-n);
20566       else
20567 	output_multi_immediate (operands,
20568 				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
20569 				n);
20570     }
20571 
20572   return "";
20573 }
20574 
20575 /* Output a multiple immediate operation.
20576    OPERANDS is the vector of operands referred to in the output patterns.
20577    INSTR1 is the output pattern to use for the first constant.
20578    INSTR2 is the output pattern to use for subsequent constants.
20579    IMMED_OP is the index of the constant slot in OPERANDS.
20580    N is the constant value.  */
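/* Worked example (illustrative only): with the "add" patterns from
   output_add_immediate and N = 0x12345678, the loop below picks out
   8-bit chunks that start on even bit positions and emits one
   instruction per chunk, roughly:

	add	rd, rs, #0x278
	add	rd, rd, #0x5400
	add	rd, rd, #0x2340000
	add	rd, rd, #0x10000000

   Each chunk is a valid ARM immediate (an 8-bit value rotated by an
   even amount), and the chunks sum back to the original constant.  */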
20581 static const char *
20582 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
20583 			int immed_op, HOST_WIDE_INT n)
20584 {
20585 #if HOST_BITS_PER_WIDE_INT > 32
20586   n &= 0xffffffff;
20587 #endif
20588 
20589   if (n == 0)
20590     {
20591       /* Quick and easy output.  */
20592       operands[immed_op] = const0_rtx;
20593       output_asm_insn (instr1, operands);
20594     }
20595   else
20596     {
20597       int i;
20598       const char * instr = instr1;
20599 
20600       /* Note that n is never zero here (which would give no output).  */
20601       for (i = 0; i < 32; i += 2)
20602 	{
20603 	  if (n & (3 << i))
20604 	    {
20605 	      operands[immed_op] = GEN_INT (n & (255 << i));
20606 	      output_asm_insn (instr, operands);
20607 	      instr = instr2;
20608 	      i += 6;
20609 	    }
20610 	}
20611     }
20612 
20613   return "";
20614 }
20615 
20616 /* Return the name of a shifter operation.  */
20617 static const char *
20618 arm_shift_nmem(enum rtx_code code)
20619 {
20620   switch (code)
20621     {
20622     case ASHIFT:
20623       return ARM_LSL_NAME;
20624 
20625     case ASHIFTRT:
20626       return "asr";
20627 
20628     case LSHIFTRT:
20629       return "lsr";
20630 
20631     case ROTATERT:
20632       return "ror";
20633 
20634     default:
20635       abort();
20636     }
20637 }
20638 
20639 /* Return the appropriate ARM instruction for the operation code.
20640    The returned result should not be overwritten.  OP is the rtx of the
20641    operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
20642    was shifted.  */
20643 const char *
20644 arithmetic_instr (rtx op, int shift_first_arg)
20645 {
20646   switch (GET_CODE (op))
20647     {
20648     case PLUS:
20649       return "add";
20650 
20651     case MINUS:
20652       return shift_first_arg ? "rsb" : "sub";
20653 
20654     case IOR:
20655       return "orr";
20656 
20657     case XOR:
20658       return "eor";
20659 
20660     case AND:
20661       return "and";
20662 
20663     case ASHIFT:
20664     case ASHIFTRT:
20665     case LSHIFTRT:
20666     case ROTATERT:
20667       return arm_shift_nmem(GET_CODE(op));
20668 
20669     default:
20670       gcc_unreachable ();
20671     }
20672 }
20673 
20674 /* Ensure valid constant shifts and return the appropriate shift mnemonic
20675    for the operation code.  The returned result should not be overwritten.
20676    OP is the rtx code of the shift.
20677    On exit, *AMOUNTP will be -1 if the shift is by a register, or the
20678    constant shift amount otherwise.  */
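/* For instance (illustrative): a (rotate x 8) operand is rewritten below
   as ror with amount 24, a (mult x 16) operand becomes lsl #4, and an
   lsl whose constant amount is 32 or more is printed as lsr #32 so that
   the arithmetical result (zero) is still correct.  */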
20679 static const char *
20680 shift_op (rtx op, HOST_WIDE_INT *amountp)
20681 {
20682   const char * mnem;
20683   enum rtx_code code = GET_CODE (op);
20684 
20685   switch (code)
20686     {
20687     case ROTATE:
20688       if (!CONST_INT_P (XEXP (op, 1)))
20689 	{
20690 	  output_operand_lossage ("invalid shift operand");
20691 	  return NULL;
20692 	}
20693 
20694       code = ROTATERT;
20695       *amountp = 32 - INTVAL (XEXP (op, 1));
20696       mnem = "ror";
20697       break;
20698 
20699     case ASHIFT:
20700     case ASHIFTRT:
20701     case LSHIFTRT:
20702     case ROTATERT:
20703       mnem = arm_shift_nmem(code);
20704       if (CONST_INT_P (XEXP (op, 1)))
20705 	{
20706 	  *amountp = INTVAL (XEXP (op, 1));
20707 	}
20708       else if (REG_P (XEXP (op, 1)))
20709 	{
20710 	  *amountp = -1;
20711 	  return mnem;
20712 	}
20713       else
20714 	{
20715 	  output_operand_lossage ("invalid shift operand");
20716 	  return NULL;
20717 	}
20718       break;
20719 
20720     case MULT:
20721       /* We never have to worry about the amount being other than a
20722 	 power of 2, since this case can never be reloaded from a reg.  */
20723       if (!CONST_INT_P (XEXP (op, 1)))
20724 	{
20725 	  output_operand_lossage ("invalid shift operand");
20726 	  return NULL;
20727 	}
20728 
20729       *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
20730 
20731       /* Amount must be a power of two.  */
20732       if (*amountp & (*amountp - 1))
20733 	{
20734 	  output_operand_lossage ("invalid shift operand");
20735 	  return NULL;
20736 	}
20737 
20738       *amountp = exact_log2 (*amountp);
20739       gcc_assert (IN_RANGE (*amountp, 0, 31));
20740       return ARM_LSL_NAME;
20741 
20742     default:
20743       output_operand_lossage ("invalid shift operand");
20744       return NULL;
20745     }
20746 
20747   /* This is not 100% correct, but follows from the desire to merge
20748      multiplication by a power of 2 with the recognizer for a
20749      shift.  >=32 is not a valid shift for "lsl", so we must try and
20750      output a shift that produces the correct arithmetical result.
20751      Using lsr #32 is identical except for the fact that the carry bit
20752      is not set correctly if we set the flags; but we never use the
20753      carry bit from such an operation, so we can ignore that.  */
20754   if (code == ROTATERT)
20755     /* Rotate is just modulo 32.  */
20756     *amountp &= 31;
20757   else if (*amountp != (*amountp & 31))
20758     {
20759       if (code == ASHIFT)
20760 	mnem = "lsr";
20761       *amountp = 32;
20762     }
20763 
20764   /* Shifts of 0 are no-ops.  */
20765   if (*amountp == 0)
20766     return NULL;
20767 
20768   return mnem;
20769 }
20770 
20771 /* Output a .ascii pseudo-op, keeping track of lengths.  This is
20772    because /bin/as is horribly restrictive.  The judgement about
20773    whether or not each character is 'printable' (and can be output as
20774    is) or not (and must be printed with an octal escape) must be made
20775    with reference to the *host* character set -- the situation is
20776    similar to that discussed in the comments above pp_c_char in
20777    c-pretty-print.c.  */
20778 
20779 #define MAX_ASCII_LEN 51
20780 
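/* Example (illustrative): the four bytes 'H', 'i', '\n', '\0' are emitted
   as

	.ascii	"Hi\012\000"

   with non-printable characters escaped in octal; a new .ascii directive
   is started whenever the current string reaches MAX_ASCII_LEN output
   characters.  */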
20781 void
20782 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
20783 {
20784   int i;
20785   int len_so_far = 0;
20786 
20787   fputs ("\t.ascii\t\"", stream);
20788 
20789   for (i = 0; i < len; i++)
20790     {
20791       int c = p[i];
20792 
20793       if (len_so_far >= MAX_ASCII_LEN)
20794 	{
20795 	  fputs ("\"\n\t.ascii\t\"", stream);
20796 	  len_so_far = 0;
20797 	}
20798 
20799       if (ISPRINT (c))
20800 	{
20801 	  if (c == '\\' || c == '\"')
20802 	    {
20803 	      putc ('\\', stream);
20804 	      len_so_far++;
20805 	    }
20806 	  putc (c, stream);
20807 	  len_so_far++;
20808 	}
20809       else
20810 	{
20811 	  fprintf (stream, "\\%03o", c);
20812 	  len_so_far += 4;
20813 	}
20814     }
20815 
20816   fputs ("\"\n", stream);
20817 }
20818 
20819 
20820 /* Compute the register save mask for registers 0 through 12
20821    inclusive.  This code is used by arm_compute_save_core_reg_mask ().  */
20822 
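/* For example (illustrative): a normal function that uses the callee-saved
   registers r4 and r6 gets (1 << 4) | (1 << 6) = 0x50 from the loop below;
   the frame-pointer, PIC and EH-return clauses may then OR further bits
   into that mask.  */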
20823 static unsigned long
20824 arm_compute_save_reg0_reg12_mask (void)
20825 {
20826   unsigned long func_type = arm_current_func_type ();
20827   unsigned long save_reg_mask = 0;
20828   unsigned int reg;
20829 
20830   if (IS_INTERRUPT (func_type))
20831     {
20832       unsigned int max_reg;
20833       /* Interrupt functions must not corrupt any registers,
20834 	 even call clobbered ones.  If this is a leaf function
20835 	 we can just examine the registers used by the RTL, but
20836 	 otherwise we have to assume that whatever function is
20837 	 called might clobber anything, and so we have to save
20838 	 all the call-clobbered registers as well.  */
20839       if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
20840 	/* FIQ handlers have registers r8 - r12 banked, so
20841 	   we only need to check r0 - r7.  Normal ISRs only
20842 	   bank r14 and r15, so we must check up to r12.
20843 	   r13 is the stack pointer which is always preserved,
20844 	   so we do not need to consider it here.  */
20845 	max_reg = 7;
20846       else
20847 	max_reg = 12;
20848 
20849       for (reg = 0; reg <= max_reg; reg++)
20850 	if (df_regs_ever_live_p (reg)
20851 	    || (! crtl->is_leaf && call_used_or_fixed_reg_p (reg)))
20852 	  save_reg_mask |= (1 << reg);
20853 
20854       /* Also save the pic base register if necessary.  */
20855       if (PIC_REGISTER_MAY_NEED_SAVING
20856 	  && crtl->uses_pic_offset_table)
20857 	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
20858     }
20859   else if (IS_VOLATILE(func_type))
20860     {
20861       /* For noreturn functions we historically omitted register saves
20862 	 altogether.  However this really messes up debugging.  As a
20863 	 compromise save just the frame pointers.  Combined with the link
20864 	 register saved elsewhere this should be sufficient to get
20865 	 a backtrace.  */
20866       if (frame_pointer_needed)
20867 	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
20868       if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
20869 	save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
20870       if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
20871 	save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
20872     }
20873   else
20874     {
20875       /* In the normal case we only need to save those registers
20876 	 which are call saved and which are used by this function.  */
20877       for (reg = 0; reg <= 11; reg++)
20878 	if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
20879 	  save_reg_mask |= (1 << reg);
20880 
20881       /* Handle the frame pointer as a special case.  */
20882       if (frame_pointer_needed)
20883 	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
20884 
20885       /* If we aren't loading the PIC register,
20886 	 don't stack it even though it may be live.  */
20887       if (PIC_REGISTER_MAY_NEED_SAVING
20888 	  && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
20889 	      || crtl->uses_pic_offset_table))
20890 	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
20891 
20892       /* The prologue will copy SP into R0, so save it.  */
20893       if (IS_STACKALIGN (func_type))
20894 	save_reg_mask |= 1;
20895     }
20896 
20897   /* Save registers so the exception handler can modify them.  */
20898   if (crtl->calls_eh_return)
20899     {
20900       unsigned int i;
20901 
20902       for (i = 0; ; i++)
20903 	{
20904 	  reg = EH_RETURN_DATA_REGNO (i);
20905 	  if (reg == INVALID_REGNUM)
20906 	    break;
20907 	  save_reg_mask |= 1 << reg;
20908 	}
20909     }
20910 
20911   return save_reg_mask;
20912 }
20913 
20914 /* Return true if r3 is live at the start of the function.  */
20915 
20916 static bool
20917 arm_r3_live_at_start_p (void)
20918 {
20919   /* Just look at cfg info, which is still close enough to correct at this
20920      point.  This gives false positives for broken functions that might use
20921      uninitialized data that happens to be allocated in r3, but who cares?  */
20922   return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
20923 }
20924 
20925 /* Compute the number of bytes used to store the static chain register on the
20926    stack, above the stack frame.  We need to know this accurately to get the
20927    alignment of the rest of the stack frame correct.  */
20928 
20929 static int
20930 arm_compute_static_chain_stack_bytes (void)
20931 {
20932   /* Once the value is updated from the init value of -1, do not
20933      re-compute.  */
20934   if (cfun->machine->static_chain_stack_bytes != -1)
20935     return cfun->machine->static_chain_stack_bytes;
20936 
20937   /* See the defining assertion in arm_expand_prologue.  */
20938   if (IS_NESTED (arm_current_func_type ())
20939       && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20940 	  || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
20941 	       || flag_stack_clash_protection)
20942 	      && !df_regs_ever_live_p (LR_REGNUM)))
20943       && arm_r3_live_at_start_p ()
20944       && crtl->args.pretend_args_size == 0)
20945     return 4;
20946 
20947   return 0;
20948 }
20949 
20950 /* Compute a bit mask of which core registers need to be
20951    saved on the stack for the current function.
20952    This is used by arm_compute_frame_layout, which may add extra registers.  */
20953 
20954 static unsigned long
20955 arm_compute_save_core_reg_mask (void)
20956 {
20957   unsigned int save_reg_mask = 0;
20958   unsigned long func_type = arm_current_func_type ();
20959   unsigned int reg;
20960 
20961   if (IS_NAKED (func_type))
20962     /* This should never really happen.  */
20963     return 0;
20964 
20965   /* If we are creating a stack frame, then we must save the frame pointer,
20966      IP (which will hold the old stack pointer), LR and the PC.  */
20967   if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20968     save_reg_mask |=
20969       (1 << ARM_HARD_FRAME_POINTER_REGNUM)
20970       | (1 << IP_REGNUM)
20971       | (1 << LR_REGNUM)
20972       | (1 << PC_REGNUM);
20973 
20974   save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
20975 
20976   /* Decide if we need to save the link register.
20977      Interrupt routines have their own banked link register,
20978      so they never need to save it.
20979      Otherwise if we do not use the link register we do not need to save
20980      it.  If we are pushing other registers onto the stack however, we
20981      can save an instruction in the epilogue by pushing the link register
20982      now and then popping it back into the PC.  This incurs extra memory
20983      accesses though, so we only do it when optimizing for size, and only
20984      if we know that we will not need a fancy return sequence.  */
20985   if (df_regs_ever_live_p (LR_REGNUM)
20986       || (save_reg_mask
20987 	  && optimize_size
20988 	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
20989 	  && !crtl->tail_call_emit
20990 	  && !crtl->calls_eh_return))
20991     save_reg_mask |= 1 << LR_REGNUM;
20992 
20993   if (cfun->machine->lr_save_eliminated)
20994     save_reg_mask &= ~ (1 << LR_REGNUM);
20995 
20996   if (TARGET_REALLY_IWMMXT
20997       && ((bit_count (save_reg_mask)
20998 	   + ARM_NUM_INTS (crtl->args.pretend_args_size +
20999 			   arm_compute_static_chain_stack_bytes())
21000 	   ) % 2) != 0)
21001     {
21002       /* The total number of registers that are going to be pushed
21003 	 onto the stack is odd.  We need to ensure that the stack
21004 	 is 64-bit aligned before we start to save iWMMXt registers,
21005 	 and also before we start to create locals.  (A local variable
21006 	 might be a double or long long which we will load/store using
21007 	 an iWMMXt instruction).  Therefore we need to push another
21008 	 ARM register, so that the stack will be 64-bit aligned.  We
21009 	 try to avoid using the arg registers (r0 -r3) as they might be
21010 	 try to avoid using the arg registers (r0 - r3) as they might be
21011       for (reg = 4; reg <= 12; reg++)
21012 	if ((save_reg_mask & (1 << reg)) == 0)
21013 	  break;
21014 
21015       if (reg <= 12)
21016 	save_reg_mask |= (1 << reg);
21017       else
21018 	{
21019 	  cfun->machine->sibcall_blocked = 1;
21020 	  save_reg_mask |= (1 << 3);
21021 	}
21022     }
21023 
21024   /* We may need to push an additional register for use initializing the
21025      PIC base register.  */
21026   if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
21027       && (save_reg_mask & THUMB2_WORK_REGS) == 0)
21028     {
21029       reg = thumb_find_work_register (1 << 4);
21030       if (!call_used_or_fixed_reg_p (reg))
21031 	save_reg_mask |= (1 << reg);
21032     }
21033 
21034   return save_reg_mask;
21035 }
21036 
21037 /* Compute a bit mask of which core registers need to be
21038    saved on the stack for the current function.  */
21039 static unsigned long
21040 thumb1_compute_save_core_reg_mask (void)
21041 {
21042   unsigned long mask;
21043   unsigned reg;
21044 
21045   mask = 0;
21046   for (reg = 0; reg < 12; reg ++)
21047     if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21048       mask |= 1 << reg;
21049 
21050   /* Handle the frame pointer as a special case.  */
21051   if (frame_pointer_needed)
21052     mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21053 
21054   if (flag_pic
21055       && !TARGET_SINGLE_PIC_BASE
21056       && arm_pic_register != INVALID_REGNUM
21057       && crtl->uses_pic_offset_table)
21058     mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21059 
21060   /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
21061   if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
21062     mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21063 
21064   /* LR will also be pushed if any lo regs are pushed.  */
21065   if (mask & 0xff || thumb_force_lr_save ())
21066     mask |= (1 << LR_REGNUM);
21067 
21068   bool call_clobbered_scratch
21069     = (thumb1_prologue_unused_call_clobbered_lo_regs ()
21070        && thumb1_epilogue_unused_call_clobbered_lo_regs ());
21071 
21072   /* Make sure we have a low work register if we need one.  We will
21073      need one if we are going to push a high register, but we are not
21074      currently intending to push a low register.  However if both the
21075      prologue and epilogue have a spare call-clobbered low register,
21076      then we won't need to find an additional work register.  It does
21077      not need to be the same register in the prologue and
21078      epilogue.  */
21079   if ((mask & 0xff) == 0
21080       && !call_clobbered_scratch
21081       && ((mask & 0x0f00) || TARGET_BACKTRACE))
21082     {
21083       /* Use thumb_find_work_register to choose which register
21084 	 we will use.  If the register is live then we will
21085 	 have to push it.  Use LAST_LO_REGNUM as our fallback
21086 	 choice for the register to select.  */
21087       reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
21088       /* Make sure the register returned by thumb_find_work_register is
21089 	 not part of the return value.  */
21090       if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
21091 	reg = LAST_LO_REGNUM;
21092 
21093       if (callee_saved_reg_p (reg))
21094 	mask |= 1 << reg;
21095     }
21096 
21097   /* The 504 below is 8 bytes less than 512 because there are two possible
21098      alignment words.  We can't tell here if they will be present or not so we
21099      have to play it safe and assume that they are. */
21100   if ((CALLER_INTERWORKING_SLOT_SIZE +
21101        ROUND_UP_WORD (get_frame_size ()) +
21102        crtl->outgoing_args_size) >= 504)
21103     {
21104       /* This is the same as the code in thumb1_expand_prologue() which
21105 	 determines which register to use for stack decrement. */
21106       for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
21107 	if (mask & (1 << reg))
21108 	  break;
21109 
21110       if (reg > LAST_LO_REGNUM)
21111 	{
21112 	  /* Make sure we have a register available for stack decrement. */
21113 	  mask |= 1 << LAST_LO_REGNUM;
21114 	}
21115     }
21116 
21117   return mask;
21118 }
21119 
21120 
21121 /* Return the number of bytes required to save VFP registers.  */
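/* For example (illustrative): a function that must save d8-d10 reserves
   3 * 8 = 24 bytes here; on pre-Armv6 cores a run of exactly two registers
   (say d8-d9) is padded to three (24 bytes rather than 16) to work around
   the ARM10 VFPr1 erratum handled below.  */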
21122 static int
21123 arm_get_vfp_saved_size (void)
21124 {
21125   unsigned int regno;
21126   int count;
21127   int saved;
21128 
21129   saved = 0;
21130   /* Space for saved VFP registers.  */
21131   if (TARGET_VFP_BASE)
21132     {
21133       count = 0;
21134       for (regno = FIRST_VFP_REGNUM;
21135 	   regno < LAST_VFP_REGNUM;
21136 	   regno += 2)
21137 	{
21138 	  if ((!df_regs_ever_live_p (regno)
21139 	       || call_used_or_fixed_reg_p (regno))
21140 	      && (!df_regs_ever_live_p (regno + 1)
21141 		  || call_used_or_fixed_reg_p (regno + 1)))
21142 	    {
21143 	      if (count > 0)
21144 		{
21145 		  /* Workaround ARM10 VFPr1 bug.  */
21146 		  if (count == 2 && !arm_arch6)
21147 		    count++;
21148 		  saved += count * 8;
21149 		}
21150 	      count = 0;
21151 	    }
21152 	  else
21153 	    count++;
21154 	}
21155       if (count > 0)
21156 	{
21157 	  if (count == 2 && !arm_arch6)
21158 	    count++;
21159 	  saved += count * 8;
21160 	}
21161     }
21162   return saved;
21163 }
21164 
21165 
21166 /* Generate a function exit sequence.  If REALLY_RETURN is false, then do
21167    everything bar the final return instruction.  If simple_return is true,
21168    then do not output epilogue, because it has already been emitted in RTL.
21169 
21170    Note: do not forget to update the length attribute of the corresponding
21171    insn pattern when changing assembly output (e.g. the length attribute of
21172    thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
21173    register clearing sequences).  */
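/* Illustrative example: for a plain function whose prologue pushed
   {r4, lr}, the code below emits "pop {r4, pc}" and returns the empty
   string, since loading the saved LR value straight into the PC already
   performs the return.  */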
21174 const char *
21175 output_return_instruction (rtx operand, bool really_return, bool reverse,
21176                            bool simple_return)
21177 {
21178   char conditional[10];
21179   char instr[100];
21180   unsigned reg;
21181   unsigned long live_regs_mask;
21182   unsigned long func_type;
21183   arm_stack_offsets *offsets;
21184 
21185   func_type = arm_current_func_type ();
21186 
21187   if (IS_NAKED (func_type))
21188     return "";
21189 
21190   if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
21191     {
21192       /* If this function was declared non-returning, and we have
21193 	 found a tail call, then we have to trust that the called
21194 	 function won't return.  */
21195       if (really_return)
21196 	{
21197 	  rtx ops[2];
21198 
21199 	  /* Otherwise, trap an attempted return by aborting.  */
21200 	  ops[0] = operand;
21201 	  ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
21202 				       : "abort");
21203 	  assemble_external_libcall (ops[1]);
21204 	  output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
21205 	}
21206 
21207       return "";
21208     }
21209 
21210   gcc_assert (!cfun->calls_alloca || really_return);
21211 
21212   sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
21213 
21214   cfun->machine->return_used_this_function = 1;
21215 
21216   offsets = arm_get_frame_offsets ();
21217   live_regs_mask = offsets->saved_regs_mask;
21218 
21219   if (!simple_return && live_regs_mask)
21220     {
21221       const char * return_reg;
21222 
21223       /* If we do not have any special requirements for function exit
21224 	 (e.g. interworking) then we can load the return address
21225 	 directly into the PC.  Otherwise we must load it into LR.  */
21226       if (really_return
21227 	  && !IS_CMSE_ENTRY (func_type)
21228 	  && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
21229 	return_reg = reg_names[PC_REGNUM];
21230       else
21231 	return_reg = reg_names[LR_REGNUM];
21232 
21233       if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
21234 	{
21235 	  /* There are three possible reasons for the IP register
21236 	     being saved.  1) a stack frame was created, in which case
21237 	     being saved: 1) a stack frame was created, in which case
21238 	     corrupted it, or 3) it was saved to align the stack on
21239 	     iWMMXt.  In case 1, restore IP into SP, otherwise just
21240 	     restore IP.  */
21241 	  if (frame_pointer_needed)
21242 	    {
21243 	      live_regs_mask &= ~ (1 << IP_REGNUM);
21244 	      live_regs_mask |=   (1 << SP_REGNUM);
21245 	    }
21246 	  else
21247 	    gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
21248 	}
21249 
21250       /* On some ARM architectures it is faster to use LDR rather than
21251 	 LDM to load a single register.  On other architectures, the
21252 	 cost is the same.  In 26 bit mode, or for exception handlers,
21253 	 we have to use LDM to load the PC so that the CPSR is also
21254 	 restored.  */
21255       for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
21256 	if (live_regs_mask == (1U << reg))
21257 	  break;
21258 
21259       if (reg <= LAST_ARM_REGNUM
21260 	  && (reg != LR_REGNUM
21261 	      || ! really_return
21262 	      || ! IS_INTERRUPT (func_type)))
21263 	{
21264 	  sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
21265 		   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
21266 	}
21267       else
21268 	{
21269 	  char *p;
21270 	  int first = 1;
21271 
21272 	  /* Generate the load multiple instruction to restore the
21273 	     registers.  Note we can get here, even if
21274 	     frame_pointer_needed is true, but only if sp already
21275 	     points to the base of the saved core registers.  */
21276 	  if (live_regs_mask & (1 << SP_REGNUM))
21277 	    {
21278 	      unsigned HOST_WIDE_INT stack_adjust;
21279 
21280 	      stack_adjust = offsets->outgoing_args - offsets->saved_regs;
21281 	      gcc_assert (stack_adjust == 0 || stack_adjust == 4);
21282 
21283 	      if (stack_adjust && arm_arch5t && TARGET_ARM)
21284 		  sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
21285 	      else
21286 		{
21287 		  /* If we can't use ldmib (SA110 bug),
21288 		     then try to pop r3 instead.  */
21289 		  if (stack_adjust)
21290 		    live_regs_mask |= 1 << 3;
21291 
21292 		  sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
21293 		}
21294 	    }
21295 	  /* For interrupt returns we have to use an LDM rather than
21296 	     a POP so that we can use the exception return variant.  */
21297 	  else if (IS_INTERRUPT (func_type))
21298 	    sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
21299 	  else
21300 	    sprintf (instr, "pop%s\t{", conditional);
21301 
21302 	  p = instr + strlen (instr);
21303 
21304 	  for (reg = 0; reg <= SP_REGNUM; reg++)
21305 	    if (live_regs_mask & (1 << reg))
21306 	      {
21307 		int l = strlen (reg_names[reg]);
21308 
21309 		if (first)
21310 		  first = 0;
21311 		else
21312 		  {
21313 		    memcpy (p, ", ", 2);
21314 		    p += 2;
21315 		  }
21316 
21317 		memcpy (p, "%|", 2);
21318 		memcpy (p + 2, reg_names[reg], l);
21319 		p += l + 2;
21320 	      }
21321 
21322 	  if (live_regs_mask & (1 << LR_REGNUM))
21323 	    {
21324 	      sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
21325 	      /* If returning from an interrupt, restore the CPSR.  */
21326 	      if (IS_INTERRUPT (func_type))
21327 		strcat (p, "^");
21328 	    }
21329 	  else
21330 	    strcpy (p, "}");
21331 	}
21332 
21333       output_asm_insn (instr, & operand);
21334 
21335       /* See if we need to generate an extra instruction to
21336 	 perform the actual function return.  */
21337       if (really_return
21338 	  && func_type != ARM_FT_INTERWORKED
21339 	  && (live_regs_mask & (1 << LR_REGNUM)) != 0)
21340 	{
21341 	  /* The return has already been handled
21342 	     by loading the LR into the PC.  */
21343           return "";
21344 	}
21345     }
21346 
21347   if (really_return)
21348     {
21349       switch ((int) ARM_FUNC_TYPE (func_type))
21350 	{
21351 	case ARM_FT_ISR:
21352 	case ARM_FT_FIQ:
21353 	  /* ??? This is wrong for unified assembly syntax.  */
21354 	  sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
21355 	  break;
21356 
21357 	case ARM_FT_INTERWORKED:
21358 	  gcc_assert (arm_arch5t || arm_arch4t);
21359 	  sprintf (instr, "bx%s\t%%|lr", conditional);
21360 	  break;
21361 
21362 	case ARM_FT_EXCEPTION:
21363 	  /* ??? This is wrong for unified assembly syntax.  */
21364 	  sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
21365 	  break;
21366 
21367 	default:
21368 	  if (IS_CMSE_ENTRY (func_type))
21369 	    {
21370 	      /* For Armv8.1-M, this is cleared as part of the CLRM instruction
21371 		 emitted by cmse_nonsecure_entry_clear_before_return () and the
21372 		 VSTR/VLDR instructions in the prologue and epilogue.  */
21373 	      if (!TARGET_HAVE_FPCXT_CMSE)
21374 		{
21375 		  /* Check if we have to clear the 'GE bits', which are only used if
21376 		     parallel addition and subtraction instructions are available.  */
21377 		  if (TARGET_INT_SIMD)
21378 		    snprintf (instr, sizeof (instr),
21379 			      "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
21380 		  else
21381 		    snprintf (instr, sizeof (instr),
21382 			      "msr%s\tAPSR_nzcvq, %%|lr", conditional);
21383 
21384 		  output_asm_insn (instr, & operand);
21385 		  /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR takes
21386 		     care of it.  */
21387 		  if (TARGET_HARD_FLOAT)
21388 		    {
21389 		      /* Clear the cumulative exception-status bits (0-4,7) and
21390 			 the condition code bits (28-31) of the FPSCR.  We need
21391 			 to remember to clear the first scratch register used
21392 			 (IP) and save and restore the second (r4).
21393 
21394 			 Important note: the length of the
21395 			 thumb2_cmse_entry_return insn pattern must account for
21396 			 the size of the below instructions.  */
21397 		      output_asm_insn ("push\t{%|r4}", & operand);
21398 		      output_asm_insn ("vmrs\t%|ip, fpscr", & operand);
21399 		      output_asm_insn ("movw\t%|r4, #65376", & operand);
21400 		      output_asm_insn ("movt\t%|r4, #4095", & operand);
21401 		      output_asm_insn ("and\t%|ip, %|r4", & operand);
21402 		      output_asm_insn ("vmsr\tfpscr, %|ip", & operand);
21403 		      output_asm_insn ("pop\t{%|r4}", & operand);
21404 		      output_asm_insn ("mov\t%|ip, %|lr", & operand);
21405 		    }
21406 		}
21407 	      snprintf (instr, sizeof (instr), "bxns\t%%|lr");
21408 	    }
21409 	  /* Use bx if it's available.  */
21410 	  else if (arm_arch5t || arm_arch4t)
21411 	    sprintf (instr, "bx%s\t%%|lr", conditional);
21412 	  else
21413 	    sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
21414 	  break;
21415 	}
21416 
21417       output_asm_insn (instr, & operand);
21418     }
21419 
21420   return "";
21421 }
21422 
21423 /* Output in FILE asm statements needed to declare the NAME of the function
21424    defined by its DECL node.  */
21425 
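/* For instance (illustrative): for a function foo carrying the
   cmse_nonsecure_entry attribute, this emits the usual type directive and
   "foo:" label plus an extra global "__acle_se_foo" label, so that the
   linker can create the secure gateway veneer for it.  */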
21426 void
21427 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
21428 {
21429   size_t cmse_name_len;
21430   char *cmse_name = 0;
21431   char cmse_prefix[] = "__acle_se_";
21432 
21433   /* When compiling with ARMv8-M Security Extensions enabled, we should print an
21434      extra function label for each function with the 'cmse_nonsecure_entry'
21435      attribute.  This extra function label should be prepended with
21436      '__acle_se_', telling the linker that it needs to create secure gateway
21437      veneers for this function.  */
21438   if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
21439 				    DECL_ATTRIBUTES (decl)))
21440     {
21441       cmse_name_len = sizeof (cmse_prefix) + strlen (name);
21442       cmse_name = XALLOCAVEC (char, cmse_name_len);
21443       snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
21444       targetm.asm_out.globalize_label (file, cmse_name);
21445 
21446       ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
21447       ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
21448     }
21449 
21450   ARM_DECLARE_FUNCTION_NAME (file, name, decl);
21451   ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21452   ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21453   ASM_OUTPUT_LABEL (file, name);
21454 
21455   if (cmse_name)
21456     ASM_OUTPUT_LABEL (file, cmse_name);
21457 
21458   ARM_OUTPUT_FN_UNWIND (file, TRUE);
21459 }
21460 
21461 /* Write the function name into the code section, directly preceding
21462    the function prologue.
21463 
21464    Code will be output similar to this:
21465      t0
21466 	 .ascii "arm_poke_function_name", 0
21467 	 .align
21468      t1
21469 	 .word 0xff000000 + (t1 - t0)
21470      arm_poke_function_name
21471 	 mov     ip, sp
21472 	 stmfd   sp!, {fp, ip, lr, pc}
21473 	 sub     fp, ip, #4
21474 
21475    When performing a stack backtrace, code can inspect the value
21476    of 'pc' stored at 'fp' + 0.  If the trace function then looks
21477    at location pc - 12 and finds the top 8 bits set, then we know
21478    that there is a function name embedded immediately preceding this
21479    location, and that its length is ((pc[-3]) & ~0xff000000).
21480 
21481    We assume that pc is declared as a pointer to an unsigned long.
21482 
21483    It is of no benefit to output the function name if we are assembling
21484    a leaf function.  These function types will not contain a stack
21485    backtrace structure, therefore it is not possible to determine the
21486    function name.  */
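/* A backtrace reader could recover the name roughly like this
   (illustrative sketch, not code used by GCC itself):

     unsigned long marker = pc[-3];
     if ((marker & 0xff000000) == 0xff000000)
       {
	 unsigned long len = marker & ~0xff000000;
	 const char *name = (const char *) &pc[-3] - len;
	 ...
       }
*/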
21487 void
21488 arm_poke_function_name (FILE *stream, const char *name)
21489 {
21490   unsigned long alignlength;
21491   unsigned long length;
21492   rtx           x;
21493 
21494   length      = strlen (name) + 1;
21495   alignlength = ROUND_UP_WORD (length);
21496 
21497   ASM_OUTPUT_ASCII (stream, name, length);
21498   ASM_OUTPUT_ALIGN (stream, 2);
21499   x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
21500   assemble_aligned_integer (UNITS_PER_WORD, x);
21501 }
21502 
21503 /* Place some comments into the assembler stream
21504    describing the current function.  */
21505 static void
21506 arm_output_function_prologue (FILE *f)
21507 {
21508   unsigned long func_type;
21509 
21510   /* Sanity check.  */
21511   gcc_assert (!arm_ccfsm_state && !arm_target_insn);
21512 
21513   func_type = arm_current_func_type ();
21514 
21515   switch ((int) ARM_FUNC_TYPE (func_type))
21516     {
21517     default:
21518     case ARM_FT_NORMAL:
21519       break;
21520     case ARM_FT_INTERWORKED:
21521       asm_fprintf (f, "\t%@ Function supports interworking.\n");
21522       break;
21523     case ARM_FT_ISR:
21524       asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
21525       break;
21526     case ARM_FT_FIQ:
21527       asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
21528       break;
21529     case ARM_FT_EXCEPTION:
21530       asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
21531       break;
21532     }
21533 
21534   if (IS_NAKED (func_type))
21535     asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
21536 
21537   if (IS_VOLATILE (func_type))
21538     asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
21539 
21540   if (IS_NESTED (func_type))
21541     asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
21542   if (IS_STACKALIGN (func_type))
21543     asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
21544   if (IS_CMSE_ENTRY (func_type))
21545     asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
21546 
21547   asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
21548 	       (HOST_WIDE_INT) crtl->args.size,
21549 	       crtl->args.pretend_args_size,
21550 	       (HOST_WIDE_INT) get_frame_size ());
21551 
21552   asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
21553 	       frame_pointer_needed,
21554 	       cfun->machine->uses_anonymous_args);
21555 
21556   if (cfun->machine->lr_save_eliminated)
21557     asm_fprintf (f, "\t%@ link register save eliminated.\n");
21558 
21559   if (crtl->calls_eh_return)
21560     asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
21561 
21562 }
21563 
21564 static void
21565 arm_output_function_epilogue (FILE *)
21566 {
21567   arm_stack_offsets *offsets;
21568 
21569   if (TARGET_THUMB1)
21570     {
21571       int regno;
21572 
21573       /* Emit any call-via-reg trampolines that are needed for v4t support
21574 	 of call_reg and call_value_reg type insns.  */
21575       for (regno = 0; regno < LR_REGNUM; regno++)
21576 	{
21577 	  rtx label = cfun->machine->call_via[regno];
21578 
21579 	  if (label != NULL)
21580 	    {
21581 	      switch_to_section (function_section (current_function_decl));
21582 	      targetm.asm_out.internal_label (asm_out_file, "L",
21583 					      CODE_LABEL_NUMBER (label));
21584 	      asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21585 	    }
21586 	}
21587 
21588       /* ??? Probably not safe to set this here, since it assumes that a
21589 	 function will be emitted as assembly immediately after we generate
21590 	 RTL for it.  This does not happen for inline functions.  */
21591       cfun->machine->return_used_this_function = 0;
21592     }
21593   else /* TARGET_32BIT */
21594     {
21595       /* We need to take into account any stack-frame rounding.  */
21596       offsets = arm_get_frame_offsets ();
21597 
21598       gcc_assert (!use_return_insn (FALSE, NULL)
21599 		  || (cfun->machine->return_used_this_function != 0)
21600 		  || offsets->saved_regs == offsets->outgoing_args
21601 		  || frame_pointer_needed);
21602     }
21603 }
21604 
21605 /* Generate and emit a sequence of insns equivalent to PUSH, but using
21606    STR and STRD.  If an even number of registers are being pushed, one
21607    or more STRD patterns are created for each register pair.  If an
21608    odd number of registers are pushed, emit an initial STR followed by
21609    as many STRD instructions as are needed.  This works best when the
21610    stack is initially 64-bit aligned (the normal case), since it
21611    ensures that each STRD is also 64-bit aligned.  */
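/* For example (illustrative): pushing {r4, r5, r6} becomes roughly

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]

   i.e. the single odd register allocates the whole 12-byte block with
   write-back, and the remaining pair is stored by a doubleword-aligned
   STRD at offset 4.  */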
21612 static void
21613 thumb2_emit_strd_push (unsigned long saved_regs_mask)
21614 {
21615   int num_regs = 0;
21616   int i;
21617   int regno;
21618   rtx par = NULL_RTX;
21619   rtx dwarf = NULL_RTX;
21620   rtx tmp;
21621   bool first = true;
21622 
21623   num_regs = bit_count (saved_regs_mask);
21624 
21625   /* Must be at least one register to save, and can't save SP or PC.  */
21626   gcc_assert (num_regs > 0 && num_regs <= 14);
21627   gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21628   gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
21629 
21630   /* Create sequence for DWARF info.  All the frame-related data for
21631      debugging is held in this wrapper.  */
21632   dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
21633 
21634   /* Describe the stack adjustment.  */
21635   tmp = gen_rtx_SET (stack_pointer_rtx,
21636 		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21637   RTX_FRAME_RELATED_P (tmp) = 1;
21638   XVECEXP (dwarf, 0, 0) = tmp;
21639 
21640   /* Find the first register.  */
21641   for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
21642     ;
21643 
21644   i = 0;
21645 
21646   /* If there's an odd number of registers to push, start off by
21647      pushing a single register.  This ensures that subsequent strd
21648      operations are dword aligned (assuming that SP was originally
21649      64-bit aligned).  */
21650   if ((num_regs & 1) != 0)
21651     {
21652       rtx reg, mem, insn;
21653 
21654       reg = gen_rtx_REG (SImode, regno);
21655       if (num_regs == 1)
21656 	mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
21657 						     stack_pointer_rtx));
21658       else
21659 	mem = gen_frame_mem (Pmode,
21660 			     gen_rtx_PRE_MODIFY
21661 			     (Pmode, stack_pointer_rtx,
21662 			      plus_constant (Pmode, stack_pointer_rtx,
21663 					     -4 * num_regs)));
21664 
21665       tmp = gen_rtx_SET (mem, reg);
21666       RTX_FRAME_RELATED_P (tmp) = 1;
21667       insn = emit_insn (tmp);
21668       RTX_FRAME_RELATED_P (insn) = 1;
21669       add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21670       tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
21671       RTX_FRAME_RELATED_P (tmp) = 1;
21672       i++;
21673       regno++;
21674       XVECEXP (dwarf, 0, i) = tmp;
21675       first = false;
21676     }
21677 
21678   while (i < num_regs)
21679     if (saved_regs_mask & (1 << regno))
21680       {
21681 	rtx reg1, reg2, mem1, mem2;
21682 	rtx tmp0, tmp1, tmp2;
21683 	int regno2;
21684 
21685 	/* Find the register to pair with this one.  */
21686 	for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
21687 	     regno2++)
21688 	  ;
21689 
21690 	reg1 = gen_rtx_REG (SImode, regno);
21691 	reg2 = gen_rtx_REG (SImode, regno2);
21692 
21693 	if (first)
21694 	  {
21695 	    rtx insn;
21696 
21697 	    first = false;
21698 	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
21699 							stack_pointer_rtx,
21700 							-4 * num_regs));
21701 	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
21702 							stack_pointer_rtx,
21703 							-4 * (num_regs - 1)));
21704 	    tmp0 = gen_rtx_SET (stack_pointer_rtx,
21705 				plus_constant (Pmode, stack_pointer_rtx,
21706 					       -4 * (num_regs)));
21707 	    tmp1 = gen_rtx_SET (mem1, reg1);
21708 	    tmp2 = gen_rtx_SET (mem2, reg2);
21709 	    RTX_FRAME_RELATED_P (tmp0) = 1;
21710 	    RTX_FRAME_RELATED_P (tmp1) = 1;
21711 	    RTX_FRAME_RELATED_P (tmp2) = 1;
21712 	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
21713 	    XVECEXP (par, 0, 0) = tmp0;
21714 	    XVECEXP (par, 0, 1) = tmp1;
21715 	    XVECEXP (par, 0, 2) = tmp2;
21716 	    insn = emit_insn (par);
21717 	    RTX_FRAME_RELATED_P (insn) = 1;
21718 	    add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21719 	  }
21720 	else
21721 	  {
21722 	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
21723 							stack_pointer_rtx,
21724 							4 * i));
21725 	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
21726 							stack_pointer_rtx,
21727 							4 * (i + 1)));
21728 	    tmp1 = gen_rtx_SET (mem1, reg1);
21729 	    tmp2 = gen_rtx_SET (mem2, reg2);
21730 	    RTX_FRAME_RELATED_P (tmp1) = 1;
21731 	    RTX_FRAME_RELATED_P (tmp2) = 1;
21732 	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
21733 	    XVECEXP (par, 0, 0) = tmp1;
21734 	    XVECEXP (par, 0, 1) = tmp2;
21735 	    emit_insn (par);
21736 	  }
21737 
21738 	/* Create unwind information.  This is an approximation.  */
21739 	tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
21740 					   plus_constant (Pmode,
21741 							  stack_pointer_rtx,
21742 							  4 * i)),
21743 			    reg1);
21744 	tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
21745 					   plus_constant (Pmode,
21746 							  stack_pointer_rtx,
21747 							  4 * (i + 1))),
21748 			    reg2);
21749 
21750 	RTX_FRAME_RELATED_P (tmp1) = 1;
21751 	RTX_FRAME_RELATED_P (tmp2) = 1;
21752 	XVECEXP (dwarf, 0, i + 1) = tmp1;
21753 	XVECEXP (dwarf, 0, i + 2) = tmp2;
21754 	i += 2;
21755 	regno = regno2 + 1;
21756       }
21757     else
21758       regno++;
21759 
21760   return;
21761 }
21762 
21763 /* STRD in ARM mode requires consecutive registers.  This function emits STRD
21764    whenever possible, otherwise it emits single-word stores.  The first store
21765    also allocates stack space for all saved registers, using writeback with
21766    post-addressing mode.  All other stores use offset addressing.  If no STRD
21767    can be emitted, this function emits a sequence of single-word stores,
21768    and not an STM as before, because single-word stores provide more freedom
21769    for scheduling and can be turned into an STM by peephole optimizations.  */
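/* Illustrative example: for the mask {r4, r5} the code below emits roughly
   "strd r4, r5, [sp, #-8]!", whereas {r5, r6} cannot use STRD in ARM mode
   (the first register of the pair must be even-numbered), so those two
   registers are stored with two single-word STRs instead.  */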
21770 static void
21771 arm_emit_strd_push (unsigned long saved_regs_mask)
21772 {
21773   int num_regs = 0;
21774   int i, j, dwarf_index  = 0;
21775   int offset = 0;
21776   rtx dwarf = NULL_RTX;
21777   rtx insn = NULL_RTX;
21778   rtx tmp, mem;
21779 
21780   /* TODO: More efficient code could be emitted by changing the
21781      layout, e.g., first push all pairs that can use STRD to keep the
21782      stack aligned, and then push all other registers.  */
21783   for (i = 0; i <= LAST_ARM_REGNUM; i++)
21784     if (saved_regs_mask & (1 << i))
21785       num_regs++;
21786 
21787   gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21788   gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
21789   gcc_assert (num_regs > 0);
21790 
21791   /* Create sequence for DWARF info.  */
21792   dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
21793 
21794   /* For dwarf info, we generate explicit stack update.  */
21795   tmp = gen_rtx_SET (stack_pointer_rtx,
21796                      plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21797   RTX_FRAME_RELATED_P (tmp) = 1;
21798   XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21799 
21800   /* Save registers.  */
21801   offset = - 4 * num_regs;
21802   j = 0;
21803   while (j <= LAST_ARM_REGNUM)
21804     if (saved_regs_mask & (1 << j))
21805       {
21806         if ((j % 2 == 0)
21807             && (saved_regs_mask & (1 << (j + 1))))
21808           {
21809             /* The current register and the next one form a register pair for
21810                which STRD can be generated.  */
21811             if (offset < 0)
21812               {
21813                 /* Allocate stack space for all saved registers.  */
21814                 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
21815                 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21816                 mem = gen_frame_mem (DImode, tmp);
21817                 offset = 0;
21818               }
21819             else if (offset > 0)
21820               mem = gen_frame_mem (DImode,
21821                                    plus_constant (Pmode,
21822                                                   stack_pointer_rtx,
21823                                                   offset));
21824             else
21825               mem = gen_frame_mem (DImode, stack_pointer_rtx);
21826 
21827             tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
21828             RTX_FRAME_RELATED_P (tmp) = 1;
21829             tmp = emit_insn (tmp);
21830 
21831             /* Record the first store insn.  */
21832             if (dwarf_index == 1)
21833               insn = tmp;
21834 
21835             /* Generate dwarf info.  */
21836             mem = gen_frame_mem (SImode,
21837                                  plus_constant (Pmode,
21838                                                 stack_pointer_rtx,
21839                                                 offset));
21840             tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
21841             RTX_FRAME_RELATED_P (tmp) = 1;
21842             XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21843 
21844             mem = gen_frame_mem (SImode,
21845                                  plus_constant (Pmode,
21846                                                 stack_pointer_rtx,
21847                                                 offset + 4));
21848             tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
21849             RTX_FRAME_RELATED_P (tmp) = 1;
21850             XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21851 
21852             offset += 8;
21853             j += 2;
21854           }
21855         else
21856           {
21857             /* Emit a single word store.  */
21858             if (offset < 0)
21859               {
21860                 /* Allocate stack space for all saved registers.  */
21861                 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
21862                 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21863                 mem = gen_frame_mem (SImode, tmp);
21864                 offset = 0;
21865               }
21866             else if (offset > 0)
21867               mem = gen_frame_mem (SImode,
21868                                    plus_constant (Pmode,
21869                                                   stack_pointer_rtx,
21870                                                   offset));
21871             else
21872               mem = gen_frame_mem (SImode, stack_pointer_rtx);
21873 
21874             tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
21875             RTX_FRAME_RELATED_P (tmp) = 1;
21876             tmp = emit_insn (tmp);
21877 
21878             /* Record the first store insn.  */
21879             if (dwarf_index == 1)
21880               insn = tmp;
21881 
21882             /* Generate dwarf info.  */
21883             mem = gen_frame_mem (SImode,
21884                                  plus_constant(Pmode,
21885                                                stack_pointer_rtx,
21886                                                offset));
21887             tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
21888             RTX_FRAME_RELATED_P (tmp) = 1;
21889             XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21890 
21891             offset += 4;
21892             j += 1;
21893           }
21894       }
21895     else
21896       j++;
21897 
21898   /* Attach dwarf info to the first insn we generate.  */
21899   gcc_assert (insn != NULL_RTX);
21900   add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21901   RTX_FRAME_RELATED_P (insn) = 1;
21902 }
21903 
21904 /* Generate and emit an insn that we will recognize as a push_multi.
21905    Unfortunately, since this insn does not reflect very well the actual
21906    semantics of the operation, we need to annotate the insn for the benefit
21907    of DWARF2 frame unwind information.  DWARF_REGS_MASK is a subset of
21908    MASK for registers that should be annotated for DWARF2 frame unwind
21909    information.  */
21910 static rtx
21911 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
21912 {
21913   int num_regs = 0;
21914   int num_dwarf_regs = 0;
21915   int i, j;
21916   rtx par;
21917   rtx dwarf;
21918   int dwarf_par_index;
21919   rtx tmp, reg;
21920 
21921   /* We don't record the PC in the dwarf frame information.  */
21922   dwarf_regs_mask &= ~(1 << PC_REGNUM);
21923 
21924   for (i = 0; i <= LAST_ARM_REGNUM; i++)
21925     {
21926       if (mask & (1 << i))
21927 	num_regs++;
21928       if (dwarf_regs_mask & (1 << i))
21929 	num_dwarf_regs++;
21930     }
21931 
21932   gcc_assert (num_regs && num_regs <= 16);
21933   gcc_assert ((dwarf_regs_mask & ~mask) == 0);
21934 
21935   /* For the body of the insn we are going to generate an UNSPEC in
21936      parallel with several USEs.  This allows the insn to be recognized
21937      by the push_multi pattern in the arm.md file.
21938 
21939      The body of the insn looks something like this:
21940 
21941        (parallel [
21942            (set (mem:BLK (pre_modify:SI (reg:SI sp)
21943 	                                (const_int:SI <num>)))
21944 	        (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
21945            (use (reg:SI XX))
21946            (use (reg:SI YY))
21947 	   ...
21948         ])
21949 
21950      For the frame note however, we try to be more explicit and actually
21951      show each register being stored into the stack frame, plus a (single)
21952      decrement of the stack pointer.  We do it this way in order to be
21953      friendly to the stack unwinding code, which only wants to see a single
21954      stack decrement per instruction.  The RTL we generate for the note looks
21955      something like this:
21956 
21957       (sequence [
21958            (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
21959            (set (mem:SI (reg:SI sp)) (reg:SI r4))
21960            (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
21961            (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
21962 	   ...
21963         ])
21964 
21965      FIXME:: In an ideal world the PRE_MODIFY would not exist and
21966      instead we'd have a parallel expression detailing all
21967      the stores to the various memory addresses so that debug
21968      information is more up-to-date. Remember however while writing
21969      this to take care of the constraints with the push instruction.
21970 
21971      Note also that this has to be taken care of for the VFP registers.
21972 
21973      For more see PR43399.  */
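
  /* As a concrete illustration: if MASK covers r4, r5 and lr (so num_regs
     is 3), the body built below is matched by the push_multi pattern
     (typically emitted as "push {r4, r5, lr}"), while the frame note spells
     the same stores out as

       (sequence [
            (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -12)))
            (set (mem:SI (reg:SI sp)) (reg:SI r4))
            (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI r5))
            (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI lr))
         ])  */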
21974 
21975   par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
21976   dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
21977   dwarf_par_index = 1;
21978 
21979   for (i = 0; i <= LAST_ARM_REGNUM; i++)
21980     {
21981       if (mask & (1 << i))
21982 	{
21983 	  reg = gen_rtx_REG (SImode, i);
21984 
21985 	  XVECEXP (par, 0, 0)
21986 	    = gen_rtx_SET (gen_frame_mem
21987 			   (BLKmode,
21988 			    gen_rtx_PRE_MODIFY (Pmode,
21989 						stack_pointer_rtx,
21990 						plus_constant
21991 						(Pmode, stack_pointer_rtx,
21992 						 -4 * num_regs))
21993 			    ),
21994 			   gen_rtx_UNSPEC (BLKmode,
21995 					   gen_rtvec (1, reg),
21996 					   UNSPEC_PUSH_MULT));
21997 
21998 	  if (dwarf_regs_mask & (1 << i))
21999 	    {
22000 	      tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
22001 				 reg);
22002 	      RTX_FRAME_RELATED_P (tmp) = 1;
22003 	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22004 	    }
22005 
22006 	  break;
22007 	}
22008     }
22009 
22010   for (j = 1, i++; j < num_regs; i++)
22011     {
22012       if (mask & (1 << i))
22013 	{
22014 	  reg = gen_rtx_REG (SImode, i);
22015 
22016 	  XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
22017 
22018 	  if (dwarf_regs_mask & (1 << i))
22019 	    {
22020 	      tmp
22021 		= gen_rtx_SET (gen_frame_mem
22022 			       (SImode,
22023 				plus_constant (Pmode, stack_pointer_rtx,
22024 					       4 * j)),
22025 			       reg);
22026 	      RTX_FRAME_RELATED_P (tmp) = 1;
22027 	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22028 	    }
22029 
22030 	  j++;
22031 	}
22032     }
22033 
22034   par = emit_insn (par);
22035 
22036   tmp = gen_rtx_SET (stack_pointer_rtx,
22037 		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22038   RTX_FRAME_RELATED_P (tmp) = 1;
22039   XVECEXP (dwarf, 0, 0) = tmp;
22040 
22041   add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
22042 
22043   return par;
22044 }
22045 
22046 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
22047    SIZE is the offset to be adjusted.
22048    DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx.  */
22049 static void
22050 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
22051 {
22052   rtx dwarf;
22053 
22054   RTX_FRAME_RELATED_P (insn) = 1;
22055   dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
22056   add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
22057 }
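
/* For example, an epilogue insn that pops four words would be annotated here
   with SIZE == 16 and DEST == SRC == stack_pointer_rtx, i.e. a note of the
   form (set (reg sp) (plus (reg sp) (const_int 16))), which lets the unwinder
   keep its notion of the CFA in step with the stack adjustment.  */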
22058 
22059 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
22060    SAVED_REGS_MASK shows which registers need to be restored.
22061 
22062    Unfortunately, since this insn does not reflect very well the actual
22063    semantics of the operation, we need to annotate the insn for the benefit
22064    of DWARF2 frame unwind information.  */
22065 static void
22066 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
22067 {
22068   int num_regs = 0;
22069   int i, j;
22070   rtx par;
22071   rtx dwarf = NULL_RTX;
22072   rtx tmp, reg;
22073   bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22074   int offset_adj;
22075   int emit_update;
22076 
22077   offset_adj = return_in_pc ? 1 : 0;
22078   for (i = 0; i <= LAST_ARM_REGNUM; i++)
22079     if (saved_regs_mask & (1 << i))
22080       num_regs++;
22081 
22082   gcc_assert (num_regs && num_regs <= 16);
22083 
22084   /* If SP is in reglist, then we don't emit SP update insn.  */
22085   emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
22086 
22087   /* The parallel needs to hold num_regs SETs
22088      and one SET for the stack update.  */
22089   par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
22090 
22091   if (return_in_pc)
22092     XVECEXP (par, 0, 0) = ret_rtx;
22093 
22094   if (emit_update)
22095     {
22096       /* Increment the stack pointer, based on there being
22097          num_regs 4-byte registers to restore.  */
22098       tmp = gen_rtx_SET (stack_pointer_rtx,
22099                          plus_constant (Pmode,
22100                                         stack_pointer_rtx,
22101                                         4 * num_regs));
22102       RTX_FRAME_RELATED_P (tmp) = 1;
22103       XVECEXP (par, 0, offset_adj) = tmp;
22104     }
22105 
22106   /* Now restore every reg, which may include PC.  */
22107   for (j = 0, i = 0; j < num_regs; i++)
22108     if (saved_regs_mask & (1 << i))
22109       {
22110         reg = gen_rtx_REG (SImode, i);
22111         if ((num_regs == 1) && emit_update && !return_in_pc)
22112           {
22113             /* Emit single load with writeback.  */
22114             tmp = gen_frame_mem (SImode,
22115                                  gen_rtx_POST_INC (Pmode,
22116                                                    stack_pointer_rtx));
22117             tmp = emit_insn (gen_rtx_SET (reg, tmp));
22118             REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22119             return;
22120           }
22121 
22122         tmp = gen_rtx_SET (reg,
22123                            gen_frame_mem
22124                            (SImode,
22125                             plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
22126         RTX_FRAME_RELATED_P (tmp) = 1;
22127         XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
22128 
22129         /* We need to maintain a sequence for DWARF info too.  As dwarf info
22130            should not have PC, skip PC.  */
22131         if (i != PC_REGNUM)
22132           dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22133 
22134         j++;
22135       }
22136 
22137   if (return_in_pc)
22138     par = emit_jump_insn (par);
22139   else
22140     par = emit_insn (par);
22141 
22142   REG_NOTES (par) = dwarf;
22143   if (!return_in_pc)
22144     arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
22145 				 stack_pointer_rtx, stack_pointer_rtx);
22146 }
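
/* For instance, with SAVED_REGS_MASK covering r4, r5, r6 and PC, the PARALLEL
   built above is matched as a single "pop {r4, r5, r6, pc}" that also returns,
   and it carries REG_CFA_RESTORE notes for r4-r6 only, since PC is deliberately
   kept out of the dwarf information.  */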
22147 
22148 /* Generate and emit an insn pattern that we will recognize as a pop_multi
22149    of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
22150 
22151    Unfortunately, since this insn does not reflect very well the actual
22152    semantics of the operation, we need to annotate the insn for the benefit
22153    of DWARF2 frame unwind information.  */
22154 static void
22155 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
22156 {
22157   int i, j;
22158   rtx par;
22159   rtx dwarf = NULL_RTX;
22160   rtx tmp, reg;
22161 
22162   gcc_assert (num_regs && num_regs <= 32);
22163 
22164     /* Workaround ARM10 VFPr1 bug.  */
22165   if (num_regs == 2 && !arm_arch6)
22166     {
22167       if (first_reg == 15)
22168         first_reg--;
22169 
22170       num_regs++;
22171     }
22172 
22173   /* We can emit at most 16 D-registers in a single pop_multi instruction, and
22174      there could be up to 32 D-registers to restore.
22175      If there are more than 16 D-registers, make two recursive calls,
22176      each of which emits one pop_multi instruction.  */
22177   if (num_regs > 16)
22178     {
22179       arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
22180       arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
22181       return;
22182     }
22183 
22184   /* The parallel needs to hold num_regs SETs
22185      and one SET for the stack update.  */
22186   par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
22187 
22188   /* Increment the stack pointer, based on there being
22189      num_regs 8-byte registers to restore.  */
22190   tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
22191   RTX_FRAME_RELATED_P (tmp) = 1;
22192   XVECEXP (par, 0, 0) = tmp;
22193 
22194   /* Now show every reg that will be restored, using a SET for each.  */
22195   for (j = 0, i=first_reg; j < num_regs; i += 2)
22196     {
22197       reg = gen_rtx_REG (DFmode, i);
22198 
22199       tmp = gen_rtx_SET (reg,
22200                          gen_frame_mem
22201                          (DFmode,
22202                           plus_constant (Pmode, base_reg, 8 * j)));
22203       RTX_FRAME_RELATED_P (tmp) = 1;
22204       XVECEXP (par, 0, j + 1) = tmp;
22205 
22206       dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22207 
22208       j++;
22209     }
22210 
22211   par = emit_insn (par);
22212   REG_NOTES (par) = dwarf;
22213 
22214   /* Make sure the CFA does not stay based on IP_REGNUM, so that unwinding from FP is possible.  */
22215   if (REGNO (base_reg) == IP_REGNUM)
22216     {
22217       RTX_FRAME_RELATED_P (par) = 1;
22218       add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
22219     }
22220   else
22221     arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
22222 				 base_reg, base_reg);
22223 }
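
/* I advances by two per restored D-register above because each DFmode value
   occupies a pair of VFP register numbers.  For example, restoring four
   D-registers starting at d8 produces a parallel that is matched as roughly
   "vldm <base>!, {d8-d11}", together with a 32-byte adjustment of BASE_REG
   recorded for the unwinder.  */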
22224 
22225 /* Generate and emit a pattern that will be recognized as an LDRD pattern.  If an
22226    even number of registers is being popped, multiple LDRD patterns are created
22227    for all register pairs.  If an odd number of registers is popped, the last
22228    register is loaded using an LDR pattern.  */
22229 static void
22230 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
22231 {
22232   int num_regs = 0;
22233   int i, j;
22234   rtx par = NULL_RTX;
22235   rtx dwarf = NULL_RTX;
22236   rtx tmp, reg, tmp1;
22237   bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22238 
22239   for (i = 0; i <= LAST_ARM_REGNUM; i++)
22240     if (saved_regs_mask & (1 << i))
22241       num_regs++;
22242 
22243   gcc_assert (num_regs && num_regs <= 16);
22244 
22245   /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
22246      to be popped.  So, if num_regs is even, now it will become odd,
22247      and we can generate pop with PC.  If num_regs is odd, it will be
22248      even now, and ldr with return can be generated for PC.  */
22249   if (return_in_pc)
22250     num_regs--;
22251 
22252   gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22253 
22254   /* Var j iterates over all the registers to gather all the registers in
22255      saved_regs_mask.  Var i gives index of saved registers in stack frame.
22256      A PARALLEL RTX of register-pair is created here, so that pattern for
22257      LDRD can be matched.  As PC is always last register to be popped, and
22258      we have already decremented num_regs if PC, we don't have to worry
22259      about PC in this loop.  */
22260   for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
22261     if (saved_regs_mask & (1 << j))
22262       {
22263         /* Create RTX for memory load.  */
22264         reg = gen_rtx_REG (SImode, j);
22265         tmp = gen_rtx_SET (reg,
22266                            gen_frame_mem (SImode,
22267                                plus_constant (Pmode,
22268                                               stack_pointer_rtx, 4 * i)));
22269         RTX_FRAME_RELATED_P (tmp) = 1;
22270 
22271         if (i % 2 == 0)
22272           {
22273             /* When saved-register index (i) is even, the RTX to be emitted is
22274                yet to be created.  Hence create it first.  The LDRD pattern we
22275                are generating is :
22276                [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
22277                  (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
22278                where target registers need not be consecutive.  */
22279             par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22280             dwarf = NULL_RTX;
22281           }
22282 
22283         /* The ith register is added to the PARALLEL RTX.  If i is even, reg_i is
22284            added as the 0th element; if i is odd, reg_i is added as the 1st element
22285            of the LDRD pattern shown above.  */
22286         XVECEXP (par, 0, (i % 2)) = tmp;
22287         dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22288 
22289         if ((i % 2) == 1)
22290           {
22291             /* When saved-register index (i) is odd, RTXs for both the registers
22292                to be loaded are generated in above given LDRD pattern, and the
22293                pattern can be emitted now.  */
22294             par = emit_insn (par);
22295             REG_NOTES (par) = dwarf;
22296 	    RTX_FRAME_RELATED_P (par) = 1;
22297           }
22298 
22299         i++;
22300       }
22301 
22302   /* If the number of registers pushed is odd and return_in_pc is false, or the
22303      number of registers is even and return_in_pc is true, the last register is
22304      popped using LDR.  It can be PC as well.  Hence, adjust the stack first and
22305      then use LDR with post-increment.  */
22306 
22307   /* Increment the stack pointer, based on there being
22308      num_regs 4-byte registers to restore.  */
22309   tmp = gen_rtx_SET (stack_pointer_rtx,
22310                      plus_constant (Pmode, stack_pointer_rtx, 4 * i));
22311   RTX_FRAME_RELATED_P (tmp) = 1;
22312   tmp = emit_insn (tmp);
22313   if (!return_in_pc)
22314     {
22315       arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
22316 				   stack_pointer_rtx, stack_pointer_rtx);
22317     }
22318 
22319   dwarf = NULL_RTX;
22320 
22321   if (((num_regs % 2) == 1 && !return_in_pc)
22322       || ((num_regs % 2) == 0 && return_in_pc))
22323     {
22324       /* Scan for the single register to be popped.  Skip until the saved
22325          register is found.  */
22326       for (; (saved_regs_mask & (1 << j)) == 0; j++);
22327 
22328       /* Gen LDR with post increment here.  */
22329       tmp1 = gen_rtx_MEM (SImode,
22330                           gen_rtx_POST_INC (SImode,
22331                                             stack_pointer_rtx));
22332       set_mem_alias_set (tmp1, get_frame_alias_set ());
22333 
22334       reg = gen_rtx_REG (SImode, j);
22335       tmp = gen_rtx_SET (reg, tmp1);
22336       RTX_FRAME_RELATED_P (tmp) = 1;
22337       dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22338 
22339       if (return_in_pc)
22340         {
22341           /* If return_in_pc, j must be PC_REGNUM.  */
22342           gcc_assert (j == PC_REGNUM);
22343           par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22344           XVECEXP (par, 0, 0) = ret_rtx;
22345           XVECEXP (par, 0, 1) = tmp;
22346           par = emit_jump_insn (par);
22347         }
22348       else
22349         {
22350           par = emit_insn (tmp);
22351 	  REG_NOTES (par) = dwarf;
22352 	  arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22353 				       stack_pointer_rtx, stack_pointer_rtx);
22354         }
22355 
22356     }
22357   else if ((num_regs % 2) == 1 && return_in_pc)
22358     {
22359       /* There are 2 registers to be popped.  So, generate the pattern
22360          pop_multiple_with_stack_update_and_return to pop in PC.  */
22361       arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
22362     }
22363 
22364   return;
22365 }
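
/* Example: for SAVED_REGS_MASK covering r4, r5, r6 and PC, num_regs drops from
   4 to 3 above, so a single LDRD pair (r4, r5) is emitted first, the stack
   pointer is then advanced past it, and the remaining {r6, pc} are handled by
   arm_emit_multi_reg_pop as a pop-and-return.  */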
22366 
22367 /* LDRD in ARM mode needs consecutive registers as operands.  This function
22368    emits LDRD whenever possible, otherwise it emits single-word loads.  It uses
22369    offset addressing and then generates one separate stack update.  This provides
22370    more scheduling freedom, compared to writeback on every load.  However,
22371    if the function returns using a load into PC directly
22372    (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
22373    before the last load.  TODO: Add a peephole optimization to recognize
22374    the new epilogue sequence as an LDM instruction whenever possible.  TODO: Add
22375    a peephole optimization to merge the load at stack-offset zero
22376    with the stack update instruction, using a load with writeback
22377    in post-index addressing mode.  */
22378 static void
22379 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
22380 {
22381   int j = 0;
22382   int offset = 0;
22383   rtx par = NULL_RTX;
22384   rtx dwarf = NULL_RTX;
22385   rtx tmp, mem;
22386 
22387   /* Restore saved registers.  */
22388   gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
22389   j = 0;
22390   while (j <= LAST_ARM_REGNUM)
22391     if (saved_regs_mask & (1 << j))
22392       {
22393         if ((j % 2) == 0
22394             && (saved_regs_mask & (1 << (j + 1)))
22395             && (j + 1) != PC_REGNUM)
22396           {
22397             /* Current register and next register form register pair for which
22398                LDRD can be generated. PC is always the last register popped, and
22399                we handle it separately.  */
22400             if (offset > 0)
22401               mem = gen_frame_mem (DImode,
22402                                    plus_constant (Pmode,
22403                                                   stack_pointer_rtx,
22404                                                   offset));
22405             else
22406               mem = gen_frame_mem (DImode, stack_pointer_rtx);
22407 
22408             tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
22409             tmp = emit_insn (tmp);
22410 	    RTX_FRAME_RELATED_P (tmp) = 1;
22411 
22412             /* Generate dwarf info.  */
22413 
22414             dwarf = alloc_reg_note (REG_CFA_RESTORE,
22415                                     gen_rtx_REG (SImode, j),
22416                                     NULL_RTX);
22417             dwarf = alloc_reg_note (REG_CFA_RESTORE,
22418                                     gen_rtx_REG (SImode, j + 1),
22419                                     dwarf);
22420 
22421             REG_NOTES (tmp) = dwarf;
22422 
22423             offset += 8;
22424             j += 2;
22425           }
22426         else if (j != PC_REGNUM)
22427           {
22428             /* Emit a single word load.  */
22429             if (offset > 0)
22430               mem = gen_frame_mem (SImode,
22431                                    plus_constant (Pmode,
22432                                                   stack_pointer_rtx,
22433                                                   offset));
22434             else
22435               mem = gen_frame_mem (SImode, stack_pointer_rtx);
22436 
22437             tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
22438             tmp = emit_insn (tmp);
22439 	    RTX_FRAME_RELATED_P (tmp) = 1;
22440 
22441             /* Generate dwarf info.  */
22442             REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
22443                                               gen_rtx_REG (SImode, j),
22444                                               NULL_RTX);
22445 
22446             offset += 4;
22447             j += 1;
22448           }
22449         else /* j == PC_REGNUM */
22450           j++;
22451       }
22452     else
22453       j++;
22454 
22455   /* Update the stack.  */
22456   if (offset > 0)
22457     {
22458       tmp = gen_rtx_SET (stack_pointer_rtx,
22459                          plus_constant (Pmode,
22460                                         stack_pointer_rtx,
22461                                         offset));
22462       tmp = emit_insn (tmp);
22463       arm_add_cfa_adjust_cfa_note (tmp, offset,
22464 				   stack_pointer_rtx, stack_pointer_rtx);
22465       offset = 0;
22466     }
22467 
22468   if (saved_regs_mask & (1 << PC_REGNUM))
22469     {
22470       /* Only PC is to be popped.  */
22471       par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22472       XVECEXP (par, 0, 0) = ret_rtx;
22473       tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
22474                          gen_frame_mem (SImode,
22475                                         gen_rtx_POST_INC (SImode,
22476                                                           stack_pointer_rtx)));
22477       RTX_FRAME_RELATED_P (tmp) = 1;
22478       XVECEXP (par, 0, 1) = tmp;
22479       par = emit_jump_insn (par);
22480 
22481       /* Generate dwarf info.  */
22482       dwarf = alloc_reg_note (REG_CFA_RESTORE,
22483                               gen_rtx_REG (SImode, PC_REGNUM),
22484                               NULL_RTX);
22485       REG_NOTES (par) = dwarf;
22486       arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22487 				   stack_pointer_rtx, stack_pointer_rtx);
22488     }
22489 }
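
/* Example: for SAVED_REGS_MASK covering r4, r5, r6 and r7 this emits two
   DImode loads matched as "ldrd r4, r5, [sp]" and "ldrd r6, r7, [sp, #8]",
   followed by a single "add sp, sp, #16" whose CFA adjustment is recorded
   for the unwinder.  */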
22490 
22491 /* Calculate the size of the return value that is passed in registers.  */
22492 static unsigned
22493 arm_size_return_regs (void)
22494 {
22495   machine_mode mode;
22496 
22497   if (crtl->return_rtx != 0)
22498     mode = GET_MODE (crtl->return_rtx);
22499   else
22500     mode = DECL_MODE (DECL_RESULT (current_function_decl));
22501 
22502   return GET_MODE_SIZE (mode);
22503 }
22504 
22505 /* Return true if the current function needs to save/restore LR.  */
22506 static bool
22507 thumb_force_lr_save (void)
22508 {
22509   return !cfun->machine->lr_save_eliminated
22510 	 && (!crtl->is_leaf
22511 	     || thumb_far_jump_used_p ()
22512 	     || df_regs_ever_live_p (LR_REGNUM));
22513 }
22514 
22515 /* Return true if CALL is an indirect tail call.  In that case we cannot
22516    assume that r3 will be available, since the indirect call target may
22517    itself live in a register.  */
22518 static bool
22519 is_indirect_tailcall_p (rtx call)
22520 {
22521   rtx pat = PATTERN (call);
22522 
22523   /* Indirect tail call.  */
22524   pat = XVECEXP (pat, 0, 0);
22525   if (GET_CODE (pat) == SET)
22526     pat = SET_SRC (pat);
22527 
22528   pat = XEXP (XEXP (pat, 0), 0);
22529   return REG_P (pat);
22530 }
22531 
22532 /* Return true if r3 is used by any of the tail call insns in the
22533    current function.  */
22534 static bool
22535 any_sibcall_could_use_r3 (void)
22536 {
22537   edge_iterator ei;
22538   edge e;
22539 
22540   if (!crtl->tail_call_emit)
22541     return false;
22542   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
22543     if (e->flags & EDGE_SIBCALL)
22544       {
22545 	rtx_insn *call = BB_END (e->src);
22546 	if (!CALL_P (call))
22547 	  call = prev_nonnote_nondebug_insn (call);
22548 	gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
22549 	if (find_regno_fusage (call, USE, 3)
22550 	    || is_indirect_tailcall_p (call))
22551 	  return true;
22552       }
22553   return false;
22554 }
22555 
22556 
22557 /* Compute the distance from register FROM to register TO.
22558    These can be the arg pointer (26), the soft frame pointer (25),
22559    the stack pointer (13) or the hard frame pointer (11).
22560    In thumb mode r7 is used as the soft frame pointer, if needed.
22561    Typical stack layout looks like this:
22562 
22563        old stack pointer -> |    |
22564                              ----
22565                             |    | \
22566                             |    |   saved arguments for
22567                             |    |   vararg functions
22568 			    |    | /
22569                               --
22570    hard FP & arg pointer -> |    | \
22571                             |    |   stack
22572                             |    |   frame
22573                             |    | /
22574                               --
22575                             |    | \
22576                             |    |   call saved
22577                             |    |   registers
22578       soft frame pointer -> |    | /
22579                               --
22580                             |    | \
22581                             |    |   local
22582                             |    |   variables
22583      locals base pointer -> |    | /
22584                               --
22585                             |    | \
22586                             |    |   outgoing
22587                             |    |   arguments
22588    current stack pointer -> |    | /
22589                               --
22590 
22591   For a given function some or all of these stack components
22592   may not be needed, giving rise to the possibility of
22593   eliminating some of the registers.
22594 
22595   The values returned by this function must reflect the behavior
22596   of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
22597 
22598   The sign of the number returned reflects the direction of stack
22599   growth, so the values are positive for all eliminations except
22600   from the soft frame pointer to the hard frame pointer.
22601 
22602   SFP may point just inside the local variables block to ensure correct
22603   alignment.  */
22604 
22605 
22606 /* Return cached stack offsets.  */
22607 
22608 static arm_stack_offsets *
22609 arm_get_frame_offsets (void)
22610 {
22611   struct arm_stack_offsets *offsets;
22612 
22613   offsets = &cfun->machine->stack_offsets;
22614 
22615   return offsets;
22616 }
22617 
22618 
22619 /* Calculate stack offsets.  These are used to calculate register elimination
22620    offsets and in prologue/epilogue code.  Also calculates which registers
22621    should be saved.  */
22622 
22623 static void
22624 arm_compute_frame_layout (void)
22625 {
22626   struct arm_stack_offsets *offsets;
22627   unsigned long func_type;
22628   int saved;
22629   int core_saved;
22630   HOST_WIDE_INT frame_size;
22631   int i;
22632 
22633   offsets = &cfun->machine->stack_offsets;
22634 
22635   /* Initially this is the size of the local variables.  It will be translated
22636      into an offset once we have determined the size of preceding data.  */
22637   frame_size = ROUND_UP_WORD (get_frame_size ());
22638 
22639   /* Space for variadic functions.  */
22640   offsets->saved_args = crtl->args.pretend_args_size;
22641 
22642   /* In Thumb mode this is incorrect, but never used.  */
22643   offsets->frame
22644     = (offsets->saved_args
22645        + arm_compute_static_chain_stack_bytes ()
22646        + (frame_pointer_needed ? 4 : 0));
22647 
22648   if (TARGET_32BIT)
22649     {
22650       unsigned int regno;
22651 
22652       offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
22653       core_saved = bit_count (offsets->saved_regs_mask) * 4;
22654       saved = core_saved;
22655 
22656       /* We know that SP will be doubleword aligned on entry, and we must
22657 	 preserve that condition at any subroutine call.  We also require the
22658 	 soft frame pointer to be doubleword aligned.  */
22659 
22660       if (TARGET_REALLY_IWMMXT)
22661 	{
22662 	  /* Check for the call-saved iWMMXt registers.  */
22663 	  for (regno = FIRST_IWMMXT_REGNUM;
22664 	       regno <= LAST_IWMMXT_REGNUM;
22665 	       regno++)
22666 	    if (df_regs_ever_live_p (regno)
22667 		&& !call_used_or_fixed_reg_p (regno))
22668 	      saved += 8;
22669 	}
22670 
22671       func_type = arm_current_func_type ();
22672       /* Space for saved VFP registers.  */
22673       if (! IS_VOLATILE (func_type)
22674 	  && TARGET_VFP_BASE)
22675 	saved += arm_get_vfp_saved_size ();
22676 
22677       /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
22678 	 nonsecure entry functions with VSTR/VLDR.  */
22679       if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
22680 	saved += 4;
22681     }
22682   else /* TARGET_THUMB1 */
22683     {
22684       offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
22685       core_saved = bit_count (offsets->saved_regs_mask) * 4;
22686       saved = core_saved;
22687       if (TARGET_BACKTRACE)
22688 	saved += 16;
22689     }
22690 
22691   /* Saved registers include the stack frame.  */
22692   offsets->saved_regs
22693     = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
22694   offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
22695 
22696   /* A leaf function does not need any stack alignment if it has nothing
22697      on the stack.  */
22698   if (crtl->is_leaf && frame_size == 0
22699       /* However if it calls alloca(), we have a dynamically allocated
22700 	 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
22701       && ! cfun->calls_alloca)
22702     {
22703       offsets->outgoing_args = offsets->soft_frame;
22704       offsets->locals_base = offsets->soft_frame;
22705       return;
22706     }
22707 
22708   /* Ensure SFP has the correct alignment.  */
22709   if (ARM_DOUBLEWORD_ALIGN
22710       && (offsets->soft_frame & 7))
22711     {
22712       offsets->soft_frame += 4;
22713       /* Try to align stack by pushing an extra reg.  Don't bother doing this
22714          when there is a stack frame as the alignment will be rolled into
22715 	 the normal stack adjustment.  */
22716       if (frame_size + crtl->outgoing_args_size == 0)
22717 	{
22718 	  int reg = -1;
22719 
22720 	  /* Register r3 is caller-saved.  Normally it does not need to be
22721 	     saved on entry by the prologue.  However if we choose to save
22722 	     it for padding then we may confuse the compiler into thinking
22723 	     a prologue sequence is required when in fact it is not.  This
22724 	     will occur when shrink-wrapping if r3 is used as a scratch
22725 	     register and there are no other callee-saved writes.
22726 
22727 	     This situation can be avoided when other callee-saved registers
22728 	     are available and r3 is not mandatory if we choose a callee-saved
22729 	     register for padding.  */
22730 	  bool prefer_callee_reg_p = false;
22731 
22732 	  /* If it is safe to use r3, then do so.  This sometimes
22733 	     generates better code on Thumb-2 by avoiding the need to
22734 	     use 32-bit push/pop instructions.  */
22735           if (! any_sibcall_could_use_r3 ()
22736 	      && arm_size_return_regs () <= 12
22737 	      && (offsets->saved_regs_mask & (1 << 3)) == 0
22738 	      && (TARGET_THUMB2
22739 		  || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
22740 	    {
22741 	      reg = 3;
22742 	      if (!TARGET_THUMB2)
22743 		prefer_callee_reg_p = true;
22744 	    }
22745 	  if (reg == -1
22746 	      || prefer_callee_reg_p)
22747 	    {
22748 	      for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
22749 		{
22750 		  /* Avoid fixed registers; they may be changed at
22751 		     arbitrary times so it's unsafe to restore them
22752 		     during the epilogue.  */
22753 		  if (!fixed_regs[i]
22754 		      && (offsets->saved_regs_mask & (1 << i)) == 0)
22755 		    {
22756 		      reg = i;
22757 		      break;
22758 		    }
22759 		}
22760 	    }
22761 
22762 	  if (reg != -1)
22763 	    {
22764 	      offsets->saved_regs += 4;
22765 	      offsets->saved_regs_mask |= (1 << reg);
22766 	    }
22767 	}
22768     }
22769 
22770   offsets->locals_base = offsets->soft_frame + frame_size;
22771   offsets->outgoing_args = (offsets->locals_base
22772 			    + crtl->outgoing_args_size);
22773 
22774   if (ARM_DOUBLEWORD_ALIGN)
22775     {
22776       /* Ensure SP remains doubleword aligned.  */
22777       if (offsets->outgoing_args & 7)
22778 	offsets->outgoing_args += 4;
22779       gcc_assert (!(offsets->outgoing_args & 7));
22780     }
22781 }
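
/* Worked example (ARM mode, no frame pointer, no static chain, and assuming
   CALLER_INTERWORKING_SLOT_SIZE is 0): a function with 8 bytes of locals, no
   outgoing arguments and {r4, r5, lr} to save gets saved_args = 0,
   saved_regs = 12, soft_frame = 12 rounded up to 16 for doubleword alignment,
   locals_base = 24 and outgoing_args = 24, which is already 8-byte aligned.  */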
22782 
22783 
22784 /* Calculate the relative offsets for the different stack pointers.  Positive
22785    offsets are in the direction of stack growth.  */
22786 
22787 HOST_WIDE_INT
22788 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
22789 {
22790   arm_stack_offsets *offsets;
22791 
22792   offsets = arm_get_frame_offsets ();
22793 
22794   /* OK, now we have enough information to compute the distances.
22795      There must be an entry in these switch tables for each pair
22796      of registers in ELIMINABLE_REGS, even if some of the entries
22797      seem to be redundant or useless.  */
22798   switch (from)
22799     {
22800     case ARG_POINTER_REGNUM:
22801       switch (to)
22802 	{
22803 	case THUMB_HARD_FRAME_POINTER_REGNUM:
22804 	  return 0;
22805 
22806 	case FRAME_POINTER_REGNUM:
22807 	  /* This is the reverse of the soft frame pointer
22808 	     to hard frame pointer elimination below.  */
22809 	  return offsets->soft_frame - offsets->saved_args;
22810 
22811 	case ARM_HARD_FRAME_POINTER_REGNUM:
22812 	  /* This is only non-zero in the case where the static chain register
22813 	     is stored above the frame.  */
22814 	  return offsets->frame - offsets->saved_args - 4;
22815 
22816 	case STACK_POINTER_REGNUM:
22817 	  /* If nothing has been pushed on the stack at all
22818 	     then this will return -4.  This *is* correct!  */
22819 	  return offsets->outgoing_args - (offsets->saved_args + 4);
22820 
22821 	default:
22822 	  gcc_unreachable ();
22823 	}
22824       gcc_unreachable ();
22825 
22826     case FRAME_POINTER_REGNUM:
22827       switch (to)
22828 	{
22829 	case THUMB_HARD_FRAME_POINTER_REGNUM:
22830 	  return 0;
22831 
22832 	case ARM_HARD_FRAME_POINTER_REGNUM:
22833 	  /* The hard frame pointer points to the top entry in the
22834 	     stack frame.  The soft frame pointer to the bottom entry
22835 	     in the stack frame.  If there is no stack frame at all,
22836 	     then they are identical.  */
22837 
22838 	  return offsets->frame - offsets->soft_frame;
22839 
22840 	case STACK_POINTER_REGNUM:
22841 	  return offsets->outgoing_args - offsets->soft_frame;
22842 
22843 	default:
22844 	  gcc_unreachable ();
22845 	}
22846       gcc_unreachable ();
22847 
22848     default:
22849       /* You cannot eliminate from the stack pointer.
22850 	 In theory you could eliminate from the hard frame
22851 	 pointer to the stack pointer, but this will never
22852 	 happen, since if a stack frame is not needed the
22853 	 hard frame pointer will never be used.  */
22854       gcc_unreachable ();
22855     }
22856 }
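
/* For example, with offsets of saved_args = 0, soft_frame = 16 and
   outgoing_args = 24, eliminating ARG_POINTER into STACK_POINTER yields
   24 - (0 + 4) = 20, while eliminating FRAME_POINTER into STACK_POINTER
   yields 24 - 16 = 8.  */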
22857 
22858 /* Given FROM and TO register numbers, say whether this elimination is
22859    allowed.  Frame pointer elimination is automatically handled.
22860 
22861    All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
22862    HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
22863    pointer, we must eliminate FRAME_POINTER_REGNUM into
22864    HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
22865    ARG_POINTER_REGNUM.  */
22866 
22867 bool
22868 arm_can_eliminate (const int from, const int to)
22869 {
22870   return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
22871           (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
22872           (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
22873           (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
22874            true);
22875 }
22876 
22877 /* Emit RTL to save coprocessor registers on function entry.  Returns the
22878    number of bytes pushed.  */
22879 
22880 static int
22881 arm_save_coproc_regs(void)
22882 {
22883   int saved_size = 0;
22884   unsigned reg;
22885   unsigned start_reg;
22886   rtx insn;
22887 
22888   for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
22889     if (df_regs_ever_live_p (reg) && !call_used_or_fixed_reg_p (reg))
22890       {
22891 	insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
22892 	insn = gen_rtx_MEM (V2SImode, insn);
22893 	insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
22894 	RTX_FRAME_RELATED_P (insn) = 1;
22895 	saved_size += 8;
22896       }
22897 
22898   if (TARGET_VFP_BASE)
22899     {
22900       start_reg = FIRST_VFP_REGNUM;
22901 
22902       for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
22903 	{
22904 	  if ((!df_regs_ever_live_p (reg) || call_used_or_fixed_reg_p (reg))
22905 	      && (!df_regs_ever_live_p (reg + 1)
22906 		  || call_used_or_fixed_reg_p (reg + 1)))
22907 	    {
22908 	      if (start_reg != reg)
22909 		saved_size += vfp_emit_fstmd (start_reg,
22910 					      (reg - start_reg) / 2);
22911 	      start_reg = reg + 2;
22912 	    }
22913 	}
22914       if (start_reg != reg)
22915 	saved_size += vfp_emit_fstmd (start_reg,
22916 				      (reg - start_reg) / 2);
22917     }
22918   return saved_size;
22919 }
22920 
22921 
22922 /* Set the Thumb frame pointer from the stack pointer.  */
22923 
22924 static void
22925 thumb_set_frame_pointer (arm_stack_offsets *offsets)
22926 {
22927   HOST_WIDE_INT amount;
22928   rtx insn, dwarf;
22929 
22930   amount = offsets->outgoing_args - offsets->locals_base;
22931   if (amount < 1024)
22932     insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22933 				  stack_pointer_rtx, GEN_INT (amount)));
22934   else
22935     {
22936       emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
22937       /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
22938          expects the first two operands to be the same.  */
22939       if (TARGET_THUMB2)
22940 	{
22941 	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22942 					stack_pointer_rtx,
22943 					hard_frame_pointer_rtx));
22944 	}
22945       else
22946 	{
22947 	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22948 					hard_frame_pointer_rtx,
22949 					stack_pointer_rtx));
22950 	}
22951       dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
22952 			   plus_constant (Pmode, stack_pointer_rtx, amount));
22953       RTX_FRAME_RELATED_P (dwarf) = 1;
22954       add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22955     }
22956 
22957   RTX_FRAME_RELATED_P (insn) = 1;
22958 }
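
/* For small frames this typically becomes a single "add r7, sp, #amount"
   (r7 being the Thumb hard frame pointer); for AMOUNT >= 1024 the constant is
   first moved into the frame pointer and the stack pointer is then added to
   it, with an explicit frame note so the unwinder still sees FP = SP + AMOUNT.  */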
22959 
22960 struct scratch_reg {
22961   rtx reg;
22962   bool saved;
22963 };
22964 
22965 /* Return a short-lived scratch register for use as a 2nd scratch register on
22966    function entry after the registers are saved in the prologue.  This register
22967    must be released by means of release_scratch_register_on_entry.  IP is not
22968    considered since it is always used as the 1st scratch register if available.
22969 
22970    REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
22971    mask of live registers.  */
22972 
22973 static void
22974 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
22975 			       unsigned long live_regs)
22976 {
22977   int regno = -1;
22978 
22979   sr->saved = false;
22980 
22981   if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
22982     regno = LR_REGNUM;
22983   else
22984     {
22985       unsigned int i;
22986 
22987       for (i = 4; i < 11; i++)
22988 	if (regno1 != i && (live_regs & (1 << i)) != 0)
22989 	  {
22990 	    regno = i;
22991 	    break;
22992 	  }
22993 
22994       if (regno < 0)
22995 	{
22996 	  /* If IP is used as the 1st scratch register for a nested function,
22997 	     then either r3 wasn't available or is used to preserve IP.  */
22998 	  if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
22999 	    regno1 = 3;
23000 	  regno = (regno1 == 3 ? 2 : 3);
23001 	  sr->saved
23002 	    = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
23003 			       regno);
23004 	}
23005     }
23006 
23007   sr->reg = gen_rtx_REG (SImode, regno);
23008   if (sr->saved)
23009     {
23010       rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23011       rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
23012       rtx x = gen_rtx_SET (stack_pointer_rtx,
23013 		           plus_constant (Pmode, stack_pointer_rtx, -4));
23014       RTX_FRAME_RELATED_P (insn) = 1;
23015       add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23016     }
23017 }
23018 
23019 /* Release a scratch register obtained from the preceding function.  */
23020 
23021 static void
23022 release_scratch_register_on_entry (struct scratch_reg *sr)
23023 {
23024   if (sr->saved)
23025     {
23026       rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
23027       rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
23028       rtx x = gen_rtx_SET (stack_pointer_rtx,
23029 			   plus_constant (Pmode, stack_pointer_rtx, 4));
23030       RTX_FRAME_RELATED_P (insn) = 1;
23031       add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23032     }
23033 }
23034 
23035 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
23036 
23037 #if PROBE_INTERVAL > 4096
23038 #error Cannot use indexed addressing mode for stack probing
23039 #endif
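
/* With the default STACK_CHECK_PROBE_INTERVAL_EXP of 12, PROBE_INTERVAL is
   4096 bytes, i.e. one probe per page, which keeps the probe offsets within
   the immediate range of a single STR.  */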
23040 
23041 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
23042    inclusive.  These are offsets from the current stack pointer.  REGNO1
23043    is the index number of the 1st scratch register and LIVE_REGS is the
23044    mask of live registers.  */
23045 
23046 static void
23047 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
23048 			    unsigned int regno1, unsigned long live_regs)
23049 {
23050   rtx reg1 = gen_rtx_REG (Pmode, regno1);
23051 
23052   /* See if we have a constant small number of probes to generate.  If so,
23053      that's the easy case.  */
23054   if (size <= PROBE_INTERVAL)
23055     {
23056       emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23057       emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23058       emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
23059     }
23060 
23061   /* The run-time loop is made up of 10 insns in the generic case while the
23062      compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
23063   else if (size <= 5 * PROBE_INTERVAL)
23064     {
23065       HOST_WIDE_INT i, rem;
23066 
23067       emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23068       emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23069       emit_stack_probe (reg1);
23070 
23071       /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
23072 	 it exceeds SIZE.  If only two probes are needed, this will not
23073 	 generate any code.  Then probe at FIRST + SIZE.  */
23074       for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
23075 	{
23076 	  emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23077 	  emit_stack_probe (reg1);
23078 	}
23079 
23080       rem = size - (i - PROBE_INTERVAL);
23081       if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23082 	{
23083 	  emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23084 	  emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
23085 	}
23086       else
23087 	emit_stack_probe (plus_constant (Pmode, reg1, -rem));
23088     }
23089 
23090   /* Otherwise, do the same as above, but in a loop.  Note that we must be
23091      extra careful with variables wrapping around because we might be at
23092      the very top (or the very bottom) of the address space and we have
23093      to be able to handle this case properly; in particular, we use an
23094      equality test for the loop condition.  */
23095   else
23096     {
23097       HOST_WIDE_INT rounded_size;
23098       struct scratch_reg sr;
23099 
23100       get_scratch_register_on_entry (&sr, regno1, live_regs);
23101 
23102       emit_move_insn (reg1, GEN_INT (first));
23103 
23104 
23105       /* Step 1: round SIZE to the previous multiple of the interval.  */
23106 
23107       rounded_size = size & -PROBE_INTERVAL;
23108       emit_move_insn (sr.reg, GEN_INT (rounded_size));
23109 
23110 
23111       /* Step 2: compute initial and final value of the loop counter.  */
23112 
23113       /* TEST_ADDR = SP + FIRST.  */
23114       emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23115 
23116       /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
23117       emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
23118 
23119 
23120       /* Step 3: the loop
23121 
23122 	 do
23123 	   {
23124 	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
23125 	     probe at TEST_ADDR
23126 	   }
23127 	 while (TEST_ADDR != LAST_ADDR)
23128 
23129 	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
23130 	 until it is equal to ROUNDED_SIZE.  */
23131 
23132       emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
23133 
23134 
23135       /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
23136 	 that SIZE is equal to ROUNDED_SIZE.  */
23137 
23138       if (size != rounded_size)
23139 	{
23140 	  HOST_WIDE_INT rem = size - rounded_size;
23141 
23142 	  if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23143 	    {
23144 	      emit_set_insn (sr.reg,
23145 			     plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
23146 	      emit_stack_probe (plus_constant (Pmode, sr.reg,
23147 					       PROBE_INTERVAL - rem));
23148 	    }
23149 	  else
23150 	    emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
23151 	}
23152 
23153       release_scratch_register_on_entry (&sr);
23154     }
23155 
23156   /* Make sure nothing is scheduled before we are done.  */
23157   emit_insn (gen_blockage ());
23158 }
23159 
23160 /* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
23161    absolute addresses.  */
23162 
23163 const char *
23164 output_probe_stack_range (rtx reg1, rtx reg2)
23165 {
23166   static int labelno = 0;
23167   char loop_lab[32];
23168   rtx xops[2];
23169 
23170   ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
23171 
23172   /* Loop.  */
23173   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
23174 
23175   /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
23176   xops[0] = reg1;
23177   xops[1] = GEN_INT (PROBE_INTERVAL);
23178   output_asm_insn ("sub\t%0, %0, %1", xops);
23179 
23180   /* Probe at TEST_ADDR.  */
23181   output_asm_insn ("str\tr0, [%0, #0]", xops);
23182 
23183   /* Test if TEST_ADDR == LAST_ADDR.  */
23184   xops[1] = reg2;
23185   output_asm_insn ("cmp\t%0, %1", xops);
23186 
23187   /* Branch.  */
23188   fputs ("\tbne\t", asm_out_file);
23189   assemble_name_raw (asm_out_file, loop_lab);
23190   fputc ('\n', asm_out_file);
23191 
23192   return "";
23193 }
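
/* The emitted loop therefore looks roughly like this, with r4/r5 standing in
   for REG1/REG2 and assuming the default 4096-byte probe interval:

       .LPSRL0:
               sub     r4, r4, #4096
               str     r0, [r4, #0]
               cmp     r4, r5
               bne     .LPSRL0  */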
23194 
23195 /* Generate the prologue instructions for entry into an ARM or Thumb-2
23196    function.  */
23197 void
23198 arm_expand_prologue (void)
23199 {
23200   rtx amount;
23201   rtx insn;
23202   rtx ip_rtx;
23203   unsigned long live_regs_mask;
23204   unsigned long func_type;
23205   int fp_offset = 0;
23206   int saved_pretend_args = 0;
23207   int saved_regs = 0;
23208   unsigned HOST_WIDE_INT args_to_push;
23209   HOST_WIDE_INT size;
23210   arm_stack_offsets *offsets;
23211   bool clobber_ip;
23212 
23213   func_type = arm_current_func_type ();
23214 
23215   /* Naked functions don't have prologues.  */
23216   if (IS_NAKED (func_type))
23217     {
23218       if (flag_stack_usage_info)
23219 	current_function_static_stack_size = 0;
23220       return;
23221     }
23222 
23223   /* Make a copy of crtl->args.pretend_args_size, as we may need to modify it locally.  */
23224   args_to_push = crtl->args.pretend_args_size;
23225 
23226   /* Compute which register we will have to save onto the stack.  */
23227   offsets = arm_get_frame_offsets ();
23228   live_regs_mask = offsets->saved_regs_mask;
23229 
23230   ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
23231 
23232   if (IS_STACKALIGN (func_type))
23233     {
23234       rtx r0, r1;
23235 
23236       /* Handle a word-aligned stack pointer.  We generate the following:
23237 
23238 	  mov r0, sp
23239 	  bic r1, r0, #7
23240 	  mov sp, r1
23241 	  <save and restore r0 in normal prologue/epilogue>
23242 	  mov sp, r0
23243 	  bx lr
23244 
23245 	 The unwinder doesn't need to know about the stack realignment.
23246 	 Just tell it we saved SP in r0.  */
23247       gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
23248 
23249       r0 = gen_rtx_REG (SImode, R0_REGNUM);
23250       r1 = gen_rtx_REG (SImode, R1_REGNUM);
23251 
23252       insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
23253       RTX_FRAME_RELATED_P (insn) = 1;
23254       add_reg_note (insn, REG_CFA_REGISTER, NULL);
23255 
23256       emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
23257 
23258       /* ??? The CFA changes here, which may cause GDB to conclude that it
23259 	 has entered a different function.  That said, the unwind info is
23260 	 correct, individually, before and after this instruction because
23261 	 we've described the save of SP, which will override the default
23262 	 handling of SP as restoring from the CFA.  */
23263       emit_insn (gen_movsi (stack_pointer_rtx, r1));
23264     }
23265 
23266   /* Let's compute the static_chain_stack_bytes required and store it.  Right
23267      now the value must be -1 as stored by arm_init_machine_status ().  */
23268   cfun->machine->static_chain_stack_bytes
23269     = arm_compute_static_chain_stack_bytes ();
23270 
23271   /* The static chain register is the same as the IP register.  If it is
23272      clobbered when creating the frame, we need to save and restore it.  */
23273   clobber_ip = IS_NESTED (func_type)
23274 	       && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23275 		   || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23276 			|| flag_stack_clash_protection)
23277 		       && !df_regs_ever_live_p (LR_REGNUM)
23278 		       && arm_r3_live_at_start_p ()));
23279 
23280   /* Find somewhere to store IP whilst the frame is being created.
23281      We try the following places in order:
23282 
23283        1. The last argument register r3 if it is available.
23284        2. A slot on the stack above the frame if there are no
23285 	  arguments to push onto the stack.
23286        3. Register r3 again, after pushing the argument registers
23287 	  onto the stack, if this is a varargs function.
23288        4. The last slot on the stack created for the arguments to
23289 	  push, if this isn't a varargs function.
23290 
23291      Note - we only need to tell the dwarf2 backend about the SP
23292      adjustment in the second variant; the static chain register
23293      doesn't need to be unwound, as it doesn't contain a value
23294      inherited from the caller.  */
23295   if (clobber_ip)
23296     {
23297       if (!arm_r3_live_at_start_p ())
23298 	insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23299       else if (args_to_push == 0)
23300 	{
23301 	  rtx addr, dwarf;
23302 
23303 	  gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
23304 	  saved_regs += 4;
23305 
23306 	  addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23307 	  insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23308 	  fp_offset = 4;
23309 
23310 	  /* Just tell the dwarf backend that we adjusted SP.  */
23311 	  dwarf = gen_rtx_SET (stack_pointer_rtx,
23312 			       plus_constant (Pmode, stack_pointer_rtx,
23313 					      -fp_offset));
23314 	  RTX_FRAME_RELATED_P (insn) = 1;
23315 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23316 	}
23317       else
23318 	{
23319 	  /* Store the args on the stack.  */
23320 	  if (cfun->machine->uses_anonymous_args)
23321 	    {
23322 	      insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
23323 					  (0xf0 >> (args_to_push / 4)) & 0xf);
23324 	      emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23325 	      saved_pretend_args = 1;
23326 	    }
23327 	  else
23328 	    {
23329 	      rtx addr, dwarf;
23330 
23331 	      if (args_to_push == 4)
23332 		addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23333 	      else
23334 		addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
23335 					   plus_constant (Pmode,
23336 							  stack_pointer_rtx,
23337 							  -args_to_push));
23338 
23339 	      insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23340 
23341 	      /* Just tell the dwarf backend that we adjusted SP.  */
23342 	      dwarf = gen_rtx_SET (stack_pointer_rtx,
23343 				   plus_constant (Pmode, stack_pointer_rtx,
23344 						  -args_to_push));
23345 	      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23346 	    }
23347 
23348 	  RTX_FRAME_RELATED_P (insn) = 1;
23349 	  fp_offset = args_to_push;
23350 	  args_to_push = 0;
23351 	}
23352     }
23353 
23354   if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23355     {
23356       if (IS_INTERRUPT (func_type))
23357 	{
23358 	  /* Interrupt functions must not corrupt any registers.
23359 	     Creating a frame pointer however, corrupts the IP
23360 	     register, so we must push it first.  */
23361 	  emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
23362 
23363 	  /* Do not set RTX_FRAME_RELATED_P on this insn.
23364 	     The dwarf stack unwinding code only wants to see one
23365 	     stack decrement per function, and this is not it.  If
23366 	     this instruction is labeled as being part of the frame
23367 	     creation sequence then dwarf2out_frame_debug_expr will
23368 	     die when it encounters the assignment of IP to FP
23369 	     later on, since the use of SP here establishes SP as
23370 	     the CFA register and not IP.
23371 
23372 	     Anyway this instruction is not really part of the stack
23373 	     frame creation although it is part of the prologue.  */
23374 	}
23375 
23376       insn = emit_set_insn (ip_rtx,
23377 			    plus_constant (Pmode, stack_pointer_rtx,
23378 					   fp_offset));
23379       RTX_FRAME_RELATED_P (insn) = 1;
23380     }
23381 
23382   /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR.  */
23383   if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23384     {
23385       saved_regs += 4;
23386       insn = emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx,
23387 						GEN_INT (FPCXTNS_ENUM)));
23388       rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
23389 			  plus_constant (Pmode, stack_pointer_rtx, -4));
23390       add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23391       RTX_FRAME_RELATED_P (insn) = 1;
23392     }
23393 
23394   if (args_to_push)
23395     {
23396       /* Push the argument registers, or reserve space for them.  */
23397       if (cfun->machine->uses_anonymous_args)
23398 	insn = emit_multi_reg_push
23399 	  ((0xf0 >> (args_to_push / 4)) & 0xf,
23400 	   (0xf0 >> (args_to_push / 4)) & 0xf);
23401       else
23402 	insn = emit_insn
23403 	  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23404 		       GEN_INT (- args_to_push)));
23405       RTX_FRAME_RELATED_P (insn) = 1;
23406     }
23407 
23408   /* If this is an interrupt service routine, and the link register
23409      is going to be pushed, and we're not generating an extra
23410      push of IP (needed when a frame is needed and the frame layout is APCS),
23411      subtracting four from LR now will mean that the function return
23412      can be done with a single instruction.  */
23413   if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
23414       && (live_regs_mask & (1 << LR_REGNUM)) != 0
23415       && !(frame_pointer_needed && TARGET_APCS_FRAME)
23416       && TARGET_ARM)
23417     {
23418       rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
23419 
23420       emit_set_insn (lr, plus_constant (SImode, lr, -4));
23421     }
23422 
23423   if (live_regs_mask)
23424     {
23425       unsigned long dwarf_regs_mask = live_regs_mask;
23426 
23427       saved_regs += bit_count (live_regs_mask) * 4;
23428       if (optimize_size && !frame_pointer_needed
23429 	  && saved_regs == offsets->saved_regs - offsets->saved_args)
23430 	{
23431 	  /* If no coprocessor registers are being pushed and we don't have
23432 	     to worry about a frame pointer then push extra registers to
23433 	     create the stack frame.  This is done in a way that does not
23434 	     alter the frame layout, so is independent of the epilogue.  */
23435 	  int n;
23436 	  int frame;
23437 	  n = 0;
23438 	  while (n < 8 && (live_regs_mask & (1 << n)) == 0)
23439 	    n++;
23440 	  frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
23441 	  if (frame && n * 4 >= frame)
23442 	    {
23443 	      n = frame / 4;
23444 	      live_regs_mask |= (1 << n) - 1;
23445 	      saved_regs += frame;
23446 	    }
23447 	}
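	  /* For instance (a sketch): with 12 bytes of frame left to allocate
	     and no core register below r3 needing to be saved, the code
	     above adds r0-r2 to live_regs_mask so the frame space is
	     allocated by the existing push, avoiding a separate
	     "sub sp, sp, #12".  */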
23448 
23449       if (TARGET_LDRD
23450 	  && current_tune->prefer_ldrd_strd
23451           && !optimize_function_for_size_p (cfun))
23452         {
23453 	  gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
23454           if (TARGET_THUMB2)
23455 	    thumb2_emit_strd_push (live_regs_mask);
23456           else if (TARGET_ARM
23457                    && !TARGET_APCS_FRAME
23458                    && !IS_INTERRUPT (func_type))
23459 	    arm_emit_strd_push (live_regs_mask);
23460           else
23461             {
23462 	      insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
23463               RTX_FRAME_RELATED_P (insn) = 1;
23464             }
23465         }
23466       else
23467         {
23468 	  insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
23469           RTX_FRAME_RELATED_P (insn) = 1;
23470         }
23471     }
23472 
23473   if (! IS_VOLATILE (func_type))
23474     saved_regs += arm_save_coproc_regs ();
23475 
23476   if (frame_pointer_needed && TARGET_ARM)
23477     {
23478       /* Create the new frame pointer.  */
23479       if (TARGET_APCS_FRAME)
23480 	{
23481 	  insn = GEN_INT (-(4 + args_to_push + fp_offset));
23482 	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
23483 	  RTX_FRAME_RELATED_P (insn) = 1;
23484 	}
23485       else
23486 	{
23487 	  insn = GEN_INT (saved_regs - (4 + fp_offset));
23488 	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23489 					stack_pointer_rtx, insn));
23490 	  RTX_FRAME_RELATED_P (insn) = 1;
23491 	}
23492     }
23493 
23494   size = offsets->outgoing_args - offsets->saved_args;
23495   if (flag_stack_usage_info)
23496     current_function_static_stack_size = size;
23497 
23498   /* If this isn't an interrupt service routine and we have a frame, then do
23499      stack checking.  We use IP as the first scratch register, except for the
23500      non-APCS nested functions if LR or r3 are available (see clobber_ip).  */
23501   if (!IS_INTERRUPT (func_type)
23502       && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23503 	  || flag_stack_clash_protection))
23504     {
23505       unsigned int regno;
23506 
23507       if (!IS_NESTED (func_type) || clobber_ip)
23508 	regno = IP_REGNUM;
23509       else if (df_regs_ever_live_p (LR_REGNUM))
23510 	regno = LR_REGNUM;
23511       else
23512 	regno = 3;
23513 
23514       if (crtl->is_leaf && !cfun->calls_alloca)
23515 	{
23516 	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
23517 	    arm_emit_probe_stack_range (get_stack_check_protect (),
23518 					size - get_stack_check_protect (),
23519 					regno, live_regs_mask);
23520 	}
23521       else if (size > 0)
23522 	arm_emit_probe_stack_range (get_stack_check_protect (), size,
23523 				    regno, live_regs_mask);
23524     }
23525 
23526   /* Recover the static chain register.  */
23527   if (clobber_ip)
23528     {
23529       if (!arm_r3_live_at_start_p () || saved_pretend_args)
23530 	insn = gen_rtx_REG (SImode, 3);
23531       else
23532 	{
23533 	  insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
23534 	  insn = gen_frame_mem (SImode, insn);
23535 	}
23536       emit_set_insn (ip_rtx, insn);
23537       emit_insn (gen_force_register_use (ip_rtx));
23538     }
23539 
23540   if (offsets->outgoing_args != offsets->saved_args + saved_regs)
23541     {
23542       /* This add can produce multiple insns for a large constant, so we
23543 	 need to get tricky.  */
23544       rtx_insn *last = get_last_insn ();
23545 
23546       amount = GEN_INT (offsets->saved_args + saved_regs
23547 			- offsets->outgoing_args);
23548 
23549       insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23550 				    amount));
23551       do
23552 	{
23553 	  last = last ? NEXT_INSN (last) : get_insns ();
23554 	  RTX_FRAME_RELATED_P (last) = 1;
23555 	}
23556       while (last != insn);
23557 
23558       /* If the frame pointer is needed, emit a special barrier that
23559 	 will prevent the scheduler from moving stores to the frame
23560 	 before the stack adjustment.  */
23561       if (frame_pointer_needed)
23562 	emit_insn (gen_stack_tie (stack_pointer_rtx,
23563 				  hard_frame_pointer_rtx));
23564     }
23565 
23566 
23567   if (frame_pointer_needed && TARGET_THUMB2)
23568     thumb_set_frame_pointer (offsets);
23569 
23570   if (flag_pic && arm_pic_register != INVALID_REGNUM)
23571     {
23572       unsigned long mask;
23573 
23574       mask = live_regs_mask;
23575       mask &= THUMB2_WORK_REGS;
23576       if (!IS_NESTED (func_type))
23577 	mask |= (1 << IP_REGNUM);
23578       arm_load_pic_register (mask, NULL_RTX);
23579     }
23580 
23581   /* If we are profiling, make sure no instructions are scheduled before
23582      the call to mcount.  Similarly if the user has requested no
23583      scheduling in the prolog.  Similarly if we want non-call exceptions
23584      using the EABI unwinder, to prevent faulting instructions from being
23585      swapped with a stack adjustment.  */
23586   if (crtl->profile || !TARGET_SCHED_PROLOG
23587       || (arm_except_unwind_info (&global_options) == UI_TARGET
23588 	  && cfun->can_throw_non_call_exceptions))
23589     emit_insn (gen_blockage ());
23590 
23591   /* If the link register is being kept alive, with the return address in it,
23592      then make sure that it does not get reused by the ce2 pass.  */
23593   if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
23594     cfun->machine->lr_save_eliminated = 1;
23595 }
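/* For reference, the classic APCS-frame shape the prologue code above
   produces for a simple ARM-mode function (a sketch; the exact register
   list and offsets come from arm_get_frame_offsets and the live register
   mask):

	mov	ip, sp			@ ip := sp + fp_offset
	stmfd	sp!, {fp, ip, lr, pc}	@ emit_multi_reg_push
	sub	fp, ip, #4		@ create the new frame pointer
	sub	sp, sp, #<locals>	@ allocate the rest of the frame  */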
23596 
23597 /* Print condition code to STREAM.  Helper function for arm_print_operand.  */
23598 static void
23599 arm_print_condition (FILE *stream)
23600 {
23601   if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
23602     {
23603       /* Branch conversion is not implemented for Thumb-2.  */
23604       if (TARGET_THUMB)
23605 	{
23606 	  output_operand_lossage ("predicated Thumb instruction");
23607 	  return;
23608 	}
23609       if (current_insn_predicate != NULL)
23610 	{
23611 	  output_operand_lossage
23612 	    ("predicated instruction in conditional sequence");
23613 	  return;
23614 	}
23615 
23616       fputs (arm_condition_codes[arm_current_cc], stream);
23617     }
23618   else if (current_insn_predicate)
23619     {
23620       enum arm_cond_code code;
23621 
23622       if (TARGET_THUMB1)
23623 	{
23624 	  output_operand_lossage ("predicated Thumb instruction");
23625 	  return;
23626 	}
23627 
23628       code = get_arm_condition_code (current_insn_predicate);
23629       fputs (arm_condition_codes[code], stream);
23630     }
23631 }
23632 
23633 
23634 /* Globally reserved letters: acln
23635    Punctuation letters currently used: @_|?().!#
23636    Lower case letters currently used: bcdefhimpqtvwxyz
23637    Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTU
23638    Letters previously used, but now deprecated/obsolete: sVWXYZ.
23639 
23640    Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
23641 
23642    If CODE is 'd', then the X is a condition operand and the instruction
23643    should only be executed if the condition is true.
23644    if CODE is 'D', then the X is a condition operand and the instruction
23645    should only be executed if the condition is false: however, if the mode
23646    of the comparison is CCFPEmode, then always execute the instruction -- we
23647    do this because in these circumstances !GE does not necessarily imply LT;
23648    in these cases the instruction pattern will take care to make sure that
23649    an instruction containing %d will follow, thereby undoing the effects of
23650    doing this instruction unconditionally.
23651    If CODE is 'N' then X is a floating point operand that must be negated
23652    before output.
23653    If CODE is 'B' then output a bitwise inverted value of X (a const int).
23654    If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
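/* Example (a hypothetical template, not taken from arm.md): an output
   template such as "add%d2\t%0, %1, #1" prints operand 2 with the 'd' code,
   so when operand 2 is an EQ comparison the emitted mnemonic becomes
   "addeq".  */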
23655 static void
23656 arm_print_operand (FILE *stream, rtx x, int code)
23657 {
23658   switch (code)
23659     {
23660     case '@':
23661       fputs (ASM_COMMENT_START, stream);
23662       return;
23663 
23664     case '_':
23665       fputs (user_label_prefix, stream);
23666       return;
23667 
23668     case '|':
23669       fputs (REGISTER_PREFIX, stream);
23670       return;
23671 
23672     case '?':
23673       arm_print_condition (stream);
23674       return;
23675 
23676     case '.':
23677       /* The current condition code for a condition code setting instruction.
23678 	 Preceded by 's' in unified syntax, otherwise followed by 's'.  */
23679       fputc('s', stream);
23680       arm_print_condition (stream);
23681       return;
23682 
23683     case '!':
23684       /* If the instruction is conditionally executed then print
23685 	 the current condition code, otherwise print 's'.  */
23686       gcc_assert (TARGET_THUMB2);
23687       if (current_insn_predicate)
23688 	arm_print_condition (stream);
23689       else
23690 	fputc('s', stream);
23691       break;
23692 
23693     /* %# is a "break" sequence. It doesn't output anything, but is used to
23694        separate e.g. operand numbers from following text, if that text consists
23695        of further digits which we don't want to be part of the operand
23696        number.  */
23697     case '#':
23698       return;
23699 
23700     case 'N':
23701       {
23702 	REAL_VALUE_TYPE r;
23703 	r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
23704 	fprintf (stream, "%s", fp_const_from_val (&r));
23705       }
23706       return;
23707 
23708     /* An integer or symbol address without a preceding # sign.  */
23709     case 'c':
23710       switch (GET_CODE (x))
23711 	{
23712 	case CONST_INT:
23713 	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
23714 	  break;
23715 
23716 	case SYMBOL_REF:
23717 	  output_addr_const (stream, x);
23718 	  break;
23719 
23720 	case CONST:
23721 	  if (GET_CODE (XEXP (x, 0)) == PLUS
23722 	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
23723 	    {
23724 	      output_addr_const (stream, x);
23725 	      break;
23726 	    }
23727 	  /* Fall through.  */
23728 
23729 	default:
23730 	  output_operand_lossage ("Unsupported operand for code '%c'", code);
23731 	}
23732       return;
23733 
23734     /* An integer that we want to print in HEX.  */
23735     case 'x':
23736       switch (GET_CODE (x))
23737 	{
23738 	case CONST_INT:
23739 	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
23740 	  break;
23741 
23742 	default:
23743 	  output_operand_lossage ("Unsupported operand for code '%c'", code);
23744 	}
23745       return;
23746 
23747     case 'B':
23748       if (CONST_INT_P (x))
23749 	{
23750 	  HOST_WIDE_INT val;
23751 	  val = ARM_SIGN_EXTEND (~INTVAL (x));
23752 	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
23753 	}
23754       else
23755 	{
23756 	  putc ('~', stream);
23757 	  output_addr_const (stream, x);
23758 	}
23759       return;
23760 
23761     case 'b':
23762       /* Print the log2 of a CONST_INT.  */
23763       {
23764 	HOST_WIDE_INT val;
23765 
23766 	if (!CONST_INT_P (x)
23767 	    || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
23768 	  output_operand_lossage ("Unsupported operand for code '%c'", code);
23769 	else
23770 	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
23771       }
23772       return;
23773 
23774     case 'L':
23775       /* The low 16 bits of an immediate constant.  */
23776       fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
23777       return;
23778 
23779     case 'i':
23780       fprintf (stream, "%s", arithmetic_instr (x, 1));
23781       return;
23782 
23783     case 'I':
23784       fprintf (stream, "%s", arithmetic_instr (x, 0));
23785       return;
23786 
23787     case 'S':
23788       {
23789 	HOST_WIDE_INT val;
23790 	const char *shift;
23791 
23792 	shift = shift_op (x, &val);
23793 
23794 	if (shift)
23795 	  {
23796 	    fprintf (stream, ", %s ", shift);
23797 	    if (val == -1)
23798 	      arm_print_operand (stream, XEXP (x, 1), 0);
23799 	    else
23800 	      fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
23801 	  }
23802       }
23803       return;
23804 
23805       /* An explanation of the 'Q', 'R' and 'H' register operands:
23806 
23807 	 In a pair of registers containing a DI or DF value the 'Q'
23808 	 operand returns the register number of the register containing
23809 	 the least significant part of the value.  The 'R' operand returns
23810 	 the register number of the register containing the most
23811 	 significant part of the value.
23812 
23813 	 The 'H' operand returns the higher of the two register numbers.
23814 	 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
23815 	 same as the 'Q' operand, since the most significant part of the
23816 	 value is held in the lower number register.  The reverse is true
23817 	 on systems where WORDS_BIG_ENDIAN is false.
23818 
23819 	 The purpose of these operands is to distinguish between cases
23820 	 where the endian-ness of the values is important (for example
23821 	 when they are added together), and cases where the endian-ness
23822 	 is irrelevant, but the order of register operations is important.
23823 	 For example when loading a value from memory into a register
23824 	 pair, the endian-ness does not matter.  Provided that the value
23825 	 from the lower memory address is put into the lower numbered
23826 	 register, and the value from the higher address is put into the
23827 	 higher numbered register, the load will work regardless of whether
23828 	 the value being loaded is big-wordian or little-wordian.  The
23829 	 order of the two register loads can matter however, if the address
23830 	 of the memory location is actually held in one of the registers
23831 	 being overwritten by the load.
23832 
23833 	 The 'Q' and 'R' constraints are also available for 64-bit
23834 	 constants.  */
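      /* A worked example, assuming WORDS_BIG_ENDIAN is false: for a DImode
	 value held in r0/r1, '%Q' prints r0 (least significant word), '%R'
	 prints r1 (most significant word) and '%H' prints r1 (the higher
	 register number).  With WORDS_BIG_ENDIAN, '%Q' prints r1 and '%R'
	 prints r0, while '%H' still prints r1.  */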
23835     case 'Q':
23836       if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
23837 	{
23838 	  rtx part = gen_lowpart (SImode, x);
23839 	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
23840 	  return;
23841 	}
23842 
23843       if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23844 	{
23845 	  output_operand_lossage ("invalid operand for code '%c'", code);
23846 	  return;
23847 	}
23848 
23849       asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
23850       return;
23851 
23852     case 'R':
23853       if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
23854 	{
23855 	  machine_mode mode = GET_MODE (x);
23856 	  rtx part;
23857 
23858 	  if (mode == VOIDmode)
23859 	    mode = DImode;
23860 	  part = gen_highpart_mode (SImode, mode, x);
23861 	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
23862 	  return;
23863 	}
23864 
23865       if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23866 	{
23867 	  output_operand_lossage ("invalid operand for code '%c'", code);
23868 	  return;
23869 	}
23870 
23871       asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
23872       return;
23873 
23874     case 'H':
23875       if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23876 	{
23877 	  output_operand_lossage ("invalid operand for code '%c'", code);
23878 	  return;
23879 	}
23880 
23881       asm_fprintf (stream, "%r", REGNO (x) + 1);
23882       return;
23883 
23884     case 'J':
23885       if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23886 	{
23887 	  output_operand_lossage ("invalid operand for code '%c'", code);
23888 	  return;
23889 	}
23890 
23891       asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
23892       return;
23893 
23894     case 'K':
23895       if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23896 	{
23897 	  output_operand_lossage ("invalid operand for code '%c'", code);
23898 	  return;
23899 	}
23900 
23901       asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
23902       return;
23903 
23904     case 'm':
23905       asm_fprintf (stream, "%r",
23906 		   REG_P (XEXP (x, 0))
23907 		   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
23908       return;
23909 
23910     case 'M':
23911       asm_fprintf (stream, "{%r-%r}",
23912 		   REGNO (x),
23913 		   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
23914       return;
23915 
23916     /* Like 'M', but writing doubleword vector registers, for use by Neon
23917        insns.  */
23918     case 'h':
23919       {
23920         int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
23921         int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
23922         if (numregs == 1)
23923           asm_fprintf (stream, "{d%d}", regno);
23924         else
23925           asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
23926       }
23927       return;
23928 
23929     case 'd':
23930       /* CONST_TRUE_RTX means always -- that's the default.  */
23931       if (x == const_true_rtx)
23932 	return;
23933 
23934       if (!COMPARISON_P (x))
23935 	{
23936 	  output_operand_lossage ("invalid operand for code '%c'", code);
23937 	  return;
23938 	}
23939 
23940       fputs (arm_condition_codes[get_arm_condition_code (x)],
23941 	     stream);
23942       return;
23943 
23944     case 'D':
23945       /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
23946 	 want to do that.  */
23947       if (x == const_true_rtx)
23948 	{
23949 	  output_operand_lossage ("instruction never executed");
23950 	  return;
23951 	}
23952       if (!COMPARISON_P (x))
23953 	{
23954 	  output_operand_lossage ("invalid operand for code '%c'", code);
23955 	  return;
23956 	}
23957 
23958       fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
23959 				 (get_arm_condition_code (x))],
23960 	     stream);
23961       return;
23962 
23963     case 's':
23964     case 'V':
23965     case 'W':
23966     case 'X':
23967     case 'Y':
23968     case 'Z':
23969       /* Former Maverick support, removed after GCC-4.7.  */
23970       output_operand_lossage ("obsolete Maverick format code '%c'", code);
23971       return;
23972 
23973     case 'U':
23974       if (!REG_P (x)
23975 	  || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
23976 	  || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
23977 	/* Bad value for wCG register number.  */
23978 	{
23979 	  output_operand_lossage ("invalid operand for code '%c'", code);
23980 	  return;
23981 	}
23982 
23983       else
23984 	fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
23985       return;
23986 
23987       /* Print an iWMMXt control register name.  */
23988     case 'w':
23989       if (!CONST_INT_P (x)
23990 	  || INTVAL (x) < 0
23991 	  || INTVAL (x) >= 16)
23992 	/* Bad value for wC register number.  */
23993 	{
23994 	  output_operand_lossage ("invalid operand for code '%c'", code);
23995 	  return;
23996 	}
23997 
23998       else
23999 	{
24000 	  static const char * wc_reg_names [16] =
24001 	    {
24002 	      "wCID",  "wCon",  "wCSSF", "wCASF",
24003 	      "wC4",   "wC5",   "wC6",   "wC7",
24004 	      "wCGR0", "wCGR1", "wCGR2", "wCGR3",
24005 	      "wC12",  "wC13",  "wC14",  "wC15"
24006 	    };
24007 
24008 	  fputs (wc_reg_names [INTVAL (x)], stream);
24009 	}
24010       return;
24011 
24012     /* Print the high single-precision register of a VFP double-precision
24013        register.  */
24014     case 'p':
24015       {
24016         machine_mode mode = GET_MODE (x);
24017         int regno;
24018 
24019         if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
24020           {
24021 	    output_operand_lossage ("invalid operand for code '%c'", code);
24022 	    return;
24023           }
24024 
24025         regno = REGNO (x);
24026         if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
24027           {
24028 	    output_operand_lossage ("invalid operand for code '%c'", code);
24029 	    return;
24030           }
24031 
24032 	fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
24033       }
24034       return;
24035 
24036     /* Print a VFP/Neon double precision or quad precision register name.  */
24037     case 'P':
24038     case 'q':
24039       {
24040 	machine_mode mode = GET_MODE (x);
24041 	int is_quad = (code == 'q');
24042 	int regno;
24043 
24044 	if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
24045 	  {
24046 	    output_operand_lossage ("invalid operand for code '%c'", code);
24047 	    return;
24048 	  }
24049 
24050 	if (!REG_P (x)
24051 	    || !IS_VFP_REGNUM (REGNO (x)))
24052 	  {
24053 	    output_operand_lossage ("invalid operand for code '%c'", code);
24054 	    return;
24055 	  }
24056 
24057 	regno = REGNO (x);
24058 	if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
24059             || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
24060 	  {
24061 	    output_operand_lossage ("invalid operand for code '%c'", code);
24062 	    return;
24063 	  }
24064 
24065 	fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
24066 	  (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
24067       }
24068       return;
24069 
24070     /* These two codes print the low/high doubleword register of a Neon quad
24071        register, respectively.  For pair-structure types, can also print
24072        low/high quadword registers.  */
24073     case 'e':
24074     case 'f':
24075       {
24076         machine_mode mode = GET_MODE (x);
24077         int regno;
24078 
24079         if ((GET_MODE_SIZE (mode) != 16
24080 	     && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
24081           {
24082 	    output_operand_lossage ("invalid operand for code '%c'", code);
24083 	    return;
24084           }
24085 
24086         regno = REGNO (x);
24087         if (!NEON_REGNO_OK_FOR_QUAD (regno))
24088           {
24089 	    output_operand_lossage ("invalid operand for code '%c'", code);
24090 	    return;
24091           }
24092 
24093         if (GET_MODE_SIZE (mode) == 16)
24094           fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
24095 				  + (code == 'f' ? 1 : 0));
24096         else
24097           fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
24098 				  + (code == 'f' ? 1 : 0));
24099       }
24100       return;
24101 
24102     /* Print a VFPv3 floating-point constant, represented as an integer
24103        index.  */
24104     case 'G':
24105       {
24106         int index = vfp3_const_double_index (x);
24107 	gcc_assert (index != -1);
24108 	fprintf (stream, "%d", index);
24109       }
24110       return;
24111 
24112     /* Print bits representing opcode features for Neon.
24113 
24114        Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
24115        and polynomials as unsigned.
24116 
24117        Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
24118 
24119        Bit 2 is 1 for rounding functions, 0 otherwise.  */
24120 
24121     /* Identify the type as 's', 'u', 'p' or 'f'.  */
24122     case 'T':
24123       {
24124         HOST_WIDE_INT bits = INTVAL (x);
24125         fputc ("uspf"[bits & 3], stream);
24126       }
24127       return;
24128 
24129     /* Likewise, but signed and unsigned integers are both 'i'.  */
24130     case 'F':
24131       {
24132         HOST_WIDE_INT bits = INTVAL (x);
24133         fputc ("iipf"[bits & 3], stream);
24134       }
24135       return;
24136 
24137     /* As for 'T', but emit 'u' instead of 'p'.  */
24138     case 't':
24139       {
24140         HOST_WIDE_INT bits = INTVAL (x);
24141         fputc ("usuf"[bits & 3], stream);
24142       }
24143       return;
24144 
24145     /* Bit 2: rounding (vs none).  */
24146     case 'O':
24147       {
24148         HOST_WIDE_INT bits = INTVAL (x);
24149         fputs ((bits & 4) != 0 ? "r" : "", stream);
24150       }
24151       return;
24152 
24153     /* Memory operand for vld1/vst1 instruction.  */
24154     case 'A':
24155       {
24156 	rtx addr;
24157 	bool postinc = FALSE;
24158 	rtx postinc_reg = NULL;
24159 	unsigned align, memsize, align_bits;
24160 
24161 	gcc_assert (MEM_P (x));
24162 	addr = XEXP (x, 0);
24163 	if (GET_CODE (addr) == POST_INC)
24164 	  {
24165 	    postinc = 1;
24166 	    addr = XEXP (addr, 0);
24167 	  }
24168 	if (GET_CODE (addr) == POST_MODIFY)
24169 	  {
24170 	    postinc_reg = XEXP( XEXP (addr, 1), 1);
24171 	    addr = XEXP (addr, 0);
24172 	  }
24173 	asm_fprintf (stream, "[%r", REGNO (addr));
24174 
24175 	/* We know the alignment of this access, so we can emit a hint in the
24176 	   instruction (for some alignments) as an aid to the memory subsystem
24177 	   of the target.  */
24178 	align = MEM_ALIGN (x) >> 3;
24179 	memsize = MEM_SIZE (x);
24180 
24181 	/* Only certain alignment specifiers are supported by the hardware.  */
24182 	if (memsize == 32 && (align % 32) == 0)
24183 	  align_bits = 256;
24184 	else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
24185 	  align_bits = 128;
24186 	else if (memsize >= 8 && (align % 8) == 0)
24187 	  align_bits = 64;
24188 	else
24189 	  align_bits = 0;
24190 
24191 	if (align_bits != 0)
24192 	  asm_fprintf (stream, ":%d", align_bits);
24193 
24194 	asm_fprintf (stream, "]");
24195 
24196 	if (postinc)
24197 	  fputs("!", stream);
24198 	if (postinc_reg)
24199 	  asm_fprintf (stream, ", %r", REGNO (postinc_reg));
24200       }
24201       return;
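      /* E.g. (a sketch of what the 'A' code above prints): a 16-byte Neon
	 access through (post_inc (reg r0)) with 128-bit alignment prints
	 "[r0:128]!", while an 8-byte, 8-byte-aligned access through
	 (reg r1) prints "[r1:64]".  */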
24202 
24203     /* To print the memory operand with "Ux" or "Uj" constraint.  Based on the
24204        rtx_code, the memory operand is output in one of the following forms:
24205        1. [Rn], #+/-<imm>
24206        2. [Rn, #+/-<imm>]!
24207        3. [Rn, #+/-<imm>]
24208        4. [Rn].  */
24209     case 'E':
24210       {
24211 	rtx addr;
24212 	rtx postinc_reg = NULL;
24213 	unsigned inc_val = 0;
24214 	enum rtx_code code;
24215 
24216 	gcc_assert (MEM_P (x));
24217 	addr = XEXP (x, 0);
24218 	code = GET_CODE (addr);
24219 	if (code == POST_INC || code == POST_DEC || code == PRE_INC
24220 	    || code  == PRE_DEC)
24221 	  {
24222 	    asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24223 	    inc_val = GET_MODE_SIZE (GET_MODE (x));
24224 	    if (code == POST_INC || code == POST_DEC)
24225 	      asm_fprintf (stream, "], #%s%d",(code == POST_INC)
24226 					      ? "": "-", inc_val);
24227 	    else
24228 	      asm_fprintf (stream, ", #%s%d]!",(code == PRE_INC)
24229 					       ? "": "-", inc_val);
24230 	  }
24231 	else if (code == POST_MODIFY || code == PRE_MODIFY)
24232 	  {
24233 	    asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24234 	    postinc_reg = XEXP (XEXP (addr, 1), 1);
24235 	    if (postinc_reg && CONST_INT_P (postinc_reg))
24236 	      {
24237 		if (code == POST_MODIFY)
24238 		  asm_fprintf (stream, "], #%wd",INTVAL (postinc_reg));
24239 		else
24240 		  asm_fprintf (stream, ", #%wd]!",INTVAL (postinc_reg));
24241 	      }
24242 	  }
24243 	else if (code == PLUS)
24244 	  {
24245 	    rtx base = XEXP (addr, 0);
24246 	    rtx index = XEXP (addr, 1);
24247 
24248 	    gcc_assert (REG_P (base) && CONST_INT_P (index));
24249 
24250 	    HOST_WIDE_INT offset = INTVAL (index);
24251 	    asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);
24252 	  }
24253 	else
24254 	  {
24255 	    gcc_assert (REG_P (addr));
24256 	    asm_fprintf (stream, "[%r]",REGNO (addr));
24257 	  }
24258       }
24259       return;
24260 
24261     case 'C':
24262       {
24263 	rtx addr;
24264 
24265 	gcc_assert (MEM_P (x));
24266 	addr = XEXP (x, 0);
24267 	gcc_assert (REG_P (addr));
24268 	asm_fprintf (stream, "[%r]", REGNO (addr));
24269       }
24270       return;
24271 
24272     /* Translate an S register number into a D register number and element index.  */
24273     case 'y':
24274       {
24275         machine_mode mode = GET_MODE (x);
24276         int regno;
24277 
24278         if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
24279           {
24280 	    output_operand_lossage ("invalid operand for code '%c'", code);
24281 	    return;
24282           }
24283 
24284         regno = REGNO (x);
24285         if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24286           {
24287 	    output_operand_lossage ("invalid operand for code '%c'", code);
24288 	    return;
24289           }
24290 
24291 	regno = regno - FIRST_VFP_REGNUM;
24292 	fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
24293       }
24294       return;
24295 
24296     case 'v':
24297 	gcc_assert (CONST_DOUBLE_P (x));
24298 	int result;
24299 	result = vfp3_const_double_for_fract_bits (x);
24300 	if (result == 0)
24301 	  result = vfp3_const_double_for_bits (x);
24302 	fprintf (stream, "#%d", result);
24303 	return;
24304 
24305     /* Register specifier for vld1.16/vst1.16.  Translate the S register
24306        number into a D register number and element index.  */
24307     case 'z':
24308       {
24309         machine_mode mode = GET_MODE (x);
24310         int regno;
24311 
24312         if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
24313           {
24314 	    output_operand_lossage ("invalid operand for code '%c'", code);
24315 	    return;
24316           }
24317 
24318         regno = REGNO (x);
24319         if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24320           {
24321 	    output_operand_lossage ("invalid operand for code '%c'", code);
24322 	    return;
24323           }
24324 
24325 	regno = regno - FIRST_VFP_REGNUM;
24326 	fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
24327       }
24328       return;
24329 
24330     default:
24331       if (x == 0)
24332 	{
24333 	  output_operand_lossage ("missing operand");
24334 	  return;
24335 	}
24336 
24337       switch (GET_CODE (x))
24338 	{
24339 	case REG:
24340 	  asm_fprintf (stream, "%r", REGNO (x));
24341 	  break;
24342 
24343 	case MEM:
24344 	  output_address (GET_MODE (x), XEXP (x, 0));
24345 	  break;
24346 
24347 	case CONST_DOUBLE:
24348 	  {
24349             char fpstr[20];
24350             real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
24351 			      sizeof (fpstr), 0, 1);
24352             fprintf (stream, "#%s", fpstr);
24353 	  }
24354 	  break;
24355 
24356 	default:
24357 	  gcc_assert (GET_CODE (x) != NEG);
24358 	  fputc ('#', stream);
24359 	  if (GET_CODE (x) == HIGH)
24360 	    {
24361 	      fputs (":lower16:", stream);
24362 	      x = XEXP (x, 0);
24363 	    }
24364 
24365 	  output_addr_const (stream, x);
24366 	  break;
24367 	}
24368     }
24369 }
24370 
24371 /* Target hook for printing a memory address.  */
24372 static void
24373 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
24374 {
24375   if (TARGET_32BIT)
24376     {
24377       int is_minus = GET_CODE (x) == MINUS;
24378 
24379       if (REG_P (x))
24380 	asm_fprintf (stream, "[%r]", REGNO (x));
24381       else if (GET_CODE (x) == PLUS || is_minus)
24382 	{
24383 	  rtx base = XEXP (x, 0);
24384 	  rtx index = XEXP (x, 1);
24385 	  HOST_WIDE_INT offset = 0;
24386 	  if (!REG_P (base)
24387 	      || (REG_P (index) && REGNO (index) == SP_REGNUM))
24388 	    {
24389 	      /* Ensure that BASE is a register.  */
24390 	      /* (one of them must be).  */
24391 	      /* Also ensure the SP is not used as an index register.  */
24392 	      std::swap (base, index);
24393 	    }
24394 	  switch (GET_CODE (index))
24395 	    {
24396 	    case CONST_INT:
24397 	      offset = INTVAL (index);
24398 	      if (is_minus)
24399 		offset = -offset;
24400 	      asm_fprintf (stream, "[%r, #%wd]",
24401 			   REGNO (base), offset);
24402 	      break;
24403 
24404 	    case REG:
24405 	      asm_fprintf (stream, "[%r, %s%r]",
24406 			   REGNO (base), is_minus ? "-" : "",
24407 			   REGNO (index));
24408 	      break;
24409 
24410 	    case MULT:
24411 	    case ASHIFTRT:
24412 	    case LSHIFTRT:
24413 	    case ASHIFT:
24414 	    case ROTATERT:
24415 	      {
24416 		asm_fprintf (stream, "[%r, %s%r",
24417 			     REGNO (base), is_minus ? "-" : "",
24418 			     REGNO (XEXP (index, 0)));
24419 		arm_print_operand (stream, index, 'S');
24420 		fputs ("]", stream);
24421 		break;
24422 	      }
24423 
24424 	    default:
24425 	      gcc_unreachable ();
24426 	    }
24427 	}
24428       else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
24429 	       || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
24430 	{
24431 	  gcc_assert (REG_P (XEXP (x, 0)));
24432 
24433 	  if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
24434 	    asm_fprintf (stream, "[%r, #%s%d]!",
24435 			 REGNO (XEXP (x, 0)),
24436 			 GET_CODE (x) == PRE_DEC ? "-" : "",
24437 			 GET_MODE_SIZE (mode));
24438 	  else if (TARGET_HAVE_MVE && (mode == OImode || mode == XImode))
24439 	    asm_fprintf (stream, "[%r]!", REGNO (XEXP (x,0)));
24440 	  else
24441 	    asm_fprintf (stream, "[%r], #%s%d", REGNO (XEXP (x, 0)),
24442 			 GET_CODE (x) == POST_DEC ? "-" : "",
24443 			 GET_MODE_SIZE (mode));
24444 	}
24445       else if (GET_CODE (x) == PRE_MODIFY)
24446 	{
24447 	  asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
24448 	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24449 	    asm_fprintf (stream, "#%wd]!",
24450 			 INTVAL (XEXP (XEXP (x, 1), 1)));
24451 	  else
24452 	    asm_fprintf (stream, "%r]!",
24453 			 REGNO (XEXP (XEXP (x, 1), 1)));
24454 	}
24455       else if (GET_CODE (x) == POST_MODIFY)
24456 	{
24457 	  asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
24458 	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24459 	    asm_fprintf (stream, "#%wd",
24460 			 INTVAL (XEXP (XEXP (x, 1), 1)));
24461 	  else
24462 	    asm_fprintf (stream, "%r",
24463 			 REGNO (XEXP (XEXP (x, 1), 1)));
24464 	}
24465       else output_addr_const (stream, x);
24466     }
24467   else
24468     {
24469       if (REG_P (x))
24470 	asm_fprintf (stream, "[%r]", REGNO (x));
24471       else if (GET_CODE (x) == POST_INC)
24472 	asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
24473       else if (GET_CODE (x) == PLUS)
24474 	{
24475 	  gcc_assert (REG_P (XEXP (x, 0)));
24476 	  if (CONST_INT_P (XEXP (x, 1)))
24477 	    asm_fprintf (stream, "[%r, #%wd]",
24478 			 REGNO (XEXP (x, 0)),
24479 			 INTVAL (XEXP (x, 1)));
24480 	  else
24481 	    asm_fprintf (stream, "[%r, %r]",
24482 			 REGNO (XEXP (x, 0)),
24483 			 REGNO (XEXP (x, 1)));
24484 	}
24485       else
24486 	output_addr_const (stream, x);
24487     }
24488 }
24489 
24490 /* Target hook for indicating whether a punctuation character for
24491    TARGET_PRINT_OPERAND is valid.  */
24492 static bool
24493 arm_print_operand_punct_valid_p (unsigned char code)
24494 {
24495   return (code == '@' || code == '|' || code == '.'
24496 	  || code == '(' || code == ')' || code == '#'
24497 	  || (TARGET_32BIT && (code == '?'))
24498 	  || (TARGET_THUMB2 && (code == '!'))
24499 	  || (TARGET_THUMB && (code == '_')));
24500 }
24501 
24502 /* Target hook for assembling integer objects.  The ARM version needs to
24503    handle word-sized values specially.  */
24504 static bool
24505 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
24506 {
24507   machine_mode mode;
24508 
24509   if (size == UNITS_PER_WORD && aligned_p)
24510     {
24511       fputs ("\t.word\t", asm_out_file);
24512       output_addr_const (asm_out_file, x);
24513 
24514       /* Mark symbols as position independent.  We only do this in the
24515 	 .text segment, not in the .data segment.  */
24516       if (NEED_GOT_RELOC && flag_pic && making_const_table &&
24517 	  (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
24518 	{
24519 	  /* See legitimize_pic_address for an explanation of the
24520 	     TARGET_VXWORKS_RTP check.  */
24521 	  /* References to weak symbols cannot be resolved locally:
24522 	     they may be overridden by a non-weak definition at link
24523 	     time.  */
24524 	  if (!arm_pic_data_is_text_relative
24525 	      || (GET_CODE (x) == SYMBOL_REF
24526 		  && (!SYMBOL_REF_LOCAL_P (x)
24527 		      || (SYMBOL_REF_DECL (x)
24528 			  ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
24529 		      || (SYMBOL_REF_FUNCTION_P (x)
24530 			  && !arm_fdpic_local_funcdesc_p (x)))))
24531 	    {
24532 	      if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24533 		fputs ("(GOTFUNCDESC)", asm_out_file);
24534 	      else
24535 		fputs ("(GOT)", asm_out_file);
24536 	    }
24537 	  else
24538 	    {
24539 	      if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24540 		fputs ("(GOTOFFFUNCDESC)", asm_out_file);
24541 	      else
24542 		{
24543 		  bool is_readonly;
24544 
24545 		  if (!TARGET_FDPIC
24546 		      || arm_is_segment_info_known (x, &is_readonly))
24547 		    fputs ("(GOTOFF)", asm_out_file);
24548 		  else
24549 		    fputs ("(GOT)", asm_out_file);
24550 		}
24551 	    }
24552 	}
24553 
24554       /* For FDPIC we also have to mark symbol for .data section.  */
24555       if (TARGET_FDPIC
24556 	  && !making_const_table
24557 	  && SYMBOL_REF_P (x)
24558 	  && SYMBOL_REF_FUNCTION_P (x))
24559 	fputs ("(FUNCDESC)", asm_out_file);
24560 
24561       fputc ('\n', asm_out_file);
24562       return true;
24563     }
24564 
24565   mode = GET_MODE (x);
24566 
24567   if (arm_vector_mode_supported_p (mode))
24568     {
24569       int i, units;
24570 
24571       gcc_assert (GET_CODE (x) == CONST_VECTOR);
24572 
24573       units = CONST_VECTOR_NUNITS (x);
24574       size = GET_MODE_UNIT_SIZE (mode);
24575 
24576       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
24577         for (i = 0; i < units; i++)
24578 	  {
24579 	    rtx elt = CONST_VECTOR_ELT (x, i);
24580 	    assemble_integer
24581 	      (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
24582 	  }
24583       else
24584         for (i = 0; i < units; i++)
24585           {
24586             rtx elt = CONST_VECTOR_ELT (x, i);
24587 	    assemble_real
24588 	      (*CONST_DOUBLE_REAL_VALUE (elt),
24589 	       as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
24590 	       i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
24591           }
24592 
24593       return true;
24594     }
24595 
24596   return default_assemble_integer (x, size, aligned_p);
24597 }
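/* Under -fpic, a word-sized SYMBOL_REF in the constant pool is emitted by
   the code above roughly as (illustrative symbol names):

	.word	local_sym(GOTOFF)	@ locally-resolvable data symbol
	.word	extern_sym(GOT)		@ preemptible or weak symbol

   with (GOTFUNCDESC), (GOTOFFFUNCDESC) and (FUNCDESC) used instead for
   function symbols when TARGET_FDPIC.  */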
24598 
24599 static void
24600 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
24601 {
24602   section *s;
24603 
24604   if (!TARGET_AAPCS_BASED)
24605     {
24606       (is_ctor ?
24607        default_named_section_asm_out_constructor
24608        : default_named_section_asm_out_destructor) (symbol, priority);
24609       return;
24610     }
24611 
24612   /* Put these in the .init_array section, using a special relocation.  */
24613   if (priority != DEFAULT_INIT_PRIORITY)
24614     {
24615       char buf[18];
24616       sprintf (buf, "%s.%.5u",
24617 	       is_ctor ? ".init_array" : ".fini_array",
24618 	       priority);
24619       s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
24620     }
24621   else if (is_ctor)
24622     s = ctors_section;
24623   else
24624     s = dtors_section;
24625 
24626   switch_to_section (s);
24627   assemble_align (POINTER_SIZE);
24628   fputs ("\t.word\t", asm_out_file);
24629   output_addr_const (asm_out_file, symbol);
24630   fputs ("(target1)\n", asm_out_file);
24631 }
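/* On AAPCS targets a prioritized constructor therefore ends up as something
   like (illustrative, priority 65):

	.section	.init_array.00065
	.align	2
	.word	ctor_fn(target1)

   where (target1) requests an R_ARM_TARGET1 relocation, which the linker
   may resolve as either absolute or relative.  */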
24632 
24633 /* Add a function to the list of static constructors.  */
24634 
24635 static void
24636 arm_elf_asm_constructor (rtx symbol, int priority)
24637 {
24638   arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
24639 }
24640 
24641 /* Add a function to the list of static destructors.  */
24642 
24643 static void
24644 arm_elf_asm_destructor (rtx symbol, int priority)
24645 {
24646   arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
24647 }
24648 
24649 /* A finite state machine takes care of noticing whether or not instructions
24650    can be conditionally executed, and thus decreases execution time and code
24651    size by deleting branch instructions.  The fsm is controlled by
24652    final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */
24653 
24654 /* The states of the fsm controlling condition codes are:
24655    0: normal, do nothing special
24656    1: make ASM_OUTPUT_OPCODE not output this instruction
24657    2: make ASM_OUTPUT_OPCODE not output this instruction
24658    3: make instructions conditional
24659    4: make instructions conditional
24660 
24661    State transitions (state->state by whom under condition):
24662    0 -> 1 final_prescan_insn if the `target' is a label
24663    0 -> 2 final_prescan_insn if the `target' is an unconditional branch
24664    1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
24665    2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
24666    3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
24667           (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
24668    4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
24669           (the target insn is arm_target_insn).
24670 
24671    If the jump clobbers the conditions then we use states 2 and 4.
24672 
24673    A similar thing can be done with conditional return insns.
24674 
24675    XXX In case the `target' is an unconditional branch, this conditionalising
24676    of the instructions always reduces code size, but not always execution
24677    time.  But then, I want to reduce the code size to somewhere near what
24678    /bin/cc produces.  */
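/* For example (illustrative), a sequence such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   can drop the branch and emit "addne r1, r1, #1" instead: the skipped
   instruction is predicated with the inverse of the branch condition.  */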
24679 
24680 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
24681    instructions.  When a COND_EXEC instruction is seen the subsequent
24682    instructions are scanned so that multiple conditional instructions can be
24683    combined into a single IT block.  arm_condexec_count and arm_condexec_mask
24684    specify the length and true/false mask for the IT block.  These will be
24685    decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
24686 
24687 /* Returns the index of the ARM condition code string in
24688    `arm_condition_codes', or ARM_NV if the comparison is invalid.
24689    COMPARISON should be an rtx like `(eq (...) (...))'.  */
24690 
24691 enum arm_cond_code
24692 maybe_get_arm_condition_code (rtx comparison)
24693 {
24694   machine_mode mode = GET_MODE (XEXP (comparison, 0));
24695   enum arm_cond_code code;
24696   enum rtx_code comp_code = GET_CODE (comparison);
24697 
24698   if (GET_MODE_CLASS (mode) != MODE_CC)
24699     mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
24700 			   XEXP (comparison, 1));
24701 
24702   switch (mode)
24703     {
24704     case E_CC_DNEmode: code = ARM_NE; goto dominance;
24705     case E_CC_DEQmode: code = ARM_EQ; goto dominance;
24706     case E_CC_DGEmode: code = ARM_GE; goto dominance;
24707     case E_CC_DGTmode: code = ARM_GT; goto dominance;
24708     case E_CC_DLEmode: code = ARM_LE; goto dominance;
24709     case E_CC_DLTmode: code = ARM_LT; goto dominance;
24710     case E_CC_DGEUmode: code = ARM_CS; goto dominance;
24711     case E_CC_DGTUmode: code = ARM_HI; goto dominance;
24712     case E_CC_DLEUmode: code = ARM_LS; goto dominance;
24713     case E_CC_DLTUmode: code = ARM_CC;
24714 
24715     dominance:
24716       if (comp_code == EQ)
24717 	return ARM_INVERSE_CONDITION_CODE (code);
24718       if (comp_code == NE)
24719 	return code;
24720       return ARM_NV;
24721 
24722     case E_CC_NZmode:
24723       switch (comp_code)
24724 	{
24725 	case NE: return ARM_NE;
24726 	case EQ: return ARM_EQ;
24727 	case GE: return ARM_PL;
24728 	case LT: return ARM_MI;
24729 	default: return ARM_NV;
24730 	}
24731 
24732     case E_CC_Zmode:
24733       switch (comp_code)
24734 	{
24735 	case NE: return ARM_NE;
24736 	case EQ: return ARM_EQ;
24737 	default: return ARM_NV;
24738 	}
24739 
24740     case E_CC_Nmode:
24741       switch (comp_code)
24742 	{
24743 	case NE: return ARM_MI;
24744 	case EQ: return ARM_PL;
24745 	default: return ARM_NV;
24746 	}
24747 
24748     case E_CCFPEmode:
24749     case E_CCFPmode:
24750       /* We can handle all cases except UNEQ and LTGT.  */
24751       switch (comp_code)
24752 	{
24753 	case GE: return ARM_GE;
24754 	case GT: return ARM_GT;
24755 	case LE: return ARM_LS;
24756 	case LT: return ARM_MI;
24757 	case NE: return ARM_NE;
24758 	case EQ: return ARM_EQ;
24759 	case ORDERED: return ARM_VC;
24760 	case UNORDERED: return ARM_VS;
24761 	case UNLT: return ARM_LT;
24762 	case UNLE: return ARM_LE;
24763 	case UNGT: return ARM_HI;
24764 	case UNGE: return ARM_PL;
24765 	  /* UNEQ and LTGT do not have a representation.  */
24766 	case UNEQ: /* Fall through.  */
24767 	case LTGT: /* Fall through.  */
24768 	default: return ARM_NV;
24769 	}
24770 
24771     case E_CC_SWPmode:
24772       switch (comp_code)
24773 	{
24774 	case NE: return ARM_NE;
24775 	case EQ: return ARM_EQ;
24776 	case GE: return ARM_LE;
24777 	case GT: return ARM_LT;
24778 	case LE: return ARM_GE;
24779 	case LT: return ARM_GT;
24780 	case GEU: return ARM_LS;
24781 	case GTU: return ARM_CC;
24782 	case LEU: return ARM_CS;
24783 	case LTU: return ARM_HI;
24784 	default: return ARM_NV;
24785 	}
24786 
24787     case E_CC_Cmode:
24788       switch (comp_code)
24789 	{
24790 	case LTU: return ARM_CS;
24791 	case GEU: return ARM_CC;
24792 	default: return ARM_NV;
24793 	}
24794 
24795     case E_CC_NVmode:
24796       switch (comp_code)
24797 	{
24798 	case GE: return ARM_GE;
24799 	case LT: return ARM_LT;
24800 	default: return ARM_NV;
24801 	}
24802 
24803     case E_CC_Bmode:
24804       switch (comp_code)
24805 	{
24806 	case GEU: return ARM_CS;
24807 	case LTU: return ARM_CC;
24808 	default: return ARM_NV;
24809 	}
24810 
24811     case E_CC_Vmode:
24812       switch (comp_code)
24813 	{
24814 	case NE: return ARM_VS;
24815 	case EQ: return ARM_VC;
24816 	default: return ARM_NV;
24817 	}
24818 
24819     case E_CC_ADCmode:
24820       switch (comp_code)
24821 	{
24822 	case GEU: return ARM_CS;
24823 	case LTU: return ARM_CC;
24824 	default: return ARM_NV;
24825 	}
24826 
24827     case E_CCmode:
24828     case E_CC_RSBmode:
24829       switch (comp_code)
24830 	{
24831 	case NE: return ARM_NE;
24832 	case EQ: return ARM_EQ;
24833 	case GE: return ARM_GE;
24834 	case GT: return ARM_GT;
24835 	case LE: return ARM_LE;
24836 	case LT: return ARM_LT;
24837 	case GEU: return ARM_CS;
24838 	case GTU: return ARM_HI;
24839 	case LEU: return ARM_LS;
24840 	case LTU: return ARM_CC;
24841 	default: return ARM_NV;
24842 	}
24843 
24844     default: gcc_unreachable ();
24845     }
24846 }
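/* Example: (ge (reg:CC) (const_int 0)) yields ARM_GE, whereas in CC_SWPmode
   (operands of the comparison swapped) LTU yields ARM_HI, the unsigned test
   that holds once the operands are exchanged.  */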
24847 
24848 /* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
24849 static enum arm_cond_code
24850 get_arm_condition_code (rtx comparison)
24851 {
24852   enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
24853   gcc_assert (code != ARM_NV);
24854   return code;
24855 }
24856 
24857 /* Implement TARGET_FIXED_CONDITION_CODE_REGS.  We only have condition
24858    code registers when not targeting Thumb1.  The VFP condition register
24859    only exists when generating hard-float code.  */
24860 static bool
24861 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
24862 {
24863   if (!TARGET_32BIT)
24864     return false;
24865 
24866   *p1 = CC_REGNUM;
24867   *p2 = TARGET_VFP_BASE ? VFPCC_REGNUM : INVALID_REGNUM;
24868   return true;
24869 }
24870 
24871 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
24872    instructions.  */
24873 void
24874 thumb2_final_prescan_insn (rtx_insn *insn)
24875 {
24876   rtx_insn *first_insn = insn;
24877   rtx body = PATTERN (insn);
24878   rtx predicate;
24879   enum arm_cond_code code;
24880   int n;
24881   int mask;
24882   int max;
24883 
24884   /* max_insns_skipped in the tune was already taken into account in the
24885      cost model of ifcvt pass when generating COND_EXEC insns.  At this stage
24886      just emit the IT blocks as we can.  It does not make sense to split
24887      the IT blocks.  */
24888   max = MAX_INSN_PER_IT_BLOCK;
24889 
24890   /* Remove the previous insn from the count of insns to be output.  */
24891   if (arm_condexec_count)
24892       arm_condexec_count--;
24893 
24894   /* Nothing to do if we are already inside a conditional block.  */
24895   if (arm_condexec_count)
24896     return;
24897 
24898   if (GET_CODE (body) != COND_EXEC)
24899     return;
24900 
24901   /* Conditional jumps are implemented directly.  */
24902   if (JUMP_P (insn))
24903     return;
24904 
24905   predicate = COND_EXEC_TEST (body);
24906   arm_current_cc = get_arm_condition_code (predicate);
24907 
24908   n = get_attr_ce_count (insn);
24909   arm_condexec_count = 1;
24910   arm_condexec_mask = (1 << n) - 1;
24911   arm_condexec_masklen = n;
24912   /* See if subsequent instructions can be combined into the same block.  */
24913   for (;;)
24914     {
24915       insn = next_nonnote_insn (insn);
24916 
24917       /* Jumping into the middle of an IT block is illegal, so a label or
24918          barrier terminates the block.  */
24919       if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
24920 	break;
24921 
24922       body = PATTERN (insn);
24923       /* USE and CLOBBER aren't really insns, so just skip them.  */
24924       if (GET_CODE (body) == USE
24925 	  || GET_CODE (body) == CLOBBER)
24926 	continue;
24927 
24928       /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
24929       if (GET_CODE (body) != COND_EXEC)
24930 	break;
24931       /* Maximum number of conditionally executed instructions in a block.  */
24932       n = get_attr_ce_count (insn);
24933       if (arm_condexec_masklen + n > max)
24934 	break;
24935 
24936       predicate = COND_EXEC_TEST (body);
24937       code = get_arm_condition_code (predicate);
24938       mask = (1 << n) - 1;
24939       if (arm_current_cc == code)
24940 	arm_condexec_mask |= (mask << arm_condexec_masklen);
24941       else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
24942 	break;
24943 
24944       arm_condexec_count++;
24945       arm_condexec_masklen += n;
24946 
24947       /* A jump must be the last instruction in a conditional block.  */
24948       if (JUMP_P (insn))
24949 	break;
24950     }
24951   /* Restore recog_data (getting the attributes of other insns can
24952      destroy this array, but final.c assumes that it remains intact
24953      across this call).  */
24954   extract_constrain_insn_cached (first_insn);
24955 }
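/* For instance (a sketch): three consecutive COND_EXEC insns predicated
   eq, eq, ne are grouped into one block and later emitted under a single
   "itte eq", with arm_condexec_mask recording which slots take the base
   condition and which its inverse.  */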
24956 
24957 void
24958 arm_final_prescan_insn (rtx_insn *insn)
24959 {
24960   /* BODY will hold the body of INSN.  */
24961   rtx body = PATTERN (insn);
24962 
24963   /* This will be 1 if trying to repeat the trick, and things need to be
24964      reversed if it appears to fail.  */
24965   int reverse = 0;
24966 
24967   /* If we start with a return insn, we only succeed if we find another one.  */
24968   int seeking_return = 0;
24969   enum rtx_code return_code = UNKNOWN;
24970 
24971   /* START_INSN will hold the insn from where we start looking.  This is the
24972      first insn after the following code_label if REVERSE is true.  */
24973   rtx_insn *start_insn = insn;
24974 
24975   /* If in state 4, check if the target branch is reached, in order to
24976      change back to state 0.  */
24977   if (arm_ccfsm_state == 4)
24978     {
24979       if (insn == arm_target_insn)
24980 	{
24981 	  arm_target_insn = NULL;
24982 	  arm_ccfsm_state = 0;
24983 	}
24984       return;
24985     }
24986 
24987   /* If in state 3, it is possible to repeat the trick, if this insn is an
24988      unconditional branch to a label, and immediately following this branch
24989      is the previous target label which is only used once, and the label this
24990      branch jumps to is not too far off.  */
24991   if (arm_ccfsm_state == 3)
24992     {
24993       if (simplejump_p (insn))
24994 	{
24995 	  start_insn = next_nonnote_insn (start_insn);
24996 	  if (BARRIER_P (start_insn))
24997 	    {
24998 	      /* XXX Isn't this always a barrier?  */
24999 	      start_insn = next_nonnote_insn (start_insn);
25000 	    }
25001 	  if (LABEL_P (start_insn)
25002 	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25003 	      && LABEL_NUSES (start_insn) == 1)
25004 	    reverse = TRUE;
25005 	  else
25006 	    return;
25007 	}
25008       else if (ANY_RETURN_P (body))
25009         {
25010 	  start_insn = next_nonnote_insn (start_insn);
25011 	  if (BARRIER_P (start_insn))
25012 	    start_insn = next_nonnote_insn (start_insn);
25013 	  if (LABEL_P (start_insn)
25014 	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25015 	      && LABEL_NUSES (start_insn) == 1)
25016 	    {
25017 	      reverse = TRUE;
25018 	      seeking_return = 1;
25019 	      return_code = GET_CODE (body);
25020 	    }
25021 	  else
25022 	    return;
25023         }
25024       else
25025 	return;
25026     }
25027 
25028   gcc_assert (!arm_ccfsm_state || reverse);
25029   if (!JUMP_P (insn))
25030     return;
25031 
25032   /* This jump might be paralleled with a clobber of the condition codes;
25033      the jump should always come first.  */
25034   if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
25035     body = XVECEXP (body, 0, 0);
25036 
25037   if (reverse
25038       || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
25039 	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
25040     {
25041       int insns_skipped;
25042       int fail = FALSE, succeed = FALSE;
25043       /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
25044       int then_not_else = TRUE;
25045       rtx_insn *this_insn = start_insn;
25046       rtx label = 0;
25047 
25048       /* Register the insn jumped to.  */
25049       if (reverse)
25050         {
25051 	  if (!seeking_return)
25052 	    label = XEXP (SET_SRC (body), 0);
25053         }
25054       else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
25055 	label = XEXP (XEXP (SET_SRC (body), 1), 0);
25056       else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
25057 	{
25058 	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
25059 	  then_not_else = FALSE;
25060 	}
25061       else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
25062 	{
25063 	  seeking_return = 1;
25064 	  return_code = GET_CODE (XEXP (SET_SRC (body), 1));
25065 	}
25066       else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
25067         {
25068 	  seeking_return = 1;
25069 	  return_code = GET_CODE (XEXP (SET_SRC (body), 2));
25070 	  then_not_else = FALSE;
25071         }
25072       else
25073 	gcc_unreachable ();
25074 
25075       /* See how many insns this branch skips, and what kind of insns.  If all
25076 	 insns are okay, and the label or unconditional branch to the same
25077 	 label is not too far away, succeed.  */
25078       for (insns_skipped = 0;
25079 	   !fail && !succeed && insns_skipped++ < max_insns_skipped;)
25080 	{
25081 	  rtx scanbody;
25082 
25083 	  this_insn = next_nonnote_insn (this_insn);
25084 	  if (!this_insn)
25085 	    break;
25086 
25087 	  switch (GET_CODE (this_insn))
25088 	    {
25089 	    case CODE_LABEL:
25090 	      /* Succeed if it is the target label, otherwise fail since
25091 		 control falls in from somewhere else.  */
25092 	      if (this_insn == label)
25093 		{
25094 		  arm_ccfsm_state = 1;
25095 		  succeed = TRUE;
25096 		}
25097 	      else
25098 		fail = TRUE;
25099 	      break;
25100 
25101 	    case BARRIER:
25102 	      /* Succeed if the following insn is the target label.
25103 		 Otherwise fail.
25104 		 If return insns are used then the last insn in a function
25105 		 will be a barrier.  */
25106 	      this_insn = next_nonnote_insn (this_insn);
25107 	      if (this_insn && this_insn == label)
25108 		{
25109 		  arm_ccfsm_state = 1;
25110 		  succeed = TRUE;
25111 		}
25112 	      else
25113 		fail = TRUE;
25114 	      break;
25115 
25116 	    case CALL_INSN:
25117 	      /* The AAPCS says that conditional calls should not be
25118 		 used since they make interworking inefficient (the
25119 		 linker can't transform BL<cond> into BLX).  That's
25120 		 only a problem if the machine has BLX.  */
25121 	      if (arm_arch5t)
25122 		{
25123 		  fail = TRUE;
25124 		  break;
25125 		}
25126 
25127 	      /* Succeed if the following insn is the target label, or
25128 		 if the following two insns are a barrier and the
25129 		 target label.  */
25130 	      this_insn = next_nonnote_insn (this_insn);
25131 	      if (this_insn && BARRIER_P (this_insn))
25132 		this_insn = next_nonnote_insn (this_insn);
25133 
25134 	      if (this_insn && this_insn == label
25135 		  && insns_skipped < max_insns_skipped)
25136 		{
25137 		  arm_ccfsm_state = 1;
25138 		  succeed = TRUE;
25139 		}
25140 	      else
25141 		fail = TRUE;
25142 	      break;
25143 
25144 	    case JUMP_INSN:
25145       	      /* If this is an unconditional branch to the same label, succeed.
25146 		 If it is to another label, do nothing.  If it is conditional,
25147 		 fail.  */
25148 	      /* XXX Probably, the tests for SET and the PC are
25149 		 unnecessary.  */
25150 
25151 	      scanbody = PATTERN (this_insn);
25152 	      if (GET_CODE (scanbody) == SET
25153 		  && GET_CODE (SET_DEST (scanbody)) == PC)
25154 		{
25155 		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
25156 		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
25157 		    {
25158 		      arm_ccfsm_state = 2;
25159 		      succeed = TRUE;
25160 		    }
25161 		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
25162 		    fail = TRUE;
25163 		}
25164 	      /* Fail if a conditional return is undesirable (e.g. on a
25165 		 StrongARM), but still allow this if optimizing for size.  */
25166 	      else if (GET_CODE (scanbody) == return_code
25167 		       && !use_return_insn (TRUE, NULL)
25168 		       && !optimize_size)
25169 		fail = TRUE;
25170 	      else if (GET_CODE (scanbody) == return_code)
25171 	        {
25172 		  arm_ccfsm_state = 2;
25173 		  succeed = TRUE;
25174 	        }
25175 	      else if (GET_CODE (scanbody) == PARALLEL)
25176 	        {
25177 		  switch (get_attr_conds (this_insn))
25178 		    {
25179 		    case CONDS_NOCOND:
25180 		      break;
25181 		    default:
25182 		      fail = TRUE;
25183 		      break;
25184 		    }
25185 		}
25186 	      else
25187 		fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */
25188 
25189 	      break;
25190 
25191 	    case INSN:
25192 	      /* Instructions using or affecting the condition codes make it
25193 		 fail.  */
25194 	      scanbody = PATTERN (this_insn);
25195 	      if (!(GET_CODE (scanbody) == SET
25196 		    || GET_CODE (scanbody) == PARALLEL)
25197 		  || get_attr_conds (this_insn) != CONDS_NOCOND)
25198 		fail = TRUE;
25199 	      break;
25200 
25201 	    default:
25202 	      break;
25203 	    }
25204 	}
25205       if (succeed)
25206 	{
25207 	  if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
25208 	    arm_target_label = CODE_LABEL_NUMBER (label);
25209 	  else
25210 	    {
25211 	      gcc_assert (seeking_return || arm_ccfsm_state == 2);
25212 
25213 	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
25214 	        {
25215 		  this_insn = next_nonnote_insn (this_insn);
25216 		  gcc_assert (!this_insn
25217 			      || (!BARRIER_P (this_insn)
25218 				  && !LABEL_P (this_insn)));
25219 	        }
25220 	      if (!this_insn)
25221 	        {
25222 		  /* Oh, dear!  We ran off the end; give up.  */
25223 		  extract_constrain_insn_cached (insn);
25224 		  arm_ccfsm_state = 0;
25225 		  arm_target_insn = NULL;
25226 		  return;
25227 	        }
25228 	      arm_target_insn = this_insn;
25229 	    }
25230 
25231 	  /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
25232 	     what it was.  */
25233 	  if (!reverse)
25234 	    arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
25235 
25236 	  if (reverse || then_not_else)
25237 	    arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
25238 	}
25239 
25240       /* Restore recog_data (getting the attributes of other insns can
25241 	 destroy this array, but final.c assumes that it remains intact
25242 	 across this call).  */
25243       extract_constrain_insn_cached (insn);
25244     }
25245 }
25246 
25247 /* Output IT instructions.  */
25248 void
25249 thumb2_asm_output_opcode (FILE * stream)
25250 {
25251   char buff[5];
25252   int n;
25253 
25254   if (arm_condexec_mask)
25255     {
25256       for (n = 0; n < arm_condexec_masklen; n++)
25257 	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
25258       buff[n] = 0;
25259       asm_fprintf(stream, "i%s\t%s\n\t", buff,
25260 		  arm_condition_codes[arm_current_cc]);
25261       arm_condexec_mask = 0;
25262     }
25263 }
25264 
25265 /* Implement TARGET_HARD_REGNO_NREGS.  On the ARM core regs are
25266    UNITS_PER_WORD bytes wide.  */
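/* For example, with UNITS_PER_WORD == 4 a DImode value occupies
   ARM_NUM_REGS (DImode) == 2 consecutive core registers, whereas on
   32-bit (Arm/Thumb-2) targets the status and other special registers
   above the PC are counted as a single register regardless of mode.  */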
25267 static unsigned int
25268 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
25269 {
25270   if (TARGET_32BIT
25271       && regno > PC_REGNUM
25272       && regno != FRAME_POINTER_REGNUM
25273       && regno != ARG_POINTER_REGNUM
25274       && !IS_VFP_REGNUM (regno))
25275     return 1;
25276 
25277   return ARM_NUM_REGS (mode);
25278 }
25279 
25280 /* Implement TARGET_HARD_REGNO_MODE_OK.  */
25281 static bool
25282 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
25283 {
25284   if (GET_MODE_CLASS (mode) == MODE_CC)
25285     return (regno == CC_REGNUM
25286 	    || (TARGET_VFP_BASE
25287 		&& regno == VFPCC_REGNUM));
25288 
25289   if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
25290     return false;
25291 
25292   if (IS_VPR_REGNUM (regno))
25293     return mode == HImode;
25294 
25295   if (TARGET_THUMB1)
25296     /* For the Thumb we only allow values bigger than SImode in
25297        registers 0 - 6, so that there is always a second low
25298        register available to hold the upper part of the value.
25299        We probably ought to ensure that the register is the
25300        start of an even numbered register pair.  */
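    /* For example, a DImode value may start in r5 (occupying r5 and r6)
       but not in r7, which would leave no low register for the upper
       word.  */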
25301     return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
25302 
25303   if (TARGET_VFP_BASE && IS_VFP_REGNUM (regno))
25304     {
25305       if (mode == DFmode || mode == DImode)
25306 	return VFP_REGNO_OK_FOR_DOUBLE (regno);
25307 
25308       if (mode == HFmode || mode == BFmode || mode == HImode
25309 	  || mode == SFmode || mode == SImode)
25310 	return VFP_REGNO_OK_FOR_SINGLE (regno);
25311 
25312       if (TARGET_NEON)
25313         return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
25314                || (VALID_NEON_QREG_MODE (mode)
25315                    && NEON_REGNO_OK_FOR_QUAD (regno))
25316 	       || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
25317 	       || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
25318 	       || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25319 	       || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
25320 	       || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
25321      if (TARGET_HAVE_MVE)
25322        return ((VALID_MVE_MODE (mode) && NEON_REGNO_OK_FOR_QUAD (regno))
25323 	       || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25324 	       || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8)));
25325 
25326       return false;
25327     }
25328 
25329   if (TARGET_REALLY_IWMMXT)
25330     {
25331       if (IS_IWMMXT_GR_REGNUM (regno))
25332 	return mode == SImode;
25333 
25334       if (IS_IWMMXT_REGNUM (regno))
25335 	return VALID_IWMMXT_REG_MODE (mode);
25336     }
25337 
25338   /* We allow almost any value to be stored in the general registers.
25339      Restrict doubleword quantities to even register pairs in ARM state
25340      so that we can use ldrd. The same restriction applies for MVE
25341      in order to support Armv8.1-M Mainline instructions.
25342      Do not allow very large Neon structure opaque modes in general
25343      registers; they would use too many.  */
25344   if (regno <= LAST_ARM_REGNUM)
25345     {
25346       if (ARM_NUM_REGS (mode) > 4)
25347 	return false;
25348 
25349       if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
25350 	return true;
25351 
25352       return !((TARGET_LDRD || TARGET_CDE)
25353 	       && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
25354     }
25355 
25356   if (regno == FRAME_POINTER_REGNUM
25357       || regno == ARG_POINTER_REGNUM)
25358     /* We only allow integers in the fake hard registers.  */
25359     return GET_MODE_CLASS (mode) == MODE_INT;
25360 
25361   return false;
25362 }
25363 
25364 /* Implement TARGET_MODES_TIEABLE_P.  */
25365 
25366 static bool
25367 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
25368 {
25369   if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
25370     return true;
25371 
25372   /* We specifically want to allow elements of "structure" modes to
25373      be tieable to the structure.  This more general condition allows
25374      other rarer situations too.  */
25375   if ((TARGET_NEON
25376        && (VALID_NEON_DREG_MODE (mode1)
25377 	   || VALID_NEON_QREG_MODE (mode1)
25378 	   || VALID_NEON_STRUCT_MODE (mode1))
25379        && (VALID_NEON_DREG_MODE (mode2)
25380 	   || VALID_NEON_QREG_MODE (mode2)
25381 	   || VALID_NEON_STRUCT_MODE (mode2)))
25382       || (TARGET_HAVE_MVE
25383 	  && (VALID_MVE_MODE (mode1)
25384 	      || VALID_MVE_STRUCT_MODE (mode1))
25385 	  && (VALID_MVE_MODE (mode2)
25386 	      || VALID_MVE_STRUCT_MODE (mode2))))
25387     return true;
25388 
25389   return false;
25390 }
25391 
25392 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
25393    not used in arm mode.  */
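/* For example, under TARGET_THUMB2 registers r0-r7 classify as LO_REGS
   and the remaining core registers as HI_REGS, while in Arm state every
   core register is simply GENERAL_REGS; the PC is NO_REGS in all cases.  */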
25394 
25395 enum reg_class
25396 arm_regno_class (int regno)
25397 {
25398   if (regno == PC_REGNUM)
25399     return NO_REGS;
25400 
25401   if (IS_VPR_REGNUM (regno))
25402     return VPR_REG;
25403 
25404   if (TARGET_THUMB1)
25405     {
25406       if (regno == STACK_POINTER_REGNUM)
25407 	return STACK_REG;
25408       if (regno == CC_REGNUM)
25409 	return CC_REG;
25410       if (regno < 8)
25411 	return LO_REGS;
25412       return HI_REGS;
25413     }
25414 
25415   if (TARGET_THUMB2 && regno < 8)
25416     return LO_REGS;
25417 
25418   if (   regno <= LAST_ARM_REGNUM
25419       || regno == FRAME_POINTER_REGNUM
25420       || regno == ARG_POINTER_REGNUM)
25421     return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
25422 
25423   if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
25424     return TARGET_THUMB2 ? CC_REG : NO_REGS;
25425 
25426   if (IS_VFP_REGNUM (regno))
25427     {
25428       if (regno <= D7_VFP_REGNUM)
25429 	return VFP_D0_D7_REGS;
25430       else if (regno <= LAST_LO_VFP_REGNUM)
25431         return VFP_LO_REGS;
25432       else
25433         return VFP_HI_REGS;
25434     }
25435 
25436   if (IS_IWMMXT_REGNUM (regno))
25437     return IWMMXT_REGS;
25438 
25439   if (IS_IWMMXT_GR_REGNUM (regno))
25440     return IWMMXT_GR_REGS;
25441 
25442   return NO_REGS;
25443 }
25444 
25445 /* Handle a special case when computing the offset
25446    of an argument from the frame pointer.  */
25447 int
25448 arm_debugger_arg_offset (int value, rtx addr)
25449 {
25450   rtx_insn *insn;
25451 
25452   /* We are only interested in the case where dbxout_parms() failed to compute the offset.  */
25453   if (value != 0)
25454     return 0;
25455 
25456   /* We can only cope with the case where the address is held in a register.  */
25457   if (!REG_P (addr))
25458     return 0;
25459 
25460   /* If we are using the frame pointer to point at the argument, then
25461      an offset of 0 is correct.  */
25462   if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
25463     return 0;
25464 
25465   /* If we are using the stack pointer to point at the
25466      argument, then an offset of 0 is correct.  */
25467   /* ??? Check this is consistent with thumb2 frame layout.  */
25468   if ((TARGET_THUMB || !frame_pointer_needed)
25469       && REGNO (addr) == SP_REGNUM)
25470     return 0;
25471 
25472   /* Oh dear.  The argument is pointed to by a register rather
25473      than being held in a register, or being stored at a known
25474      offset from the frame pointer.  Since GDB only understands
25475      those two kinds of argument we must translate the address
25476      held in the register into an offset from the frame pointer.
25477      We do this by searching through the insns for the function
25478      looking to see where this register gets its value.  If the
25479      register is initialized from the frame pointer plus an offset
25480      then we are in luck and we can continue, otherwise we give up.
25481 
25482      This code is exercised by producing debugging information
25483      for a function with arguments like this:
25484 
25485            double func (double a, double b, int c, double d) {return d;}
25486 
25487      Without this code the stab for parameter 'd' will be set to
25488      an offset of 0 from the frame pointer, rather than 8.  */
25489 
25490   /* The if() statement says:
25491 
25492      If the insn is a normal instruction
25493      and if the insn is setting the value in a register
25494      and if the register being set is the register holding the address of the argument
25495      and if the address is computed by an addition
25496      that involves adding to a register
25497      which is the frame pointer
25498      a constant integer
25499 
25500      then...  */
25501 
25502   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25503     {
25504       if (   NONJUMP_INSN_P (insn)
25505 	  && GET_CODE (PATTERN (insn)) == SET
25506 	  && REGNO    (XEXP (PATTERN (insn), 0)) == REGNO (addr)
25507 	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
25508 	  && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
25509 	  && REGNO    (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
25510 	  && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
25511 	     )
25512 	{
25513 	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
25514 
25515 	  break;
25516 	}
25517     }
25518 
25519   if (value == 0)
25520     {
25521       debug_rtx (addr);
25522       warning (0, "unable to compute real location of stacked parameter");
25523       value = 8; /* XXX magic hack */
25524     }
25525 
25526   return value;
25527 }
25528 
25529 /* Implement TARGET_PROMOTED_TYPE.  */
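/* Only the ACLE __fp16 type is promoted: arithmetic on __fp16 values is
   carried out in float.  _Float16, when available, is not promoted here.  */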
25530 
25531 static tree
25532 arm_promoted_type (const_tree t)
25533 {
25534   if (SCALAR_FLOAT_TYPE_P (t)
25535       && TYPE_PRECISION (t) == 16
25536       && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
25537     return float_type_node;
25538   return NULL_TREE;
25539 }
25540 
25541 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25542    This simply adds HFmode as a supported mode; even though we don't
25543    implement arithmetic on this type directly, it's supported by
25544    optabs conversions, much the way the double-word arithmetic is
25545    special-cased in the default hook.  */
25546 
25547 static bool
25548 arm_scalar_mode_supported_p (scalar_mode mode)
25549 {
25550   if (mode == HFmode)
25551     return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
25552   else if (ALL_FIXED_POINT_MODE_P (mode))
25553     return true;
25554   else
25555     return default_scalar_mode_supported_p (mode);
25556 }
25557 
25558 /* Set the value of FLT_EVAL_METHOD.
25559    ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
25560 
25561     0: evaluate all operations and constants, whose semantic type has at
25562        most the range and precision of type float, to the range and
25563        precision of float; evaluate all other operations and constants to
25564        the range and precision of the semantic type;
25565 
25566     N, where _FloatN is a supported interchange floating type
25567        evaluate all operations and constants, whose semantic type has at
25568        most the range and precision of _FloatN type, to the range and
25569        precision of the _FloatN type; evaluate all other operations and
25570        constants to the range and precision of the semantic type;
25571 
25572    If we have the ARMv8.2-A extensions then we support _Float16 in native
25573    precision, so we should set this to 16.  Otherwise, we support the type,
25574    but want to evaluate expressions in float precision, so set this to
25575    0.  */
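/* For example, with the Armv8.2-A FP16 extension enabled (e.g. via
   -march=armv8.2-a+fp16) a product of two _Float16 values is evaluated
   directly in HFmode; without the extension the operands are promoted
   and the multiplication is carried out to float precision.  */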
25576 
25577 static enum flt_eval_method
25578 arm_excess_precision (enum excess_precision_type type)
25579 {
25580   switch (type)
25581     {
25582       case EXCESS_PRECISION_TYPE_FAST:
25583       case EXCESS_PRECISION_TYPE_STANDARD:
25584 	/* We can calculate either in 16-bit range and precision or
25585 	   32-bit range and precision.  Make that decision based on whether
25586 	   we have native support for the ARMv8.2-A 16-bit floating-point
25587 	   instructions or not.  */
25588 	return (TARGET_VFP_FP16INST
25589 		? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
25590 		: FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
25591       case EXCESS_PRECISION_TYPE_IMPLICIT:
25592 	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
25593       default:
25594 	gcc_unreachable ();
25595     }
25596   return FLT_EVAL_METHOD_UNPREDICTABLE;
25597 }
25598 
25599 
25600 /* Implement TARGET_FLOATN_MODE.  Make very sure that we don't provide
25601    _Float16 if we are using anything other than ieee format for 16-bit
25602    floating point.  Otherwise, punt to the default implementation.  */
25603 static opt_scalar_float_mode
25604 arm_floatn_mode (int n, bool extended)
25605 {
25606   if (!extended && n == 16)
25607     {
25608       if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
25609 	return HFmode;
25610       return opt_scalar_float_mode ();
25611     }
25612 
25613   return default_floatn_mode (n, extended);
25614 }
25615 
25616 
25617 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25618    not to early-clobber SRC registers in the process.
25619 
25620    We assume that the operands described by SRC and DEST represent a
25621    decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
25622    number of components into which the copy has been decomposed.  */
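/* For example, when copying the overlapping pair {d1, d2} <- {d0, d1},
   the component moves must be emitted in reverse order (d2 <- d1 first)
   so that d1 is read before it is overwritten; the forward order is safe
   whenever the destination starts at a lower register number or the
   registers do not overlap at all.  */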
25623 void
25624 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25625 {
25626   unsigned int i;
25627 
25628   if (!reg_overlap_mentioned_p (operands[0], operands[1])
25629       || REGNO (operands[0]) < REGNO (operands[1]))
25630     {
25631       for (i = 0; i < count; i++)
25632 	{
25633 	  operands[2 * i] = dest[i];
25634 	  operands[2 * i + 1] = src[i];
25635 	}
25636     }
25637   else
25638     {
25639       for (i = 0; i < count; i++)
25640 	{
25641 	  operands[2 * i] = dest[count - i - 1];
25642 	  operands[2 * i + 1] = src[count - i - 1];
25643 	}
25644     }
25645 }
25646 
25647 /* Split operands into moves from op[1] + op[2] into op[0].  */
25648 
25649 void
25650 neon_split_vcombine (rtx operands[3])
25651 {
25652   unsigned int dest = REGNO (operands[0]);
25653   unsigned int src1 = REGNO (operands[1]);
25654   unsigned int src2 = REGNO (operands[2]);
25655   machine_mode halfmode = GET_MODE (operands[1]);
25656   unsigned int halfregs = REG_NREGS (operands[1]);
25657   rtx destlo, desthi;
25658 
25659   if (src1 == dest && src2 == dest + halfregs)
25660     {
25661       /* No-op move.  Can't split to nothing; emit something.  */
25662       emit_note (NOTE_INSN_DELETED);
25663       return;
25664     }
25665 
25666   /* Preserve register attributes for variable tracking.  */
25667   destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25668   desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25669 			       GET_MODE_SIZE (halfmode));
25670 
25671   /* Special case of reversed high/low parts.  Use VSWP.  */
25672   if (src2 == dest && src1 == dest + halfregs)
25673     {
25674       rtx x = gen_rtx_SET (destlo, operands[1]);
25675       rtx y = gen_rtx_SET (desthi, operands[2]);
25676       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25677       return;
25678     }
25679 
25680   if (!reg_overlap_mentioned_p (operands[2], destlo))
25681     {
25682       /* Try to avoid unnecessary moves if part of the result
25683 	 is in the right place already.  */
25684       if (src1 != dest)
25685 	emit_move_insn (destlo, operands[1]);
25686       if (src2 != dest + halfregs)
25687 	emit_move_insn (desthi, operands[2]);
25688     }
25689   else
25690     {
25691       if (src2 != dest + halfregs)
25692 	emit_move_insn (desthi, operands[2]);
25693       if (src1 != dest)
25694 	emit_move_insn (destlo, operands[1]);
25695     }
25696 }
25697 
25698 /* Return the number (counting from 0) of
25699    the least significant set bit in MASK.  */
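/* For example, number_of_first_bit_set (0x28) == 3.  */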
25700 
25701 inline static int
25702 number_of_first_bit_set (unsigned mask)
25703 {
25704   return ctz_hwi (mask);
25705 }
25706 
25707 /* Like emit_multi_reg_push, but allowing for a different set of
25708    registers to be described as saved.  MASK is the set of registers
25709    to be saved; REAL_REGS is the set of registers to be described as
25710    saved.  If REAL_REGS is 0, only describe the stack adjustment.  */
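/* For example, MASK == (1 << 4) | (1 << 7) emits a single push of r4 and
   r7 with an 8-byte pre-decrement of the stack pointer, while REAL_REGS
   determines which registers the attached REG_FRAME_RELATED_EXPR note
   describes as having been saved.  */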
25711 
25712 static rtx_insn *
25713 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
25714 {
25715   unsigned long regno;
25716   rtx par[10], tmp, reg;
25717   rtx_insn *insn;
25718   int i, j;
25719 
25720   /* Build the parallel of the registers actually being stored.  */
25721   for (i = 0; mask; ++i, mask &= mask - 1)
25722     {
25723       regno = ctz_hwi (mask);
25724       reg = gen_rtx_REG (SImode, regno);
25725 
25726       if (i == 0)
25727 	tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
25728       else
25729 	tmp = gen_rtx_USE (VOIDmode, reg);
25730 
25731       par[i] = tmp;
25732     }
25733 
25734   tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25735   tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
25736   tmp = gen_frame_mem (BLKmode, tmp);
25737   tmp = gen_rtx_SET (tmp, par[0]);
25738   par[0] = tmp;
25739 
25740   tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
25741   insn = emit_insn (tmp);
25742 
25743   /* Always build the stack adjustment note for unwind info.  */
25744   tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25745   tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
25746   par[0] = tmp;
25747 
25748   /* Build the parallel of the registers recorded as saved for unwind.  */
25749   for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
25750     {
25751       regno = ctz_hwi (real_regs);
25752       reg = gen_rtx_REG (SImode, regno);
25753 
25754       tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
25755       tmp = gen_frame_mem (SImode, tmp);
25756       tmp = gen_rtx_SET (tmp, reg);
25757       RTX_FRAME_RELATED_P (tmp) = 1;
25758       par[j + 1] = tmp;
25759     }
25760 
25761   if (j == 0)
25762     tmp = par[0];
25763   else
25764     {
25765       RTX_FRAME_RELATED_P (par[0]) = 1;
25766       tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
25767     }
25768 
25769   add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
25770 
25771   return insn;
25772 }
25773 
25774 /* Emit code to push or pop registers to or from the stack.  F is the
25775    assembly file.  MASK is the registers to pop.  */
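/* For example, MASK == (1 << 4) | (1 << 5) | (1 << PC_REGNUM) normally
   emits "pop {r4, r5, pc}"; if interworking, backtracing, an EH return
   or a CMSE entry function is involved, the low registers are popped and
   the actual return is delegated to thumb_exit instead.  */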
25776 static void
25777 thumb_pop (FILE *f, unsigned long mask)
25778 {
25779   int regno;
25780   int lo_mask = mask & 0xFF;
25781 
25782   gcc_assert (mask);
25783 
25784   if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
25785     {
25786       /* Special case.  Do not generate a POP PC statement here, do it in
25787 	 thumb_exit ().  */
25788       thumb_exit (f, -1);
25789       return;
25790     }
25791 
25792   fprintf (f, "\tpop\t{");
25793 
25794   /* Look at the low registers first.  */
25795   for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
25796     {
25797       if (lo_mask & 1)
25798 	{
25799 	  asm_fprintf (f, "%r", regno);
25800 
25801 	  if ((lo_mask & ~1) != 0)
25802 	    fprintf (f, ", ");
25803 	}
25804     }
25805 
25806   if (mask & (1 << PC_REGNUM))
25807     {
25808       /* Catch popping the PC.  */
25809       if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
25810 	  || IS_CMSE_ENTRY (arm_current_func_type ()))
25811 	{
25812 	  /* The PC is never popped directly; instead
25813 	     it is popped into r3 and then BX is used.  */
25814 	  fprintf (f, "}\n");
25815 
25816 	  thumb_exit (f, -1);
25817 
25818 	  return;
25819 	}
25820       else
25821 	{
25822 	  if (mask & 0xFF)
25823 	    fprintf (f, ", ");
25824 
25825 	  asm_fprintf (f, "%r", PC_REGNUM);
25826 	}
25827     }
25828 
25829   fprintf (f, "}\n");
25830 }
25831 
25832 /* Generate code to return from a thumb function.
25833    If 'reg_containing_return_addr' is -1, then the return address is
25834    actually on the stack, at the stack pointer.
25835 
25836    Note: do not forget to update length attribute of corresponding insn pattern
25837    when changing assembly output (eg. length attribute of epilogue_insns when
25838    updating Armv8-M Baseline Security Extensions register clearing
25839    sequences).  */
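/* For example, when the return address is still on the stack but a plain
   "pop {pc}" is not possible (say, because interworking is enabled), the
   address is popped into a free argument register such as r0 and the
   function finishes with "bx r0".  */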
25840 static void
25841 thumb_exit (FILE *f, int reg_containing_return_addr)
25842 {
25843   unsigned regs_available_for_popping;
25844   unsigned regs_to_pop;
25845   int pops_needed;
25846   unsigned available;
25847   unsigned required;
25848   machine_mode mode;
25849   int size;
25850   int restore_a4 = FALSE;
25851 
25852   /* Compute the registers we need to pop.  */
25853   regs_to_pop = 0;
25854   pops_needed = 0;
25855 
25856   if (reg_containing_return_addr == -1)
25857     {
25858       regs_to_pop |= 1 << LR_REGNUM;
25859       ++pops_needed;
25860     }
25861 
25862   if (TARGET_BACKTRACE)
25863     {
25864       /* Restore the (ARM) frame pointer and stack pointer.  */
25865       regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
25866       pops_needed += 2;
25867     }
25868 
25869   /* If there is nothing to pop then just emit the BX instruction and
25870      return.  */
25871   if (pops_needed == 0)
25872     {
25873       if (crtl->calls_eh_return)
25874 	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
25875 
25876       if (IS_CMSE_ENTRY (arm_current_func_type ()))
25877 	{
25878 	  /* For Armv8.1-M, this is cleared as part of the CLRM instruction
25879 	     emitted by cmse_nonsecure_entry_clear_before_return ().  */
25880 	  if (!TARGET_HAVE_FPCXT_CMSE)
25881 	    asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
25882 			 reg_containing_return_addr);
25883 	  asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
25884 	}
25885       else
25886 	asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
25887       return;
25888     }
25889   /* Otherwise if we are not supporting interworking and we have not created
25890      a backtrace structure and the function was not entered in ARM mode then
25891      just pop the return address straight into the PC.  */
25892   else if (!TARGET_INTERWORK
25893 	   && !TARGET_BACKTRACE
25894 	   && !is_called_in_ARM_mode (current_function_decl)
25895 	   && !crtl->calls_eh_return
25896 	   && !IS_CMSE_ENTRY (arm_current_func_type ()))
25897     {
25898       asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
25899       return;
25900     }
25901 
25902   /* Find out how many of the (return) argument registers we can corrupt.  */
25903   regs_available_for_popping = 0;
25904 
25905   /* If returning via __builtin_eh_return, the bottom three registers
25906      all contain information needed for the return.  */
25907   if (crtl->calls_eh_return)
25908     size = 12;
25909   else
25910     {
25911       /* If we can, deduce the registers used from the function's
25912 	 return value.  This is more reliable than examining
25913 	 df_regs_ever_live_p () because that will be set if the register is
25914 	 ever used in the function, not just if the register is used
25915 	 to hold a return value.  */
25916 
25917       if (crtl->return_rtx != 0)
25918 	mode = GET_MODE (crtl->return_rtx);
25919       else
25920 	mode = DECL_MODE (DECL_RESULT (current_function_decl));
25921 
25922       size = GET_MODE_SIZE (mode);
25923 
25924       if (size == 0)
25925 	{
25926 	  /* In a void function we can use any argument register.
25927 	     In a function that returns a structure on the stack
25928 	     we can use the second and third argument registers.  */
25929 	  if (mode == VOIDmode)
25930 	    regs_available_for_popping =
25931 	      (1 << ARG_REGISTER (1))
25932 	      | (1 << ARG_REGISTER (2))
25933 	      | (1 << ARG_REGISTER (3));
25934 	  else
25935 	    regs_available_for_popping =
25936 	      (1 << ARG_REGISTER (2))
25937 	      | (1 << ARG_REGISTER (3));
25938 	}
25939       else if (size <= 4)
25940 	regs_available_for_popping =
25941 	  (1 << ARG_REGISTER (2))
25942 	  | (1 << ARG_REGISTER (3));
25943       else if (size <= 8)
25944 	regs_available_for_popping =
25945 	  (1 << ARG_REGISTER (3));
25946     }
25947 
25948   /* Match registers to be popped with registers into which we pop them.  */
25949   for (available = regs_available_for_popping,
25950        required  = regs_to_pop;
25951        required != 0 && available != 0;
25952        available &= ~(available & - available),
25953        required  &= ~(required  & - required))
25954     -- pops_needed;
25955 
25956   /* If we have any popping registers left over, remove them.  */
25957   if (available > 0)
25958     regs_available_for_popping &= ~available;
25959 
25960   /* Otherwise if we need another popping register we can use
25961      the fourth argument register.  */
25962   else if (pops_needed)
25963     {
25964       /* If we have not found any free argument registers and
25965 	 reg a4 contains the return address, we must move it.  */
25966       if (regs_available_for_popping == 0
25967 	  && reg_containing_return_addr == LAST_ARG_REGNUM)
25968 	{
25969 	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
25970 	  reg_containing_return_addr = LR_REGNUM;
25971 	}
25972       else if (size > 12)
25973 	{
25974 	  /* Register a4 is being used to hold part of the return value,
25975 	     but we have dire need of a free, low register.  */
25976 	  restore_a4 = TRUE;
25977 
25978 	  asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
25979 	}
25980 
25981       if (reg_containing_return_addr != LAST_ARG_REGNUM)
25982 	{
25983 	  /* The fourth argument register is available.  */
25984 	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
25985 
25986 	  --pops_needed;
25987 	}
25988     }
25989 
25990   /* Pop as many registers as we can.  */
25991   thumb_pop (f, regs_available_for_popping);
25992 
25993   /* Process the registers we popped.  */
25994   if (reg_containing_return_addr == -1)
25995     {
25996       /* The return address was popped into the lowest numbered register.  */
25997       regs_to_pop &= ~(1 << LR_REGNUM);
25998 
25999       reg_containing_return_addr =
26000 	number_of_first_bit_set (regs_available_for_popping);
26001 
26002       /* Remove this register from the mask of available registers, so that
26003          the return address will not be corrupted by further pops.  */
26004       regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26005     }
26006 
26007   /* If we popped other registers then handle them here.  */
26008   if (regs_available_for_popping)
26009     {
26010       int frame_pointer;
26011 
26012       /* Work out which register currently contains the frame pointer.  */
26013       frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26014 
26015       /* Move it into the correct place.  */
26016       asm_fprintf (f, "\tmov\t%r, %r\n",
26017 		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26018 
26019       /* (Temporarily) remove it from the mask of popped registers.  */
26020       regs_available_for_popping &= ~(1 << frame_pointer);
26021       regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26022 
26023       if (regs_available_for_popping)
26024 	{
26025 	  int stack_pointer;
26026 
26027 	  /* We popped the stack pointer as well,
26028 	     find the register that contains it.  */
26029 	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26030 
26031 	  /* Move it into the stack register.  */
26032 	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26033 
26034 	  /* At this point we have popped all necessary registers, so
26035 	     do not worry about restoring regs_available_for_popping
26036 	     to its correct value:
26037 
26038 	     assert (pops_needed == 0)
26039 	     assert (regs_available_for_popping == (1 << frame_pointer))
26040 	     assert (regs_to_pop == (1 << STACK_POINTER))  */
26041 	}
26042       else
26043 	{
26044 	  /* Since we have just moved the popped value into the frame
26045 	     pointer, the popping register is available for reuse, and
26046 	     we know that we still have the stack pointer left to pop.  */
26047 	  regs_available_for_popping |= (1 << frame_pointer);
26048 	}
26049     }
26050 
26051   /* If we still have registers left on the stack, but we no longer have
26052      any registers into which we can pop them, then we must move the return
26053      address into the link register and make available the register that
26054      contained it.  */
26055   if (regs_available_for_popping == 0 && pops_needed > 0)
26056     {
26057       regs_available_for_popping |= 1 << reg_containing_return_addr;
26058 
26059       asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26060 		   reg_containing_return_addr);
26061 
26062       reg_containing_return_addr = LR_REGNUM;
26063     }
26064 
26065   /* If we have registers left on the stack then pop some more.
26066      We know that at most we will want to pop FP and SP.  */
26067   if (pops_needed > 0)
26068     {
26069       int  popped_into;
26070       int  move_to;
26071 
26072       thumb_pop (f, regs_available_for_popping);
26073 
26074       /* We have popped either FP or SP.
26075 	 Move whichever one it is into the correct register.  */
26076       popped_into = number_of_first_bit_set (regs_available_for_popping);
26077       move_to     = number_of_first_bit_set (regs_to_pop);
26078 
26079       asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26080       --pops_needed;
26081     }
26082 
26083   /* If we still have not popped everything then we must have only
26084      had one register available to us and we are now popping the SP.  */
26085   if (pops_needed > 0)
26086     {
26087       int  popped_into;
26088 
26089       thumb_pop (f, regs_available_for_popping);
26090 
26091       popped_into = number_of_first_bit_set (regs_available_for_popping);
26092 
26093       asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26094       /*
26095 	assert (regs_to_pop == (1 << STACK_POINTER))
26096 	assert (pops_needed == 1)
26097       */
26098     }
26099 
26100   /* If necessary restore the a4 register.  */
26101   if (restore_a4)
26102     {
26103       if (reg_containing_return_addr != LR_REGNUM)
26104 	{
26105 	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26106 	  reg_containing_return_addr = LR_REGNUM;
26107 	}
26108 
26109       asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26110     }
26111 
26112   if (crtl->calls_eh_return)
26113     asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26114 
26115   /* Return to caller.  */
26116   if (IS_CMSE_ENTRY (arm_current_func_type ()))
26117     {
26118       /* This is for the cases where LR is not being used to contain the return
26119          address.  It may therefore contain information that we might not want
26120 	 to leak, hence it must be cleared.  The value in R0 will never be a
26121 	 secret at this point, so it is safe to use it, see the clearing code
26122 	 in cmse_nonsecure_entry_clear_before_return ().  */
26123       if (reg_containing_return_addr != LR_REGNUM)
26124 	asm_fprintf (f, "\tmov\tlr, r0\n");
26125 
26126       /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
26127 	 by cmse_nonsecure_entry_clear_before_return ().  */
26128       if (!TARGET_HAVE_FPCXT_CMSE)
26129 	asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
26130       asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26131     }
26132   else
26133     asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26134 }
26135 
26136 /* Scan INSN just before assembler is output for it.
26137    For Thumb-1, we track the status of the condition codes; this
26138    information is used in the cbranchsi4_insn pattern.  */
26139 void
26140 thumb1_final_prescan_insn (rtx_insn *insn)
26141 {
26142   if (flag_print_asm_name)
26143     asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26144 		 INSN_ADDRESSES (INSN_UID (insn)));
26145   /* Don't overwrite the previous setter when we get to a cbranch.  */
26146   if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26147     {
26148       enum attr_conds conds;
26149 
26150       if (cfun->machine->thumb1_cc_insn)
26151 	{
26152 	  if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26153 	      || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26154 	    CC_STATUS_INIT;
26155 	}
26156       conds = get_attr_conds (insn);
26157       if (conds == CONDS_SET)
26158 	{
26159 	  rtx set = single_set (insn);
26160 	  cfun->machine->thumb1_cc_insn = insn;
26161 	  cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26162 	  cfun->machine->thumb1_cc_op1 = const0_rtx;
26163 	  cfun->machine->thumb1_cc_mode = CC_NZmode;
26164 	  if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26165 	    {
26166 	      rtx src1 = XEXP (SET_SRC (set), 1);
26167 	      if (src1 == const0_rtx)
26168 		cfun->machine->thumb1_cc_mode = CCmode;
26169 	    }
26170 	  else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26171 	    {
26172 	      /* Record the src register operand instead of dest because
26173 		 cprop_hardreg pass propagates src.  */
26174 	      cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26175 	    }
26176 	}
26177       else if (conds != CONDS_NOCOND)
26178 	cfun->machine->thumb1_cc_insn = NULL_RTX;
26179     }
26180 
26181     /* Check if unexpected far jump is used.  */
26182     if (cfun->machine->lr_save_eliminated
26183         && get_attr_far_jump (insn) == FAR_JUMP_YES)
26184       internal_error("Unexpected thumb1 far jump");
26185 }
26186 
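/* Return nonzero if VAL (truncated to 32 bits) is nonzero and all of its
   set bits fit within a single 8-bit window, i.e. it is an 8-bit constant
   shifted left by at most 24 bits: for example 0x000ff000 qualifies
   (0xff << 12), whereas 0x00000101 does not.  */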
26187 int
26188 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26189 {
26190   unsigned HOST_WIDE_INT mask = 0xff;
26191   int i;
26192 
26193   val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26194   if (val == 0) /* XXX */
26195     return 0;
26196 
26197   for (i = 0; i < 25; i++)
26198     if ((val & (mask << i)) == val)
26199       return 1;
26200 
26201   return 0;
26202 }
26203 
26204 /* Returns nonzero if the current function contains,
26205    or might contain a far jump.  */
26206 static int
26207 thumb_far_jump_used_p (void)
26208 {
26209   rtx_insn *insn;
26210   bool far_jump = false;
26211   unsigned int func_size = 0;
26212 
26213   /* If we have already decided that far jumps may be used,
26214      do not bother checking again, and always return true even if
26215      it turns out that they are not being used.  Once we have made
26216      the decision that far jumps are present (and that hence the link
26217      register will be pushed onto the stack) we cannot go back on it.  */
26218   if (cfun->machine->far_jump_used)
26219     return 1;
26220 
26221   /* If this function is not being called from the prologue/epilogue
26222      generation code then it must be being called from the
26223      INITIAL_ELIMINATION_OFFSET macro.  */
26224   if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26225     {
26226       /* In this case we know that we are being asked about the elimination
26227 	 of the arg pointer register.  If that register is not being used,
26228 	 then there are no arguments on the stack, and we do not have to
26229 	 worry that a far jump might force the prologue to push the link
26230 	 register, changing the stack offsets.  In this case we can just
26231 	 return false, since the presence of far jumps in the function will
26232 	 not affect stack offsets.
26233 
26234 	 If the arg pointer is live (or if it was live, but has now been
26235 	 eliminated and so set to dead) then we do have to test to see if
26236 	 the function might contain a far jump.  This test can lead to some
26237 	 false negatives, since before reload is completed the length of
26238 	 branch instructions is not known, so gcc defaults to returning their
26239 	 longest length, which in turn sets the far jump attribute to true.
26240 
26241 	 A false negative will not result in bad code being generated, but it
26242 	 will result in a needless push and pop of the link register.  We
26243 	 hope that this does not occur too often.
26244 
26245 	 If we need doubleword stack alignment this could affect the other
26246 	 elimination offsets so we can't risk getting it wrong.  */
26247       if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26248 	cfun->machine->arg_pointer_live = 1;
26249       else if (!cfun->machine->arg_pointer_live)
26250 	return 0;
26251     }
26252 
26253   /* We should not change far_jump_used during or after reload, as there is
26254      no chance to change stack frame layout.  */
26255   if (reload_in_progress || reload_completed)
26256     return 0;
26257 
26258   /* Check to see if the function contains a branch
26259      insn with the far jump attribute set.  */
26260   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26261     {
26262       if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26263 	{
26264 	  far_jump = true;
26265 	}
26266       func_size += get_attr_length (insn);
26267     }
26268 
26269   /* Attribute far_jump will always be true for thumb1 before
26270      shorten_branch pass.  So checking far_jump attribute before
26271      shorten_branch isn't very useful.
26272 
26273      The following heuristic tries to estimate more accurately whether a far jump
26274      may finally be used.  The heuristic is very conservative as there is
26275      no chance to roll-back the decision of not to use far jump.
26276 
26277      Thumb1 long branch offset is -2048 to 2046.  The worst case is each
26278      2-byte insn is associated with a 4 byte constant pool.  Using
26279      function size 2048/3 as the threshold is conservative enough.  */
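  /* For example, once a branch carrying the far_jump attribute has been
     seen, a function whose instructions total 700 bytes trips the check
     (700 * 3 >= 2048) and is conservatively recorded as using far jumps,
     which in turn forces the link register to be saved.  */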
26280   if (far_jump)
26281     {
26282       if ((func_size * 3) >= 2048)
26283         {
26284 	  /* Record the fact that we have decided that
26285 	     the function does use far jumps.  */
26286 	  cfun->machine->far_jump_used = 1;
26287 	  return 1;
26288 	}
26289     }
26290 
26291   return 0;
26292 }
26293 
26294 /* Return nonzero if FUNC must be entered in ARM mode.  */
26295 static bool
26296 is_called_in_ARM_mode (tree func)
26297 {
26298   gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26299 
26300   /* Ignore the problem about functions whose address is taken.  */
26301   if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26302     return true;
26303 
26304 #ifdef ARM_PE
26305   return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26306 #else
26307   return false;
26308 #endif
26309 }
26310 
26311 /* Given the stack offsets and register mask in OFFSETS, decide how
26312    many additional registers to push instead of subtracting a constant
26313    from SP.  For epilogues the principle is the same except we use pop.
26314    FOR_PROLOGUE indicates which we're generating.  */
26315 static int
26316 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26317 {
26318   HOST_WIDE_INT amount;
26319   unsigned long live_regs_mask = offsets->saved_regs_mask;
26320   /* Extract a mask of the ones we can give to the Thumb's push/pop
26321      instruction.  */
26322   unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26323   /* Then count how many other high registers will need to be pushed.  */
26324   unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26325   int n_free, reg_base, size;
26326 
26327   if (!for_prologue && frame_pointer_needed)
26328     amount = offsets->locals_base - offsets->saved_regs;
26329   else
26330     amount = offsets->outgoing_args - offsets->saved_regs;
26331 
26332   /* If the stack frame size is 512 exactly, we can save one load
26333      instruction, which should make this a win even when optimizing
26334      for speed.  */
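  /* A Thumb-1 "sub sp, #imm" can only subtract a multiple of 4 up to 508,
     so a 512-byte frame would otherwise need the constant to be loaded
     into a register first; pushing one extra register here shrinks the
     remaining adjustment to 508 and avoids that load.  */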
26335   if (!optimize_size && amount != 512)
26336     return 0;
26337 
26338   /* Can't do this if there are high registers to push.  */
26339   if (high_regs_pushed != 0)
26340     return 0;
26341 
26342   /* Shouldn't do it in the prologue if no registers would normally
26343      be pushed at all.  In the epilogue, also allow it if we'll have
26344      a pop insn for the PC.  */
26345   if  (l_mask == 0
26346        && (for_prologue
26347 	   || TARGET_BACKTRACE
26348 	   || (live_regs_mask & 1 << LR_REGNUM) == 0
26349 	   || TARGET_INTERWORK
26350 	   || crtl->args.pretend_args_size != 0))
26351     return 0;
26352 
26353   /* Don't do this if thumb_expand_prologue wants to emit instructions
26354      between the push and the stack frame allocation.  */
26355   if (for_prologue
26356       && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26357 	  || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26358     return 0;
26359 
26360   reg_base = 0;
26361   n_free = 0;
26362   if (!for_prologue)
26363     {
26364       size = arm_size_return_regs ();
26365       reg_base = ARM_NUM_INTS (size);
26366       live_regs_mask >>= reg_base;
26367     }
26368 
26369   while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26370 	 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
26371     {
26372       live_regs_mask >>= 1;
26373       n_free++;
26374     }
26375 
26376   if (n_free == 0)
26377     return 0;
26378   gcc_assert (amount / 4 * 4 == amount);
26379 
26380   if (amount >= 512 && (amount - n_free * 4) < 512)
26381     return (amount - 508) / 4;
26382   if (amount <= n_free * 4)
26383     return amount / 4;
26384   return 0;
26385 }
26386 
26387 /* The bits which aren't usefully expanded as rtl.  */
26388 const char *
26389 thumb1_unexpanded_epilogue (void)
26390 {
26391   arm_stack_offsets *offsets;
26392   int regno;
26393   unsigned long live_regs_mask = 0;
26394   int high_regs_pushed = 0;
26395   int extra_pop;
26396   int had_to_push_lr;
26397   int size;
26398 
26399   if (cfun->machine->return_used_this_function != 0)
26400     return "";
26401 
26402   if (IS_NAKED (arm_current_func_type ()))
26403     return "";
26404 
26405   offsets = arm_get_frame_offsets ();
26406   live_regs_mask = offsets->saved_regs_mask;
26407   high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26408 
26409   /* If we can, deduce the registers used from the function's return value.
26410      This is more reliable than examining df_regs_ever_live_p () because that
26411      will be set if the register is ever used in the function, not just if
26412      the register is used to hold a return value.  */
26413   size = arm_size_return_regs ();
26414 
26415   extra_pop = thumb1_extra_regs_pushed (offsets, false);
26416   if (extra_pop > 0)
26417     {
26418       unsigned long extra_mask = (1 << extra_pop) - 1;
26419       live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26420     }
26421 
26422   /* The prolog may have pushed some high registers to use as
26423      work registers.  e.g. the testsuite file:
26424      gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26425      compiles to produce:
26426 	push	{r4, r5, r6, r7, lr}
26427 	mov	r7, r9
26428 	mov	r6, r8
26429 	push	{r6, r7}
26430      as part of the prolog.  We have to undo that pushing here.  */
26431 
26432   if (high_regs_pushed)
26433     {
26434       unsigned long mask = live_regs_mask & 0xff;
26435       int next_hi_reg;
26436 
26437       mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();
26438 
26439       if (mask == 0)
26440 	/* Oh dear!  We have no low registers into which we can pop
26441            high registers!  */
26442 	internal_error
26443 	  ("no low registers available for popping high registers");
26444 
26445       for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26446 	if (live_regs_mask & (1 << next_hi_reg))
26447 	  break;
26448 
26449       while (high_regs_pushed)
26450 	{
26451 	  /* Find lo register(s) into which the high register(s) can
26452              be popped.  */
26453 	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26454 	    {
26455 	      if (mask & (1 << regno))
26456 		high_regs_pushed--;
26457 	      if (high_regs_pushed == 0)
26458 		break;
26459 	    }
26460 
26461 	  if (high_regs_pushed == 0 && regno >= 0)
26462 	    mask &= ~((1 << regno) - 1);
26463 
26464 	  /* Pop the values into the low register(s).  */
26465 	  thumb_pop (asm_out_file, mask);
26466 
26467 	  /* Move the value(s) into the high registers.  */
26468 	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26469 	    {
26470 	      if (mask & (1 << regno))
26471 		{
26472 		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26473 			       regno);
26474 
26475 		  for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
26476 		       next_hi_reg--)
26477 		    if (live_regs_mask & (1 << next_hi_reg))
26478 		      break;
26479 		}
26480 	    }
26481 	}
26482       live_regs_mask &= ~0x0f00;
26483     }
26484 
26485   had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26486   live_regs_mask &= 0xff;
26487 
26488   if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26489     {
26490       /* Pop the return address into the PC.  */
26491       if (had_to_push_lr)
26492 	live_regs_mask |= 1 << PC_REGNUM;
26493 
26494       /* Either no argument registers were pushed or a backtrace
26495 	 structure was created which includes an adjusted stack
26496 	 pointer, so just pop everything.  */
26497       if (live_regs_mask)
26498 	thumb_pop (asm_out_file, live_regs_mask);
26499 
26500       /* We have either just popped the return address into the
26501 	 PC or it was kept in LR for the entire function.
26502 	 Note that thumb_pop has already called thumb_exit if the
26503 	 PC was in the list.  */
26504       if (!had_to_push_lr)
26505 	thumb_exit (asm_out_file, LR_REGNUM);
26506     }
26507   else
26508     {
26509       /* Pop everything but the return address.  */
26510       if (live_regs_mask)
26511 	thumb_pop (asm_out_file, live_regs_mask);
26512 
26513       if (had_to_push_lr)
26514 	{
26515 	  if (size > 12)
26516 	    {
26517 	      /* We have no free low regs, so save one.  */
26518 	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26519 			   LAST_ARG_REGNUM);
26520 	    }
26521 
26522 	  /* Get the return address into a temporary register.  */
26523 	  thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26524 
26525 	  if (size > 12)
26526 	    {
26527 	      /* Move the return address to lr.  */
26528 	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26529 			   LAST_ARG_REGNUM);
26530 	      /* Restore the low register.  */
26531 	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26532 			   IP_REGNUM);
26533 	      regno = LR_REGNUM;
26534 	    }
26535 	  else
26536 	    regno = LAST_ARG_REGNUM;
26537 	}
26538       else
26539 	regno = LR_REGNUM;
26540 
26541       /* Remove the argument registers that were pushed onto the stack.  */
26542       asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26543 		   SP_REGNUM, SP_REGNUM,
26544 		   crtl->args.pretend_args_size);
26545 
26546       thumb_exit (asm_out_file, regno);
26547     }
26548 
26549   return "";
26550 }
26551 
26552 /* Functions to save and restore machine-specific function data.  */
26553 static struct machine_function *
26554 arm_init_machine_status (void)
26555 {
26556   struct machine_function *machine;
26557   machine = ggc_cleared_alloc<machine_function> ();
26558 
26559 #if ARM_FT_UNKNOWN != 0
26560   machine->func_type = ARM_FT_UNKNOWN;
26561 #endif
26562   machine->static_chain_stack_bytes = -1;
26563   return machine;
26564 }
26565 
26566 /* Return an RTX indicating where the return address to the
26567    calling function can be found.  */
26568 rtx
26569 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26570 {
26571   if (count != 0)
26572     return NULL_RTX;
26573 
26574   return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26575 }
26576 
26577 /* Do anything needed before RTL is emitted for each function.  */
26578 void
26579 arm_init_expanders (void)
26580 {
26581   /* Arrange to initialize and mark the machine per-function status.  */
26582   init_machine_status = arm_init_machine_status;
26583 
26584   /* This is to stop the combine pass optimizing away the alignment
26585      adjustment of va_arg.  */
26586   /* ??? It is claimed that this should not be necessary.  */
26587   if (cfun)
26588     mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
26589 }
26590 
26591 /* Check that FUNC is called with a different mode.  */
26592 
26593 bool
26594 arm_change_mode_p (tree func)
26595 {
26596   if (TREE_CODE (func) != FUNCTION_DECL)
26597     return false;
26598 
26599   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
26600 
26601   if (!callee_tree)
26602     callee_tree = target_option_default_node;
26603 
26604   struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
26605   int flags = callee_opts->x_target_flags;
26606 
26607   return (TARGET_THUMB_P (flags) != TARGET_THUMB);
26608 }
26609 
26610 /* Like arm_compute_initial_elimination_offset.  Simpler because there
26611    isn't an ABI specified frame pointer for Thumb.  Instead, we set it
26612    to point at the base of the local variables after static stack
26613    space for a function has been allocated.  */
26614 
26615 HOST_WIDE_INT
26616 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
26617 {
26618   arm_stack_offsets *offsets;
26619 
26620   offsets = arm_get_frame_offsets ();
26621 
26622   switch (from)
26623     {
26624     case ARG_POINTER_REGNUM:
26625       switch (to)
26626 	{
26627 	case STACK_POINTER_REGNUM:
26628 	  return offsets->outgoing_args - offsets->saved_args;
26629 
26630 	case FRAME_POINTER_REGNUM:
26631 	  return offsets->soft_frame - offsets->saved_args;
26632 
26633 	case ARM_HARD_FRAME_POINTER_REGNUM:
26634 	  return offsets->saved_regs - offsets->saved_args;
26635 
26636 	case THUMB_HARD_FRAME_POINTER_REGNUM:
26637 	  return offsets->locals_base - offsets->saved_args;
26638 
26639 	default:
26640 	  gcc_unreachable ();
26641 	}
26642       break;
26643 
26644     case FRAME_POINTER_REGNUM:
26645       switch (to)
26646 	{
26647 	case STACK_POINTER_REGNUM:
26648 	  return offsets->outgoing_args - offsets->soft_frame;
26649 
26650 	case ARM_HARD_FRAME_POINTER_REGNUM:
26651 	  return offsets->saved_regs - offsets->soft_frame;
26652 
26653 	case THUMB_HARD_FRAME_POINTER_REGNUM:
26654 	  return offsets->locals_base - offsets->soft_frame;
26655 
26656 	default:
26657 	  gcc_unreachable ();
26658 	}
26659       break;
26660 
26661     default:
26662       gcc_unreachable ();
26663     }
26664 }
26665 
26666 /* Generate the function's prologue.  */
26667 
26668 void
26669 thumb1_expand_prologue (void)
26670 {
26671   rtx_insn *insn;
26672 
26673   HOST_WIDE_INT amount;
26674   HOST_WIDE_INT size;
26675   arm_stack_offsets *offsets;
26676   unsigned long func_type;
26677   int regno;
26678   unsigned long live_regs_mask;
26679   unsigned long l_mask;
26680   unsigned high_regs_pushed = 0;
26681   bool lr_needs_saving;
26682 
26683   func_type = arm_current_func_type ();
26684 
26685   /* Naked functions don't have prologues.  */
26686   if (IS_NAKED (func_type))
26687     {
26688       if (flag_stack_usage_info)
26689 	current_function_static_stack_size = 0;
26690       return;
26691     }
26692 
26693   if (IS_INTERRUPT (func_type))
26694     {
26695       error ("interrupt Service Routines cannot be coded in Thumb mode");
26696       return;
26697     }
26698 
26699   if (is_called_in_ARM_mode (current_function_decl))
26700     emit_insn (gen_prologue_thumb1_interwork ());
26701 
26702   offsets = arm_get_frame_offsets ();
26703   live_regs_mask = offsets->saved_regs_mask;
26704   lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
26705 
26706   /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
26707   l_mask = live_regs_mask & 0x40ff;
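  /* 0x40ff covers the low registers r0-r7 (bits 0-7) plus LR (bit 14),
     i.e. the registers that a Thumb-1 PUSH instruction can name directly.  */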
26708   /* Then count how many other high registers will need to be pushed.  */
26709   high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26710 
26711   if (crtl->args.pretend_args_size)
26712     {
26713       rtx x = GEN_INT (-crtl->args.pretend_args_size);
26714 
26715       if (cfun->machine->uses_anonymous_args)
26716 	{
26717 	  int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
26718 	  unsigned long mask;
26719 
26720 	  mask = 1ul << (LAST_ARG_REGNUM + 1);
26721 	  mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
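	  /* Worked example (illustrative): assuming LAST_ARG_REGNUM is 3 (r3),
	     a pretend_args_size of 8 gives num_pushes == 2, so
	     mask == (1 << 4) - (1 << 2) == 0xc, i.e. push {r2, r3}.  */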
26722 
26723 	  insn = thumb1_emit_multi_reg_push (mask, 0);
26724 	}
26725       else
26726 	{
26727 	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26728 					stack_pointer_rtx, x));
26729 	}
26730       RTX_FRAME_RELATED_P (insn) = 1;
26731     }
26732 
26733   if (TARGET_BACKTRACE)
26734     {
26735       HOST_WIDE_INT offset = 0;
26736       unsigned work_register;
26737       rtx work_reg, x, arm_hfp_rtx;
26738 
26739       /* We have been asked to create a stack backtrace structure.
26740          The code looks like this:
26741 
26742 	 0   .align 2
26743 	 0   func:
26744          0     sub   SP, #16         Reserve space for 4 registers.
26745 	 2     push  {R7}            Push low registers.
26746          4     add   R7, SP, #20     Get the stack pointer before the push.
26747          6     str   R7, [SP, #8]    Store the stack pointer
26748 					(before reserving the space).
26749          8     mov   R7, PC          Get hold of the start of this code + 12.
26750         10     str   R7, [SP, #16]   Store it.
26751         12     mov   R7, FP          Get hold of the current frame pointer.
26752         14     str   R7, [SP, #4]    Store it.
26753         16     mov   R7, LR          Get hold of the current return address.
26754         18     str   R7, [SP, #12]   Store it.
26755         20     add   R7, SP, #16     Point at the start of the
26756 					backtrace structure.
26757         22     mov   FP, R7          Put this value into the frame pointer.  */
26758 
26759       work_register = thumb_find_work_register (live_regs_mask);
26760       work_reg = gen_rtx_REG (SImode, work_register);
26761       arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
26762 
26763       insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26764 				    stack_pointer_rtx, GEN_INT (-16)));
26765       RTX_FRAME_RELATED_P (insn) = 1;
26766 
26767       if (l_mask)
26768 	{
26769 	  insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
26770 	  RTX_FRAME_RELATED_P (insn) = 1;
26771 	  lr_needs_saving = false;
26772 
26773 	  offset = bit_count (l_mask) * UNITS_PER_WORD;
26774 	}
26775 
26776       x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
26777       emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
26778 
26779       x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
26780       x = gen_frame_mem (SImode, x);
26781       emit_move_insn (x, work_reg);
26782 
26783       /* Make sure that the instruction fetching the PC is in the right place
26784 	 to calculate "start of backtrace creation code + 12".  */
26785       /* ??? The stores using the common WORK_REG ought to be enough to
26786 	 prevent the scheduler from doing anything weird.  Failing that
26787 	 we could always move all of the following into an UNSPEC_VOLATILE.  */
26788       if (l_mask)
26789 	{
26790 	  x = gen_rtx_REG (SImode, PC_REGNUM);
26791 	  emit_move_insn (work_reg, x);
26792 
26793 	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
26794 	  x = gen_frame_mem (SImode, x);
26795 	  emit_move_insn (x, work_reg);
26796 
26797 	  emit_move_insn (work_reg, arm_hfp_rtx);
26798 
26799 	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
26800 	  x = gen_frame_mem (SImode, x);
26801 	  emit_move_insn (x, work_reg);
26802 	}
26803       else
26804 	{
26805 	  emit_move_insn (work_reg, arm_hfp_rtx);
26806 
26807 	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
26808 	  x = gen_frame_mem (SImode, x);
26809 	  emit_move_insn (x, work_reg);
26810 
26811 	  x = gen_rtx_REG (SImode, PC_REGNUM);
26812 	  emit_move_insn (work_reg, x);
26813 
26814 	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
26815 	  x = gen_frame_mem (SImode, x);
26816 	  emit_move_insn (x, work_reg);
26817 	}
26818 
26819       x = gen_rtx_REG (SImode, LR_REGNUM);
26820       emit_move_insn (work_reg, x);
26821 
26822       x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
26823       x = gen_frame_mem (SImode, x);
26824       emit_move_insn (x, work_reg);
26825 
26826       x = GEN_INT (offset + 12);
26827       emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
26828 
26829       emit_move_insn (arm_hfp_rtx, work_reg);
26830     }
26831   /* Optimization:  If we are not pushing any low registers but we are going
26832      to push some high registers then delay our first push.  This will just
26833      be a push of LR and we can combine it with the push of the first high
26834      register.  */
26835   else if ((l_mask & 0xff) != 0
26836 	   || (high_regs_pushed == 0 && lr_needs_saving))
26837     {
26838       unsigned long mask = l_mask;
26839       mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
26840       insn = thumb1_emit_multi_reg_push (mask, mask);
26841       RTX_FRAME_RELATED_P (insn) = 1;
26842       lr_needs_saving = false;
26843     }
26844 
26845   if (high_regs_pushed)
26846     {
26847       unsigned pushable_regs;
26848       unsigned next_hi_reg;
26849       unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
26850 						 : crtl->args.info.nregs;
26851       unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
26852 
26853       for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26854 	if (live_regs_mask & (1 << next_hi_reg))
26855 	  break;
26856 
26857       /* Here we need to mask out registers used for passing arguments,
26858 	 even if they could otherwise be pushed.  Using them to stash the
26859 	 high registers would clobber the incoming argument values before
26860 	 they are used.  */
26861       pushable_regs = l_mask & (~arg_regs_mask);
26862       pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();
26863 
26864       /* Normally, LR can be used as a scratch register once it has been
26865 	 saved; but if the function examines its own return address then
26866 	 the value is still live and we need to avoid using it.  */
26867       bool return_addr_live
26868 	= REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
26869 			   LR_REGNUM);
26870 
26871       if (lr_needs_saving || return_addr_live)
26872 	pushable_regs &= ~(1 << LR_REGNUM);
26873 
26874       if (pushable_regs == 0)
26875 	pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
26876 
26877       while (high_regs_pushed > 0)
26878 	{
26879 	  unsigned long real_regs_mask = 0;
26880 	  unsigned long push_mask = 0;
26881 
26882 	  for (regno = LR_REGNUM; regno >= 0; regno --)
26883 	    {
26884 	      if (pushable_regs & (1 << regno))
26885 		{
26886 		  emit_move_insn (gen_rtx_REG (SImode, regno),
26887 				  gen_rtx_REG (SImode, next_hi_reg));
26888 
26889 		  high_regs_pushed --;
26890 		  real_regs_mask |= (1 << next_hi_reg);
26891 		  push_mask |= (1 << regno);
26892 
26893 		  if (high_regs_pushed)
26894 		    {
26895 		      for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
26896 			   next_hi_reg --)
26897 			if (live_regs_mask & (1 << next_hi_reg))
26898 			  break;
26899 		    }
26900 		  else
26901 		    break;
26902 		}
26903 	    }
26904 
26905 	  /* If we had to find a work register and we have not yet
26906 	     saved the LR then add it to the list of regs to push.  */
26907 	  if (lr_needs_saving)
26908 	    {
26909 	      push_mask |= 1 << LR_REGNUM;
26910 	      real_regs_mask |= 1 << LR_REGNUM;
26911 	      lr_needs_saving = false;
26912 	      /* If the return address is not live at this point, we
26913 		 can add LR to the list of registers that we can use
26914 		 for pushes.  */
26915 	      if (!return_addr_live)
26916 		pushable_regs |= 1 << LR_REGNUM;
26917 	    }
26918 
26919 	  insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
26920 	  RTX_FRAME_RELATED_P (insn) = 1;
26921 	}
26922     }
26923 
26924   /* Load the pic register before setting the frame pointer,
26925      so we can use r7 as a temporary work register.  */
26926   if (flag_pic && arm_pic_register != INVALID_REGNUM)
26927     arm_load_pic_register (live_regs_mask, NULL_RTX);
26928 
26929   if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
26930     emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
26931 		    stack_pointer_rtx);
26932 
26933   size = offsets->outgoing_args - offsets->saved_args;
26934   if (flag_stack_usage_info)
26935     current_function_static_stack_size = size;
26936 
26937   /* If we have a frame, then do stack checking.  FIXME: not implemented.  */
26938   if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
26939        || flag_stack_clash_protection)
26940       && size)
26941     sorry ("%<-fstack-check=specific%> for Thumb-1");
26942 
26943   amount = offsets->outgoing_args - offsets->saved_regs;
26944   amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
26945   if (amount)
26946     {
26947       if (amount < 512)
26948 	{
26949 	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
26950 					GEN_INT (- amount)));
26951 	  RTX_FRAME_RELATED_P (insn) = 1;
26952 	}
26953       else
26954 	{
26955 	  rtx reg, dwarf;
26956 
26957 	  /* The stack decrement is too big for an immediate value in a single
26958 	     insn.  In theory we could issue multiple subtracts, but after
26959 	     three of them it becomes more space efficient to place the full
26960 	     value in the constant pool and load into a register.  (Also the
26961 	     ARM debugger really likes to see only one stack decrement per
26962 	     function).  So instead we look for a scratch register into which
26963 	     we can load the decrement, and then we subtract this from the
26964 	     stack pointer.  Unfortunately on the thumb the only available
26965 	     scratch registers are the argument registers, and we cannot use
26966 	     these as they may hold arguments to the function.  Instead we
26967 	     attempt to locate a call preserved register which is used by this
26968 	     function.  If we can find one, then we know that it will have
26969 	     been pushed at the start of the prologue and so we can corrupt
26970 	     it now.  */
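	  /* Illustratively (not the literal output), for a 1024-byte frame
	     this ends up as something like
		ldr	rN, =-1024
		add	sp, rN
	     where rN is a call-saved low register that the prologue has
	     already pushed.  */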
26971 	  for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
26972 	    if (live_regs_mask & (1 << regno))
26973 	      break;
26974 
26975 	  gcc_assert(regno <= LAST_LO_REGNUM);
26976 
26977 	  reg = gen_rtx_REG (SImode, regno);
26978 
26979 	  emit_insn (gen_movsi (reg, GEN_INT (- amount)));
26980 
26981 	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26982 					stack_pointer_rtx, reg));
26983 
26984 	  dwarf = gen_rtx_SET (stack_pointer_rtx,
26985 			       plus_constant (Pmode, stack_pointer_rtx,
26986 					      -amount));
26987 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
26988 	  RTX_FRAME_RELATED_P (insn) = 1;
26989 	}
26990     }
26991 
26992   if (frame_pointer_needed)
26993     thumb_set_frame_pointer (offsets);
26994 
26995   /* If we are profiling, make sure no instructions are scheduled before
26996      the call to mcount.  Similarly if the user has requested no
26997      scheduling in the prolog.  Similarly if we want non-call exceptions
26998      using the EABI unwinder, to prevent faulting instructions from being
26999      swapped with a stack adjustment.  */
27000   if (crtl->profile || !TARGET_SCHED_PROLOG
27001       || (arm_except_unwind_info (&global_options) == UI_TARGET
27002 	  && cfun->can_throw_non_call_exceptions))
27003     emit_insn (gen_blockage ());
27004 
27005   cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27006   if (live_regs_mask & 0xff)
27007     cfun->machine->lr_save_eliminated = 0;
27008 }
27009 
27010 /* Clear caller saved registers not used to pass return values and leaked
27011    condition flags before exiting a cmse_nonsecure_entry function.  */
27012 
27013 void
27014 cmse_nonsecure_entry_clear_before_return (void)
27015 {
27016   bool clear_vfpregs = TARGET_HARD_FLOAT || TARGET_HAVE_FPCXT_CMSE;
27017   int regno, maxregno = clear_vfpregs ? LAST_VFP_REGNUM : IP_REGNUM;
27018   uint32_t padding_bits_to_clear = 0;
27019   auto_sbitmap to_clear_bitmap (maxregno + 1);
27020   rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
27021   tree result_type;
27022 
27023   bitmap_clear (to_clear_bitmap);
27024   bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
27025   bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
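  /* That is, start by marking the argument registers r0-r3 and IP (r12)
     for clearing; further caller-saved registers are added below.  */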
27026 
27027   /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
27028      registers.  */
27029   if (clear_vfpregs)
27030     {
27031       int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
27032 
27033       bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
27034 
27035       if (!TARGET_HAVE_FPCXT_CMSE)
27036 	{
27037 	  /* Make sure we don't clear the two scratch registers used to clear
27038 	     the relevant FPSCR bits in output_return_instruction.  */
27039 	  emit_use (gen_rtx_REG (SImode, IP_REGNUM));
27040 	  bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
27041 	  emit_use (gen_rtx_REG (SImode, 4));
27042 	  bitmap_clear_bit (to_clear_bitmap, 4);
27043 	}
27044     }
27045 
27046   /* If the user has defined registers to be caller saved, these are no longer
27047      restored by the function before returning and must thus be cleared for
27048      security purposes.  */
27049   for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
27050     {
27051       /* We do not touch registers that can be used to pass arguments as per
27052 	 the AAPCS, since these should never be made callee-saved by user
27053 	 options.  */
27054       if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
27055 	continue;
27056       if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
27057 	continue;
27058       if (!callee_saved_reg_p (regno)
27059 	  && (!IN_RANGE (regno, FIRST_VFP_REGNUM, LAST_VFP_REGNUM)
27060 	      || TARGET_HARD_FLOAT))
27061 	bitmap_set_bit (to_clear_bitmap, regno);
27062     }
27063 
27064   /* Make sure we do not clear the registers used to return the result in.  */
27065   result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
27066   if (!VOID_TYPE_P (result_type))
27067     {
27068       uint64_t to_clear_return_mask;
27069       result_rtl = arm_function_value (result_type, current_function_decl, 0);
27070 
27071       /* No need to check that we return in registers, because we don't
27072 	 support returning on stack yet.  */
27073       gcc_assert (REG_P (result_rtl));
27074       to_clear_return_mask
27075 	= compute_not_to_clear_mask (result_type, result_rtl, 0,
27076 				     &padding_bits_to_clear);
27077       if (to_clear_return_mask)
27078 	{
27079 	  gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
27080 	  for (regno = R0_REGNUM; regno <= maxregno; regno++)
27081 	    {
27082 	      if (to_clear_return_mask & (1ULL << regno))
27083 		bitmap_clear_bit (to_clear_bitmap, regno);
27084 	    }
27085 	}
27086     }
27087 
27088   if (padding_bits_to_clear != 0)
27089     {
27090       int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
27091       auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
27092 
27093       /* Padding_bits_to_clear is not 0 so we know we are dealing with
27094 	 returning a composite type, which only uses r0.  Let's make sure that
27095 	 r1-r3 are cleared too.  */
27096       bitmap_clear (to_clear_arg_regs_bitmap);
27097       bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
27098       gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
27099     }
27100 
27101   /* Clear full registers that leak before returning.  */
27102   clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
27103   r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
27104   cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
27105 			clearing_reg);
27106 }
27107 
27108 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
27109    single POP instruction can be generated.  LR should be replaced by PC.
27110    All the required checks have already been done by USE_RETURN_INSN ().
27111    Hence, all we really need to decide here is whether the return pops a
27112    single register or multiple registers.  */
27113 void
27114 thumb2_expand_return (bool simple_return)
27115 {
27116   int i, num_regs;
27117   unsigned long saved_regs_mask;
27118   arm_stack_offsets *offsets;
27119 
27120   offsets = arm_get_frame_offsets ();
27121   saved_regs_mask = offsets->saved_regs_mask;
27122 
27123   for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27124     if (saved_regs_mask & (1 << i))
27125       num_regs++;
27126 
27127   if (!simple_return && saved_regs_mask)
27128     {
27129       /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
27130 	 functions or adapt code to handle according to ACLE.  This path should
27131 	 not be reachable for cmse_nonsecure_entry functions though we prefer
27132 	 to assert it for now to ensure that future code changes do not silently
27133 	 change this behavior.  */
27134       gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
27135       if (num_regs == 1)
27136         {
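          /* Build (parallel [(return) (set (reg PC) (mem (post_inc SP)))]),
             i.e. pop the saved return address straight into PC, which
             assembles to a single "pop {pc}".  */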
27137           rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27138           rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27139           rtx addr = gen_rtx_MEM (SImode,
27140                                   gen_rtx_POST_INC (SImode,
27141                                                     stack_pointer_rtx));
27142           set_mem_alias_set (addr, get_frame_alias_set ());
27143           XVECEXP (par, 0, 0) = ret_rtx;
27144           XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
27145           RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27146           emit_jump_insn (par);
27147         }
27148       else
27149         {
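          /* Fold the return into the multi-register pop: drop LR from the
             mask and pop its stack slot directly into PC instead.  */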
27150           saved_regs_mask &= ~ (1 << LR_REGNUM);
27151           saved_regs_mask |=   (1 << PC_REGNUM);
27152           arm_emit_multi_reg_pop (saved_regs_mask);
27153         }
27154     }
27155   else
27156     {
27157       if (IS_CMSE_ENTRY (arm_current_func_type ()))
27158 	cmse_nonsecure_entry_clear_before_return ();
27159       emit_jump_insn (simple_return_rtx);
27160     }
27161 }
27162 
27163 void
27164 thumb1_expand_epilogue (void)
27165 {
27166   HOST_WIDE_INT amount;
27167   arm_stack_offsets *offsets;
27168   int regno;
27169 
27170   /* Naked functions don't have epilogues.  */
27171   if (IS_NAKED (arm_current_func_type ()))
27172     return;
27173 
27174   offsets = arm_get_frame_offsets ();
27175   amount = offsets->outgoing_args - offsets->saved_regs;
27176 
27177   if (frame_pointer_needed)
27178     {
27179       emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27180       amount = offsets->locals_base - offsets->saved_regs;
27181     }
27182   amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27183 
27184   gcc_assert (amount >= 0);
27185   if (amount)
27186     {
27187       emit_insn (gen_blockage ());
27188 
27189       if (amount < 512)
27190 	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27191 			       GEN_INT (amount)));
27192       else
27193 	{
27194 	  /* r3 is always free in the epilogue.  */
27195 	  rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27196 
27197 	  emit_insn (gen_movsi (reg, GEN_INT (amount)));
27198 	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27199 	}
27200     }
27201 
27202   /* Emit a USE (stack_pointer_rtx), so that
27203      the stack adjustment will not be deleted.  */
27204   emit_insn (gen_force_register_use (stack_pointer_rtx));
27205 
27206   if (crtl->profile || !TARGET_SCHED_PROLOG)
27207     emit_insn (gen_blockage ());
27208 
27209   /* Emit a clobber for each insn that will be restored in the epilogue,
27210      so that flow2 will get register lifetimes correct.  */
27211   for (regno = 0; regno < 13; regno++)
27212     if (df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
27213       emit_clobber (gen_rtx_REG (SImode, regno));
27214 
27215   if (! df_regs_ever_live_p (LR_REGNUM))
27216     emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27217 
27218   /* Clear all caller-saved regs that are not used to return.  */
27219   if (IS_CMSE_ENTRY (arm_current_func_type ()))
27220     cmse_nonsecure_entry_clear_before_return ();
27221 }
27222 
27223 /* Epilogue code for APCS frame.  */
27224 static void
27225 arm_expand_epilogue_apcs_frame (bool really_return)
27226 {
27227   unsigned long func_type;
27228   unsigned long saved_regs_mask;
27229   int num_regs = 0;
27230   int i;
27231   int floats_from_frame = 0;
27232   arm_stack_offsets *offsets;
27233 
27234   gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27235   func_type = arm_current_func_type ();
27236 
27237   /* Get frame offsets for ARM.  */
27238   offsets = arm_get_frame_offsets ();
27239   saved_regs_mask = offsets->saved_regs_mask;
27240 
27241   /* Find the offset of the floating-point save area in the frame.  */
27242   floats_from_frame
27243     = (offsets->saved_args
27244        + arm_compute_static_chain_stack_bytes ()
27245        - offsets->frame);
27246 
27247   /* Compute how many core registers are saved and how far away the floats are.  */
27248   for (i = 0; i <= LAST_ARM_REGNUM; i++)
27249     if (saved_regs_mask & (1 << i))
27250       {
27251         num_regs++;
27252         floats_from_frame += 4;
27253       }
27254 
27255   if (TARGET_VFP_BASE)
27256     {
27257       int start_reg;
27258       rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27259 
27260       /* The offset is from IP_REGNUM.  */
27261       int saved_size = arm_get_vfp_saved_size ();
27262       if (saved_size > 0)
27263         {
27264 	  rtx_insn *insn;
27265           floats_from_frame += saved_size;
27266           insn = emit_insn (gen_addsi3 (ip_rtx,
27267 					hard_frame_pointer_rtx,
27268 					GEN_INT (-floats_from_frame)));
27269 	  arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27270 				       ip_rtx, hard_frame_pointer_rtx);
27271         }
27272 
27273       /* Generate VFP register multi-pop.  */
27274       start_reg = FIRST_VFP_REGNUM;
27275 
27276       for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27277         /* Look for a case where a reg does not need restoring.  */
27278         if ((!df_regs_ever_live_p (i) || call_used_or_fixed_reg_p (i))
27279             && (!df_regs_ever_live_p (i + 1)
27280                 || call_used_or_fixed_reg_p (i + 1)))
27281           {
27282             if (start_reg != i)
27283               arm_emit_vfp_multi_reg_pop (start_reg,
27284                                           (i - start_reg) / 2,
27285                                           gen_rtx_REG (SImode,
27286                                                        IP_REGNUM));
27287             start_reg = i + 2;
27288           }
27289 
27290       /* Restore the remaining regs that we have discovered (or possibly
27291          even all of them, if the conditional in the for loop never
27292          fired).  */
27293       if (start_reg != i)
27294         arm_emit_vfp_multi_reg_pop (start_reg,
27295                                     (i - start_reg) / 2,
27296                                     gen_rtx_REG (SImode, IP_REGNUM));
27297     }
27298 
27299   if (TARGET_IWMMXT)
27300     {
27301       /* The frame pointer is guaranteed to be non-double-word aligned, as
27302          it is set to double-word-aligned old_stack_pointer - 4.  */
27303       rtx_insn *insn;
27304       int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27305 
27306       for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27307         if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
27308           {
27309             rtx addr = gen_frame_mem (V2SImode,
27310                                  plus_constant (Pmode, hard_frame_pointer_rtx,
27311                                                 - lrm_count * 4));
27312             insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27313             REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27314                                                gen_rtx_REG (V2SImode, i),
27315                                                NULL_RTX);
27316             lrm_count += 2;
27317           }
27318     }
27319 
27320   /* saved_regs_mask should contain IP which contains old stack pointer
27321      at the time of activation creation.  Since SP and IP are adjacent registers,
27322      we can restore the value directly into SP.  */
27323   gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27324   saved_regs_mask &= ~(1 << IP_REGNUM);
27325   saved_regs_mask |= (1 << SP_REGNUM);
27326 
27327   /* There are two registers left in saved_regs_mask - LR and PC.  We
27328      only need to restore LR (the return address), but to
27329      save time we can load it directly into PC, unless we need a
27330      special function exit sequence, or we are not really returning.  */
27331   if (really_return
27332       && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27333       && !crtl->calls_eh_return)
27334     /* Delete LR from the register mask, so that LR on
27335        the stack is loaded into the PC in the register mask.  */
27336     saved_regs_mask &= ~(1 << LR_REGNUM);
27337   else
27338     saved_regs_mask &= ~(1 << PC_REGNUM);
27339 
27340   num_regs = bit_count (saved_regs_mask);
27341   if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27342     {
27343       rtx_insn *insn;
27344       emit_insn (gen_blockage ());
27345       /* Unwind the stack to just below the saved registers.  */
27346       insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27347 				    hard_frame_pointer_rtx,
27348 				    GEN_INT (- 4 * num_regs)));
27349 
27350       arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27351 				   stack_pointer_rtx, hard_frame_pointer_rtx);
27352     }
27353 
27354   arm_emit_multi_reg_pop (saved_regs_mask);
27355 
27356   if (IS_INTERRUPT (func_type))
27357     {
27358       /* Interrupt handlers will have pushed the
27359          IP onto the stack, so restore it now.  */
27360       rtx_insn *insn;
27361       rtx addr = gen_rtx_MEM (SImode,
27362                               gen_rtx_POST_INC (SImode,
27363                               stack_pointer_rtx));
27364       set_mem_alias_set (addr, get_frame_alias_set ());
27365       insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27366       REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27367                                          gen_rtx_REG (SImode, IP_REGNUM),
27368                                          NULL_RTX);
27369     }
27370 
27371   if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27372     return;
27373 
27374   if (crtl->calls_eh_return)
27375     emit_insn (gen_addsi3 (stack_pointer_rtx,
27376 			   stack_pointer_rtx,
27377 			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27378 
27379   if (IS_STACKALIGN (func_type))
27380     /* Restore the original stack pointer.  Before prologue, the stack was
27381        realigned and the original stack pointer saved in r0.  For details,
27382        see comment in arm_expand_prologue.  */
27383     emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27384 
27385   emit_jump_insn (simple_return_rtx);
27386 }
27387 
27388 /* Generate RTL to represent ARM epilogue.  Really_return is true if the
27389    function is not a sibcall.  */
27390 void
27391 arm_expand_epilogue (bool really_return)
27392 {
27393   unsigned long func_type;
27394   unsigned long saved_regs_mask;
27395   int num_regs = 0;
27396   int i;
27397   int amount;
27398   arm_stack_offsets *offsets;
27399 
27400   func_type = arm_current_func_type ();
27401 
27402   /* Naked functions don't have epilogues.  Hence, generate the return pattern
27403      and let output_return_instruction take care of any instruction emission.  */
27404   if (IS_NAKED (func_type)
27405       || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27406     {
27407       if (really_return)
27408         emit_jump_insn (simple_return_rtx);
27409       return;
27410     }
27411 
27412   /* If we are throwing an exception, then we really must be doing a
27413      return, so we can't tail-call.  */
27414   gcc_assert (!crtl->calls_eh_return || really_return);
27415 
27416   if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27417     {
27418       arm_expand_epilogue_apcs_frame (really_return);
27419       return;
27420     }
27421 
27422   /* Get frame offsets for ARM.  */
27423   offsets = arm_get_frame_offsets ();
27424   saved_regs_mask = offsets->saved_regs_mask;
27425   num_regs = bit_count (saved_regs_mask);
27426 
27427   if (frame_pointer_needed)
27428     {
27429       rtx_insn *insn;
27430       /* Restore stack pointer if necessary.  */
27431       if (TARGET_ARM)
27432         {
27433           /* In ARM mode, frame pointer points to first saved register.
27434              Restore stack pointer to last saved register.  */
27435           amount = offsets->frame - offsets->saved_regs;
27436 
27437           /* Force out any pending memory operations that reference stacked data
27438              before stack de-allocation occurs.  */
27439           emit_insn (gen_blockage ());
27440 	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27441 			    hard_frame_pointer_rtx,
27442 			    GEN_INT (amount)));
27443 	  arm_add_cfa_adjust_cfa_note (insn, amount,
27444 				       stack_pointer_rtx,
27445 				       hard_frame_pointer_rtx);
27446 
27447           /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27448              deleted.  */
27449           emit_insn (gen_force_register_use (stack_pointer_rtx));
27450         }
27451       else
27452         {
27453           /* In Thumb-2 mode, the frame pointer points to the last saved
27454              register.  */
27455 	  amount = offsets->locals_base - offsets->saved_regs;
27456 	  if (amount)
27457 	    {
27458 	      insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27459 				hard_frame_pointer_rtx,
27460 				GEN_INT (amount)));
27461 	      arm_add_cfa_adjust_cfa_note (insn, amount,
27462 					   hard_frame_pointer_rtx,
27463 					   hard_frame_pointer_rtx);
27464 	    }
27465 
27466           /* Force out any pending memory operations that reference stacked data
27467              before stack de-allocation occurs.  */
27468           emit_insn (gen_blockage ());
27469 	  insn = emit_insn (gen_movsi (stack_pointer_rtx,
27470 				       hard_frame_pointer_rtx));
27471 	  arm_add_cfa_adjust_cfa_note (insn, 0,
27472 				       stack_pointer_rtx,
27473 				       hard_frame_pointer_rtx);
27474           /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27475              deleted.  */
27476           emit_insn (gen_force_register_use (stack_pointer_rtx));
27477         }
27478     }
27479   else
27480     {
27481       /* Pop off outgoing args and local frame to adjust stack pointer to
27482          last saved register.  */
27483       amount = offsets->outgoing_args - offsets->saved_regs;
27484       if (amount)
27485         {
27486 	  rtx_insn *tmp;
27487           /* Force out any pending memory operations that reference stacked data
27488              before stack de-allocation occurs.  */
27489           emit_insn (gen_blockage ());
27490 	  tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27491 				       stack_pointer_rtx,
27492 				       GEN_INT (amount)));
27493 	  arm_add_cfa_adjust_cfa_note (tmp, amount,
27494 				       stack_pointer_rtx, stack_pointer_rtx);
27495           /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27496              not deleted.  */
27497           emit_insn (gen_force_register_use (stack_pointer_rtx));
27498         }
27499     }
27500 
27501   if (TARGET_VFP_BASE)
27502     {
27503       /* Generate VFP register multi-pop.  */
27504       int end_reg = LAST_VFP_REGNUM + 1;
27505 
27506       /* Scan the registers in reverse order.  We need to match
27507          any groupings made in the prologue and generate matching
27508          vldm operations; the groups must match because, unlike pop,
27509          vldm can only restore consecutive registers.  */
27510       for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27511         /* Look for a case where a reg does not need restoring.  */
27512         if ((!df_regs_ever_live_p (i) || call_used_or_fixed_reg_p (i))
27513             && (!df_regs_ever_live_p (i + 1)
27514                 || call_used_or_fixed_reg_p (i + 1)))
27515           {
27516             /* Restore the regs discovered so far (from reg+2 to
27517                end_reg).  */
27518             if (end_reg > i + 2)
27519               arm_emit_vfp_multi_reg_pop (i + 2,
27520                                           (end_reg - (i + 2)) / 2,
27521                                           stack_pointer_rtx);
27522             end_reg = i;
27523           }
27524 
27525       /* Restore the remaining regs that we have discovered (or possibly
27526          even all of them, if the conditional in the for loop never
27527          fired).  */
27528       if (end_reg > i + 2)
27529         arm_emit_vfp_multi_reg_pop (i + 2,
27530                                     (end_reg - (i + 2)) / 2,
27531                                     stack_pointer_rtx);
27532     }
27533 
27534   if (TARGET_IWMMXT)
27535     for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27536       if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
27537         {
27538           rtx_insn *insn;
27539           rtx addr = gen_rtx_MEM (V2SImode,
27540                                   gen_rtx_POST_INC (SImode,
27541                                                     stack_pointer_rtx));
27542           set_mem_alias_set (addr, get_frame_alias_set ());
27543           insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27544           REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27545                                              gen_rtx_REG (V2SImode, i),
27546                                              NULL_RTX);
27547 	  arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27548 				       stack_pointer_rtx, stack_pointer_rtx);
27549         }
27550 
27551   if (saved_regs_mask)
27552     {
27553       rtx insn;
27554       bool return_in_pc = false;
27555 
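      /* Where possible, fold the return into the register pop by loading the
	 saved LR value straight into PC.  This cannot be done for interworked
	 returns, CMSE entry functions, stack-realigned functions, sibcalls,
	 functions that still have pretend args to discard, or EH returns.  */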
27556       if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27557           && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27558 	  && !IS_CMSE_ENTRY (func_type)
27559           && !IS_STACKALIGN (func_type)
27560           && really_return
27561           && crtl->args.pretend_args_size == 0
27562           && saved_regs_mask & (1 << LR_REGNUM)
27563           && !crtl->calls_eh_return)
27564         {
27565           saved_regs_mask &= ~(1 << LR_REGNUM);
27566           saved_regs_mask |= (1 << PC_REGNUM);
27567           return_in_pc = true;
27568         }
27569 
27570       if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27571         {
27572           for (i = 0; i <= LAST_ARM_REGNUM; i++)
27573             if (saved_regs_mask & (1 << i))
27574               {
27575                 rtx addr = gen_rtx_MEM (SImode,
27576                                         gen_rtx_POST_INC (SImode,
27577                                                           stack_pointer_rtx));
27578                 set_mem_alias_set (addr, get_frame_alias_set ());
27579 
27580                 if (i == PC_REGNUM)
27581                   {
27582                     insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27583                     XVECEXP (insn, 0, 0) = ret_rtx;
27584                     XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
27585                                                         addr);
27586                     RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27587                     insn = emit_jump_insn (insn);
27588                   }
27589                 else
27590                   {
27591                     insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27592                                                  addr));
27593                     REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27594                                                        gen_rtx_REG (SImode, i),
27595                                                        NULL_RTX);
27596 		    arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27597 						 stack_pointer_rtx,
27598 						 stack_pointer_rtx);
27599                   }
27600               }
27601         }
27602       else
27603         {
27604           if (TARGET_LDRD
27605 	      && current_tune->prefer_ldrd_strd
27606               && !optimize_function_for_size_p (cfun))
27607             {
27608               if (TARGET_THUMB2)
27609                 thumb2_emit_ldrd_pop (saved_regs_mask);
27610               else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27611                 arm_emit_ldrd_pop (saved_regs_mask);
27612               else
27613                 arm_emit_multi_reg_pop (saved_regs_mask);
27614             }
27615           else
27616             arm_emit_multi_reg_pop (saved_regs_mask);
27617         }
27618 
27619       if (return_in_pc)
27620         return;
27621     }
27622 
27623   amount
27624     = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
27625   if (amount)
27626     {
27627       int i, j;
27628       rtx dwarf = NULL_RTX;
27629       rtx_insn *tmp =
27630 	emit_insn (gen_addsi3 (stack_pointer_rtx,
27631 			       stack_pointer_rtx,
27632 			       GEN_INT (amount)));
27633 
27634       RTX_FRAME_RELATED_P (tmp) = 1;
27635 
27636       if (cfun->machine->uses_anonymous_args)
27637 	{
27638 	  /* Restore pretend args.  Refer to arm_expand_prologue for how the
27639 	     pretend args are saved on the stack.  */
27640 	  int num_regs = crtl->args.pretend_args_size / 4;
27641 	  saved_regs_mask = (0xf0 >> num_regs) & 0xf;
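	  /* Worked example (illustrative): pretend_args_size == 8 gives
	     num_regs == 2 and a mask of 0xc, i.e. the prologue saved r2
	     and r3.  */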
27642 	  for (j = 0, i = 0; j < num_regs; i++)
27643 	    if (saved_regs_mask & (1 << i))
27644 	      {
27645 		rtx reg = gen_rtx_REG (SImode, i);
27646 		dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27647 		j++;
27648 	      }
27649 	  REG_NOTES (tmp) = dwarf;
27650 	}
27651       arm_add_cfa_adjust_cfa_note (tmp, amount,
27652 				   stack_pointer_rtx, stack_pointer_rtx);
27653     }
27654 
27655   if (IS_CMSE_ENTRY (func_type))
27656     {
27657       /* CMSE_ENTRY always returns.  */
27658       gcc_assert (really_return);
27659       /* Clear all caller-saved regs that are not used to return.  */
27660       cmse_nonsecure_entry_clear_before_return ();
27661 
27662       /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
27663 	 VLDR.  */
27664       if (TARGET_HAVE_FPCXT_CMSE)
27665 	{
27666 	  rtx_insn *insn;
27667 
27668 	  insn = emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx,
27669 						   GEN_INT (FPCXTNS_ENUM)));
27670 	  rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
27671 				  plus_constant (Pmode, stack_pointer_rtx, 4));
27672 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27673 	  RTX_FRAME_RELATED_P (insn) = 1;
27674 	}
27675     }
27676 
27677   if (!really_return)
27678     return;
27679 
27680   if (crtl->calls_eh_return)
27681     emit_insn (gen_addsi3 (stack_pointer_rtx,
27682                            stack_pointer_rtx,
27683                            gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27684 
27685   if (IS_STACKALIGN (func_type))
27686     /* Restore the original stack pointer.  Before prologue, the stack was
27687        realigned and the original stack pointer saved in r0.  For details,
27688        see comment in arm_expand_prologue.  */
27689     emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27690 
27691   emit_jump_insn (simple_return_rtx);
27692 }
27693 
27694 /* Implementation of insn prologue_thumb1_interwork.  This is the first
27695    "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
27696 
27697 const char *
27698 thumb1_output_interwork (void)
27699 {
27700   const char * name;
27701   FILE *f = asm_out_file;
27702 
27703   gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
27704   gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
27705 	      == SYMBOL_REF);
27706   name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27707 
27708   /* Generate code sequence to switch us into Thumb mode.  */
27709   /* The .code 32 directive has already been emitted by
27710      ASM_DECLARE_FUNCTION_NAME.  */
27711   asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
27712   asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
27713 
27714   /* Generate a label, so that the debugger will notice the
27715      change in instruction sets.  This label is also used by
27716      the assembler to bypass the ARM code when this function
27717      is called from a Thumb encoded function elsewhere in the
27718      same file.  Hence the definition of STUB_NAME here must
27719      agree with the definition in gas/config/tc-arm.c.  */
27720 
27721 #define STUB_NAME ".real_start_of"
27722 
27723   fprintf (f, "\t.code\t16\n");
27724 #ifdef ARM_PE
27725   if (arm_dllexport_name_p (name))
27726     name = arm_strip_name_encoding (name);
27727 #endif
27728   asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
27729   fprintf (f, "\t.thumb_func\n");
27730   asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
27731 
27732   return "";
27733 }
27734 
27735 /* Handle the case of a double word load into a low register from
27736    a computed memory address.  The computed address may involve a
27737    register which is overwritten by the load.  */
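/* For instance (illustrative): loading a DImode value into r2/r3 from an
   address held in r2 emits "ldr r3, [r2, #4]" followed by "ldr r2, [r2]",
   so the base register is only overwritten by its final use.  */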
27738 const char *
27739 thumb_load_double_from_address (rtx *operands)
27740 {
27741   rtx addr;
27742   rtx base;
27743   rtx offset;
27744   rtx arg1;
27745   rtx arg2;
27746 
27747   gcc_assert (REG_P (operands[0]));
27748   gcc_assert (MEM_P (operands[1]));
27749 
27750   /* Get the memory address.  */
27751   addr = XEXP (operands[1], 0);
27752 
27753   /* Work out how the memory address is computed.  */
27754   switch (GET_CODE (addr))
27755     {
27756     case REG:
27757       operands[2] = adjust_address (operands[1], SImode, 4);
27758 
27759       if (REGNO (operands[0]) == REGNO (addr))
27760 	{
27761 	  output_asm_insn ("ldr\t%H0, %2", operands);
27762 	  output_asm_insn ("ldr\t%0, %1", operands);
27763 	}
27764       else
27765 	{
27766 	  output_asm_insn ("ldr\t%0, %1", operands);
27767 	  output_asm_insn ("ldr\t%H0, %2", operands);
27768 	}
27769       break;
27770 
27771     case CONST:
27772       /* Compute <address> + 4 for the high order load.  */
27773       operands[2] = adjust_address (operands[1], SImode, 4);
27774 
27775       output_asm_insn ("ldr\t%0, %1", operands);
27776       output_asm_insn ("ldr\t%H0, %2", operands);
27777       break;
27778 
27779     case PLUS:
27780       arg1   = XEXP (addr, 0);
27781       arg2   = XEXP (addr, 1);
27782 
27783       if (CONSTANT_P (arg1))
27784 	base = arg2, offset = arg1;
27785       else
27786 	base = arg1, offset = arg2;
27787 
27788       gcc_assert (REG_P (base));
27789 
27790       /* Catch the case of <address> = <reg> + <reg> */
27791       if (REG_P (offset))
27792 	{
27793 	  int reg_offset = REGNO (offset);
27794 	  int reg_base   = REGNO (base);
27795 	  int reg_dest   = REGNO (operands[0]);
27796 
27797 	  /* Add the base and offset registers together into the
27798              higher destination register.  */
27799 	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
27800 		       reg_dest + 1, reg_base, reg_offset);
27801 
27802 	  /* Load the lower destination register from the address in
27803              the higher destination register.  */
27804 	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
27805 		       reg_dest, reg_dest + 1);
27806 
27807 	  /* Load the higher destination register from its own address
27808              plus 4.  */
27809 	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
27810 		       reg_dest + 1, reg_dest + 1);
27811 	}
27812       else
27813 	{
27814 	  /* Compute <address> + 4 for the high order load.  */
27815 	  operands[2] = adjust_address (operands[1], SImode, 4);
27816 
27817 	  /* If the computed address is held in the low order register
27818 	     then load the high order register first, otherwise always
27819 	     load the low order register first.  */
27820 	  if (REGNO (operands[0]) == REGNO (base))
27821 	    {
27822 	      output_asm_insn ("ldr\t%H0, %2", operands);
27823 	      output_asm_insn ("ldr\t%0, %1", operands);
27824 	    }
27825 	  else
27826 	    {
27827 	      output_asm_insn ("ldr\t%0, %1", operands);
27828 	      output_asm_insn ("ldr\t%H0, %2", operands);
27829 	    }
27830 	}
27831       break;
27832 
27833     case LABEL_REF:
27834       /* With no registers to worry about we can just load the value
27835          directly.  */
27836       operands[2] = adjust_address (operands[1], SImode, 4);
27837 
27838       output_asm_insn ("ldr\t%H0, %2", operands);
27839       output_asm_insn ("ldr\t%0, %1", operands);
27840       break;
27841 
27842     default:
27843       gcc_unreachable ();
27844     }
27845 
27846   return "";
27847 }
27848 
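/* Output an ldmia/stmia sequence that copies N (2 or 3) words between the
   pointers in operands[0] and operands[1], using the scratch registers in
   operands[4..6].  The scratch registers are sorted first, since ldmia/stmia
   register lists must be in ascending order.  */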
27849 const char *
27850 thumb_output_move_mem_multiple (int n, rtx *operands)
27851 {
27852   switch (n)
27853     {
27854     case 2:
27855       if (REGNO (operands[4]) > REGNO (operands[5]))
27856 	std::swap (operands[4], operands[5]);
27857 
27858       output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
27859       output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
27860       break;
27861 
27862     case 3:
27863       if (REGNO (operands[4]) > REGNO (operands[5]))
27864         std::swap (operands[4], operands[5]);
27865       if (REGNO (operands[5]) > REGNO (operands[6]))
27866         std::swap (operands[5], operands[6]);
27867       if (REGNO (operands[4]) > REGNO (operands[5]))
27868         std::swap (operands[4], operands[5]);
27869 
27870       output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
27871       output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
27872       break;
27873 
27874     default:
27875       gcc_unreachable ();
27876     }
27877 
27878   return "";
27879 }
27880 
27881 /* Output a call-via instruction for thumb state.  */
27882 const char *
27883 thumb_call_via_reg (rtx reg)
27884 {
27885   int regno = REGNO (reg);
27886   rtx *labelp;
27887 
27888   gcc_assert (regno < LR_REGNUM);
27889 
27890   /* If we are in the normal text section we can use a single instance
27891      per compilation unit.  If we are doing function sections, then we need
27892      an entry per section, since we can't rely on reachability.  */
27893   if (in_section == text_section)
27894     {
27895       thumb_call_reg_needed = 1;
27896 
27897       if (thumb_call_via_label[regno] == NULL)
27898 	thumb_call_via_label[regno] = gen_label_rtx ();
27899       labelp = thumb_call_via_label + regno;
27900     }
27901   else
27902     {
27903       if (cfun->machine->call_via[regno] == NULL)
27904 	cfun->machine->call_via[regno] = gen_label_rtx ();
27905       labelp = cfun->machine->call_via + regno;
27906     }
27907 
27908   output_asm_insn ("bl\t%a0", labelp);
27909   return "";
27910 }
27911 
27912 /* Routines for generating rtl.  */
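/* Expand a cpymemqi: copy INTVAL (operands[2]) bytes from the memory at
   operands[1] to operands[0], using 12- and 8-byte block moves (cpymem12b
   and cpymem8b) and then word, halfword and byte copies for the remainder.  */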
27913 void
27914 thumb_expand_cpymemqi (rtx *operands)
27915 {
27916   rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
27917   rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
27918   HOST_WIDE_INT len = INTVAL (operands[2]);
27919   HOST_WIDE_INT offset = 0;
27920 
27921   while (len >= 12)
27922     {
27923       emit_insn (gen_cpymem12b (out, in, out, in));
27924       len -= 12;
27925     }
27926 
27927   if (len >= 8)
27928     {
27929       emit_insn (gen_cpymem8b (out, in, out, in));
27930       len -= 8;
27931     }
27932 
27933   if (len >= 4)
27934     {
27935       rtx reg = gen_reg_rtx (SImode);
27936       emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
27937       emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
27938       len -= 4;
27939       offset += 4;
27940     }
27941 
27942   if (len >= 2)
27943     {
27944       rtx reg = gen_reg_rtx (HImode);
27945       emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
27946 					      plus_constant (Pmode, in,
27947 							     offset))));
27948       emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
27949 								offset)),
27950 			    reg));
27951       len -= 2;
27952       offset += 2;
27953     }
27954 
27955   if (len)
27956     {
27957       rtx reg = gen_reg_rtx (QImode);
27958       emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
27959 					      plus_constant (Pmode, in,
27960 							     offset))));
27961       emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
27962 								offset)),
27963 			    reg));
27964     }
27965 }
27966 
27967 void
27968 thumb_reload_out_hi (rtx *operands)
27969 {
27970   emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
27971 }
27972 
27973 /* Return the length of a function name prefix
27974     that starts with the character 'c'.  */
27975 static int
27976 arm_get_strip_length (int c)
27977 {
27978   switch (c)
27979     {
27980     ARM_NAME_ENCODING_LENGTHS
27981       default: return 0;
27982     }
27983 }
27984 
27985 /* Return a pointer to a function's name with any
27986    and all prefix encodings stripped from it.  */
27987 const char *
27988 arm_strip_name_encoding (const char *name)
27989 {
27990   int skip;
27991 
27992   while ((skip = arm_get_strip_length (* name)))
27993     name += skip;
27994 
27995   return name;
27996 }
27997 
27998 /* If there is a '*' anywhere in the name's prefix, then
27999    emit the stripped name verbatim, otherwise prepend an
28000    underscore if leading underscores are being used.  */
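/* For instance (illustrative): a name such as "*foo" has its prefix stripped
   and "foo" is emitted verbatim, whereas a plain "foo" is emitted as "%Ufoo",
   i.e. with the target's user label prefix prepended.  */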
28001 void
28002 arm_asm_output_labelref (FILE *stream, const char *name)
28003 {
28004   int skip;
28005   int verbatim = 0;
28006 
28007   while ((skip = arm_get_strip_length (* name)))
28008     {
28009       verbatim |= (*name == '*');
28010       name += skip;
28011     }
28012 
28013   if (verbatim)
28014     fputs (name, stream);
28015   else
28016     asm_fprintf (stream, "%U%s", name);
28017 }
28018 
28019 /* This function is used to emit an EABI tag and its associated value.
28020    We emit the numerical value of the tag in case the assembler does not
28021    support textual tags (e.g. gas prior to 2.20).  If requested we include
28022    the tag name in a comment so that anyone reading the assembler output
28023    will know which tag is being set.
28024 
28025    This function is not static because arm-c.c needs it too.  */
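   For example (illustrative): arm_emit_eabi_attribute ("Tag_ABI_FP_denormal",
   20, 1) prints "\t.eabi_attribute 20, 1" and, under -fverbose-asm, appends
   "\t@ Tag_ABI_FP_denormal" (ASM_COMMENT_START, typically "@" on ARM).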
28026 
28027 void
28028 arm_emit_eabi_attribute (const char *name, int num, int val)
28029 {
28030   asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28031   if (flag_verbose_asm || flag_debug_asm)
28032     asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28033   asm_fprintf (asm_out_file, "\n");
28034 }
28035 
28036 /* This function is used to print CPU tuning information as a comment
28037    in the assembler file.  Pointers are not printed for now.  */
28038 
28039 void
28040 arm_print_tune_info (void)
28041 {
28042   asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
28043   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
28044 	       current_tune->constant_limit);
28045   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28046 	       "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
28047   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28048 	       "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
28049   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28050 	       "prefetch.l1_cache_size:\t%d\n",
28051 	       current_tune->prefetch.l1_cache_size);
28052   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28053 	       "prefetch.l1_cache_line_size:\t%d\n",
28054 	       current_tune->prefetch.l1_cache_line_size);
28055   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28056 	       "prefer_constant_pool:\t%d\n",
28057 	       (int) current_tune->prefer_constant_pool);
28058   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28059 	       "branch_cost:\t(s:speed, p:predictable)\n");
28060   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
28061   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
28062 	       current_tune->branch_cost (false, false));
28063   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
28064 	       current_tune->branch_cost (false, true));
28065   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
28066 	       current_tune->branch_cost (true, false));
28067   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
28068 	       current_tune->branch_cost (true, true));
28069   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28070 	       "prefer_ldrd_strd:\t%d\n",
28071 	       (int) current_tune->prefer_ldrd_strd);
28072   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28073 	       "logical_op_non_short_circuit:\t[%d,%d]\n",
28074 	       (int) current_tune->logical_op_non_short_circuit_thumb,
28075 	       (int) current_tune->logical_op_non_short_circuit_arm);
28076   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28077 	       "disparage_flag_setting_t16_encodings:\t%d\n",
28078 	       (int) current_tune->disparage_flag_setting_t16_encodings);
28079   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28080 	       "string_ops_prefer_neon:\t%d\n",
28081 	       (int) current_tune->string_ops_prefer_neon);
28082   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28083 	       "max_insns_inline_memset:\t%d\n",
28084 	       current_tune->max_insns_inline_memset);
28085   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
28086 	       current_tune->fusible_ops);
28087   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
28088 	       (int) current_tune->sched_autopref);
28089 }
28090 
28091 /* The last set of target options used to emit .arch directives, etc.  This
28092    could be a function-local static if it were not required to expose it as a
28093    root to the garbage collector.  */
28094 static GTY(()) cl_target_option *last_asm_targ_options = NULL;
28095 
28096 /* Print .arch and .arch_extension directives corresponding to the
28097    current architecture configuration.  */
28098 static void
28099 arm_print_asm_arch_directives (FILE *stream, cl_target_option *targ_options)
28100 {
28101   arm_build_target build_target;
28102   /* If the target options haven't changed since the last time we were called
28103      there is nothing to do.  This should be sufficient to suppress the
28104      majority of redundant work.  */
28105   if (last_asm_targ_options == targ_options)
28106     return;
28107 
28108   last_asm_targ_options = targ_options;
28109 
28110   build_target.isa = sbitmap_alloc (isa_num_bits);
28111   arm_configure_build_target (&build_target, targ_options, false);
28112 
28113   if (build_target.core_name
28114       && !bitmap_bit_p (build_target.isa, isa_bit_quirk_no_asmcpu))
28115     {
28116       const char* truncated_name
28117 	= arm_rewrite_selected_cpu (build_target.core_name);
28118       asm_fprintf (stream, "\t.cpu %s\n", truncated_name);
28119     }
28120 
28121   const arch_option *arch
28122     = arm_parse_arch_option_name (all_architectures, "-march",
28123 				  build_target.arch_name);
28124   auto_sbitmap opt_bits (isa_num_bits);
28125 
28126   gcc_assert (arch);
28127 
28128   if (strcmp (build_target.arch_name, "armv7ve") == 0)
28129     {
28130       /* Keep backward compatibility for assemblers which don't support
28131 	 armv7ve.  Fortunately, none of the following extensions are reset
28132 	 by a .fpu directive.  */
28133       asm_fprintf (stream, "\t.arch armv7-a\n");
28134       asm_fprintf (stream, "\t.arch_extension virt\n");
28135       asm_fprintf (stream, "\t.arch_extension idiv\n");
28136       asm_fprintf (stream, "\t.arch_extension sec\n");
28137       asm_fprintf (stream, "\t.arch_extension mp\n");
28138     }
28139   else
28140     asm_fprintf (stream, "\t.arch %s\n", build_target.arch_name);
28141 
28142   /* The .fpu directive will reset any architecture extensions from the
28143      assembler that relate to the fp/vector extensions.  So put this out before
28144      any .arch_extension directives.  */
28145   const char *fpu_name = (TARGET_SOFT_FLOAT
28146 			  ? "softvfp"
28147 			  : arm_identify_fpu_from_isa (build_target.isa));
28148   asm_fprintf (stream, "\t.fpu %s\n", fpu_name);
28149 
28150   if (!arch->common.extensions)
28151     return;
28152 
28153   for (const struct cpu_arch_extension *opt = arch->common.extensions;
28154        opt->name != NULL;
28155        opt++)
28156     {
28157       if (!opt->remove)
28158 	{
28159 	  arm_initialize_isa (opt_bits, opt->isa_bits);
28160 
28161 	  /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
28162 	     "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft" MVE and MVE with
28163 	     floating point instructions is disabled.  So the following check
28164 	     restricts the printing of ".arch_extension mve" and
28165 	     ".arch_extension fp" (for mve.fp) in the assembly file.  MVE needs
28166 	     this special behaviour because the feature bit "mve" and
28167 	     "mve_float" are not part of "fpu bits", so they are not cleared
28168 	     when -mfloat-abi=soft (i.e nofp) but the marco TARGET_HAVE_MVE and
28169 	     TARGET_HAVE_MVE_FLOAT are disabled.  */
28170 	  if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
28171 	      || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
28172 		  && !TARGET_HAVE_MVE_FLOAT))
28173 	    continue;
28174 
28175 	  /* If every feature bit of this option is set in the target ISA
28176 	     specification, print out the option name.  However, don't print
28177 	     anything if all the bits are part of the FPU specification.  */
28178 	  if (bitmap_subset_p (opt_bits, build_target.isa)
28179 	      && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
28180 	    asm_fprintf (stream, "\t.arch_extension %s\n", opt->name);
28181 	}
28182     }
28183 }
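/* As a rough illustration, for -march=armv7ve the function above emits a
   sequence along the lines of:

	.arch armv7-a
	.arch_extension virt
	.arch_extension idiv
	.arch_extension sec
	.arch_extension mp
	.fpu <fpu-name>
	.arch_extension <ext>	@ one line per remaining enabled extension

   where the .fpu name and the trailing extension list depend on the
   selected target options.  */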
28184 
28185 static void
28186 arm_file_start (void)
28187 {
28188   int val;
28189 
28190   arm_print_asm_arch_directives
28191     (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28192 
28193   if (TARGET_BPABI)
28194     {
28195       /* If we have a named cpu, but the assembler does not support that
28196 	 name via .cpu, put out a cpu name attribute; but don't do this if the
28197 	 name starts with the fictitious prefix, 'generic'.  */
28198       if (arm_active_target.core_name
28199 	  && bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu)
28200 	  && strncmp (arm_active_target.core_name, "generic", 7) != 0)
28201 	{
28202 	  const char* truncated_name
28203 	    = arm_rewrite_selected_cpu (arm_active_target.core_name);
28204 	  if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu))
28205 	    asm_fprintf (asm_out_file, "\t.eabi_attribute 5, \"%s\"\n",
28206 			 truncated_name);
28207 	}
28208 
28209       if (print_tune_info)
28210 	arm_print_tune_info ();
28211 
28212       if (! TARGET_SOFT_FLOAT)
28213 	{
28214 	  if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
28215 	    arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
28216 
28217 	  if (TARGET_HARD_FLOAT_ABI)
28218 	    arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28219 	}
28220 
28221       /* Some of these attributes only apply when the corresponding features
28222 	 are used.  However we don't have any easy way of figuring this out.
28223 	 Conservatively record the setting that would have been used.  */
28224 
28225       if (flag_rounding_math)
28226 	arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28227 
28228       if (!flag_unsafe_math_optimizations)
28229 	{
28230 	  arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28231 	  arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28232 	}
28233       if (flag_signaling_nans)
28234 	arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28235 
28236       arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28237 			   flag_finite_math_only ? 1 : 3);
28238 
28239       arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28240       arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28241       arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28242 			       flag_short_enums ? 1 : 2);
28243 
28244       /* Tag_ABI_optimization_goals.  */
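      /* The mapping implemented below is: -Os -> 4, -O2 and above -> 2,
	 -O1 -> 1, and -O0 -> 6.  */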
28245       if (optimize_size)
28246 	val = 4;
28247       else if (optimize >= 2)
28248 	val = 2;
28249       else if (optimize)
28250 	val = 1;
28251       else
28252 	val = 6;
28253       arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28254 
28255       arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28256 			       unaligned_access);
28257 
28258       if (arm_fp16_format)
28259 	arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28260 			     (int) arm_fp16_format);
28261 
28262       if (arm_lang_output_object_attributes_hook)
28263 	arm_lang_output_object_attributes_hook();
28264     }
28265 
28266   default_file_start ();
28267 }
28268 
28269 static void
28270 arm_file_end (void)
28271 {
28272   int regno;
28273 
28274   /* Just in case the last function output in the assembler had non-default
28275      architecture directives, we force the assembler state back to the default
28276      set, so that any 'calculated' build attributes are based on the default
28277      options rather than the special options for that function.  */
28278   arm_print_asm_arch_directives
28279     (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28280 
28281   if (NEED_INDICATE_EXEC_STACK)
28282     /* Add .note.GNU-stack.  */
28283     file_end_indicate_exec_stack ();
28284 
28285   if (! thumb_call_reg_needed)
28286     return;
28287 
28288   switch_to_section (text_section);
28289   asm_fprintf (asm_out_file, "\t.code 16\n");
28290   ASM_OUTPUT_ALIGN (asm_out_file, 1);
28291 
28292   for (regno = 0; regno < LR_REGNUM; regno++)
28293     {
28294       rtx label = thumb_call_via_label[regno];
28295 
28296       if (label != 0)
28297 	{
28298 	  targetm.asm_out.internal_label (asm_out_file, "L",
28299 					  CODE_LABEL_NUMBER (label));
28300 	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28301 	}
28302     }
28303 }
28304 
28305 #ifndef ARM_PE
28306 /* Symbols in the text segment can be accessed without indirecting via the
28307    constant pool; it may take an extra binary operation, but this is still
28308    faster than indirecting via memory.  Don't do this when not optimizing,
28309    since we won't be calculating all of the offsets necessary to do this
28310    simplification.  */
28311 
28312 static void
28313 arm_encode_section_info (tree decl, rtx rtl, int first)
28314 {
28315   if (optimize > 0 && TREE_CONSTANT (decl))
28316     SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28317 
28318   default_encode_section_info (decl, rtl, first);
28319 }
28320 #endif /* !ARM_PE */
28321 
28322 static void
28323 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28324 {
28325   if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28326       && !strcmp (prefix, "L"))
28327     {
28328       arm_ccfsm_state = 0;
28329       arm_target_insn = NULL;
28330     }
28331   default_internal_label (stream, prefix, labelno);
28332 }
28333 
28334 /* Output code to add DELTA to the first argument, and then jump
28335    to FUNCTION.  Used for C++ multiple inheritance.  */
28336 
28337 static void
28338 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28339 		     HOST_WIDE_INT, tree function)
28340 {
28341   static int thunk_label = 0;
28342   char label[256];
28343   char labelpc[256];
28344   int mi_delta = delta;
28345   const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28346   int shift = 0;
28347   int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28348                     ? 1 : 0);
28349   if (mi_delta < 0)
28350     mi_delta = - mi_delta;
28351 
28352   final_start_function (emit_barrier (), file, 1);
28353 
28354   if (TARGET_THUMB1)
28355     {
28356       int labelno = thunk_label++;
28357       ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28358       /* Thunks are entered in arm mode when available.  */
28359       if (TARGET_THUMB1_ONLY)
28360 	{
28361 	  /* push r3 so we can use it as a temporary.  */
28362 	  /* TODO: Omit this save if r3 is not used.  */
28363 	  fputs ("\tpush {r3}\n", file);
28364 
28365 	  /* With -mpure-code, we cannot load the address from the
28366 	     constant pool: we build it explicitly.  */
28367 	  if (target_pure_code)
28368 	    {
28369 	      fputs ("\tmovs\tr3, #:upper8_15:#", file);
28370 	      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28371 	      fputc ('\n', file);
28372 	      fputs ("\tlsls r3, #8\n", file);
28373 	      fputs ("\tadds\tr3, #:upper0_7:#", file);
28374 	      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28375 	      fputc ('\n', file);
28376 	      fputs ("\tlsls r3, #8\n", file);
28377 	      fputs ("\tadds\tr3, #:lower8_15:#", file);
28378 	      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28379 	      fputc ('\n', file);
28380 	      fputs ("\tlsls r3, #8\n", file);
28381 	      fputs ("\tadds\tr3, #:lower0_7:#", file);
28382 	      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28383 	      fputc ('\n', file);
28384 	    }
28385 	  else
28386 	    fputs ("\tldr\tr3, ", file);
28387 	}
28388       else
28389 	{
28390 	  fputs ("\tldr\tr12, ", file);
28391 	}
28392 
28393       if (!target_pure_code)
28394 	{
28395 	  assemble_name (file, label);
28396 	  fputc ('\n', file);
28397 	}
28398 
28399       if (flag_pic)
28400 	{
28401 	  /* If we are generating PIC, the ldr instruction below loads
28402 	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
28403 	     the address of the add + 8, so we have:
28404 
28405 	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28406 	         = target + 1.
28407 
28408 	     Note that we have "+ 1" because some versions of GNU ld
28409 	     don't set the low bit of the result for R_ARM_REL32
28410 	     relocations against thumb function symbols.
28411 	     On ARMv6M this is +4, not +8.  */
28412 	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28413 	  assemble_name (file, labelpc);
28414 	  fputs (":\n", file);
28415 	  if (TARGET_THUMB1_ONLY)
28416 	    {
28417 	      /* This is 2 insns after the start of the thunk, so we know it
28418 	         is 4-byte aligned.  */
28419 	      fputs ("\tadd\tr3, pc, r3\n", file);
28420 	      fputs ("\tmov r12, r3\n", file);
28421 	    }
28422 	  else
28423 	    fputs ("\tadd\tr12, pc, r12\n", file);
28424 	}
28425       else if (TARGET_THUMB1_ONLY)
28426 	fputs ("\tmov r12, r3\n", file);
28427     }
28428   if (TARGET_THUMB1_ONLY)
28429     {
28430       if (mi_delta > 255)
28431 	{
28432 	  fputs ("\tldr\tr3, ", file);
28433 	  assemble_name (file, label);
28434 	  fputs ("+4\n", file);
28435 	  asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
28436 		       mi_op, this_regno, this_regno);
28437 	}
28438       else if (mi_delta != 0)
28439 	{
28440 	  /* Thumb1 unified syntax requires s suffix in instruction name when
28441 	     one of the operands is immediate.  */
28442 	  asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
28443 		       mi_op, this_regno, this_regno,
28444 		       mi_delta);
28445 	}
28446     }
28447   else
28448     {
28449       /* TODO: Use movw/movt for large constants when available.  */
28450       while (mi_delta != 0)
28451 	{
28452 	  if ((mi_delta & (3 << shift)) == 0)
28453 	    shift += 2;
28454 	  else
28455 	    {
28456 	      asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28457 			   mi_op, this_regno, this_regno,
28458 			   mi_delta & (0xff << shift));
28459 	      mi_delta &= ~(0xff << shift);
28460 	      shift += 8;
28461 	    }
28462 	}
28463     }
28464   if (TARGET_THUMB1)
28465     {
28466       if (TARGET_THUMB1_ONLY)
28467 	fputs ("\tpop\t{r3}\n", file);
28468 
28469       fprintf (file, "\tbx\tr12\n");
28470       ASM_OUTPUT_ALIGN (file, 2);
28471       assemble_name (file, label);
28472       fputs (":\n", file);
28473       if (flag_pic)
28474 	{
28475 	  /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn".  */
28476 	  rtx tem = XEXP (DECL_RTL (function), 0);
28477 	  /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
28478 	     pipeline offset is four rather than eight.  Adjust the offset
28479 	     accordingly.  */
28480 	  tem = plus_constant (GET_MODE (tem), tem,
28481 			       TARGET_THUMB1_ONLY ? -3 : -7);
28482 	  tem = gen_rtx_MINUS (GET_MODE (tem),
28483 			       tem,
28484 			       gen_rtx_SYMBOL_REF (Pmode,
28485 						   ggc_strdup (labelpc)));
28486 	  assemble_integer (tem, 4, BITS_PER_WORD, 1);
28487 	}
28488       else
28489 	/* Output ".word .LTHUNKn".  */
28490 	assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28491 
28492       if (TARGET_THUMB1_ONLY && mi_delta > 255)
28493 	assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
28494     }
28495   else
28496     {
28497       fputs ("\tb\t", file);
28498       assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28499       if (NEED_PLT_RELOC)
28500         fputs ("(PLT)", file);
28501       fputc ('\n', file);
28502     }
28503 
28504   final_end_function ();
28505 }
28506 
28507 /* MI thunk handling for TARGET_32BIT.  */
28508 
28509 static void
28510 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28511 		       HOST_WIDE_INT vcall_offset, tree function)
28512 {
28513   const bool long_call_p = arm_is_long_call_p (function);
28514 
28515   /* On ARM, this_regno is R0 or R1 depending on
28516      whether the function returns an aggregate or not.
28517   */
28518   int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
28519 				       function)
28520 		    ? R1_REGNUM : R0_REGNUM);
28521 
28522   rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
28523   rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
28524   reload_completed = 1;
28525   emit_note (NOTE_INSN_PROLOGUE_END);
28526 
28527   /* Add DELTA to THIS_RTX.  */
28528   if (delta != 0)
28529     arm_split_constant (PLUS, Pmode, NULL_RTX,
28530 			delta, this_rtx, this_rtx, false);
28531 
28532   /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX.  */
28533   if (vcall_offset != 0)
28534     {
28535       /* Load *THIS_RTX.  */
28536       emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
28537       /* Compute *THIS_RTX + VCALL_OFFSET.  */
28538       arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
28539 			  false);
28540       /* Compute *(*THIS_RTX + VCALL_OFFSET).  */
28541       emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
28542       emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
28543     }
28544 
28545   /* Generate a tail call to the target function.  */
28546   if (!TREE_USED (function))
28547     {
28548       assemble_external (function);
28549       TREE_USED (function) = 1;
28550     }
28551   rtx funexp = XEXP (DECL_RTL (function), 0);
28552   if (long_call_p)
28553     {
28554       emit_move_insn (temp, funexp);
28555       funexp = temp;
28556     }
28557   funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
28558   rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
28559   SIBLING_CALL_P (insn) = 1;
28560   emit_barrier ();
28561 
28562   /* Indirect calls require a bit of fixup in PIC mode.  */
28563   if (long_call_p)
28564     {
28565       split_all_insns_noflow ();
28566       arm_reorg ();
28567     }
28568 
28569   insn = get_insns ();
28570   shorten_branches (insn);
28571   final_start_function (insn, file, 1);
28572   final (insn, file, 1);
28573   final_end_function ();
28574 
28575   /* Stop pretending this is a post-reload pass.  */
28576   reload_completed = 0;
28577 }
28578 
28579 /* Output code to add DELTA to the first argument, and then jump
28580    to FUNCTION.  Used for C++ multiple inheritance.  */
28581 
28582 static void
28583 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
28584 		     HOST_WIDE_INT vcall_offset, tree function)
28585 {
28586   const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
28587 
28588   assemble_start_function (thunk, fnname);
28589   if (TARGET_32BIT)
28590     arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
28591   else
28592     arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
28593   assemble_end_function (thunk, fnname);
28594 }
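/* Such thunks typically arise from C++ multiple inheritance, e.g.:

     struct A { virtual void f (); };
     struct B { virtual void g (); };
     struct C : A, B { void g (); };

   Calling g() through a B* that points at a C object requires adjusting
   the incoming 'this' pointer by the offset of the B subobject (the
   DELTA argument above) before tail-calling C::g, which is exactly what
   the emitted thunk does.  */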
28595 
28596 int
28597 arm_emit_vector_const (FILE *file, rtx x)
28598 {
28599   int i;
28600   const char * pattern;
28601 
28602   gcc_assert (GET_CODE (x) == CONST_VECTOR);
28603 
28604   switch (GET_MODE (x))
28605     {
28606     case E_V2SImode: pattern = "%08x"; break;
28607     case E_V4HImode: pattern = "%04x"; break;
28608     case E_V8QImode: pattern = "%02x"; break;
28609     default:       gcc_unreachable ();
28610     }
28611 
28612   fprintf (file, "0x");
28613   for (i = CONST_VECTOR_NUNITS (x); i--;)
28614     {
28615       rtx element;
28616 
28617       element = CONST_VECTOR_ELT (x, i);
28618       fprintf (file, pattern, INTVAL (element));
28619     }
28620 
28621   return 1;
28622 }
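/* For illustration, a V4HImode CONST_VECTOR with elements {1, 2, 3, 4}
   (element 0 listed first) is printed highest-numbered element first,
   i.e. as "0x0004000300020001".  */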
28623 
28624 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28625    HFmode constant pool entries are actually loaded with ldr.  */
28626 void
28627 arm_emit_fp16_const (rtx c)
28628 {
28629   long bits;
28630 
28631   bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
28632   if (WORDS_BIG_ENDIAN)
28633     assemble_zeros (2);
28634   assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
28635   if (!WORDS_BIG_ENDIAN)
28636     assemble_zeros (2);
28637 }
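/* For example, the HFmode constant 1.0 has the IEEE half-precision bit
   pattern 0x3c00; on a little-endian target the function above emits the
   16-bit value 0x3c00 followed by two bytes of zero padding, and the
   other way around when WORDS_BIG_ENDIAN.  */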
28638 
28639 const char *
28640 arm_output_load_gr (rtx *operands)
28641 {
28642   rtx reg;
28643   rtx offset;
28644   rtx wcgr;
28645   rtx sum;
28646 
28647   if (!MEM_P (operands [1])
28648       || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
28649       || !REG_P (reg = XEXP (sum, 0))
28650       || !CONST_INT_P (offset = XEXP (sum, 1))
28651       || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
28652     return "wldrw%?\t%0, %1";
28653 
28654   /* Fix up an out-of-range load of a GR register.  */
28655   output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
28656   wcgr = operands[0];
28657   operands[0] = reg;
28658   output_asm_insn ("ldr%?\t%0, %1", operands);
28659 
28660   operands[0] = wcgr;
28661   operands[1] = reg;
28662   output_asm_insn ("tmcr%?\t%0, %1", operands);
28663   output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
28664 
28665   return "";
28666 }
28667 
28668 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28669 
28670    On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28671    named arg and all anonymous args onto the stack.
28672    XXX I know the prologue shouldn't be pushing registers, but it is faster
28673    that way.  */
28674 
28675 static void
28676 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
28677 			    const function_arg_info &arg,
28678 			    int *pretend_size,
28679 			    int second_time ATTRIBUTE_UNUSED)
28680 {
28681   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
28682   int nregs;
28683 
28684   cfun->machine->uses_anonymous_args = 1;
28685   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
28686     {
28687       nregs = pcum->aapcs_ncrn;
28688       if (nregs & 1)
28689 	{
28690 	  int res = arm_needs_doubleword_align (arg.mode, arg.type);
28691 	  if (res < 0 && warn_psabi)
28692 	    inform (input_location, "parameter passing for argument of "
28693 		    "type %qT changed in GCC 7.1", arg.type);
28694 	  else if (res > 0)
28695 	    {
28696 	      nregs++;
28697 	      if (res > 1 && warn_psabi)
28698 		inform (input_location,
28699 			"parameter passing for argument of type "
28700 			"%qT changed in GCC 9.1", arg.type);
28701 	    }
28702 	}
28703     }
28704   else
28705     nregs = pcum->nregs;
28706 
28707   if (nregs < NUM_ARG_REGS)
28708     *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
28709 }
28710 
28711 /* We can't rely on the caller doing the proper promotion when
28712    using APCS or ATPCS.  */
28713 
28714 static bool
28715 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
28716 {
28717     return !TARGET_AAPCS_BASED;
28718 }
28719 
28720 static machine_mode
28721 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
28722                            machine_mode mode,
28723                            int *punsignedp ATTRIBUTE_UNUSED,
28724                            const_tree fntype ATTRIBUTE_UNUSED,
28725                            int for_return ATTRIBUTE_UNUSED)
28726 {
28727   if (GET_MODE_CLASS (mode) == MODE_INT
28728       && GET_MODE_SIZE (mode) < 4)
28729     return SImode;
28730 
28731   return mode;
28732 }
28733 
28734 
28735 static bool
28736 arm_default_short_enums (void)
28737 {
28738   return ARM_DEFAULT_SHORT_ENUMS;
28739 }
28740 
28741 
28742 /* AAPCS requires that anonymous bitfields affect structure alignment.  */
28743 
28744 static bool
28745 arm_align_anon_bitfield (void)
28746 {
28747   return TARGET_AAPCS_BASED;
28748 }
28749 
28750 
28751 /* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */
28752 
28753 static tree
28754 arm_cxx_guard_type (void)
28755 {
28756   return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
28757 }
28758 
28759 
28760 /* The EABI says test the least significant bit of a guard variable.  */
28761 
28762 static bool
28763 arm_cxx_guard_mask_bit (void)
28764 {
28765   return TARGET_AAPCS_BASED;
28766 }
28767 
28768 
28769 /* The EABI specifies that all array cookies are 8 bytes long.  */
28770 
28771 static tree
28772 arm_get_cookie_size (tree type)
28773 {
28774   tree size;
28775 
28776   if (!TARGET_AAPCS_BASED)
28777     return default_cxx_get_cookie_size (type);
28778 
28779   size = build_int_cst (sizetype, 8);
28780   return size;
28781 }
28782 
28783 
28784 /* The EABI says that array cookies should also contain the element size.  */
28785 
28786 static bool
28787 arm_cookie_has_size (void)
28788 {
28789   return TARGET_AAPCS_BASED;
28790 }
28791 
28792 
28793 /* The EABI says constructors and destructors should return a pointer to
28794    the object constructed/destroyed.  */
28795 
28796 static bool
28797 arm_cxx_cdtor_returns_this (void)
28798 {
28799   return TARGET_AAPCS_BASED;
28800 }
28801 
28802 /* The EABI says that an inline function may never be the key
28803    method.  */
28804 
28805 static bool
28806 arm_cxx_key_method_may_be_inline (void)
28807 {
28808   return !TARGET_AAPCS_BASED;
28809 }
28810 
28811 static void
28812 arm_cxx_determine_class_data_visibility (tree decl)
28813 {
28814   if (!TARGET_AAPCS_BASED
28815       || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
28816     return;
28817 
28818   /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28819      is exported.  However, on systems without dynamic vague linkage,
28820      \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
28821   if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
28822     DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
28823   else
28824     DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
28825   DECL_VISIBILITY_SPECIFIED (decl) = 1;
28826 }
28827 
28828 static bool
28829 arm_cxx_class_data_always_comdat (void)
28830 {
28831   /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28832      vague linkage if the class has no key function.  */
28833   return !TARGET_AAPCS_BASED;
28834 }
28835 
28836 
28837 /* The EABI says __aeabi_atexit should be used to register static
28838    destructors.  */
28839 
28840 static bool
28841 arm_cxx_use_aeabi_atexit (void)
28842 {
28843   return TARGET_AAPCS_BASED;
28844 }
28845 
28846 
28847 void
28848 arm_set_return_address (rtx source, rtx scratch)
28849 {
28850   arm_stack_offsets *offsets;
28851   HOST_WIDE_INT delta;
28852   rtx addr, mem;
28853   unsigned long saved_regs;
28854 
28855   offsets = arm_get_frame_offsets ();
28856   saved_regs = offsets->saved_regs_mask;
28857 
28858   if ((saved_regs & (1 << LR_REGNUM)) == 0)
28859     emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28860   else
28861     {
28862       if (frame_pointer_needed)
28863 	addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
28864       else
28865 	{
28866 	  /* LR will be the first saved register.  */
28867 	  delta = offsets->outgoing_args - (offsets->frame + 4);
28868 
28869 
28870 	  if (delta >= 4096)
28871 	    {
28872 	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
28873 				     GEN_INT (delta & ~4095)));
28874 	      addr = scratch;
28875 	      delta &= 4095;
28876 	    }
28877 	  else
28878 	    addr = stack_pointer_rtx;
28879 
28880 	  addr = plus_constant (Pmode, addr, delta);
28881 	}
28882 
28883       /* The store needs to be marked to prevent DSE from deleting
28884 	 it as dead if it is based on fp.  */
28885       mem = gen_frame_mem (Pmode, addr);
28886       MEM_VOLATILE_P (mem) = true;
28887       emit_move_insn (mem, source);
28888     }
28889 }
28890 
28891 
28892 void
28893 thumb_set_return_address (rtx source, rtx scratch)
28894 {
28895   arm_stack_offsets *offsets;
28896   HOST_WIDE_INT delta;
28897   HOST_WIDE_INT limit;
28898   int reg;
28899   rtx addr, mem;
28900   unsigned long mask;
28901 
28902   emit_use (source);
28903 
28904   offsets = arm_get_frame_offsets ();
28905   mask = offsets->saved_regs_mask;
28906   if (mask & (1 << LR_REGNUM))
28907     {
28908       limit = 1024;
28909       /* Find the saved regs.  */
28910       if (frame_pointer_needed)
28911 	{
28912 	  delta = offsets->soft_frame - offsets->saved_args;
28913 	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
28914 	  if (TARGET_THUMB1)
28915 	    limit = 128;
28916 	}
28917       else
28918 	{
28919 	  delta = offsets->outgoing_args - offsets->saved_args;
28920 	  reg = SP_REGNUM;
28921 	}
28922       /* Allow for the stack frame.  */
28923       if (TARGET_THUMB1 && TARGET_BACKTRACE)
28924 	delta -= 16;
28925       /* The link register is always the first saved register.  */
28926       delta -= 4;
28927 
28928       /* Construct the address.  */
28929       addr = gen_rtx_REG (SImode, reg);
28930       if (delta > limit)
28931 	{
28932 	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
28933 	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
28934 	  addr = scratch;
28935 	}
28936       else
28937 	addr = plus_constant (Pmode, addr, delta);
28938 
28939       /* The store needs to be marked to prevent DSE from deleting
28940 	 it as dead if it is based on fp.  */
28941       mem = gen_frame_mem (Pmode, addr);
28942       MEM_VOLATILE_P (mem) = true;
28943       emit_move_insn (mem, source);
28944     }
28945   else
28946     emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28947 }
28948 
28949 /* Implements target hook vector_mode_supported_p.  */
28950 bool
28951 arm_vector_mode_supported_p (machine_mode mode)
28952 {
28953   /* Neon also supports V2SImode, etc. listed in the clause below.  */
28954   if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
28955       || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
28956       || mode == V2DImode || mode == V8HFmode || mode == V4BFmode
28957       || mode == V8BFmode))
28958     return true;
28959 
28960   if ((TARGET_NEON || TARGET_IWMMXT)
28961       && ((mode == V2SImode)
28962 	  || (mode == V4HImode)
28963 	  || (mode == V8QImode)))
28964     return true;
28965 
28966   if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
28967       || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
28968       || mode == V2HAmode))
28969     return true;
28970 
28971   if (TARGET_HAVE_MVE
28972       && (mode == V2DImode || mode == V4SImode || mode == V8HImode
28973 	  || mode == V16QImode))
28974       return true;
28975 
28976   if (TARGET_HAVE_MVE_FLOAT
28977       && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode))
28978       return true;
28979 
28980   return false;
28981 }
28982 
28983 /* Implements target hook array_mode_supported_p.  */
28984 
28985 static bool
28986 arm_array_mode_supported_p (machine_mode mode,
28987 			    unsigned HOST_WIDE_INT nelems)
28988 {
28989   /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
28990      for now, as the lane-swapping logic needs to be extended in the expanders.
28991      See PR target/82518.  */
28992   if (TARGET_NEON && !BYTES_BIG_ENDIAN
28993       && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
28994       && (nelems >= 2 && nelems <= 4))
28995     return true;
28996 
28997   if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN
28998       && VALID_MVE_MODE (mode) && (nelems == 2 || nelems == 4))
28999     return true;
29000 
29001   return false;
29002 }
29003 
29004 /* Use the option -mvectorize-with-neon-double to override the use of quadword
29005    registers when autovectorizing for Neon, at least until multiple vector
29006    widths are supported properly by the middle-end.  */
29007 
29008 static machine_mode
29009 arm_preferred_simd_mode (scalar_mode mode)
29010 {
29011   if (TARGET_NEON)
29012     switch (mode)
29013       {
29014       case E_SFmode:
29015 	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
29016       case E_SImode:
29017 	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
29018       case E_HImode:
29019 	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
29020       case E_QImode:
29021 	return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
29022       case E_DImode:
29023 	if (!TARGET_NEON_VECTORIZE_DOUBLE)
29024 	  return V2DImode;
29025 	break;
29026 
29027       default:;
29028       }
29029 
29030   if (TARGET_REALLY_IWMMXT)
29031     switch (mode)
29032       {
29033       case E_SImode:
29034 	return V2SImode;
29035       case E_HImode:
29036 	return V4HImode;
29037       case E_QImode:
29038 	return V8QImode;
29039 
29040       default:;
29041       }
29042 
29043   return word_mode;
29044 }
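/* For example, when vectorizing SFmode data with Neon enabled the
   function above returns V4SFmode (a quadword register) by default, or
   V2SFmode (a doubleword register) under -mvectorize-with-neon-double.  */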
29045 
29046 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29047 
29048    We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
29049    using r0-r4 for function arguments, r7 for the stack frame and don't have
29050    enough left over to do doubleword arithmetic.  For Thumb-2 all the
29051    potentially problematic instructions accept high registers so this is not
29052    necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
29053    that require many low registers.  */
29054 static bool
29055 arm_class_likely_spilled_p (reg_class_t rclass)
29056 {
29057   if ((TARGET_THUMB1 && rclass == LO_REGS)
29058       || rclass  == CC_REG)
29059     return true;
29060 
29061   return false;
29062 }
29063 
29064 /* Implements target hook small_register_classes_for_mode_p.  */
29065 bool
29066 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29067 {
29068   return TARGET_THUMB1;
29069 }
29070 
29071 /* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
29072    ARM insns and therefore guarantee that the shift count is modulo 256.
29073    DImode shifts (those implemented by lib1funcs.S or by optabs.c)
29074    guarantee no particular behavior for out-of-range counts.  */
29075 
29076 static unsigned HOST_WIDE_INT
29077 arm_shift_truncation_mask (machine_mode mode)
29078 {
29079   return mode == SImode ? 255 : 0;
29080 }
29081 
29082 
29083 /* Map internal gcc register numbers to DWARF2 register numbers.  */
29084 
29085 unsigned int
29086 arm_dbx_register_number (unsigned int regno)
29087 {
29088   if (regno < 16)
29089     return regno;
29090 
29091   if (IS_VFP_REGNUM (regno))
29092     {
29093       /* See comment in arm_dwarf_register_span.  */
29094       if (VFP_REGNO_OK_FOR_SINGLE (regno))
29095 	return 64 + regno - FIRST_VFP_REGNUM;
29096       else
29097 	return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29098     }
29099 
29100   if (IS_IWMMXT_GR_REGNUM (regno))
29101     return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29102 
29103   if (IS_IWMMXT_REGNUM (regno))
29104     return 112 + regno - FIRST_IWMMXT_REGNUM;
29105 
29106   return DWARF_FRAME_REGISTERS;
29107 }
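/* As a rough illustration of the mapping above: the core registers r0-r15
   keep their own numbers, VFP registers that are still addressable as
   single-precision registers use the legacy 64-95 range, and the
   double-only registers land in the 256+ range (e.g. d16 maps to 272).  */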
29108 
29109 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29110    GCC models them as 64 32-bit registers, so we need to describe this to
29111    the DWARF generation code.  Other registers can use the default.  */
29112 static rtx
29113 arm_dwarf_register_span (rtx rtl)
29114 {
29115   machine_mode mode;
29116   unsigned regno;
29117   rtx parts[16];
29118   int nregs;
29119   int i;
29120 
29121   regno = REGNO (rtl);
29122   if (!IS_VFP_REGNUM (regno))
29123     return NULL_RTX;
29124 
29125   /* XXX FIXME: The EABI defines two VFP register ranges:
29126 	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29127 	256-287: D0-D31
29128      The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29129      corresponding D register.  Until GDB supports this, we shall use the
29130      legacy encodings.  We also use these encodings for D0-D15 for
29131      compatibility with older debuggers.  */
29132   mode = GET_MODE (rtl);
29133   if (GET_MODE_SIZE (mode) < 8)
29134     return NULL_RTX;
29135 
29136   if (VFP_REGNO_OK_FOR_SINGLE (regno))
29137     {
29138       nregs = GET_MODE_SIZE (mode) / 4;
29139       for (i = 0; i < nregs; i += 2)
29140 	if (TARGET_BIG_END)
29141 	  {
29142 	    parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29143 	    parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29144 	  }
29145 	else
29146 	  {
29147 	    parts[i] = gen_rtx_REG (SImode, regno + i);
29148 	    parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29149 	  }
29150     }
29151   else
29152     {
29153       nregs = GET_MODE_SIZE (mode) / 8;
29154       for (i = 0; i < nregs; i++)
29155 	parts[i] = gen_rtx_REG (DImode, regno + i);
29156     }
29157 
29158   return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
29159 }
29160 
29161 #if ARM_UNWIND_INFO
29162 /* Emit unwind directives for a store-multiple instruction or stack pointer
29163    push during alignment.
29164    These should only ever be generated by the function prologue code, so
29165    expect them to have a particular form.
29166    The store-multiple instruction sometimes pushes pc as the last register,
29167    although it should not be tracked into unwind information, or for -Os
29168    sometimes pushes some dummy registers before the first register that needs
29169    to be tracked in unwind information; such dummy registers are there just
29170    to avoid separate stack adjustment, and will not be restored in the
29171    epilogue.  */
29172 
29173 static void
29174 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
29175 {
29176   int i;
29177   HOST_WIDE_INT offset;
29178   HOST_WIDE_INT nregs;
29179   int reg_size;
29180   unsigned reg;
29181   unsigned lastreg;
29182   unsigned padfirst = 0, padlast = 0;
29183   rtx e;
29184 
29185   e = XVECEXP (p, 0, 0);
29186   gcc_assert (GET_CODE (e) == SET);
29187 
29188   /* First insn will adjust the stack pointer.  */
29189   gcc_assert (GET_CODE (e) == SET
29190 	      && REG_P (SET_DEST (e))
29191 	      && REGNO (SET_DEST (e)) == SP_REGNUM
29192 	      && GET_CODE (SET_SRC (e)) == PLUS);
29193 
29194   offset = -INTVAL (XEXP (SET_SRC (e), 1));
29195   nregs = XVECLEN (p, 0) - 1;
29196   gcc_assert (nregs);
29197 
29198   reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29199   if (reg < 16)
29200     {
29201       /* For -Os dummy registers can be pushed at the beginning to
29202 	 avoid separate stack pointer adjustment.  */
29203       e = XVECEXP (p, 0, 1);
29204       e = XEXP (SET_DEST (e), 0);
29205       if (GET_CODE (e) == PLUS)
29206 	padfirst = INTVAL (XEXP (e, 1));
29207       gcc_assert (padfirst == 0 || optimize_size);
29208       /* The function prologue may also push pc, but not annotate it as it is
29209 	 never restored.  We turn this into a stack pointer adjustment.  */
29210       e = XVECEXP (p, 0, nregs);
29211       e = XEXP (SET_DEST (e), 0);
29212       if (GET_CODE (e) == PLUS)
29213 	padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29214       else
29215 	padlast = offset - 4;
29216       gcc_assert (padlast == 0 || padlast == 4);
29217       if (padlast == 4)
29218 	fprintf (asm_out_file, "\t.pad #4\n");
29219       reg_size = 4;
29220       fprintf (asm_out_file, "\t.save {");
29221     }
29222   else if (IS_VFP_REGNUM (reg))
29223     {
29224       reg_size = 8;
29225       fprintf (asm_out_file, "\t.vsave {");
29226     }
29227   else
29228     /* Unknown register type.  */
29229     gcc_unreachable ();
29230 
29231   /* If the stack increment doesn't match the size of the saved registers,
29232      something has gone horribly wrong.  */
29233   gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29234 
29235   offset = padfirst;
29236   lastreg = 0;
29237   /* The remaining insns will describe the stores.  */
29238   for (i = 1; i <= nregs; i++)
29239     {
29240       /* Expect (set (mem <addr>) (reg)).
29241          Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
29242       e = XVECEXP (p, 0, i);
29243       gcc_assert (GET_CODE (e) == SET
29244 		  && MEM_P (SET_DEST (e))
29245 		  && REG_P (SET_SRC (e)));
29246 
29247       reg = REGNO (SET_SRC (e));
29248       gcc_assert (reg >= lastreg);
29249 
29250       if (i != 1)
29251 	fprintf (asm_out_file, ", ");
29252       /* We can't use %r for vfp because we need to use the
29253 	 double precision register names.  */
29254       if (IS_VFP_REGNUM (reg))
29255 	asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29256       else
29257 	asm_fprintf (asm_out_file, "%r", reg);
29258 
29259       if (flag_checking)
29260 	{
29261 	  /* Check that the addresses are consecutive.  */
29262 	  e = XEXP (SET_DEST (e), 0);
29263 	  if (GET_CODE (e) == PLUS)
29264 	    gcc_assert (REG_P (XEXP (e, 0))
29265 			&& REGNO (XEXP (e, 0)) == SP_REGNUM
29266 			&& CONST_INT_P (XEXP (e, 1))
29267 			&& offset == INTVAL (XEXP (e, 1)));
29268 	  else
29269 	    gcc_assert (i == 1
29270 			&& REG_P (e)
29271 			&& REGNO (e) == SP_REGNUM);
29272 	  offset += reg_size;
29273 	}
29274     }
29275   fprintf (asm_out_file, "}\n");
29276   if (padfirst)
29277     fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
29278 }
29279 
29280 /*  Emit unwind directives for a SET.  */
29281 
29282 static void
29283 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
29284 {
29285   rtx e0;
29286   rtx e1;
29287   unsigned reg;
29288 
29289   e0 = XEXP (p, 0);
29290   e1 = XEXP (p, 1);
29291   switch (GET_CODE (e0))
29292     {
29293     case MEM:
29294       /* Pushing a single register.  */
29295       if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29296 	  || !REG_P (XEXP (XEXP (e0, 0), 0))
29297 	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29298 	abort ();
29299 
29300       asm_fprintf (asm_out_file, "\t.save ");
29301       if (IS_VFP_REGNUM (REGNO (e1)))
29302 	asm_fprintf(asm_out_file, "{d%d}\n",
29303 		    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29304       else
29305 	asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
29306       break;
29307 
29308     case REG:
29309       if (REGNO (e0) == SP_REGNUM)
29310 	{
29311 	  /* A stack increment.  */
29312 	  if (GET_CODE (e1) != PLUS
29313 	      || !REG_P (XEXP (e1, 0))
29314 	      || REGNO (XEXP (e1, 0)) != SP_REGNUM
29315 	      || !CONST_INT_P (XEXP (e1, 1)))
29316 	    abort ();
29317 
29318 	  asm_fprintf (asm_out_file, "\t.pad #%wd\n",
29319 		       -INTVAL (XEXP (e1, 1)));
29320 	}
29321       else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29322 	{
29323 	  HOST_WIDE_INT offset;
29324 
29325 	  if (GET_CODE (e1) == PLUS)
29326 	    {
29327 	      if (!REG_P (XEXP (e1, 0))
29328 		  || !CONST_INT_P (XEXP (e1, 1)))
29329 		abort ();
29330 	      reg = REGNO (XEXP (e1, 0));
29331 	      offset = INTVAL (XEXP (e1, 1));
29332 	      asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
29333 			   HARD_FRAME_POINTER_REGNUM, reg,
29334 			   offset);
29335 	    }
29336 	  else if (REG_P (e1))
29337 	    {
29338 	      reg = REGNO (e1);
29339 	      asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
29340 			   HARD_FRAME_POINTER_REGNUM, reg);
29341 	    }
29342 	  else
29343 	    abort ();
29344 	}
29345       else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
29346 	{
29347 	  /* Move from sp to reg.  */
29348 	  asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
29349 	}
29350      else if (GET_CODE (e1) == PLUS
29351 	      && REG_P (XEXP (e1, 0))
29352 	      && REGNO (XEXP (e1, 0)) == SP_REGNUM
29353 	      && CONST_INT_P (XEXP (e1, 1)))
29354 	{
29355 	  /* Set reg to offset from sp.  */
29356 	  asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
29357 		       REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
29358 	}
29359       else
29360 	abort ();
29361       break;
29362 
29363     default:
29364       abort ();
29365     }
29366 }
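/* Examples of the directives produced above: a single-register push such
   as "str lr, [sp, #-4]!" becomes "\t.save {lr}", a stack decrement of 16
   bytes becomes "\t.pad #16", and establishing the frame pointer from sp
   plus an offset becomes "\t.setfp <fp>, sp, #<offset>".  */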
29367 
29368 
29369 /* Emit unwind directives for the given insn.  */
29370 
29371 static void
29372 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
29373 {
29374   rtx note, pat;
29375   bool handled_one = false;
29376 
29377   if (arm_except_unwind_info (&global_options) != UI_TARGET)
29378     return;
29379 
29380   if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29381       && (TREE_NOTHROW (current_function_decl)
29382 	  || crtl->all_throwers_are_sibcalls))
29383     return;
29384 
29385   if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
29386     return;
29387 
29388   for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
29389     {
29390       switch (REG_NOTE_KIND (note))
29391 	{
29392 	case REG_FRAME_RELATED_EXPR:
29393 	  pat = XEXP (note, 0);
29394 	  goto found;
29395 
29396 	case REG_CFA_REGISTER:
29397 	  pat = XEXP (note, 0);
29398 	  if (pat == NULL)
29399 	    {
29400 	      pat = PATTERN (insn);
29401 	      if (GET_CODE (pat) == PARALLEL)
29402 		pat = XVECEXP (pat, 0, 0);
29403 	    }
29404 
29405 	  /* Only emitted for IS_STACKALIGN re-alignment.  */
29406 	  {
29407 	    rtx dest, src;
29408 	    unsigned reg;
29409 
29410 	    src = SET_SRC (pat);
29411 	    dest = SET_DEST (pat);
29412 
29413 	    gcc_assert (src == stack_pointer_rtx);
29414 	    reg = REGNO (dest);
29415 	    asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29416 			 reg + 0x90, reg);
29417 	  }
29418 	  handled_one = true;
29419 	  break;
29420 
29421 	/* The INSN is generated in the epilogue.  It is set as RTX_FRAME_RELATED_P
29422 	   to get correct dwarf information for shrink-wrapping.  We should not
29423 	   emit unwind information for it because such notes are used either for
29424 	   pretend arguments or to adjust sp and restore registers from the
29425 	   stack.  */
29426 	case REG_CFA_DEF_CFA:
29427 	case REG_CFA_ADJUST_CFA:
29428 	case REG_CFA_RESTORE:
29429 	  return;
29430 
29431 	case REG_CFA_EXPRESSION:
29432 	case REG_CFA_OFFSET:
29433 	  /* ??? Only handling here what we actually emit.  */
29434 	  gcc_unreachable ();
29435 
29436 	default:
29437 	  break;
29438 	}
29439     }
29440   if (handled_one)
29441     return;
29442   pat = PATTERN (insn);
29443  found:
29444 
29445   switch (GET_CODE (pat))
29446     {
29447     case SET:
29448       arm_unwind_emit_set (asm_out_file, pat);
29449       break;
29450 
29451     case SEQUENCE:
29452       /* Store multiple.  */
29453       arm_unwind_emit_sequence (asm_out_file, pat);
29454       break;
29455 
29456     default:
29457       abort();
29458     }
29459 }
29460 
29461 
29462 /* Output a reference from a function exception table to the type_info
29463    object X.  The EABI specifies that the symbol should be relocated by
29464    an R_ARM_TARGET2 relocation.  */
29465 
29466 static bool
29467 arm_output_ttype (rtx x)
29468 {
29469   fputs ("\t.word\t", asm_out_file);
29470   output_addr_const (asm_out_file, x);
29471   /* Use special relocations for symbol references.  */
29472   if (!CONST_INT_P (x))
29473     fputs ("(TARGET2)", asm_out_file);
29474   fputc ('\n', asm_out_file);
29475 
29476   return TRUE;
29477 }
29478 
29479 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */
29480 
29481 static void
29482 arm_asm_emit_except_personality (rtx personality)
29483 {
29484   fputs ("\t.personality\t", asm_out_file);
29485   output_addr_const (asm_out_file, personality);
29486   fputc ('\n', asm_out_file);
29487 }
29488 #endif /* ARM_UNWIND_INFO */
29489 
29490 /* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */
29491 
29492 static void
29493 arm_asm_init_sections (void)
29494 {
29495 #if ARM_UNWIND_INFO
29496   exception_section = get_unnamed_section (0, output_section_asm_op,
29497 					   "\t.handlerdata");
29498 #endif /* ARM_UNWIND_INFO */
29499 
29500 #ifdef OBJECT_FORMAT_ELF
29501   if (target_pure_code)
29502     text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
29503 #endif
29504 }
29505 
29506 /* Output unwind directives for the start/end of a function.  */
29507 
29508 void
29509 arm_output_fn_unwind (FILE * f, bool prologue)
29510 {
29511   if (arm_except_unwind_info (&global_options) != UI_TARGET)
29512     return;
29513 
29514   if (prologue)
29515     fputs ("\t.fnstart\n", f);
29516   else
29517     {
29518       /* If this function will never be unwound, then mark it as such.
29519          The came condition is used in arm_unwind_emit to suppress
29520 	 the frame annotations.  */
29521       if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29522 	  && (TREE_NOTHROW (current_function_decl)
29523 	      || crtl->all_throwers_are_sibcalls))
29524 	fputs("\t.cantunwind\n", f);
29525 
29526       fputs ("\t.fnend\n", f);
29527     }
29528 }
29529 
29530 static bool
29531 arm_emit_tls_decoration (FILE *fp, rtx x)
29532 {
29533   enum tls_reloc reloc;
29534   rtx val;
29535 
29536   val = XVECEXP (x, 0, 0);
29537   reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
29538 
29539   output_addr_const (fp, val);
29540 
29541   switch (reloc)
29542     {
29543     case TLS_GD32:
29544       fputs ("(tlsgd)", fp);
29545       break;
29546     case TLS_GD32_FDPIC:
29547       fputs ("(tlsgd_fdpic)", fp);
29548       break;
29549     case TLS_LDM32:
29550       fputs ("(tlsldm)", fp);
29551       break;
29552     case TLS_LDM32_FDPIC:
29553       fputs ("(tlsldm_fdpic)", fp);
29554       break;
29555     case TLS_LDO32:
29556       fputs ("(tlsldo)", fp);
29557       break;
29558     case TLS_IE32:
29559       fputs ("(gottpoff)", fp);
29560       break;
29561     case TLS_IE32_FDPIC:
29562       fputs ("(gottpoff_fdpic)", fp);
29563       break;
29564     case TLS_LE32:
29565       fputs ("(tpoff)", fp);
29566       break;
29567     case TLS_DESCSEQ:
29568       fputs ("(tlsdesc)", fp);
29569       break;
29570     default:
29571       gcc_unreachable ();
29572     }
29573 
29574   switch (reloc)
29575     {
29576     case TLS_GD32:
29577     case TLS_LDM32:
29578     case TLS_IE32:
29579     case TLS_DESCSEQ:
29580       fputs (" + (. - ", fp);
29581       output_addr_const (fp, XVECEXP (x, 0, 2));
29582       /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
29583       fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
29584       output_addr_const (fp, XVECEXP (x, 0, 3));
29585       fputc (')', fp);
29586       break;
29587     default:
29588       break;
29589     }
29590 
29591   return TRUE;
29592 }
29593 
29594 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */
29595 
29596 static void
29597 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
29598 {
29599   gcc_assert (size == 4);
29600   fputs ("\t.word\t", file);
29601   output_addr_const (file, x);
29602   fputs ("(tlsldo)", file);
29603 }
29604 
29605 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */
29606 
29607 static bool
29608 arm_output_addr_const_extra (FILE *fp, rtx x)
29609 {
29610   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
29611     return arm_emit_tls_decoration (fp, x);
29612   else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
29613     {
29614       char label[256];
29615       int labelno = INTVAL (XVECEXP (x, 0, 0));
29616 
29617       ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
29618       assemble_name_raw (fp, label);
29619 
29620       return TRUE;
29621     }
29622   else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
29623     {
29624       assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
29625       if (GOT_PCREL)
29626 	fputs ("+.", fp);
29627       fputs ("-(", fp);
29628       output_addr_const (fp, XVECEXP (x, 0, 0));
29629       fputc (')', fp);
29630       return TRUE;
29631     }
29632   else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
29633     {
29634       output_addr_const (fp, XVECEXP (x, 0, 0));
29635       if (GOT_PCREL)
29636         fputs ("+.", fp);
29637       fputs ("-(", fp);
29638       output_addr_const (fp, XVECEXP (x, 0, 1));
29639       fputc (')', fp);
29640       return TRUE;
29641     }
29642   else if (GET_CODE (x) == CONST_VECTOR)
29643     return arm_emit_vector_const (fp, x);
29644 
29645   return FALSE;
29646 }
29647 
29648 /* Output assembly for a shift instruction.
29649    SET_FLAGS determines how the instruction modifies the condition codes.
29650    0 - Do not set condition codes.
29651    1 - Set condition codes.
29652    2 - Use smallest instruction.  */
29653 const char *
29654 arm_output_shift(rtx * operands, int set_flags)
29655 {
29656   char pattern[100];
29657   static const char flag_chars[3] = {'?', '.', '!'};
29658   const char *shift;
29659   HOST_WIDE_INT val;
29660   char c;
29661 
29662   c = flag_chars[set_flags];
29663   shift = shift_op(operands[3], &val);
29664   if (shift)
29665     {
29666       if (val != -1)
29667 	operands[2] = GEN_INT(val);
29668       sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
29669     }
29670   else
29671     sprintf (pattern, "mov%%%c\t%%0, %%1", c);
29672 
29673   output_asm_insn (pattern, operands);
29674   return "";
29675 }
29676 
29677 /* Output assembly for a WMMX immediate shift instruction.  */
29678 const char *
29679 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
29680 {
29681   int shift = INTVAL (operands[2]);
29682   char templ[50];
29683   machine_mode opmode = GET_MODE (operands[0]);
29684 
29685   gcc_assert (shift >= 0);
29686 
29687   /* Handle shift values that exceed the maximum for the instruction:
29688      > 63 (for D qualifier), > 31 (for W qualifier) or > 15 (for H qualifier).  */
29689   if (((opmode == V4HImode) && (shift > 15))
29690 	|| ((opmode == V2SImode) && (shift > 31))
29691 	|| ((opmode == DImode) && (shift > 63)))
29692   {
29693     if (wror_or_wsra)
29694       {
29695         sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29696         output_asm_insn (templ, operands);
29697         if (opmode == DImode)
29698           {
29699 	    sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
29700 	    output_asm_insn (templ, operands);
29701           }
29702       }
29703     else
29704       {
29705         /* The destination register will contain all zeros.  */
29706         sprintf (templ, "wzero\t%%0");
29707         output_asm_insn (templ, operands);
29708       }
29709     return "";
29710   }
29711 
29712   if ((opmode == DImode) && (shift > 32))
29713     {
29714       sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29715       output_asm_insn (templ, operands);
29716       sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
29717       output_asm_insn (templ, operands);
29718     }
29719   else
29720     {
29721       sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
29722       output_asm_insn (templ, operands);
29723     }
29724   return "";
29725 }
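
/* Editorial note (not part of the original source): for a DImode operand
   and an in-range shift amount of, say, 40, the code above splits the
   operation into a shift by #32 followed by a shift of the partial result
   by #8.  Out-of-range amounts are clamped to a #32 shift (applied twice
   for DImode) in the WSRA/WROR-style cases, and otherwise collapse to a
   "wzero" of the destination.  */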
29726 
29727 /* Output assembly for a WMMX tinsr instruction.  */
29728 const char *
29729 arm_output_iwmmxt_tinsr (rtx *operands)
29730 {
29731   int mask = INTVAL (operands[3]);
29732   int i;
29733   char templ[50];
29734   int units = mode_nunits[GET_MODE (operands[0])];
29735   gcc_assert ((mask & (mask - 1)) == 0);
29736   for (i = 0; i < units; ++i)
29737     {
29738       if ((mask & 0x01) == 1)
29739         {
29740           break;
29741         }
29742       mask >>= 1;
29743     }
29744   gcc_assert (i < units);
29745   {
29746     switch (GET_MODE (operands[0]))
29747       {
29748       case E_V8QImode:
29749 	sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
29750 	break;
29751       case E_V4HImode:
29752 	sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
29753 	break;
29754       case E_V2SImode:
29755 	sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
29756 	break;
29757       default:
29758 	gcc_unreachable ();
29759 	break;
29760       }
29761     output_asm_insn (templ, operands);
29762   }
29763   return "";
29764 }
29765 
29766 /* Output a Thumb-1 casesi dispatch sequence.  */
29767 const char *
29768 thumb1_output_casesi (rtx *operands)
29769 {
29770   rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
29771 
29772   gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29773 
29774   switch (GET_MODE(diff_vec))
29775     {
29776     case E_QImode:
29777       return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29778 	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29779     case E_HImode:
29780       return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29781 	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29782     case E_SImode:
29783       return "bl\t%___gnu_thumb1_case_si";
29784     default:
29785       gcc_unreachable ();
29786     }
29787 }
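
/* Editorial note (not part of the original source): the
   __gnu_thumb1_case_* routines branched to above are small libgcc helpers;
   each one indexes the dispatch table that the compiler places after the
   BL and adjusts the return address to the selected case.  */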
29788 
29789 /* Output a Thumb-2 casesi instruction.  */
29790 const char *
29791 thumb2_output_casesi (rtx *operands)
29792 {
29793   rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
29794 
29795   gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29796 
29797   output_asm_insn ("cmp\t%0, %1", operands);
29798   output_asm_insn ("bhi\t%l3", operands);
29799   switch (GET_MODE(diff_vec))
29800     {
29801     case E_QImode:
29802       return "tbb\t[%|pc, %0]";
29803     case E_HImode:
29804       return "tbh\t[%|pc, %0, lsl #1]";
29805     case E_SImode:
29806       if (flag_pic)
29807 	{
29808 	  output_asm_insn ("adr\t%4, %l2", operands);
29809 	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
29810 	  output_asm_insn ("add\t%4, %4, %5", operands);
29811 	  return "bx\t%4";
29812 	}
29813       else
29814 	{
29815 	  output_asm_insn ("adr\t%4, %l2", operands);
29816 	  return "ldr\t%|pc, [%4, %0, lsl #2]";
29817 	}
29818     default:
29819       gcc_unreachable ();
29820     }
29821 }
29822 
29823 /* Implement TARGET_SCHED_ISSUE_RATE.  Lookup the issue rate in the
29824    per-core tuning structs.  */
29825 static int
29826 arm_issue_rate (void)
29827 {
29828   return current_tune->issue_rate;
29829 }
29830 
29831 /* Implement TARGET_SCHED_VARIABLE_ISSUE.  */
29832 static int
29833 arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
29834 {
29835   if (DEBUG_INSN_P (insn))
29836     return more;
29837 
29838   rtx_code code = GET_CODE (PATTERN (insn));
29839   if (code == USE || code == CLOBBER)
29840     return more;
29841 
29842   if (get_attr_type (insn) == TYPE_NO_INSN)
29843     return more;
29844 
29845   return more - 1;
29846 }
29847 
29848 /* Return how many instructions the scheduler should look ahead to choose
29849    the best one.  */
29850 static int
29851 arm_first_cycle_multipass_dfa_lookahead (void)
29852 {
29853   int issue_rate = arm_issue_rate ();
29854 
29855   return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
29856 }
29857 
29858 /* Enable modeling of L2 auto-prefetcher.  */
29859 static int
29860 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
29861 {
29862   return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
29863 }
29864 
29865 const char *
29866 arm_mangle_type (const_tree type)
29867 {
29868   /* The ARM ABI documents (10th October 2008) say that "__va_list"
29869      has to be mangled as if it is in the "std" namespace.  */
29870   if (TARGET_AAPCS_BASED
29871       && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
29872     return "St9__va_list";
29873 
29874   /* Half-precision floating point types.  */
29875   if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
29876     {
29877       if (TYPE_MODE (type) == BFmode)
29878 	return "u6__bf16";
29879       else
29880 	return "Dh";
29881     }
29882 
29883   /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
29884      builtin type.  */
29885   if (TYPE_NAME (type) != NULL)
29886     return arm_mangle_builtin_type (type);
29887 
29888   /* Use the default mangling.  */
29889   return NULL;
29890 }
29891 
29892 /* Order of allocation of core registers for Thumb: this allocation is
29893    written over the corresponding initial entries of the array
29894    initialized with REG_ALLOC_ORDER.  We allocate all low registers
29895    first.  Saving and restoring a low register is usually cheaper than
29896    using a call-clobbered high register.  */
29897 
29898 static const int thumb_core_reg_alloc_order[] =
29899 {
29900    3,  2,  1,  0,  4,  5,  6,  7,
29901   12, 14,  8,  9, 10, 11
29902 };
29903 
29904 /* Adjust register allocation order when compiling for Thumb.  */
29905 
29906 void
29907 arm_order_regs_for_local_alloc (void)
29908 {
29909   const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
29910   memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
29911   if (TARGET_THUMB)
29912     memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
29913             sizeof (thumb_core_reg_alloc_order));
29914 }
29915 
29916 /* Implement TARGET_FRAME_POINTER_REQUIRED.  */
29917 
29918 bool
29919 arm_frame_pointer_required (void)
29920 {
29921   if (SUBTARGET_FRAME_POINTER_REQUIRED)
29922     return true;
29923 
29924   /* If the function receives nonlocal gotos, it needs to save the frame
29925      pointer in the nonlocal_goto_save_area object.  */
29926   if (cfun->has_nonlocal_label)
29927     return true;
29928 
29929   /* The frame pointer is required for non-leaf APCS frames.  */
29930   if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
29931     return true;
29932 
29933   /* If we are probing the stack in the prologue, we will have a faulting
29934      instruction prior to the stack adjustment and this requires a frame
29935      pointer if we want to catch the exception using the EABI unwinder.  */
29936   if (!IS_INTERRUPT (arm_current_func_type ())
29937       && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
29938 	  || flag_stack_clash_protection)
29939       && arm_except_unwind_info (&global_options) == UI_TARGET
29940       && cfun->can_throw_non_call_exceptions)
29941     {
29942       HOST_WIDE_INT size = get_frame_size ();
29943 
29944       /* That's irrelevant if there is no stack adjustment.  */
29945       if (size <= 0)
29946 	return false;
29947 
29948       /* That's relevant only if there is a stack probe.  */
29949       if (crtl->is_leaf && !cfun->calls_alloca)
29950 	{
29951 	  /* We don't have the final size of the frame so adjust.  */
29952 	  size += 32 * UNITS_PER_WORD;
29953 	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
29954 	    return true;
29955 	}
29956       else
29957 	return true;
29958     }
29959 
29960   return false;
29961 }
29962 
29963 /* Thumb-1 is the only target that cannot support conditional execution,
29964    so return true if the target is not Thumb-1.  */
29965 static bool
29966 arm_have_conditional_execution (void)
29967 {
29968   return !TARGET_THUMB1;
29969 }
29970 
29971 /* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
29972 static HOST_WIDE_INT
29973 arm_vector_alignment (const_tree type)
29974 {
29975   HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
29976 
29977   if (TARGET_AAPCS_BASED)
29978     align = MIN (align, 64);
29979 
29980   return align;
29981 }
29982 
29983 static unsigned int
29984 arm_autovectorize_vector_modes (vector_modes *modes, bool)
29985 {
29986   if (!TARGET_NEON_VECTORIZE_DOUBLE)
29987     {
29988       modes->safe_push (V16QImode);
29989       modes->safe_push (V8QImode);
29990     }
29991   return 0;
29992 }
29993 
29994 static bool
29995 arm_vector_alignment_reachable (const_tree type, bool is_packed)
29996 {
29997   /* Vectors which aren't in packed structures will not be less aligned than
29998      the natural alignment of their element type, so this is safe.  */
29999   if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30000     return !is_packed;
30001 
30002   return default_builtin_vector_alignment_reachable (type, is_packed);
30003 }
30004 
30005 static bool
30006 arm_builtin_support_vector_misalignment (machine_mode mode,
30007 					 const_tree type, int misalignment,
30008 					 bool is_packed)
30009 {
30010   if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30011     {
30012       HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30013 
30014       if (is_packed)
30015         return align == 1;
30016 
30017       /* If the misalignment is unknown, we should be able to handle the access
30018 	 so long as it is not to a member of a packed data structure.  */
30019       if (misalignment == -1)
30020         return true;
30021 
30022       /* Return true if the misalignment is a multiple of the natural alignment
30023          of the vector's element type.  This is probably always going to be
30024 	 true in practice, since we've already established that this isn't a
30025 	 packed access.  */
30026       return ((misalignment % align) == 0);
30027     }
30028 
30029   return default_builtin_support_vector_misalignment (mode, type, misalignment,
30030 						      is_packed);
30031 }
30032 
30033 static void
30034 arm_conditional_register_usage (void)
30035 {
30036   int regno;
30037 
30038   if (TARGET_THUMB1 && optimize_size)
30039     {
30040       /* When optimizing for size on Thumb-1, it's better not
30041         to use the HI regs, because of the overhead of
30042         stacking them.  */
30043       for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
30044 	fixed_regs[regno] = call_used_regs[regno] = 1;
30045     }
30046 
30047   /* The link register can be clobbered by any branch insn,
30048      but we have no way to track that at present, so mark
30049      it as unavailable.  */
30050   if (TARGET_THUMB1)
30051     fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30052 
30053   if (TARGET_32BIT && TARGET_VFP_BASE)
30054     {
30055       /* VFPv3 registers are disabled when earlier VFP
30056 	 versions are selected due to the definition of
30057 	 LAST_VFP_REGNUM.  */
30058       for (regno = FIRST_VFP_REGNUM;
30059 	   regno <= LAST_VFP_REGNUM; ++ regno)
30060 	{
30061 	  fixed_regs[regno] = 0;
30062 	  call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30063 	    || regno >= FIRST_VFP_REGNUM + 32;
30064 	}
30065       if (TARGET_HAVE_MVE)
30066 	fixed_regs[VPR_REGNUM] = 0;
30067     }
30068 
30069   if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
30070     {
30071       regno = FIRST_IWMMXT_GR_REGNUM;
30072       /* The 2002/10/09 revision of the XScale ABI has wCG0
30073          and wCG1 as call-preserved registers.  The 2002/11/21
30074          revision changed this so that all wCG registers are
30075          scratch registers.  */
30076       for (regno = FIRST_IWMMXT_GR_REGNUM;
30077 	   regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30078 	fixed_regs[regno] = 0;
30079       /* The XScale ABI has wR0 - wR9 as scratch registers,
30080 	 the rest as call-preserved registers.  */
30081       for (regno = FIRST_IWMMXT_REGNUM;
30082 	   regno <= LAST_IWMMXT_REGNUM; ++ regno)
30083 	{
30084 	  fixed_regs[regno] = 0;
30085 	  call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30086 	}
30087     }
30088 
30089   if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30090     {
30091       fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30092       call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30093     }
30094   else if (TARGET_APCS_STACK)
30095     {
30096       fixed_regs[10]     = 1;
30097       call_used_regs[10] = 1;
30098     }
30099   /* -mcaller-super-interworking reserves r11 for calls to
30100      _interwork_r11_call_via_rN().  Making the register global
30101      is an easy way of ensuring that it remains valid for all
30102      calls.  */
30103   if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30104       || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30105     {
30106       fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30107       call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30108       if (TARGET_CALLER_INTERWORKING)
30109 	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30110     }
30111 
30112   /* The Q and GE bits are only accessed via special ACLE patterns.  */
30113   CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
30114   CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);
30115 
30116   SUBTARGET_CONDITIONAL_REGISTER_USAGE
30117 }
30118 
30119 static reg_class_t
30120 arm_preferred_rename_class (reg_class_t rclass)
30121 {
30122   /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30123      using GENERAL_REGS.  During the register rename pass, preferring
30124      LO_REGS can therefore reduce code size.  */
30125   if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30126     return LO_REGS;
30127   else
30128     return NO_REGS;
30129 }
30130 
30131 /* Compute the attribute "length" of insn "*push_multi".
30132    So this function MUST be kept in sync with that insn pattern.  */
30133 int
30134 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
30135 {
30136   int i, regno, hi_reg;
30137   int num_saves = XVECLEN (parallel_op, 0);
30138 
30139   /* ARM mode.  */
30140   if (TARGET_ARM)
30141     return 4;
30142   /* Thumb1 mode.  */
30143   if (TARGET_THUMB1)
30144     return 2;
30145 
30146   /* Thumb2 mode.  */
30147   regno = REGNO (first_op);
30148   /* For PUSH/STM under Thumb-2 mode, we can use 16-bit encodings if the
30149      register list fits in 8 bits.  Normally this means all registers in the
30150      list must be LO_REGS, that is (R0-R7).  If any HI_REGS are used, then we
30151      must use 32-bit encodings.  The one exception is PUSH, where LR (a
30152      HI_REG) can still be used with the 16-bit encoding.  */
30153   hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30154   for (i = 1; i < num_saves && !hi_reg; i++)
30155     {
30156       regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30157       hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30158     }
30159 
30160   if (!hi_reg)
30161     return 2;
30162   return 4;
30163 }
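
/* Editorial example (not part of the original source): in Thumb-2,
   "push {r0-r3, lr}" involves only low registers plus LR, so the function
   above returns 2 (16-bit encoding), whereas "push {r4, r8}" uses a high
   register other than LR and returns 4 (32-bit encoding).  */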
30164 
30165 /* Compute the attribute "length" of an insn.  Currently, this function is used
30166    for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
30167    "*pop_multiple_with_writeback_and_return".  OPERANDS is the toplevel PARALLEL
30168    rtx, RETURN_PC is true if OPERANDS contains a return insn.  WRITE_BACK_P is
30169    true if OPERANDS contains an insn which explicitly updates the base register.  */
30170 
30171 int
30172 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
30173 {
30174   /* ARM mode.  */
30175   if (TARGET_ARM)
30176     return 4;
30177   /* Thumb1 mode.  */
30178   if (TARGET_THUMB1)
30179     return 2;
30180 
30181   rtx parallel_op = operands[0];
30182   /* Initialize to the index of the last element of the PARALLEL.  */
30183   unsigned indx = XVECLEN (parallel_op, 0) - 1;
30184   /* Initialize the value to the base register number.  */
30185   unsigned regno = REGNO (operands[1]);
30186   /* Skip return and write back pattern.
30187      We only need register pop pattern for later analysis.  */
30188   unsigned first_indx = 0;
30189   first_indx += return_pc ? 1 : 0;
30190   first_indx += write_back_p ? 1 : 0;
30191 
30192   /* A pop operation can be done through LDM or POP.  If the base register is SP
30193      and write back is used, then an LDM is an alias of POP.  */
30194   bool pop_p = (regno == SP_REGNUM && write_back_p);
30195   bool ldm_p = !pop_p;
30196 
30197   /* Check base register for LDM.  */
30198   if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
30199     return 4;
30200 
30201   /* Check each register in the list.  */
30202   for (; indx >= first_indx; indx--)
30203     {
30204       regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
30205       /* For POP, PC in HI_REGS can be used with 16-bit encoding.  See similar
30206 	 comment in arm_attr_length_push_multi.  */
30207       if (REGNO_REG_CLASS (regno) == HI_REGS
30208 	  && (regno != PC_REGNUM || ldm_p))
30209 	return 4;
30210     }
30211 
30212   return 2;
30213 }
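
/* Editorial example (not part of the original source): in Thumb-2,
   "pop {r0-r3, pc}" with SP as the written-back base is a true POP and
   gets length 2, since PC is the one high register allowed in the 16-bit
   form; popping into another high register such as r8, or using LDM with a
   high base register, forces the 32-bit encoding and length 4.  */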
30214 
30215 /* Compute the number of instructions emitted by output_move_double.  */
30216 int
30217 arm_count_output_move_double_insns (rtx *operands)
30218 {
30219   int count;
30220   rtx ops[2];
30221   /* output_move_double may modify the operands array, so call it
30222      here on a copy of the array.  */
30223   ops[0] = operands[0];
30224   ops[1] = operands[1];
30225   output_move_double (ops, false, &count);
30226   return count;
30227 }
30228 
30229 /* Same as above, but operands are a register/memory pair in SImode.
30230    Assumes operands has the base register in position 0 and memory in position
30231    2 (which is the order provided by the arm_{ldrd,strd} patterns).  */
30232 int
30233 arm_count_ldrdstrd_insns (rtx *operands, bool load)
30234 {
30235   int count;
30236   rtx ops[2];
30237   int regnum, memnum;
30238   if (load)
30239     regnum = 0, memnum = 1;
30240   else
30241     regnum = 1, memnum = 0;
30242   ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
30243   ops[memnum] = adjust_address (operands[2], DImode, 0);
30244   output_move_double (ops, false, &count);
30245   return count;
30246 }
30247 
30248 
30249 int
30250 vfp3_const_double_for_fract_bits (rtx operand)
30251 {
30252   REAL_VALUE_TYPE r0;
30253 
30254   if (!CONST_DOUBLE_P (operand))
30255     return 0;
30256 
30257   r0 = *CONST_DOUBLE_REAL_VALUE (operand);
30258   if (exact_real_inverse (DFmode, &r0)
30259       && !REAL_VALUE_NEGATIVE (r0))
30260     {
30261       if (exact_real_truncate (DFmode, &r0))
30262 	{
30263 	  HOST_WIDE_INT value = real_to_integer (&r0);
30264 	  value = value & 0xffffffff;
30265 	  if ((value != 0) && ( (value & (value - 1)) == 0))
30266 	    {
30267 	      int ret = exact_log2 (value);
30268 	      gcc_assert (IN_RANGE (ret, 0, 31));
30269 	      return ret;
30270 	    }
30271 	}
30272     }
30273   return 0;
30274 }
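
/* Editorial example (not part of the original source): for the constant
   0.25 the exact inverse is 4.0, which truncates exactly and is a power of
   two, so the function above returns 2 (i.e. 4 == 1 << 2); constants that
   are not exact reciprocals of powers of two return 0.  */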
30275 
30276 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
30277    log2 is in [1, 32], return that log2.  Otherwise return -1.
30278    This is used in the patterns for vcvt.s32.f32 floating-point to
30279    fixed-point conversions.  */
30280 
30281 int
30282 vfp3_const_double_for_bits (rtx x)
30283 {
30284   const REAL_VALUE_TYPE *r;
30285 
30286   if (!CONST_DOUBLE_P (x))
30287     return -1;
30288 
30289   r = CONST_DOUBLE_REAL_VALUE (x);
30290 
30291   if (REAL_VALUE_NEGATIVE (*r)
30292       || REAL_VALUE_ISNAN (*r)
30293       || REAL_VALUE_ISINF (*r)
30294       || !real_isinteger (r, SFmode))
30295     return -1;
30296 
30297   HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
30298 
30299 /* The exact_log2 above will have returned -1 if this is
30300    not an exact log2.  */
30301   if (!IN_RANGE (hwint, 1, 32))
30302     return -1;
30303 
30304   return hwint;
30305 }
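
/* Editorial example (not part of the original source): the constant 256.0
   makes the function above return 8 (an exact power of two whose log2 lies
   in [1, 32]), while 3.0 or -4.0 return -1.  */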
30306 
30307 
30308 /* Emit a memory barrier around an atomic sequence according to MODEL.  */
30309 
30310 static void
30311 arm_pre_atomic_barrier (enum memmodel model)
30312 {
30313   if (need_atomic_barrier_p (model, true))
30314     emit_insn (gen_memory_barrier ());
30315 }
30316 
30317 static void
30318 arm_post_atomic_barrier (enum memmodel model)
30319 {
30320   if (need_atomic_barrier_p (model, false))
30321     emit_insn (gen_memory_barrier ());
30322 }
30323 
30324 /* Emit the load-exclusive and store-exclusive instructions.
30325    Use acquire and release versions if necessary.  */
30326 
30327 static void
30328 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
30329 {
30330   rtx (*gen) (rtx, rtx);
30331 
30332   if (acq)
30333     {
30334       switch (mode)
30335         {
30336         case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
30337         case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
30338         case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
30339         case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
30340         default:
30341           gcc_unreachable ();
30342         }
30343     }
30344   else
30345     {
30346       switch (mode)
30347         {
30348         case E_QImode: gen = gen_arm_load_exclusiveqi; break;
30349         case E_HImode: gen = gen_arm_load_exclusivehi; break;
30350         case E_SImode: gen = gen_arm_load_exclusivesi; break;
30351         case E_DImode: gen = gen_arm_load_exclusivedi; break;
30352         default:
30353           gcc_unreachable ();
30354         }
30355     }
30356 
30357   emit_insn (gen (rval, mem));
30358 }
30359 
30360 static void
30361 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
30362                           rtx mem, bool rel)
30363 {
30364   rtx (*gen) (rtx, rtx, rtx);
30365 
30366   if (rel)
30367     {
30368       switch (mode)
30369         {
30370         case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
30371         case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
30372         case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
30373         case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
30374         default:
30375           gcc_unreachable ();
30376         }
30377     }
30378   else
30379     {
30380       switch (mode)
30381         {
30382         case E_QImode: gen = gen_arm_store_exclusiveqi; break;
30383         case E_HImode: gen = gen_arm_store_exclusivehi; break;
30384         case E_SImode: gen = gen_arm_store_exclusivesi; break;
30385         case E_DImode: gen = gen_arm_store_exclusivedi; break;
30386         default:
30387           gcc_unreachable ();
30388         }
30389     }
30390 
30391   emit_insn (gen (bval, rval, mem));
30392 }
30393 
30394 /* Mark the previous jump instruction as unlikely.  */
30395 
30396 static void
30397 emit_unlikely_jump (rtx insn)
30398 {
30399   rtx_insn *jump = emit_jump_insn (insn);
30400   add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
30401 }
30402 
30403 /* Expand a compare and swap pattern.  */
30404 
30405 void
30406 arm_expand_compare_and_swap (rtx operands[])
30407 {
30408   rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
30409   machine_mode mode, cmp_mode;
30410 
30411   bval = operands[0];
30412   rval = operands[1];
30413   mem = operands[2];
30414   oldval = operands[3];
30415   newval = operands[4];
30416   is_weak = operands[5];
30417   mod_s = operands[6];
30418   mod_f = operands[7];
30419   mode = GET_MODE (mem);
30420 
30421   /* Normally the succ memory model must be stronger than fail, but in the
30422      unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30423      promote succ to ACQ_REL so that we don't lose the acquire semantics.  */
30424 
30425   if (TARGET_HAVE_LDACQ
30426       && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
30427       && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
30428     mod_s = GEN_INT (MEMMODEL_ACQ_REL);
30429 
30430   switch (mode)
30431     {
30432     case E_QImode:
30433     case E_HImode:
30434       /* For narrow modes, we're going to perform the comparison in SImode,
30435 	 so do the zero-extension now.  */
30436       rval = gen_reg_rtx (SImode);
30437       oldval = convert_modes (SImode, mode, oldval, true);
30438       /* FALLTHRU */
30439 
30440     case E_SImode:
30441       /* Force the value into a register if needed.  We waited until after
30442 	 the zero-extension above to do this properly.  */
30443       if (!arm_add_operand (oldval, SImode))
30444 	oldval = force_reg (SImode, oldval);
30445       break;
30446 
30447     case E_DImode:
30448       if (!cmpdi_operand (oldval, mode))
30449 	oldval = force_reg (mode, oldval);
30450       break;
30451 
30452     default:
30453       gcc_unreachable ();
30454     }
30455 
30456   if (TARGET_THUMB1)
30457     cmp_mode = E_SImode;
30458   else
30459     cmp_mode = CC_Zmode;
30460 
30461   bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
30462   emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
30463                                         oldval, newval, is_weak, mod_s, mod_f));
30464 
30465   if (mode == QImode || mode == HImode)
30466     emit_move_insn (operands[1], gen_lowpart (mode, rval));
30467 
30468   /* In all cases, we arrange for success to be signaled by Z set.
30469      This arrangement allows for the boolean result to be used directly
30470      in a subsequent branch, post optimization.  For Thumb-1 targets, the
30471      boolean negation of the result is also stored in bval because Thumb-1
30472      backend lacks dependency tracking for CC flag due to flag-setting not
30473      being represented at RTL level.  */
30474   if (TARGET_THUMB1)
30475       emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
30476   else
30477     {
30478       x = gen_rtx_EQ (SImode, bdst, const0_rtx);
30479       emit_insn (gen_rtx_SET (bval, x));
30480     }
30481 }
30482 
30483 /* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
30484    another memory store between the load-exclusive and store-exclusive can
30485    reset the monitor from Exclusive to Open state.  This means we must wait
30486    until after reload to split the pattern, lest we get a register spill in
30487    the middle of the atomic sequence.  Success of the compare and swap is
30488    indicated by the Z flag set for 32bit targets and by neg_bval being zero
30489    for Thumb-1 targets (ie. negation of the boolean value returned by
30490    atomic_compare_and_swapmode standard pattern in operand 0).  */
30491 
30492 void
30493 arm_split_compare_and_swap (rtx operands[])
30494 {
30495   rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
30496   machine_mode mode;
30497   enum memmodel mod_s, mod_f;
30498   bool is_weak;
30499   rtx_code_label *label1, *label2;
30500   rtx x, cond;
30501 
30502   rval = operands[1];
30503   mem = operands[2];
30504   oldval = operands[3];
30505   newval = operands[4];
30506   is_weak = (operands[5] != const0_rtx);
30507   mod_s_rtx = operands[6];
30508   mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
30509   mod_f = memmodel_from_int (INTVAL (operands[7]));
30510   neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
30511   mode = GET_MODE (mem);
30512 
30513   bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
30514 
30515   bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
30516   bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
30517 
30518   /* For ARMv8, the load-acquire is too weak for __sync memory orders.  Instead,
30519      a full barrier is emitted after the store-release.  */
30520   if (is_armv8_sync)
30521     use_acquire = false;
30522 
30523   /* Checks whether a barrier is needed and emits one accordingly.  */
30524   if (!(use_acquire || use_release))
30525     arm_pre_atomic_barrier (mod_s);
30526 
30527   label1 = NULL;
30528   if (!is_weak)
30529     {
30530       label1 = gen_label_rtx ();
30531       emit_label (label1);
30532     }
30533   label2 = gen_label_rtx ();
30534 
30535   arm_emit_load_exclusive (mode, rval, mem, use_acquire);
30536 
30537   /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
30538      as required to communicate with arm_expand_compare_and_swap.  */
30539   if (TARGET_32BIT)
30540     {
30541       cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
30542       x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30543       x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30544 				gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
30545       emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
30546     }
30547   else
30548     {
30549       cond = gen_rtx_NE (VOIDmode, rval, oldval);
30550       if (thumb1_cmpneg_operand (oldval, SImode))
30551 	{
30552 	  rtx src = rval;
30553 	  if (!satisfies_constraint_L (oldval))
30554 	    {
30555 	      gcc_assert (satisfies_constraint_J (oldval));
30556 
30557 	      /* For such immediates, ADDS needs the source and destination regs
30558 		 to be the same.
30559 
30560 		 Normally this would be handled by RA, but this is all happening
30561 		 after RA.  */
30562 	      emit_move_insn (neg_bval, rval);
30563 	      src = neg_bval;
30564 	    }
30565 
30566 	  emit_unlikely_jump (gen_cbranchsi4_neg_late (neg_bval, src, oldval,
30567 						       label2, cond));
30568 	}
30569       else
30570 	{
30571 	  emit_move_insn (neg_bval, const1_rtx);
30572 	  emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
30573 	}
30574     }
30575 
30576   arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
30577 
30578   /* Weak or strong, we want EQ to be true for success, so that we
30579      match the flags that we got from the compare above.  */
30580   if (TARGET_32BIT)
30581     {
30582       cond = gen_rtx_REG (CCmode, CC_REGNUM);
30583       x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
30584       emit_insn (gen_rtx_SET (cond, x));
30585     }
30586 
30587   if (!is_weak)
30588     {
30589       /* Z is set to boolean value of !neg_bval, as required to communicate
30590 	 with arm_expand_compare_and_swap.  */
30591       x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
30592       emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
30593     }
30594 
30595   if (!is_mm_relaxed (mod_f))
30596     emit_label (label2);
30597 
30598   /* Checks whether a barrier is needed and emits one accordingly.  */
30599   if (is_armv8_sync
30600       || !(use_acquire || use_release))
30601     arm_post_atomic_barrier (mod_s);
30602 
30603   if (is_mm_relaxed (mod_f))
30604     emit_label (label2);
30605 }
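
/* Editorial sketch (not part of the original source): for a strong SImode
   compare-and-swap on a 32-bit target, the split above produces a loop of
   roughly this shape,

	.Lretry:
	ldrex	Rval, [Rmem]
	cmp	Rval, Roldval
	bne	.Ldone
	strex	Rtmp, Rnewval, [Rmem]
	cmp	Rtmp, #0
	bne	.Lretry
	.Ldone:

   with LDAEX/STLEX or explicit barriers substituted as the memory model
   requires.  */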
30606 
30607 /* Split an atomic operation pattern.  Operation is given by CODE and is one
30608    of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
30609    operation).  Operation is performed on the content at MEM and on VALUE
30610    following the memory model MODEL_RTX.  The content at MEM before and after
30611    the operation is returned in OLD_OUT and NEW_OUT respectively while the
30612    success of the operation is returned in COND.  Using a scratch register or
30613    an operand register for these determines what result is returned for that
30614    pattern.  */
30615 
30616 void
30617 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
30618 		     rtx value, rtx model_rtx, rtx cond)
30619 {
30620   enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
30621   machine_mode mode = GET_MODE (mem);
30622   machine_mode wmode = (mode == DImode ? DImode : SImode);
30623   rtx_code_label *label;
30624   bool all_low_regs, bind_old_new;
30625   rtx x;
30626 
30627   bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
30628 
30629   bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
30630   bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
30631 
30632   /* For ARMv8, a load-acquire is too weak for __sync memory orders.  Instead,
30633      a full barrier is emitted after the store-release.  */
30634   if (is_armv8_sync)
30635     use_acquire = false;
30636 
30637   /* Checks whether a barrier is needed and emits one accordingly.  */
30638   if (!(use_acquire || use_release))
30639     arm_pre_atomic_barrier (model);
30640 
30641   label = gen_label_rtx ();
30642   emit_label (label);
30643 
30644   if (new_out)
30645     new_out = gen_lowpart (wmode, new_out);
30646   if (old_out)
30647     old_out = gen_lowpart (wmode, old_out);
30648   else
30649     old_out = new_out;
30650   value = simplify_gen_subreg (wmode, value, mode, 0);
30651 
30652   arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
30653 
30654   /* Does the operation require destination and first operand to use the same
30655      register?  This is decided by register constraints of relevant insn
30656      patterns in thumb1.md.  */
30657   gcc_assert (!new_out || REG_P (new_out));
30658   all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
30659 		 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
30660 		 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
30661   bind_old_new =
30662     (TARGET_THUMB1
30663      && code != SET
30664      && code != MINUS
30665      && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
30666 
30667   /* We want to return the old value while putting the result of the operation
30668      in the same register as the old value so copy the old value over to the
30669      destination register and use that register for the operation.  */
30670   if (old_out && bind_old_new)
30671     {
30672       emit_move_insn (new_out, old_out);
30673       old_out = new_out;
30674     }
30675 
30676   switch (code)
30677     {
30678     case SET:
30679       new_out = value;
30680       break;
30681 
30682     case NOT:
30683       x = gen_rtx_AND (wmode, old_out, value);
30684       emit_insn (gen_rtx_SET (new_out, x));
30685       x = gen_rtx_NOT (wmode, new_out);
30686       emit_insn (gen_rtx_SET (new_out, x));
30687       break;
30688 
30689     case MINUS:
30690       if (CONST_INT_P (value))
30691 	{
30692 	  value = gen_int_mode (-INTVAL (value), wmode);
30693 	  code = PLUS;
30694 	}
30695       /* FALLTHRU */
30696 
30697     case PLUS:
30698       if (mode == DImode)
30699 	{
30700 	  /* DImode plus/minus need to clobber flags.  */
30701 	  /* The adddi3 and subdi3 patterns are incorrectly written so that
30702 	     they require matching operands, even when we could easily support
30703 	     three operands.  Thankfully, this can be fixed up post-splitting,
30704 	     as the individual add+adc patterns do accept three operands and
30705 	     post-reload cprop can make these moves go away.  */
30706 	  emit_move_insn (new_out, old_out);
30707 	  if (code == PLUS)
30708 	    x = gen_adddi3 (new_out, new_out, value);
30709 	  else
30710 	    x = gen_subdi3 (new_out, new_out, value);
30711 	  emit_insn (x);
30712 	  break;
30713 	}
30714       /* FALLTHRU */
30715 
30716     default:
30717       x = gen_rtx_fmt_ee (code, wmode, old_out, value);
30718       emit_insn (gen_rtx_SET (new_out, x));
30719       break;
30720     }
30721 
30722   arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
30723                             use_release);
30724 
30725   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30726   emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
30727 
30728   /* Checks whether a barrier is needed and emits one accordingly.  */
30729   if (is_armv8_sync
30730       || !(use_acquire || use_release))
30731     arm_post_atomic_barrier (model);
30732 }
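
/* Editorial sketch (not part of the original source): for an SImode atomic
   fetch-and-add with relaxed ordering, the split above boils down to

	.Lretry:
	ldrex	Rold, [Rmem]
	add	Rnew, Rold, Rvalue
	strex	Rtmp, Rnew, [Rmem]
	cmp	Rtmp, #0
	bne	.Lretry

   where OLD_OUT and NEW_OUT receive the pre- and post-operation values and
   COND holds the STREX status.  */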
30733 
30734 #define MAX_VECT_LEN 16
30735 
30736 struct expand_vec_perm_d
30737 {
30738   rtx target, op0, op1;
30739   vec_perm_indices perm;
30740   machine_mode vmode;
30741   bool one_vector_p;
30742   bool testing_p;
30743 };
30744 
30745 /* Generate a variable permutation.  */
30746 
30747 static void
30748 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
30749 {
30750   machine_mode vmode = GET_MODE (target);
30751   bool one_vector_p = rtx_equal_p (op0, op1);
30752 
30753   gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
30754   gcc_checking_assert (GET_MODE (op0) == vmode);
30755   gcc_checking_assert (GET_MODE (op1) == vmode);
30756   gcc_checking_assert (GET_MODE (sel) == vmode);
30757   gcc_checking_assert (TARGET_NEON);
30758 
30759   if (one_vector_p)
30760     {
30761       if (vmode == V8QImode)
30762 	emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
30763       else
30764 	emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
30765     }
30766   else
30767     {
30768       rtx pair;
30769 
30770       if (vmode == V8QImode)
30771 	{
30772 	  pair = gen_reg_rtx (V16QImode);
30773 	  emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
30774 	  pair = gen_lowpart (TImode, pair);
30775 	  emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
30776 	}
30777       else
30778 	{
30779 	  pair = gen_reg_rtx (OImode);
30780 	  emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
30781 	  emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
30782 	}
30783     }
30784 }
30785 
30786 void
30787 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
30788 {
30789   machine_mode vmode = GET_MODE (target);
30790   unsigned int nelt = GET_MODE_NUNITS (vmode);
30791   bool one_vector_p = rtx_equal_p (op0, op1);
30792   rtx mask;
30793 
30794   /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
30795      numbering of elements for big-endian, we must reverse the order.  */
30796   gcc_checking_assert (!BYTES_BIG_ENDIAN);
30797 
30798   /* The VTBL instruction does not use a modulo index, so we must take care
30799      of that ourselves.  */
30800   mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
30801   mask = gen_const_vec_duplicate (vmode, mask);
30802   sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
30803 
30804   arm_expand_vec_perm_1 (target, op0, op1, sel);
30805 }
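
/* Editorial example (not part of the original source): for a two-operand
   V8QImode permutation the mask computed above is 15 (2 * 8 - 1), so a
   selector element of 17 is reduced to 1 and picks the second byte of the
   {OP0, OP1} table built by the VTBL expansion, giving the modulo
   behaviour that the VTBL instruction itself does not provide.  */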
30806 
30807 /* Map lane ordering between architectural lane order, and GCC lane order,
30808    taking into account ABI.  See comment above output_move_neon for details.  */
30809 
30810 static int
30811 neon_endian_lane_map (machine_mode mode, int lane)
30812 {
30813   if (BYTES_BIG_ENDIAN)
30814   {
30815     int nelems = GET_MODE_NUNITS (mode);
30816     /* Reverse lane order.  */
30817     lane = (nelems - 1 - lane);
30818     /* Reverse D register order, to match ABI.  */
30819     if (GET_MODE_SIZE (mode) == 16)
30820       lane = lane ^ (nelems / 2);
30821   }
30822   return lane;
30823 }
30824 
30825 /* Some permutations index into pairs of vectors, this is a helper function
30826    to map indexes into those pairs of vectors.  */
30827 
30828 static int
30829 neon_pair_endian_lane_map (machine_mode mode, int lane)
30830 {
30831   int nelem = GET_MODE_NUNITS (mode);
30832   if (BYTES_BIG_ENDIAN)
30833     lane =
30834       neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
30835   return lane;
30836 }
30837 
30838 /* Generate or test for an insn that supports a constant permutation.  */
30839 
30840 /* Recognize patterns for the VUZP insns.  */
30841 
30842 static bool
30843 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
30844 {
30845   unsigned int i, odd, mask, nelt = d->perm.length ();
30846   rtx out0, out1, in0, in1;
30847   int first_elem;
30848   int swap_nelt;
30849 
30850   if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30851     return false;
30852 
30853   /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
30854      big endian pattern on 64 bit vectors, so we correct for that.  */
30855   swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
30856     && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
30857 
30858   first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
30859 
30860   if (first_elem == neon_endian_lane_map (d->vmode, 0))
30861     odd = 0;
30862   else if (first_elem == neon_endian_lane_map (d->vmode, 1))
30863     odd = 1;
30864   else
30865     return false;
30866   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30867 
30868   for (i = 0; i < nelt; i++)
30869     {
30870       unsigned elt =
30871 	(neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
30872       if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
30873 	return false;
30874     }
30875 
30876   /* Success!  */
30877   if (d->testing_p)
30878     return true;
30879 
30880   in0 = d->op0;
30881   in1 = d->op1;
30882   if (swap_nelt != 0)
30883     std::swap (in0, in1);
30884 
30885   out0 = d->target;
30886   out1 = gen_reg_rtx (d->vmode);
30887   if (odd)
30888     std::swap (out0, out1);
30889 
30890   emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
30891   return true;
30892 }
30893 
30894 /* Recognize patterns for the VZIP insns.  */
30895 
30896 static bool
30897 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
30898 {
30899   unsigned int i, high, mask, nelt = d->perm.length ();
30900   rtx out0, out1, in0, in1;
30901   int first_elem;
30902   bool is_swapped;
30903 
30904   if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30905     return false;
30906 
30907   is_swapped = BYTES_BIG_ENDIAN;
30908 
30909   first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
30910 
30911   high = nelt / 2;
30912   if (first_elem == neon_endian_lane_map (d->vmode, high))
30913     ;
30914   else if (first_elem == neon_endian_lane_map (d->vmode, 0))
30915     high = 0;
30916   else
30917     return false;
30918   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30919 
30920   for (i = 0; i < nelt / 2; i++)
30921     {
30922       unsigned elt =
30923 	neon_pair_endian_lane_map (d->vmode, i + high) & mask;
30924       if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
30925 	  != elt)
30926 	return false;
30927       elt =
30928 	neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
30929       if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
30930 	  != elt)
30931 	return false;
30932     }
30933 
30934   /* Success!  */
30935   if (d->testing_p)
30936     return true;
30937 
30938   in0 = d->op0;
30939   in1 = d->op1;
30940   if (is_swapped)
30941     std::swap (in0, in1);
30942 
30943   out0 = d->target;
30944   out1 = gen_reg_rtx (d->vmode);
30945   if (high)
30946     std::swap (out0, out1);
30947 
30948   emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
30949   return true;
30950 }
30951 
30952 /* Recognize patterns for the VREV insns.  */
30953 static bool
30954 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
30955 {
30956   unsigned int i, j, diff, nelt = d->perm.length ();
30957   rtx (*gen) (machine_mode, rtx, rtx);
30958 
30959   if (!d->one_vector_p)
30960     return false;
30961 
30962   diff = d->perm[0];
30963   switch (diff)
30964     {
30965     case 7:
30966        switch (d->vmode)
30967         {
30968          case E_V16QImode:
30969          case E_V8QImode:
30970           gen = gen_neon_vrev64;
30971           break;
30972          default:
30973           return false;
30974         }
30975        break;
30976     case 3:
30977        switch (d->vmode)
30978         {
30979 	case E_V16QImode:
30980 	case E_V8QImode:
30981           gen = gen_neon_vrev32;
30982           break;
30983 	case E_V8HImode:
30984 	case E_V4HImode:
30985 	case E_V8HFmode:
30986 	case E_V4HFmode:
30987           gen = gen_neon_vrev64;
30988           break;
30989 	default:
30990 	  return false;
30991 	}
30992       break;
30993     case 1:
30994       switch (d->vmode)
30995 	{
30996 	case E_V16QImode:
30997 	case E_V8QImode:
30998           gen = gen_neon_vrev16;
30999           break;
31000 	case E_V8HImode:
31001 	case E_V4HImode:
31002           gen = gen_neon_vrev32;
31003           break;
31004 	case E_V4SImode:
31005 	case E_V2SImode:
31006 	case E_V4SFmode:
31007 	case E_V2SFmode:
31008           gen = gen_neon_vrev64;
31009 	  break;
31010         default:
31011 	  return false;
31012 	}
31013       break;
31014     default:
31015       return false;
31016     }
31017 
31018   for (i = 0; i < nelt ; i += diff + 1)
31019     for (j = 0; j <= diff; j += 1)
31020       {
31021 	/* This is guaranteed to be true as the value of diff
31022 	   is 7, 3, 1 and we should have enough elements in the
31023 	   queue to generate this. Getting a vector mask with a
31024 	   value of diff other than these values implies that
31025 	   something is wrong by the time we get here.  */
31026 	gcc_assert (i + j < nelt);
31027 	if (d->perm[i + j] != i + diff - j)
31028 	  return false;
31029       }
31030 
31031   /* Success! */
31032   if (d->testing_p)
31033     return true;
31034 
31035   emit_insn (gen (d->vmode, d->target, d->op0));
31036   return true;
31037 }
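
/* Editorial example (not part of the original source): for V8QImode the
   single-operand permutation {1,0,3,2,5,4,7,6} has diff == 1 and passes the
   checks above, so it is emitted as one VREV16 of the input vector.  */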
31038 
31039 /* Recognize patterns for the VTRN insns.  */
31040 
31041 static bool
31042 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
31043 {
31044   unsigned int i, odd, mask, nelt = d->perm.length ();
31045   rtx out0, out1, in0, in1;
31046 
31047   if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31048     return false;
31049 
31050   /* Note that these are little-endian tests.  Adjust for big-endian later.  */
31051   if (d->perm[0] == 0)
31052     odd = 0;
31053   else if (d->perm[0] == 1)
31054     odd = 1;
31055   else
31056     return false;
31057   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31058 
31059   for (i = 0; i < nelt; i += 2)
31060     {
31061       if (d->perm[i] != i + odd)
31062 	return false;
31063       if (d->perm[i + 1] != ((i + nelt + odd) & mask))
31064 	return false;
31065     }
31066 
31067   /* Success!  */
31068   if (d->testing_p)
31069     return true;
31070 
31071   in0 = d->op0;
31072   in1 = d->op1;
31073   if (BYTES_BIG_ENDIAN)
31074     {
31075       std::swap (in0, in1);
31076       odd = !odd;
31077     }
31078 
31079   out0 = d->target;
31080   out1 = gen_reg_rtx (d->vmode);
31081   if (odd)
31082     std::swap (out0, out1);
31083 
31084   emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
31085   return true;
31086 }
31087 
31088 /* Recognize patterns for the VEXT insns.  */
31089 
31090 static bool
31091 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
31092 {
31093   unsigned int i, nelt = d->perm.length ();
31094   rtx offset;
31095 
31096   unsigned int location;
31097 
31098   unsigned int next  = d->perm[0] + 1;
31099 
31100   /* TODO: Handle GCC's numbering of elements for big-endian.  */
31101   if (BYTES_BIG_ENDIAN)
31102     return false;
31103 
31104   /* Check if the extracted indexes are increasing by one.  */
31105   for (i = 1; i < nelt; next++, i++)
31106     {
31107       /* If we hit the most significant element of the 2nd vector in
31108 	 the previous iteration, no need to test further.  */
31109       if (next == 2 * nelt)
31110 	return false;
31111 
31112       /* If we are operating on only one vector: it could be a
31113 	 rotation.  If there are only two elements of size < 64, let
31114 	 arm_evpc_neon_vrev catch it.  */
31115       if (d->one_vector_p && (next == nelt))
31116 	{
31117 	  if ((nelt == 2) && (d->vmode != V2DImode))
31118 	    return false;
31119 	  else
31120 	    next = 0;
31121 	}
31122 
31123       if (d->perm[i] != next)
31124 	return false;
31125     }
31126 
31127   location = d->perm[0];
31128 
31129   /* Success! */
31130   if (d->testing_p)
31131     return true;
31132 
31133   offset = GEN_INT (location);
31134 
31135   if(d->vmode == E_DImode)
31136     return false;
31137 
31138   emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
31139   return true;
31140 }
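
/* Editorial example (not part of the original source): with two V8QImode
   operands, the permutation {3,4,5,6,7,8,9,10} selects eight consecutive
   bytes starting at index 3 of the concatenated pair, so the check above
   matches it and a single VEXT with offset #3 is emitted.  */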
31141 
31142 /* The NEON VTBL instruction is a fully variable permutation that's even
31143    stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
31144    is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
31145    can do slightly better by expanding this as a constant where we don't
31146    have to apply a mask.  */
31147 
31148 static bool
31149 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
31150 {
31151   rtx rperm[MAX_VECT_LEN], sel;
31152   machine_mode vmode = d->vmode;
31153   unsigned int i, nelt = d->perm.length ();
31154 
31155   /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
31156      numbering of elements for big-endian, we must reverse the order.  */
31157   if (BYTES_BIG_ENDIAN)
31158     return false;
31159 
31160   if (d->testing_p)
31161     return true;
31162 
31163   /* Generic code will try constant permutation twice: once with the
31164      original mode and again with the elements lowered to QImode.
31165      So wait and don't do the selector expansion ourselves.  */
31166   if (vmode != V8QImode && vmode != V16QImode)
31167     return false;
31168 
31169   for (i = 0; i < nelt; ++i)
31170     rperm[i] = GEN_INT (d->perm[i]);
31171   sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
31172   sel = force_reg (vmode, sel);
31173 
31174   arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
31175   return true;
31176 }
31177 
31178 static bool
31179 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
31180 {
31181   /* Check if the input mask matches vext before reordering the
31182      operands.  */
31183   if (TARGET_NEON)
31184     if (arm_evpc_neon_vext (d))
31185       return true;
31186 
31187   /* The pattern matching functions above are written to look for a small
31188      number to begin the sequence (0, 1, N/2).  If we begin with an index
31189      from the second operand, we can swap the operands.  */
31190   unsigned int nelt = d->perm.length ();
31191   if (d->perm[0] >= nelt)
31192     {
31193       d->perm.rotate_inputs (1);
31194       std::swap (d->op0, d->op1);
31195     }
31196 
31197   if (TARGET_NEON)
31198     {
31199       if (arm_evpc_neon_vuzp (d))
31200 	return true;
31201       if (arm_evpc_neon_vzip (d))
31202 	return true;
31203       if (arm_evpc_neon_vrev (d))
31204 	return true;
31205       if (arm_evpc_neon_vtrn (d))
31206 	return true;
31207       return arm_evpc_neon_vtbl (d);
31208     }
31209   return false;
31210 }
31211 
31212 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  */
31213 
31214 static bool
31215 arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
31216 			      const vec_perm_indices &sel)
31217 {
31218   struct expand_vec_perm_d d;
31219   int i, nelt, which;
31220 
31221   if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
31222     return false;
31223 
31224   d.target = target;
31225   d.op0 = op0;
31226   d.op1 = op1;
31227 
31228   d.vmode = vmode;
31229   gcc_assert (VECTOR_MODE_P (d.vmode));
31230   d.testing_p = !target;
31231 
31232   nelt = GET_MODE_NUNITS (d.vmode);
31233   for (i = which = 0; i < nelt; ++i)
31234     {
31235       int ei = sel[i] & (2 * nelt - 1);
31236       which |= (ei < nelt ? 1 : 2);
31237     }
31238 
31239   switch (which)
31240     {
31241     default:
31242       gcc_unreachable();
31243 
31244     case 3:
31245       d.one_vector_p = false;
31246       if (d.testing_p || !rtx_equal_p (op0, op1))
31247 	break;
31248 
31249       /* The elements of PERM do not suggest that only the first operand
31250 	 is used, but both operands are identical.  Allow easier matching
31251 	 of the permutation by folding the permutation into the single
31252 	 input vector.  */
31253       /* FALLTHRU */
31254     case 2:
31255       d.op0 = op1;
31256       d.one_vector_p = true;
31257       break;
31258 
31259     case 1:
31260       d.op1 = op0;
31261       d.one_vector_p = true;
31262       break;
31263     }
31264 
31265   d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
31266 
31267   if (!d.testing_p)
31268     return arm_expand_vec_perm_const_1 (&d);
31269 
31270   d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31271   d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31272   if (!d.one_vector_p)
31273     d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31274 
31275   start_sequence ();
31276   bool ret = arm_expand_vec_perm_const_1 (&d);
31277   end_sequence ();
31278 
31279   return ret;
31280 }
31281 
31282 bool
31283 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
31284 {
31285   /* If we are soft float and we do not have ldrd
31286      then all auto increment forms are ok.  */
31287   if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
31288     return true;
31289 
31290   switch (code)
31291     {
31292       /* Post increment and Pre Decrement are supported for all
31293 	 instruction forms except for vector forms.  */
31294     case ARM_POST_INC:
31295     case ARM_PRE_DEC:
31296       if (VECTOR_MODE_P (mode))
31297 	{
31298 	  if (code != ARM_PRE_DEC)
31299 	    return true;
31300 	  else
31301 	    return false;
31302 	}
31303 
31304       return true;
31305 
31306     case ARM_POST_DEC:
31307     case ARM_PRE_INC:
31308       /* Without LDRD and mode size greater than
31309 	 word size, there is no point in auto-incrementing
31310          because ldm and stm will not have these forms.  */
31311       if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
31312 	return false;
31313 
31314       /* Vector and floating point modes do not support
31315 	 these auto increment forms.  */
31316       if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
31317 	return false;
31318 
31319       return true;
31320 
31321     default:
31322       return false;
31323 
31324     }
31325 
31326   return false;
31327 }
31328 
31329 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
31330    on ARM, since we know that shifts by negative amounts are no-ops.
31331    Additionally, the default expansion code is not available or suitable
31332    for post-reload insn splits (this can occur when the register allocator
31333    chooses not to do a shift in NEON).
31334 
31335    This function is used in both initial expand and post-reload splits, and
31336    handles all kinds of 64-bit shifts.
31337 
31338    Input requirements:
31339     - It is safe for the input and output to be the same register, but
31340       early-clobber rules apply for the shift amount and scratch registers.
31341     - Shift by register requires both scratch registers.  In all other cases
31342       the scratch registers may be NULL.
31343     - Ashiftrt by a register also clobbers the CC register.  */
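/* As an illustrative sketch (not exhaustive), a 64-bit logical right
   shift by the constant 4 expands roughly to:

	out_low  = in_low >> 4;
	out_low |= in_high << 28;
	out_high = in_high >> 4;

   while a shift amount of 64 or more yields zero (or, for an arithmetic
   right shift, both halves set to in_high >> 31).  */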
31344 void
31345 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
31346 			       rtx amount, rtx scratch1, rtx scratch2)
31347 {
31348   rtx out_high = gen_highpart (SImode, out);
31349   rtx out_low = gen_lowpart (SImode, out);
31350   rtx in_high = gen_highpart (SImode, in);
31351   rtx in_low = gen_lowpart (SImode, in);
31352 
31353   /* Terminology:
31354 	in = the register pair containing the input value.
31355 	out = the destination register pair.
31356 	up = the high- or low-part of each pair.
31357 	down = the opposite part to "up".
31358      In a shift, we can consider bits to shift from "up"-stream to
31359      "down"-stream, so in a left-shift "up" is the low-part and "down"
31360      is the high-part of each register pair.  */
31361 
31362   rtx out_up   = code == ASHIFT ? out_low : out_high;
31363   rtx out_down = code == ASHIFT ? out_high : out_low;
31364   rtx in_up   = code == ASHIFT ? in_low : in_high;
31365   rtx in_down = code == ASHIFT ? in_high : in_low;
31366 
31367   gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
31368   gcc_assert (out
31369 	      && (REG_P (out) || GET_CODE (out) == SUBREG)
31370 	      && GET_MODE (out) == DImode);
31371   gcc_assert (in
31372 	      && (REG_P (in) || GET_CODE (in) == SUBREG)
31373 	      && GET_MODE (in) == DImode);
31374   gcc_assert (amount
31375 	      && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
31376 		   && GET_MODE (amount) == SImode)
31377 		  || CONST_INT_P (amount)));
31378   gcc_assert (scratch1 == NULL
31379 	      || (GET_CODE (scratch1) == SCRATCH)
31380 	      || (GET_MODE (scratch1) == SImode
31381 		  && REG_P (scratch1)));
31382   gcc_assert (scratch2 == NULL
31383 	      || (GET_CODE (scratch2) == SCRATCH)
31384 	      || (GET_MODE (scratch2) == SImode
31385 		  && REG_P (scratch2)));
31386   gcc_assert (!REG_P (out) || !REG_P (amount)
31387 	      || !HARD_REGISTER_P (out)
31388 	      || (REGNO (out) != REGNO (amount)
31389 		  && REGNO (out) + 1 != REGNO (amount)));
31390 
31391   /* Macros to make following code more readable.  */
31392   #define SUB_32(DEST,SRC) \
31393 	    gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
31394   #define RSB_32(DEST,SRC) \
31395 	    gen_subsi3 ((DEST), GEN_INT (32), (SRC))
31396   #define SUB_S_32(DEST,SRC) \
31397 	    gen_addsi3_compare0 ((DEST), (SRC), \
31398 				 GEN_INT (-32))
31399   #define SET(DEST,SRC) \
31400 	    gen_rtx_SET ((DEST), (SRC))
31401   #define SHIFT(CODE,SRC,AMOUNT) \
31402 	    gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
31403   #define LSHIFT(CODE,SRC,AMOUNT) \
31404 	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
31405 			    SImode, (SRC), (AMOUNT))
31406   #define REV_LSHIFT(CODE,SRC,AMOUNT) \
31407 	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
31408 			    SImode, (SRC), (AMOUNT))
31409   #define ORR(A,B) \
31410 	    gen_rtx_IOR (SImode, (A), (B))
31411   #define BRANCH(COND,LABEL) \
31412 	    gen_arm_cond_branch ((LABEL), \
31413 				 gen_rtx_ ## COND (CCmode, cc_reg, \
31414 						   const0_rtx), \
31415 				 cc_reg)
31416 
31417   /* Shifts by register and shifts by constant are handled separately.  */
31418   if (CONST_INT_P (amount))
31419     {
31420       /* We have a shift-by-constant.  */
31421 
31422       /* First, handle out-of-range shift amounts.
31423 	 In both cases we try to match the result that an ARM instruction in a
31424 	 shift-by-register would give.  This helps reduce execution
31425 	 differences between optimization levels, but it won't stop other
31426 	 parts of the compiler doing different things.  This is "undefined
31427 	 behavior", in any case.  */
31428       if (INTVAL (amount) <= 0)
31429 	emit_insn (gen_movdi (out, in));
31430       else if (INTVAL (amount) >= 64)
31431 	{
31432 	  if (code == ASHIFTRT)
31433 	    {
31434 	      rtx const31_rtx = GEN_INT (31);
31435 	      emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
31436 	      emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
31437 	    }
31438 	  else
31439 	    emit_insn (gen_movdi (out, const0_rtx));
31440 	}
31441 
31442       /* Now handle valid shifts. */
31443       else if (INTVAL (amount) < 32)
31444 	{
31445 	  /* Shifts by a constant less than 32.  */
31446 	  rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
31447 
31448 	  /* Clearing the out register in DImode first avoids lots
31449 	     of spilling and results in less stack usage.
31450 	     Later this redundant insn is completely removed.
31451 	     Do that only if "in" and "out" are different registers.  */
31452 	  if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
31453 	    emit_insn (SET (out, const0_rtx));
31454 	  emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31455 	  emit_insn (SET (out_down,
31456 			  ORR (REV_LSHIFT (code, in_up, reverse_amount),
31457 			       out_down)));
31458 	  emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31459 	}
31460       else
31461 	{
31462 	  /* Shifts by a constant greater than 31.  */
31463 	  rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
31464 
31465 	  if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
31466 	    emit_insn (SET (out, const0_rtx));
31467 	  emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
31468 	  if (code == ASHIFTRT)
31469 	    emit_insn (gen_ashrsi3 (out_up, in_up,
31470 				    GEN_INT (31)));
31471 	  else
31472 	    emit_insn (SET (out_up, const0_rtx));
31473 	}
31474     }
31475   else
31476     {
31477       /* We have a shift-by-register.  */
31478       rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
31479 
31480       /* This alternative requires the scratch registers.  */
31481       gcc_assert (scratch1 && REG_P (scratch1));
31482       gcc_assert (scratch2 && REG_P (scratch2));
31483 
31484       /* We will need the values "amount-32" and "32-amount" later.
31485          Swapping them around now allows the later code to be more general. */
31486       switch (code)
31487 	{
31488 	case ASHIFT:
31489 	  emit_insn (SUB_32 (scratch1, amount));
31490 	  emit_insn (RSB_32 (scratch2, amount));
31491 	  break;
31492 	case ASHIFTRT:
31493 	  emit_insn (RSB_32 (scratch1, amount));
31494 	  /* Also set CC = amount > 32.  */
31495 	  emit_insn (SUB_S_32 (scratch2, amount));
31496 	  break;
31497 	case LSHIFTRT:
31498 	  emit_insn (RSB_32 (scratch1, amount));
31499 	  emit_insn (SUB_32 (scratch2, amount));
31500 	  break;
31501 	default:
31502 	  gcc_unreachable ();
31503 	}
31504 
31505       /* Emit code like this:
31506 
31507 	 arithmetic-left:
31508 	    out_down = in_down << amount;
31509 	    out_down = (in_up << (amount - 32)) | out_down;
31510 	    out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
31511 	    out_up = in_up << amount;
31512 
31513 	 arithmetic-right:
31514 	    out_down = in_down >> amount;
31515 	    out_down = (in_up << (32 - amount)) | out_down;
31516 	    if (amount < 32)
31517 	      out_down = ((signed)in_up >> (amount - 32)) | out_down;
31518 	    out_up = in_up << amount;
31519 
31520 	 logical-right:
31521 	    out_down = in_down >> amount;
31522 	    out_down = (in_up << (32 - amount)) | out_down;
31523 	    if (amount < 32)
31524 	      out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
31525 	    out_up = in_up << amount;
31526 
31527 	  The ARM and Thumb2 variants are the same but implemented slightly
31528 	  differently.  If this were only called during expand we could just
31529 	  use the Thumb2 case and let combine do the right thing, but this
31530 	  can also be called from post-reload splitters.  */
31531 
31532       emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31533 
31534       if (!TARGET_THUMB2)
31535 	{
31536 	  /* Emit code for ARM mode.  */
31537 	  emit_insn (SET (out_down,
31538 			  ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
31539 	  if (code == ASHIFTRT)
31540 	    {
31541 	      rtx_code_label *done_label = gen_label_rtx ();
31542 	      emit_jump_insn (BRANCH (LT, done_label));
31543 	      emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
31544 					     out_down)));
31545 	      emit_label (done_label);
31546 	    }
31547 	  else
31548 	    emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
31549 					   out_down)));
31550 	}
31551       else
31552 	{
31553 	  /* Emit code for Thumb2 mode.
31554 	     Thumb2 can't do shift and or in one insn.  */
31555 	  emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
31556 	  emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
31557 
31558 	  if (code == ASHIFTRT)
31559 	    {
31560 	      rtx_code_label *done_label = gen_label_rtx ();
31561 	      emit_jump_insn (BRANCH (LT, done_label));
31562 	      emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
31563 	      emit_insn (SET (out_down, ORR (out_down, scratch2)));
31564 	      emit_label (done_label);
31565 	    }
31566 	  else
31567 	    {
31568 	      emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
31569 	      emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
31570 	    }
31571 	}
31572 
31573       emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31574     }
31575 
31576   #undef SUB_32
31577   #undef RSB_32
31578   #undef SUB_S_32
31579   #undef SET
31580   #undef SHIFT
31581   #undef LSHIFT
31582   #undef REV_LSHIFT
31583   #undef ORR
31584   #undef BRANCH
31585 }
31586 
31587 /* Returns true if the pattern is a valid symbolic address, which is either a
31588    symbol_ref or (symbol_ref + addend).
31589 
31590    According to the ARM ELF ABI, the initial addend of REL-type relocations
31591    processing MOVW and MOVT instructions is formed by interpreting the 16-bit
31592    literal field of the instruction as a 16-bit signed value in the range
31593    -32768 <= A < 32768.  */
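/* For illustration, (symbol_ref "sym") and
   (const (plus (symbol_ref "sym") (const_int 4))) are accepted, while
   an addend such as 0x10000 falls outside the range above and is
   rejected ("sym" is just a placeholder name).  */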
31594 
31595 bool
31596 arm_valid_symbolic_address_p (rtx addr)
31597 {
31598   rtx xop0, xop1 = NULL_RTX;
31599   rtx tmp = addr;
31600 
31601   if (target_word_relocations)
31602     return false;
31603 
31604   if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
31605     return true;
31606 
31607   /* (const (plus: symbol_ref const_int))  */
31608   if (GET_CODE (addr) == CONST)
31609     tmp = XEXP (addr, 0);
31610 
31611   if (GET_CODE (tmp) == PLUS)
31612     {
31613       xop0 = XEXP (tmp, 0);
31614       xop1 = XEXP (tmp, 1);
31615 
31616       if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
31617 	  return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
31618     }
31619 
31620   return false;
31621 }
31622 
31623 /* Return true if *COMPARISON is a valid comparison operation, putting
31624    the operands into a form that is valid for it.  */
31625 bool
31626 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
31627 {
31628   enum rtx_code code = GET_CODE (*comparison);
31629   int code_int;
31630   machine_mode mode = (GET_MODE (*op1) == VOIDmode)
31631     ? GET_MODE (*op2) : GET_MODE (*op1);
31632 
31633   gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
31634 
31635   if (code == UNEQ || code == LTGT)
31636     return false;
31637 
31638   code_int = (int)code;
31639   arm_canonicalize_comparison (&code_int, op1, op2, 0);
31640   PUT_CODE (*comparison, (enum rtx_code)code_int);
31641 
31642   switch (mode)
31643     {
31644     case E_SImode:
31645       if (!arm_add_operand (*op1, mode))
31646 	*op1 = force_reg (mode, *op1);
31647       if (!arm_add_operand (*op2, mode))
31648 	*op2 = force_reg (mode, *op2);
31649       return true;
31650 
31651     case E_DImode:
31652       /* gen_compare_reg() will sort out any invalid operands.  */
31653       return true;
31654 
31655     case E_HFmode:
31656       if (!TARGET_VFP_FP16INST)
31657 	break;
31658       /* FP16 comparisons are done in SF mode.  */
31659       mode = SFmode;
31660       *op1 = convert_to_mode (mode, *op1, 1);
31661       *op2 = convert_to_mode (mode, *op2, 1);
31662       /* Fall through.  */
31663     case E_SFmode:
31664     case E_DFmode:
31665       if (!vfp_compare_operand (*op1, mode))
31666 	*op1 = force_reg (mode, *op1);
31667       if (!vfp_compare_operand (*op2, mode))
31668 	*op2 = force_reg (mode, *op2);
31669       return true;
31670     default:
31671       break;
31672     }
31673 
31674   return false;
31675 
31676 }
31677 
31678 /* Maximum number of instructions to set a block of memory.  */
31679 static int
31680 arm_block_set_max_insns (void)
31681 {
31682   if (optimize_function_for_size_p (cfun))
31683     return 4;
31684   else
31685     return current_tune->max_insns_inline_memset;
31686 }
31687 
31688 /* Return TRUE if it's profitable to set block of memory for
31689    non-vectorized case.  VAL is the value to set the memory
31690    with.  LENGTH is the number of bytes to set.  ALIGN is the
31691    alignment of the destination memory in bytes.  UNALIGNED_P
31692    is TRUE if we can only set the memory with instructions
31693    meeting alignment requirements.  USE_STRD_P is TRUE if we
31694    can use strd to set the memory.  */
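/* As a rough worked example: for LENGTH == 15 with a word-aligned
   destination and !USE_STRD_P, the store count below is
   (15 >> 2) + leftover[3] = 3 + 2, minus one when unaligned access
   lets the final strh/strb pair merge into a single str, i.e. four
   stores on top of the cost of loading VAL.  */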
31695 static bool
31696 arm_block_set_non_vect_profit_p (rtx val,
31697 				 unsigned HOST_WIDE_INT length,
31698 				 unsigned HOST_WIDE_INT align,
31699 				 bool unaligned_p, bool use_strd_p)
31700 {
31701   int num = 0;
31702   /* For leftovers of 0-7 bytes, we can set the memory block using
31703      the minimum number of strb/strh/str instructions.  */
31704   const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
31705 
31706   if (unaligned_p)
31707     {
31708       num = arm_const_inline_cost (SET, val);
31709       num += length / align + length % align;
31710     }
31711   else if (use_strd_p)
31712     {
31713       num = arm_const_double_inline_cost (val);
31714       num += (length >> 3) + leftover[length & 7];
31715     }
31716   else
31717     {
31718       num = arm_const_inline_cost (SET, val);
31719       num += (length >> 2) + leftover[length & 3];
31720     }
31721 
31722   /* We may be able to combine last pair STRH/STRB into a single STR
31723      by shifting one byte back.  */
31724   if (unaligned_access && length > 3 && (length & 3) == 3)
31725     num--;
31726 
31727   return (num <= arm_block_set_max_insns ());
31728 }
31729 
31730 /* Return TRUE if it's profitable to set block of memory for
31731    vectorized case.  LENGTH is the number of bytes to set.
31732    ALIGN is the alignment of destination memory in bytes.
31733    MODE is the vector mode used to set the memory.  */
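/* As a rough worked example: for LENGTH == 25 with a word-aligned
   destination and V16QImode, the count below is 1 (constant load)
   + 2 (vector stores) + 1 (address adjustment for the leftover byte)
   - 1 (credit for the initial vst1), i.e. 3 instructions, which is
   then compared against arm_block_set_max_insns ().  */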
31734 static bool
31735 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
31736 			     unsigned HOST_WIDE_INT align,
31737 			     machine_mode mode)
31738 {
31739   int num;
31740   bool unaligned_p = ((align & 3) != 0);
31741   unsigned int nelt = GET_MODE_NUNITS (mode);
31742 
31743   /* Instruction loading constant value.  */
31744   num = 1;
31745   /* Instructions storing the memory.  */
31746   num += (length + nelt - 1) / nelt;
31747   /* Instructions adjusting the address expression.  We only need this
31748      if the block is 4-byte aligned and the leftover bytes can only be
31749      stored with a misaligned store instruction.  */
31750   if (!unaligned_p && (length & 3) != 0)
31751     num++;
31752 
31753   /* Store the first 16 bytes using vst1:v16qi for the aligned case.  */
31754   if (!unaligned_p && mode == V16QImode)
31755     num--;
31756 
31757   return (num <= arm_block_set_max_insns ());
31758 }
31759 
31760 /* Set a block of memory using vectorization instructions for the
31761    unaligned case.  We fill the first LENGTH bytes of the memory
31762    area starting from DSTBASE with byte constant VALUE.  ALIGN is
31763    the alignment requirement of memory.  Return TRUE if succeeded.  */
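/* For example (illustrative only), with LENGTH == 20 on a byte-aligned
   destination we emit one misaligned V16QI store covering bytes 0-15,
   then advance the pointer and emit a misaligned V8QI store covering
   bytes 12-19, so bytes 12-15 are simply written twice.  */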
31764 static bool
31765 arm_block_set_unaligned_vect (rtx dstbase,
31766 			      unsigned HOST_WIDE_INT length,
31767 			      unsigned HOST_WIDE_INT value,
31768 			      unsigned HOST_WIDE_INT align)
31769 {
31770   unsigned int i, nelt_v16, nelt_v8, nelt_mode;
31771   rtx dst, mem;
31772   rtx val_vec, reg;
31773   rtx (*gen_func) (rtx, rtx);
31774   machine_mode mode;
31775   unsigned HOST_WIDE_INT v = value;
31776   unsigned int offset = 0;
31777   gcc_assert ((align & 0x3) != 0);
31778   nelt_v8 = GET_MODE_NUNITS (V8QImode);
31779   nelt_v16 = GET_MODE_NUNITS (V16QImode);
31780   if (length >= nelt_v16)
31781     {
31782       mode = V16QImode;
31783       gen_func = gen_movmisalignv16qi;
31784     }
31785   else
31786     {
31787       mode = V8QImode;
31788       gen_func = gen_movmisalignv8qi;
31789     }
31790   nelt_mode = GET_MODE_NUNITS (mode);
31791   gcc_assert (length >= nelt_mode);
31792   /* Skip if it isn't profitable.  */
31793   if (!arm_block_set_vect_profit_p (length, align, mode))
31794     return false;
31795 
31796   dst = copy_addr_to_reg (XEXP (dstbase, 0));
31797   mem = adjust_automodify_address (dstbase, mode, dst, offset);
31798 
31799   v = sext_hwi (v, BITS_PER_WORD);
31800 
31801   reg = gen_reg_rtx (mode);
31802   val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
31803   /* Emit instruction loading the constant value.  */
31804   emit_move_insn (reg, val_vec);
31805 
31806   /* Handle nelt_mode bytes in a vector.  */
31807   for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
31808     {
31809       emit_insn ((*gen_func) (mem, reg));
31810       if (i + 2 * nelt_mode <= length)
31811 	{
31812 	  emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
31813 	  offset += nelt_mode;
31814 	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
31815 	}
31816     }
31817 
31818   /* If at least nelt_v8 bytes are left over, we must be in
31819      V16QImode.  */
31820   gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
31821 
31822   /* Handle (8, 16) bytes leftover.  */
31823   if (i + nelt_v8 < length)
31824     {
31825       emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
31826       offset += length - i;
31827       mem = adjust_automodify_address (dstbase, mode, dst, offset);
31828 
31829       /* We are shifting bytes back, set the alignment accordingly.  */
31830       if ((length & 1) != 0 && align >= 2)
31831 	set_mem_align (mem, BITS_PER_UNIT);
31832 
31833       emit_insn (gen_movmisalignv16qi (mem, reg));
31834     }
31835   /* Handle (0, 8] bytes leftover.  */
31836   else if (i < length && i + nelt_v8 >= length)
31837     {
31838       if (mode == V16QImode)
31839 	reg = gen_lowpart (V8QImode, reg);
31840 
31841       emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
31842 					      + (nelt_mode - nelt_v8))));
31843       offset += (length - i) + (nelt_mode - nelt_v8);
31844       mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
31845 
31846       /* We are shifting bytes back, set the alignment accordingly.  */
31847       if ((length & 1) != 0 && align >= 2)
31848 	set_mem_align (mem, BITS_PER_UNIT);
31849 
31850       emit_insn (gen_movmisalignv8qi (mem, reg));
31851     }
31852 
31853   return true;
31854 }
31855 
31856 /* Set a block of memory using vectorization instructions for the
31857    aligned case.  We fill the first LENGTH bytes of the memory area
31858    starting from DSTBASE with byte constant VALUE.  ALIGN is the
31859    alignment requirement of memory.  Return TRUE if succeeded.  */
31860 static bool
31861 arm_block_set_aligned_vect (rtx dstbase,
31862 			    unsigned HOST_WIDE_INT length,
31863 			    unsigned HOST_WIDE_INT value,
31864 			    unsigned HOST_WIDE_INT align)
31865 {
31866   unsigned int i, nelt_v8, nelt_v16, nelt_mode;
31867   rtx dst, addr, mem;
31868   rtx val_vec, reg;
31869   machine_mode mode;
31870   unsigned int offset = 0;
31871 
31872   gcc_assert ((align & 0x3) == 0);
31873   nelt_v8 = GET_MODE_NUNITS (V8QImode);
31874   nelt_v16 = GET_MODE_NUNITS (V16QImode);
31875   if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
31876     mode = V16QImode;
31877   else
31878     mode = V8QImode;
31879 
31880   nelt_mode = GET_MODE_NUNITS (mode);
31881   gcc_assert (length >= nelt_mode);
31882   /* Skip if it isn't profitable.  */
31883   if (!arm_block_set_vect_profit_p (length, align, mode))
31884     return false;
31885 
31886   dst = copy_addr_to_reg (XEXP (dstbase, 0));
31887 
31888   reg = gen_reg_rtx (mode);
31889   val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
31890   /* Emit instruction loading the constant value.  */
31891   emit_move_insn (reg, val_vec);
31892 
31893   i = 0;
31894   /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
31895   if (mode == V16QImode)
31896     {
31897       mem = adjust_automodify_address (dstbase, mode, dst, offset);
31898       emit_insn (gen_movmisalignv16qi (mem, reg));
31899       i += nelt_mode;
31900       /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
31901       if (i + nelt_v8 < length && i + nelt_v16 > length)
31902 	{
31903 	  emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
31904 	  offset += length - nelt_mode;
31905 	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
31906 	  /* We are shifting bytes back, set the alignment accordingly.  */
31907 	  if ((length & 0x3) == 0)
31908 	    set_mem_align (mem, BITS_PER_UNIT * 4);
31909 	  else if ((length & 0x1) == 0)
31910 	    set_mem_align (mem, BITS_PER_UNIT * 2);
31911 	  else
31912 	    set_mem_align (mem, BITS_PER_UNIT);
31913 
31914 	  emit_insn (gen_movmisalignv16qi (mem, reg));
31915 	  return true;
31916 	}
31917       /* Fall through for bytes leftover.  */
31918       mode = V8QImode;
31919       nelt_mode = GET_MODE_NUNITS (mode);
31920       reg = gen_lowpart (V8QImode, reg);
31921     }
31922 
31923   /* Handle 8 bytes in a vector.  */
31924   for (; (i + nelt_mode <= length); i += nelt_mode)
31925     {
31926       addr = plus_constant (Pmode, dst, i);
31927       mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
31928       if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
31929 	emit_move_insn (mem, reg);
31930       else
31931 	emit_insn (gen_unaligned_storev8qi (mem, reg));
31932     }
31933 
31934   /* Handle single word leftover by shifting 4 bytes back.  We can
31935      use aligned access for this case.  */
31936   if (i + UNITS_PER_WORD == length)
31937     {
31938       addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
31939       offset += i - UNITS_PER_WORD;
31940       mem = adjust_automodify_address (dstbase, mode, addr, offset);
31941       /* We are shifting 4 bytes back, set the alignment accordingly.  */
31942       if (align > UNITS_PER_WORD)
31943 	set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
31944 
31945       emit_insn (gen_unaligned_storev8qi (mem, reg));
31946     }
31947   /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
31948      We have to use unaligned access for this case.  */
31949   else if (i < length)
31950     {
31951       emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
31952       offset += length - nelt_mode;
31953       mem = adjust_automodify_address (dstbase, mode, dst, offset);
31954       /* We are shifting bytes back, set the alignment accordingly.  */
31955       if ((length & 1) == 0)
31956 	set_mem_align (mem, BITS_PER_UNIT * 2);
31957       else
31958 	set_mem_align (mem, BITS_PER_UNIT);
31959 
31960       emit_insn (gen_movmisalignv8qi (mem, reg));
31961     }
31962 
31963   return true;
31964 }
31965 
31966 /* Set a block of memory using plain strh/strb instructions, only
31967    using instructions permitted by the alignment ALIGN.  We fill the
31968    first LENGTH bytes of the memory area starting from DSTBASE
31969    with byte constant VALUE.  ALIGN is the alignment requirement
31970    of memory.  */
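/* For example, with ALIGN == 2 the byte VALUE is first replicated into
   a halfword pattern (VALUE | VALUE << 8) and stored with strh; a
   trailing odd byte, if any, is stored with strb.  With ALIGN == 1
   every byte is stored individually with strb.  */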
31971 static bool
31972 arm_block_set_unaligned_non_vect (rtx dstbase,
31973 				  unsigned HOST_WIDE_INT length,
31974 				  unsigned HOST_WIDE_INT value,
31975 				  unsigned HOST_WIDE_INT align)
31976 {
31977   unsigned int i;
31978   rtx dst, addr, mem;
31979   rtx val_exp, val_reg, reg;
31980   machine_mode mode;
31981   HOST_WIDE_INT v = value;
31982 
31983   gcc_assert (align == 1 || align == 2);
31984 
31985   if (align == 2)
31986     v |= (value << BITS_PER_UNIT);
31987 
31988   v = sext_hwi (v, BITS_PER_WORD);
31989   val_exp = GEN_INT (v);
31990   /* Skip if it isn't profitable.  */
31991   if (!arm_block_set_non_vect_profit_p (val_exp, length,
31992 					align, true, false))
31993     return false;
31994 
31995   dst = copy_addr_to_reg (XEXP (dstbase, 0));
31996   mode = (align == 2 ? HImode : QImode);
31997   val_reg = force_reg (SImode, val_exp);
31998   reg = gen_lowpart (mode, val_reg);
31999 
32000   for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
32001     {
32002       addr = plus_constant (Pmode, dst, i);
32003       mem = adjust_automodify_address (dstbase, mode, addr, i);
32004       emit_move_insn (mem, reg);
32005     }
32006 
32007   /* Handle single byte leftover.  */
32008   if (i + 1 == length)
32009     {
32010       reg = gen_lowpart (QImode, val_reg);
32011       addr = plus_constant (Pmode, dst, i);
32012       mem = adjust_automodify_address (dstbase, QImode, addr, i);
32013       emit_move_insn (mem, reg);
32014       i++;
32015     }
32016 
32017   gcc_assert (i == length);
32018   return true;
32019 }
32020 
32021 /* Set a block of memory using plain strd/str/strh/strb instructions,
32022    to permit unaligned copies on processors which support unaligned
32023    semantics for those instructions.  We fill the first LENGTH bytes
32024    of the memory area starting from DSTBASE with byte constant VALUE.
32025    ALIGN is the alignment requirement of memory.  */
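/* For example, VALUE == 0xAB is replicated to the word 0xABABABAB for
   str/strh/strb stores, and further to 0xABABABABABABABAB when strd is
   used for the double-word stores below.  */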
32026 static bool
32027 arm_block_set_aligned_non_vect (rtx dstbase,
32028 				unsigned HOST_WIDE_INT length,
32029 				unsigned HOST_WIDE_INT value,
32030 				unsigned HOST_WIDE_INT align)
32031 {
32032   unsigned int i;
32033   rtx dst, addr, mem;
32034   rtx val_exp, val_reg, reg;
32035   unsigned HOST_WIDE_INT v;
32036   bool use_strd_p;
32037 
32038   use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
32039 		&& TARGET_LDRD && current_tune->prefer_ldrd_strd);
32040 
32041   v = (value | (value << 8) | (value << 16) | (value << 24));
32042   if (length < UNITS_PER_WORD)
32043     v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
32044 
32045   if (use_strd_p)
32046     v |= (v << BITS_PER_WORD);
32047   else
32048     v = sext_hwi (v, BITS_PER_WORD);
32049 
32050   val_exp = GEN_INT (v);
32051   /* Skip if it isn't profitable.  */
32052   if (!arm_block_set_non_vect_profit_p (val_exp, length,
32053 					align, false, use_strd_p))
32054     {
32055       if (!use_strd_p)
32056 	return false;
32057 
32058       /* Try without strd.  */
32059       v = (v >> BITS_PER_WORD);
32060       v = sext_hwi (v, BITS_PER_WORD);
32061       val_exp = GEN_INT (v);
32062       use_strd_p = false;
32063       if (!arm_block_set_non_vect_profit_p (val_exp, length,
32064 					    align, false, use_strd_p))
32065 	return false;
32066     }
32067 
32068   i = 0;
32069   dst = copy_addr_to_reg (XEXP (dstbase, 0));
32070   /* Handle double words using strd if possible.  */
32071   if (use_strd_p)
32072     {
32073       val_reg = force_reg (DImode, val_exp);
32074       reg = val_reg;
32075       for (; (i + 8 <= length); i += 8)
32076 	{
32077 	  addr = plus_constant (Pmode, dst, i);
32078 	  mem = adjust_automodify_address (dstbase, DImode, addr, i);
32079 	  if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32080 	    emit_move_insn (mem, reg);
32081 	  else
32082 	    emit_insn (gen_unaligned_storedi (mem, reg));
32083 	}
32084     }
32085   else
32086     val_reg = force_reg (SImode, val_exp);
32087 
32088   /* Handle words.  */
32089   reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
32090   for (; (i + 4 <= length); i += 4)
32091     {
32092       addr = plus_constant (Pmode, dst, i);
32093       mem = adjust_automodify_address (dstbase, SImode, addr, i);
32094       if ((align & 3) == 0)
32095 	emit_move_insn (mem, reg);
32096       else
32097 	emit_insn (gen_unaligned_storesi (mem, reg));
32098     }
32099 
32100   /* Merge last pair of STRH and STRB into a STR if possible.  */
32101   if (unaligned_access && i > 0 && (i + 3) == length)
32102     {
32103       addr = plus_constant (Pmode, dst, i - 1);
32104       mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
32105       /* We are shifting one byte back, set the alignment accordingly.  */
32106       if ((align & 1) == 0)
32107 	set_mem_align (mem, BITS_PER_UNIT);
32108 
32109       /* Most likely this is an unaligned access, and we can't tell at
32110 	 compilation time.  */
32111       emit_insn (gen_unaligned_storesi (mem, reg));
32112       return true;
32113     }
32114 
32115   /* Handle half word leftover.  */
32116   if (i + 2 <= length)
32117     {
32118       reg = gen_lowpart (HImode, val_reg);
32119       addr = plus_constant (Pmode, dst, i);
32120       mem = adjust_automodify_address (dstbase, HImode, addr, i);
32121       if ((align & 1) == 0)
32122 	emit_move_insn (mem, reg);
32123       else
32124 	emit_insn (gen_unaligned_storehi (mem, reg));
32125 
32126       i += 2;
32127     }
32128 
32129   /* Handle single byte leftover.  */
32130   if (i + 1 == length)
32131     {
32132       reg = gen_lowpart (QImode, val_reg);
32133       addr = plus_constant (Pmode, dst, i);
32134       mem = adjust_automodify_address (dstbase, QImode, addr, i);
32135       emit_move_insn (mem, reg);
32136     }
32137 
32138   return true;
32139 }
32140 
32141 /* Set a block of memory using vectorization instructions for both
32142    aligned and unaligned cases.  We fill the first LENGTH bytes of
32143    the memory area starting from DSTBASE with byte constant VALUE.
32144    ALIGN is the alignment requirement of memory.  */
32145 static bool
32146 arm_block_set_vect (rtx dstbase,
32147 		    unsigned HOST_WIDE_INT length,
32148 		    unsigned HOST_WIDE_INT value,
32149 		    unsigned HOST_WIDE_INT align)
32150 {
32151   /* Check whether we need to use unaligned store instruction.  */
32152   if (((align & 3) != 0 || (length & 3) != 0)
32153       /* Check whether unaligned store instruction is available.  */
32154       && (!unaligned_access || BYTES_BIG_ENDIAN))
32155     return false;
32156 
32157   if ((align & 3) == 0)
32158     return arm_block_set_aligned_vect (dstbase, length, value, align);
32159   else
32160     return arm_block_set_unaligned_vect (dstbase, length, value, align);
32161 }
32162 
32163 /* Expand string store operation.  Firstly we try to do that by using
32164    vectorization instructions, then try with ARM unaligned access and
32165    double-word store if profitable.  OPERANDS[0] is the destination,
32166    OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
32167    initialize the memory, OPERANDS[3] is the known alignment of the
32168    destination.  */
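/* For instance, a 16-byte memset on a Neon target whose tuning prefers
   Neon string operations may be expanded with vector stores, while any
   block longer than 64 bytes is left to the generic expander (we simply
   return false for it).  */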
32169 bool
32170 arm_gen_setmem (rtx *operands)
32171 {
32172   rtx dstbase = operands[0];
32173   unsigned HOST_WIDE_INT length;
32174   unsigned HOST_WIDE_INT value;
32175   unsigned HOST_WIDE_INT align;
32176 
32177   if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
32178     return false;
32179 
32180   length = UINTVAL (operands[1]);
32181   if (length > 64)
32182     return false;
32183 
32184   value = (UINTVAL (operands[2]) & 0xFF);
32185   align = UINTVAL (operands[3]);
32186   if (TARGET_NEON && length >= 8
32187       && current_tune->string_ops_prefer_neon
32188       && arm_block_set_vect (dstbase, length, value, align))
32189     return true;
32190 
32191   if (!unaligned_access && (align & 3) != 0)
32192     return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
32193 
32194   return arm_block_set_aligned_non_vect (dstbase, length, value, align);
32195 }
32196 
32197 
32198 static bool
32199 arm_macro_fusion_p (void)
32200 {
32201   return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
32202 }
32203 
32204 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
32205    for MOVW / MOVT macro fusion.  */
32206 
32207 static bool
32208 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
32209 {
32210   /* We are trying to fuse
32211      movw imm / movt imm
32212     instructions as a group that gets scheduled together.  */
32213 
32214   rtx set_dest = SET_DEST (curr_set);
32215 
32216   if (GET_MODE (set_dest) != SImode)
32217     return false;
32218 
32219   /* We are trying to match:
32220      prev (movw)  == (set (reg r0) (const_int imm16))
32221      curr (movt) == (set (zero_extract (reg r0)
32222 					(const_int 16)
32223 					(const_int 16))
32224 			  (const_int imm16_1))
32225      or
32226      prev (movw) == (set (reg r1)
32227 			  (high (symbol_ref ("SYM"))))
32228     curr (movt) == (set (reg r0)
32229 			(lo_sum (reg r1)
32230 				(symbol_ref ("SYM"))))  */
32231 
32232     if (GET_CODE (set_dest) == ZERO_EXTRACT)
32233       {
32234 	if (CONST_INT_P (SET_SRC (curr_set))
32235 	    && CONST_INT_P (SET_SRC (prev_set))
32236 	    && REG_P (XEXP (set_dest, 0))
32237 	    && REG_P (SET_DEST (prev_set))
32238 	    && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
32239 	  return true;
32240 
32241       }
32242     else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
32243 	     && REG_P (SET_DEST (curr_set))
32244 	     && REG_P (SET_DEST (prev_set))
32245 	     && GET_CODE (SET_SRC (prev_set)) == HIGH
32246 	     && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
32247       return true;
32248 
32249   return false;
32250 }
32251 
32252 static bool
32253 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
32254 {
32255   rtx prev_set = single_set (prev);
32256   rtx curr_set = single_set (curr);
32257 
32258   if (!prev_set
32259       || !curr_set)
32260     return false;
32261 
32262   if (any_condjump_p (curr))
32263     return false;
32264 
32265   if (!arm_macro_fusion_p ())
32266     return false;
32267 
32268   if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
32269       && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
32270     return true;
32271 
32272   return false;
32273 }
32274 
32275 /* Return true iff the instruction fusion described by OP is enabled.  */
32276 bool
32277 arm_fusion_enabled_p (tune_params::fuse_ops op)
32278 {
32279   return current_tune->fusible_ops & op;
32280 }
32281 
32282 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN.  Return true if INSN can be
32283    scheduled for speculative execution.  Reject the long-running division
32284    and square-root instructions.  */
32285 
32286 static bool
32287 arm_sched_can_speculate_insn (rtx_insn *insn)
32288 {
32289   switch (get_attr_type (insn))
32290     {
32291       case TYPE_SDIV:
32292       case TYPE_UDIV:
32293       case TYPE_FDIVS:
32294       case TYPE_FDIVD:
32295       case TYPE_FSQRTS:
32296       case TYPE_FSQRTD:
32297       case TYPE_NEON_FP_SQRT_S:
32298       case TYPE_NEON_FP_SQRT_D:
32299       case TYPE_NEON_FP_SQRT_S_Q:
32300       case TYPE_NEON_FP_SQRT_D_Q:
32301       case TYPE_NEON_FP_DIV_S:
32302       case TYPE_NEON_FP_DIV_D:
32303       case TYPE_NEON_FP_DIV_S_Q:
32304       case TYPE_NEON_FP_DIV_D_Q:
32305 	return false;
32306       default:
32307 	return true;
32308     }
32309 }
32310 
32311 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
32312 
32313 static unsigned HOST_WIDE_INT
32314 arm_asan_shadow_offset (void)
32315 {
32316   return HOST_WIDE_INT_1U << 29;
32317 }
32318 
32319 
32320 /* This is a temporary fix for PR60655.  Ideally we need
32321    to handle most of these cases in the generic part but
32322    currently we reject minus (..) (sym_ref).  We try to
32323    ameliorate the case with minus (sym_ref1) (sym_ref2)
32324    where they are in the same section.  */
32325 
32326 static bool
32327 arm_const_not_ok_for_debug_p (rtx p)
32328 {
32329   tree decl_op0 = NULL;
32330   tree decl_op1 = NULL;
32331 
32332   if (GET_CODE (p) == UNSPEC)
32333     return true;
32334   if (GET_CODE (p) == MINUS)
32335     {
32336       if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
32337 	{
32338 	  decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
32339 	  if (decl_op1
32340 	      && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
32341 	      && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
32342 	    {
32343 	      if ((VAR_P (decl_op1)
32344 		   || TREE_CODE (decl_op1) == CONST_DECL)
32345 		  && (VAR_P (decl_op0)
32346 		      || TREE_CODE (decl_op0) == CONST_DECL))
32347 		return (get_variable_section (decl_op1, false)
32348 			!= get_variable_section (decl_op0, false));
32349 
32350 	      if (TREE_CODE (decl_op1) == LABEL_DECL
32351 		  && TREE_CODE (decl_op0) == LABEL_DECL)
32352 		return (DECL_CONTEXT (decl_op1)
32353 			!= DECL_CONTEXT (decl_op0));
32354 	    }
32355 
32356 	  return true;
32357 	}
32358     }
32359 
32360   return false;
32361 }
32362 
32363 /* Return TRUE if X is a reference to a value in a constant pool.  */
32364 extern bool
32365 arm_is_constant_pool_ref (rtx x)
32366 {
32367   return (MEM_P (x)
32368 	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
32369 	  && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
32370 }
32371 
32372 /* Remember the last target of arm_set_current_function.  */
32373 static GTY(()) tree arm_previous_fndecl;
32374 
32375 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.  */
32376 
32377 void
32378 save_restore_target_globals (tree new_tree)
32379 {
32380   /* If we have a previous state, use it.  */
32381   if (TREE_TARGET_GLOBALS (new_tree))
32382     restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
32383   else if (new_tree == target_option_default_node)
32384     restore_target_globals (&default_target_globals);
32385   else
32386     {
32387       /* Call target_reinit and save the state for TARGET_GLOBALS.  */
32388       TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
32389     }
32390 
32391   arm_option_params_internal ();
32392 }
32393 
32394 /* Invalidate arm_previous_fndecl.  */
32395 
32396 void
32397 arm_reset_previous_fndecl (void)
32398 {
32399   arm_previous_fndecl = NULL_TREE;
32400 }
32401 
32402 /* Establish appropriate back-end context for processing the function
32403    FNDECL.  The argument might be NULL to indicate processing at top
32404    level, outside of any function scope.  */
32405 
32406 static void
32407 arm_set_current_function (tree fndecl)
32408 {
32409   if (!fndecl || fndecl == arm_previous_fndecl)
32410     return;
32411 
32412   tree old_tree = (arm_previous_fndecl
32413 		   ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
32414 		   : NULL_TREE);
32415 
32416   tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
32417 
32418   /* If current function has no attributes but previous one did,
32419      use the default node.  */
32420   if (! new_tree && old_tree)
32421     new_tree = target_option_default_node;
32422 
32423   /* If there is nothing to do, return.  #pragma GCC reset or #pragma GCC pop to
32424      the default have been handled by save_restore_target_globals from
32425      arm_pragma_target_parse.  */
32426   if (old_tree == new_tree)
32427     return;
32428 
32429   arm_previous_fndecl = fndecl;
32430 
32431   /* First set the target options.  */
32432   cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
32433 
32434   save_restore_target_globals (new_tree);
32435 }
32436 
32437 /* Implement TARGET_OPTION_PRINT.  */
32438 
32439 static void
32440 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
32441 {
32442   int flags = ptr->x_target_flags;
32443   const char *fpu_name;
32444 
32445   fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
32446 	      ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
32447 
32448   fprintf (file, "%*sselected isa %s\n", indent, "",
32449 	   TARGET_THUMB2_P (flags) ? "thumb2" :
32450 	   TARGET_THUMB_P (flags) ? "thumb1" :
32451 	   "arm");
32452 
32453   if (ptr->x_arm_arch_string)
32454     fprintf (file, "%*sselected architecture %s\n", indent, "",
32455 	     ptr->x_arm_arch_string);
32456 
32457   if (ptr->x_arm_cpu_string)
32458     fprintf (file, "%*sselected CPU %s\n", indent, "",
32459 	     ptr->x_arm_cpu_string);
32460 
32461   if (ptr->x_arm_tune_string)
32462     fprintf (file, "%*sselected tune %s\n", indent, "",
32463 	     ptr->x_arm_tune_string);
32464 
32465   fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
32466 }
32467 
32468 /* Hook to determine if one function can safely inline another.  */
32469 
32470 static bool
32471 arm_can_inline_p (tree caller, tree callee)
32472 {
32473   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
32474   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
32475   bool can_inline = true;
32476 
32477   struct cl_target_option *caller_opts
32478 	= TREE_TARGET_OPTION (caller_tree ? caller_tree
32479 					   : target_option_default_node);
32480 
32481   struct cl_target_option *callee_opts
32482 	= TREE_TARGET_OPTION (callee_tree ? callee_tree
32483 					   : target_option_default_node);
32484 
32485   if (callee_opts == caller_opts)
32486     return true;
32487 
32488   /* Callee's ISA features should be a subset of the caller's.  */
32489   struct arm_build_target caller_target;
32490   struct arm_build_target callee_target;
32491   caller_target.isa = sbitmap_alloc (isa_num_bits);
32492   callee_target.isa = sbitmap_alloc (isa_num_bits);
32493 
32494   arm_configure_build_target (&caller_target, caller_opts, false);
32495   arm_configure_build_target (&callee_target, callee_opts, false);
32496   if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
32497     can_inline = false;
32498 
32499   sbitmap_free (caller_target.isa);
32500   sbitmap_free (callee_target.isa);
32501 
32502   /* OK to inline between different modes.
32503      Functions with mode-specific instructions, e.g. using asm,
32504      must be explicitly protected with noinline.  */
32505   return can_inline;
32506 }
32507 
32508 /* Hook to fix function's alignment affected by target attribute.  */
32509 
32510 static void
32511 arm_relayout_function (tree fndecl)
32512 {
32513   if (DECL_USER_ALIGN (fndecl))
32514     return;
32515 
32516   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
32517 
32518   if (!callee_tree)
32519     callee_tree = target_option_default_node;
32520 
32521   struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
32522   SET_DECL_ALIGN
32523     (fndecl,
32524      FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
32525 }
32526 
32527 /* Inner function to process the attribute((target(...))), take an argument and
32528    set the current options from the argument.  If we have a list, recursively
32529    go over the list.  */
32530 
32531 static bool
32532 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
32533 {
32534   if (TREE_CODE (args) == TREE_LIST)
32535     {
32536       bool ret = true;
32537 
32538       for (; args; args = TREE_CHAIN (args))
32539 	if (TREE_VALUE (args)
32540 	    && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
32541 	  ret = false;
32542       return ret;
32543     }
32544 
32545   else if (TREE_CODE (args) != STRING_CST)
32546     {
32547       error ("attribute %<target%> argument not a string");
32548       return false;
32549     }
32550 
32551   char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
32552   char *q;
32553 
32554   while ((q = strtok (argstr, ",")) != NULL)
32555     {
32556       argstr = NULL;
32557       if (!strcmp (q, "thumb"))
32558 	{
32559 	  opts->x_target_flags |= MASK_THUMB;
32560 	  if (TARGET_FDPIC && !arm_arch_thumb2)
32561 	    sorry ("FDPIC mode is not supported in Thumb-1 mode");
32562 	}
32563 
32564       else if (!strcmp (q, "arm"))
32565 	opts->x_target_flags &= ~MASK_THUMB;
32566 
32567       else if (!strcmp (q, "general-regs-only"))
32568 	opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
32569 
32570       else if (!strncmp (q, "fpu=", 4))
32571 	{
32572 	  int fpu_index;
32573 	  if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
32574 				       &fpu_index, CL_TARGET))
32575 	    {
32576 	      error ("invalid fpu for target attribute or pragma %qs", q);
32577 	      return false;
32578 	    }
32579 	  if (fpu_index == TARGET_FPU_auto)
32580 	    {
32581 	      /* This doesn't really make sense until we support
32582 		 general dynamic selection of the architecture and all
32583 		 sub-features.  */
32584 	      sorry ("auto fpu selection not currently permitted here");
32585 	      return false;
32586 	    }
32587 	  opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
32588 	}
32589       else if (!strncmp (q, "arch=", 5))
32590 	{
32591 	  char *arch = q + 5;
32592 	  const arch_option *arm_selected_arch
32593 	     = arm_parse_arch_option_name (all_architectures, "arch", arch);
32594 
32595 	  if (!arm_selected_arch)
32596 	    {
32597 	      error ("invalid architecture for target attribute or pragma %qs",
32598 		     q);
32599 	      return false;
32600 	    }
32601 
32602 	  opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
32603 	}
32604       else if (q[0] == '+')
32605 	{
32606 	  opts->x_arm_arch_string
32607 	    = xasprintf ("%s%s", opts->x_arm_arch_string, q);
32608 	}
32609       else
32610 	{
32611 	  error ("unknown target attribute or pragma %qs", q);
32612 	  return false;
32613 	}
32614     }
32615 
32616   return true;
32617 }
32618 
32619 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */
32620 
32621 tree
32622 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
32623 				 struct gcc_options *opts_set)
32624 {
32625   struct cl_target_option cl_opts;
32626 
32627   if (!arm_valid_target_attribute_rec (args, opts))
32628     return NULL_TREE;
32629 
32630   cl_target_option_save (&cl_opts, opts);
32631   arm_configure_build_target (&arm_active_target, &cl_opts, false);
32632   arm_option_check_internal (opts);
32633   /* Do any overrides, such as global options arch=xxx.
32634      We do this since arm_active_target was overridden.  */
32635   arm_option_reconfigure_globals ();
32636   arm_options_perform_arch_sanity_checks ();
32637   arm_option_override_internal (opts, opts_set);
32638 
32639   return build_target_option_node (opts);
32640 }
32641 
32642 static void
32643 add_attribute (const char * mode, tree *attributes)
32644 {
32645   size_t len = strlen (mode);
32646   tree value = build_string (len, mode);
32647 
32648   TREE_TYPE (value) = build_array_type (char_type_node,
32649 					build_index_type (size_int (len)));
32650 
32651   *attributes = tree_cons (get_identifier ("target"),
32652 			   build_tree_list (NULL_TREE, value),
32653 			   *attributes);
32654 }
32655 
32656 /* For testing.  Insert thumb or arm modes alternately on functions.  */
32657 
32658 static void
32659 arm_insert_attributes (tree fndecl, tree * attributes)
32660 {
32661   const char *mode;
32662 
32663   if (! TARGET_FLIP_THUMB)
32664     return;
32665 
32666   if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
32667       || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
32668    return;
32669 
32670   /* Nested definitions must inherit mode.  */
32671   if (current_function_decl)
32672    {
32673      mode = TARGET_THUMB ? "thumb" : "arm";
32674      add_attribute (mode, attributes);
32675      return;
32676    }
32677 
32678   /* If there is already a setting don't change it.  */
32679   if (lookup_attribute ("target", *attributes) != NULL)
32680     return;
32681 
32682   mode = thumb_flipper ? "thumb" : "arm";
32683   add_attribute (mode, attributes);
32684 
32685   thumb_flipper = !thumb_flipper;
32686 }
32687 
32688 /* Hook to validate attribute((target("string"))).  */
32689 
32690 static bool
32691 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
32692 			      tree args, int ARG_UNUSED (flags))
32693 {
32694   bool ret = true;
32695   struct gcc_options func_options;
32696   tree cur_tree, new_optimize;
32697   gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
32698 
32699   /* Get the optimization options of the current function.  */
32700   tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
32701 
32702   /* If the function changed the optimization levels as well as setting target
32703      options, start with the optimizations specified.  */
32704   if (!func_optimize)
32705     func_optimize = optimization_default_node;
32706 
32707   /* Init func_options.  */
32708   memset (&func_options, 0, sizeof (func_options));
32709   init_options_struct (&func_options, NULL);
32710   lang_hooks.init_options_struct (&func_options);
32711 
32712   /* Initialize func_options to the defaults.  */
32713   cl_optimization_restore (&func_options,
32714 			   TREE_OPTIMIZATION (func_optimize));
32715 
32716   cl_target_option_restore (&func_options,
32717 			    TREE_TARGET_OPTION (target_option_default_node));
32718 
32719   /* Set func_options flags with new target mode.  */
32720   cur_tree = arm_valid_target_attribute_tree (args, &func_options,
32721 					      &global_options_set);
32722 
32723   if (cur_tree == NULL_TREE)
32724     ret = false;
32725 
32726   new_optimize = build_optimization_node (&func_options);
32727 
32728   DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
32729 
32730   DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
32731 
32732   return ret;
32733 }
32734 
32735 /* Match an ISA feature bitmap to a named FPU.  We always use the
32736    first entry that exactly matches the feature set, so that we
32737    effectively canonicalize the FPU name for the assembler.  */
32738 static const char*
32739 arm_identify_fpu_from_isa (sbitmap isa)
32740 {
32741   auto_sbitmap fpubits (isa_num_bits);
32742   auto_sbitmap cand_fpubits (isa_num_bits);
32743 
32744   bitmap_and (fpubits, isa, isa_all_fpubits_internal);
32745 
32746   /* If there are no ISA feature bits relating to the FPU, we must be
32747      doing soft-float.  */
32748   if (bitmap_empty_p (fpubits))
32749     return "softvfp";
32750 
32751   for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
32752     {
32753       arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
32754       if (bitmap_equal_p (fpubits, cand_fpubits))
32755 	return all_fpus[i].name;
32756     }
32757   /* We must find an entry, or things have gone wrong.  */
32758   gcc_unreachable ();
32759 }
32760 
32761 /* Implement ASM_DECLARE_FUNCTION_NAME.  Output the ISA features used
32762    by the function fndecl.  */
32763 void
32764 arm_declare_function_name (FILE *stream, const char *name, tree decl)
32765 {
32766   tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
32767 
32768   struct cl_target_option *targ_options;
32769   if (target_parts)
32770     targ_options = TREE_TARGET_OPTION (target_parts);
32771   else
32772     targ_options = TREE_TARGET_OPTION (target_option_current_node);
32773   gcc_assert (targ_options);
32774 
32775   arm_print_asm_arch_directives (stream, targ_options);
32776 
32777   fprintf (stream, "\t.syntax unified\n");
32778 
32779   if (TARGET_THUMB)
32780     {
32781       if (is_called_in_ARM_mode (decl)
32782 	  || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
32783 	      && cfun->is_thunk))
32784 	fprintf (stream, "\t.code 32\n");
32785       else if (TARGET_THUMB1)
32786 	fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
32787       else
32788 	fprintf (stream, "\t.thumb\n\t.thumb_func\n");
32789     }
32790   else
32791     fprintf (stream, "\t.arm\n");
32792 
32793   if (TARGET_POKE_FUNCTION_NAME)
32794     arm_poke_function_name (stream, (const char *) name);
32795 }
32796 
32797 /* If MEM is in the form of [base+offset], extract the two parts
32798    of the address into BASE and OFFSET; otherwise return false
32799    after clearing BASE and OFFSET.  */
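/* E.g. for (mem (plus (reg r3) (const_int 8))) this sets
   BASE == (reg r3) and OFFSET == (const_int 8); a bare (mem (reg r3))
   yields OFFSET == const0_rtx.  (The register number here is only
   illustrative.)  */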
32800 
32801 static bool
32802 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
32803 {
32804   rtx addr;
32805 
32806   gcc_assert (MEM_P (mem));
32807 
32808   addr = XEXP (mem, 0);
32809 
32810   /* Strip off const from addresses like (const (addr)).  */
32811   if (GET_CODE (addr) == CONST)
32812     addr = XEXP (addr, 0);
32813 
32814   if (GET_CODE (addr) == REG)
32815     {
32816       *base = addr;
32817       *offset = const0_rtx;
32818       return true;
32819     }
32820 
32821   if (GET_CODE (addr) == PLUS
32822       && GET_CODE (XEXP (addr, 0)) == REG
32823       && CONST_INT_P (XEXP (addr, 1)))
32824     {
32825       *base = XEXP (addr, 0);
32826       *offset = XEXP (addr, 1);
32827       return true;
32828     }
32829 
32830   *base = NULL_RTX;
32831   *offset = NULL_RTX;
32832 
32833   return false;
32834 }
32835 
32836 /* If INSN is a load or store of an address in the form of [base+offset],
32837    extract the two parts into BASE and OFFSET.  IS_LOAD is set
32838    to TRUE if it's a load.  Return TRUE if INSN is such an instruction,
32839    otherwise return FALSE.  */
32840 
32841 static bool
32842 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
32843 {
32844   rtx x, dest, src;
32845 
32846   gcc_assert (INSN_P (insn));
32847   x = PATTERN (insn);
32848   if (GET_CODE (x) != SET)
32849     return false;
32850 
32851   src = SET_SRC (x);
32852   dest = SET_DEST (x);
32853   if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
32854     {
32855       *is_load = false;
32856       extract_base_offset_in_addr (dest, base, offset);
32857     }
32858   else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
32859     {
32860       *is_load = true;
32861       extract_base_offset_in_addr (src, base, offset);
32862     }
32863   else
32864     return false;
32865 
32866   return (*base != NULL_RTX && *offset != NULL_RTX);
32867 }
32868 
32869 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
32870 
32871    Currently we only support fusing ldr and str instructions, so FUSION_PRI
32872    and PRI are only calculated for these instructions.  For other instructions,
32873    FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds
32874    of instruction fusion can be supported by returning different priorities.
32875 
32876    It's important that irrelevant instructions get the largest FUSION_PRI.  */
32877 
32878 static void
32879 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
32880 			   int *fusion_pri, int *pri)
32881 {
32882   int tmp, off_val;
32883   bool is_load;
32884   rtx base, offset;
32885 
32886   gcc_assert (INSN_P (insn));
32887 
32888   tmp = max_pri - 1;
32889   if (!fusion_load_store (insn, &base, &offset, &is_load))
32890     {
32891       *pri = tmp;
32892       *fusion_pri = tmp;
32893       return;
32894     }
32895 
32896   /* Load goes first.  */
32897   if (is_load)
32898     *fusion_pri = tmp - 1;
32899   else
32900     *fusion_pri = tmp - 2;
32901 
32902   tmp /= 2;
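  /* The remaining priority is keyed first on the base register number (placed
     in the upper bits) and then on the offset, so accesses through the same
     base register with nearby offsets end up adjacent in the fusion order.  */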
32903 
32904   /* INSN with smaller base register goes first.  */
32905   tmp -= ((REGNO (base) & 0xff) << 20);
32906 
32907   /* INSN with smaller offset goes first.  */
32908   off_val = (int)(INTVAL (offset));
32909   if (off_val >= 0)
32910     tmp -= (off_val & 0xfffff);
32911   else
32912     tmp += ((- off_val) & 0xfffff);
32913 
32914   *pri = tmp;
32915   return;
32916 }
32917 
32918 
32919 /* Construct and return a PARALLEL RTX vector with elements numbering the
32920    lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
32921    the vector - from the perspective of the architecture.  This does not
32922    line up with GCC's perspective on lane numbers, so we end up with
32923    different masks depending on our target endian-ness.  The diagram
32924    below may help.  We must draw the distinction when building masks
32925    which select one half of the vector.  An instruction selecting
32926    architectural low-lanes for a big-endian target, must be described using
32927    a mask selecting GCC high-lanes.
32928 
32929                  Big-Endian             Little-Endian
32930 
32931 GCC             0   1   2   3           3   2   1   0
32932               | x | x | x | x |       | x | x | x | x |
32933 Architecture    3   2   1   0           3   2   1   0
32934 
32935 Low Mask:         { 2, 3 }                { 0, 1 }
32936 High Mask:        { 0, 1 }                { 2, 3 }
32937 */
32938 
32939 rtx
32940 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
32941 {
32942   int nunits = GET_MODE_NUNITS (mode);
32943   rtvec v = rtvec_alloc (nunits / 2);
32944   int high_base = nunits / 2;
32945   int low_base = 0;
32946   int base;
32947   rtx t1;
32948   int i;
32949 
32950   if (BYTES_BIG_ENDIAN)
32951     base = high ? low_base : high_base;
32952   else
32953     base = high ? high_base : low_base;
32954 
32955   for (i = 0; i < nunits / 2; i++)
32956     RTVEC_ELT (v, i) = GEN_INT (base + i);
32957 
32958   t1 = gen_rtx_PARALLEL (mode, v);
32959   return t1;
32960 }
32961 
32962 /* Check OP for validity as a PARALLEL RTX vector with elements
32963    numbering the lanes of either the high (HIGH == TRUE) or low half,
32964    from the perspective of the architecture.  See the diagram above
32965    arm_simd_vect_par_cnst_half for more details.  */
32966 
32967 bool
32968 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
32969 				       bool high)
32970 {
32971   rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
32972   HOST_WIDE_INT count_op = XVECLEN (op, 0);
32973   HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
32974   int i = 0;
32975 
32976   if (!VECTOR_MODE_P (mode))
32977     return false;
32978 
32979   if (count_op != count_ideal)
32980     return false;
32981 
32982   for (i = 0; i < count_ideal; i++)
32983     {
32984       rtx elt_op = XVECEXP (op, 0, i);
32985       rtx elt_ideal = XVECEXP (ideal, 0, i);
32986 
32987       if (!CONST_INT_P (elt_op)
32988 	  || INTVAL (elt_ideal) != INTVAL (elt_op))
32989 	return false;
32990     }
32991   return true;
32992 }
32993 
32994 /* Can output mi_thunk for all cases except for non-zero vcall_offset
32995    in Thumb1.  */
32996 static bool
32997 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
32998 			 const_tree)
32999 {
33000   /* For now, we punt and do not handle this for TARGET_THUMB1.  */
33001   if (vcall_offset && TARGET_THUMB1)
33002     return false;
33003 
33004   /* Otherwise ok.  */
33005   return true;
33006 }
33007 
33008 /* Generate RTL for a conditional branch with rtx comparison CODE in
33009    mode CC_MODE. The destination of the unlikely conditional branch
33010    is LABEL_REF.  */
33011 
33012 void
33013 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
33014 			  rtx label_ref)
33015 {
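  /* Build (set (pc) (if_then_else (COND (reg CC) 0) (label_ref LABEL) (pc)))
     and emit it with a branch-probability note marking the branch as
     unlikely to be taken.  */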
33016   rtx x;
33017   x = gen_rtx_fmt_ee (code, VOIDmode,
33018 		      gen_rtx_REG (cc_mode, CC_REGNUM),
33019 		      const0_rtx);
33020 
33021   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
33022 			    gen_rtx_LABEL_REF (VOIDmode, label_ref),
33023 			    pc_rtx);
33024   emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
33025 }
33026 
33027 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
33028 
33029    For pure-code sections there is no letter code for this attribute, so
33030    output all the section flags numerically when this is needed.  */
33031 
33032 static bool
33033 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
33034 {
33035 
33036   if (flags & SECTION_ARM_PURECODE)
33037     {
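      /* The literals below are the ELF section flag encodings:
	 SHF_ARM_PURECODE (0x20000000), SHF_WRITE (0x1), SHF_ALLOC (0x2),
	 SHF_EXECINSTR (0x4), SHF_MERGE (0x10), SHF_STRINGS (0x20),
	 SHF_GROUP (0x200), SHF_TLS (0x400) and SHF_EXCLUDE (0x80000000).  */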
33038       *num = 0x20000000;
33039 
33040       if (!(flags & SECTION_DEBUG))
33041 	*num |= 0x2;
33042       if (flags & SECTION_EXCLUDE)
33043 	*num |= 0x80000000;
33044       if (flags & SECTION_WRITE)
33045 	*num |= 0x1;
33046       if (flags & SECTION_CODE)
33047 	*num |= 0x4;
33048       if (flags & SECTION_MERGE)
33049 	*num |= 0x10;
33050       if (flags & SECTION_STRINGS)
33051 	*num |= 0x20;
33052       if (flags & SECTION_TLS)
33053 	*num |= 0x400;
33054       if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
33055 	*num |= 0x200;
33056 
33057       return true;
33058     }
33059 
33060   return false;
33061 }
33062 
33063 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
33064 
33065    If pure-code is passed as an option, make sure all functions are in
33066    sections that have the SHF_ARM_PURECODE attribute.  */
33067 
33068 static section *
33069 arm_function_section (tree decl, enum node_frequency freq,
33070 		      bool startup, bool exit)
33071 {
33072   const char * section_name;
33073   section * sec;
33074 
33075   if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
33076     return default_function_section (decl, freq, startup, exit);
33077 
33078   if (!target_pure_code)
33079     return default_function_section (decl, freq, startup, exit);
33080 
33081 
33082   section_name = DECL_SECTION_NAME (decl);
33083 
33084   /* If a function is not in a named section then it falls under the 'default'
33085      text section, also known as '.text'.  We can preserve previous behavior as
33086      the default text section already has the SHF_ARM_PURECODE section
33087      attribute.  */
33088   if (!section_name)
33089     {
33090       section *default_sec = default_function_section (decl, freq, startup,
33091 						       exit);
33092 
33093       /* If default_sec is not null, then it must be a special section like for
33094 	 example .text.startup.  We set the pure-code attribute and return the
33095 	 same section to preserve existing behavior.  */
33096       if (default_sec)
33097 	  default_sec->common.flags |= SECTION_ARM_PURECODE;
33098       return default_sec;
33099     }
33100 
33101   /* Otherwise check whether a section has already been created with
33102      'section_name'.  */
33103   sec = get_named_section (decl, section_name, 0);
33104   if (!sec)
33105     /* If that is not the case, passing NULL as the section's name to
33106        'get_named_section' will create a section with the declaration's
33107        section name.  */
33108     sec = get_named_section (decl, NULL, 0);
33109 
33110   /* Set the SHF_ARM_PURECODE attribute.  */
33111   sec->common.flags |= SECTION_ARM_PURECODE;
33112 
33113   return sec;
33114 }
33115 
33116 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
33117 
33118    If DECL is a function declaration and pure-code is passed as an option
33119    then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
33120    section's name and RELOC indicates whether the declaration's initializer may
33121    contain runtime relocations.  */
33122 
33123 static unsigned int
33124 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
33125 {
33126   unsigned int flags = default_section_type_flags (decl, name, reloc);
33127 
33128   if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
33129     flags |= SECTION_ARM_PURECODE;
33130 
33131   return flags;
33132 }
33133 
33134 /* Generate call to __aeabi_[mode]divmod (op0, op1).  */
33135 
33136 static void
33137 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
33138 			   rtx op0, rtx op1,
33139 			   rtx *quot_p, rtx *rem_p)
33140 {
33141   if (mode == SImode)
33142     gcc_assert (!TARGET_IDIV);
33143 
33144   scalar_int_mode libval_mode
33145     = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
33146 
33147   rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
33148 					libval_mode,
33149 					op0, GET_MODE (op0),
33150 					op1, GET_MODE (op1));
33151 
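  /* The libcall returns both results in a value twice the width of MODE:
     the quotient is extracted from byte offset 0 and the remainder from
     the following GET_MODE_SIZE (mode) bytes.  */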
33152   rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
33153   rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
33154 				       GET_MODE_SIZE (mode));
33155 
33156   gcc_assert (quotient);
33157   gcc_assert (remainder);
33158 
33159   *quot_p = quotient;
33160   *rem_p = remainder;
33161 }
33162 
33163 /*  This function checks for the availability of the coprocessor builtin passed
33164     in BUILTIN for the current target.  Returns true if it is available and
33165     false otherwise.  If a BUILTIN is passed for which this function has not
33166     been implemented, it will trigger an internal compiler error (gcc_unreachable).  */
33167 
33168 bool
33169 arm_coproc_builtin_available (enum unspecv builtin)
33170 {
33171   /* None of these builtins are available in Thumb mode if the target only
33172      supports Thumb-1.  */
33173   if (TARGET_THUMB1)
33174     return false;
33175 
33176   switch (builtin)
33177     {
33178       case VUNSPEC_CDP:
33179       case VUNSPEC_LDC:
33180       case VUNSPEC_LDCL:
33181       case VUNSPEC_STC:
33182       case VUNSPEC_STCL:
33183       case VUNSPEC_MCR:
33184       case VUNSPEC_MRC:
33185 	if (arm_arch4)
33186 	  return true;
33187 	break;
33188       case VUNSPEC_CDP2:
33189       case VUNSPEC_LDC2:
33190       case VUNSPEC_LDC2L:
33191       case VUNSPEC_STC2:
33192       case VUNSPEC_STC2L:
33193       case VUNSPEC_MCR2:
33194       case VUNSPEC_MRC2:
33195 	/* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
33196 	   ARMv8-{A,M}.  */
33197 	if (arm_arch5t)
33198 	  return true;
33199 	break;
33200       case VUNSPEC_MCRR:
33201       case VUNSPEC_MRRC:
33202 	/* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
33203 	   ARMv8-{A,M}.  */
33204 	if (arm_arch6 || arm_arch5te)
33205 	  return true;
33206 	break;
33207       case VUNSPEC_MCRR2:
33208       case VUNSPEC_MRRC2:
33209 	if (arm_arch6)
33210 	  return true;
33211 	break;
33212       default:
33213 	gcc_unreachable ();
33214     }
33215   return false;
33216 }
33217 
33218 /* This function returns true if OP is a valid memory operand for the ldc and
33219    stc coprocessor instructions and false otherwise.  */
33220 
33221 bool
33222 arm_coproc_ldc_stc_legitimate_address (rtx op)
33223 {
33224   HOST_WIDE_INT range;
33225   /* Has to be a memory operand.  */
33226   if (!MEM_P (op))
33227     return false;
33228 
33229   op = XEXP (op, 0);
33230 
33231   /* We accept registers.  */
33232   if (REG_P (op))
33233     return true;
33234 
33235   switch (GET_CODE (op))
33236     {
33237       case PLUS:
33238 	{
33239 	  /* Or registers with an offset.  */
33240 	  if (!REG_P (XEXP (op, 0)))
33241 	    return false;
33242 
33243 	  op = XEXP (op, 1);
33244 
33245 	  /* The offset must be an immediate though.  */
33246 	  if (!CONST_INT_P (op))
33247 	    return false;
33248 
33249 	  range = INTVAL (op);
33250 
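	  /* LDC and STC encode the offset as an 8-bit word offset (an
	     immediate scaled by 4), which gives the limits checked below.  */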
33251 	  /* Within the range of [-1020,1020].  */
33252 	  if (!IN_RANGE (range, -1020, 1020))
33253 	    return false;
33254 
33255 	  /* And a multiple of 4.  */
33256 	  return (range % 4) == 0;
33257 	}
33258       case PRE_INC:
33259       case POST_INC:
33260       case PRE_DEC:
33261       case POST_DEC:
33262 	return REG_P (XEXP (op, 0));
33263       default:
33264 	gcc_unreachable ();
33265     }
33266   return false;
33267 }
33268 
33269 /* Return the diagnostic message string if conversion from FROMTYPE to
33270    TOTYPE is not allowed, NULL otherwise.  */
33271 
33272 static const char *
33273 arm_invalid_conversion (const_tree fromtype, const_tree totype)
33274 {
33275   if (element_mode (fromtype) != element_mode (totype))
33276     {
33277       /* Do not allow conversions to/from BFmode scalar types.  */
33278       if (TYPE_MODE (fromtype) == BFmode)
33279 	return N_("invalid conversion from type %<bfloat16_t%>");
33280       if (TYPE_MODE (totype) == BFmode)
33281 	return N_("invalid conversion to type %<bfloat16_t%>");
33282     }
33283 
33284   /* Conversion allowed.  */
33285   return NULL;
33286 }
33287 
33288 /* Return the diagnostic message string if the unary operation OP is
33289    not permitted on TYPE, NULL otherwise.  */
33290 
33291 static const char *
33292 arm_invalid_unary_op (int op, const_tree type)
33293 {
33294   /* Reject all single-operand operations on BFmode except for &.  */
33295   if (element_mode (type) == BFmode && op != ADDR_EXPR)
33296     return N_("operation not permitted on type %<bfloat16_t%>");
33297 
33298   /* Operation allowed.  */
33299   return NULL;
33300 }
33301 
33302 /* Return the diagnostic message string if the binary operation OP is
33303    not permitted on TYPE1 and TYPE2, NULL otherwise.  */
33304 
33305 static const char *
33306 arm_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
33307 			   const_tree type2)
33308 {
33309   /* Reject all 2-operand operations on BFmode.  */
33310   if (element_mode (type1) == BFmode
33311       || element_mode (type2) == BFmode)
33312     return N_("operation not permitted on type %<bfloat16_t%>");
33313 
33314   /* Operation allowed.  */
33315   return NULL;
33316 }
33317 
33318 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
33319 
33320    In VFPv1, VFP registers could only be accessed in the mode they were
33321    set, so subregs would be invalid there.  However, we don't support
33322    VFPv1 at the moment, and the restriction was lifted in VFPv2.
33323 
33324    In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
33325    VFP registers in little-endian order.  We can't describe that accurately to
33326    GCC, so avoid taking subregs of such values.
33327 
33328    The only exception is going from a 128-bit to a 64-bit type.  In that
33329    case the data layout happens to be consistent for big-endian, so we
33330    explicitly allow that case.  */
33331 
33332 static bool
33333 arm_can_change_mode_class (machine_mode from, machine_mode to,
33334 			   reg_class_t rclass)
33335 {
33336   if (TARGET_BIG_END
33337       && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
33338       && (GET_MODE_SIZE (from) > UNITS_PER_WORD
33339 	  || GET_MODE_SIZE (to) > UNITS_PER_WORD)
33340       && reg_classes_intersect_p (VFP_REGS, rclass))
33341     return false;
33342   return true;
33343 }
33344 
33345 /* Implement TARGET_CONSTANT_ALIGNMENT.  Make strings word-aligned so
33346    strcpy from constants will be faster.  */
33347 
33348 static HOST_WIDE_INT
33349 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
33350 {
33351   unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
33352   if (TREE_CODE (exp) == STRING_CST && !optimize_size)
33353     return MAX (align, BITS_PER_WORD * factor);
33354   return align;
33355 }
33356 
33357 /* Emit a speculation barrier on target architectures that do not have
33358    DSB/ISB directly.  Such systems probably don't need a barrier
33359    themselves, but if the code is ever run on a later architecture, it
33360    might become a problem.  */
33361 void
33362 arm_emit_speculation_barrier_function ()
33363 {
33364   emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
33365 }
33366 
33367 /* Have we recorded an explicit access to the Q bit of APSR?  */
33368 bool
33369 arm_q_bit_access (void)
33370 {
33371   if (cfun && cfun->decl)
33372     return lookup_attribute ("acle qbit",
33373 			     DECL_ATTRIBUTES (cfun->decl));
33374   return true;
33375 }
33376 
33377 /* Have we recorded an explicit access to the GE bits of PSTATE?  */
33378 bool
33379 arm_ge_bits_access (void)
33380 {
33381   if (cfun && cfun->decl)
33382     return lookup_attribute ("acle gebits",
33383 			     DECL_ATTRIBUTES (cfun->decl));
33384   return true;
33385 }
33386 
33387 #if CHECKING_P
33388 namespace selftest {
33389 
33390 /* Scan the static data tables generated by parsecpu.awk looking for
33391    potential issues with the data.  We primarily check for
33392    inconsistencies in the option extensions at present (extensions
33393    that duplicate others but aren't marked as aliases).  Furthermore,
33394    for correct canonicalization, later options must never be a subset
33395    of an earlier option.  Any extension should also only specify other
33396    feature bits and never an architecture bit.  The architecture is inferred
33397    from the declaration of the extension.  */
33398 static void
33399 arm_test_cpu_arch_data (void)
33400 {
33401   const arch_option *arch;
33402   const cpu_option *cpu;
33403   auto_sbitmap target_isa (isa_num_bits);
33404   auto_sbitmap isa1 (isa_num_bits);
33405   auto_sbitmap isa2 (isa_num_bits);
33406 
33407   for (arch = all_architectures; arch->common.name != NULL; ++arch)
33408     {
33409       const cpu_arch_extension *ext1, *ext2;
33410 
33411       if (arch->common.extensions == NULL)
33412 	continue;
33413 
33414       arm_initialize_isa (target_isa, arch->common.isa_bits);
33415 
33416       for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
33417 	{
33418 	  if (ext1->alias)
33419 	    continue;
33420 
33421 	  arm_initialize_isa (isa1, ext1->isa_bits);
33422 	  for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
33423 	    {
33424 	      if (ext2->alias || ext1->remove != ext2->remove)
33425 		continue;
33426 
33427 	      arm_initialize_isa (isa2, ext2->isa_bits);
33428 	      /* If the option is a subset of the parent option, it doesn't
33429 		 add anything and so isn't useful.  */
33430 	      ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
33431 
33432 	      /* If the extension specifies any architectural bits then
33433 		 disallow it.  Extensions should only specify feature bits.  */
33434 	      ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
33435 	    }
33436 	}
33437     }
33438 
33439   for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
33440     {
33441       const cpu_arch_extension *ext1, *ext2;
33442 
33443       if (cpu->common.extensions == NULL)
33444 	continue;
33445 
33446       arm_initialize_isa (target_isa, cpu->common.isa_bits);
33447 
33448       for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
33449 	{
33450 	  if (ext1->alias)
33451 	    continue;
33452 
33453 	  arm_initialize_isa (isa1, ext1->isa_bits);
33454 	  for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
33455 	    {
33456 	      if (ext2->alias || ext1->remove != ext2->remove)
33457 		continue;
33458 
33459 	      arm_initialize_isa (isa2, ext2->isa_bits);
33460 	      /* If the option is a subset of the parent option, it doesn't
33461 		 add anything and so isn't useful.  */
33462 	      ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
33463 
33464 	      /* If the extension specifies any architectural bits then
33465 		 disallow it.  Extensions should only specify feature bits.  */
33466 	      ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
33467 	    }
33468 	}
33469     }
33470 }
33471 
33472 /* Scan the static data tables generated by parsecpu.awk looking for
33473    potential issues with the data.  Here we check for consistency between the
33474    fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
33475    a feature bit that is not defined by any FPU flag.  */
33476 static void
33477 arm_test_fpu_data (void)
33478 {
33479   auto_sbitmap isa_all_fpubits_internal (isa_num_bits);
33480   auto_sbitmap fpubits (isa_num_bits);
33481   auto_sbitmap tmpset (isa_num_bits);
33482 
33483   static const enum isa_feature fpu_bitlist_internal[]
33484     = { ISA_ALL_FPU_INTERNAL, isa_nobit };
33485   arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
33486 
33487   for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
33488   {
33489     arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
33490     bitmap_and_compl (tmpset, isa_all_fpubits_internal, fpubits);
33491     bitmap_clear (isa_all_fpubits_internal);
33492     bitmap_copy (isa_all_fpubits_internal, tmpset);
33493   }
33494 
33495   if (!bitmap_empty_p (isa_all_fpubits_internal))
33496     {
33497 	fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
33498 			 " group that are not defined by any FPU.\n"
33499 			 "       Check your arm-cpus.in.\n");
33500 	ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal));
33501     }
33502 }
33503 
33504 static void
33505 arm_run_selftests (void)
33506 {
33507   arm_test_cpu_arch_data ();
33508   arm_test_fpu_data ();
33509 }
33510 } /* Namespace selftest.  */
33511 
33512 #undef TARGET_RUN_TARGET_SELFTESTS
33513 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
33514 #endif /* CHECKING_P */
33515 
33516 /* Worker function for TARGET_MD_ASM_ADJUST, while in thumb1 mode.
33517    Unlike the arm version, we do NOT implement asm flag outputs.  */
33518 
33519 rtx_insn *
33520 thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
33521 		      vec<const char *> &constraints,
33522 		      vec<rtx> &/*clobbers*/, HARD_REG_SET &/*clobbered_regs*/)
33523 {
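  /* Output constraints of the form "=@cc<cond>" request the condition flags;
     there is no way to honour them in Thumb-1 code, so diagnose any such
     request.  */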
33524   for (unsigned i = 0, n = outputs.length (); i < n; ++i)
33525     if (strncmp (constraints[i], "=@cc", 4) == 0)
33526       {
33527 	sorry ("asm flags not supported in thumb1 mode");
33528 	break;
33529       }
33530   return NULL;
33531 }
33532 
33533 /* Generate code to enable conditional branches in functions over 1 MiB.
33534    Parameters are:
33535      operands: the operands list of the asm insn (see arm_cond_branch or
33536        arm_cond_branch_reversed).
33537      pos_label: an index into the operands array; operands[pos_label] is
33538        the asm label of the final jump destination.
33539      dest: a string used to generate the asm label of the intermediate
33540        destination.
33541      branch_format: a string denoting the intermediate branch format,
33542        e.g. "beq", "bne", etc.  */
33543 
33544 const char *
33545 arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
33546 		    const char * branch_format)
33547 {
33548   rtx_code_label * tmp_label = gen_label_rtx ();
33549   char label_buf[256];
33550   char buffer[128];
33551   ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
33552 			CODE_LABEL_NUMBER (tmp_label));
33553   const char *label_ptr = arm_strip_name_encoding (label_buf);
33554   rtx dest_label = operands[pos_label];
33555   operands[pos_label] = tmp_label;
33556 
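  /* The sequence emitted below is

	 <branch_format> <local label>
	 b	<original destination>
       <local label>:

     so the limited-range conditional branch only has to reach past the
     unconditional branch, which in turn can reach the distant label.  */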
33557   snprintf (buffer, sizeof (buffer), "%s%s", branch_format , label_ptr);
33558   output_asm_insn (buffer, operands);
33559 
33560   snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr);
33561   operands[pos_label] = dest_label;
33562   output_asm_insn (buffer, operands);
33563   return "";
33564 }
33565 
33566 /* Return the base register class for a memory access in MODE: the MVE
33567    modes handled below require the base register to be in LO_REGS (i.e. [Rn], Rn in LO_REGS).  */
33568 enum reg_class
33569 arm_mode_base_reg_class (machine_mode mode)
33570 {
33571   if (TARGET_HAVE_MVE
33572       && (mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode))
33573     return LO_REGS;
33574 
33575   return MODE_BASE_REG_REG_CLASS (mode);
33576 }
33577 
33578 struct gcc_target targetm = TARGET_INITIALIZER;
33579 
33580 #include "gt-arm.h"
33581