1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2020 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #define IN_TARGET_CODE 1
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "memmodel.h"
29 #include "gimple.h"
30 #include "cfghooks.h"
31 #include "cfgloop.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "expmed.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic.h"
42 #include "cfgbuild.h"
43 #include "alias.h"
44 #include "fold-const.h"
45 #include "attribs.h"
46 #include "calls.h"
47 #include "stor-layout.h"
48 #include "varasm.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "flags.h"
52 #include "except.h"
53 #include "explow.h"
54 #include "expr.h"
55 #include "cfgrtl.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "gimplify.h"
60 #include "dwarf2.h"
61 #include "tm-constrs.h"
62 #include "cselib.h"
63 #include "sched-int.h"
64 #include "opts.h"
65 #include "tree-pass.h"
66 #include "context.h"
67 #include "pass_manager.h"
68 #include "target-globals.h"
69 #include "gimple-iterator.h"
70 #include "tree-vectorizer.h"
71 #include "shrink-wrap.h"
72 #include "builtins.h"
73 #include "rtl-iter.h"
74 #include "tree-iterator.h"
75 #include "dbgcnt.h"
76 #include "case-cfn-macros.h"
77 #include "dojump.h"
78 #include "fold-const-call.h"
79 #include "tree-vrp.h"
80 #include "tree-ssanames.h"
81 #include "selftest.h"
82 #include "selftest-rtl.h"
83 #include "print-rtl.h"
84 #include "intl.h"
85 #include "ifcvt.h"
86 #include "symbol-summary.h"
87 #include "ipa-prop.h"
88 #include "ipa-fnsummary.h"
89 #include "wide-int-bitmask.h"
90 #include "tree-vector-builder.h"
91 #include "debug.h"
92 #include "dwarf2out.h"
93 #include "i386-options.h"
94 #include "i386-builtins.h"
95 #include "i386-expand.h"
96 #include "i386-features.h"
97 #include "function-abi.h"
98
99 /* This file should be included last. */
100 #include "target-def.h"
101
102 static rtx legitimize_dllimport_symbol (rtx, bool);
103 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
104 static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
105 static void ix86_emit_restore_reg_using_pop (rtx);
106
107
108 #ifndef CHECK_STACK_LIMIT
109 #define CHECK_STACK_LIMIT (-1)
110 #endif
111
112 /* Return index of given mode in mult and division cost tables. */
113 #define MODE_INDEX(mode) \
114 ((mode) == QImode ? 0 \
115 : (mode) == HImode ? 1 \
116 : (mode) == SImode ? 2 \
117 : (mode) == DImode ? 3 \
118 : 4)
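/* For example, MODE_INDEX (SImode) evaluates to 2, selecting the SImode slot
   of the per-mode cost arrays; any mode other than QI/HI/SI/DImode falls
   through to index 4.  */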
119
120
121 /* Set by -mtune. */
122 const struct processor_costs *ix86_tune_cost = NULL;
123
124 /* Set by -mtune or -Os. */
125 const struct processor_costs *ix86_cost = NULL;
126
127 /* If the average insn count for a single function invocation is
128 lower than this constant, emit fast (but longer) prologue and
129 epilogue code. */
130 #define FAST_PROLOGUE_INSN_COUNT 20
131
132 /* Names for the 8-bit (low parts), 8-bit (high parts), and 16-bit registers, respectively. */
133 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
134 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
135 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
136
137 /* Array of the smallest class containing reg number REGNO, indexed by
138 REGNO. Used by REGNO_REG_CLASS in i386.h. */
139
140 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
141 {
142 /* ax, dx, cx, bx */
143 AREG, DREG, CREG, BREG,
144 /* si, di, bp, sp */
145 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
146 /* FP registers */
147 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
148 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
149 /* arg pointer, flags, fpsr, frame */
150 NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
151 /* SSE registers */
152 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
153 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
154 /* MMX registers */
155 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
156 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
157 /* REX registers */
158 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
159 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
160 /* SSE REX registers */
161 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
162 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
163 /* AVX-512 SSE registers */
164 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
165 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
166 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
167 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
168 /* Mask registers. */
169 ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
170 MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS
171 };
172
173 /* The "default" register map used in 32bit mode. */
174
175 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
176 {
177 /* general regs */
178 0, 2, 1, 3, 6, 7, 4, 5,
179 /* fp regs */
180 12, 13, 14, 15, 16, 17, 18, 19,
181 /* arg, flags, fpsr, frame */
182 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
183 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
184 /* SSE */
185 21, 22, 23, 24, 25, 26, 27, 28,
186 /* MMX */
187 29, 30, 31, 32, 33, 34, 35, 36,
188 /* extended integer registers */
189 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
190 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
191 /* extended sse registers */
192 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
193 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
194 /* AVX-512 registers 16-23 */
195 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
196 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
197 /* AVX-512 registers 24-31 */
198 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
199 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
200 /* Mask registers */
201 93, 94, 95, 96, 97, 98, 99, 100
202 };
203
204 /* The "default" register map used in 64bit mode. */
205
206 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
207 {
208 /* general regs */
209 0, 1, 2, 3, 4, 5, 6, 7,
210 /* fp regs */
211 33, 34, 35, 36, 37, 38, 39, 40,
212 /* arg, flags, fpsr, frame */
213 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
214 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
215 /* SSE */
216 17, 18, 19, 20, 21, 22, 23, 24,
217 /* MMX */
218 41, 42, 43, 44, 45, 46, 47, 48,
219 /* extended integer registers */
220 8, 9, 10, 11, 12, 13, 14, 15,
221 /* extended SSE registers */
222 25, 26, 27, 28, 29, 30, 31, 32,
223 /* AVX-512 registers 16-23 */
224 67, 68, 69, 70, 71, 72, 73, 74,
225 /* AVX-512 registers 24-31 */
226 75, 76, 77, 78, 79, 80, 81, 82,
227 /* Mask registers */
228 118, 119, 120, 121, 122, 123, 124, 125
229 };
230
231 /* Define the register numbers to be used in Dwarf debugging information.
232 The SVR4 reference port C compiler uses the following register numbers
233 in its Dwarf output code:
234 0 for %eax (gcc regno = 0)
235 1 for %ecx (gcc regno = 2)
236 2 for %edx (gcc regno = 1)
237 3 for %ebx (gcc regno = 3)
238 4 for %esp (gcc regno = 7)
239 5 for %ebp (gcc regno = 6)
240 6 for %esi (gcc regno = 4)
241 7 for %edi (gcc regno = 5)
242 The following three DWARF register numbers are never generated by
243 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
244 believed these numbers have these meanings.
245 8 for %eip (no gcc equivalent)
246 9 for %eflags (gcc regno = 17)
247 10 for %trapno (no gcc equivalent)
248 It is not at all clear how we should number the FP stack registers
249 for the x86 architecture. If the version of SDB on x86/svr4 were
250 a bit less brain dead with respect to floating-point then we would
251 have a precedent to follow with respect to DWARF register numbers
252 for x86 FP registers, but the SDB on x86/svr4 was so completely
253 broken with respect to FP registers that it is hardly worth thinking
254 of it as something to strive for compatibility with.
255 The version of x86/svr4 SDB I had does (partially)
256 seem to believe that DWARF register number 11 is associated with
257 the x86 register %st(0), but that's about all. Higher DWARF
258 register numbers don't seem to be associated with anything in
259 particular, and even for DWARF regno 11, SDB only seemed to under-
260 stand that it should say that a variable lives in %st(0) (when
261 asked via an `=' command) if we said it was in DWARF regno 11,
262 but SDB still printed garbage when asked for the value of the
263 variable in question (via a `/' command).
264 (Also note that the labels SDB printed for various FP stack regs
265 when doing an `x' command were all wrong.)
266 Note that these problems generally don't affect the native SVR4
267 C compiler because it doesn't allow the use of -O with -g and
268 because when it is *not* optimizing, it allocates a memory
269 location for each floating-point variable, and the memory
270 location is what gets described in the DWARF AT_location
271 attribute for the variable in question.
272 Regardless of the severe mental illness of the x86/svr4 SDB, we
273 do something sensible here and we use the following DWARF
274 register numbers. Note that these are all stack-top-relative
275 numbers.
276 11 for %st(0) (gcc regno = 8)
277 12 for %st(1) (gcc regno = 9)
278 13 for %st(2) (gcc regno = 10)
279 14 for %st(3) (gcc regno = 11)
280 15 for %st(4) (gcc regno = 12)
281 16 for %st(5) (gcc regno = 13)
282 17 for %st(6) (gcc regno = 14)
283 18 for %st(7) (gcc regno = 15)
284 */
285 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
286 {
287 /* general regs */
288 0, 2, 1, 3, 6, 7, 5, 4,
289 /* fp regs */
290 11, 12, 13, 14, 15, 16, 17, 18,
291 /* arg, flags, fpsr, frame */
292 IGNORED_DWARF_REGNUM, 9,
293 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
294 /* SSE registers */
295 21, 22, 23, 24, 25, 26, 27, 28,
296 /* MMX registers */
297 29, 30, 31, 32, 33, 34, 35, 36,
298 /* extended integer registers */
299 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
300 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
301 /* extended sse registers */
302 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
303 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
304 /* AVX-512 registers 16-23 */
305 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
306 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
307 /* AVX-512 registers 24-31 */
308 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
309 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
310 /* Mask registers */
311 93, 94, 95, 96, 97, 98, 99, 100
312 };
313
314 /* Define parameter passing and return registers. */
315
316 static int const x86_64_int_parameter_registers[6] =
317 {
318 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
319 };
320
321 static int const x86_64_ms_abi_int_parameter_registers[4] =
322 {
323 CX_REG, DX_REG, R8_REG, R9_REG
324 };
325
326 static int const x86_64_int_return_registers[4] =
327 {
328 AX_REG, DX_REG, DI_REG, SI_REG
329 };
330
331 /* Define the structure for the machine field in struct function. */
332
333 struct GTY(()) stack_local_entry {
334 unsigned short mode;
335 unsigned short n;
336 rtx rtl;
337 struct stack_local_entry *next;
338 };
339
340 /* Which cpu are we scheduling for. */
341 enum attr_cpu ix86_schedule;
342
343 /* Which cpu are we optimizing for. */
344 enum processor_type ix86_tune;
345
346 /* Which instruction set architecture to use. */
347 enum processor_type ix86_arch;
348
349 /* True if processor has SSE prefetch instruction. */
350 unsigned char x86_prefetch_sse;
351
352 /* Preferred alignment for stack boundary in bits. */
353 unsigned int ix86_preferred_stack_boundary;
354
355 /* Alignment for incoming stack boundary in bits specified at
356 command line. */
357 unsigned int ix86_user_incoming_stack_boundary;
358
359 /* Default alignment for incoming stack boundary in bits. */
360 unsigned int ix86_default_incoming_stack_boundary;
361
362 /* Alignment for incoming stack boundary in bits. */
363 unsigned int ix86_incoming_stack_boundary;
364
365 /* Calling abi specific va_list type nodes. */
366 tree sysv_va_list_type_node;
367 tree ms_va_list_type_node;
368
369 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
370 char internal_label_prefix[16];
371 int internal_label_prefix_len;
372
373 /* Fence to use after loop using movnt. */
374 tree x86_mfence;
375
376 /* Register class used for passing a given 64-bit part of the argument.
377 These represent classes as documented by the psABI, with the exception of
378 the SSESF and SSEDF classes, which are basically the SSE class, except that
379 gcc will use SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
380 
381 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
382 whenever possible (the upper half does contain padding). */
383 enum x86_64_reg_class
384 {
385 X86_64_NO_CLASS,
386 X86_64_INTEGER_CLASS,
387 X86_64_INTEGERSI_CLASS,
388 X86_64_SSE_CLASS,
389 X86_64_SSESF_CLASS,
390 X86_64_SSEDF_CLASS,
391 X86_64_SSEUP_CLASS,
392 X86_64_X87_CLASS,
393 X86_64_X87UP_CLASS,
394 X86_64_COMPLEX_X87_CLASS,
395 X86_64_MEMORY_CLASS
396 };
397
398 #define MAX_CLASSES 8
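/* Illustrative note: classification works on 8-byte chunks ("eightbytes"),
   so eight classes suffice for the largest register-passed object, a 64-byte
   AVX-512 vector split into eight eightbytes.  */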
399
400 /* Table of constants used by fldpi, fldln2, etc.... */
401 static REAL_VALUE_TYPE ext_80387_constants_table [5];
402 static bool ext_80387_constants_init;
403
404
405 static rtx ix86_function_value (const_tree, const_tree, bool);
406 static bool ix86_function_value_regno_p (const unsigned int);
407 static unsigned int ix86_function_arg_boundary (machine_mode,
408 const_tree);
409 static rtx ix86_static_chain (const_tree, bool);
410 static int ix86_function_regparm (const_tree, const_tree);
411 static void ix86_compute_frame_layout (void);
412 static tree ix86_canonical_va_list_type (tree);
413 static unsigned int split_stack_prologue_scratch_regno (void);
414 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
415
416 static bool ix86_can_inline_p (tree, tree);
417 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
418
419
420 /* Whether -mtune= or -march= were specified */
421 int ix86_tune_defaulted;
422 int ix86_arch_specified;
423
424 /* Return true if a red-zone is in use. We can't use a red-zone when
425 there are local indirect jumps, like "indirect_jump" or "tablejump",
426 which jump to another place in the function, since "call" in the
427 indirect thunk pushes the return address onto the stack, destroying
428 the red-zone.
429 
430 TODO: If we can reserve the first 2 WORDs of the red-zone, one for
431 PUSH and another for CALL, we can allow local indirect jumps with
432 an indirect thunk. */
433
434 bool
435 ix86_using_red_zone (void)
436 {
437 return (TARGET_RED_ZONE
438 && !TARGET_64BIT_MS_ABI
439 && (!cfun->machine->has_local_indirect_jump
440 || cfun->machine->indirect_branch_type == indirect_branch_keep));
441 }
442
443 /* Return true if profiling code should be emitted before the
444 prologue, false otherwise.
445 Note: For x86 with "hotfix" it is sorried (not supported). */
446 static bool
447 ix86_profile_before_prologue (void)
448 {
449 return flag_fentry != 0;
450 }
451
452 /* Update register usage after having seen the compiler flags. */
453
454 static void
455 ix86_conditional_register_usage (void)
456 {
457 int i, c_mask;
458
459 /* If there are no caller-saved registers, preserve all registers
460 except fixed_regs and the registers used for the function return
461 value, since aggregate_value_p checks call_used_regs[regno] on the
462 return value. */
463 if (cfun && cfun->machine->no_caller_saved_registers)
464 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
465 if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
466 call_used_regs[i] = 0;
467
468 /* For 32-bit targets, disable the REX registers. */
469 if (! TARGET_64BIT)
470 {
471 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
472 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
473 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
474 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
475 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
476 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
477 }
478
479 /* See the definition of CALL_USED_REGISTERS in i386.h. */
480 c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
481
482 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
483
484 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
485 {
486 /* Set/reset conditionally defined registers from
487 CALL_USED_REGISTERS initializer. */
488 if (call_used_regs[i] > 1)
489 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
490
491 /* Calculate registers of CLOBBERED_REGS register set
492 as call used registers from GENERAL_REGS register set. */
493 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
494 && call_used_regs[i])
495 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
496 }
497
498 /* If MMX is disabled, disable the registers. */
499 if (! TARGET_MMX)
500 accessible_reg_set &= ~reg_class_contents[MMX_REGS];
501
502 /* If SSE is disabled, disable the registers. */
503 if (! TARGET_SSE)
504 accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS];
505
506 /* If the FPU is disabled, disable the registers. */
507 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
508 accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];
509
510 /* If AVX512F is disabled, disable the registers. */
511 if (! TARGET_AVX512F)
512 {
513 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
514 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
515
516 accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS];
517 }
518 }
519
520 /* Canonicalize a comparison from one we don't have to one we do have. */
521
522 static void
523 ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
524 bool op0_preserve_value)
525 {
526 /* The order of operands in x87 ficom compare is forced by combine in
527 simplify_comparison () function. Float operator is treated as RTX_OBJ
528 with a precedence over other operators and is always put in the first
529 place. Swap condition and operands to match ficom instruction. */
530 if (!op0_preserve_value
531 && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
532 {
533 enum rtx_code scode = swap_condition ((enum rtx_code) *code);
534
535 /* We are called only for compares that are split to SAHF instruction.
536 Ensure that we have setcc/jcc insn for the swapped condition. */
537 if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
538 {
539 std::swap (*op0, *op1);
540 *code = (int) scode;
541 }
542 }
543 }
544
545
546 /* Hook to determine if one function can safely inline another. */
547
548 static bool
549 ix86_can_inline_p (tree caller, tree callee)
550 {
551 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
552 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
553
554 /* Changes of those flags can be tolerated for always_inline functions.
555 Let's hope the user knows what they are doing. */
556 const unsigned HOST_WIDE_INT always_inline_safe_mask
557 = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
558 | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
559 | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
560 | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
561 | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
562 | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
563 | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);
564
565
566 if (!callee_tree)
567 callee_tree = target_option_default_node;
568 if (!caller_tree)
569 caller_tree = target_option_default_node;
570 if (callee_tree == caller_tree)
571 return true;
572
573 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
574 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
575 bool ret = false;
576 bool always_inline
577 = (DECL_DISREGARD_INLINE_LIMITS (callee)
578 && lookup_attribute ("always_inline",
579 DECL_ATTRIBUTES (callee)));
580
581 cgraph_node *callee_node = cgraph_node::get (callee);
582 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
583 function can inline an SSE2 function but an SSE2 function can't inline
584 an SSE4 function. */
585 if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
586 != callee_opts->x_ix86_isa_flags)
587 || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
588 != callee_opts->x_ix86_isa_flags2))
589 ret = false;
590
591 /* See if we have the same non-isa options. */
592 else if ((!always_inline
593 && caller_opts->x_target_flags != callee_opts->x_target_flags)
594 || (caller_opts->x_target_flags & ~always_inline_safe_mask)
595 != (callee_opts->x_target_flags & ~always_inline_safe_mask))
596 ret = false;
597
598 /* See if arch, tune, etc. are the same. */
599 else if (caller_opts->arch != callee_opts->arch)
600 ret = false;
601
602 else if (!always_inline && caller_opts->tune != callee_opts->tune)
603 ret = false;
604
605 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
606 /* If the callee doesn't use FP expressions, differences in
607 ix86_fpmath can be ignored. We are called from FEs
608 for multi-versioning call optimization, so beware of
609 ipa_fn_summaries not being available. */
610 && (! ipa_fn_summaries
611 || ipa_fn_summaries->get (callee_node) == NULL
612 || ipa_fn_summaries->get (callee_node)->fp_expressions))
613 ret = false;
614
615 else if (!always_inline
616 && caller_opts->branch_cost != callee_opts->branch_cost)
617 ret = false;
618
619 else
620 ret = true;
621
622 return ret;
623 }
624
625 /* Return true if this goes in large data/bss. */
626
627 static bool
628 ix86_in_large_data_p (tree exp)
629 {
630 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
631 return false;
632
633 if (exp == NULL_TREE)
634 return false;
635
636 /* Functions are never large data. */
637 if (TREE_CODE (exp) == FUNCTION_DECL)
638 return false;
639
640 /* Automatic variables are never large data. */
641 if (VAR_P (exp) && !is_global_var (exp))
642 return false;
643
644 if (VAR_P (exp) && DECL_SECTION_NAME (exp))
645 {
646 const char *section = DECL_SECTION_NAME (exp);
647 if (strcmp (section, ".ldata") == 0
648 || strcmp (section, ".lbss") == 0)
649 return true;
650 return false;
651 }
652 else
653 {
654 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
655
656 /* If this is an incomplete type with size 0, then we can't put it
657 in data because it might be too big when completed. Also,
658 int_size_in_bytes returns -1 if the size can vary or is larger than
659 an integer, in which case it is also safer to assume that it goes in
660 large data. */
661 if (size <= 0 || size > ix86_section_threshold)
662 return true;
663 }
664
665 return false;
666 }
667
668 /* i386-specific section flag to mark large sections. */
669 #define SECTION_LARGE SECTION_MACH_DEP
670
671 /* Switch to the appropriate section for output of DECL.
672 DECL is either a `VAR_DECL' node or a constant of some sort.
673 RELOC indicates whether forming the initial value of DECL requires
674 link-time relocations. */
675
676 ATTRIBUTE_UNUSED static section *
677 x86_64_elf_select_section (tree decl, int reloc,
678 unsigned HOST_WIDE_INT align)
679 {
680 if (ix86_in_large_data_p (decl))
681 {
682 const char *sname = NULL;
683 unsigned int flags = SECTION_WRITE | SECTION_LARGE;
684 switch (categorize_decl_for_section (decl, reloc))
685 {
686 case SECCAT_DATA:
687 sname = ".ldata";
688 break;
689 case SECCAT_DATA_REL:
690 sname = ".ldata.rel";
691 break;
692 case SECCAT_DATA_REL_LOCAL:
693 sname = ".ldata.rel.local";
694 break;
695 case SECCAT_DATA_REL_RO:
696 sname = ".ldata.rel.ro";
697 break;
698 case SECCAT_DATA_REL_RO_LOCAL:
699 sname = ".ldata.rel.ro.local";
700 break;
701 case SECCAT_BSS:
702 sname = ".lbss";
703 flags |= SECTION_BSS;
704 break;
705 case SECCAT_RODATA:
706 case SECCAT_RODATA_MERGE_STR:
707 case SECCAT_RODATA_MERGE_STR_INIT:
708 case SECCAT_RODATA_MERGE_CONST:
709 sname = ".lrodata";
710 flags &= ~SECTION_WRITE;
711 break;
712 case SECCAT_SRODATA:
713 case SECCAT_SDATA:
714 case SECCAT_SBSS:
715 gcc_unreachable ();
716 case SECCAT_TEXT:
717 case SECCAT_TDATA:
718 case SECCAT_TBSS:
719 /* We don't split these for the medium model. Place them into
720 default sections and hope for the best. */
721 break;
722 }
723 if (sname)
724 {
725 /* We might get called with string constants, but get_named_section
726 doesn't like them as they are not DECLs. Also, we need to set
727 flags in that case. */
728 if (!DECL_P (decl))
729 return get_section (sname, flags, NULL);
730 return get_named_section (decl, sname, reloc);
731 }
732 }
733 return default_elf_select_section (decl, reloc, align);
734 }
735
736 /* Select a set of attributes for section NAME based on the properties
737 of DECL and whether or not RELOC indicates that DECL's initializer
738 might contain runtime relocations. */
739
740 static unsigned int ATTRIBUTE_UNUSED
741 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
742 {
743 unsigned int flags = default_section_type_flags (decl, name, reloc);
744
745 if (ix86_in_large_data_p (decl))
746 flags |= SECTION_LARGE;
747
748 if (decl == NULL_TREE
749 && (strcmp (name, ".ldata.rel.ro") == 0
750 || strcmp (name, ".ldata.rel.ro.local") == 0))
751 flags |= SECTION_RELRO;
752
753 if (strcmp (name, ".lbss") == 0
754 || strncmp (name, ".lbss.", sizeof (".lbss.") - 1) == 0
755 || strncmp (name, ".gnu.linkonce.lb.",
756 sizeof (".gnu.linkonce.lb.") - 1) == 0)
757 flags |= SECTION_BSS;
758
759 return flags;
760 }
761
762 /* Build up a unique section name, expressed as a
763 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
764 RELOC indicates whether the initial value of EXP requires
765 link-time relocations. */
766
767 static void ATTRIBUTE_UNUSED
768 x86_64_elf_unique_section (tree decl, int reloc)
769 {
770 if (ix86_in_large_data_p (decl))
771 {
772 const char *prefix = NULL;
773 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
774 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
775
776 switch (categorize_decl_for_section (decl, reloc))
777 {
778 case SECCAT_DATA:
779 case SECCAT_DATA_REL:
780 case SECCAT_DATA_REL_LOCAL:
781 case SECCAT_DATA_REL_RO:
782 case SECCAT_DATA_REL_RO_LOCAL:
783 prefix = one_only ? ".ld" : ".ldata";
784 break;
785 case SECCAT_BSS:
786 prefix = one_only ? ".lb" : ".lbss";
787 break;
788 case SECCAT_RODATA:
789 case SECCAT_RODATA_MERGE_STR:
790 case SECCAT_RODATA_MERGE_STR_INIT:
791 case SECCAT_RODATA_MERGE_CONST:
792 prefix = one_only ? ".lr" : ".lrodata";
793 break;
794 case SECCAT_SRODATA:
795 case SECCAT_SDATA:
796 case SECCAT_SBSS:
797 gcc_unreachable ();
798 case SECCAT_TEXT:
799 case SECCAT_TDATA:
800 case SECCAT_TBSS:
801 /* We don't split these for the medium model. Place them into
802 default sections and hope for the best. */
803 break;
804 }
805 if (prefix)
806 {
807 const char *name, *linkonce;
808 char *string;
809
810 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
811 name = targetm.strip_name_encoding (name);
812
813 /* If we're using one_only, then there needs to be a .gnu.linkonce
814 prefix to the section name. */
815 linkonce = one_only ? ".gnu.linkonce" : "";
816
817 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
818
819 set_decl_section_name (decl, string);
820 return;
821 }
822 }
823 default_unique_section (decl, reloc);
824 }
825
826 #ifdef COMMON_ASM_OP
827
828 #ifndef LARGECOMM_SECTION_ASM_OP
829 #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
830 #endif
831
832 /* This says how to output assembler code to declare an
833 uninitialized external linkage data object.
834
835 For medium model x86-64 we need to use the LARGECOMM_SECTION_ASM_OP
836 directive for large objects. */
837 void
838 x86_elf_aligned_decl_common (FILE *file, tree decl,
839 const char *name, unsigned HOST_WIDE_INT size,
840 int align)
841 {
842 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
843 && size > (unsigned int)ix86_section_threshold)
844 {
845 switch_to_section (get_named_section (decl, ".lbss", 0));
846 fputs (LARGECOMM_SECTION_ASM_OP, file);
847 }
848 else
849 fputs (COMMON_ASM_OP, file);
850 assemble_name (file, name);
851 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
852 size, align / BITS_PER_UNIT);
853 }
854 #endif
855
856 /* Utility function for targets to use in implementing
857 ASM_OUTPUT_ALIGNED_BSS. */
858
859 void
860 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
861 unsigned HOST_WIDE_INT size, int align)
862 {
863 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
864 && size > (unsigned int)ix86_section_threshold)
865 switch_to_section (get_named_section (decl, ".lbss", 0));
866 else
867 switch_to_section (bss_section);
868 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
869 #ifdef ASM_DECLARE_OBJECT_NAME
870 last_assemble_variable_decl = decl;
871 ASM_DECLARE_OBJECT_NAME (file, name, decl);
872 #else
873 /* The standard thing is to just output a label for the object. */
874 ASM_OUTPUT_LABEL (file, name);
875 #endif /* ASM_DECLARE_OBJECT_NAME */
876 ASM_OUTPUT_SKIP (file, size ? size : 1);
877 }
878
879 /* Decide whether we must probe the stack before any space allocation
880 on this target. It's essentially TARGET_STACK_PROBE except when
881 -fstack-check causes the stack to be already probed differently. */
882
883 bool
884 ix86_target_stack_probe (void)
885 {
886 /* Do not probe the stack twice if static stack checking is enabled. */
887 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
888 return false;
889
890 return TARGET_STACK_PROBE;
891 }
892
893 /* Decide whether we can make a sibling call to a function. DECL is the
894 declaration of the function being targeted by the call and EXP is the
895 CALL_EXPR representing the call. */
896
897 static bool
898 ix86_function_ok_for_sibcall (tree decl, tree exp)
899 {
900 tree type, decl_or_type;
901 rtx a, b;
902 bool bind_global = decl && !targetm.binds_local_p (decl);
903
904 if (ix86_function_naked (current_function_decl))
905 return false;
906
907 /* Sibling call isn't OK if there are no caller-saved registers
908 since all registers must be preserved before return. */
909 if (cfun->machine->no_caller_saved_registers)
910 return false;
911
912 /* If we are generating position-independent code, we cannot sibcall
913 optimize direct calls to global functions, as the PLT requires
914 %ebx be live. (Darwin does not have a PLT.) */
915 if (!TARGET_MACHO
916 && !TARGET_64BIT
917 && flag_pic
918 && flag_plt
919 && bind_global)
920 return false;
921
922 /* If we need to align the outgoing stack, then sibcalling would
923 unalign the stack, which may break the called function. */
924 if (ix86_minimum_incoming_stack_boundary (true)
925 < PREFERRED_STACK_BOUNDARY)
926 return false;
927
928 if (decl)
929 {
930 decl_or_type = decl;
931 type = TREE_TYPE (decl);
932 }
933 else
934 {
935 /* We're looking at the CALL_EXPR, we need the type of the function. */
936 type = CALL_EXPR_FN (exp); /* pointer expression */
937 type = TREE_TYPE (type); /* pointer type */
938 type = TREE_TYPE (type); /* function type */
939 decl_or_type = type;
940 }
941
942 /* Check that the return value locations are the same. For example,
943 if we are returning floats on the 80387 register stack, we cannot
944 make a sibcall from a function that doesn't return a float to a
945 function that does or, conversely, from a function that does return
946 a float to a function that doesn't; the necessary stack adjustment
947 would not be executed. This is also the place we notice
948 differences in the return value ABI. Note that it is ok for one
949 of the functions to have void return type as long as the return
950 value of the other is passed in a register. */
951 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
952 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
953 cfun->decl, false);
954 if (STACK_REG_P (a) || STACK_REG_P (b))
955 {
956 if (!rtx_equal_p (a, b))
957 return false;
958 }
959 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
960 ;
961 else if (!rtx_equal_p (a, b))
962 return false;
963
964 if (TARGET_64BIT)
965 {
966 /* The SYSV ABI has more call-clobbered registers;
967 disallow sibcalls from MS to SYSV. */
968 if (cfun->machine->call_abi == MS_ABI
969 && ix86_function_type_abi (type) == SYSV_ABI)
970 return false;
971 }
972 else
973 {
974 /* If this call is indirect, we'll need to be able to use a
975 call-clobbered register for the address of the target function.
976 Make sure that all such registers are not used for passing
977 parameters. Note that DLLIMPORT functions and calls to global
978 functions via the GOT slot are indirect. */
979 if (!decl
980 || (bind_global && flag_pic && !flag_plt)
981 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
982 || flag_force_indirect_call)
983 {
984 /* Check if regparm >= 3 since arg_reg_available is set to
985 false if regparm == 0. If regparm is 1 or 2, there is
986 always a call-clobbered register available.
987
988 ??? The symbol indirect call doesn't need a call-clobbered
989 register. But we don't know if this is a symbol indirect
990 call or not here. */
991 if (ix86_function_regparm (type, decl) >= 3
992 && !cfun->machine->arg_reg_available)
993 return false;
994 }
995 }
996
997 /* Otherwise okay. That also includes certain types of indirect calls. */
998 return true;
999 }
1000
1001 /* This function determines from TYPE the calling-convention. */
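/* Illustrative example (derived from the code below): on 32-bit targets a
   function type declared with __attribute__((stdcall)) yields
   IX86_CALLCVT_STDCALL, while on 64-bit targets this always returns
   IX86_CALLCVT_CDECL.  */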
1002
1003 unsigned int
1004 ix86_get_callcvt (const_tree type)
1005 {
1006 unsigned int ret = 0;
1007 bool is_stdarg;
1008 tree attrs;
1009
1010 if (TARGET_64BIT)
1011 return IX86_CALLCVT_CDECL;
1012
1013 attrs = TYPE_ATTRIBUTES (type);
1014 if (attrs != NULL_TREE)
1015 {
1016 if (lookup_attribute ("cdecl", attrs))
1017 ret |= IX86_CALLCVT_CDECL;
1018 else if (lookup_attribute ("stdcall", attrs))
1019 ret |= IX86_CALLCVT_STDCALL;
1020 else if (lookup_attribute ("fastcall", attrs))
1021 ret |= IX86_CALLCVT_FASTCALL;
1022 else if (lookup_attribute ("thiscall", attrs))
1023 ret |= IX86_CALLCVT_THISCALL;
1024
1025 /* Regparm isn't allowed for thiscall and fastcall. */
1026 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
1027 {
1028 if (lookup_attribute ("regparm", attrs))
1029 ret |= IX86_CALLCVT_REGPARM;
1030 if (lookup_attribute ("sseregparm", attrs))
1031 ret |= IX86_CALLCVT_SSEREGPARM;
1032 }
1033
1034 if (IX86_BASE_CALLCVT(ret) != 0)
1035 return ret;
1036 }
1037
1038 is_stdarg = stdarg_p (type);
1039 if (TARGET_RTD && !is_stdarg)
1040 return IX86_CALLCVT_STDCALL | ret;
1041
1042 if (ret != 0
1043 || is_stdarg
1044 || TREE_CODE (type) != METHOD_TYPE
1045 || ix86_function_type_abi (type) != MS_ABI)
1046 return IX86_CALLCVT_CDECL | ret;
1047
1048 return IX86_CALLCVT_THISCALL;
1049 }
1050
1051 /* Return 0 if the attributes for two types are incompatible, 1 if they
1052 are compatible, and 2 if they are nearly compatible (which causes a
1053 warning to be generated). */
1054
1055 static int
1056 ix86_comp_type_attributes (const_tree type1, const_tree type2)
1057 {
1058 unsigned int ccvt1, ccvt2;
1059
1060 if (TREE_CODE (type1) != FUNCTION_TYPE
1061 && TREE_CODE (type1) != METHOD_TYPE)
1062 return 1;
1063
1064 ccvt1 = ix86_get_callcvt (type1);
1065 ccvt2 = ix86_get_callcvt (type2);
1066 if (ccvt1 != ccvt2)
1067 return 0;
1068 if (ix86_function_regparm (type1, NULL)
1069 != ix86_function_regparm (type2, NULL))
1070 return 0;
1071
1072 return 1;
1073 }
1074
1075 /* Return the regparm value for a function with the indicated TYPE and DECL.
1076 DECL may be NULL when calling function indirectly
1077 or considering a libcall. */
1078
1079 static int
1080 ix86_function_regparm (const_tree type, const_tree decl)
1081 {
1082 tree attr;
1083 int regparm;
1084 unsigned int ccvt;
1085
1086 if (TARGET_64BIT)
1087 return (ix86_function_type_abi (type) == SYSV_ABI
1088 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
1089 ccvt = ix86_get_callcvt (type);
1090 regparm = ix86_regparm;
1091
1092 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
1093 {
1094 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1095 if (attr)
1096 {
1097 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1098 return regparm;
1099 }
1100 }
1101 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1102 return 2;
1103 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1104 return 1;
1105
1106 /* Use register calling convention for local functions when possible. */
1107 if (decl
1108 && TREE_CODE (decl) == FUNCTION_DECL)
1109 {
1110 cgraph_node *target = cgraph_node::get (decl);
1111 if (target)
1112 target = target->function_symbol ();
1113
1114 /* Caller and callee must agree on the calling convention, so
1115 checking just `optimize' here would mean that with
1116 __attribute__((optimize (...))) the caller could use the regparm
1117 convention and the callee not, or vice versa. Instead look at
1118 whether the callee is optimized or not. */
1119 if (target && opt_for_fn (target->decl, optimize)
1120 && !(profile_flag && !flag_fentry))
1121 {
1122 if (target->local && target->can_change_signature)
1123 {
1124 int local_regparm, globals = 0, regno;
1125
1126 /* Make sure no regparm register is taken by a
1127 fixed register variable. */
1128 for (local_regparm = 0; local_regparm < REGPARM_MAX;
1129 local_regparm++)
1130 if (fixed_regs[local_regparm])
1131 break;
1132
1133 /* We don't want to use regparm(3) for nested functions as
1134 these use a static chain pointer in the third argument. */
1135 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
1136 local_regparm = 2;
1137
1138 /* Save a register for the split stack. */
1139 if (flag_split_stack)
1140 {
1141 if (local_regparm == 3)
1142 local_regparm = 2;
1143 else if (local_regparm == 2
1144 && DECL_STATIC_CHAIN (target->decl))
1145 local_regparm = 1;
1146 }
1147
1148 /* Each fixed register usage increases register pressure,
1149 so fewer registers should be used for argument passing.
1150 This functionality can be overridden by an explicit
1151 regparm value. */
1152 for (regno = AX_REG; regno <= DI_REG; regno++)
1153 if (fixed_regs[regno])
1154 globals++;
1155
1156 local_regparm
1157 = globals < local_regparm ? local_regparm - globals : 0;
1158
1159 if (local_regparm > regparm)
1160 regparm = local_regparm;
1161 }
1162 }
1163 }
1164
1165 return regparm;
1166 }
1167
1168 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
1169 DFmode (2) arguments in SSE registers for a function with the
1170 indicated TYPE and DECL. DECL may be NULL when calling the function
1171 indirectly or considering a libcall. Return -1 if any FP parameter
1172 should be rejected with an error; this is used in situations where we
1173 imply the SSE calling convention but the function is called from
1174 another function with SSE disabled. Otherwise return 0. */
1175
1176 static int
1177 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
1178 {
1179 gcc_assert (!TARGET_64BIT);
1180
1181 /* Use SSE registers to pass SFmode and DFmode arguments if requested
1182 by the sseregparm attribute. */
1183 if (TARGET_SSEREGPARM
1184 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
1185 {
1186 if (!TARGET_SSE)
1187 {
1188 if (warn)
1189 {
1190 if (decl)
1191 error ("calling %qD with attribute sseregparm without "
1192 "SSE/SSE2 enabled", decl);
1193 else
1194 error ("calling %qT with attribute sseregparm without "
1195 "SSE/SSE2 enabled", type);
1196 }
1197 return 0;
1198 }
1199
1200 return 2;
1201 }
1202
1203 if (!decl)
1204 return 0;
1205
1206 cgraph_node *target = cgraph_node::get (decl);
1207 if (target)
1208 target = target->function_symbol ();
1209
1210 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
1211 (and DFmode for SSE2) arguments in SSE registers. */
1212 if (target
1213 /* TARGET_SSE_MATH */
1214 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
1215 && opt_for_fn (target->decl, optimize)
1216 && !(profile_flag && !flag_fentry))
1217 {
1218 if (target->local && target->can_change_signature)
1219 {
1220 /* Refuse to produce wrong code when a local function with SSE enabled
1221 is called from an SSE disabled function.
1222 FIXME: We need a way to detect these cases cross-ltrans partition
1223 and avoid using SSE calling conventions on local functions called
1224 from function with SSE disabled. For now at least delay the
1225 warning until we know we are going to produce wrong code.
1226 See PR66047 */
1227 if (!TARGET_SSE && warn)
1228 return -1;
1229 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
1230 ->x_ix86_isa_flags) ? 2 : 1;
1231 }
1232 }
1233
1234 return 0;
1235 }
1236
1237 /* Return true if EAX is live at the start of the function. Used by
1238 ix86_expand_prologue to determine if we need special help before
1239 calling allocate_stack_worker. */
1240
1241 static bool
1242 ix86_eax_live_at_start_p (void)
1243 {
1244 /* Cheat. Don't bother working forward from ix86_function_regparm
1245 to the function type to whether an actual argument is located in
1246 eax. Instead just look at cfg info, which is still close enough
1247 to correct at this point. This gives false positives for broken
1248 functions that might use uninitialized data that happens to be
1249 allocated in eax, but who cares? */
1250 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
1251 }
1252
1253 static bool
1254 ix86_keep_aggregate_return_pointer (tree fntype)
1255 {
1256 tree attr;
1257
1258 if (!TARGET_64BIT)
1259 {
1260 attr = lookup_attribute ("callee_pop_aggregate_return",
1261 TYPE_ATTRIBUTES (fntype));
1262 if (attr)
1263 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
1264
1265 /* For 32-bit MS-ABI the default is to keep aggregate
1266 return pointer. */
1267 if (ix86_function_type_abi (fntype) == MS_ABI)
1268 return true;
1269 }
1270 return KEEP_AGGREGATE_RETURN_POINTER != 0;
1271 }
1272
1273 /* Value is the number of bytes of arguments automatically
1274 popped when returning from a subroutine call.
1275 FUNDECL is the declaration node of the function (as a tree),
1276 FUNTYPE is the data type of the function (as a tree),
1277 or for a library call it is an identifier node for the subroutine name.
1278 SIZE is the number of bytes of arguments passed on the stack.
1279
1280 On the 80386, the RTD insn may be used to pop them if the number
1281 of args is fixed, but if the number is variable then the caller
1282 must pop them all. RTD can't be used for library calls now
1283 because the library is compiled with the Unix compiler.
1284 Use of RTD is a selectable option, since it is incompatible with
1285 standard Unix calling sequences. If the option is not selected,
1286 the caller must always pop the args.
1287
1288 The attribute stdcall is equivalent to RTD on a per module basis. */
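/* Worked example: a 32-bit stdcall function taking two ints passed on the
   stack has SIZE == 8, so this hook returns 8 and the callee pops those
   bytes itself (e.g. with "ret 8").  */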
1289
1290 static poly_int64
1291 ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
1292 {
1293 unsigned int ccvt;
1294
1295 /* None of the 64-bit ABIs pop arguments. */
1296 if (TARGET_64BIT)
1297 return 0;
1298
1299 ccvt = ix86_get_callcvt (funtype);
1300
1301 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
1302 | IX86_CALLCVT_THISCALL)) != 0
1303 && ! stdarg_p (funtype))
1304 return size;
1305
1306 /* Lose any fake structure return argument if it is passed on the stack. */
1307 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1308 && !ix86_keep_aggregate_return_pointer (funtype))
1309 {
1310 int nregs = ix86_function_regparm (funtype, fundecl);
1311 if (nregs == 0)
1312 return GET_MODE_SIZE (Pmode);
1313 }
1314
1315 return 0;
1316 }
1317
1318 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
1319
1320 static bool
1321 ix86_legitimate_combined_insn (rtx_insn *insn)
1322 {
1323 int i;
1324
1325 /* Check operand constraints in case hard registers were propagated
1326 into insn pattern. This check prevents combine pass from
1327 generating insn patterns with invalid hard register operands.
1328 These invalid insns can eventually confuse reload to error out
1329 with a spill failure. See also PRs 46829 and 46843. */
1330
1331 gcc_assert (INSN_CODE (insn) >= 0);
1332
1333 extract_insn (insn);
1334 preprocess_constraints (insn);
1335
1336 int n_operands = recog_data.n_operands;
1337 int n_alternatives = recog_data.n_alternatives;
1338 for (i = 0; i < n_operands; i++)
1339 {
1340 rtx op = recog_data.operand[i];
1341 machine_mode mode = GET_MODE (op);
1342 const operand_alternative *op_alt;
1343 int offset = 0;
1344 bool win;
1345 int j;
1346
1347 /* A unary operator may be accepted by the predicate, but it
1348 is irrelevant for matching constraints. */
1349 if (UNARY_P (op))
1350 op = XEXP (op, 0);
1351
1352 if (SUBREG_P (op))
1353 {
1354 if (REG_P (SUBREG_REG (op))
1355 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
1356 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
1357 GET_MODE (SUBREG_REG (op)),
1358 SUBREG_BYTE (op),
1359 GET_MODE (op));
1360 op = SUBREG_REG (op);
1361 }
1362
1363 if (!(REG_P (op) && HARD_REGISTER_P (op)))
1364 continue;
1365
1366 op_alt = recog_op_alt;
1367
1368 /* Operand has no constraints, anything is OK. */
1369 win = !n_alternatives;
1370
1371 alternative_mask preferred = get_preferred_alternatives (insn);
1372 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
1373 {
1374 if (!TEST_BIT (preferred, j))
1375 continue;
1376 if (op_alt[i].anything_ok
1377 || (op_alt[i].matches != -1
1378 && operands_match_p
1379 (recog_data.operand[i],
1380 recog_data.operand[op_alt[i].matches]))
1381 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
1382 {
1383 win = true;
1384 break;
1385 }
1386 }
1387
1388 if (!win)
1389 return false;
1390 }
1391
1392 return true;
1393 }
1394
1395 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
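/* Background note (standard ASan shadow mapping, stated here only for
   illustration): shadow address = (application address >> 3) + this offset,
   so the chosen constant must fall in a range the program never maps.  */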
1396
1397 static unsigned HOST_WIDE_INT
1398 ix86_asan_shadow_offset (void)
1399 {
1400 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
1401 : HOST_WIDE_INT_C (0x7fff8000))
1402 : (HOST_WIDE_INT_1 << X86_32_ASAN_BIT_OFFSET);
1403 }
1404
1405 /* Argument support functions. */
1406
1407 /* Return true when register may be used to pass function parameters. */
1408 bool
1409 ix86_function_arg_regno_p (int regno)
1410 {
1411 int i;
1412 enum calling_abi call_abi;
1413 const int *parm_regs;
1414
1415 if (!TARGET_64BIT)
1416 {
1417 if (TARGET_MACHO)
1418 return (regno < REGPARM_MAX
1419 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1420 else
1421 return (regno < REGPARM_MAX
1422 || (TARGET_MMX && MMX_REGNO_P (regno)
1423 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
1424 || (TARGET_SSE && SSE_REGNO_P (regno)
1425 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
1426 }
1427
1428 if (TARGET_SSE && SSE_REGNO_P (regno)
1429 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
1430 return true;
1431
1432 /* TODO: The function should depend on current function ABI but
1433 builtins.c would need updating then. Therefore we use the
1434 default ABI. */
1435 call_abi = ix86_cfun_abi ();
1436
1437 /* RAX is used as hidden argument to va_arg functions. */
1438 if (call_abi == SYSV_ABI && regno == AX_REG)
1439 return true;
1440
1441 if (call_abi == MS_ABI)
1442 parm_regs = x86_64_ms_abi_int_parameter_registers;
1443 else
1444 parm_regs = x86_64_int_parameter_registers;
1445
1446 for (i = 0; i < (call_abi == MS_ABI
1447 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
1448 if (regno == parm_regs[i])
1449 return true;
1450 return false;
1451 }
1452
1453 /* Return true if we do not know how to pass ARG solely in registers. */
1454
1455 static bool
1456 ix86_must_pass_in_stack (const function_arg_info &arg)
1457 {
1458 if (must_pass_in_stack_var_size_or_pad (arg))
1459 return true;
1460
1461 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1462 The layout_type routine is crafty and tries to trick us into passing
1463 currently unsupported vector types on the stack by using TImode. */
1464 return (!TARGET_64BIT && arg.mode == TImode
1465 && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE);
1466 }
1467
1468 /* Return the size, in bytes, of the area reserved for arguments passed
1469 in registers for the function represented by FNDECL, depending on the
1470 ABI used. */
1471 int
1472 ix86_reg_parm_stack_space (const_tree fndecl)
1473 {
1474 enum calling_abi call_abi = SYSV_ABI;
1475 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
1476 call_abi = ix86_function_abi (fndecl);
1477 else
1478 call_abi = ix86_function_type_abi (fndecl);
1479 if (TARGET_64BIT && call_abi == MS_ABI)
1480 return 32;
1481 return 0;
1482 }
1483
1484 /* We add this as a workaround in order to use libc_has_function
1485 hook in i386.md. */
1486 bool
1487 ix86_libc_has_function (enum function_class fn_class)
1488 {
1489 return targetm.libc_has_function (fn_class);
1490 }
1491
1492 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying
1493 the calling ABI used. */
1494 enum calling_abi
1495 ix86_function_type_abi (const_tree fntype)
1496 {
1497 enum calling_abi abi = ix86_abi;
1498
1499 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
1500 return abi;
1501
1502 if (abi == SYSV_ABI
1503 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
1504 {
1505 static int warned;
1506 if (TARGET_X32 && !warned)
1507 {
1508 error ("X32 does not support %<ms_abi%> attribute");
1509 warned = 1;
1510 }
1511
1512 abi = MS_ABI;
1513 }
1514 else if (abi == MS_ABI
1515 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
1516 abi = SYSV_ABI;
1517
1518 return abi;
1519 }
1520
1521 enum calling_abi
1522 ix86_function_abi (const_tree fndecl)
1523 {
1524 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
1525 }
1526
1527 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying
1528 the calling ABI used. */
1529 enum calling_abi
1530 ix86_cfun_abi (void)
1531 {
1532 return cfun ? cfun->machine->call_abi : ix86_abi;
1533 }
1534
1535 bool
1536 ix86_function_ms_hook_prologue (const_tree fn)
1537 {
1538 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
1539 {
1540 if (decl_function_context (fn) != NULL_TREE)
1541 error_at (DECL_SOURCE_LOCATION (fn),
1542 "%<ms_hook_prologue%> attribute is not compatible "
1543 "with nested function");
1544 else
1545 return true;
1546 }
1547 return false;
1548 }
1549
1550 bool
1551 ix86_function_naked (const_tree fn)
1552 {
1553 if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
1554 return true;
1555
1556 return false;
1557 }
1558
1559 /* Write the extra assembler code needed to declare a function properly. */
1560
1561 void
1562 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
1563 tree decl)
1564 {
1565 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
1566
1567 if (is_ms_hook)
1568 {
1569 int i, filler_count = (TARGET_64BIT ? 32 : 16);
1570 unsigned int filler_cc = 0xcccccccc;
1571
1572 for (i = 0; i < filler_count; i += 4)
1573 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
1574 }
1575
1576 #ifdef SUBTARGET_ASM_UNWIND_INIT
1577 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
1578 #endif
1579
1580 ASM_OUTPUT_LABEL (asm_out_file, fname);
1581
1582 /* Output magic byte marker, if hot-patch attribute is set. */
1583 if (is_ms_hook)
1584 {
1585 if (TARGET_64BIT)
1586 {
1587 /* leaq [%rsp + 0], %rsp */
1588 fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
1589 asm_out_file);
1590 }
1591 else
1592 {
1593 /* movl.s %edi, %edi
1594 push %ebp
1595 movl.s %esp, %ebp */
1596 fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", asm_out_file);
1597 }
1598 }
1599 }
1600
1601 /* Implementation of the call ABI switching target hook. The call
1602 register sets specific to FNDECL are set up here. See also
1603 ix86_conditional_register_usage for more details. */
1604 void
1605 ix86_call_abi_override (const_tree fndecl)
1606 {
1607 cfun->machine->call_abi = ix86_function_abi (fndecl);
1608 }
1609
1610 /* Return true if a pseudo register should be created and used to hold
1611 the GOT address for PIC code. */
1612 bool
1613 ix86_use_pseudo_pic_reg (void)
1614 {
1615 if ((TARGET_64BIT
1616 && (ix86_cmodel == CM_SMALL_PIC
1617 || TARGET_PECOFF))
1618 || !flag_pic)
1619 return false;
1620 return true;
1621 }
1622
1623 /* Initialize large model PIC register. */
1624
1625 static void
1626 ix86_init_large_pic_reg (unsigned int tmp_regno)
1627 {
1628 rtx_code_label *label;
1629 rtx tmp_reg;
1630
1631 gcc_assert (Pmode == DImode);
1632 label = gen_label_rtx ();
1633 emit_label (label);
1634 LABEL_PRESERVE_P (label) = 1;
1635 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
1636 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
1637 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
1638 label));
1639 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
1640 emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
1641 const char *name = LABEL_NAME (label);
1642 PUT_CODE (label, NOTE);
1643 NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
1644 NOTE_DELETED_LABEL_NAME (label) = name;
1645 }
1646
1647 /* Create and initialize PIC register if required. */
1648 static void
1649 ix86_init_pic_reg (void)
1650 {
1651 edge entry_edge;
1652 rtx_insn *seq;
1653
1654 if (!ix86_use_pseudo_pic_reg ())
1655 return;
1656
1657 start_sequence ();
1658
1659 if (TARGET_64BIT)
1660 {
1661 if (ix86_cmodel == CM_LARGE_PIC)
1662 ix86_init_large_pic_reg (R11_REG);
1663 else
1664 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
1665 }
1666 else
1667 {
1668 /* If there is a future mcount call in the function, it is more profitable
1669 to emit SET_GOT into the ABI defined REAL_PIC_OFFSET_TABLE_REGNUM. */
1670 rtx reg = crtl->profile
1671 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
1672 : pic_offset_table_rtx;
1673 rtx_insn *insn = emit_insn (gen_set_got (reg));
1674 RTX_FRAME_RELATED_P (insn) = 1;
1675 if (crtl->profile)
1676 emit_move_insn (pic_offset_table_rtx, reg);
1677 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
1678 }
1679
1680 seq = get_insns ();
1681 end_sequence ();
1682
1683 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
1684 insert_insn_on_edge (seq, entry_edge);
1685 commit_one_edge_insertion (entry_edge);
1686 }
1687
1688 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1689 for a call to a function whose data type is FNTYPE.
1690 For a library call, FNTYPE is 0. */
1691
1692 void
1693 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1694 tree fntype, /* tree ptr for function decl */
1695 rtx libname, /* SYMBOL_REF of library name or 0 */
1696 tree fndecl,
1697 int caller)
1698 {
1699 struct cgraph_node *local_info_node = NULL;
1700 struct cgraph_node *target = NULL;
1701
1702 memset (cum, 0, sizeof (*cum));
1703
1704 if (fndecl)
1705 {
1706 target = cgraph_node::get (fndecl);
1707 if (target)
1708 {
1709 target = target->function_symbol ();
1710 local_info_node = cgraph_node::local_info_node (target->decl);
1711 cum->call_abi = ix86_function_abi (target->decl);
1712 }
1713 else
1714 cum->call_abi = ix86_function_abi (fndecl);
1715 }
1716 else
1717 cum->call_abi = ix86_function_type_abi (fntype);
1718
1719 cum->caller = caller;
1720
1721 /* Set up the number of registers to use for passing arguments. */
1722 cum->nregs = ix86_regparm;
1723 if (TARGET_64BIT)
1724 {
1725 cum->nregs = (cum->call_abi == SYSV_ABI
1726 ? X86_64_REGPARM_MAX
1727 : X86_64_MS_REGPARM_MAX);
1728 }
1729 if (TARGET_SSE)
1730 {
1731 cum->sse_nregs = SSE_REGPARM_MAX;
1732 if (TARGET_64BIT)
1733 {
1734 cum->sse_nregs = (cum->call_abi == SYSV_ABI
1735 ? X86_64_SSE_REGPARM_MAX
1736 : X86_64_MS_SSE_REGPARM_MAX);
1737 }
1738 }
1739 if (TARGET_MMX)
1740 cum->mmx_nregs = MMX_REGPARM_MAX;
1741 cum->warn_avx512f = true;
1742 cum->warn_avx = true;
1743 cum->warn_sse = true;
1744 cum->warn_mmx = true;
1745
1746 /* Because the type might mismatch between caller and callee, we need to
1747 use the actual type of the function for local calls.
1748 FIXME: cgraph_analyze can be told to record whether a function uses
1749 va_start, so for local functions maybe_vaarg could be made more
1750 aggressive, helping K&R code.
1751 FIXME: once the type system is fixed, we won't need this code anymore. */
1752 if (local_info_node && local_info_node->local
1753 && local_info_node->can_change_signature)
1754 fntype = TREE_TYPE (target->decl);
1755 cum->stdarg = stdarg_p (fntype);
1756 cum->maybe_vaarg = (fntype
1757 ? (!prototype_p (fntype) || stdarg_p (fntype))
1758 : !libname);
1759
1760 cum->decl = fndecl;
1761
1762 cum->warn_empty = !warn_abi || cum->stdarg;
1763 if (!cum->warn_empty && fntype)
1764 {
1765 function_args_iterator iter;
1766 tree argtype;
1767 bool seen_empty_type = false;
1768 FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
1769 {
1770 if (argtype == error_mark_node || VOID_TYPE_P (argtype))
1771 break;
1772 if (TYPE_EMPTY_P (argtype))
1773 seen_empty_type = true;
1774 else if (seen_empty_type)
1775 {
1776 cum->warn_empty = true;
1777 break;
1778 }
1779 }
1780 }
1781
1782 if (!TARGET_64BIT)
1783 {
1784 /* If there are variable arguments, then we won't pass anything
1785 in registers in 32-bit mode. */
1786 if (stdarg_p (fntype))
1787 {
1788 cum->nregs = 0;
1789 /* Since in 32-bit mode variable arguments are always passed on
1790 the stack, there is a scratch register available for an
1791 indirect sibcall. */
1792 cfun->machine->arg_reg_available = true;
1793 cum->sse_nregs = 0;
1794 cum->mmx_nregs = 0;
1795 cum->warn_avx512f = false;
1796 cum->warn_avx = false;
1797 cum->warn_sse = false;
1798 cum->warn_mmx = false;
1799 return;
1800 }
1801
1802 /* Use the ecx and edx registers if the function has the fastcall
1803 attribute, else look for regparm information. */
1804 if (fntype)
1805 {
1806 unsigned int ccvt = ix86_get_callcvt (fntype);
1807 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1808 {
1809 cum->nregs = 1;
1810 cum->fastcall = 1; /* Same first register as in fastcall. */
1811 }
1812 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1813 {
1814 cum->nregs = 2;
1815 cum->fastcall = 1;
1816 }
1817 else
1818 cum->nregs = ix86_function_regparm (fntype, fndecl);
1819 }
1820
1821 /* Set up the number of SSE registers used for passing SFmode
1822 and DFmode arguments. Warn for mismatching ABI. */
1823 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
1824 }
1825
1826 cfun->machine->arg_reg_available = (cum->nregs > 0);
1827 }
1828
1829 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
1830 But in the case of vector types, it is some vector mode.
1831
1832 When we have only some of our vector isa extensions enabled, then there
1833 are some modes for which vector_mode_supported_p is false. For these
1834 modes, the generic vector support in gcc will choose some non-vector mode
1835 in order to implement the type. By computing the natural mode, we'll
1836 select the proper ABI location for the operand and not depend on whatever
1837 the middle-end decides to do with these vector types.
1838
1839 The middle-end can't deal with vector types larger than 16 bytes. In
1840 this case, we return the original mode and warn about the ABI change
1841 if CUM isn't NULL.
1842
1843 If IN_RETURN is true, warn about the ABI change if the vector mode
1844 isn't available for the function return value. */
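/* For instance, with only SSE2 enabled, a 32-byte vector type such as

     typedef int v8si __attribute__ ((vector_size (32)));

   gets a non-vector TYPE_MODE from the middle-end; the function below
   computes the natural V8SImode, sees that AVX is disabled, emits the
   -Wpsabi note and returns the original mode (illustrative sketch). */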
1845
1846 static machine_mode
1847 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
1848 bool in_return)
1849 {
1850 machine_mode mode = TYPE_MODE (type);
1851
1852 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
1853 {
1854 HOST_WIDE_INT size = int_size_in_bytes (type);
1855 if ((size == 8 || size == 16 || size == 32 || size == 64)
1856 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
1857 && TYPE_VECTOR_SUBPARTS (type) > 1)
1858 {
1859 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
1860
1861 /* There are no XFmode vector modes. */
1862 if (innermode == XFmode)
1863 return mode;
1864
1865 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
1866 mode = MIN_MODE_VECTOR_FLOAT;
1867 else
1868 mode = MIN_MODE_VECTOR_INT;
1869
1870 /* Get the mode which has this inner mode and number of units. */
1871 FOR_EACH_MODE_FROM (mode, mode)
1872 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
1873 && GET_MODE_INNER (mode) == innermode)
1874 {
1875 if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
1876 {
1877 static bool warnedavx512f;
1878 static bool warnedavx512f_ret;
1879
1880 if (cum && cum->warn_avx512f && !warnedavx512f)
1881 {
1882 if (warning (OPT_Wpsabi, "AVX512F vector argument "
1883 "without AVX512F enabled changes the ABI"))
1884 warnedavx512f = true;
1885 }
1886 else if (in_return && !warnedavx512f_ret)
1887 {
1888 if (warning (OPT_Wpsabi, "AVX512F vector return "
1889 "without AVX512F enabled changes the ABI"))
1890 warnedavx512f_ret = true;
1891 }
1892
1893 return TYPE_MODE (type);
1894 }
1895 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
1896 {
1897 static bool warnedavx;
1898 static bool warnedavx_ret;
1899
1900 if (cum && cum->warn_avx && !warnedavx)
1901 {
1902 if (warning (OPT_Wpsabi, "AVX vector argument "
1903 "without AVX enabled changes the ABI"))
1904 warnedavx = true;
1905 }
1906 else if (in_return && !warnedavx_ret)
1907 {
1908 if (warning (OPT_Wpsabi, "AVX vector return "
1909 "without AVX enabled changes the ABI"))
1910 warnedavx_ret = true;
1911 }
1912
1913 return TYPE_MODE (type);
1914 }
1915 else if (((size == 8 && TARGET_64BIT) || size == 16)
1916 && !TARGET_SSE
1917 && !TARGET_IAMCU)
1918 {
1919 static bool warnedsse;
1920 static bool warnedsse_ret;
1921
1922 if (cum && cum->warn_sse && !warnedsse)
1923 {
1924 if (warning (OPT_Wpsabi, "SSE vector argument "
1925 "without SSE enabled changes the ABI"))
1926 warnedsse = true;
1927 }
1928 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
1929 {
1930 if (warning (OPT_Wpsabi, "SSE vector return "
1931 "without SSE enabled changes the ABI"))
1932 warnedsse_ret = true;
1933 }
1934 }
1935 else if ((size == 8 && !TARGET_64BIT)
1936 && (!cfun
1937 || cfun->machine->func_type == TYPE_NORMAL)
1938 && !TARGET_MMX
1939 && !TARGET_IAMCU)
1940 {
1941 static bool warnedmmx;
1942 static bool warnedmmx_ret;
1943
1944 if (cum && cum->warn_mmx && !warnedmmx)
1945 {
1946 if (warning (OPT_Wpsabi, "MMX vector argument "
1947 "without MMX enabled changes the ABI"))
1948 warnedmmx = true;
1949 }
1950 else if (in_return && !warnedmmx_ret)
1951 {
1952 if (warning (OPT_Wpsabi, "MMX vector return "
1953 "without MMX enabled changes the ABI"))
1954 warnedmmx_ret = true;
1955 }
1956 }
1957 return mode;
1958 }
1959
1960 gcc_unreachable ();
1961 }
1962 }
1963
1964 return mode;
1965 }
1966
1967 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
1968 this may not agree with the mode that the type system has chosen for the
1969 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
1970 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
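/* For example, for MODE == TImode, ORIG_MODE == BLKmode and an SSE REGNO
   the result is roughly

     (parallel:BLK [(expr_list (reg:TI xmm0) (const_int 0))])

   whereas with a non-BLKmode ORIG_MODE a plain (reg:ORIG_MODE regno)
   is returned. */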
1971
1972 static rtx
1973 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
1974 unsigned int regno)
1975 {
1976 rtx tmp;
1977
1978 if (orig_mode != BLKmode)
1979 tmp = gen_rtx_REG (orig_mode, regno);
1980 else
1981 {
1982 tmp = gen_rtx_REG (mode, regno);
1983 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
1984 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
1985 }
1986
1987 return tmp;
1988 }
1989
1990 /* x86-64 register passing implementation. See the x86-64 ABI for details.
1991 The goal of this code is to classify each eightbyte of an incoming argument
1992 by register class and assign registers accordingly. */
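/* For example, struct { double a, b, c; } is 24 bytes, i.e. three
   eightbytes; since those do not form a single SSE/SSEUP sequence the
   whole struct ends up being passed in memory, while the 16-byte
   struct { double a, b; } is passed in two SSE registers. */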
1993
1994 /* Return the union class of CLASS1 and CLASS2.
1995 See the x86-64 PS ABI for details. */
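/* E.g. merge_classes (X86_64_SSE_CLASS, X86_64_INTEGER_CLASS) yields
   X86_64_INTEGER_CLASS (rule #4), and merging anything with
   X86_64_NO_CLASS yields the other class (rule #2). */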
1996
1997 static enum x86_64_reg_class
1998 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1999 {
2000 /* Rule #1: If both classes are equal, this is the resulting class. */
2001 if (class1 == class2)
2002 return class1;
2003
2004 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2005 the other class. */
2006 if (class1 == X86_64_NO_CLASS)
2007 return class2;
2008 if (class2 == X86_64_NO_CLASS)
2009 return class1;
2010
2011 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2012 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2013 return X86_64_MEMORY_CLASS;
2014
2015 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2016 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2017 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2018 return X86_64_INTEGERSI_CLASS;
2019 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2020 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2021 return X86_64_INTEGER_CLASS;
2022
2023 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2024 MEMORY is used. */
2025 if (class1 == X86_64_X87_CLASS
2026 || class1 == X86_64_X87UP_CLASS
2027 || class1 == X86_64_COMPLEX_X87_CLASS
2028 || class2 == X86_64_X87_CLASS
2029 || class2 == X86_64_X87UP_CLASS
2030 || class2 == X86_64_COMPLEX_X87_CLASS)
2031 return X86_64_MEMORY_CLASS;
2032
2033 /* Rule #6: Otherwise class SSE is used. */
2034 return X86_64_SSE_CLASS;
2035 }
2036
2037 /* Classify the argument of type TYPE and mode MODE.
2038 CLASSES will be filled by the register class used to pass each word
2039 of the operand. The number of words is returned. In case the parameter
2040 should be passed in memory, 0 is returned. As a special case for zero
2041 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2042
2043 BIT_OFFSET is used internally for handling records; it specifies the
2044 offset in bits, modulo 512, to avoid overflow cases.
2045
2046 See the x86-64 PS ABI for details.
2047 */
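/* As a concrete example, for

     struct S { double d; int i; };  (12 bytes, two eightbytes)

   the first eightbyte is classified X86_64_SSEDF_CLASS from the double,
   the second gets an integer class from the int, and 2 is returned, so
   the struct is passed in one SSE and one integer register. */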
2048
2049 static int
2050 classify_argument (machine_mode mode, const_tree type,
2051 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2052 {
2053 HOST_WIDE_INT bytes
2054 = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2055 int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
2056
2057 /* Variable sized entities are always passed/returned in memory. */
2058 if (bytes < 0)
2059 return 0;
2060
2061 if (mode != VOIDmode)
2062 {
2063 /* The value of "named" doesn't matter. */
2064 function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true);
2065 if (targetm.calls.must_pass_in_stack (arg))
2066 return 0;
2067 }
2068
2069 if (type && AGGREGATE_TYPE_P (type))
2070 {
2071 int i;
2072 tree field;
2073 enum x86_64_reg_class subclasses[MAX_CLASSES];
2074
2075 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
2076 if (bytes > 64)
2077 return 0;
2078
2079 for (i = 0; i < words; i++)
2080 classes[i] = X86_64_NO_CLASS;
2081
2082 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2083 signal the memory class, so handle it as a special case. */
2084 if (!words)
2085 {
2086 classes[0] = X86_64_NO_CLASS;
2087 return 1;
2088 }
2089
2090 /* Classify each field of record and merge classes. */
2091 switch (TREE_CODE (type))
2092 {
2093 case RECORD_TYPE:
2094 /* And now merge the fields of structure. */
2095 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2096 {
2097 if (TREE_CODE (field) == FIELD_DECL)
2098 {
2099 int num;
2100
2101 if (TREE_TYPE (field) == error_mark_node)
2102 continue;
2103
2104 /* Bitfields are always classified as integer. Handle them
2105 early, since later code would consider them to be
2106 misaligned integers. */
2107 if (DECL_BIT_FIELD (field))
2108 {
2109 for (i = (int_bit_position (field)
2110 + (bit_offset % 64)) / 8 / 8;
2111 i < ((int_bit_position (field) + (bit_offset % 64))
2112 + tree_to_shwi (DECL_SIZE (field))
2113 + 63) / 8 / 8; i++)
2114 classes[i]
2115 = merge_classes (X86_64_INTEGER_CLASS, classes[i]);
2116 }
2117 else
2118 {
2119 int pos;
2120
2121 type = TREE_TYPE (field);
2122
2123 /* Flexible array member is ignored. */
2124 if (TYPE_MODE (type) == BLKmode
2125 && TREE_CODE (type) == ARRAY_TYPE
2126 && TYPE_SIZE (type) == NULL_TREE
2127 && TYPE_DOMAIN (type) != NULL_TREE
2128 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
2129 == NULL_TREE))
2130 {
2131 static bool warned;
2132
2133 if (!warned && warn_psabi)
2134 {
2135 warned = true;
2136 inform (input_location,
2137 "the ABI of passing struct with"
2138 " a flexible array member has"
2139 " changed in GCC 4.4");
2140 }
2141 continue;
2142 }
2143 num = classify_argument (TYPE_MODE (type), type,
2144 subclasses,
2145 (int_bit_position (field)
2146 + bit_offset) % 512);
2147 if (!num)
2148 return 0;
2149 pos = (int_bit_position (field)
2150 + (bit_offset % 64)) / 8 / 8;
2151 for (i = 0; i < num && (i + pos) < words; i++)
2152 classes[i + pos]
2153 = merge_classes (subclasses[i], classes[i + pos]);
2154 }
2155 }
2156 }
2157 break;
2158
2159 case ARRAY_TYPE:
2160 /* Arrays are handled as small records. */
2161 {
2162 int num;
2163 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2164 TREE_TYPE (type), subclasses, bit_offset);
2165 if (!num)
2166 return 0;
2167
2168 /* The partial classes are now full classes. */
2169 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2170 subclasses[0] = X86_64_SSE_CLASS;
2171 if (subclasses[0] == X86_64_INTEGERSI_CLASS
2172 && !((bit_offset % 64) == 0 && bytes == 4))
2173 subclasses[0] = X86_64_INTEGER_CLASS;
2174
2175 for (i = 0; i < words; i++)
2176 classes[i] = subclasses[i % num];
2177
2178 break;
2179 }
2180 case UNION_TYPE:
2181 case QUAL_UNION_TYPE:
2182 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
2183
2184 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2185 {
2186 if (TREE_CODE (field) == FIELD_DECL)
2187 {
2188 int num;
2189
2190 if (TREE_TYPE (field) == error_mark_node)
2191 continue;
2192
2193 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2194 TREE_TYPE (field), subclasses,
2195 bit_offset);
2196 if (!num)
2197 return 0;
2198 for (i = 0; i < num && i < words; i++)
2199 classes[i] = merge_classes (subclasses[i], classes[i]);
2200 }
2201 }
2202 break;
2203
2204 default:
2205 gcc_unreachable ();
2206 }
2207
2208 if (words > 2)
2209 {
2210 /* When the size exceeds 16 bytes, if the first class isn't
2211 X86_64_SSE_CLASS or any of the remaining classes isn't
2212 X86_64_SSEUP_CLASS, everything should be passed in
2213 memory. */
2214 if (classes[0] != X86_64_SSE_CLASS)
2215 return 0;
2216
2217 for (i = 1; i < words; i++)
2218 if (classes[i] != X86_64_SSEUP_CLASS)
2219 return 0;
2220 }
2221
2222 /* Final merger cleanup. */
2223 for (i = 0; i < words; i++)
2224 {
2225 /* If one class is MEMORY, everything should be passed in
2226 memory. */
2227 if (classes[i] == X86_64_MEMORY_CLASS)
2228 return 0;
2229
2230 /* X86_64_SSEUP_CLASS should always be preceded by
2231 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
2232 if (classes[i] == X86_64_SSEUP_CLASS
2233 && classes[i - 1] != X86_64_SSE_CLASS
2234 && classes[i - 1] != X86_64_SSEUP_CLASS)
2235 {
2236 /* The first one should never be X86_64_SSEUP_CLASS. */
2237 gcc_assert (i != 0);
2238 classes[i] = X86_64_SSE_CLASS;
2239 }
2240
2241 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
2242 everything should be passed in memory. */
2243 if (classes[i] == X86_64_X87UP_CLASS
2244 && (classes[i - 1] != X86_64_X87_CLASS))
2245 {
2246 static bool warned;
2247
2248 /* The first one should never be X86_64_X87UP_CLASS. */
2249 gcc_assert (i != 0);
2250 if (!warned && warn_psabi)
2251 {
2252 warned = true;
2253 inform (input_location,
2254 "the ABI of passing union with %<long double%>"
2255 " has changed in GCC 4.4");
2256 }
2257 return 0;
2258 }
2259 }
2260 return words;
2261 }
2262
2263 /* Compute the alignment needed. We align all types to their natural
2264 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
2265 if (mode != VOIDmode && mode != BLKmode)
2266 {
2267 int mode_alignment = GET_MODE_BITSIZE (mode);
2268
2269 if (mode == XFmode)
2270 mode_alignment = 128;
2271 else if (mode == XCmode)
2272 mode_alignment = 256;
2273 if (COMPLEX_MODE_P (mode))
2274 mode_alignment /= 2;
2275 /* Misaligned fields are always returned in memory. */
2276 if (bit_offset % mode_alignment)
2277 return 0;
2278 }
2279
2280 /* For V1xx modes, just use the base mode. */
2281 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
2282 && GET_MODE_UNIT_SIZE (mode) == bytes)
2283 mode = GET_MODE_INNER (mode);
2284
2285 /* Classification of atomic types. */
2286 switch (mode)
2287 {
2288 case E_SDmode:
2289 case E_DDmode:
2290 classes[0] = X86_64_SSE_CLASS;
2291 return 1;
2292 case E_TDmode:
2293 classes[0] = X86_64_SSE_CLASS;
2294 classes[1] = X86_64_SSEUP_CLASS;
2295 return 2;
2296 case E_DImode:
2297 case E_SImode:
2298 case E_HImode:
2299 case E_QImode:
2300 case E_CSImode:
2301 case E_CHImode:
2302 case E_CQImode:
2303 {
2304 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
2305
2306 /* Analyze last 128 bits only. */
2307 size = (size - 1) & 0x7f;
2308
2309 if (size < 32)
2310 {
2311 classes[0] = X86_64_INTEGERSI_CLASS;
2312 return 1;
2313 }
2314 else if (size < 64)
2315 {
2316 classes[0] = X86_64_INTEGER_CLASS;
2317 return 1;
2318 }
2319 else if (size < 64+32)
2320 {
2321 classes[0] = X86_64_INTEGER_CLASS;
2322 classes[1] = X86_64_INTEGERSI_CLASS;
2323 return 2;
2324 }
2325 else if (size < 64+64)
2326 {
2327 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2328 return 2;
2329 }
2330 else
2331 gcc_unreachable ();
2332 }
2333 case E_CDImode:
2334 case E_TImode:
2335 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2336 return 2;
2337 case E_COImode:
2338 case E_OImode:
2339 /* OImode shouldn't be used directly. */
2340 gcc_unreachable ();
2341 case E_CTImode:
2342 return 0;
2343 case E_SFmode:
2344 if (!(bit_offset % 64))
2345 classes[0] = X86_64_SSESF_CLASS;
2346 else
2347 classes[0] = X86_64_SSE_CLASS;
2348 return 1;
2349 case E_DFmode:
2350 classes[0] = X86_64_SSEDF_CLASS;
2351 return 1;
2352 case E_XFmode:
2353 classes[0] = X86_64_X87_CLASS;
2354 classes[1] = X86_64_X87UP_CLASS;
2355 return 2;
2356 case E_TFmode:
2357 classes[0] = X86_64_SSE_CLASS;
2358 classes[1] = X86_64_SSEUP_CLASS;
2359 return 2;
2360 case E_SCmode:
2361 classes[0] = X86_64_SSE_CLASS;
2362 if (!(bit_offset % 64))
2363 return 1;
2364 else
2365 {
2366 static bool warned;
2367
2368 if (!warned && warn_psabi)
2369 {
2370 warned = true;
2371 inform (input_location,
2372 "the ABI of passing structure with %<complex float%>"
2373 " member has changed in GCC 4.4");
2374 }
2375 classes[1] = X86_64_SSESF_CLASS;
2376 return 2;
2377 }
2378 case E_DCmode:
2379 classes[0] = X86_64_SSEDF_CLASS;
2380 classes[1] = X86_64_SSEDF_CLASS;
2381 return 2;
2382 case E_XCmode:
2383 classes[0] = X86_64_COMPLEX_X87_CLASS;
2384 return 1;
2385 case E_TCmode:
2386 /* This mode is larger than 16 bytes. */
2387 return 0;
2388 case E_V8SFmode:
2389 case E_V8SImode:
2390 case E_V32QImode:
2391 case E_V16HImode:
2392 case E_V4DFmode:
2393 case E_V4DImode:
2394 classes[0] = X86_64_SSE_CLASS;
2395 classes[1] = X86_64_SSEUP_CLASS;
2396 classes[2] = X86_64_SSEUP_CLASS;
2397 classes[3] = X86_64_SSEUP_CLASS;
2398 return 4;
2399 case E_V8DFmode:
2400 case E_V16SFmode:
2401 case E_V8DImode:
2402 case E_V16SImode:
2403 case E_V32HImode:
2404 case E_V64QImode:
2405 classes[0] = X86_64_SSE_CLASS;
2406 classes[1] = X86_64_SSEUP_CLASS;
2407 classes[2] = X86_64_SSEUP_CLASS;
2408 classes[3] = X86_64_SSEUP_CLASS;
2409 classes[4] = X86_64_SSEUP_CLASS;
2410 classes[5] = X86_64_SSEUP_CLASS;
2411 classes[6] = X86_64_SSEUP_CLASS;
2412 classes[7] = X86_64_SSEUP_CLASS;
2413 return 8;
2414 case E_V4SFmode:
2415 case E_V4SImode:
2416 case E_V16QImode:
2417 case E_V8HImode:
2418 case E_V2DFmode:
2419 case E_V2DImode:
2420 classes[0] = X86_64_SSE_CLASS;
2421 classes[1] = X86_64_SSEUP_CLASS;
2422 return 2;
2423 case E_V1TImode:
2424 case E_V1DImode:
2425 case E_V2SFmode:
2426 case E_V2SImode:
2427 case E_V4HImode:
2428 case E_V8QImode:
2429 classes[0] = X86_64_SSE_CLASS;
2430 return 1;
2431 case E_BLKmode:
2432 case E_VOIDmode:
2433 return 0;
2434 default:
2435 gcc_assert (VECTOR_MODE_P (mode));
2436
2437 if (bytes > 16)
2438 return 0;
2439
2440 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
2441
2442 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2443 classes[0] = X86_64_INTEGERSI_CLASS;
2444 else
2445 classes[0] = X86_64_INTEGER_CLASS;
2446 classes[1] = X86_64_INTEGER_CLASS;
2447 return 1 + (bytes > 8);
2448 }
2449 }
2450
2451 /* Examine the argument and set the number of registers required in each
2452 class. Return true iff the parameter should be passed in memory. */
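/* E.g. for struct S { double d; int i; } this sets *SSE_NREGS and
   *INT_NREGS to 1 each and returns false; for a 24-byte struct of three
   doubles classify_argument returns 0 and this returns true (memory). */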
2453
2454 static bool
2455 examine_argument (machine_mode mode, const_tree type, int in_return,
2456 int *int_nregs, int *sse_nregs)
2457 {
2458 enum x86_64_reg_class regclass[MAX_CLASSES];
2459 int n = classify_argument (mode, type, regclass, 0);
2460
2461 *int_nregs = 0;
2462 *sse_nregs = 0;
2463
2464 if (!n)
2465 return true;
2466 for (n--; n >= 0; n--)
2467 switch (regclass[n])
2468 {
2469 case X86_64_INTEGER_CLASS:
2470 case X86_64_INTEGERSI_CLASS:
2471 (*int_nregs)++;
2472 break;
2473 case X86_64_SSE_CLASS:
2474 case X86_64_SSESF_CLASS:
2475 case X86_64_SSEDF_CLASS:
2476 (*sse_nregs)++;
2477 break;
2478 case X86_64_NO_CLASS:
2479 case X86_64_SSEUP_CLASS:
2480 break;
2481 case X86_64_X87_CLASS:
2482 case X86_64_X87UP_CLASS:
2483 case X86_64_COMPLEX_X87_CLASS:
2484 if (!in_return)
2485 return true;
2486 break;
2487 case X86_64_MEMORY_CLASS:
2488 gcc_unreachable ();
2489 }
2490
2491 return false;
2492 }
2493
2494 /* Construct the container for the argument used by the GCC interface. See
2495 FUNCTION_ARG for the detailed description. */
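/* For example, for struct S { double d; int i; } passed as the first
   argument the container is roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:SI di)   (const_int 8))])

   i.e. D goes in %xmm0 and I in %edi. */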
2496
2497 static rtx
2498 construct_container (machine_mode mode, machine_mode orig_mode,
2499 const_tree type, int in_return, int nintregs, int nsseregs,
2500 const int *intreg, int sse_regno)
2501 {
2502 /* The following variables hold the static issued_error state. */
2503 static bool issued_sse_arg_error;
2504 static bool issued_sse_ret_error;
2505 static bool issued_x87_ret_error;
2506
2507 machine_mode tmpmode;
2508 int bytes
2509 = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2510 enum x86_64_reg_class regclass[MAX_CLASSES];
2511 int n;
2512 int i;
2513 int nexps = 0;
2514 int needed_sseregs, needed_intregs;
2515 rtx exp[MAX_CLASSES];
2516 rtx ret;
2517
2518 n = classify_argument (mode, type, regclass, 0);
2519 if (!n)
2520 return NULL;
2521 if (examine_argument (mode, type, in_return, &needed_intregs,
2522 &needed_sseregs))
2523 return NULL;
2524 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2525 return NULL;
2526
2527 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2528 some less clueful developer tries to use floating-point anyway. */
2529 if (needed_sseregs && !TARGET_SSE)
2530 {
2531 if (in_return)
2532 {
2533 if (!issued_sse_ret_error)
2534 {
2535 error ("SSE register return with SSE disabled");
2536 issued_sse_ret_error = true;
2537 }
2538 }
2539 else if (!issued_sse_arg_error)
2540 {
2541 error ("SSE register argument with SSE disabled");
2542 issued_sse_arg_error = true;
2543 }
2544 return NULL;
2545 }
2546
2547 /* Likewise, error if the ABI requires us to return values in the
2548 x87 registers and the user specified -mno-80387. */
2549 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
2550 for (i = 0; i < n; i++)
2551 if (regclass[i] == X86_64_X87_CLASS
2552 || regclass[i] == X86_64_X87UP_CLASS
2553 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
2554 {
2555 if (!issued_x87_ret_error)
2556 {
2557 error ("x87 register return with x87 disabled");
2558 issued_x87_ret_error = true;
2559 }
2560 return NULL;
2561 }
2562
2563 /* First construct the simple cases. Avoid SCmode, since we want to use
2564 a single register to pass this type. */
2565 if (n == 1 && mode != SCmode)
2566 switch (regclass[0])
2567 {
2568 case X86_64_INTEGER_CLASS:
2569 case X86_64_INTEGERSI_CLASS:
2570 return gen_rtx_REG (mode, intreg[0]);
2571 case X86_64_SSE_CLASS:
2572 case X86_64_SSESF_CLASS:
2573 case X86_64_SSEDF_CLASS:
2574 if (mode != BLKmode)
2575 return gen_reg_or_parallel (mode, orig_mode,
2576 GET_SSE_REGNO (sse_regno));
2577 break;
2578 case X86_64_X87_CLASS:
2579 case X86_64_COMPLEX_X87_CLASS:
2580 return gen_rtx_REG (mode, FIRST_STACK_REG);
2581 case X86_64_NO_CLASS:
2582 /* Zero sized array, struct or class. */
2583 return NULL;
2584 default:
2585 gcc_unreachable ();
2586 }
2587 if (n == 2
2588 && regclass[0] == X86_64_SSE_CLASS
2589 && regclass[1] == X86_64_SSEUP_CLASS
2590 && mode != BLKmode)
2591 return gen_reg_or_parallel (mode, orig_mode,
2592 GET_SSE_REGNO (sse_regno));
2593 if (n == 4
2594 && regclass[0] == X86_64_SSE_CLASS
2595 && regclass[1] == X86_64_SSEUP_CLASS
2596 && regclass[2] == X86_64_SSEUP_CLASS
2597 && regclass[3] == X86_64_SSEUP_CLASS
2598 && mode != BLKmode)
2599 return gen_reg_or_parallel (mode, orig_mode,
2600 GET_SSE_REGNO (sse_regno));
2601 if (n == 8
2602 && regclass[0] == X86_64_SSE_CLASS
2603 && regclass[1] == X86_64_SSEUP_CLASS
2604 && regclass[2] == X86_64_SSEUP_CLASS
2605 && regclass[3] == X86_64_SSEUP_CLASS
2606 && regclass[4] == X86_64_SSEUP_CLASS
2607 && regclass[5] == X86_64_SSEUP_CLASS
2608 && regclass[6] == X86_64_SSEUP_CLASS
2609 && regclass[7] == X86_64_SSEUP_CLASS
2610 && mode != BLKmode)
2611 return gen_reg_or_parallel (mode, orig_mode,
2612 GET_SSE_REGNO (sse_regno));
2613 if (n == 2
2614 && regclass[0] == X86_64_X87_CLASS
2615 && regclass[1] == X86_64_X87UP_CLASS)
2616 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2617
2618 if (n == 2
2619 && regclass[0] == X86_64_INTEGER_CLASS
2620 && regclass[1] == X86_64_INTEGER_CLASS
2621 && (mode == CDImode || mode == TImode || mode == BLKmode)
2622 && intreg[0] + 1 == intreg[1])
2623 {
2624 if (mode == BLKmode)
2625 {
2626 /* Use TImode for BLKmode values in 2 integer registers. */
2627 exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
2628 gen_rtx_REG (TImode, intreg[0]),
2629 GEN_INT (0));
2630 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
2631 XVECEXP (ret, 0, 0) = exp[0];
2632 return ret;
2633 }
2634 else
2635 return gen_rtx_REG (mode, intreg[0]);
2636 }
2637
2638 /* Otherwise figure out the entries of the PARALLEL. */
2639 for (i = 0; i < n; i++)
2640 {
2641 int pos;
2642
2643 switch (regclass[i])
2644 {
2645 case X86_64_NO_CLASS:
2646 break;
2647 case X86_64_INTEGER_CLASS:
2648 case X86_64_INTEGERSI_CLASS:
2649 /* Merge TImodes on aligned occasions here too. */
2650 if (i * 8 + 8 > bytes)
2651 {
2652 unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
2653 if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
2654 /* We've requested a size (e.g. 24 bytes) for
2655 which there is no integer mode. Use DImode. */
2656 tmpmode = DImode;
2657 }
2658 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
2659 tmpmode = SImode;
2660 else
2661 tmpmode = DImode;
2662 exp [nexps++]
2663 = gen_rtx_EXPR_LIST (VOIDmode,
2664 gen_rtx_REG (tmpmode, *intreg),
2665 GEN_INT (i*8));
2666 intreg++;
2667 break;
2668 case X86_64_SSESF_CLASS:
2669 exp [nexps++]
2670 = gen_rtx_EXPR_LIST (VOIDmode,
2671 gen_rtx_REG (SFmode,
2672 GET_SSE_REGNO (sse_regno)),
2673 GEN_INT (i*8));
2674 sse_regno++;
2675 break;
2676 case X86_64_SSEDF_CLASS:
2677 exp [nexps++]
2678 = gen_rtx_EXPR_LIST (VOIDmode,
2679 gen_rtx_REG (DFmode,
2680 GET_SSE_REGNO (sse_regno)),
2681 GEN_INT (i*8));
2682 sse_regno++;
2683 break;
2684 case X86_64_SSE_CLASS:
2685 pos = i;
2686 switch (n)
2687 {
2688 case 1:
2689 tmpmode = DImode;
2690 break;
2691 case 2:
2692 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
2693 {
2694 tmpmode = TImode;
2695 i++;
2696 }
2697 else
2698 tmpmode = DImode;
2699 break;
2700 case 4:
2701 gcc_assert (i == 0
2702 && regclass[1] == X86_64_SSEUP_CLASS
2703 && regclass[2] == X86_64_SSEUP_CLASS
2704 && regclass[3] == X86_64_SSEUP_CLASS);
2705 tmpmode = OImode;
2706 i += 3;
2707 break;
2708 case 8:
2709 gcc_assert (i == 0
2710 && regclass[1] == X86_64_SSEUP_CLASS
2711 && regclass[2] == X86_64_SSEUP_CLASS
2712 && regclass[3] == X86_64_SSEUP_CLASS
2713 && regclass[4] == X86_64_SSEUP_CLASS
2714 && regclass[5] == X86_64_SSEUP_CLASS
2715 && regclass[6] == X86_64_SSEUP_CLASS
2716 && regclass[7] == X86_64_SSEUP_CLASS);
2717 tmpmode = XImode;
2718 i += 7;
2719 break;
2720 default:
2721 gcc_unreachable ();
2722 }
2723 exp [nexps++]
2724 = gen_rtx_EXPR_LIST (VOIDmode,
2725 gen_rtx_REG (tmpmode,
2726 GET_SSE_REGNO (sse_regno)),
2727 GEN_INT (pos*8));
2728 sse_regno++;
2729 break;
2730 default:
2731 gcc_unreachable ();
2732 }
2733 }
2734
2735 /* Empty aligned struct, union or class. */
2736 if (nexps == 0)
2737 return NULL;
2738
2739 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2740 for (i = 0; i < nexps; i++)
2741 XVECEXP (ret, 0, i) = exp [i];
2742 return ret;
2743 }
2744
2745 /* Update the data in CUM to advance over an argument of mode MODE
2746 and data type TYPE. (TYPE is null for libcalls where that information
2747 may not be available.)
2748
2749 Return the number of integer registers advanced over. */
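/* E.g. on ia32 with -mregparm=3, three int arguments consume EAX, EDX
   and ECX in that order; a fourth int argument no longer fits and is
   pushed on the stack (illustrative, assuming no fastcall/thiscall). */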
2750
2751 static int
2752 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
2753 const_tree type, HOST_WIDE_INT bytes,
2754 HOST_WIDE_INT words)
2755 {
2756 int res = 0;
2757 bool error_p = false;
2758
2759 if (TARGET_IAMCU)
2760 {
2761 /* Intel MCU psABI passes scalars and aggregates no larger than 8
2762 bytes in registers. */
2763 if (!VECTOR_MODE_P (mode) && bytes <= 8)
2764 goto pass_in_reg;
2765 return res;
2766 }
2767
2768 switch (mode)
2769 {
2770 default:
2771 break;
2772
2773 case E_BLKmode:
2774 if (bytes < 0)
2775 break;
2776 /* FALLTHRU */
2777
2778 case E_DImode:
2779 case E_SImode:
2780 case E_HImode:
2781 case E_QImode:
2782 pass_in_reg:
2783 cum->words += words;
2784 cum->nregs -= words;
2785 cum->regno += words;
2786 if (cum->nregs >= 0)
2787 res = words;
2788 if (cum->nregs <= 0)
2789 {
2790 cum->nregs = 0;
2791 cfun->machine->arg_reg_available = false;
2792 cum->regno = 0;
2793 }
2794 break;
2795
2796 case E_OImode:
2797 /* OImode shouldn't be used directly. */
2798 gcc_unreachable ();
2799
2800 case E_DFmode:
2801 if (cum->float_in_sse == -1)
2802 error_p = true;
2803 if (cum->float_in_sse < 2)
2804 break;
2805 /* FALLTHRU */
2806 case E_SFmode:
2807 if (cum->float_in_sse == -1)
2808 error_p = true;
2809 if (cum->float_in_sse < 1)
2810 break;
2811 /* FALLTHRU */
2812
2813 case E_V8SFmode:
2814 case E_V8SImode:
2815 case E_V64QImode:
2816 case E_V32HImode:
2817 case E_V16SImode:
2818 case E_V8DImode:
2819 case E_V16SFmode:
2820 case E_V8DFmode:
2821 case E_V32QImode:
2822 case E_V16HImode:
2823 case E_V4DFmode:
2824 case E_V4DImode:
2825 case E_TImode:
2826 case E_V16QImode:
2827 case E_V8HImode:
2828 case E_V4SImode:
2829 case E_V2DImode:
2830 case E_V4SFmode:
2831 case E_V2DFmode:
2832 if (!type || !AGGREGATE_TYPE_P (type))
2833 {
2834 cum->sse_words += words;
2835 cum->sse_nregs -= 1;
2836 cum->sse_regno += 1;
2837 if (cum->sse_nregs <= 0)
2838 {
2839 cum->sse_nregs = 0;
2840 cum->sse_regno = 0;
2841 }
2842 }
2843 break;
2844
2845 case E_V8QImode:
2846 case E_V4HImode:
2847 case E_V2SImode:
2848 case E_V2SFmode:
2849 case E_V1TImode:
2850 case E_V1DImode:
2851 if (!type || !AGGREGATE_TYPE_P (type))
2852 {
2853 cum->mmx_words += words;
2854 cum->mmx_nregs -= 1;
2855 cum->mmx_regno += 1;
2856 if (cum->mmx_nregs <= 0)
2857 {
2858 cum->mmx_nregs = 0;
2859 cum->mmx_regno = 0;
2860 }
2861 }
2862 break;
2863 }
2864 if (error_p)
2865 {
2866 cum->float_in_sse = 0;
2867 error ("calling %qD with SSE calling convention without "
2868 "SSE/SSE2 enabled", cum->decl);
2869 sorry ("this is a GCC bug that can be worked around by adding "
2870 "attribute used to function called");
2871 }
2872
2873 return res;
2874 }
2875
2876 static int
2877 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
2878 const_tree type, HOST_WIDE_INT words, bool named)
2879 {
2880 int int_nregs, sse_nregs;
2881
2882 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
2883 if (!named && (VALID_AVX512F_REG_MODE (mode)
2884 || VALID_AVX256_REG_MODE (mode)))
2885 return 0;
2886
2887 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
2888 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2889 {
2890 cum->nregs -= int_nregs;
2891 cum->sse_nregs -= sse_nregs;
2892 cum->regno += int_nregs;
2893 cum->sse_regno += sse_nregs;
2894 return int_nregs;
2895 }
2896 else
2897 {
2898 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
2899 cum->words = ROUND_UP (cum->words, align);
2900 cum->words += words;
2901 return 0;
2902 }
2903 }
2904
2905 static int
2906 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
2907 HOST_WIDE_INT words)
2908 {
2909 /* Otherwise, this should have been passed indirectly. */
2910 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
2911
2912 cum->words += words;
2913 if (cum->nregs > 0)
2914 {
2915 cum->nregs -= 1;
2916 cum->regno += 1;
2917 return 1;
2918 }
2919 return 0;
2920 }
2921
2922 /* Update the data in CUM to advance over argument ARG. */
2923
2924 static void
2925 ix86_function_arg_advance (cumulative_args_t cum_v,
2926 const function_arg_info &arg)
2927 {
2928 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2929 machine_mode mode = arg.mode;
2930 HOST_WIDE_INT bytes, words;
2931 int nregs;
2932
2933 /* The argument of an interrupt handler is a special case and is
2934 handled in ix86_function_arg. */
2935 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
2936 return;
2937
2938 bytes = arg.promoted_size_in_bytes ();
2939 words = CEIL (bytes, UNITS_PER_WORD);
2940
2941 if (arg.type)
2942 mode = type_natural_mode (arg.type, NULL, false);
2943
2944 if (TARGET_64BIT)
2945 {
2946 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
2947
2948 if (call_abi == MS_ABI)
2949 nregs = function_arg_advance_ms_64 (cum, bytes, words);
2950 else
2951 nregs = function_arg_advance_64 (cum, mode, arg.type, words,
2952 arg.named);
2953 }
2954 else
2955 nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words);
2956
2957 if (!nregs)
2958 {
2959 /* Track if there are outgoing arguments on stack. */
2960 if (cum->caller)
2961 cfun->machine->outgoing_args_on_stack = true;
2962 }
2963 }
2964
2965 /* Define where to put the arguments to a function.
2966 Value is zero to push the argument on the stack,
2967 or a hard register in which to store the argument.
2968
2969 MODE is the argument's machine mode.
2970 TYPE is the data type of the argument (as a tree).
2971 This is null for libcalls where that information may
2972 not be available.
2973 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2974 the preceding args and about the function being called.
2975 NAMED is nonzero if this argument is a named parameter
2976 (otherwise it is an extra parameter matching an ellipsis). */
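/* For instance, with

     __attribute__ ((fastcall)) int f (int a, int b, int c);

   A is passed in %ecx, B in %edx and C on the stack, as handled by the
   fastcall case below. */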
2977
2978 static rtx
2979 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
2980 machine_mode orig_mode, const_tree type,
2981 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
2982 {
2983 bool error_p = false;
2984
2985 /* Avoid the AL settings for the Unix64 ABI. */
2986 if (mode == VOIDmode)
2987 return constm1_rtx;
2988
2989 if (TARGET_IAMCU)
2990 {
2991 /* Intel MCU psABI passes scalars and aggregates no larger than 8
2992 bytes in registers. */
2993 if (!VECTOR_MODE_P (mode) && bytes <= 8)
2994 goto pass_in_reg;
2995 return NULL_RTX;
2996 }
2997
2998 switch (mode)
2999 {
3000 default:
3001 break;
3002
3003 case E_BLKmode:
3004 if (bytes < 0)
3005 break;
3006 /* FALLTHRU */
3007 case E_DImode:
3008 case E_SImode:
3009 case E_HImode:
3010 case E_QImode:
3011 pass_in_reg:
3012 if (words <= cum->nregs)
3013 {
3014 int regno = cum->regno;
3015
3016 /* Fastcall allocates the first two DWORD (SImode) or
3017 smaller arguments to ECX and EDX if the argument isn't
3018 an aggregate type. */
3019 if (cum->fastcall)
3020 {
3021 if (mode == BLKmode
3022 || mode == DImode
3023 || (type && AGGREGATE_TYPE_P (type)))
3024 break;
3025
3026 /* ECX, not EAX, is the first allocated register. */
3027 if (regno == AX_REG)
3028 regno = CX_REG;
3029 }
3030 return gen_rtx_REG (mode, regno);
3031 }
3032 break;
3033
3034 case E_DFmode:
3035 if (cum->float_in_sse == -1)
3036 error_p = true;
3037 if (cum->float_in_sse < 2)
3038 break;
3039 /* FALLTHRU */
3040 case E_SFmode:
3041 if (cum->float_in_sse == -1)
3042 error_p = true;
3043 if (cum->float_in_sse < 1)
3044 break;
3045 /* FALLTHRU */
3046 case E_TImode:
3047 /* In 32bit, we pass TImode in xmm registers. */
3048 case E_V16QImode:
3049 case E_V8HImode:
3050 case E_V4SImode:
3051 case E_V2DImode:
3052 case E_V4SFmode:
3053 case E_V2DFmode:
3054 if (!type || !AGGREGATE_TYPE_P (type))
3055 {
3056 if (cum->sse_nregs)
3057 return gen_reg_or_parallel (mode, orig_mode,
3058 cum->sse_regno + FIRST_SSE_REG);
3059 }
3060 break;
3061
3062 case E_OImode:
3063 case E_XImode:
3064 /* OImode and XImode shouldn't be used directly. */
3065 gcc_unreachable ();
3066
3067 case E_V64QImode:
3068 case E_V32HImode:
3069 case E_V16SImode:
3070 case E_V8DImode:
3071 case E_V16SFmode:
3072 case E_V8DFmode:
3073 case E_V8SFmode:
3074 case E_V8SImode:
3075 case E_V32QImode:
3076 case E_V16HImode:
3077 case E_V4DFmode:
3078 case E_V4DImode:
3079 if (!type || !AGGREGATE_TYPE_P (type))
3080 {
3081 if (cum->sse_nregs)
3082 return gen_reg_or_parallel (mode, orig_mode,
3083 cum->sse_regno + FIRST_SSE_REG);
3084 }
3085 break;
3086
3087 case E_V8QImode:
3088 case E_V4HImode:
3089 case E_V2SImode:
3090 case E_V2SFmode:
3091 case E_V1TImode:
3092 case E_V1DImode:
3093 if (!type || !AGGREGATE_TYPE_P (type))
3094 {
3095 if (cum->mmx_nregs)
3096 return gen_reg_or_parallel (mode, orig_mode,
3097 cum->mmx_regno + FIRST_MMX_REG);
3098 }
3099 break;
3100 }
3101 if (error_p)
3102 {
3103 cum->float_in_sse = 0;
3104 error ("calling %qD with SSE calling convention without "
3105 "SSE/SSE2 enabled", cum->decl);
3106 sorry ("this is a GCC bug that can be worked around by adding "
3107 "attribute used to function called");
3108 }
3109
3110 return NULL_RTX;
3111 }
3112
3113 static rtx
3114 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3115 machine_mode orig_mode, const_tree type, bool named)
3116 {
3117 /* Handle a hidden AL argument containing number of registers
3118 for varargs x86-64 functions. */
3119 if (mode == VOIDmode)
3120 return GEN_INT (cum->maybe_vaarg
3121 ? (cum->sse_nregs < 0
3122 ? X86_64_SSE_REGPARM_MAX
3123 : cum->sse_regno)
3124 : -1);
3125
3126 switch (mode)
3127 {
3128 default:
3129 break;
3130
3131 case E_V8SFmode:
3132 case E_V8SImode:
3133 case E_V32QImode:
3134 case E_V16HImode:
3135 case E_V4DFmode:
3136 case E_V4DImode:
3137 case E_V16SFmode:
3138 case E_V16SImode:
3139 case E_V64QImode:
3140 case E_V32HImode:
3141 case E_V8DFmode:
3142 case E_V8DImode:
3143 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
3144 if (!named)
3145 return NULL;
3146 break;
3147 }
3148
3149 return construct_container (mode, orig_mode, type, 0, cum->nregs,
3150 cum->sse_nregs,
3151 &x86_64_int_parameter_registers [cum->regno],
3152 cum->sse_regno);
3153 }
3154
3155 static rtx
3156 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3157 machine_mode orig_mode, bool named, const_tree type,
3158 HOST_WIDE_INT bytes)
3159 {
3160 unsigned int regno;
3161
3162 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
3163 We use the value -2 to specify that the current function call is MS_ABI. */
3164 if (mode == VOIDmode)
3165 return GEN_INT (-2);
3166
3167 /* If we've run out of registers, it goes on the stack. */
3168 if (cum->nregs == 0)
3169 return NULL_RTX;
3170
3171 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
3172
3173 /* Only floating point modes are passed in anything but integer regs. */
3174 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
3175 {
3176 if (named)
3177 {
3178 if (type == NULL_TREE || !AGGREGATE_TYPE_P (type))
3179 regno = cum->regno + FIRST_SSE_REG;
3180 }
3181 else
3182 {
3183 rtx t1, t2;
3184
3185 /* Unnamed floating parameters are passed in both the
3186 SSE and integer registers. */
3187 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
3188 t2 = gen_rtx_REG (mode, regno);
3189 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
3190 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
3191 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
3192 }
3193 }
3194 /* Handle aggregate types passed in a register. */
3195 if (orig_mode == BLKmode)
3196 {
3197 if (bytes > 0 && bytes <= 8)
3198 mode = (bytes > 4 ? DImode : SImode);
3199 if (mode == BLKmode)
3200 mode = DImode;
3201 }
3202
3203 return gen_reg_or_parallel (mode, orig_mode, regno);
3204 }
3205
3206 /* Return where to put the arguments to a function.
3207 Return zero to push the argument on the stack, or a hard register in which to store the argument.
3208
3209 ARG describes the argument while CUM gives information about the
3210 preceding args and about the function being called. */
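/* For interrupt and exception handlers, e.g.

     void __attribute__ ((interrupt)) handler (void *frame, uword_t error);

   the pointer argument lives at -WORD(AP) in the current frame and the
   error code is the word at -2*WORD(AP), as set up in the special case
   below (illustrative declaration). */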
3211
3212 static rtx
3213 ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
3214 {
3215 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3216 machine_mode mode = arg.mode;
3217 HOST_WIDE_INT bytes, words;
3218 rtx reg;
3219
3220 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
3221 {
3222 gcc_assert (arg.type != NULL_TREE);
3223 if (POINTER_TYPE_P (arg.type))
3224 {
3225 /* This is the pointer argument. */
3226 gcc_assert (TYPE_MODE (arg.type) == ptr_mode);
3227 /* It is at -WORD(AP) in the current frame in interrupt and
3228 exception handlers. */
3229 reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
3230 }
3231 else
3232 {
3233 gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
3234 && TREE_CODE (arg.type) == INTEGER_TYPE
3235 && TYPE_MODE (arg.type) == word_mode);
3236 /* The error code is the word-mode integer argument at
3237 -2 * WORD(AP) in the current frame of the exception
3238 handler. */
3239 reg = gen_rtx_MEM (word_mode,
3240 plus_constant (Pmode,
3241 arg_pointer_rtx,
3242 -2 * UNITS_PER_WORD));
3243 }
3244 return reg;
3245 }
3246
3247 bytes = arg.promoted_size_in_bytes ();
3248 words = CEIL (bytes, UNITS_PER_WORD);
3249
3250 /* To simplify the code below, represent vector types with a vector mode
3251 even if MMX/SSE are not active. */
3252 if (arg.type && TREE_CODE (arg.type) == VECTOR_TYPE)
3253 mode = type_natural_mode (arg.type, cum, false);
3254
3255 if (TARGET_64BIT)
3256 {
3257 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3258
3259 if (call_abi == MS_ABI)
3260 reg = function_arg_ms_64 (cum, mode, arg.mode, arg.named,
3261 arg.type, bytes);
3262 else
3263 reg = function_arg_64 (cum, mode, arg.mode, arg.type, arg.named);
3264 }
3265 else
3266 reg = function_arg_32 (cum, mode, arg.mode, arg.type, bytes, words);
3267
3268 /* Track if there are outgoing arguments on stack. */
3269 if (reg == NULL_RTX && cum->caller)
3270 cfun->machine->outgoing_args_on_stack = true;
3271
3272 return reg;
3273 }
3274
3275 /* A C expression that indicates when an argument must be passed by
3276 reference. If nonzero for an argument, a copy of that argument is
3277 made in memory and a pointer to the argument is passed instead of
3278 the argument itself. The pointer is passed in whatever way is
3279 appropriate for passing a pointer to that type. */
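/* E.g. under the MS x64 ABI a 12-byte struct or a 16-byte __m128
   argument is passed by reference (a pointer to a caller-made copy),
   while 1-, 2-, 4- and 8-byte objects are passed by value. */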
3280
3281 static bool
3282 ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
3283 {
3284 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3285
3286 if (TARGET_64BIT)
3287 {
3288 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3289
3290 /* See Windows x64 Software Convention. */
3291 if (call_abi == MS_ABI)
3292 {
3293 HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode);
3294
3295 if (tree type = arg.type)
3296 {
3297 /* Arrays are passed by reference. */
3298 if (TREE_CODE (type) == ARRAY_TYPE)
3299 return true;
3300
3301 if (RECORD_OR_UNION_TYPE_P (type))
3302 {
3303 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
3304 are passed by reference. */
3305 msize = int_size_in_bytes (type);
3306 }
3307 }
3308
3309 /* __m128 is passed by reference. */
3310 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
3311 }
3312 else if (arg.type && int_size_in_bytes (arg.type) == -1)
3313 return true;
3314 }
3315
3316 return false;
3317 }
3318
3319 /* Return true when TYPE should be 128bit aligned for 32bit argument
3320 passing ABI. XXX: This function is obsolete and is only used for
3321 checking psABI compatibility with previous versions of GCC. */
3322
3323 static bool
3324 ix86_compat_aligned_value_p (const_tree type)
3325 {
3326 machine_mode mode = TYPE_MODE (type);
3327 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
3328 || mode == TDmode
3329 || mode == TFmode
3330 || mode == TCmode)
3331 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3332 return true;
3333 if (TYPE_ALIGN (type) < 128)
3334 return false;
3335
3336 if (AGGREGATE_TYPE_P (type))
3337 {
3338 /* Walk the aggregates recursively. */
3339 switch (TREE_CODE (type))
3340 {
3341 case RECORD_TYPE:
3342 case UNION_TYPE:
3343 case QUAL_UNION_TYPE:
3344 {
3345 tree field;
3346
3347 /* Walk all the structure fields. */
3348 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3349 {
3350 if (TREE_CODE (field) == FIELD_DECL
3351 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
3352 return true;
3353 }
3354 break;
3355 }
3356
3357 case ARRAY_TYPE:
3358 /* Just for use if some languages pass arrays by value. */
3359 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
3360 return true;
3361 break;
3362
3363 default:
3364 gcc_unreachable ();
3365 }
3366 }
3367 return false;
3368 }
3369
3370 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
3371 XXX: This function is obsolete and is only used for checking psABI
3372 compatibility with previous versions of GCC. */
3373
3374 static unsigned int
3375 ix86_compat_function_arg_boundary (machine_mode mode,
3376 const_tree type, unsigned int align)
3377 {
3378 /* In 32bit, only _Decimal128 and __float128 are aligned to their
3379 natural boundaries. */
3380 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
3381 {
3382 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3383 make an exception for SSE modes since these require 128bit
3384 alignment.
3385
3386 The handling here differs from field_alignment. ICC aligns MMX
3387 arguments to 4 byte boundaries, while structure fields are aligned
3388 to 8 byte boundaries. */
3389 if (!type)
3390 {
3391 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
3392 align = PARM_BOUNDARY;
3393 }
3394 else
3395 {
3396 if (!ix86_compat_aligned_value_p (type))
3397 align = PARM_BOUNDARY;
3398 }
3399 }
3400 if (align > BIGGEST_ALIGNMENT)
3401 align = BIGGEST_ALIGNMENT;
3402 return align;
3403 }
3404
3405 /* Return true when TYPE should be 128bit aligned for 32bit argument
3406 passing ABI. */
3407
3408 static bool
3409 ix86_contains_aligned_value_p (const_tree type)
3410 {
3411 machine_mode mode = TYPE_MODE (type);
3412
3413 if (mode == XFmode || mode == XCmode)
3414 return false;
3415
3416 if (TYPE_ALIGN (type) < 128)
3417 return false;
3418
3419 if (AGGREGATE_TYPE_P (type))
3420 {
3421 /* Walk the aggregates recursively. */
3422 switch (TREE_CODE (type))
3423 {
3424 case RECORD_TYPE:
3425 case UNION_TYPE:
3426 case QUAL_UNION_TYPE:
3427 {
3428 tree field;
3429
3430 /* Walk all the structure fields. */
3431 for (field = TYPE_FIELDS (type);
3432 field;
3433 field = DECL_CHAIN (field))
3434 {
3435 if (TREE_CODE (field) == FIELD_DECL
3436 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
3437 return true;
3438 }
3439 break;
3440 }
3441
3442 case ARRAY_TYPE:
3443 /* Just for use if some languages pass arrays by value. */
3444 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
3445 return true;
3446 break;
3447
3448 default:
3449 gcc_unreachable ();
3450 }
3451 }
3452 else
3453 return TYPE_ALIGN (type) >= 128;
3454
3455 return false;
3456 }
3457
3458 /* Gives the alignment boundary, in bits, of an argument with the
3459 specified mode and type. */
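/* E.g. on ia32 an __m128 argument is aligned to 128 bits, while a plain
   double argument ends up at PARM_BOUNDARY; cases where this result
   differs from the pre-4.6 computation trigger the -Wpsabi note below. */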
3460
3461 static unsigned int
3462 ix86_function_arg_boundary (machine_mode mode, const_tree type)
3463 {
3464 unsigned int align;
3465 if (type)
3466 {
3467 /* Since the main variant type is used for the call, convert
3468 the type to its main variant. */
3469 type = TYPE_MAIN_VARIANT (type);
3470 align = TYPE_ALIGN (type);
3471 if (TYPE_EMPTY_P (type))
3472 return PARM_BOUNDARY;
3473 }
3474 else
3475 align = GET_MODE_ALIGNMENT (mode);
3476 if (align < PARM_BOUNDARY)
3477 align = PARM_BOUNDARY;
3478 else
3479 {
3480 static bool warned;
3481 unsigned int saved_align = align;
3482
3483 if (!TARGET_64BIT)
3484 {
3485 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
3486 if (!type)
3487 {
3488 if (mode == XFmode || mode == XCmode)
3489 align = PARM_BOUNDARY;
3490 }
3491 else if (!ix86_contains_aligned_value_p (type))
3492 align = PARM_BOUNDARY;
3493
3494 if (align < 128)
3495 align = PARM_BOUNDARY;
3496 }
3497
3498 if (warn_psabi
3499 && !warned
3500 && align != ix86_compat_function_arg_boundary (mode, type,
3501 saved_align))
3502 {
3503 warned = true;
3504 inform (input_location,
3505 "the ABI for passing parameters with %d-byte"
3506 " alignment has changed in GCC 4.6",
3507 align / BITS_PER_UNIT);
3508 }
3509 }
3510
3511 return align;
3512 }
3513
3514 /* Return true if REGNO is a possible register number for a function value. */
3515
3516 static bool
3517 ix86_function_value_regno_p (const unsigned int regno)
3518 {
3519 switch (regno)
3520 {
3521 case AX_REG:
3522 return true;
3523 case DX_REG:
3524 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
3525 case DI_REG:
3526 case SI_REG:
3527 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
3528
3529 /* Complex values are returned in %st(0)/%st(1) pair. */
3530 case ST0_REG:
3531 case ST1_REG:
3532 /* TODO: The function should depend on current function ABI but
3533 builtins.c would need updating then. Therefore we use the
3534 default ABI. */
3535 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3536 return false;
3537 return TARGET_FLOAT_RETURNS_IN_80387;
3538
3539 /* Complex values are returned in %xmm0/%xmm1 pair. */
3540 case XMM0_REG:
3541 case XMM1_REG:
3542 return TARGET_SSE;
3543
3544 case MM0_REG:
3545 if (TARGET_MACHO || TARGET_64BIT)
3546 return false;
3547 return TARGET_MMX;
3548 }
3549
3550 return false;
3551 }
3552
3553 /* Define how to find the value returned by a function.
3554 VALTYPE is the data type of the value (as a tree).
3555 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3556 otherwise, FUNC is 0. */
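/* E.g. on ia32 an int is returned in %eax, a double in %st(0) (unless
   -mno-fp-ret-in-387), and a 16-byte vector in %xmm0; local functions
   using SSE math may return SFmode/DFmode values in %xmm0 instead. */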
3557
3558 static rtx
3559 function_value_32 (machine_mode orig_mode, machine_mode mode,
3560 const_tree fntype, const_tree fn)
3561 {
3562 unsigned int regno;
3563
3564 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3565 we normally prevent this case when mmx is not available. However
3566 some ABIs may require the result to be returned like DImode. */
3567 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3568 regno = FIRST_MMX_REG;
3569
3570 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3571 we prevent this case when sse is not available. However some ABIs
3572 may require the result to be returned like integer TImode. */
3573 else if (mode == TImode
3574 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3575 regno = FIRST_SSE_REG;
3576
3577 /* 32-byte vector modes in %ymm0. */
3578 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
3579 regno = FIRST_SSE_REG;
3580
3581 /* 64-byte vector modes in %zmm0. */
3582 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
3583 regno = FIRST_SSE_REG;
3584
3585 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
3586 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
3587 regno = FIRST_FLOAT_REG;
3588 else
3589 /* Most things go in %eax. */
3590 regno = AX_REG;
3591
3592 /* Override FP return register with %xmm0 for local functions when
3593 SSE math is enabled or for functions with sseregparm attribute. */
3594 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
3595 {
3596 int sse_level = ix86_function_sseregparm (fntype, fn, false);
3597 if (sse_level == -1)
3598 {
3599 error ("calling %qD with SSE calling convention without "
3600 "SSE/SSE2 enabled", fn);
3601 sorry ("this is a GCC bug that can be worked around by adding "
3602 "attribute used to function called");
3603 }
3604 else if ((sse_level >= 1 && mode == SFmode)
3605 || (sse_level == 2 && mode == DFmode))
3606 regno = FIRST_SSE_REG;
3607 }
3608
3609 /* OImode shouldn't be used directly. */
3610 gcc_assert (mode != OImode);
3611
3612 return gen_rtx_REG (orig_mode, regno);
3613 }
3614
3615 static rtx
3616 function_value_64 (machine_mode orig_mode, machine_mode mode,
3617 const_tree valtype)
3618 {
3619 rtx ret;
3620
3621 /* Handle libcalls, which don't provide a type node. */
3622 if (valtype == NULL)
3623 {
3624 unsigned int regno;
3625
3626 switch (mode)
3627 {
3628 case E_SFmode:
3629 case E_SCmode:
3630 case E_DFmode:
3631 case E_DCmode:
3632 case E_TFmode:
3633 case E_SDmode:
3634 case E_DDmode:
3635 case E_TDmode:
3636 regno = FIRST_SSE_REG;
3637 break;
3638 case E_XFmode:
3639 case E_XCmode:
3640 regno = FIRST_FLOAT_REG;
3641 break;
3642 case E_TCmode:
3643 return NULL;
3644 default:
3645 regno = AX_REG;
3646 }
3647
3648 return gen_rtx_REG (mode, regno);
3649 }
3650 else if (POINTER_TYPE_P (valtype))
3651 {
3652 /* Pointers are always returned in word_mode. */
3653 mode = word_mode;
3654 }
3655
3656 ret = construct_container (mode, orig_mode, valtype, 1,
3657 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
3658 x86_64_int_return_registers, 0);
3659
3660 /* For zero-sized structures, construct_container returns NULL, but we
3661 need to keep the rest of the compiler happy by returning a meaningful value. */
3662 if (!ret)
3663 ret = gen_rtx_REG (orig_mode, AX_REG);
3664
3665 return ret;
3666 }
3667
3668 static rtx
3669 function_value_ms_32 (machine_mode orig_mode, machine_mode mode,
3670 const_tree fntype, const_tree fn, const_tree valtype)
3671 {
3672 unsigned int regno;
3673
3674 /* Floating point return values in %st(0)
3675 (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes). */
3676 if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387
3677 && (GET_MODE_SIZE (mode) > 8
3678 || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype)))
3679 {
3680 regno = FIRST_FLOAT_REG;
3681 return gen_rtx_REG (orig_mode, regno);
3682 }
3683 else
3684 return function_value_32 (orig_mode, mode, fntype, fn);
3685 }
3686
3687 static rtx
3688 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
3689 const_tree valtype)
3690 {
3691 unsigned int regno = AX_REG;
3692
3693 if (TARGET_SSE)
3694 {
3695 switch (GET_MODE_SIZE (mode))
3696 {
3697 case 16:
3698 if (valtype != NULL_TREE
3699 && !VECTOR_INTEGER_TYPE_P (valtype)
3701 && !INTEGRAL_TYPE_P (valtype)
3702 && !VECTOR_FLOAT_TYPE_P (valtype))
3703 break;
3704 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
3705 && !COMPLEX_MODE_P (mode))
3706 regno = FIRST_SSE_REG;
3707 break;
3708 case 8:
3709 case 4:
3710 if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype))
3711 break;
3712 if (mode == SFmode || mode == DFmode)
3713 regno = FIRST_SSE_REG;
3714 break;
3715 default:
3716 break;
3717 }
3718 }
3719 return gen_rtx_REG (orig_mode, regno);
3720 }
3721
3722 static rtx
3723 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
3724 machine_mode orig_mode, machine_mode mode)
3725 {
3726 const_tree fn, fntype;
3727
3728 fn = NULL_TREE;
3729 if (fntype_or_decl && DECL_P (fntype_or_decl))
3730 fn = fntype_or_decl;
3731 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3732
3733 if (ix86_function_type_abi (fntype) == MS_ABI)
3734 {
3735 if (TARGET_64BIT)
3736 return function_value_ms_64 (orig_mode, mode, valtype);
3737 else
3738 return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype);
3739 }
3740 else if (TARGET_64BIT)
3741 return function_value_64 (orig_mode, mode, valtype);
3742 else
3743 return function_value_32 (orig_mode, mode, fntype, fn);
3744 }
3745
3746 static rtx
3747 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
3748 {
3749 machine_mode mode, orig_mode;
3750
3751 orig_mode = TYPE_MODE (valtype);
3752 mode = type_natural_mode (valtype, NULL, true);
3753 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
3754 }
3755
3756 /* Pointer function arguments and return values are promoted to
3757 word_mode for normal functions. */
3758
3759 static machine_mode
3760 ix86_promote_function_mode (const_tree type, machine_mode mode,
3761 int *punsignedp, const_tree fntype,
3762 int for_return)
3763 {
3764 if (cfun->machine->func_type == TYPE_NORMAL
3765 && type != NULL_TREE
3766 && POINTER_TYPE_P (type))
3767 {
3768 *punsignedp = POINTERS_EXTEND_UNSIGNED;
3769 return word_mode;
3770 }
3771 return default_promote_function_mode (type, mode, punsignedp, fntype,
3772 for_return);
3773 }
3774
3775 /* Return true if a structure, union or array with MODE containing FIELD
3776 should be accessed using BLKmode. */
3777
3778 static bool
3779 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
3780 {
3781 /* Union with XFmode must be in BLKmode. */
3782 return (mode == XFmode
3783 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
3784 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
3785 }
3786
3787 rtx
3788 ix86_libcall_value (machine_mode mode)
3789 {
3790 return ix86_function_value_1 (NULL, NULL, mode, mode);
3791 }
3792
3793 /* Return true iff type is returned in memory. */
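/* For example (a sketch of the default 32-bit rules below, not of any
   SUBTARGET_RETURN_IN_MEMORY override): a 16-byte struct is returned
   in memory, while a 16-byte __m128 vector is returned in %xmm0
   whenever SSE is enabled.  */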
3794
3795 static bool
3796 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
3797 {
3798 #ifdef SUBTARGET_RETURN_IN_MEMORY
3799 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
3800 #else
3801 const machine_mode mode = type_natural_mode (type, NULL, true);
3802 HOST_WIDE_INT size;
3803
3804 if (TARGET_64BIT)
3805 {
3806 if (ix86_function_type_abi (fntype) == MS_ABI)
3807 {
3808 size = int_size_in_bytes (type);
3809
3810 /* __m128 is returned in xmm0. */
3811 if ((!type || VECTOR_INTEGER_TYPE_P (type)
3812 || INTEGRAL_TYPE_P (type)
3813 || VECTOR_FLOAT_TYPE_P (type))
3814 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
3815 && !COMPLEX_MODE_P (mode)
3816 && (GET_MODE_SIZE (mode) == 16 || size == 16))
3817 return false;
3818
3819 /* Otherwise, the size must be exactly in [1248]. */
3820 return size != 1 && size != 2 && size != 4 && size != 8;
3821 }
3822 else
3823 {
3824 int needed_intregs, needed_sseregs;
3825
3826 return examine_argument (mode, type, 1,
3827 &needed_intregs, &needed_sseregs);
3828 }
3829 }
3830 else
3831 {
3832 size = int_size_in_bytes (type);
3833
3834 /* Intel MCU psABI returns scalars and aggregates no larger than 8
3835 bytes in registers. */
3836 if (TARGET_IAMCU)
3837 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
3838
3839 if (mode == BLKmode)
3840 return true;
3841
3842 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3843 return false;
3844
3845 if (VECTOR_MODE_P (mode) || mode == TImode)
3846 {
3847 /* User-created vectors small enough to fit in EAX. */
3848 if (size < 8)
3849 return false;
3850
3851 /* Unless ABI prescribes otherwise,
3852 MMX/3dNow values are returned in MM0 if available. */
3853
3854 if (size == 8)
3855 return TARGET_VECT8_RETURNS || !TARGET_MMX;
3856
3857 /* SSE values are returned in XMM0 if available. */
3858 if (size == 16)
3859 return !TARGET_SSE;
3860
3861 /* AVX values are returned in YMM0 if available. */
3862 if (size == 32)
3863 return !TARGET_AVX;
3864
3865 /* AVX512F values are returned in ZMM0 if available. */
3866 if (size == 64)
3867 return !TARGET_AVX512F;
3868 }
3869
3870 if (mode == XFmode)
3871 return false;
3872
3873 if (size > 12)
3874 return true;
3875
3876 /* OImode shouldn't be used directly. */
3877 gcc_assert (mode != OImode);
3878
3879 return false;
3880 }
3881 #endif
3882 }
3883
3884
3885 /* Create the va_list data type. */
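/* A rough C-level sketch of the record built below, matching the
   SysV AMD64 psABI va_list (the typedef name is only illustrative):

     struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     };
     typedef struct __va_list_tag __builtin_va_list[1];  */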
3886
3887 static tree
3888 ix86_build_builtin_va_list_64 (void)
3889 {
3890 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3891
3892 record = lang_hooks.types.make_type (RECORD_TYPE);
3893 type_decl = build_decl (BUILTINS_LOCATION,
3894 TYPE_DECL, get_identifier ("__va_list_tag"), record);
3895
3896 f_gpr = build_decl (BUILTINS_LOCATION,
3897 FIELD_DECL, get_identifier ("gp_offset"),
3898 unsigned_type_node);
3899 f_fpr = build_decl (BUILTINS_LOCATION,
3900 FIELD_DECL, get_identifier ("fp_offset"),
3901 unsigned_type_node);
3902 f_ovf = build_decl (BUILTINS_LOCATION,
3903 FIELD_DECL, get_identifier ("overflow_arg_area"),
3904 ptr_type_node);
3905 f_sav = build_decl (BUILTINS_LOCATION,
3906 FIELD_DECL, get_identifier ("reg_save_area"),
3907 ptr_type_node);
3908
3909 va_list_gpr_counter_field = f_gpr;
3910 va_list_fpr_counter_field = f_fpr;
3911
3912 DECL_FIELD_CONTEXT (f_gpr) = record;
3913 DECL_FIELD_CONTEXT (f_fpr) = record;
3914 DECL_FIELD_CONTEXT (f_ovf) = record;
3915 DECL_FIELD_CONTEXT (f_sav) = record;
3916
3917 TYPE_STUB_DECL (record) = type_decl;
3918 TYPE_NAME (record) = type_decl;
3919 TYPE_FIELDS (record) = f_gpr;
3920 DECL_CHAIN (f_gpr) = f_fpr;
3921 DECL_CHAIN (f_fpr) = f_ovf;
3922 DECL_CHAIN (f_ovf) = f_sav;
3923
3924 layout_type (record);
3925
3926 TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
3927 NULL_TREE, TYPE_ATTRIBUTES (record));
3928
3929 /* The correct type is an array type of one element. */
3930 return build_array_type (record, build_index_type (size_zero_node));
3931 }
3932
3933 /* Set up the builtin va_list data type and, for 64-bit, the additional
3934 calling-convention-specific va_list data types. */
3935
3936 static tree
3937 ix86_build_builtin_va_list (void)
3938 {
3939 if (TARGET_64BIT)
3940 {
3941 /* Initialize ABI specific va_list builtin types.
3942
3943 In lto1, we can encounter two va_list types:
3944 - one as a result of the type-merge across TUs, and
3945 - the one constructed here.
3946 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
3947 a type identity check in canonical_va_list_type based on
3948 TYPE_MAIN_VARIANT (which we used to have) will not work.
3949 Instead, we tag each va_list_type_node with its unique attribute, and
3950 look for the attribute in the type identity check in
3951 canonical_va_list_type.
3952
3953 Tagging sysv_va_list_type_node directly with the attribute is
3954 problematic since it's an array of one record, which will decay into a
3955 pointer to the record when used as a parameter (see build_va_arg comments for
3956 an example), dropping the attribute in the process. So we tag the
3957 record instead. */
3958
3959 /* For SYSV_ABI we use an array of one record. */
3960 sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
3961
3962 /* For MS_ABI we use plain pointer to argument area. */
3963 tree char_ptr_type = build_pointer_type (char_type_node);
3964 tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
3965 TYPE_ATTRIBUTES (char_ptr_type));
3966 ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
3967
3968 return ((ix86_abi == MS_ABI)
3969 ? ms_va_list_type_node
3970 : sysv_va_list_type_node);
3971 }
3972 else
3973 {
3974 /* For i386 we use plain pointer to argument area. */
3975 return build_pointer_type (char_type_node);
3976 }
3977 }
3978
3979 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
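/* A sketch of the code this worker emits into the prologue, assuming
   no named arguments consume registers (addresses are shown relative
   to the register save area; the real code addresses it via the frame
   pointer):

     movq   %rdi, 0(save_area)      # six GP argument registers,
     ...                            #   one word apart
     movq   %r9, 40(save_area)
     testb  %al, %al                # AL = number of SSE regs used
     je     1f
     movaps %xmm0, 48(save_area)    # eight SSE registers, 16 bytes apart
     ...
     movaps %xmm7, 160(save_area)
   1:  */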
3980
3981 static void
3982 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
3983 {
3984 rtx save_area, mem;
3985 alias_set_type set;
3986 int i, max;
3987
3988 /* GPR size of varargs save area. */
3989 if (cfun->va_list_gpr_size)
3990 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
3991 else
3992 ix86_varargs_gpr_size = 0;
3993
3994 /* FPR size of varargs save area. We don't need it if we don't pass
3995 anything in SSE registers. */
3996 if (TARGET_SSE && cfun->va_list_fpr_size)
3997 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
3998 else
3999 ix86_varargs_fpr_size = 0;
4000
4001 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
4002 return;
4003
4004 save_area = frame_pointer_rtx;
4005 set = get_varargs_alias_set ();
4006
4007 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4008 if (max > X86_64_REGPARM_MAX)
4009 max = X86_64_REGPARM_MAX;
4010
4011 for (i = cum->regno; i < max; i++)
4012 {
4013 mem = gen_rtx_MEM (word_mode,
4014 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
4015 MEM_NOTRAP_P (mem) = 1;
4016 set_mem_alias_set (mem, set);
4017 emit_move_insn (mem,
4018 gen_rtx_REG (word_mode,
4019 x86_64_int_parameter_registers[i]));
4020 }
4021
4022 if (ix86_varargs_fpr_size)
4023 {
4024 machine_mode smode;
4025 rtx_code_label *label;
4026 rtx test;
4027
4028 /* Now emit code to save SSE registers. The AX parameter contains the
4029 number of SSE parameter registers used to call this function, though all
4030 we actually check here is the zero/non-zero status. */
4031
4032 label = gen_label_rtx ();
4033 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
4034 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
4035 label));
4036
4037 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
4038 we used movdqa (i.e. TImode) instead? Perhaps even better would
4039 be if we could determine the real mode of the data, via a hook
4040 into pass_stdarg. Ignore all that for now. */
4041 smode = V4SFmode;
4042 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
4043 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
4044
4045 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
4046 if (max > X86_64_SSE_REGPARM_MAX)
4047 max = X86_64_SSE_REGPARM_MAX;
4048
4049 for (i = cum->sse_regno; i < max; ++i)
4050 {
4051 mem = plus_constant (Pmode, save_area,
4052 i * 16 + ix86_varargs_gpr_size);
4053 mem = gen_rtx_MEM (smode, mem);
4054 MEM_NOTRAP_P (mem) = 1;
4055 set_mem_alias_set (mem, set);
4056 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
4057
4058 emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
4059 }
4060
4061 emit_label (label);
4062 }
4063 }
4064
4065 static void
4066 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4067 {
4068 alias_set_type set = get_varargs_alias_set ();
4069 int i;
4070
4071 /* Reset to zero, as there might have been a sysv va_arg used
4072 before. */
4073 ix86_varargs_gpr_size = 0;
4074 ix86_varargs_fpr_size = 0;
4075
4076 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
4077 {
4078 rtx reg, mem;
4079
4080 mem = gen_rtx_MEM (Pmode,
4081 plus_constant (Pmode, virtual_incoming_args_rtx,
4082 i * UNITS_PER_WORD));
4083 MEM_NOTRAP_P (mem) = 1;
4084 set_mem_alias_set (mem, set);
4085
4086 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4087 emit_move_insn (mem, reg);
4088 }
4089 }
4090
4091 static void
4092 ix86_setup_incoming_varargs (cumulative_args_t cum_v,
4093 const function_arg_info &arg,
4094 int *, int no_rtl)
4095 {
4096 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4097 CUMULATIVE_ARGS next_cum;
4098 tree fntype;
4099
4100 /* This argument doesn't appear to be used anymore. Which is good,
4101 because the old code here didn't suppress rtl generation. */
4102 gcc_assert (!no_rtl);
4103
4104 if (!TARGET_64BIT)
4105 return;
4106
4107 fntype = TREE_TYPE (current_function_decl);
4108
4109 /* For varargs, we do not want to skip the dummy va_dcl argument.
4110 For stdargs, we do want to skip the last named argument. */
4111 next_cum = *cum;
4112 if (stdarg_p (fntype))
4113 ix86_function_arg_advance (pack_cumulative_args (&next_cum), arg);
4114
4115 if (cum->call_abi == MS_ABI)
4116 setup_incoming_varargs_ms_64 (&next_cum);
4117 else
4118 setup_incoming_varargs_64 (&next_cum);
4119 }
4120
4121 /* Checks if TYPE is of kind va_list char *. */
4122
4123 static bool
4124 is_va_list_char_pointer (tree type)
4125 {
4126 tree canonic;
4127
4128 /* For 32-bit it is always true. */
4129 if (!TARGET_64BIT)
4130 return true;
4131 canonic = ix86_canonical_va_list_type (type);
4132 return (canonic == ms_va_list_type_node
4133 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
4134 }
4135
4136 /* Implement va_start. */
4137
4138 static void
4139 ix86_va_start (tree valist, rtx nextarg)
4140 {
4141 HOST_WIDE_INT words, n_gpr, n_fpr;
4142 tree f_gpr, f_fpr, f_ovf, f_sav;
4143 tree gpr, fpr, ovf, sav, t;
4144 tree type;
4145 rtx ovf_rtx;
4146
4147 if (flag_split_stack
4148 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4149 {
4150 unsigned int scratch_regno;
4151
4152 /* When we are splitting the stack, we can't refer to the stack
4153 arguments using internal_arg_pointer, because they may be on
4154 the old stack. The split stack prologue will arrange to
4155 leave a pointer to the old stack arguments in a scratch
4156 register, which we here copy to a pseudo-register. The split
4157 stack prologue can't set the pseudo-register directly because
4158 it (the prologue) runs before any registers have been saved. */
4159
4160 scratch_regno = split_stack_prologue_scratch_regno ();
4161 if (scratch_regno != INVALID_REGNUM)
4162 {
4163 rtx reg;
4164 rtx_insn *seq;
4165
4166 reg = gen_reg_rtx (Pmode);
4167 cfun->machine->split_stack_varargs_pointer = reg;
4168
4169 start_sequence ();
4170 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
4171 seq = get_insns ();
4172 end_sequence ();
4173
4174 push_topmost_sequence ();
4175 emit_insn_after (seq, entry_of_function ());
4176 pop_topmost_sequence ();
4177 }
4178 }
4179
4180 /* Only 64bit target needs something special. */
4181 if (is_va_list_char_pointer (TREE_TYPE (valist)))
4182 {
4183 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4184 std_expand_builtin_va_start (valist, nextarg);
4185 else
4186 {
4187 rtx va_r, next;
4188
4189 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
4190 next = expand_binop (ptr_mode, add_optab,
4191 cfun->machine->split_stack_varargs_pointer,
4192 crtl->args.arg_offset_rtx,
4193 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4194 convert_move (va_r, next, 0);
4195 }
4196 return;
4197 }
4198
4199 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4200 f_fpr = DECL_CHAIN (f_gpr);
4201 f_ovf = DECL_CHAIN (f_fpr);
4202 f_sav = DECL_CHAIN (f_ovf);
4203
4204 valist = build_simple_mem_ref (valist);
4205 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
4206 /* The following should be folded into the MEM_REF offset. */
4207 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
4208 f_gpr, NULL_TREE);
4209 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
4210 f_fpr, NULL_TREE);
4211 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
4212 f_ovf, NULL_TREE);
4213 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
4214 f_sav, NULL_TREE);
4215
4216 /* Count number of gp and fp argument registers used. */
4217 words = crtl->args.info.words;
4218 n_gpr = crtl->args.info.regno;
4219 n_fpr = crtl->args.info.sse_regno;
4220
4221 if (cfun->va_list_gpr_size)
4222 {
4223 type = TREE_TYPE (gpr);
4224 t = build2 (MODIFY_EXPR, type,
4225 gpr, build_int_cst (type, n_gpr * 8));
4226 TREE_SIDE_EFFECTS (t) = 1;
4227 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4228 }
4229
4230 if (TARGET_SSE && cfun->va_list_fpr_size)
4231 {
4232 type = TREE_TYPE (fpr);
4233 t = build2 (MODIFY_EXPR, type, fpr,
4234 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
4235 TREE_SIDE_EFFECTS (t) = 1;
4236 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4237 }
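/* For example (an illustrative prototype, not taken from the sources),
   for "int f (int a, double b, ...)" one GP and one SSE register are
   consumed by the named arguments, so the assignments above initialize
   gp_offset = 1 * 8 = 8 and fp_offset = 1 * 16 + 48 = 64.  */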
4238
4239 /* Find the overflow area. */
4240 type = TREE_TYPE (ovf);
4241 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4242 ovf_rtx = crtl->args.internal_arg_pointer;
4243 else
4244 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
4245 t = make_tree (type, ovf_rtx);
4246 if (words != 0)
4247 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
4248
4249 t = build2 (MODIFY_EXPR, type, ovf, t);
4250 TREE_SIDE_EFFECTS (t) = 1;
4251 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4252
4253 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
4254 {
4255 /* Find the register save area.
4256 The function prologue saves it right above the stack frame. */
4257 type = TREE_TYPE (sav);
4258 t = make_tree (type, frame_pointer_rtx);
4259 if (!ix86_varargs_gpr_size)
4260 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
4261
4262 t = build2 (MODIFY_EXPR, type, sav, t);
4263 TREE_SIDE_EFFECTS (t) = 1;
4264 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4265 }
4266 }
4267
4268 /* Implement va_arg. */
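/* For a scalar integer argument the GIMPLE produced below is roughly
   (a sketch; the labels and temporaries are invented names):

     if (gp_offset >= 48) goto stack;
     addr = reg_save_area + gp_offset;
     gp_offset += 8;
     goto done;
   stack:
     addr = overflow_arg_area;
     overflow_arg_area = addr + 8;
   done:
     result = *(type *) addr;  */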
4269
4270 static tree
4271 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4272 gimple_seq *post_p)
4273 {
4274 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4275 tree f_gpr, f_fpr, f_ovf, f_sav;
4276 tree gpr, fpr, ovf, sav, t;
4277 int size, rsize;
4278 tree lab_false, lab_over = NULL_TREE;
4279 tree addr, t2;
4280 rtx container;
4281 int indirect_p = 0;
4282 tree ptrtype;
4283 machine_mode nat_mode;
4284 unsigned int arg_boundary;
4285 unsigned int type_align;
4286
4287 /* Only 64bit target needs something special. */
4288 if (is_va_list_char_pointer (TREE_TYPE (valist)))
4289 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4290
4291 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4292 f_fpr = DECL_CHAIN (f_gpr);
4293 f_ovf = DECL_CHAIN (f_fpr);
4294 f_sav = DECL_CHAIN (f_ovf);
4295
4296 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
4297 valist, f_gpr, NULL_TREE);
4298
4299 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4300 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4301 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4302
4303 indirect_p = pass_va_arg_by_reference (type);
4304 if (indirect_p)
4305 type = build_pointer_type (type);
4306 size = arg_int_size_in_bytes (type);
4307 rsize = CEIL (size, UNITS_PER_WORD);
4308
4309 nat_mode = type_natural_mode (type, NULL, false);
4310 switch (nat_mode)
4311 {
4312 case E_V8SFmode:
4313 case E_V8SImode:
4314 case E_V32QImode:
4315 case E_V16HImode:
4316 case E_V4DFmode:
4317 case E_V4DImode:
4318 case E_V16SFmode:
4319 case E_V16SImode:
4320 case E_V64QImode:
4321 case E_V32HImode:
4322 case E_V8DFmode:
4323 case E_V8DImode:
4324 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
4325 if (!TARGET_64BIT_MS_ABI)
4326 {
4327 container = NULL;
4328 break;
4329 }
4330 /* FALLTHRU */
4331
4332 default:
4333 container = construct_container (nat_mode, TYPE_MODE (type),
4334 type, 0, X86_64_REGPARM_MAX,
4335 X86_64_SSE_REGPARM_MAX, intreg,
4336 0);
4337 break;
4338 }
4339
4340 /* Pull the value out of the saved registers. */
4341
4342 addr = create_tmp_var (ptr_type_node, "addr");
4343 type_align = TYPE_ALIGN (type);
4344
4345 if (container)
4346 {
4347 int needed_intregs, needed_sseregs;
4348 bool need_temp;
4349 tree int_addr, sse_addr;
4350
4351 lab_false = create_artificial_label (UNKNOWN_LOCATION);
4352 lab_over = create_artificial_label (UNKNOWN_LOCATION);
4353
4354 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4355
4356 need_temp = (!REG_P (container)
4357 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4358 || TYPE_ALIGN (type) > 128));
4359
4360 /* In case we are passing a structure, verify that it occupies a consecutive
4361 block in the register save area. If not, we need to do moves. */
4362 if (!need_temp && !REG_P (container))
4363 {
4364 /* Verify that all registers are strictly consecutive */
4365 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4366 {
4367 int i;
4368
4369 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4370 {
4371 rtx slot = XVECEXP (container, 0, i);
4372 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4373 || INTVAL (XEXP (slot, 1)) != i * 16)
4374 need_temp = true;
4375 }
4376 }
4377 else
4378 {
4379 int i;
4380
4381 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4382 {
4383 rtx slot = XVECEXP (container, 0, i);
4384 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4385 || INTVAL (XEXP (slot, 1)) != i * 8)
4386 need_temp = true;
4387 }
4388 }
4389 }
4390 if (!need_temp)
4391 {
4392 int_addr = addr;
4393 sse_addr = addr;
4394 }
4395 else
4396 {
4397 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4398 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4399 }
4400
4401 /* First ensure that we fit completely in registers. */
4402 if (needed_intregs)
4403 {
4404 t = build_int_cst (TREE_TYPE (gpr),
4405 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
4406 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4407 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4408 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4409 gimplify_and_add (t, pre_p);
4410 }
4411 if (needed_sseregs)
4412 {
4413 t = build_int_cst (TREE_TYPE (fpr),
4414 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4415 + X86_64_REGPARM_MAX * 8);
4416 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4417 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4418 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4419 gimplify_and_add (t, pre_p);
4420 }
4421
4422 /* Compute index to start of area used for integer regs. */
4423 if (needed_intregs)
4424 {
4425 /* int_addr = gpr + sav; */
4426 t = fold_build_pointer_plus (sav, gpr);
4427 gimplify_assign (int_addr, t, pre_p);
4428 }
4429 if (needed_sseregs)
4430 {
4431 /* sse_addr = fpr + sav; */
4432 t = fold_build_pointer_plus (sav, fpr);
4433 gimplify_assign (sse_addr, t, pre_p);
4434 }
4435 if (need_temp)
4436 {
4437 int i, prev_size = 0;
4438 tree temp = create_tmp_var (type, "va_arg_tmp");
4439
4440 /* addr = &temp; */
4441 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4442 gimplify_assign (addr, t, pre_p);
4443
4444 for (i = 0; i < XVECLEN (container, 0); i++)
4445 {
4446 rtx slot = XVECEXP (container, 0, i);
4447 rtx reg = XEXP (slot, 0);
4448 machine_mode mode = GET_MODE (reg);
4449 tree piece_type;
4450 tree addr_type;
4451 tree daddr_type;
4452 tree src_addr, src;
4453 int src_offset;
4454 tree dest_addr, dest;
4455 int cur_size = GET_MODE_SIZE (mode);
4456
4457 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
4458 prev_size = INTVAL (XEXP (slot, 1));
4459 if (prev_size + cur_size > size)
4460 {
4461 cur_size = size - prev_size;
4462 unsigned int nbits = cur_size * BITS_PER_UNIT;
4463 if (!int_mode_for_size (nbits, 1).exists (&mode))
4464 mode = QImode;
4465 }
4466 piece_type = lang_hooks.types.type_for_mode (mode, 1);
4467 if (mode == GET_MODE (reg))
4468 addr_type = build_pointer_type (piece_type);
4469 else
4470 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
4471 true);
4472 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
4473 true);
4474
4475 if (SSE_REGNO_P (REGNO (reg)))
4476 {
4477 src_addr = sse_addr;
4478 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4479 }
4480 else
4481 {
4482 src_addr = int_addr;
4483 src_offset = REGNO (reg) * 8;
4484 }
4485 src_addr = fold_convert (addr_type, src_addr);
4486 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
4487
4488 dest_addr = fold_convert (daddr_type, addr);
4489 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
4490 if (cur_size == GET_MODE_SIZE (mode))
4491 {
4492 src = build_va_arg_indirect_ref (src_addr);
4493 dest = build_va_arg_indirect_ref (dest_addr);
4494
4495 gimplify_assign (dest, src, pre_p);
4496 }
4497 else
4498 {
4499 tree copy
4500 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
4501 3, dest_addr, src_addr,
4502 size_int (cur_size));
4503 gimplify_and_add (copy, pre_p);
4504 }
4505 prev_size += cur_size;
4506 }
4507 }
4508
4509 if (needed_intregs)
4510 {
4511 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4512 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4513 gimplify_assign (gpr, t, pre_p);
4514 /* The GPR save area guarantees only 8-byte alignment. */
4515 if (!need_temp)
4516 type_align = MIN (type_align, 64);
4517 }
4518
4519 if (needed_sseregs)
4520 {
4521 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4522 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4523 gimplify_assign (unshare_expr (fpr), t, pre_p);
4524 }
4525
4526 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
4527
4528 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
4529 }
4530
4531 /* ... otherwise out of the overflow area. */
4532
4533 /* When the caller aligns a parameter on the stack, a parameter whose
4534 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT is aligned at
4535 MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here with the
4536 caller. */
4537 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
4538 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
4539 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
4540
4541 /* Care for on-stack alignment if needed. */
4542 if (arg_boundary <= 64 || size == 0)
4543 t = ovf;
4544 else
4545 {
4546 HOST_WIDE_INT align = arg_boundary / 8;
4547 t = fold_build_pointer_plus_hwi (ovf, align - 1);
4548 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4549 build_int_cst (TREE_TYPE (t), -align));
4550 }
4551
4552 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4553 gimplify_assign (addr, t, pre_p);
4554
4555 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
4556 gimplify_assign (unshare_expr (ovf), t, pre_p);
4557
4558 if (container)
4559 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
4560
4561 type = build_aligned_type (type, type_align);
4562 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
4563 addr = fold_convert (ptrtype, addr);
4564
4565 if (indirect_p)
4566 addr = build_va_arg_indirect_ref (addr);
4567 return build_va_arg_indirect_ref (addr);
4568 }
4569
4570 /* Return true if OPNUM's MEM should be matched
4571 in movabs* patterns. */
4572
4573 bool
4574 ix86_check_movabs (rtx insn, int opnum)
4575 {
4576 rtx set, mem;
4577
4578 set = PATTERN (insn);
4579 if (GET_CODE (set) == PARALLEL)
4580 set = XVECEXP (set, 0, 0);
4581 gcc_assert (GET_CODE (set) == SET);
4582 mem = XEXP (set, opnum);
4583 while (SUBREG_P (mem))
4584 mem = SUBREG_REG (mem);
4585 gcc_assert (MEM_P (mem));
4586 return volatile_ok || !MEM_VOLATILE_P (mem);
4587 }
4588
4589 /* Return false if INSN contains a MEM with a non-default address space. */
4590 bool
4591 ix86_check_no_addr_space (rtx insn)
4592 {
4593 subrtx_var_iterator::array_type array;
4594 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
4595 {
4596 rtx x = *iter;
4597 if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
4598 return false;
4599 }
4600 return true;
4601 }
4602
4603 /* Initialize the table of extra 80387 mathematical constants. */
4604
4605 static void
4606 init_ext_80387_constants (void)
4607 {
4608 static const char * cst[5] =
4609 {
4610 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4611 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4612 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4613 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4614 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4615 };
4616 int i;
4617
4618 for (i = 0; i < 5; i++)
4619 {
4620 real_from_string (&ext_80387_constants_table[i], cst[i]);
4621 /* Ensure each constant is rounded to XFmode precision. */
4622 real_convert (&ext_80387_constants_table[i],
4623 XFmode, &ext_80387_constants_table[i]);
4624 }
4625
4626 ext_80387_constants_init = 1;
4627 }
4628
4629 /* Return non-zero if the constant is something that
4630 can be loaded with a special instruction. */
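/* As a sketch of the encoding used below: 1 and 2 select fldz and fld1,
   3..7 select fldlg2, fldln2, fldl2e, fldl2t and fldpi, 8 and 9 stand
   for -0.0 and -1.0 (split into fldz;fchs or fld1;fchs), 0 means no
   special instruction applies, and -1 means X is not an x87 constant
   at all.  */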
4631
4632 int
4633 standard_80387_constant_p (rtx x)
4634 {
4635 machine_mode mode = GET_MODE (x);
4636
4637 const REAL_VALUE_TYPE *r;
4638
4639 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
4640 return -1;
4641
4642 if (x == CONST0_RTX (mode))
4643 return 1;
4644 if (x == CONST1_RTX (mode))
4645 return 2;
4646
4647 r = CONST_DOUBLE_REAL_VALUE (x);
4648
4649 /* For XFmode constants, try to find a special 80387 instruction when
4650 optimizing for size or on those CPUs that benefit from them. */
4651 if (mode == XFmode
4652 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)
4653 && !flag_rounding_math)
4654 {
4655 int i;
4656
4657 if (! ext_80387_constants_init)
4658 init_ext_80387_constants ();
4659
4660 for (i = 0; i < 5; i++)
4661 if (real_identical (r, &ext_80387_constants_table[i]))
4662 return i + 3;
4663 }
4664
4665 /* Load of the constant -0.0 or -1.0 will be split as
4666 fldz;fchs or fld1;fchs sequence. */
4667 if (real_isnegzero (r))
4668 return 8;
4669 if (real_identical (r, &dconstm1))
4670 return 9;
4671
4672 return 0;
4673 }
4674
4675 /* Return the opcode of the special instruction to be used to load
4676 the constant X. */
4677
4678 const char *
4679 standard_80387_constant_opcode (rtx x)
4680 {
4681 switch (standard_80387_constant_p (x))
4682 {
4683 case 1:
4684 return "fldz";
4685 case 2:
4686 return "fld1";
4687 case 3:
4688 return "fldlg2";
4689 case 4:
4690 return "fldln2";
4691 case 5:
4692 return "fldl2e";
4693 case 6:
4694 return "fldl2t";
4695 case 7:
4696 return "fldpi";
4697 case 8:
4698 case 9:
4699 return "#";
4700 default:
4701 gcc_unreachable ();
4702 }
4703 }
4704
4705 /* Return the CONST_DOUBLE representing the 80387 constant that is
4706 loaded by the specified special instruction. The argument IDX
4707 matches the return value from standard_80387_constant_p. */
4708
4709 rtx
4710 standard_80387_constant_rtx (int idx)
4711 {
4712 int i;
4713
4714 if (! ext_80387_constants_init)
4715 init_ext_80387_constants ();
4716
4717 switch (idx)
4718 {
4719 case 3:
4720 case 4:
4721 case 5:
4722 case 6:
4723 case 7:
4724 i = idx - 3;
4725 break;
4726
4727 default:
4728 gcc_unreachable ();
4729 }
4730
4731 return const_double_from_real_value (ext_80387_constants_table[i],
4732 XFmode);
4733 }
4734
4735 /* Return 1 if X is all bits 0 and 2 if X is all bits 1
4736 in supported SSE/AVX vector mode. */
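/* For example (illustration only), an all-zeros V4SFmode constant
   yields 1 (loadable with xorps), and an all-ones V4SImode constant
   yields 2 when SSE2 is enabled (loadable with pcmpeqd); see
   standard_sse_constant_opcode below for the actual templates.  */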
4737
4738 int
4739 standard_sse_constant_p (rtx x, machine_mode pred_mode)
4740 {
4741 machine_mode mode;
4742
4743 if (!TARGET_SSE)
4744 return 0;
4745
4746 mode = GET_MODE (x);
4747
4748 if (x == const0_rtx || const0_operand (x, mode))
4749 return 1;
4750
4751 if (x == constm1_rtx || vector_all_ones_operand (x, mode))
4752 {
4753 /* VOIDmode integer constant, get mode from the predicate. */
4754 if (mode == VOIDmode)
4755 mode = pred_mode;
4756
4757 switch (GET_MODE_SIZE (mode))
4758 {
4759 case 64:
4760 if (TARGET_AVX512F)
4761 return 2;
4762 break;
4763 case 32:
4764 if (TARGET_AVX2)
4765 return 2;
4766 break;
4767 case 16:
4768 if (TARGET_SSE2)
4769 return 2;
4770 break;
4771 case 0:
4772 /* VOIDmode */
4773 gcc_unreachable ();
4774 default:
4775 break;
4776 }
4777 }
4778
4779 return 0;
4780 }
4781
4782 /* Return the opcode of the special instruction to be used to load
4783 the constant operands[1] into operands[0]. */
4784
4785 const char *
4786 standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
4787 {
4788 machine_mode mode;
4789 rtx x = operands[1];
4790
4791 gcc_assert (TARGET_SSE);
4792
4793 mode = GET_MODE (x);
4794
4795 if (x == const0_rtx || const0_operand (x, mode))
4796 {
4797 switch (get_attr_mode (insn))
4798 {
4799 case MODE_TI:
4800 if (!EXT_REX_SSE_REG_P (operands[0]))
4801 return "%vpxor\t%0, %d0";
4802 /* FALLTHRU */
4803 case MODE_XI:
4804 case MODE_OI:
4805 if (EXT_REX_SSE_REG_P (operands[0]))
4806 return (TARGET_AVX512VL
4807 ? "vpxord\t%x0, %x0, %x0"
4808 : "vpxord\t%g0, %g0, %g0");
4809 return "vpxor\t%x0, %x0, %x0";
4810
4811 case MODE_V2DF:
4812 if (!EXT_REX_SSE_REG_P (operands[0]))
4813 return "%vxorpd\t%0, %d0";
4814 /* FALLTHRU */
4815 case MODE_V8DF:
4816 case MODE_V4DF:
4817 if (!EXT_REX_SSE_REG_P (operands[0]))
4818 return "vxorpd\t%x0, %x0, %x0";
4819 else if (TARGET_AVX512DQ)
4820 return (TARGET_AVX512VL
4821 ? "vxorpd\t%x0, %x0, %x0"
4822 : "vxorpd\t%g0, %g0, %g0");
4823 else
4824 return (TARGET_AVX512VL
4825 ? "vpxorq\t%x0, %x0, %x0"
4826 : "vpxorq\t%g0, %g0, %g0");
4827
4828 case MODE_V4SF:
4829 if (!EXT_REX_SSE_REG_P (operands[0]))
4830 return "%vxorps\t%0, %d0";
4831 /* FALLTHRU */
4832 case MODE_V16SF:
4833 case MODE_V8SF:
4834 if (!EXT_REX_SSE_REG_P (operands[0]))
4835 return "vxorps\t%x0, %x0, %x0";
4836 else if (TARGET_AVX512DQ)
4837 return (TARGET_AVX512VL
4838 ? "vxorps\t%x0, %x0, %x0"
4839 : "vxorps\t%g0, %g0, %g0");
4840 else
4841 return (TARGET_AVX512VL
4842 ? "vpxord\t%x0, %x0, %x0"
4843 : "vpxord\t%g0, %g0, %g0");
4844
4845 default:
4846 gcc_unreachable ();
4847 }
4848 }
4849 else if (x == constm1_rtx || vector_all_ones_operand (x, mode))
4850 {
4851 enum attr_mode insn_mode = get_attr_mode (insn);
4852
4853 switch (insn_mode)
4854 {
4855 case MODE_XI:
4856 case MODE_V8DF:
4857 case MODE_V16SF:
4858 gcc_assert (TARGET_AVX512F);
4859 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
4860
4861 case MODE_OI:
4862 case MODE_V4DF:
4863 case MODE_V8SF:
4864 gcc_assert (TARGET_AVX2);
4865 /* FALLTHRU */
4866 case MODE_TI:
4867 case MODE_V2DF:
4868 case MODE_V4SF:
4869 gcc_assert (TARGET_SSE2);
4870 if (!EXT_REX_SSE_REG_P (operands[0]))
4871 return (TARGET_AVX
4872 ? "vpcmpeqd\t%0, %0, %0"
4873 : "pcmpeqd\t%0, %0");
4874 else if (TARGET_AVX512VL)
4875 return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
4876 else
4877 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
4878
4879 default:
4880 gcc_unreachable ();
4881 }
4882 }
4883
4884 gcc_unreachable ();
4885 }
4886
4887 /* Returns true if INSN can be transformed from a memory load
4888 to a supported FP constant load. */
4889
4890 bool
4891 ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
4892 {
4893 rtx src = find_constant_src (insn);
4894
4895 gcc_assert (REG_P (dst));
4896
4897 if (src == NULL
4898 || (SSE_REGNO_P (REGNO (dst))
4899 && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
4900 || (STACK_REGNO_P (REGNO (dst))
4901 && standard_80387_constant_p (src) < 1))
4902 return false;
4903
4904 return true;
4905 }
4906
4907 /* Predicate for pre-reload splitters with associated instructions,
4908 which can match any time before the split1 pass (usually combine),
4909 then are unconditionally split in that pass and should not be
4910 matched again afterwards. */
4911
4912 bool
4913 ix86_pre_reload_split (void)
4914 {
4915 return (can_create_pseudo_p ()
4916 && !(cfun->curr_properties & PROP_rtl_split_insns));
4917 }
4918
4919 /* Return the opcode of the TYPE_SSEMOV instruction. To move from
4920 or to xmm16-xmm31/ymm16-ymm31 registers, we either require
4921 TARGET_AVX512VL, or it must be a register-to-register move that
4922 can be done as a zmm register move. */
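/* For example (a sketch, AT&T syntax): a TImode register-to-register
   move from %xmm17 to %xmm16 without AVX512VL is emitted as the
   512-bit "vmovdqa32 %zmm17, %zmm16", since only the zmm form of the
   move can encode those registers without AVX512VL.  */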
4923
4924 static const char *
4925 ix86_get_ssemov (rtx *operands, unsigned size,
4926 enum attr_mode insn_mode, machine_mode mode)
4927 {
4928 char buf[128];
4929 bool misaligned_p = (misaligned_operand (operands[0], mode)
4930 || misaligned_operand (operands[1], mode));
4931 bool evex_reg_p = (size == 64
4932 || EXT_REX_SSE_REG_P (operands[0])
4933 || EXT_REX_SSE_REG_P (operands[1]));
4934 machine_mode scalar_mode;
4935
4936 const char *opcode = NULL;
4937 enum
4938 {
4939 opcode_int,
4940 opcode_float,
4941 opcode_double
4942 } type = opcode_int;
4943
4944 switch (insn_mode)
4945 {
4946 case MODE_V16SF:
4947 case MODE_V8SF:
4948 case MODE_V4SF:
4949 scalar_mode = E_SFmode;
4950 type = opcode_float;
4951 break;
4952 case MODE_V8DF:
4953 case MODE_V4DF:
4954 case MODE_V2DF:
4955 scalar_mode = E_DFmode;
4956 type = opcode_double;
4957 break;
4958 case MODE_XI:
4959 case MODE_OI:
4960 case MODE_TI:
4961 scalar_mode = GET_MODE_INNER (mode);
4962 break;
4963 default:
4964 gcc_unreachable ();
4965 }
4966
4967 /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
4968 we can only use zmm register move without memory operand. */
4969 if (evex_reg_p
4970 && !TARGET_AVX512VL
4971 && GET_MODE_SIZE (mode) < 64)
4972 {
4973 /* NB: Even though ix86_hard_regno_mode_ok doesn't allow
4974 xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when
4975 AVX512VL is disabled, LRA can still generate reg to
4976 reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit
4977 modes. */
4978 if (memory_operand (operands[0], mode)
4979 || memory_operand (operands[1], mode))
4980 gcc_unreachable ();
4981 size = 64;
4982 switch (type)
4983 {
4984 case opcode_int:
4985 opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
4986 break;
4987 case opcode_float:
4988 opcode = misaligned_p ? "vmovups" : "vmovaps";
4989 break;
4990 case opcode_double:
4991 opcode = misaligned_p ? "vmovupd" : "vmovapd";
4992 break;
4993 }
4994 }
4995 else if (SCALAR_FLOAT_MODE_P (scalar_mode))
4996 {
4997 switch (scalar_mode)
4998 {
4999 case E_SFmode:
5000 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5001 break;
5002 case E_DFmode:
5003 opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
5004 break;
5005 case E_TFmode:
5006 if (evex_reg_p)
5007 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5008 else
5009 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5010 break;
5011 default:
5012 gcc_unreachable ();
5013 }
5014 }
5015 else if (SCALAR_INT_MODE_P (scalar_mode))
5016 {
5017 switch (scalar_mode)
5018 {
5019 case E_QImode:
5020 if (evex_reg_p)
5021 opcode = (misaligned_p
5022 ? (TARGET_AVX512BW
5023 ? "vmovdqu8"
5024 : "vmovdqu64")
5025 : "vmovdqa64");
5026 else
5027 opcode = (misaligned_p
5028 ? (TARGET_AVX512BW
5029 ? "vmovdqu8"
5030 : "%vmovdqu")
5031 : "%vmovdqa");
5032 break;
5033 case E_HImode:
5034 if (evex_reg_p)
5035 opcode = (misaligned_p
5036 ? (TARGET_AVX512BW
5037 ? "vmovdqu16"
5038 : "vmovdqu64")
5039 : "vmovdqa64");
5040 else
5041 opcode = (misaligned_p
5042 ? (TARGET_AVX512BW
5043 ? "vmovdqu16"
5044 : "%vmovdqu")
5045 : "%vmovdqa");
5046 break;
5047 case E_SImode:
5048 if (evex_reg_p)
5049 opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
5050 else
5051 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5052 break;
5053 case E_DImode:
5054 case E_TImode:
5055 case E_OImode:
5056 if (evex_reg_p)
5057 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5058 else
5059 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5060 break;
5061 case E_XImode:
5062 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5063 break;
5064 default:
5065 gcc_unreachable ();
5066 }
5067 }
5068 else
5069 gcc_unreachable ();
5070
5071 switch (size)
5072 {
5073 case 64:
5074 snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}",
5075 opcode);
5076 break;
5077 case 32:
5078 snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}",
5079 opcode);
5080 break;
5081 case 16:
5082 snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}",
5083 opcode);
5084 break;
5085 default:
5086 gcc_unreachable ();
5087 }
5088 output_asm_insn (buf, operands);
5089 return "";
5090 }
5091
5092 /* Return the template of the TYPE_SSEMOV instruction to move
5093 operands[1] into operands[0]. */
5094
5095 const char *
5096 ix86_output_ssemov (rtx_insn *insn, rtx *operands)
5097 {
5098 machine_mode mode = GET_MODE (operands[0]);
5099 if (get_attr_type (insn) != TYPE_SSEMOV
5100 || mode != GET_MODE (operands[1]))
5101 gcc_unreachable ();
5102
5103 enum attr_mode insn_mode = get_attr_mode (insn);
5104
5105 switch (insn_mode)
5106 {
5107 case MODE_XI:
5108 case MODE_V8DF:
5109 case MODE_V16SF:
5110 return ix86_get_ssemov (operands, 64, insn_mode, mode);
5111
5112 case MODE_OI:
5113 case MODE_V4DF:
5114 case MODE_V8SF:
5115 return ix86_get_ssemov (operands, 32, insn_mode, mode);
5116
5117 case MODE_TI:
5118 case MODE_V2DF:
5119 case MODE_V4SF:
5120 return ix86_get_ssemov (operands, 16, insn_mode, mode);
5121
5122 case MODE_DI:
5123 /* Handle broken assemblers that require movd instead of movq. */
5124 if (!HAVE_AS_IX86_INTERUNIT_MOVQ
5125 && (GENERAL_REG_P (operands[0])
5126 || GENERAL_REG_P (operands[1])))
5127 return "%vmovd\t{%1, %0|%0, %1}";
5128 else
5129 return "%vmovq\t{%1, %0|%0, %1}";
5130
5131 case MODE_SI:
5132 return "%vmovd\t{%1, %0|%0, %1}";
5133
5134 case MODE_DF:
5135 if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5136 return "vmovsd\t{%d1, %0|%0, %d1}";
5137 else
5138 return "%vmovsd\t{%1, %0|%0, %1}";
5139
5140 case MODE_SF:
5141 if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5142 return "vmovss\t{%d1, %0|%0, %d1}";
5143 else
5144 return "%vmovss\t{%1, %0|%0, %1}";
5145
5146 case MODE_V1DF:
5147 gcc_assert (!TARGET_AVX);
5148 return "movlpd\t{%1, %0|%0, %1}";
5149
5150 case MODE_V2SF:
5151 if (TARGET_AVX && REG_P (operands[0]))
5152 return "vmovlps\t{%1, %d0|%d0, %1}";
5153 else
5154 return "%vmovlps\t{%1, %0|%0, %1}";
5155
5156 default:
5157 gcc_unreachable ();
5158 }
5159 }
5160
5161 /* Returns true if OP contains a symbol reference */
5162
5163 bool
5164 symbolic_reference_mentioned_p (rtx op)
5165 {
5166 const char *fmt;
5167 int i;
5168
5169 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5170 return true;
5171
5172 fmt = GET_RTX_FORMAT (GET_CODE (op));
5173 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5174 {
5175 if (fmt[i] == 'E')
5176 {
5177 int j;
5178
5179 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5180 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5181 return true;
5182 }
5183
5184 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5185 return true;
5186 }
5187
5188 return false;
5189 }
5190
5191 /* Return true if it is appropriate to emit `ret' instructions in the
5192 body of a function. Do this only if the epilogue is simple, needing a
5193 couple of insns. Prior to reloading, we can't tell how many registers
5194 must be saved, so return false then. Return false if there is no frame
5195 marker to de-allocate. */
5196
5197 bool
5198 ix86_can_use_return_insn_p (void)
5199 {
5200 if (ix86_function_naked (current_function_decl))
5201 return false;
5202
5203 /* Don't use `ret' instruction in interrupt handler. */
5204 if (! reload_completed
5205 || frame_pointer_needed
5206 || cfun->machine->func_type != TYPE_NORMAL)
5207 return 0;
5208
5209 /* Don't allow more than 32k pop, since that's all we can do
5210 with one instruction. */
5211 if (crtl->args.pops_args && crtl->args.size >= 32768)
5212 return 0;
5213
5214 struct ix86_frame &frame = cfun->machine->frame;
5215 return (frame.stack_pointer_offset == UNITS_PER_WORD
5216 && (frame.nregs + frame.nsseregs) == 0);
5217 }
5218
5219 /* Return the stack frame size. get_frame_size () returns the stack slots
5220 used during compilation, which may be optimized out later. If a stack
5221 frame is needed, stack_frame_required should be true. */
5222
5223 static HOST_WIDE_INT
5224 ix86_get_frame_size (void)
5225 {
5226 if (cfun->machine->stack_frame_required)
5227 return get_frame_size ();
5228 else
5229 return 0;
5230 }
5231
5232 /* Value should be nonzero if functions must have frame pointers.
5233 Zero means the frame pointer need not be set up (and parms may
5234 be accessed via the stack pointer) in functions that seem suitable. */
5235
5236 static bool
5237 ix86_frame_pointer_required (void)
5238 {
5239 /* If we accessed previous frames, then the generated code expects
5240 to be able to access the saved ebp value in our frame. */
5241 if (cfun->machine->accesses_prev_frame)
5242 return true;
5243
5244 /* Several x86 os'es need a frame pointer for other reasons,
5245 usually pertaining to setjmp. */
5246 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5247 return true;
5248
5249 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
5250 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
5251 return true;
5252
5253 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
5254 allocation is 4GB. */
5255 if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
5256 return true;
5257
5258 /* SSE saves require frame-pointer when stack is misaligned. */
5259 if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
5260 return true;
5261
5262 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
5263 turns off the frame pointer by default. Turn it back on now if
5264 we've not got a leaf function. */
5265 if (TARGET_OMIT_LEAF_FRAME_POINTER
5266 && (!crtl->is_leaf
5267 || ix86_current_function_calls_tls_descriptor))
5268 return true;
5269
5270 if (crtl->profile && !flag_fentry)
5271 return true;
5272
5273 return false;
5274 }
5275
5276 /* Record that the current function accesses previous call frames. */
5277
5278 void
5279 ix86_setup_frame_addresses (void)
5280 {
5281 cfun->machine->accesses_prev_frame = 1;
5282 }
5283
5284 #ifndef USE_HIDDEN_LINKONCE
5285 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
5286 # define USE_HIDDEN_LINKONCE 1
5287 # else
5288 # define USE_HIDDEN_LINKONCE 0
5289 # endif
5290 #endif
5291
5292 /* Label count for call and return thunks. It is used to make unique
5293 labels in call and return thunks. */
5294 static int indirectlabelno;
5295
5296 /* True if call thunk function is needed. */
5297 static bool indirect_thunk_needed = false;
5298
5299 /* Bit masks of integer registers, which contain branch target, used
5300 by call thunk functions. */
5301 static int indirect_thunks_used;
5302
5303 /* True if return thunk function is needed. */
5304 static bool indirect_return_needed = false;
5305
5306 /* True if return thunk function via CX is needed. */
5307 static bool indirect_return_via_cx;
5308
5309 #ifndef INDIRECT_LABEL
5310 # define INDIRECT_LABEL "LIND"
5311 #endif
5312
5313 /* Indicate what prefix is needed for an indirect branch. */
5314 enum indirect_thunk_prefix
5315 {
5316 indirect_thunk_prefix_none,
5317 indirect_thunk_prefix_nt
5318 };
5319
5320 /* Return the prefix needed for an indirect branch INSN. */
5321
5322 enum indirect_thunk_prefix
5323 indirect_thunk_need_prefix (rtx_insn *insn)
5324 {
5325 enum indirect_thunk_prefix need_prefix;
5326 if ((cfun->machine->indirect_branch_type
5327 == indirect_branch_thunk_extern)
5328 && ix86_notrack_prefixed_insn_p (insn))
5329 {
5330 /* NOTRACK prefix is only used with external thunk so that it
5331 can be properly updated to support CET at run-time. */
5332 need_prefix = indirect_thunk_prefix_nt;
5333 }
5334 else
5335 need_prefix = indirect_thunk_prefix_none;
5336 return need_prefix;
5337 }
5338
5339 /* Fills in the label name that should be used for the indirect thunk. */
5340
5341 static void
5342 indirect_thunk_name (char name[32], unsigned int regno,
5343 enum indirect_thunk_prefix need_prefix,
5344 bool ret_p)
5345 {
5346 if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
5347 gcc_unreachable ();
5348
5349 if (USE_HIDDEN_LINKONCE)
5350 {
5351 const char *prefix;
5352
5353 if (need_prefix == indirect_thunk_prefix_nt
5354 && regno != INVALID_REGNUM)
5355 {
5356 /* NOTRACK prefix is only used with external thunk via
5357 register so that NOTRACK prefix can be added to indirect
5358 branch via register to support CET at run-time. */
5359 prefix = "_nt";
5360 }
5361 else
5362 prefix = "";
5363
5364 const char *ret = ret_p ? "return" : "indirect";
5365
5366 if (regno != INVALID_REGNUM)
5367 {
5368 const char *reg_prefix;
5369 if (LEGACY_INT_REGNO_P (regno))
5370 reg_prefix = TARGET_64BIT ? "r" : "e";
5371 else
5372 reg_prefix = "";
5373 sprintf (name, "__x86_%s_thunk%s_%s%s",
5374 ret, prefix, reg_prefix, reg_names[regno]);
5375 }
5376 else
5377 sprintf (name, "__x86_%s_thunk%s", ret, prefix);
5378 }
5379 else
5380 {
5381 if (regno != INVALID_REGNUM)
5382 ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
5383 else
5384 {
5385 if (ret_p)
5386 ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
5387 else
5388 ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
5389 }
5390 }
5391 }
5392
5393 /* Output a call and return thunk for indirect branch. If REGNO != -1,
5394 the function address is in REGNO and the call and return thunk looks like:
5395
5396 call L2
5397 L1:
5398 pause
5399 lfence
5400 jmp L1
5401 L2:
5402 mov %REG, (%sp)
5403 ret
5404
5405 Otherwise, the function address is on the top of stack and the
5406 call and return thunk looks like:
5407
5408 call L2
5409 L1:
5410 pause
5411 lfence
5412 jmp L1
5413 L2:
5414 lea WORD_SIZE(%sp), %sp
5415 ret
5416 */
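/* As an illustration (a sketch, not emitted by this function itself):
   with -mindirect-branch=thunk an indirect call through %rax is
   emitted as "call __x86_indirect_thunk_rax", and with
   -mfunction-return=thunk a function return is emitted as
   "jmp __x86_return_thunk"; the thunk bodies follow the patterns
   shown above.  */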
5417
5418 static void
5419 output_indirect_thunk (unsigned int regno)
5420 {
5421 char indirectlabel1[32];
5422 char indirectlabel2[32];
5423
5424 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
5425 indirectlabelno++);
5426 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
5427 indirectlabelno++);
5428
5429 /* Call */
5430 fputs ("\tcall\t", asm_out_file);
5431 assemble_name_raw (asm_out_file, indirectlabel2);
5432 fputc ('\n', asm_out_file);
5433
5434 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
5435
5436 /* AMD and Intel CPUs each prefer a different instruction as the loop filler.
5437 Using both pause + lfence is a compromise solution. */
5438 fprintf (asm_out_file, "\tpause\n\tlfence\n");
5439
5440 /* Jump. */
5441 fputs ("\tjmp\t", asm_out_file);
5442 assemble_name_raw (asm_out_file, indirectlabel1);
5443 fputc ('\n', asm_out_file);
5444
5445 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
5446
5447 /* The above call insn pushed a word to stack. Adjust CFI info. */
5448 if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
5449 {
5450 if (! dwarf2out_do_cfi_asm ())
5451 {
5452 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
5453 xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
5454 xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
5455 vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
5456 }
5457 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
5458 xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
5459 xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
5460 vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
5461 dwarf2out_emit_cfi (xcfi);
5462 }
5463
5464 if (regno != INVALID_REGNUM)
5465 {
5466 /* MOV. */
5467 rtx xops[2];
5468 xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
5469 xops[1] = gen_rtx_REG (word_mode, regno);
5470 output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
5471 }
5472 else
5473 {
5474 /* LEA. */
5475 rtx xops[2];
5476 xops[0] = stack_pointer_rtx;
5477 xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
5478 output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
5479 }
5480
5481 fputs ("\tret\n", asm_out_file);
5482 }
5483
5484 /* Output a function with a call and return thunk for indirect branch.
5485 If REGNO != INVALID_REGNUM, the function address is in REGNO.
5486 Otherwise, the function address is on the top of stack. Thunk is
5487 used for function return if RET_P is true. */
5488
5489 static void
5490 output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
5491 unsigned int regno, bool ret_p)
5492 {
5493 char name[32];
5494 tree decl;
5495
5496 /* Create __x86_indirect_thunk. */
5497 indirect_thunk_name (name, regno, need_prefix, ret_p);
5498 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
5499 get_identifier (name),
5500 build_function_type_list (void_type_node, NULL_TREE));
5501 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
5502 NULL_TREE, void_type_node);
5503 TREE_PUBLIC (decl) = 1;
5504 TREE_STATIC (decl) = 1;
5505 DECL_IGNORED_P (decl) = 1;
5506
5507 #if TARGET_MACHO
5508 if (TARGET_MACHO)
5509 {
5510 switch_to_section (darwin_sections[picbase_thunk_section]);
5511 fputs ("\t.weak_definition\t", asm_out_file);
5512 assemble_name (asm_out_file, name);
5513 fputs ("\n\t.private_extern\t", asm_out_file);
5514 assemble_name (asm_out_file, name);
5515 putc ('\n', asm_out_file);
5516 ASM_OUTPUT_LABEL (asm_out_file, name);
5517 DECL_WEAK (decl) = 1;
5518 }
5519 else
5520 #endif
5521 if (USE_HIDDEN_LINKONCE)
5522 {
5523 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
5524
5525 targetm.asm_out.unique_section (decl, 0);
5526 switch_to_section (get_named_section (decl, NULL, 0));
5527
5528 targetm.asm_out.globalize_label (asm_out_file, name);
5529 fputs ("\t.hidden\t", asm_out_file);
5530 assemble_name (asm_out_file, name);
5531 putc ('\n', asm_out_file);
5532 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5533 }
5534 else
5535 {
5536 switch_to_section (text_section);
5537 ASM_OUTPUT_LABEL (asm_out_file, name);
5538 }
5539
5540 DECL_INITIAL (decl) = make_node (BLOCK);
5541 current_function_decl = decl;
5542 allocate_struct_function (decl, false);
5543 init_function_start (decl);
5544 /* We're about to hide the function body from callees of final_* by
5545 emitting it directly; tell them we're a thunk, if they care. */
5546 cfun->is_thunk = true;
5547 first_function_block_is_cold = false;
5548 /* Make sure unwind info is emitted for the thunk if needed. */
5549 final_start_function (emit_barrier (), asm_out_file, 1);
5550
5551 output_indirect_thunk (regno);
5552
5553 final_end_function ();
5554 init_insn_lengths ();
5555 free_after_compilation (cfun);
5556 set_cfun (NULL);
5557 current_function_decl = NULL;
5558 }
5559
5560 static int pic_labels_used;
5561
5562 /* Fills in the label name that should be used for a pc thunk for
5563 the given register. */
5564
5565 static void
5566 get_pc_thunk_name (char name[32], unsigned int regno)
5567 {
5568 gcc_assert (!TARGET_64BIT);
5569
5570 if (USE_HIDDEN_LINKONCE)
5571 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
5572 else
5573 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5574 }
5575
5576
5577 /* This function generates code for -fpic that loads %ebx with
5578 the return address of the caller and then returns. */
5579
5580 static void
5581 ix86_code_end (void)
5582 {
5583 rtx xops[2];
5584 unsigned int regno;
5585
5586 if (indirect_return_needed)
5587 output_indirect_thunk_function (indirect_thunk_prefix_none,
5588 INVALID_REGNUM, true);
5589 if (indirect_return_via_cx)
5590 output_indirect_thunk_function (indirect_thunk_prefix_none,
5591 CX_REG, true);
5592 if (indirect_thunk_needed)
5593 output_indirect_thunk_function (indirect_thunk_prefix_none,
5594 INVALID_REGNUM, false);
5595
5596 for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
5597 {
5598 unsigned int i = regno - FIRST_REX_INT_REG + LAST_INT_REG + 1;
5599 if ((indirect_thunks_used & (1 << i)))
5600 output_indirect_thunk_function (indirect_thunk_prefix_none,
5601 regno, false);
5602 }
5603
5604 for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
5605 {
5606 char name[32];
5607 tree decl;
5608
5609 if ((indirect_thunks_used & (1 << regno)))
5610 output_indirect_thunk_function (indirect_thunk_prefix_none,
5611 regno, false);
5612
5613 if (!(pic_labels_used & (1 << regno)))
5614 continue;
5615
5616 get_pc_thunk_name (name, regno);
5617
5618 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
5619 get_identifier (name),
5620 build_function_type_list (void_type_node, NULL_TREE));
5621 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
5622 NULL_TREE, void_type_node);
5623 TREE_PUBLIC (decl) = 1;
5624 TREE_STATIC (decl) = 1;
5625 DECL_IGNORED_P (decl) = 1;
5626
5627 #if TARGET_MACHO
5628 if (TARGET_MACHO)
5629 {
5630 switch_to_section (darwin_sections[picbase_thunk_section]);
5631 fputs ("\t.weak_definition\t", asm_out_file);
5632 assemble_name (asm_out_file, name);
5633 fputs ("\n\t.private_extern\t", asm_out_file);
5634 assemble_name (asm_out_file, name);
5635 putc ('\n', asm_out_file);
5636 ASM_OUTPUT_LABEL (asm_out_file, name);
5637 DECL_WEAK (decl) = 1;
5638 }
5639 else
5640 #endif
5641 if (USE_HIDDEN_LINKONCE)
5642 {
5643 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
5644
5645 targetm.asm_out.unique_section (decl, 0);
5646 switch_to_section (get_named_section (decl, NULL, 0));
5647
5648 targetm.asm_out.globalize_label (asm_out_file, name);
5649 fputs ("\t.hidden\t", asm_out_file);
5650 assemble_name (asm_out_file, name);
5651 putc ('\n', asm_out_file);
5652 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5653 }
5654 else
5655 {
5656 switch_to_section (text_section);
5657 ASM_OUTPUT_LABEL (asm_out_file, name);
5658 }
5659
5660 DECL_INITIAL (decl) = make_node (BLOCK);
5661 current_function_decl = decl;
5662 allocate_struct_function (decl, false);
5663 init_function_start (decl);
5664 /* We're about to hide the function body from callees of final_* by
5665 emitting it directly; tell them we're a thunk, if they care. */
5666 cfun->is_thunk = true;
5667 first_function_block_is_cold = false;
5668 /* Make sure unwind info is emitted for the thunk if needed. */
5669 final_start_function (emit_barrier (), asm_out_file, 1);
5670
5671 /* Pad stack IP move with 4 instructions (two NOPs count
5672 as one instruction). */
5673 if (TARGET_PAD_SHORT_FUNCTION)
5674 {
5675 int i = 8;
5676
5677 while (i--)
5678 fputs ("\tnop\n", asm_out_file);
5679 }
5680
5681 xops[0] = gen_rtx_REG (Pmode, regno);
5682 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
5683 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
5684 output_asm_insn ("%!ret", NULL);
5685 final_end_function ();
5686 init_insn_lengths ();
5687 free_after_compilation (cfun);
5688 set_cfun (NULL);
5689 current_function_decl = NULL;
5690 }
5691
5692 if (flag_split_stack)
5693 file_end_indicate_split_stack ();
5694 }
5695
5696 /* Emit code for the SET_GOT patterns. */
5697
5698 const char *
5699 output_set_got (rtx dest, rtx label)
5700 {
5701 rtx xops[3];
5702
5703 xops[0] = dest;
5704
5705 if (TARGET_VXWORKS_RTP && flag_pic)
5706 {
5707 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5708 xops[2] = gen_rtx_MEM (Pmode,
5709 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5710 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5711
5712 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5713 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5714 an unadorned address. */
5715 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5716 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5717 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5718 return "";
5719 }
5720
5721 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5722
5723 if (flag_pic)
5724 {
5725 char name[32];
5726 get_pc_thunk_name (name, REGNO (dest));
5727 pic_labels_used |= 1 << REGNO (dest);
5728
5729 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5730 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5731 output_asm_insn ("%!call\t%X2", xops);
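/* The pc thunk returns with the address of the instruction following the
   call in DEST; the add of _GLOBAL_OFFSET_TABLE_ at the end of this
   function then turns that address into the GOT pointer. */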
5732
5733 #if TARGET_MACHO
5734 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
5735 This is what will be referenced by the Mach-O PIC subsystem. */
5736 if (machopic_should_output_picbase_label () || !label)
5737 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
5738
5739 /* When we are restoring the pic base at the site of a nonlocal label,
5740 and we decided to emit the pic base above, we will still output a
5741 local label used for calculating the correction offset (even though
5742 the offset will be 0 in that case). */
5743 if (label)
5744 targetm.asm_out.internal_label (asm_out_file, "L",
5745 CODE_LABEL_NUMBER (label));
5746 #endif
5747 }
5748 else
5749 {
5750 if (TARGET_MACHO)
5751 /* We don't need a pic base, we're not producing pic. */
5752 gcc_unreachable ();
5753
5754 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5755 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
5756 targetm.asm_out.internal_label (asm_out_file, "L",
5757 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5758 }
5759
5760 if (!TARGET_MACHO)
5761 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
5762
5763 return "";
5764 }
5765
5766 /* Generate a "push" pattern for input ARG. */
5767
5768 rtx
5769 gen_push (rtx arg)
5770 {
5771 struct machine_function *m = cfun->machine;
5772
5773 if (m->fs.cfa_reg == stack_pointer_rtx)
5774 m->fs.cfa_offset += UNITS_PER_WORD;
5775 m->fs.sp_offset += UNITS_PER_WORD;
5776
5777 if (REG_P (arg) && GET_MODE (arg) != word_mode)
5778 arg = gen_rtx_REG (word_mode, REGNO (arg));
5779
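/* In 64-bit mode this yields a single push insn of the form
   (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI arg)). */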
5780 return gen_rtx_SET (gen_rtx_MEM (word_mode,
5781 gen_rtx_PRE_DEC (Pmode,
5782 stack_pointer_rtx)),
5783 arg);
5784 }
5785
5786 /* Generate a "pop" pattern for input ARG. */
5787
5788 rtx
5789 gen_pop (rtx arg)
5790 {
5791 if (REG_P (arg) && GET_MODE (arg) != word_mode)
5792 arg = gen_rtx_REG (word_mode, REGNO (arg));
5793
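/* In 64-bit mode this yields a single pop insn of the form
   (set (reg:DI arg) (mem:DI (post_inc:DI (reg:DI sp)))). */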
5794 return gen_rtx_SET (arg,
5795 gen_rtx_MEM (word_mode,
5796 gen_rtx_POST_INC (Pmode,
5797 stack_pointer_rtx)));
5798 }
5799
5800 /* Return >= 0 if there is an unused call-clobbered register available
5801 for the entire function. */
5802
5803 static unsigned int
5804 ix86_select_alt_pic_regnum (void)
5805 {
5806 if (ix86_use_pseudo_pic_reg ())
5807 return INVALID_REGNUM;
5808
5809 if (crtl->is_leaf
5810 && !crtl->profile
5811 && !ix86_current_function_calls_tls_descriptor)
5812 {
5813 int i, drap;
5814 /* Can't use the same register for both PIC and DRAP. */
5815 if (crtl->drap_reg)
5816 drap = REGNO (crtl->drap_reg);
5817 else
5818 drap = -1;
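/* Try hard registers 2, 1 and 0 in turn, i.e. %ecx, %edx and %eax,
   all of which are call-clobbered. */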
5819 for (i = 2; i >= 0; --i)
5820 if (i != drap && !df_regs_ever_live_p (i))
5821 return i;
5822 }
5823
5824 return INVALID_REGNUM;
5825 }
5826
5827 /* Return true if REGNO is used by the epilogue. */
5828
5829 bool
5830 ix86_epilogue_uses (int regno)
5831 {
5832 /* If there are no caller-saved registers, we preserve all registers,
5833 except for MMX and x87 registers which aren't supported when saving
5834 and restoring registers. Don't explicitly save SP register since
5835 it is always preserved. */
5836 return (epilogue_completed
5837 && cfun->machine->no_caller_saved_registers
5838 && !fixed_regs[regno]
5839 && !STACK_REGNO_P (regno)
5840 && !MMX_REGNO_P (regno));
5841 }
5842
5843 /* Return nonzero if register REGNO can be used as a scratch register
5844 in peephole2. */
5845
5846 static bool
5847 ix86_hard_regno_scratch_ok (unsigned int regno)
5848 {
5849 /* If there are no caller-saved registers, we can't use any register
5850 as a scratch register after epilogue and use REGNO as scratch
5851 register only if it has been used before to avoid saving and
5852 restoring it. */
5853 return (!cfun->machine->no_caller_saved_registers
5854 || (!epilogue_completed
5855 && df_regs_ever_live_p (regno)));
5856 }
5857
5858 /* Return TRUE if we need to save REGNO. */
5859
5860 bool
5861 ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
5862 {
5863 /* If there are no caller-saved registers, we preserve all registers,
5864 except for MMX and x87 registers which aren't supported when saving
5865 and restoring registers. Don't explicitly save SP register since
5866 it is always preserved. */
5867 if (cfun->machine->no_caller_saved_registers)
5868 {
5869 /* Don't preserve registers used for function return value. */
5870 rtx reg = crtl->return_rtx;
5871 if (reg)
5872 {
5873 unsigned int i = REGNO (reg);
5874 unsigned int nregs = REG_NREGS (reg);
5875 while (nregs-- > 0)
5876 if ((i + nregs) == regno)
5877 return false;
5878 }
5879
5880 return (df_regs_ever_live_p (regno)
5881 && !fixed_regs[regno]
5882 && !STACK_REGNO_P (regno)
5883 && !MMX_REGNO_P (regno)
5884 && (regno != HARD_FRAME_POINTER_REGNUM
5885 || !frame_pointer_needed));
5886 }
5887
5888 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
5889 && pic_offset_table_rtx)
5890 {
5891 if (ix86_use_pseudo_pic_reg ())
5892 {
5893 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
5894 _mcount in prologue. */
5895 if (!TARGET_64BIT && flag_pic && crtl->profile)
5896 return true;
5897 }
5898 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
5899 || crtl->profile
5900 || crtl->calls_eh_return
5901 || crtl->uses_const_pool
5902 || cfun->has_nonlocal_label)
5903 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
5904 }
5905
5906 if (crtl->calls_eh_return && maybe_eh_return)
5907 {
5908 unsigned i;
5909 for (i = 0; ; i++)
5910 {
5911 unsigned test = EH_RETURN_DATA_REGNO (i);
5912 if (test == INVALID_REGNUM)
5913 break;
5914 if (test == regno)
5915 return true;
5916 }
5917 }
5918
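/* Registers managed by the ms->sysv out-of-line save/restore stubs do
   not need to be saved again by the normal prologue code. */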
5919 if (ignore_outlined && cfun->machine->call_ms2sysv)
5920 {
5921 unsigned count = cfun->machine->call_ms2sysv_extra_regs
5922 + xlogue_layout::MIN_REGS;
5923 if (xlogue_layout::is_stub_managed_reg (regno, count))
5924 return false;
5925 }
5926
5927 if (crtl->drap_reg
5928 && regno == REGNO (crtl->drap_reg)
5929 && !cfun->machine->no_drap_save_restore)
5930 return true;
5931
5932 return (df_regs_ever_live_p (regno)
5933 && !call_used_or_fixed_reg_p (regno)
5934 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5935 }
5936
5937 /* Return number of saved general purpose registers. */
5938
5939 static int
5940 ix86_nsaved_regs (void)
5941 {
5942 int nregs = 0;
5943 int regno;
5944
5945 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5946 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
5947 nregs ++;
5948 return nregs;
5949 }
5950
5951 /* Return number of saved SSE registers. */
5952
5953 static int
5954 ix86_nsaved_sseregs (void)
5955 {
5956 int nregs = 0;
5957 int regno;
5958
5959 if (!TARGET_64BIT_MS_ABI)
5960 return 0;
5961 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5962 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
5963 nregs ++;
5964 return nregs;
5965 }
5966
5967 /* Given FROM and TO register numbers, say whether this elimination is
5968 allowed. If stack alignment is needed, we can only replace argument
5969 pointer with hard frame pointer, or replace frame pointer with stack
5970 pointer. Otherwise, frame pointer elimination is automatically
5971 handled and all other eliminations are valid. */
5972
5973 static bool
5974 ix86_can_eliminate (const int from, const int to)
5975 {
5976 if (stack_realign_fp)
5977 return ((from == ARG_POINTER_REGNUM
5978 && to == HARD_FRAME_POINTER_REGNUM)
5979 || (from == FRAME_POINTER_REGNUM
5980 && to == STACK_POINTER_REGNUM));
5981 else
5982 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
5983 }
5984
5985 /* Return the offset between two registers, one to be eliminated, and the other
5986 its replacement, at the start of a routine. */
5987
5988 HOST_WIDE_INT
5989 ix86_initial_elimination_offset (int from, int to)
5990 {
5991 struct ix86_frame &frame = cfun->machine->frame;
5992
5993 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5994 return frame.hard_frame_pointer_offset;
5995 else if (from == FRAME_POINTER_REGNUM
5996 && to == HARD_FRAME_POINTER_REGNUM)
5997 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5998 else
5999 {
6000 gcc_assert (to == STACK_POINTER_REGNUM);
6001
6002 if (from == ARG_POINTER_REGNUM)
6003 return frame.stack_pointer_offset;
6004
6005 gcc_assert (from == FRAME_POINTER_REGNUM);
6006 return frame.stack_pointer_offset - frame.frame_pointer_offset;
6007 }
6008 }
6009
6010 /* Emits a warning for unsupported msabi to sysv pro/epilogues. */
6011 void warn_once_call_ms2sysv_xlogues (const char *feature)
6012 {
6013 static bool warned_once = false;
6014 if (!warned_once)
6015 {
6016 warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
6017 feature);
6018 warned_once = true;
6019 }
6020 }
6021
6022 /* Return the probing interval for -fstack-clash-protection. */
6023
6024 static HOST_WIDE_INT
6025 get_probe_interval (void)
6026 {
6027 if (flag_stack_clash_protection)
6028 return (HOST_WIDE_INT_1U
6029 << param_stack_clash_protection_probe_interval);
6030 else
6031 return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
6032 }
6033
6034 /* When using -fsplit-stack, the allocation routines set a field in
6035 the TCB to the bottom of the stack plus this much space, measured
6036 in bytes. */
6037
6038 #define SPLIT_STACK_AVAILABLE 256
6039
6040 /* Fill structure ix86_frame about frame of currently computed function. */
6041
6042 static void
6043 ix86_compute_frame_layout (void)
6044 {
6045 struct ix86_frame *frame = &cfun->machine->frame;
6046 struct machine_function *m = cfun->machine;
6047 unsigned HOST_WIDE_INT stack_alignment_needed;
6048 HOST_WIDE_INT offset;
6049 unsigned HOST_WIDE_INT preferred_alignment;
6050 HOST_WIDE_INT size = ix86_get_frame_size ();
6051 HOST_WIDE_INT to_allocate;
6052
6053 /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
6054 * ms_abi functions that call a sysv function. We now need to prune away
6055 * cases where it should be disabled. */
6056 if (TARGET_64BIT && m->call_ms2sysv)
6057 {
6058 gcc_assert (TARGET_64BIT_MS_ABI);
6059 gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
6060 gcc_assert (!TARGET_SEH);
6061 gcc_assert (TARGET_SSE);
6062 gcc_assert (!ix86_using_red_zone ());
6063
6064 if (crtl->calls_eh_return)
6065 {
6066 gcc_assert (!reload_completed);
6067 m->call_ms2sysv = false;
6068 warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
6069 }
6070
6071 else if (ix86_static_chain_on_stack)
6072 {
6073 gcc_assert (!reload_completed);
6074 m->call_ms2sysv = false;
6075 warn_once_call_ms2sysv_xlogues ("static call chains");
6076 }
6077
6078 /* Finally, compute which registers the stub will manage. */
6079 else
6080 {
6081 unsigned count = xlogue_layout::count_stub_managed_regs ();
6082 m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
6083 m->call_ms2sysv_pad_in = 0;
6084 }
6085 }
6086
6087 frame->nregs = ix86_nsaved_regs ();
6088 frame->nsseregs = ix86_nsaved_sseregs ();
6089
6090 /* The 64-bit MS ABI seems to require stack alignment to always be 16,
6091 except for function prologues, leaf functions and when the default
6092 incoming stack boundary is overridden at the command line or via the
6093 force_align_arg_pointer attribute.
6094
6095 Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants
6096 at call sites, including profile function calls.
6097 */
6098 if (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
6099 && crtl->preferred_stack_boundary < 128)
6100 && (!crtl->is_leaf || cfun->calls_alloca != 0
6101 || ix86_current_function_calls_tls_descriptor
6102 || (TARGET_MACHO && crtl->profile)
6103 || ix86_incoming_stack_boundary < 128))
6104 {
6105 crtl->preferred_stack_boundary = 128;
6106 crtl->stack_alignment_needed = 128;
6107 }
6108
6109 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
6110 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
6111
6112 gcc_assert (!size || stack_alignment_needed);
6113 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6114 gcc_assert (preferred_alignment <= stack_alignment_needed);
6115
6116 /* The only ABI saving SSE regs should be 64-bit ms_abi. */
6117 gcc_assert (TARGET_64BIT || !frame->nsseregs);
6118 if (TARGET_64BIT && m->call_ms2sysv)
6119 {
6120 gcc_assert (stack_alignment_needed >= 16);
6121 gcc_assert (!frame->nsseregs);
6122 }
6123
6124 /* For SEH we have to limit the amount of code movement into the prologue.
6125 At present we do this via a BLOCKAGE, at which point there's very little
6126 scheduling that can be done, which means that there's very little point
6127 in doing anything except PUSHs. */
6128 if (TARGET_SEH)
6129 m->use_fast_prologue_epilogue = false;
6130 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
6131 {
6132 int count = frame->nregs;
6133 struct cgraph_node *node = cgraph_node::get (current_function_decl);
6134
6135 /* The fast prologue uses move instead of push to save registers. This
6136 is significantly longer, but also executes faster as modern hardware
6137 can execute the moves in parallel, but can't do that for push/pop.
6138
6139 Be careful about choosing which prologue to emit: when the function
6140 takes many instructions to execute we may use the slow version, as
6141 well as when the function is known to be outside a hot spot (the
6142 latter is known only with profile feedback). Weight the size of the
6143 function by the number of registers to save, as it is cheap to use
6144 one or two push instructions but very slow to use many of them.
6145
6146 Calling this hook multiple times with the same frame requirements
6147 must produce the same layout, since the RA might otherwise be
6148 unable to reach a fixed point or might fail its final sanity checks.
6149 This means that once we've assumed that a function does or doesn't
6150 have a particular size, we have to stick to that assumption
6151 regardless of how the function has changed since. */
6152 if (count)
6153 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
6154 if (node->frequency < NODE_FREQUENCY_NORMAL
6155 || (flag_branch_probabilities
6156 && node->frequency < NODE_FREQUENCY_HOT))
6157 m->use_fast_prologue_epilogue = false;
6158 else
6159 {
6160 if (count != frame->expensive_count)
6161 {
6162 frame->expensive_count = count;
6163 frame->expensive_p = expensive_function_p (count);
6164 }
6165 m->use_fast_prologue_epilogue = !frame->expensive_p;
6166 }
6167 }
6168
6169 frame->save_regs_using_mov
6170 = (TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue
6171 /* If static stack checking is enabled and done with probes,
6172 the registers need to be saved before allocating the frame. */
6173 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
6174
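/* From here on OFFSET accumulates the size of everything placed between
   the CFA and a given point of the frame: return address (and error
   code), optional static chain, saved frame pointer, general register
   save area, SSE register save area, va_arg area, local variables and
   finally the outgoing argument area at frame->stack_pointer_offset. */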
6175 /* Skip return address and error code in exception handler. */
6176 offset = INCOMING_FRAME_SP_OFFSET;
6177
6178 /* Skip pushed static chain. */
6179 if (ix86_static_chain_on_stack)
6180 offset += UNITS_PER_WORD;
6181
6182 /* Skip saved base pointer. */
6183 if (frame_pointer_needed)
6184 offset += UNITS_PER_WORD;
6185 frame->hfp_save_offset = offset;
6186
6187 /* The traditional frame pointer location is at the top of the frame. */
6188 frame->hard_frame_pointer_offset = offset;
6189
6190 /* Register save area */
6191 offset += frame->nregs * UNITS_PER_WORD;
6192 frame->reg_save_offset = offset;
6193
6194 /* Calculate the size of the va-arg area (not including padding, if any). */
6195 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
6196
6197 /* Also adjust stack_realign_offset for the largest alignment of
6198 stack slot actually used. */
6199 if (stack_realign_fp
6200 || (cfun->machine->max_used_stack_alignment != 0
6201 && (offset % cfun->machine->max_used_stack_alignment) != 0))
6202 {
6203 /* We may need a 16-byte aligned stack for the remainder of the
6204 register save area, but the stack frame for the local function
6205 may require a greater alignment if using AVX/2/512. In order
6206 to avoid wasting space, we first calculate the space needed for
6207 the rest of the register saves, add that to the stack pointer,
6208 and then realign the stack to the boundary of the start of the
6209 frame for the local function. */
6210 HOST_WIDE_INT space_needed = 0;
6211 HOST_WIDE_INT sse_reg_space_needed = 0;
6212
6213 if (TARGET_64BIT)
6214 {
6215 if (m->call_ms2sysv)
6216 {
6217 m->call_ms2sysv_pad_in = 0;
6218 space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
6219 }
6220
6221 else if (frame->nsseregs)
6222 /* The only ABI that has saved SSE registers (Win64) also has a
6223 16-byte aligned default stack. However, many programs violate
6224 the ABI, and Wine64 forces stack realignment to compensate. */
6225 space_needed = frame->nsseregs * 16;
6226
6227 sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
6228
6229 /* 64-bit frame->va_arg_size should always be a multiple of 16, but
6230 round anyway to be pedantic. */
6231 space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
6232 }
6233 else
6234 space_needed = frame->va_arg_size;
6235
6236 /* Record the allocation size required prior to the realignment AND. */
6237 frame->stack_realign_allocate = space_needed;
6238
6239 /* The re-aligned stack starts at frame->stack_realign_offset. Values
6240 before this point are not directly comparable with values below
6241 this point. Use sp_valid_at to determine if the stack pointer is
6242 valid for a given offset, fp_valid_at for the frame pointer, or
6243 choose_baseaddr to have a base register chosen for you.
6244
6245 Note that the result of (frame->stack_realign_offset
6246 & (stack_alignment_needed - 1)) may not equal zero. */
6247 offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
6248 frame->stack_realign_offset = offset - space_needed;
6249 frame->sse_reg_save_offset = frame->stack_realign_offset
6250 + sse_reg_space_needed;
6251 }
6252 else
6253 {
6254 frame->stack_realign_offset = offset;
6255
6256 if (TARGET_64BIT && m->call_ms2sysv)
6257 {
6258 m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
6259 offset += xlogue_layout::get_instance ().get_stack_space_used ();
6260 }
6261
6262 /* Align and set SSE register save area. */
6263 else if (frame->nsseregs)
6264 {
6265 /* If the incoming stack boundary is at least 16 bytes, or DRAP is
6266 required and the DRAP re-alignment boundary is at least 16 bytes,
6267 then we want the SSE register save area properly aligned. */
6268 if (ix86_incoming_stack_boundary >= 128
6269 || (stack_realign_drap && stack_alignment_needed >= 16))
6270 offset = ROUND_UP (offset, 16);
6271 offset += frame->nsseregs * 16;
6272 }
6273 frame->sse_reg_save_offset = offset;
6274 offset += frame->va_arg_size;
6275 }
6276
6277 /* Align the start of the frame for the local function. When a
6278 function call is removed, the function may become a leaf function.
6279 But if arguments may be passed on the stack, we need to align the
6280 stack when there is no tail call. */
6281 if (m->call_ms2sysv
6282 || frame->va_arg_size != 0
6283 || size != 0
6284 || !crtl->is_leaf
6285 || (!crtl->tail_call_emit
6286 && cfun->machine->outgoing_args_on_stack)
6287 || cfun->calls_alloca
6288 || ix86_current_function_calls_tls_descriptor)
6289 offset = ROUND_UP (offset, stack_alignment_needed);
6290
6291 /* Frame pointer points here. */
6292 frame->frame_pointer_offset = offset;
6293
6294 offset += size;
6295
6296 /* Add the outgoing arguments area. This can be skipped if we
6297 eliminated all the function calls as dead code.
6298 Skipping is however impossible when the function calls alloca, as
6299 the alloca expander assumes that the last crtl->outgoing_args_size
6300 bytes of the stack frame are unused. */
6301 if (ACCUMULATE_OUTGOING_ARGS
6302 && (!crtl->is_leaf || cfun->calls_alloca
6303 || ix86_current_function_calls_tls_descriptor))
6304 {
6305 offset += crtl->outgoing_args_size;
6306 frame->outgoing_arguments_size = crtl->outgoing_args_size;
6307 }
6308 else
6309 frame->outgoing_arguments_size = 0;
6310
6311 /* Align stack boundary. Only needed if we're calling another function
6312 or using alloca. */
6313 if (!crtl->is_leaf || cfun->calls_alloca
6314 || ix86_current_function_calls_tls_descriptor)
6315 offset = ROUND_UP (offset, preferred_alignment);
6316
6317 /* We've reached end of stack frame. */
6318 frame->stack_pointer_offset = offset;
6319
6320 /* Size prologue needs to allocate. */
6321 to_allocate = offset - frame->sse_reg_save_offset;
6322
6323 if ((!to_allocate && frame->nregs <= 1)
6324 || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
6325 /* If stack clash probing needs a loop, then it needs a
6326 scratch register. But the returned register is only guaranteed
6327 to be safe to use after register saves are complete. So if
6328 stack clash protections are enabled and the allocated frame is
6329 larger than the probe interval, then use pushes to save
6330 callee saved registers. */
6331 || (flag_stack_clash_protection && to_allocate > get_probe_interval ()))
6332 frame->save_regs_using_mov = false;
6333
6334 if (ix86_using_red_zone ()
6335 && crtl->sp_is_unchanging
6336 && crtl->is_leaf
6337 && !ix86_pc_thunk_call_expanded
6338 && !ix86_current_function_calls_tls_descriptor)
6339 {
6340 frame->red_zone_size = to_allocate;
6341 if (frame->save_regs_using_mov)
6342 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6343 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6344 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6345 }
6346 else
6347 frame->red_zone_size = 0;
6348 frame->stack_pointer_offset -= frame->red_zone_size;
6349
6350 /* The SEH frame pointer location is near the bottom of the frame.
6351 This is enforced by the fact that the difference between the
6352 stack pointer and the frame pointer is limited to 240 bytes in
6353 the unwind data structure. */
6354 if (TARGET_SEH)
6355 {
6356 /* Force the frame pointer to point at or below the lowest register save
6357 area, see the SEH code in config/i386/winnt.c for the rationale. */
6358 frame->hard_frame_pointer_offset = frame->sse_reg_save_offset;
6359
6360 /* If we can leave the frame pointer where it is, do so; however return
6361 the establisher frame for __builtin_frame_address (0) or else if the
6362 frame overflows the SEH maximum frame size.
6363
6364 Note that the value returned by __builtin_frame_address (0) is quite
6365 constrained, because setjmp is piggybacked on the SEH machinery with
6366 recent versions of MinGW:
6367
6368 # elif defined(__SEH__)
6369 # if defined(__aarch64__) || defined(_ARM64_)
6370 # define setjmp(BUF) _setjmp((BUF), __builtin_sponentry())
6371 # elif (__MINGW_GCC_VERSION < 40702)
6372 # define setjmp(BUF) _setjmp((BUF), mingw_getsp())
6373 # else
6374 # define setjmp(BUF) _setjmp((BUF), __builtin_frame_address (0))
6375 # endif
6376
6377 and the second argument passed to _setjmp, if not null, is forwarded
6378 to the TargetFrame parameter of RtlUnwindEx by longjmp (after it has
6379 built an ExceptionRecord on the fly describing the setjmp buffer). */
6380 const HOST_WIDE_INT diff
6381 = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
6382 if (diff <= 255 && !crtl->accesses_prior_frames)
6383 {
6384 /* The resulting diff will be a multiple of 16 lower than 255,
6385 i.e. at most 240 as required by the unwind data structure. */
6386 frame->hard_frame_pointer_offset += (diff & 15);
6387 }
6388 else if (diff <= SEH_MAX_FRAME_SIZE && !crtl->accesses_prior_frames)
6389 {
6390 /* Ideally we'd determine what portion of the local stack frame
6391 (within the constraint of the lowest 240) is most heavily used.
6392 But without that complication, simply bias the frame pointer
6393 by 128 bytes so as to maximize the amount of the local stack
6394 frame that is addressable with 8-bit offsets. */
6395 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
6396 }
6397 else
6398 frame->hard_frame_pointer_offset = frame->hfp_save_offset;
6399 }
6400 }
6401
6402 /* This is semi-inlined memory_address_length, but simplified
6403 since we know that we're always dealing with reg+offset, and
6404 to avoid having to create and discard all that rtl. */
6405
6406 static inline int
6407 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
6408 {
6409 int len = 4;
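/* By default assume a 4-byte (32-bit) displacement is needed. */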
6410
6411 if (offset == 0)
6412 {
6413 /* EBP and R13 cannot be encoded without an offset. */
6414 len = (regno == BP_REG || regno == R13_REG);
6415 }
6416 else if (IN_RANGE (offset, -128, 127))
6417 len = 1;
6418
6419 /* ESP and R12 must be encoded with a SIB byte. */
6420 if (regno == SP_REG || regno == R12_REG)
6421 len++;
6422
6423 return len;
6424 }
6425
6426 /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
6427 the frame save area. The register is saved at CFA - CFA_OFFSET. */
6428
6429 static bool
6430 sp_valid_at (HOST_WIDE_INT cfa_offset)
6431 {
6432 const struct machine_frame_state &fs = cfun->machine->fs;
6433 if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
6434 {
6435 /* Validate that the cfa_offset isn't in a "no-man's land". */
6436 gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
6437 return false;
6438 }
6439 return fs.sp_valid;
6440 }
6441
6442 /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
6443 the frame save area. The register is saved at CFA - CFA_OFFSET. */
6444
6445 static inline bool
6446 fp_valid_at (HOST_WIDE_INT cfa_offset)
6447 {
6448 const struct machine_frame_state &fs = cfun->machine->fs;
6449 if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
6450 {
6451 /* Validate that the cfa_offset isn't in a "no-man's land". */
6452 gcc_assert (cfa_offset >= fs.sp_realigned_offset);
6453 return false;
6454 }
6455 return fs.fp_valid;
6456 }
6457
6458 /* Choose a base register based upon alignment requested, speed and/or
6459 size. */
6460
6461 static void
6462 choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
6463 HOST_WIDE_INT &base_offset,
6464 unsigned int align_reqested, unsigned int *align)
6465 {
6466 const struct machine_function *m = cfun->machine;
6467 unsigned int hfp_align;
6468 unsigned int drap_align;
6469 unsigned int sp_align;
6470 bool hfp_ok = fp_valid_at (cfa_offset);
6471 bool drap_ok = m->fs.drap_valid;
6472 bool sp_ok = sp_valid_at (cfa_offset);
6473
6474 hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
6475
6476 /* Filter out any registers that don't meet the requested alignment
6477 criteria. */
6478 if (align_reqested)
6479 {
6480 if (m->fs.realigned)
6481 hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
6482 /* SEH unwind code does not currently support REG_CFA_EXPRESSION
6483 notes (which we would need to use a realigned stack pointer),
6484 so disable on SEH targets. */
6485 else if (m->fs.sp_realigned)
6486 sp_align = crtl->stack_alignment_needed;
6487
6488 hfp_ok = hfp_ok && hfp_align >= align_reqested;
6489 drap_ok = drap_ok && drap_align >= align_reqested;
6490 sp_ok = sp_ok && sp_align >= align_reqested;
6491 }
6492
6493 if (m->use_fast_prologue_epilogue)
6494 {
6495 /* Choose the base register most likely to allow the most scheduling
6496 opportunities. Generally FP is valid throughout the function,
6497 while DRAP must be reloaded within the epilogue. But choose either
6498 over the SP due to increased encoding size. */
6499
6500 if (hfp_ok)
6501 {
6502 base_reg = hard_frame_pointer_rtx;
6503 base_offset = m->fs.fp_offset - cfa_offset;
6504 }
6505 else if (drap_ok)
6506 {
6507 base_reg = crtl->drap_reg;
6508 base_offset = 0 - cfa_offset;
6509 }
6510 else if (sp_ok)
6511 {
6512 base_reg = stack_pointer_rtx;
6513 base_offset = m->fs.sp_offset - cfa_offset;
6514 }
6515 }
6516 else
6517 {
6518 HOST_WIDE_INT toffset;
6519 int len = 16, tlen;
6520
6521 /* Choose the base register with the smallest address encoding.
6522 With a tie, choose FP > DRAP > SP. */
6523 if (sp_ok)
6524 {
6525 base_reg = stack_pointer_rtx;
6526 base_offset = m->fs.sp_offset - cfa_offset;
6527 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
6528 }
6529 if (drap_ok)
6530 {
6531 toffset = 0 - cfa_offset;
6532 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
6533 if (tlen <= len)
6534 {
6535 base_reg = crtl->drap_reg;
6536 base_offset = toffset;
6537 len = tlen;
6538 }
6539 }
6540 if (hfp_ok)
6541 {
6542 toffset = m->fs.fp_offset - cfa_offset;
6543 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
6544 if (tlen <= len)
6545 {
6546 base_reg = hard_frame_pointer_rtx;
6547 base_offset = toffset;
6548 }
6549 }
6550 }
6551
6552 /* Set the align return value. */
6553 if (align)
6554 {
6555 if (base_reg == stack_pointer_rtx)
6556 *align = sp_align;
6557 else if (base_reg == crtl->drap_reg)
6558 *align = drap_align;
6559 else if (base_reg == hard_frame_pointer_rtx)
6560 *align = hfp_align;
6561 }
6562 }
6563
6564 /* Return an RTX that points to CFA_OFFSET within the stack frame and
6565 the alignment of address. If ALIGN is non-null, it should point to
6566 an alignment value (in bits) that is preferred or zero and will
6567 receive the alignment of the base register that was selected,
6568 irrespective of whether or not CFA_OFFSET is a multiple of that
6569 alignment value. If it is possible for the base register offset to be
6570 non-immediate then SCRATCH_REGNO should specify a scratch register to
6571 use.
6572
6573 The valid base registers are taken from CFUN->MACHINE->FS. */
6574
6575 static rtx
6576 choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
6577 unsigned int scratch_regno = INVALID_REGNUM)
6578 {
6579 rtx base_reg = NULL;
6580 HOST_WIDE_INT base_offset = 0;
6581
6582 /* If a specific alignment is requested, try to get a base register
6583 with that alignment first. */
6584 if (align && *align)
6585 choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
6586
6587 if (!base_reg)
6588 choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
6589
6590 gcc_assert (base_reg != NULL);
6591
6592 rtx base_offset_rtx = GEN_INT (base_offset);
6593
6594 if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
6595 {
6596 gcc_assert (scratch_regno != INVALID_REGNUM);
6597
6598 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
6599 emit_move_insn (scratch_reg, base_offset_rtx);
6600
6601 return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
6602 }
6603
6604 return plus_constant (Pmode, base_reg, base_offset);
6605 }
6606
6607 /* Emit code to save registers in the prologue. */
6608
6609 static void
6610 ix86_emit_save_regs (void)
6611 {
6612 unsigned int regno;
6613 rtx_insn *insn;
6614
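/* Note the post-decrement in the loop condition: registers are visited
   from FIRST_PSEUDO_REGISTER - 2 down to 0, so higher-numbered registers
   are pushed first. */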
6615 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
6616 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6617 {
6618 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
6619 RTX_FRAME_RELATED_P (insn) = 1;
6620 }
6621 }
6622
6623 /* Emit a single register save at CFA - CFA_OFFSET. */
6624
6625 static void
6626 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
6627 HOST_WIDE_INT cfa_offset)
6628 {
6629 struct machine_function *m = cfun->machine;
6630 rtx reg = gen_rtx_REG (mode, regno);
6631 rtx mem, addr, base, insn;
6632 unsigned int align = GET_MODE_ALIGNMENT (mode);
6633
6634 addr = choose_baseaddr (cfa_offset, &align);
6635 mem = gen_frame_mem (mode, addr);
6636
6637 /* The location alignment depends upon the base register. */
6638 align = MIN (GET_MODE_ALIGNMENT (mode), align);
6639 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
6640 set_mem_align (mem, align);
6641
6642 insn = emit_insn (gen_rtx_SET (mem, reg));
6643 RTX_FRAME_RELATED_P (insn) = 1;
6644
6645 base = addr;
6646 if (GET_CODE (base) == PLUS)
6647 base = XEXP (base, 0);
6648 gcc_checking_assert (REG_P (base));
6649
6650 /* When saving registers into a re-aligned local stack frame, avoid
6651 any tricky guessing by dwarf2out. */
6652 if (m->fs.realigned)
6653 {
6654 gcc_checking_assert (stack_realign_drap);
6655
6656 if (regno == REGNO (crtl->drap_reg))
6657 {
6658 /* A bit of a hack. We force the DRAP register to be saved in
6659 the re-aligned stack frame, which provides us with a copy
6660 of the CFA that will last past the prologue. Install it. */
6661 gcc_checking_assert (cfun->machine->fs.fp_valid);
6662 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
6663 cfun->machine->fs.fp_offset - cfa_offset);
6664 mem = gen_rtx_MEM (mode, addr);
6665 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
6666 }
6667 else
6668 {
6669 /* The frame pointer is a stable reference within the
6670 aligned frame. Use it. */
6671 gcc_checking_assert (cfun->machine->fs.fp_valid);
6672 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
6673 cfun->machine->fs.fp_offset - cfa_offset);
6674 mem = gen_rtx_MEM (mode, addr);
6675 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
6676 }
6677 }
6678
6679 else if (base == stack_pointer_rtx && m->fs.sp_realigned
6680 && cfa_offset >= m->fs.sp_realigned_offset)
6681 {
6682 gcc_checking_assert (stack_realign_fp);
6683 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
6684 }
6685
6686 /* The memory may not be relative to the current CFA register,
6687 which means that we may need to generate a new pattern for
6688 use by the unwind info. */
6689 else if (base != m->fs.cfa_reg)
6690 {
6691 addr = plus_constant (Pmode, m->fs.cfa_reg,
6692 m->fs.cfa_offset - cfa_offset);
6693 mem = gen_rtx_MEM (mode, addr);
6694 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
6695 }
6696 }
6697
6698 /* Emit code to save registers using MOV insns.
6699 First register is stored at CFA - CFA_OFFSET. */
6700 static void
6701 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
6702 {
6703 unsigned int regno;
6704
6705 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6706 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6707 {
6708 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
6709 cfa_offset -= UNITS_PER_WORD;
6710 }
6711 }
6712
6713 /* Emit code to save SSE registers using MOV insns.
6714 First register is stored at CFA - CFA_OFFSET. */
6715 static void
6716 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
6717 {
6718 unsigned int regno;
6719
6720 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6721 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6722 {
6723 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
6724 cfa_offset -= GET_MODE_SIZE (V4SFmode);
6725 }
6726 }
6727
6728 static GTY(()) rtx queued_cfa_restores;
6729
6730 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
6731 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
6732 Don't add the note if the previously saved value will be left untouched
6733 within stack red-zone till return, as unwinders can find the same value
6734 in the register and on the stack. */
6735
6736 static void
6737 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
6738 {
6739 if (!crtl->shrink_wrapped
6740 && cfa_offset <= cfun->machine->fs.red_zone_offset)
6741 return;
6742
6743 if (insn)
6744 {
6745 add_reg_note (insn, REG_CFA_RESTORE, reg);
6746 RTX_FRAME_RELATED_P (insn) = 1;
6747 }
6748 else
6749 queued_cfa_restores
6750 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
6751 }
6752
6753 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
6754
6755 static void
6756 ix86_add_queued_cfa_restore_notes (rtx insn)
6757 {
6758 rtx last;
6759 if (!queued_cfa_restores)
6760 return;
6761 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
6762 ;
6763 XEXP (last, 1) = REG_NOTES (insn);
6764 REG_NOTES (insn) = queued_cfa_restores;
6765 queued_cfa_restores = NULL_RTX;
6766 RTX_FRAME_RELATED_P (insn) = 1;
6767 }
6768
6769 /* Expand prologue or epilogue stack adjustment.
6770 The pattern exists to put a dependency on all ebp-based memory accesses.
6771 STYLE should be negative if instructions should be marked as frame related,
6772 zero if %r11 register is live and cannot be freely used and positive
6773 otherwise. */
6774
6775 static rtx
6776 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
6777 int style, bool set_cfa)
6778 {
6779 struct machine_function *m = cfun->machine;
6780 rtx addend = offset;
6781 rtx insn;
6782 bool add_frame_related_expr = false;
6783
6784 if (!x86_64_immediate_operand (offset, Pmode))
6785 {
6786 /* r11 is used by indirect sibcall return as well, set before the
6787 epilogue and used after the epilogue. */
6788 if (style)
6789 addend = gen_rtx_REG (Pmode, R11_REG);
6790 else
6791 {
6792 gcc_assert (src != hard_frame_pointer_rtx
6793 && dest != hard_frame_pointer_rtx);
6794 addend = hard_frame_pointer_rtx;
6795 }
6796 emit_insn (gen_rtx_SET (addend, offset));
6797 if (style < 0)
6798 add_frame_related_expr = true;
6799 }
6800
6801 insn = emit_insn (gen_pro_epilogue_adjust_stack_add
6802 (Pmode, dest, src, addend));
6803 if (style >= 0)
6804 ix86_add_queued_cfa_restore_notes (insn);
6805
6806 if (set_cfa)
6807 {
6808 rtx r;
6809
6810 gcc_assert (m->fs.cfa_reg == src);
6811 m->fs.cfa_offset += INTVAL (offset);
6812 m->fs.cfa_reg = dest;
6813
6814 r = gen_rtx_PLUS (Pmode, src, offset);
6815 r = gen_rtx_SET (dest, r);
6816 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
6817 RTX_FRAME_RELATED_P (insn) = 1;
6818 }
6819 else if (style < 0)
6820 {
6821 RTX_FRAME_RELATED_P (insn) = 1;
6822 if (add_frame_related_expr)
6823 {
6824 rtx r = gen_rtx_PLUS (Pmode, src, offset);
6825 r = gen_rtx_SET (dest, r);
6826 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
6827 }
6828 }
6829
6830 if (dest == stack_pointer_rtx)
6831 {
6832 HOST_WIDE_INT ooffset = m->fs.sp_offset;
6833 bool valid = m->fs.sp_valid;
6834 bool realigned = m->fs.sp_realigned;
6835
6836 if (src == hard_frame_pointer_rtx)
6837 {
6838 valid = m->fs.fp_valid;
6839 realigned = false;
6840 ooffset = m->fs.fp_offset;
6841 }
6842 else if (src == crtl->drap_reg)
6843 {
6844 valid = m->fs.drap_valid;
6845 realigned = false;
6846 ooffset = 0;
6847 }
6848 else
6849 {
6850 /* Else there are two possibilities: SP itself, which we set
6851 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
6852 taken care of by hand along the eh_return path. */
6853 gcc_checking_assert (src == stack_pointer_rtx
6854 || offset == const0_rtx);
6855 }
6856
6857 m->fs.sp_offset = ooffset - INTVAL (offset);
6858 m->fs.sp_valid = valid;
6859 m->fs.sp_realigned = realigned;
6860 }
6861 return insn;
6862 }
6863
6864 /* Find an available register to be used as the dynamic realign
6865 argument pointer register. Such a register will be written in the
6866 prologue and used at the beginning of the body, so it must not be
6867 1. a parameter passing register.
6868 2. the GOT pointer.
6869 We reuse the static-chain register if it is available. Otherwise, we
6870 use DI for i386 and R13 for x86-64. We chose R13 since it has
6871 shorter encoding.
6872
6873 Return: the regno of chosen register. */
6874
6875 static unsigned int
6876 find_drap_reg (void)
6877 {
6878 tree decl = cfun->decl;
6879
6880 /* Always use callee-saved register if there are no caller-saved
6881 registers. */
6882 if (TARGET_64BIT)
6883 {
6884 /* Use R13 for nested functions or functions that need a static chain.
6885 Since a function with a tail call may use any caller-saved
6886 register in the epilogue, DRAP must not use a caller-saved
6887 register in such a case. */
6888 if (DECL_STATIC_CHAIN (decl)
6889 || cfun->machine->no_caller_saved_registers
6890 || crtl->tail_call_emit)
6891 return R13_REG;
6892
6893 return R10_REG;
6894 }
6895 else
6896 {
6897 /* Use DI for nested functions or functions that need a static chain.
6898 Since a function with a tail call may use any caller-saved
6899 register in the epilogue, DRAP must not use a caller-saved
6900 register in such a case. */
6901 if (DECL_STATIC_CHAIN (decl)
6902 || cfun->machine->no_caller_saved_registers
6903 || crtl->tail_call_emit)
6904 return DI_REG;
6905
6906 /* Reuse static chain register if it isn't used for parameter
6907 passing. */
6908 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
6909 {
6910 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
6911 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
6912 return CX_REG;
6913 }
6914 return DI_REG;
6915 }
6916 }
6917
6918 /* Return minimum incoming stack alignment. */
6919
6920 static unsigned int
6921 ix86_minimum_incoming_stack_boundary (bool sibcall)
6922 {
6923 unsigned int incoming_stack_boundary;
6924
6925 /* Stack of interrupt handler is aligned to 128 bits in 64bit mode. */
6926 if (cfun->machine->func_type != TYPE_NORMAL)
6927 incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
6928 /* Prefer the one specified at command line. */
6929 else if (ix86_user_incoming_stack_boundary)
6930 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
6931 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack
6932 boundary if -mstackrealign is used, this is not a sibcall check,
6933 and the estimated stack alignment is 128 bits. */
6934 else if (!sibcall
6935 && ix86_force_align_arg_pointer
6936 && crtl->stack_alignment_estimated == 128)
6937 incoming_stack_boundary = MIN_STACK_BOUNDARY;
6938 else
6939 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
6940
6941 /* Incoming stack alignment can be changed on individual functions
6942 via force_align_arg_pointer attribute. We use the smallest
6943 incoming stack boundary. */
6944 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
6945 && lookup_attribute ("force_align_arg_pointer",
6946 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
6947 incoming_stack_boundary = MIN_STACK_BOUNDARY;
6948
6949 /* The incoming stack frame has to be aligned at least at
6950 parm_stack_boundary. */
6951 if (incoming_stack_boundary < crtl->parm_stack_boundary)
6952 incoming_stack_boundary = crtl->parm_stack_boundary;
6953
6954 /* The stack at entry to main is aligned by the runtime. We use the
6955 smallest incoming stack boundary. */
6956 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
6957 && DECL_NAME (current_function_decl)
6958 && MAIN_NAME_P (DECL_NAME (current_function_decl))
6959 && DECL_FILE_SCOPE_P (current_function_decl))
6960 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
6961
6962 return incoming_stack_boundary;
6963 }
6964
6965 /* Update incoming stack boundary and estimated stack alignment. */
6966
6967 static void
6968 ix86_update_stack_boundary (void)
6969 {
6970 ix86_incoming_stack_boundary
6971 = ix86_minimum_incoming_stack_boundary (false);
6972
6973 /* x86_64 vararg needs 16byte stack alignment for register save area. */
6974 if (TARGET_64BIT
6975 && cfun->stdarg
6976 && crtl->stack_alignment_estimated < 128)
6977 crtl->stack_alignment_estimated = 128;
6978
6979 /* __tls_get_addr needs to be called with 16-byte aligned stack. */
6980 if (ix86_tls_descriptor_calls_expanded_in_cfun
6981 && crtl->preferred_stack_boundary < 128)
6982 crtl->preferred_stack_boundary = 128;
6983 }
6984
6985 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
6986 needed or an rtx for DRAP otherwise. */
6987
6988 static rtx
6989 ix86_get_drap_rtx (void)
6990 {
6991 /* We must use DRAP if there are outgoing arguments on the stack or
6992 the stack pointer register is clobbered by an asm statement, and
6993 ACCUMULATE_OUTGOING_ARGS is false. */
6994 if (ix86_force_drap
6995 || ((cfun->machine->outgoing_args_on_stack
6996 || crtl->sp_is_clobbered_by_asm)
6997 && !ACCUMULATE_OUTGOING_ARGS))
6998 crtl->need_drap = true;
6999
7000 if (stack_realign_drap)
7001 {
7002 /* Assign DRAP to vDRAP and return vDRAP. */
7003 unsigned int regno = find_drap_reg ();
7004 rtx drap_vreg;
7005 rtx arg_ptr;
7006 rtx_insn *seq, *insn;
7007
7008 arg_ptr = gen_rtx_REG (Pmode, regno);
7009 crtl->drap_reg = arg_ptr;
7010
7011 start_sequence ();
7012 drap_vreg = copy_to_reg (arg_ptr);
7013 seq = get_insns ();
7014 end_sequence ();
7015
7016 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
7017 if (!optimize)
7018 {
7019 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
7020 RTX_FRAME_RELATED_P (insn) = 1;
7021 }
7022 return drap_vreg;
7023 }
7024 else
7025 return NULL;
7026 }
7027
7028 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
7029
7030 static rtx
7031 ix86_internal_arg_pointer (void)
7032 {
7033 return virtual_incoming_args_rtx;
7034 }
7035
7036 struct scratch_reg {
7037 rtx reg;
7038 bool saved;
7039 };
7040
7041 /* Return a short-lived scratch register for use on function entry.
7042 In 32-bit mode, it is valid only after the registers are saved
7043 in the prologue. This register must be released by means of
7044 release_scratch_register_on_entry once it is dead. */
7045
7046 static void
7047 get_scratch_register_on_entry (struct scratch_reg *sr)
7048 {
7049 int regno;
7050
7051 sr->saved = false;
7052
7053 if (TARGET_64BIT)
7054 {
7055 /* We always use R11 in 64-bit mode. */
7056 regno = R11_REG;
7057 }
7058 else
7059 {
7060 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
7061 bool fastcall_p
7062 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
7063 bool thiscall_p
7064 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
7065 bool static_chain_p = DECL_STATIC_CHAIN (decl);
7066 int regparm = ix86_function_regparm (fntype, decl);
7067 int drap_regno
7068 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
7069
7070 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
7071 for the static chain register. */
7072 if ((regparm < 1 || (fastcall_p && !static_chain_p))
7073 && drap_regno != AX_REG)
7074 regno = AX_REG;
7075 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
7076 for the static chain register. */
7077 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
7078 regno = AX_REG;
7079 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
7080 regno = DX_REG;
7081 /* ecx is the static chain register. */
7082 else if (regparm < 3 && !fastcall_p && !thiscall_p
7083 && !static_chain_p
7084 && drap_regno != CX_REG)
7085 regno = CX_REG;
7086 else if (ix86_save_reg (BX_REG, true, false))
7087 regno = BX_REG;
7088 /* esi is the static chain register. */
7089 else if (!(regparm == 3 && static_chain_p)
7090 && ix86_save_reg (SI_REG, true, false))
7091 regno = SI_REG;
7092 else if (ix86_save_reg (DI_REG, true, false))
7093 regno = DI_REG;
7094 else
7095 {
7096 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
7097 sr->saved = true;
7098 }
7099 }
7100
7101 sr->reg = gen_rtx_REG (Pmode, regno);
7102 if (sr->saved)
7103 {
7104 rtx_insn *insn = emit_insn (gen_push (sr->reg));
7105 RTX_FRAME_RELATED_P (insn) = 1;
7106 }
7107 }
7108
7109 /* Release a scratch register obtained from the preceding function.
7110
7111 If RELEASE_VIA_POP is true, we just pop the register off the stack
7112 to release it. This is what non-Linux systems use with -fstack-check.
7113
7114 Otherwise we use OFFSET to locate the saved register and the
7115 allocated stack space becomes part of the local frame and is
7116 deallocated by the epilogue. */
7117
7118 static void
7119 release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
7120 bool release_via_pop)
7121 {
7122 if (sr->saved)
7123 {
7124 if (release_via_pop)
7125 {
7126 struct machine_function *m = cfun->machine;
7127 rtx x, insn = emit_insn (gen_pop (sr->reg));
7128
7129 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
7130 RTX_FRAME_RELATED_P (insn) = 1;
7131 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
7132 x = gen_rtx_SET (stack_pointer_rtx, x);
7133 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
7134 m->fs.sp_offset -= UNITS_PER_WORD;
7135 }
7136 else
7137 {
7138 rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset));
7139 x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
7140 emit_insn (x);
7141 }
7142 }
7143 }
7144
7145 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
7146
7147 This differs from the next routine in that it tries hard to prevent
7148 attacks that jump the stack guard. Thus it is never allowed to allocate
7149 more than PROBE_INTERVAL bytes of stack space without a suitable
7150 probe.
7151
7152 INT_REGISTERS_SAVED is true if integer registers have already been
7153 pushed on the stack. */
7154
7155 static void
7156 ix86_adjust_stack_and_probe_stack_clash (HOST_WIDE_INT size,
7157 const bool int_registers_saved)
7158 {
7159 struct machine_function *m = cfun->machine;
7160
7161 /* If this function does not statically allocate stack space, then
7162 no probes are needed. */
7163 if (!size)
7164 {
7165 /* However, the allocation of space via pushes for register
7166 saves could be viewed as allocating space, but without the
7167 need to probe. */
7168 if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
7169 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
7170 else
7171 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
7172 return;
7173 }
7174
7175 /* If we are a noreturn function, then we have to consider the
7176 possibility that we're called via a jump rather than a call.
7177
7178 Thus we don't have the implicit probe generated by saving the
7179 return address into the stack at the call. Thus, the stack
7180 pointer could be anywhere in the guard page. The safe thing
7181 to do is emit a probe now.
7182
7183 The probe can be avoided if we have already emitted any callee
7184 register saves into the stack or have a frame pointer (which will
7185 have been saved as well). Those saves will function as implicit
7186 probes.
7187
7188 ?!? This should be revamped to work like aarch64 and s390 where
7189 we track the offset from the most recent probe. Normally that
7190 offset would be zero. For a noreturn function we would reset
7191 it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then
7192 we just probe when we cross PROBE_INTERVAL. */
7193 if (TREE_THIS_VOLATILE (cfun->decl)
7194 && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
7195 {
7196 /* We can safely use any register here since we're just going to push
7197 its value and immediately pop it back. But we do try and avoid
7198 argument passing registers so as not to introduce dependencies in
7199 the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */
7200 rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
7201 rtx_insn *insn_push = emit_insn (gen_push (dummy_reg));
7202 rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg));
7203 m->fs.sp_offset -= UNITS_PER_WORD;
7204 if (m->fs.cfa_reg == stack_pointer_rtx)
7205 {
7206 m->fs.cfa_offset -= UNITS_PER_WORD;
7207 rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
7208 x = gen_rtx_SET (stack_pointer_rtx, x);
7209 add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
7210 RTX_FRAME_RELATED_P (insn_push) = 1;
7211 x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
7212 x = gen_rtx_SET (stack_pointer_rtx, x);
7213 add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
7214 RTX_FRAME_RELATED_P (insn_pop) = 1;
7215 }
7216 emit_insn (gen_blockage ());
7217 }
7218
7219 /* If we allocate less than the size of the guard statically,
7220 then no probing is necessary, but we do need to allocate
7221 the stack. */
7222 if (size < (1 << param_stack_clash_protection_guard_size))
7223 {
7224 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7225 GEN_INT (-size), -1,
7226 m->fs.cfa_reg == stack_pointer_rtx);
7227 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
7228 return;
7229 }
7230
7231 /* We're allocating a large enough stack frame that we need to
7232 emit probes. Either emit them inline or in a loop depending
7233 on the size. */
7234 HOST_WIDE_INT probe_interval = get_probe_interval ();
7235 if (size <= 4 * probe_interval)
7236 {
7237 HOST_WIDE_INT i;
7238 for (i = probe_interval; i <= size; i += probe_interval)
7239 {
7240 /* Allocate PROBE_INTERVAL bytes. */
7241 rtx insn
7242 = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7243 GEN_INT (-probe_interval), -1,
7244 m->fs.cfa_reg == stack_pointer_rtx);
7245 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
7246
7247 /* And probe at *sp. */
7248 emit_stack_probe (stack_pointer_rtx);
7249 emit_insn (gen_blockage ());
7250 }
7251
7252 /* We need to allocate space for the residual, but we do not need
7253 to probe the residual. */
7254 HOST_WIDE_INT residual = (i - probe_interval - size);
7255 if (residual)
7256 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7257 GEN_INT (residual), -1,
7258 m->fs.cfa_reg == stack_pointer_rtx);
7259 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
7260 }
7261 else
7262 {
7263 /* We expect the GP registers to be saved when probes are used
7264 as the probing sequences might need a scratch register and
7265 the routine to allocate one assumes the integer registers
7266 have already been saved. */
7267 gcc_assert (int_registers_saved);
7268
7269 struct scratch_reg sr;
7270 get_scratch_register_on_entry (&sr);
7271
7272 /* If we needed to save a register, then account for any space
7273 that was pushed (we are not going to pop the register when
7274 we do the restore). */
7275 if (sr.saved)
7276 size -= UNITS_PER_WORD;
7277
7278 /* Step 1: round SIZE down to a multiple of the interval. */
7279 HOST_WIDE_INT rounded_size = size & -probe_interval;
7280
7281 /* Step 2: compute final value of the loop counter. Use lea if
7282 possible. */
7283 rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
7284 rtx insn;
7285 if (address_no_seg_operand (addr, Pmode))
7286 insn = emit_insn (gen_rtx_SET (sr.reg, addr));
7287 else
7288 {
7289 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
7290 insn = emit_insn (gen_rtx_SET (sr.reg,
7291 gen_rtx_PLUS (Pmode, sr.reg,
7292 stack_pointer_rtx)));
7293 }
7294 if (m->fs.cfa_reg == stack_pointer_rtx)
7295 {
7296 add_reg_note (insn, REG_CFA_DEF_CFA,
7297 plus_constant (Pmode, sr.reg,
7298 m->fs.cfa_offset + rounded_size));
7299 RTX_FRAME_RELATED_P (insn) = 1;
7300 }
7301
7302 /* Step 3: the loop. */
7303 rtx size_rtx = GEN_INT (rounded_size);
7304 insn = emit_insn (gen_adjust_stack_and_probe (Pmode, sr.reg, sr.reg,
7305 size_rtx));
7306 if (m->fs.cfa_reg == stack_pointer_rtx)
7307 {
7308 m->fs.cfa_offset += rounded_size;
7309 add_reg_note (insn, REG_CFA_DEF_CFA,
7310 plus_constant (Pmode, stack_pointer_rtx,
7311 m->fs.cfa_offset));
7312 RTX_FRAME_RELATED_P (insn) = 1;
7313 }
7314 m->fs.sp_offset += rounded_size;
7315 emit_insn (gen_blockage ());
7316
7317 /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
7318 is equal to ROUNDED_SIZE. */
7319
7320 if (size != rounded_size)
7321 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7322 GEN_INT (rounded_size - size), -1,
7323 m->fs.cfa_reg == stack_pointer_rtx);
7324 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
7325
7326 /* This does not deallocate the space reserved for the scratch
7327 register. That will be deallocated in the epilogue. */
7328 release_scratch_register_on_entry (&sr, size, false);
7329 }
7330
7331 /* Make sure nothing is scheduled before we are done. */
7332 emit_insn (gen_blockage ());
7333 }
7334
7335 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
7336
7337 INT_REGISTERS_SAVED is true if integer registers have already been
7338 pushed on the stack. */
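
/* A rough example (assuming a 4 KiB probe interval and 8-byte words, so the
   "dope" below is 32 bytes): for a 6 KiB allocation the code emits
   approximately  sub $8224,%rsp  (two intervals + dope) followed by a probe,
   then  sub $2048,%rsp  and another probe, and finally  add $4128,%rsp  to
   give back the extra interval + dope, for a net allocation of 6 KiB.  */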
7339
7340 static void
7341 ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
7342 const bool int_registers_saved)
7343 {
7344 /* We skip the probe for the first interval + a small dope of 4 words and
7345 probe that many bytes past the specified size to maintain a protection
7346 area at the bottom of the stack. */
7347 const int dope = 4 * UNITS_PER_WORD;
7348 rtx size_rtx = GEN_INT (size), last;
7349
7350 /* See if we have a constant small number of probes to generate. If so,
7351 that's the easy case. The run-time loop is made up of 9 insns in the
7352 generic case while the compile-time loop is made up of 3+2*(n-1) insns
7353 for n # of intervals. */
7354 if (size <= 4 * get_probe_interval ())
7355 {
7356 HOST_WIDE_INT i, adjust;
7357 bool first_probe = true;
7358
7359 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
7360 values of N from 1 until it exceeds SIZE. If only one probe is
7361 needed, this will not generate any code. Then adjust and probe
7362 to PROBE_INTERVAL + SIZE. */
7363 for (i = get_probe_interval (); i < size; i += get_probe_interval ())
7364 {
7365 if (first_probe)
7366 {
7367 adjust = 2 * get_probe_interval () + dope;
7368 first_probe = false;
7369 }
7370 else
7371 adjust = get_probe_interval ();
7372
7373 emit_insn (gen_rtx_SET (stack_pointer_rtx,
7374 plus_constant (Pmode, stack_pointer_rtx,
7375 -adjust)));
7376 emit_stack_probe (stack_pointer_rtx);
7377 }
7378
7379 if (first_probe)
7380 adjust = size + get_probe_interval () + dope;
7381 else
7382 adjust = size + get_probe_interval () - i;
7383
7384 emit_insn (gen_rtx_SET (stack_pointer_rtx,
7385 plus_constant (Pmode, stack_pointer_rtx,
7386 -adjust)));
7387 emit_stack_probe (stack_pointer_rtx);
7388
7389 /* Adjust back to account for the additional first interval. */
7390 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
7391 plus_constant (Pmode, stack_pointer_rtx,
7392 (get_probe_interval ()
7393 + dope))));
7394 }
7395
7396 /* Otherwise, do the same as above, but in a loop. Note that we must be
7397 extra careful with variables wrapping around because we might be at
7398 the very top (or the very bottom) of the address space and we have
7399 to be able to handle this case properly; in particular, we use an
7400 equality test for the loop condition. */
7401 else
7402 {
7403 /* We expect the GP registers to be saved when probes are used
7404 as the probing sequences might need a scratch register and
7405 the routine to allocate one assumes the integer registers
7406 have already been saved. */
7407 gcc_assert (int_registers_saved);
7408
7409 HOST_WIDE_INT rounded_size;
7410 struct scratch_reg sr;
7411
7412 get_scratch_register_on_entry (&sr);
7413
7414 /* If we needed to save a register, then account for any space
7415 that was pushed (we are not going to pop the register when
7416 we do the restore). */
7417 if (sr.saved)
7418 size -= UNITS_PER_WORD;
7419
7420 /* Step 1: round SIZE to the previous multiple of the interval. */
7421
7422 rounded_size = ROUND_DOWN (size, get_probe_interval ());
7423
7424
7425 /* Step 2: compute initial and final value of the loop counter. */
7426
7427 /* SP = SP_0 + PROBE_INTERVAL. */
7428 emit_insn (gen_rtx_SET (stack_pointer_rtx,
7429 plus_constant (Pmode, stack_pointer_rtx,
7430 - (get_probe_interval () + dope))));
7431
7432 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
7433 if (rounded_size <= (HOST_WIDE_INT_1 << 31))
7434 emit_insn (gen_rtx_SET (sr.reg,
7435 plus_constant (Pmode, stack_pointer_rtx,
7436 -rounded_size)));
7437 else
7438 {
7439 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
7440 emit_insn (gen_rtx_SET (sr.reg,
7441 gen_rtx_PLUS (Pmode, sr.reg,
7442 stack_pointer_rtx)));
7443 }
7444
7445
7446 /* Step 3: the loop
7447
7448 do
7449 {
7450 SP = SP + PROBE_INTERVAL
7451 probe at SP
7452 }
7453 while (SP != LAST_ADDR)
7454
7455 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
7456 values of N from 1 until it is equal to ROUNDED_SIZE. */
7457
7458 emit_insn (gen_adjust_stack_and_probe (Pmode, sr.reg, sr.reg, size_rtx));
7459
7460
7461 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
7462 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
7463
7464 if (size != rounded_size)
7465 {
7466 emit_insn (gen_rtx_SET (stack_pointer_rtx,
7467 plus_constant (Pmode, stack_pointer_rtx,
7468 rounded_size - size)));
7469 emit_stack_probe (stack_pointer_rtx);
7470 }
7471
7472 /* Adjust back to account for the additional first interval. */
7473 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
7474 plus_constant (Pmode, stack_pointer_rtx,
7475 (get_probe_interval ()
7476 + dope))));
7477
7478 /* This does not deallocate the space reserved for the scratch
7479 register. That will be deallocated in the epilogue. */
7480 release_scratch_register_on_entry (&sr, size, false);
7481 }
7482
7483 /* Even if the stack pointer isn't the CFA register, we need to correctly
7484 describe the adjustments made to it, in particular differentiate the
7485 frame-related ones from the frame-unrelated ones. */
7486 if (size > 0)
7487 {
7488 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
7489 XVECEXP (expr, 0, 0)
7490 = gen_rtx_SET (stack_pointer_rtx,
7491 plus_constant (Pmode, stack_pointer_rtx, -size));
7492 XVECEXP (expr, 0, 1)
7493 = gen_rtx_SET (stack_pointer_rtx,
7494 plus_constant (Pmode, stack_pointer_rtx,
7495 get_probe_interval () + dope + size));
7496 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
7497 RTX_FRAME_RELATED_P (last) = 1;
7498
7499 cfun->machine->fs.sp_offset += size;
7500 }
7501
7502 /* Make sure nothing is scheduled before we are done. */
7503 emit_insn (gen_blockage ());
7504 }
7505
7506 /* Adjust the stack pointer up to REG while probing it. */
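
/* The emitted loop looks roughly like this (64-bit AT&T syntax, assuming the
   default 4 KiB probe interval; REG holds the final stack address):

	.LPSRL0:
		subq	$4096, %rsp
		orq	$0, (%rsp)
		cmpq	%reg, %rsp
		jne	.LPSRL0				*/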
7507
7508 const char *
7509 output_adjust_stack_and_probe (rtx reg)
7510 {
7511 static int labelno = 0;
7512 char loop_lab[32];
7513 rtx xops[2];
7514
7515 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
7516
7517 /* Loop. */
7518 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
7519
7520 /* SP = SP + PROBE_INTERVAL. */
7521 xops[0] = stack_pointer_rtx;
7522 xops[1] = GEN_INT (get_probe_interval ());
7523 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
7524
7525 /* Probe at SP. */
7526 xops[1] = const0_rtx;
7527 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
7528
7529 /* Test if SP == LAST_ADDR. */
7530 xops[0] = stack_pointer_rtx;
7531 xops[1] = reg;
7532 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
7533
7534 /* Branch. */
7535 fputs ("\tjne\t", asm_out_file);
7536 assemble_name_raw (asm_out_file, loop_lab);
7537 fputc ('\n', asm_out_file);
7538
7539 return "";
7540 }
7541
7542 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
7543 inclusive. These are offsets from the current stack pointer.
7544
7545 INT_REGISTERS_SAVED is true if integer registers have already been
7546 pushed on the stack. */
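
/* For example (a sketch, assuming a 4 KiB probe interval): with FIRST of
   16 KiB and SIZE of 8 KiB the inline case probes at -20480(%rsp) and
   -24576(%rsp) without ever moving the stack pointer; the large case
   instead runs the equivalent loop via gen_probe_stack_range.  */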
7547
7548 static void
7549 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
7550 const bool int_registers_saved)
7551 {
7552 /* See if we have a constant small number of probes to generate. If so,
7553 that's the easy case. The run-time loop is made up of 6 insns in the
7554 generic case while the compile-time loop is made up of n insns for n #
7555 of intervals. */
7556 if (size <= 6 * get_probe_interval ())
7557 {
7558 HOST_WIDE_INT i;
7559
7560 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
7561 it exceeds SIZE. If only one probe is needed, this will not
7562 generate any code. Then probe at FIRST + SIZE. */
7563 for (i = get_probe_interval (); i < size; i += get_probe_interval ())
7564 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
7565 -(first + i)));
7566
7567 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
7568 -(first + size)));
7569 }
7570
7571 /* Otherwise, do the same as above, but in a loop. Note that we must be
7572 extra careful with variables wrapping around because we might be at
7573 the very top (or the very bottom) of the address space and we have
7574 to be able to handle this case properly; in particular, we use an
7575 equality test for the loop condition. */
7576 else
7577 {
7578 /* We expect the GP registers to be saved when probes are used
7579 as the probing sequences might need a scratch register and
7580 the routine to allocate one assumes the integer registers
7581 have already been saved. */
7582 gcc_assert (int_registers_saved);
7583
7584 HOST_WIDE_INT rounded_size, last;
7585 struct scratch_reg sr;
7586
7587 get_scratch_register_on_entry (&sr);
7588
7589
7590 /* Step 1: round SIZE to the previous multiple of the interval. */
7591
7592 rounded_size = ROUND_DOWN (size, get_probe_interval ());
7593
7594
7595 /* Step 2: compute initial and final value of the loop counter. */
7596
7597 /* TEST_OFFSET = FIRST. */
7598 emit_move_insn (sr.reg, GEN_INT (-first));
7599
7600 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
7601 last = first + rounded_size;
7602
7603
7604 /* Step 3: the loop
7605
7606 do
7607 {
7608 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
7609 probe at TEST_ADDR
7610 }
7611 while (TEST_ADDR != LAST_ADDR)
7612
7613 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
7614 until it is equal to ROUNDED_SIZE. */
7615
7616 emit_insn
7617 (gen_probe_stack_range (Pmode, sr.reg, sr.reg, GEN_INT (-last)));
7618
7619
7620 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
7621 that SIZE is equal to ROUNDED_SIZE. */
7622
7623 if (size != rounded_size)
7624 emit_stack_probe (plus_constant (Pmode,
7625 gen_rtx_PLUS (Pmode,
7626 stack_pointer_rtx,
7627 sr.reg),
7628 rounded_size - size));
7629
7630 release_scratch_register_on_entry (&sr, size, true);
7631 }
7632
7633 /* Make sure nothing is scheduled before we are done. */
7634 emit_insn (gen_blockage ());
7635 }
7636
7637 /* Probe a range of stack addresses from REG to END, inclusive. These are
7638 offsets from the current stack pointer. */
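
/* The emitted loop is roughly (64-bit AT&T syntax, 4 KiB probe interval;
   REG holds the negated test offset and END the negated last offset):

	.LPSRL1:
		subq	$4096, %reg
		orq	$0, (%rsp,%reg)
		cmpq	%end, %reg
		jne	.LPSRL1				*/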
7639
7640 const char *
7641 output_probe_stack_range (rtx reg, rtx end)
7642 {
7643 static int labelno = 0;
7644 char loop_lab[32];
7645 rtx xops[3];
7646
7647 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
7648
7649 /* Loop. */
7650 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
7651
7652 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
7653 xops[0] = reg;
7654 xops[1] = GEN_INT (get_probe_interval ());
7655 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
7656
7657 /* Probe at TEST_ADDR. */
7658 xops[0] = stack_pointer_rtx;
7659 xops[1] = reg;
7660 xops[2] = const0_rtx;
7661 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
7662
7663 /* Test if TEST_ADDR == LAST_ADDR. */
7664 xops[0] = reg;
7665 xops[1] = end;
7666 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
7667
7668 /* Branch. */
7669 fputs ("\tjne\t", asm_out_file);
7670 assemble_name_raw (asm_out_file, loop_lab);
7671 fputc ('\n', asm_out_file);
7672
7673 return "";
7674 }
7675
7676 /* Set stack_frame_required to false if stack frame isn't required.
7677 Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
7678 slot used if stack frame is required and CHECK_STACK_SLOT is true. */
7679
7680 static void
7681 ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
7682 bool check_stack_slot)
7683 {
7684 HARD_REG_SET set_up_by_prologue, prologue_used;
7685 basic_block bb;
7686
7687 CLEAR_HARD_REG_SET (prologue_used);
7688 CLEAR_HARD_REG_SET (set_up_by_prologue);
7689 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
7690 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
7691 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
7692 HARD_FRAME_POINTER_REGNUM);
7693
7694 /* The preferred stack alignment is the minimum stack alignment. */
7695 if (stack_alignment > crtl->preferred_stack_boundary)
7696 stack_alignment = crtl->preferred_stack_boundary;
7697
7698 bool require_stack_frame = false;
7699
7700 FOR_EACH_BB_FN (bb, cfun)
7701 {
7702 rtx_insn *insn;
7703 FOR_BB_INSNS (bb, insn)
7704 if (NONDEBUG_INSN_P (insn)
7705 && requires_stack_frame_p (insn, prologue_used,
7706 set_up_by_prologue))
7707 {
7708 require_stack_frame = true;
7709
7710 if (check_stack_slot)
7711 {
7712 /* Find the maximum stack alignment. */
7713 subrtx_iterator::array_type array;
7714 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
7715 if (MEM_P (*iter)
7716 && (reg_mentioned_p (stack_pointer_rtx,
7717 *iter)
7718 || reg_mentioned_p (frame_pointer_rtx,
7719 *iter)))
7720 {
7721 unsigned int alignment = MEM_ALIGN (*iter);
7722 if (alignment > stack_alignment)
7723 stack_alignment = alignment;
7724 }
7725 }
7726 }
7727 }
7728
7729 cfun->machine->stack_frame_required = require_stack_frame;
7730 }
7731
7732 /* Finalize stack_realign_needed and frame_pointer_needed flags, which
7733 will guide prologue/epilogue to be generated in correct form. */
7734
7735 static void
7736 ix86_finalize_stack_frame_flags (void)
7737 {
7738 /* Check if stack realignment is really needed after reload, and
7739 store the result in cfun. */
7740 unsigned int incoming_stack_boundary
7741 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
7742 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
7743 unsigned int stack_alignment
7744 = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
7745 ? crtl->max_used_stack_slot_alignment
7746 : crtl->stack_alignment_needed);
7747 unsigned int stack_realign
7748 = (incoming_stack_boundary < stack_alignment);
7749 bool recompute_frame_layout_p = false;
7750
7751 if (crtl->stack_realign_finalized)
7752 {
7753 /* After stack_realign_needed is finalized, we can no longer
7754 change it. */
7755 gcc_assert (crtl->stack_realign_needed == stack_realign);
7756 return;
7757 }
7758
7759 /* It is always safe to compute max_used_stack_alignment. We
7760 compute it only if a 128-bit aligned load/store may be generated
7761 on a misaligned stack slot, which would lead to a segfault. */
7762 bool check_stack_slot
7763 = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
7764 ix86_find_max_used_stack_alignment (stack_alignment,
7765 check_stack_slot);
7766
7767 /* If the only reason for frame_pointer_needed is that we conservatively
7768 assumed stack realignment might be needed or -fno-omit-frame-pointer
7769 is used, but in the end nothing that needed the stack alignment was
7770 spilled and there was no stack access, clear frame_pointer_needed and
7771 say we don't need stack realignment. */
7772 if ((stack_realign || (!flag_omit_frame_pointer && optimize))
7773 && frame_pointer_needed
7774 && crtl->is_leaf
7775 && crtl->sp_is_unchanging
7776 && !ix86_current_function_calls_tls_descriptor
7777 && !crtl->accesses_prior_frames
7778 && !cfun->calls_alloca
7779 && !crtl->calls_eh_return
7780 /* See ira_setup_eliminable_regset for the rationale. */
7781 && !(STACK_CHECK_MOVING_SP
7782 && flag_stack_check
7783 && flag_exceptions
7784 && cfun->can_throw_non_call_exceptions)
7785 && !ix86_frame_pointer_required ()
7786 && ix86_get_frame_size () == 0
7787 && ix86_nsaved_sseregs () == 0
7788 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
7789 {
7790 if (cfun->machine->stack_frame_required)
7791 {
7792 /* Stack frame is required. If stack alignment needed is less
7793 than incoming stack boundary, don't realign stack. */
7794 stack_realign = incoming_stack_boundary < stack_alignment;
7795 if (!stack_realign)
7796 {
7797 crtl->max_used_stack_slot_alignment
7798 = incoming_stack_boundary;
7799 crtl->stack_alignment_needed
7800 = incoming_stack_boundary;
7801 /* Also update preferred_stack_boundary for leaf
7802 functions. */
7803 crtl->preferred_stack_boundary
7804 = incoming_stack_boundary;
7805 }
7806 }
7807 else
7808 {
7809 /* If drap has been set, but it actually isn't live at the
7810 start of the function, there is no reason to set it up. */
7811 if (crtl->drap_reg)
7812 {
7813 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
7814 if (! REGNO_REG_SET_P (DF_LR_IN (bb),
7815 REGNO (crtl->drap_reg)))
7816 {
7817 crtl->drap_reg = NULL_RTX;
7818 crtl->need_drap = false;
7819 }
7820 }
7821 else
7822 cfun->machine->no_drap_save_restore = true;
7823
7824 frame_pointer_needed = false;
7825 stack_realign = false;
7826 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
7827 crtl->stack_alignment_needed = incoming_stack_boundary;
7828 crtl->stack_alignment_estimated = incoming_stack_boundary;
7829 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
7830 crtl->preferred_stack_boundary = incoming_stack_boundary;
7831 df_finish_pass (true);
7832 df_scan_alloc (NULL);
7833 df_scan_blocks ();
7834 df_compute_regs_ever_live (true);
7835 df_analyze ();
7836
7837 if (flag_var_tracking)
7838 {
7839 /* Since frame pointer is no longer available, replace it with
7840 stack pointer - UNITS_PER_WORD in debug insns. */
7841 df_ref ref, next;
7842 for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
7843 ref; ref = next)
7844 {
7845 next = DF_REF_NEXT_REG (ref);
7846 if (!DF_REF_INSN_INFO (ref))
7847 continue;
7848
7849 /* Make sure the next ref is for a different instruction,
7850 so that we're not affected by the rescan. */
7851 rtx_insn *insn = DF_REF_INSN (ref);
7852 while (next && DF_REF_INSN (next) == insn)
7853 next = DF_REF_NEXT_REG (next);
7854
7855 if (DEBUG_INSN_P (insn))
7856 {
7857 bool changed = false;
7858 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
7859 {
7860 rtx *loc = DF_REF_LOC (ref);
7861 if (*loc == hard_frame_pointer_rtx)
7862 {
7863 *loc = plus_constant (Pmode,
7864 stack_pointer_rtx,
7865 -UNITS_PER_WORD);
7866 changed = true;
7867 }
7868 }
7869 if (changed)
7870 df_insn_rescan (insn);
7871 }
7872 }
7873 }
7874
7875 recompute_frame_layout_p = true;
7876 }
7877 }
7878 else if (crtl->max_used_stack_slot_alignment >= 128
7879 && cfun->machine->stack_frame_required)
7880 {
7881 /* We don't need to realign the stack. max_used_stack_alignment is
7882 used to decide how the stack frame should be aligned. This is
7883 independent of any psABI and of 32-bit vs 64-bit. */
7884 cfun->machine->max_used_stack_alignment
7885 = stack_alignment / BITS_PER_UNIT;
7886 }
7887
7888 if (crtl->stack_realign_needed != stack_realign)
7889 recompute_frame_layout_p = true;
7890 crtl->stack_realign_needed = stack_realign;
7891 crtl->stack_realign_finalized = true;
7892 if (recompute_frame_layout_p)
7893 ix86_compute_frame_layout ();
7894 }
7895
7896 /* Delete SET_GOT right after entry block if it is allocated to reg. */
7897
7898 static void
7899 ix86_elim_entry_set_got (rtx reg)
7900 {
7901 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
7902 rtx_insn *c_insn = BB_HEAD (bb);
7903 if (!NONDEBUG_INSN_P (c_insn))
7904 c_insn = next_nonnote_nondebug_insn (c_insn);
7905 if (c_insn && NONJUMP_INSN_P (c_insn))
7906 {
7907 rtx pat = PATTERN (c_insn);
7908 if (GET_CODE (pat) == PARALLEL)
7909 {
7910 rtx vec = XVECEXP (pat, 0, 0);
7911 if (GET_CODE (vec) == SET
7912 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
7913 && REGNO (XEXP (vec, 0)) == REGNO (reg))
7914 delete_insn (c_insn);
7915 }
7916 }
7917 }
7918
7919 static rtx
7920 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
7921 {
7922 rtx addr, mem;
7923
7924 if (offset)
7925 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
7926 mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
7927 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
7928 }
7929
7930 static inline rtx
7931 gen_frame_load (rtx reg, rtx frame_reg, int offset)
7932 {
7933 return gen_frame_set (reg, frame_reg, offset, false);
7934 }
7935
7936 static inline rtx
7937 gen_frame_store (rtx reg, rtx frame_reg, int offset)
7938 {
7939 return gen_frame_set (reg, frame_reg, offset, true);
7940 }
7941
7942 static void
7943 ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
7944 {
7945 struct machine_function *m = cfun->machine;
7946 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
7947 + m->call_ms2sysv_extra_regs;
7948 rtvec v = rtvec_alloc (ncregs + 1);
7949 unsigned int align, i, vi = 0;
7950 rtx_insn *insn;
7951 rtx sym, addr;
7952 rtx rax = gen_rtx_REG (word_mode, AX_REG);
7953 const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
7954
7955 /* AL should only be live with sysv_abi. */
7956 gcc_assert (!ix86_eax_live_at_start_p ());
7957 gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);
7958
7959 /* Set up RAX as the stub's base pointer. We use stack_realign_offset,
7960 which is valid whether or not we've actually realigned the stack. */
7961 align = GET_MODE_ALIGNMENT (V4SFmode);
7962 addr = choose_baseaddr (frame.stack_realign_offset
7963 + xlogue.get_stub_ptr_offset (), &align, AX_REG);
7964 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
7965
7966 emit_insn (gen_rtx_SET (rax, addr));
7967
7968 /* Get the stub symbol. */
7969 sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
7970 : XLOGUE_STUB_SAVE);
7971 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
7972
7973 for (i = 0; i < ncregs; ++i)
7974 {
7975 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
7976 rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
7977 r.regno);
7978 RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
7979 }
7980
7981 gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
7982
7983 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
7984 RTX_FRAME_RELATED_P (insn) = true;
7985 }
7986
7987 /* Generate and return an insn body to AND X with Y. */
7988
7989 static rtx_insn *
7990 gen_and2_insn (rtx x, rtx y)
7991 {
7992 enum insn_code icode = optab_handler (and_optab, GET_MODE (x));
7993
7994 gcc_assert (insn_operand_matches (icode, 0, x));
7995 gcc_assert (insn_operand_matches (icode, 1, x));
7996 gcc_assert (insn_operand_matches (icode, 2, y));
7997
7998 return GEN_FCN (icode) (x, x, y);
7999 }
8000
8001 /* Expand the prologue into a bunch of separate insns. */
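
/* Roughly, the order of events below is: handle the ms_hook byte sequence
   or push the incoming static chain, set up DRAP and realign the stack if
   required, push the frame pointer, save the integer registers (via push
   or mov), realign via the frame pointer if requested, allocate and probe
   the remaining frame, save the SSE registers or call the ms2sysv save
   stub, and finally emit SET_GOT for profiling plus the scheduling
   blockages.  */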
8002
8003 void
8004 ix86_expand_prologue (void)
8005 {
8006 struct machine_function *m = cfun->machine;
8007 rtx insn, t;
8008 HOST_WIDE_INT allocate;
8009 bool int_registers_saved;
8010 bool sse_registers_saved;
8011 bool save_stub_call_needed;
8012 rtx static_chain = NULL_RTX;
8013
8014 if (ix86_function_naked (current_function_decl))
8015 return;
8016
8017 ix86_finalize_stack_frame_flags ();
8018
8019 /* DRAP should not coexist with stack_realign_fp */
8020 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8021
8022 memset (&m->fs, 0, sizeof (m->fs));
8023
8024 /* Initialize CFA state for before the prologue. */
8025 m->fs.cfa_reg = stack_pointer_rtx;
8026 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
8027
8028 /* Track SP offset to the CFA. We continue tracking this after we've
8029 swapped the CFA register away from SP. In the case of re-alignment
8030 this is fudged; we're interested in offsets within the local frame. */
8031 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
8032 m->fs.sp_valid = true;
8033 m->fs.sp_realigned = false;
8034
8035 const struct ix86_frame &frame = cfun->machine->frame;
8036
8037 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
8038 {
8039 /* We should have already generated an error for any use of
8040 ms_hook on a nested function. */
8041 gcc_checking_assert (!ix86_static_chain_on_stack);
8042
8043 /* Check if profiling is active and we shall use the
8044 profiling-before-prologue variant. If so, sorry. */
8045 if (crtl->profile && flag_fentry != 0)
8046 sorry ("%<ms_hook_prologue%> attribute is not compatible "
8047 "with %<-mfentry%> for 32-bit");
8048
8049 /* In ix86_asm_output_function_label we emitted:
8050 8b ff movl.s %edi,%edi
8051 55 push %ebp
8052 8b ec movl.s %esp,%ebp
8053
8054 This matches the hookable function prologue in Win32 API
8055 functions in Microsoft Windows XP Service Pack 2 and newer.
8056 Wine uses this to enable Windows apps to hook the Win32 API
8057 functions provided by Wine.
8058
8059 What that means is that we've already set up the frame pointer. */
8060
8061 if (frame_pointer_needed
8062 && !(crtl->drap_reg && crtl->stack_realign_needed))
8063 {
8064 rtx push, mov;
8065
8066 /* We've decided to use the frame pointer already set up.
8067 Describe this to the unwinder by pretending that both
8068 push and mov insns happen right here.
8069
8070 Putting the unwind info here at the end of the ms_hook
8071 is done so that we can make absolutely certain we get
8072 the required byte sequence at the start of the function,
8073 rather than relying on an assembler that can produce
8074 the exact encoding required.
8075
8076 However it does mean (in the unpatched case) that we have
8077 a 1 insn window where the asynchronous unwind info is
8078 incorrect. However, if we placed the unwind info at
8079 its correct location we would have incorrect unwind info
8080 in the patched case. Which is probably all moot since
8081 I don't expect Wine generates dwarf2 unwind info for the
8082 system libraries that use this feature. */
8083
8084 insn = emit_insn (gen_blockage ());
8085
8086 push = gen_push (hard_frame_pointer_rtx);
8087 mov = gen_rtx_SET (hard_frame_pointer_rtx,
8088 stack_pointer_rtx);
8089 RTX_FRAME_RELATED_P (push) = 1;
8090 RTX_FRAME_RELATED_P (mov) = 1;
8091
8092 RTX_FRAME_RELATED_P (insn) = 1;
8093 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8094 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
8095
8096 /* Note that gen_push incremented m->fs.cfa_offset, even
8097 though we didn't emit the push insn here. */
8098 m->fs.cfa_reg = hard_frame_pointer_rtx;
8099 m->fs.fp_offset = m->fs.cfa_offset;
8100 m->fs.fp_valid = true;
8101 }
8102 else
8103 {
8104 /* The frame pointer is not needed so pop %ebp again.
8105 This leaves us with a pristine state. */
8106 emit_insn (gen_pop (hard_frame_pointer_rtx));
8107 }
8108 }
8109
8110 /* The first insn of a function that accepts its static chain on the
8111 stack is to push the register that would be filled in by a direct
8112 call. This insn will be skipped by the trampoline. */
8113 else if (ix86_static_chain_on_stack)
8114 {
8115 static_chain = ix86_static_chain (cfun->decl, false);
8116 insn = emit_insn (gen_push (static_chain));
8117 emit_insn (gen_blockage ());
8118
8119 /* We don't want to interpret this push insn as a register save,
8120 only as a stack adjustment. The real copy of the register as
8121 a save will be done later, if needed. */
8122 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
8123 t = gen_rtx_SET (stack_pointer_rtx, t);
8124 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
8125 RTX_FRAME_RELATED_P (insn) = 1;
8126 }
8127
8128 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
8129 DRAP is needed and stack realignment is really needed after reload. */
8130 if (stack_realign_drap)
8131 {
8132 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8133
8134 /* Can't use DRAP in interrupt function. */
8135 if (cfun->machine->func_type != TYPE_NORMAL)
8136 sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
8137 "in interrupt service routine. This may be worked "
8138 "around by avoiding functions with aggregate return.");
8139
8140 /* Only need to push parameter pointer reg if it is caller saved. */
8141 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
8142 {
8143 /* Push arg pointer reg */
8144 insn = emit_insn (gen_push (crtl->drap_reg));
8145 RTX_FRAME_RELATED_P (insn) = 1;
8146 }
8147
8148 /* Grab the argument pointer. */
8149 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
8150 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
8151 RTX_FRAME_RELATED_P (insn) = 1;
8152 m->fs.cfa_reg = crtl->drap_reg;
8153 m->fs.cfa_offset = 0;
8154
8155 /* Align the stack. */
8156 insn = emit_insn (gen_and2_insn (stack_pointer_rtx,
8157 GEN_INT (-align_bytes)));
8158 RTX_FRAME_RELATED_P (insn) = 1;
8159
8160 /* Replicate the return address on the stack so that return
8161 address can be reached via (argp - 1) slot. This is needed
8162 to implement macro RETURN_ADDR_RTX and intrinsic function
8163 expand_builtin_return_addr etc. */
8164 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
8165 t = gen_frame_mem (word_mode, t);
8166 insn = emit_insn (gen_push (t));
8167 RTX_FRAME_RELATED_P (insn) = 1;
8168
8169 /* For the purposes of frame and register save area addressing,
8170 we've started over with a new frame. */
8171 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
8172 m->fs.realigned = true;
8173
8174 if (static_chain)
8175 {
8176 /* Replicate static chain on the stack so that static chain
8177 can be reached via (argp - 2) slot. This is needed for
8178 nested function with stack realignment. */
8179 insn = emit_insn (gen_push (static_chain));
8180 RTX_FRAME_RELATED_P (insn) = 1;
8181 }
8182 }
8183
8184 int_registers_saved = (frame.nregs == 0);
8185 sse_registers_saved = (frame.nsseregs == 0);
8186 save_stub_call_needed = (m->call_ms2sysv);
8187 gcc_assert (sse_registers_saved || !save_stub_call_needed);
8188
8189 if (frame_pointer_needed && !m->fs.fp_valid)
8190 {
8191 /* Note: AT&T enter does NOT have reversed args. Enter is probably
8192 slower on all targets. Also sdb didn't like it. */
8193 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8194 RTX_FRAME_RELATED_P (insn) = 1;
8195
8196 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
8197 {
8198 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8199 RTX_FRAME_RELATED_P (insn) = 1;
8200
8201 if (m->fs.cfa_reg == stack_pointer_rtx)
8202 m->fs.cfa_reg = hard_frame_pointer_rtx;
8203 m->fs.fp_offset = m->fs.sp_offset;
8204 m->fs.fp_valid = true;
8205 }
8206 }
8207
8208 if (!int_registers_saved)
8209 {
8210 /* If saving registers via PUSH, do so now. */
8211 if (!frame.save_regs_using_mov)
8212 {
8213 ix86_emit_save_regs ();
8214 int_registers_saved = true;
8215 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
8216 }
8217
8218 /* When using red zone we may start register saving before allocating
8219 the stack frame saving one cycle of the prologue. However, avoid
8220 doing this if we have to probe the stack; at least on x86_64 the
8221 stack probe can turn into a call that clobbers a red zone location. */
8222 else if (ix86_using_red_zone ()
8223 && (! TARGET_STACK_PROBE
8224 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
8225 {
8226 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
8227 int_registers_saved = true;
8228 }
8229 }
8230
8231 if (stack_realign_fp)
8232 {
8233 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8234 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8235
8236 /* Record last valid frame pointer offset. */
8237 m->fs.sp_realigned_fp_last = frame.reg_save_offset;
8238
8239 /* The computation of the size of the re-aligned stack frame means
8240 that we must allocate the size of the register save area before
8241 performing the actual alignment. Otherwise we cannot guarantee
8242 that there's enough storage above the realignment point. */
8243 allocate = frame.reg_save_offset - m->fs.sp_offset
8244 + frame.stack_realign_allocate;
8245 if (allocate)
8246 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8247 GEN_INT (-allocate), -1, false);
8248
8249 /* Align the stack. */
8250 emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes)));
8251 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
8252 m->fs.sp_realigned_offset = m->fs.sp_offset
8253 - frame.stack_realign_allocate;
8254 /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
8255 Beyond this point, stack access should be done via choose_baseaddr or
8256 by using sp_valid_at and fp_valid_at to determine the correct base
8257 register. Henceforth, any CFA offset should be thought of as logical
8258 and not physical. */
8259 gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
8260 gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
8261 m->fs.sp_realigned = true;
8262
8263 /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
8264 is needed to describe where a register is saved using a realigned
8265 stack pointer, so we need to invalidate the stack pointer for that
8266 target. */
8267 if (TARGET_SEH)
8268 m->fs.sp_valid = false;
8269
8270 /* If SP offset is non-immediate after allocation of the stack frame,
8271 then emit SSE saves or stub call prior to allocating the rest of the
8272 stack frame. This is less efficient for the out-of-line stub because
8273 we can't combine allocations across the call barrier, but it's better
8274 than using a scratch register. */
8275 else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
8276 - m->fs.sp_realigned_offset),
8277 Pmode))
8278 {
8279 if (!sse_registers_saved)
8280 {
8281 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8282 sse_registers_saved = true;
8283 }
8284 else if (save_stub_call_needed)
8285 {
8286 ix86_emit_outlined_ms2sysv_save (frame);
8287 save_stub_call_needed = false;
8288 }
8289 }
8290 }
8291
8292 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
8293
8294 if (flag_stack_usage_info)
8295 {
8296 /* We start to count from ARG_POINTER. */
8297 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
8298
8299 /* If it was realigned, take into account the fake frame. */
8300 if (stack_realign_drap)
8301 {
8302 if (ix86_static_chain_on_stack)
8303 stack_size += UNITS_PER_WORD;
8304
8305 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
8306 stack_size += UNITS_PER_WORD;
8307
8308 /* This over-estimates by 1 minimal-stack-alignment-unit but
8309 mitigates that by counting in the new return address slot. */
8310 current_function_dynamic_stack_size
8311 += crtl->stack_alignment_needed / BITS_PER_UNIT;
8312 }
8313
8314 current_function_static_stack_size = stack_size;
8315 }
8316
8317 /* On SEH target with very large frame size, allocate an area to save
8318 SSE registers (as the very large allocation won't be described). */
8319 if (TARGET_SEH
8320 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
8321 && !sse_registers_saved)
8322 {
8323 HOST_WIDE_INT sse_size
8324 = frame.sse_reg_save_offset - frame.reg_save_offset;
8325
8326 gcc_assert (int_registers_saved);
8327
8328 /* No need to do stack checking as the area will be immediately
8329 written. */
8330 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8331 GEN_INT (-sse_size), -1,
8332 m->fs.cfa_reg == stack_pointer_rtx);
8333 allocate -= sse_size;
8334 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8335 sse_registers_saved = true;
8336 }
8337
8338 /* The stack has already been decremented by the instruction calling us
8339 so probe if the size is non-negative to preserve the protection area. */
8340 if (allocate >= 0
8341 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
8342 || flag_stack_clash_protection))
8343 {
8344 if (flag_stack_clash_protection)
8345 {
8346 ix86_adjust_stack_and_probe_stack_clash (allocate,
8347 int_registers_saved);
8348 allocate = 0;
8349 }
8350 else if (STACK_CHECK_MOVING_SP)
8351 {
8352 if (!(crtl->is_leaf && !cfun->calls_alloca
8353 && allocate <= get_probe_interval ()))
8354 {
8355 ix86_adjust_stack_and_probe (allocate, int_registers_saved);
8356 allocate = 0;
8357 }
8358 }
8359 else
8360 {
8361 HOST_WIDE_INT size = allocate;
8362
8363 if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
8364 size = 0x80000000 - get_stack_check_protect () - 1;
8365
8366 if (TARGET_STACK_PROBE)
8367 {
8368 if (crtl->is_leaf && !cfun->calls_alloca)
8369 {
8370 if (size > get_probe_interval ())
8371 ix86_emit_probe_stack_range (0, size, int_registers_saved);
8372 }
8373 else
8374 ix86_emit_probe_stack_range (0,
8375 size + get_stack_check_protect (),
8376 int_registers_saved);
8377 }
8378 else
8379 {
8380 if (crtl->is_leaf && !cfun->calls_alloca)
8381 {
8382 if (size > get_probe_interval ()
8383 && size > get_stack_check_protect ())
8384 ix86_emit_probe_stack_range (get_stack_check_protect (),
8385 (size
8386 - get_stack_check_protect ()),
8387 int_registers_saved);
8388 }
8389 else
8390 ix86_emit_probe_stack_range (get_stack_check_protect (), size,
8391 int_registers_saved);
8392 }
8393 }
8394 }
8395
8396 if (allocate == 0)
8397 ;
8398 else if (!ix86_target_stack_probe ()
8399 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
8400 {
8401 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8402 GEN_INT (-allocate), -1,
8403 m->fs.cfa_reg == stack_pointer_rtx);
8404 }
8405 else
8406 {
8407 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8408 rtx r10 = NULL;
8409 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
8410 bool eax_live = ix86_eax_live_at_start_p ();
8411 bool r10_live = false;
8412
8413 if (TARGET_64BIT)
8414 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
8415
8416 if (eax_live)
8417 {
8418 insn = emit_insn (gen_push (eax));
8419 allocate -= UNITS_PER_WORD;
8420 /* Note that SEH directives need to continue tracking the stack
8421 pointer even after the frame pointer has been set up. */
8422 if (sp_is_cfa_reg || TARGET_SEH)
8423 {
8424 if (sp_is_cfa_reg)
8425 m->fs.cfa_offset += UNITS_PER_WORD;
8426 RTX_FRAME_RELATED_P (insn) = 1;
8427 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8428 gen_rtx_SET (stack_pointer_rtx,
8429 plus_constant (Pmode,
8430 stack_pointer_rtx,
8431 -UNITS_PER_WORD)));
8432 }
8433 }
8434
8435 if (r10_live)
8436 {
8437 r10 = gen_rtx_REG (Pmode, R10_REG);
8438 insn = emit_insn (gen_push (r10));
8439 allocate -= UNITS_PER_WORD;
8440 if (sp_is_cfa_reg || TARGET_SEH)
8441 {
8442 if (sp_is_cfa_reg)
8443 m->fs.cfa_offset += UNITS_PER_WORD;
8444 RTX_FRAME_RELATED_P (insn) = 1;
8445 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8446 gen_rtx_SET (stack_pointer_rtx,
8447 plus_constant (Pmode,
8448 stack_pointer_rtx,
8449 -UNITS_PER_WORD)));
8450 }
8451 }
8452
8453 emit_move_insn (eax, GEN_INT (allocate));
8454 emit_insn (gen_allocate_stack_worker_probe (Pmode, eax, eax));
8455
8456 /* Use the fact that AX still contains ALLOCATE. */
8457 insn = emit_insn (gen_pro_epilogue_adjust_stack_sub
8458 (Pmode, stack_pointer_rtx, stack_pointer_rtx, eax));
8459
8460 if (sp_is_cfa_reg || TARGET_SEH)
8461 {
8462 if (sp_is_cfa_reg)
8463 m->fs.cfa_offset += allocate;
8464 RTX_FRAME_RELATED_P (insn) = 1;
8465 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8466 gen_rtx_SET (stack_pointer_rtx,
8467 plus_constant (Pmode, stack_pointer_rtx,
8468 -allocate)));
8469 }
8470 m->fs.sp_offset += allocate;
8471
8472 /* Use stack_pointer_rtx for relative addressing so that code works for
8473 realigned stack. But this means that we need a blockage to prevent
8474 stores based on the frame pointer from being scheduled before. */
8475 if (r10_live && eax_live)
8476 {
8477 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
8478 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
8479 gen_frame_mem (word_mode, t));
8480 t = plus_constant (Pmode, t, UNITS_PER_WORD);
8481 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
8482 gen_frame_mem (word_mode, t));
8483 emit_insn (gen_memory_blockage ());
8484 }
8485 else if (eax_live || r10_live)
8486 {
8487 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
8488 emit_move_insn (gen_rtx_REG (word_mode,
8489 (eax_live ? AX_REG : R10_REG)),
8490 gen_frame_mem (word_mode, t));
8491 emit_insn (gen_memory_blockage ());
8492 }
8493 }
8494 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
8495
8496 /* If we haven't already set up the frame pointer, do so now. */
8497 if (frame_pointer_needed && !m->fs.fp_valid)
8498 {
8499 insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
8500 GEN_INT (frame.stack_pointer_offset
8501 - frame.hard_frame_pointer_offset));
8502 insn = emit_insn (insn);
8503 RTX_FRAME_RELATED_P (insn) = 1;
8504 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
8505
8506 if (m->fs.cfa_reg == stack_pointer_rtx)
8507 m->fs.cfa_reg = hard_frame_pointer_rtx;
8508 m->fs.fp_offset = frame.hard_frame_pointer_offset;
8509 m->fs.fp_valid = true;
8510 }
8511
8512 if (!int_registers_saved)
8513 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
8514 if (!sse_registers_saved)
8515 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8516 else if (save_stub_call_needed)
8517 ix86_emit_outlined_ms2sysv_save (frame);
8518
8519 /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
8520 in the prologue. */
8521 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
8522 {
8523 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
8524 insn = emit_insn (gen_set_got (pic));
8525 RTX_FRAME_RELATED_P (insn) = 1;
8526 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
8527 emit_insn (gen_prologue_use (pic));
8528 /* Delete the already-emitted SET_GOT, if it exists and is allocated
8529 to REAL_PIC_OFFSET_TABLE_REGNUM. */
8530 ix86_elim_entry_set_got (pic);
8531 }
8532
8533 if (crtl->drap_reg && !crtl->stack_realign_needed)
8534 {
8535 /* vDRAP was set up, but after reload it turns out stack realignment
8536 isn't necessary; here we emit the prologue code to set up DRAP
8537 without the stack realignment adjustment. */
8538 t = choose_baseaddr (0, NULL);
8539 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
8540 }
8541
8542 /* Prevent instructions from being scheduled into register save push
8543 sequence when access to the redzone area is done through frame pointer.
8544 The offset between the frame pointer and the stack pointer is calculated
8545 relative to the value of the stack pointer at the end of the function
8546 prologue, and moving instructions that access redzone area via frame
8547 pointer inside push sequence violates this assumption. */
8548 if (frame_pointer_needed && frame.red_zone_size)
8549 emit_insn (gen_memory_blockage ());
8550
8551 /* SEH requires that the prologue end within 256 bytes of the start of
8552 the function. Prevent instruction schedules that would extend that.
8553 Further, prevent alloca modifications to the stack pointer from being
8554 combined with prologue modifications. */
8555 if (TARGET_SEH)
8556 emit_insn (gen_prologue_use (stack_pointer_rtx));
8557 }
8558
8559 /* Emit code to restore REG using a POP insn. */
8560
8561 static void
8562 ix86_emit_restore_reg_using_pop (rtx reg)
8563 {
8564 struct machine_function *m = cfun->machine;
8565 rtx_insn *insn = emit_insn (gen_pop (reg));
8566
8567 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
8568 m->fs.sp_offset -= UNITS_PER_WORD;
8569
8570 if (m->fs.cfa_reg == crtl->drap_reg
8571 && REGNO (reg) == REGNO (crtl->drap_reg))
8572 {
8573 /* Previously we'd represented the CFA as an expression
8574 like *(%ebp - 8). We've just popped that value from
8575 the stack, which means we need to reset the CFA to
8576 the drap register. This will remain until we restore
8577 the stack pointer. */
8578 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8579 RTX_FRAME_RELATED_P (insn) = 1;
8580
8581 /* This means that the DRAP register is valid for addressing too. */
8582 m->fs.drap_valid = true;
8583 return;
8584 }
8585
8586 if (m->fs.cfa_reg == stack_pointer_rtx)
8587 {
8588 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
8589 x = gen_rtx_SET (stack_pointer_rtx, x);
8590 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
8591 RTX_FRAME_RELATED_P (insn) = 1;
8592
8593 m->fs.cfa_offset -= UNITS_PER_WORD;
8594 }
8595
8596 /* When the frame pointer is the CFA, and we pop it, we are
8597 swapping back to the stack pointer as the CFA. This happens
8598 for stack frames that don't allocate other data, so we assume
8599 the stack pointer is now pointing at the return address, i.e.
8600 the function entry state, which makes the offset be 1 word. */
8601 if (reg == hard_frame_pointer_rtx)
8602 {
8603 m->fs.fp_valid = false;
8604 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
8605 {
8606 m->fs.cfa_reg = stack_pointer_rtx;
8607 m->fs.cfa_offset -= UNITS_PER_WORD;
8608
8609 add_reg_note (insn, REG_CFA_DEF_CFA,
8610 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8611 GEN_INT (m->fs.cfa_offset)));
8612 RTX_FRAME_RELATED_P (insn) = 1;
8613 }
8614 }
8615 }
8616
8617 /* Emit code to restore saved registers using POP insns. */
8618
8619 static void
8620 ix86_emit_restore_regs_using_pop (void)
8621 {
8622 unsigned int regno;
8623
8624 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8625 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
8626 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
8627 }
8628
8629 /* Emit code and notes for the LEAVE instruction. If insn is non-null,
8630 omits the emit and only attaches the notes. */
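
/* Recall that "leave" behaves like  mov %ebp, %esp  followed by  pop %ebp,
   which is why the state tracking below marks SP valid again, sets
   sp_offset to fp_offset - UNITS_PER_WORD and invalidates the frame
   pointer.  */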
8631
8632 static void
8633 ix86_emit_leave (rtx_insn *insn)
8634 {
8635 struct machine_function *m = cfun->machine;
8636
8637 if (!insn)
8638 insn = emit_insn (gen_leave (word_mode));
8639
8640 ix86_add_queued_cfa_restore_notes (insn);
8641
8642 gcc_assert (m->fs.fp_valid);
8643 m->fs.sp_valid = true;
8644 m->fs.sp_realigned = false;
8645 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
8646 m->fs.fp_valid = false;
8647
8648 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
8649 {
8650 m->fs.cfa_reg = stack_pointer_rtx;
8651 m->fs.cfa_offset = m->fs.sp_offset;
8652
8653 add_reg_note (insn, REG_CFA_DEF_CFA,
8654 plus_constant (Pmode, stack_pointer_rtx,
8655 m->fs.sp_offset));
8656 RTX_FRAME_RELATED_P (insn) = 1;
8657 }
8658 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
8659 m->fs.fp_offset);
8660 }
8661
8662 /* Emit code to restore saved registers using MOV insns.
8663 First register is restored from CFA - CFA_OFFSET. */
8664 static void
8665 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
8666 bool maybe_eh_return)
8667 {
8668 struct machine_function *m = cfun->machine;
8669 unsigned int regno;
8670
8671 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8672 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
8673 {
8674 rtx reg = gen_rtx_REG (word_mode, regno);
8675 rtx mem;
8676 rtx_insn *insn;
8677
8678 mem = choose_baseaddr (cfa_offset, NULL);
8679 mem = gen_frame_mem (word_mode, mem);
8680 insn = emit_move_insn (reg, mem);
8681
8682 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8683 {
8684 /* Previously we'd represented the CFA as an expression
8685 like *(%ebp - 8). We've just loaded that value from
8686 the stack, which means we need to reset the CFA to
8687 the drap register. This will remain until we restore
8688 the stack pointer. */
8689 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8690 RTX_FRAME_RELATED_P (insn) = 1;
8691
8692 /* This means that the DRAP register is valid for addressing. */
8693 m->fs.drap_valid = true;
8694 }
8695 else
8696 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
8697
8698 cfa_offset -= UNITS_PER_WORD;
8699 }
8700 }
8701
8702 /* Emit code to restore saved registers using MOV insns.
8703 First register is restored from CFA - CFA_OFFSET. */
8704 static void
8705 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
8706 bool maybe_eh_return)
8707 {
8708 unsigned int regno;
8709
8710 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8711 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
8712 {
8713 rtx reg = gen_rtx_REG (V4SFmode, regno);
8714 rtx mem;
8715 unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
8716
8717 mem = choose_baseaddr (cfa_offset, &align);
8718 mem = gen_rtx_MEM (V4SFmode, mem);
8719
8720 /* The location alignment depends upon the base register. */
8721 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
8722 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
8723 set_mem_align (mem, align);
8724 emit_insn (gen_rtx_SET (reg, mem));
8725
8726 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
8727
8728 cfa_offset -= GET_MODE_SIZE (V4SFmode);
8729 }
8730 }
8731
8732 static void
8733 ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
8734 bool use_call, int style)
8735 {
8736 struct machine_function *m = cfun->machine;
8737 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
8738 + m->call_ms2sysv_extra_regs;
8739 rtvec v;
8740 unsigned int elems_needed, align, i, vi = 0;
8741 rtx_insn *insn;
8742 rtx sym, tmp;
8743 rtx rsi = gen_rtx_REG (word_mode, SI_REG);
8744 rtx r10 = NULL_RTX;
8745 const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
8746 HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
8747 HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
8748 rtx rsi_frame_load = NULL_RTX;
8749 HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
8750 enum xlogue_stub stub;
8751
8752 gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
8753
8754 /* If using a realigned stack, we should never start with padding. */
8755 gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
8756
8757 /* Setup RSI as the stub's base pointer. */
8758 align = GET_MODE_ALIGNMENT (V4SFmode);
8759 tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
8760 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
8761
8762 emit_insn (gen_rtx_SET (rsi, tmp));
8763
8764 /* Get a symbol for the stub. */
8765 if (frame_pointer_needed)
8766 stub = use_call ? XLOGUE_STUB_RESTORE_HFP
8767 : XLOGUE_STUB_RESTORE_HFP_TAIL;
8768 else
8769 stub = use_call ? XLOGUE_STUB_RESTORE
8770 : XLOGUE_STUB_RESTORE_TAIL;
8771 sym = xlogue.get_stub_rtx (stub);
8772
8773 elems_needed = ncregs;
8774 if (use_call)
8775 elems_needed += 1;
8776 else
8777 elems_needed += frame_pointer_needed ? 5 : 3;
8778 v = rtvec_alloc (elems_needed);
8779
8780 /* We call the epilogue stub when we need to pop incoming args or we are
8781 doing a sibling call as the tail. Otherwise, we will emit a jmp to the
8782 epilogue stub and it is the tail-call. */
8783 if (use_call)
8784 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
8785 else
8786 {
8787 RTVEC_ELT (v, vi++) = ret_rtx;
8788 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
8789 if (frame_pointer_needed)
8790 {
8791 rtx rbp = gen_rtx_REG (DImode, BP_REG);
8792 gcc_assert (m->fs.fp_valid);
8793 gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
8794
8795 tmp = gen_rtx_PLUS (DImode, rbp, GEN_INT (8));
8796 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
8797 RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
8798 tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
8799 RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
8800 }
8801 else
8802 {
8803 /* If no hard frame pointer, we set R10 to the SP restore value. */
8804 gcc_assert (!m->fs.fp_valid);
8805 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
8806 gcc_assert (m->fs.sp_valid);
8807
8808 r10 = gen_rtx_REG (DImode, R10_REG);
8809 tmp = gen_rtx_PLUS (Pmode, rsi, GEN_INT (stub_ptr_offset));
8810 emit_insn (gen_rtx_SET (r10, tmp));
8811
8812 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
8813 }
8814 }
8815
8816 /* Generate frame load insns and restore notes. */
8817 for (i = 0; i < ncregs; ++i)
8818 {
8819 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
8820 machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
8821 rtx reg, frame_load;
8822
8823 reg = gen_rtx_REG (mode, r.regno);
8824 frame_load = gen_frame_load (reg, rsi, r.offset);
8825
8826 /* Save RSI frame load insn & note to add last. */
8827 if (r.regno == SI_REG)
8828 {
8829 gcc_assert (!rsi_frame_load);
8830 rsi_frame_load = frame_load;
8831 rsi_restore_offset = r.offset;
8832 }
8833 else
8834 {
8835 RTVEC_ELT (v, vi++) = frame_load;
8836 ix86_add_cfa_restore_note (NULL, reg, r.offset);
8837 }
8838 }
8839
8840 /* Add RSI frame load & restore note at the end. */
8841 gcc_assert (rsi_frame_load);
8842 gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
8843 RTVEC_ELT (v, vi++) = rsi_frame_load;
8844 ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
8845 rsi_restore_offset);
8846
8847 /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */
8848 if (!use_call && !frame_pointer_needed)
8849 {
8850 gcc_assert (m->fs.sp_valid);
8851 gcc_assert (!m->fs.sp_realigned);
8852
8853 /* At this point, R10 should point to frame.stack_realign_offset. */
8854 if (m->fs.cfa_reg == stack_pointer_rtx)
8855 m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
8856 m->fs.sp_offset = frame.stack_realign_offset;
8857 }
8858
8859 gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
8860 tmp = gen_rtx_PARALLEL (VOIDmode, v);
8861 if (use_call)
8862 insn = emit_insn (tmp);
8863 else
8864 {
8865 insn = emit_jump_insn (tmp);
8866 JUMP_LABEL (insn) = ret_rtx;
8867
8868 if (frame_pointer_needed)
8869 ix86_emit_leave (insn);
8870 else
8871 {
8872 /* Need CFA adjust note. */
8873 tmp = gen_rtx_SET (stack_pointer_rtx, r10);
8874 add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
8875 }
8876 }
8877
8878 RTX_FRAME_RELATED_P (insn) = true;
8879 ix86_add_queued_cfa_restore_notes (insn);
8880
8881 /* If we're not doing a tail-call, we need to adjust the stack. */
8882 if (use_call && m->fs.sp_valid)
8883 {
8884 HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
8885 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8886 GEN_INT (dealloc), style,
8887 m->fs.cfa_reg == stack_pointer_rtx);
8888 }
8889 }
8890
8891 /* Restore function stack, frame, and registers. */
8892
8893 void
8894 ix86_expand_epilogue (int style)
8895 {
8896 struct machine_function *m = cfun->machine;
8897 struct machine_frame_state frame_state_save = m->fs;
8898 bool restore_regs_via_mov;
8899 bool using_drap;
8900 bool restore_stub_is_tail = false;
8901
8902 if (ix86_function_naked (current_function_decl))
8903 {
8904 /* The program should not reach this point. */
8905 emit_insn (gen_ud2 ());
8906 return;
8907 }
8908
8909 ix86_finalize_stack_frame_flags ();
8910 const struct ix86_frame &frame = cfun->machine->frame;
8911
8912 m->fs.sp_realigned = stack_realign_fp;
8913 m->fs.sp_valid = stack_realign_fp
8914 || !frame_pointer_needed
8915 || crtl->sp_is_unchanging;
8916 gcc_assert (!m->fs.sp_valid
8917 || m->fs.sp_offset == frame.stack_pointer_offset);
8918
8919 /* The FP must be valid if the frame pointer is present. */
8920 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
8921 gcc_assert (!m->fs.fp_valid
8922 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
8923
8924 /* We must have *some* valid pointer to the stack frame. */
8925 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
8926
8927 /* The DRAP is never valid at this point. */
8928 gcc_assert (!m->fs.drap_valid);
8929
8930 /* See the comment about red zone and frame
8931 pointer usage in ix86_expand_prologue. */
8932 if (frame_pointer_needed && frame.red_zone_size)
8933 emit_insn (gen_memory_blockage ());
8934
8935 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
8936 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
8937
8938 /* Determine the CFA offset of the end of the red-zone. */
8939 m->fs.red_zone_offset = 0;
8940 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
8941 {
8942       /* The red-zone begins below the return address and the error code
8943 	 in an exception handler.  */
8944 m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
8945
8946 /* When the register save area is in the aligned portion of
8947 the stack, determine the maximum runtime displacement that
8948 matches up with the aligned frame. */
8949 if (stack_realign_drap)
8950 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
8951 + UNITS_PER_WORD);
8952 }
8953
8954 HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;
8955
8956 /* Special care must be taken for the normal return case of a function
8957 using eh_return: the eax and edx registers are marked as saved, but
8958 not restored along this path. Adjust the save location to match. */
8959 if (crtl->calls_eh_return && style != 2)
8960 reg_save_offset -= 2 * UNITS_PER_WORD;
8961
8962 /* EH_RETURN requires the use of moves to function properly. */
8963 if (crtl->calls_eh_return)
8964 restore_regs_via_mov = true;
8965 /* SEH requires the use of pops to identify the epilogue. */
8966 else if (TARGET_SEH)
8967 restore_regs_via_mov = false;
8968       /* If we're only restoring one register and sp cannot be used, then
8969 	 use a move instruction to restore the register, since it's less
8970 	 work than reloading sp and popping the register.  */
8971 else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
8972 restore_regs_via_mov = true;
8973 else if (TARGET_EPILOGUE_USING_MOVE
8974 && cfun->machine->use_fast_prologue_epilogue
8975 && (frame.nregs > 1
8976 || m->fs.sp_offset != reg_save_offset))
8977 restore_regs_via_mov = true;
8978 else if (frame_pointer_needed
8979 && !frame.nregs
8980 && m->fs.sp_offset != reg_save_offset)
8981 restore_regs_via_mov = true;
8982 else if (frame_pointer_needed
8983 && TARGET_USE_LEAVE
8984 && cfun->machine->use_fast_prologue_epilogue
8985 && frame.nregs == 1)
8986 restore_regs_via_mov = true;
8987 else
8988 restore_regs_via_mov = false;
8989
8990 if (restore_regs_via_mov || frame.nsseregs)
8991 {
8992 /* Ensure that the entire register save area is addressable via
8993 the stack pointer, if we will restore SSE regs via sp. */
8994 if (TARGET_64BIT
8995 && m->fs.sp_offset > 0x7fffffff
8996 && sp_valid_at (frame.stack_realign_offset + 1)
8997 && (frame.nsseregs + frame.nregs) != 0)
8998 {
8999 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9000 GEN_INT (m->fs.sp_offset
9001 - frame.sse_reg_save_offset),
9002 style,
9003 m->fs.cfa_reg == stack_pointer_rtx);
9004 }
9005 }
9006
9007 /* If there are any SSE registers to restore, then we have to do it
9008 via moves, since there's obviously no pop for SSE regs. */
9009 if (frame.nsseregs)
9010 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
9011 style == 2);
9012
9013 if (m->call_ms2sysv)
9014 {
9015 int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
9016
9017 /* We cannot use a tail-call for the stub if:
9018 1. We have to pop incoming args,
9019 2. We have additional int regs to restore, or
9020 3. A sibling call will be the tail-call, or
9021 4. We are emitting an eh_return_internal epilogue.
9022
9023      TODO: Item 4 has not yet been tested!
9024
9025 If any of the above are true, we will call the stub rather than
9026 jump to it. */
9027 restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
9028 ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
9029 }
9030
9031   /* If using an out-of-line stub that is a tail call, then... */
9032 if (m->call_ms2sysv && restore_stub_is_tail)
9033 {
9034       /* TODO: paranoid tests. (remove eventually) */
9035 gcc_assert (m->fs.sp_valid);
9036 gcc_assert (!m->fs.sp_realigned);
9037 gcc_assert (!m->fs.fp_valid);
9038 gcc_assert (!m->fs.realigned);
9039 gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
9040 gcc_assert (!crtl->drap_reg);
9041 gcc_assert (!frame.nregs);
9042 }
9043 else if (restore_regs_via_mov)
9044 {
9045 rtx t;
9046
9047 if (frame.nregs)
9048 ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);
9049
9050 /* eh_return epilogues need %ecx added to the stack pointer. */
9051 if (style == 2)
9052 {
9053 rtx sa = EH_RETURN_STACKADJ_RTX;
9054 rtx_insn *insn;
9055
9056 /* %ecx can't be used for both DRAP register and eh_return. */
9057 if (crtl->drap_reg)
9058 gcc_assert (REGNO (crtl->drap_reg) != CX_REG);
9059
9060 /* regparm nested functions don't work with eh_return. */
9061 gcc_assert (!ix86_static_chain_on_stack);
9062
9063 if (frame_pointer_needed)
9064 {
9065 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
9066 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
9067 emit_insn (gen_rtx_SET (sa, t));
9068
9069 /* NB: eh_return epilogues must restore the frame pointer
9070 in word_mode since the upper 32 bits of RBP register
9071 can have any values. */
9072 t = gen_frame_mem (word_mode, hard_frame_pointer_rtx);
9073 rtx frame_reg = gen_rtx_REG (word_mode,
9074 HARD_FRAME_POINTER_REGNUM);
9075 insn = emit_move_insn (frame_reg, t);
9076
9077 /* Note that we use SA as a temporary CFA, as the return
9078 address is at the proper place relative to it. We
9079 pretend this happens at the FP restore insn because
9080 prior to this insn the FP would be stored at the wrong
9081 offset relative to SA, and after this insn we have no
9082 other reasonable register to use for the CFA. We don't
9083 bother resetting the CFA to the SP for the duration of
9084 the return insn, unless the control flow instrumentation
9085 is done. In this case the SP is used later and we have
9086 to reset CFA to SP. */
9087 add_reg_note (insn, REG_CFA_DEF_CFA,
9088 plus_constant (Pmode, sa, UNITS_PER_WORD));
9089 ix86_add_queued_cfa_restore_notes (insn);
9090 add_reg_note (insn, REG_CFA_RESTORE, frame_reg);
9091 RTX_FRAME_RELATED_P (insn) = 1;
9092
9093 m->fs.cfa_reg = sa;
9094 m->fs.cfa_offset = UNITS_PER_WORD;
9095 m->fs.fp_valid = false;
9096
9097 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
9098 const0_rtx, style,
9099 flag_cf_protection);
9100 }
9101 else
9102 {
9103 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
9104 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
9105 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
9106 ix86_add_queued_cfa_restore_notes (insn);
9107
9108 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
9109 if (m->fs.cfa_offset != UNITS_PER_WORD)
9110 {
9111 m->fs.cfa_offset = UNITS_PER_WORD;
9112 add_reg_note (insn, REG_CFA_DEF_CFA,
9113 plus_constant (Pmode, stack_pointer_rtx,
9114 UNITS_PER_WORD));
9115 RTX_FRAME_RELATED_P (insn) = 1;
9116 }
9117 }
9118 m->fs.sp_offset = UNITS_PER_WORD;
9119 m->fs.sp_valid = true;
9120 m->fs.sp_realigned = false;
9121 }
9122 }
9123 else
9124 {
9125 /* SEH requires that the function end with (1) a stack adjustment
9126 if necessary, (2) a sequence of pops, and (3) a return or
9127 jump instruction. Prevent insns from the function body from
9128 being scheduled into this sequence. */
9129 if (TARGET_SEH)
9130 {
9131 /* Prevent a catch region from being adjacent to the standard
9132 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda
9133 nor several other flags that would be interesting to test are
9134 set up yet. */
9135 if (flag_non_call_exceptions)
9136 emit_insn (gen_nops (const1_rtx));
9137 else
9138 emit_insn (gen_blockage ());
9139 }
9140
9141 /* First step is to deallocate the stack frame so that we can
9142 pop the registers. If the stack pointer was realigned, it needs
9143 to be restored now. Also do it on SEH target for very large
9144 frame as the emitted instructions aren't allowed by the ABI
9145 in epilogues. */
9146 if (!m->fs.sp_valid || m->fs.sp_realigned
9147 || (TARGET_SEH
9148 && (m->fs.sp_offset - reg_save_offset
9149 >= SEH_MAX_FRAME_SIZE)))
9150 {
9151 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
9152 GEN_INT (m->fs.fp_offset
9153 - reg_save_offset),
9154 style, false);
9155 }
9156 else if (m->fs.sp_offset != reg_save_offset)
9157 {
9158 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9159 GEN_INT (m->fs.sp_offset
9160 - reg_save_offset),
9161 style,
9162 m->fs.cfa_reg == stack_pointer_rtx);
9163 }
9164
9165 ix86_emit_restore_regs_using_pop ();
9166 }
9167
9168   /* If we used a frame pointer and haven't already got rid of it,
9169      then do so now.  */
9170 if (m->fs.fp_valid)
9171 {
9172 /* If the stack pointer is valid and pointing at the frame
9173 pointer store address, then we only need a pop. */
9174 if (sp_valid_at (frame.hfp_save_offset)
9175 && m->fs.sp_offset == frame.hfp_save_offset)
9176 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
9177 /* Leave results in shorter dependency chains on CPUs that are
9178 able to grok it fast. */
9179 else if (TARGET_USE_LEAVE
9180 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
9181 || !cfun->machine->use_fast_prologue_epilogue)
9182 ix86_emit_leave (NULL);
9183 else
9184 {
9185 pro_epilogue_adjust_stack (stack_pointer_rtx,
9186 hard_frame_pointer_rtx,
9187 const0_rtx, style, !using_drap);
9188 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
9189 }
9190 }
9191
9192 if (using_drap)
9193 {
9194 int param_ptr_offset = UNITS_PER_WORD;
9195 rtx_insn *insn;
9196
9197 gcc_assert (stack_realign_drap);
9198
9199 if (ix86_static_chain_on_stack)
9200 param_ptr_offset += UNITS_PER_WORD;
9201 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
9202 param_ptr_offset += UNITS_PER_WORD;
9203
9204 insn = emit_insn (gen_rtx_SET
9205 (stack_pointer_rtx,
9206 gen_rtx_PLUS (Pmode,
9207 crtl->drap_reg,
9208 GEN_INT (-param_ptr_offset))));
9209 m->fs.cfa_reg = stack_pointer_rtx;
9210 m->fs.cfa_offset = param_ptr_offset;
9211 m->fs.sp_offset = param_ptr_offset;
9212 m->fs.realigned = false;
9213
9214 add_reg_note (insn, REG_CFA_DEF_CFA,
9215 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9216 GEN_INT (param_ptr_offset)));
9217 RTX_FRAME_RELATED_P (insn) = 1;
9218
9219 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
9220 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
9221 }
9222
9223 /* At this point the stack pointer must be valid, and we must have
9224 restored all of the registers. We may not have deallocated the
9225 entire stack frame. We've delayed this until now because it may
9226 be possible to merge the local stack deallocation with the
9227 deallocation forced by ix86_static_chain_on_stack. */
9228 gcc_assert (m->fs.sp_valid);
9229 gcc_assert (!m->fs.sp_realigned);
9230 gcc_assert (!m->fs.fp_valid);
9231 gcc_assert (!m->fs.realigned);
9232 if (m->fs.sp_offset != UNITS_PER_WORD)
9233 {
9234 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9235 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
9236 style, true);
9237 }
9238 else
9239 ix86_add_queued_cfa_restore_notes (get_last_insn ());
9240
9241 /* Sibcall epilogues don't want a return instruction. */
9242 if (style == 0)
9243 {
9244 m->fs = frame_state_save;
9245 return;
9246 }
9247
9248 if (cfun->machine->func_type != TYPE_NORMAL)
9249 emit_jump_insn (gen_interrupt_return ());
9250 else if (crtl->args.pops_args && crtl->args.size)
9251 {
9252 rtx popc = GEN_INT (crtl->args.pops_args);
9253
9254       /* i386 can only pop 64K bytes.  If asked to pop more, pop the return
9255 	 address, do an explicit add, and jump indirectly to the caller.  */
9256
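      /* Illustrative only: the sequence emitted below is roughly
	   popl  %ecx
	   addl  $pops_args, %esp
	   jmp   *%ecx
	 i.e. the return address is popped into %ecx, the argument area is
	 released with an explicit add, and control returns through an
	 indirect jump.  */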
9257 if (crtl->args.pops_args >= 65536)
9258 {
9259 rtx ecx = gen_rtx_REG (SImode, CX_REG);
9260 rtx_insn *insn;
9261
9262 /* There is no "pascal" calling convention in any 64bit ABI. */
9263 gcc_assert (!TARGET_64BIT);
9264
9265 insn = emit_insn (gen_pop (ecx));
9266 m->fs.cfa_offset -= UNITS_PER_WORD;
9267 m->fs.sp_offset -= UNITS_PER_WORD;
9268
9269 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
9270 x = gen_rtx_SET (stack_pointer_rtx, x);
9271 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9272 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
9273 RTX_FRAME_RELATED_P (insn) = 1;
9274
9275 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9276 popc, -1, true);
9277 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
9278 }
9279 else
9280 emit_jump_insn (gen_simple_return_pop_internal (popc));
9281 }
9282 else if (!m->call_ms2sysv || !restore_stub_is_tail)
9283 {
9284       /* In case of a return from EH, a simple return cannot be used,
9285 	 as the return address will be compared with a shadow stack
9286 	 return address.  Use an indirect jump instead.  */
9287 if (style == 2 && flag_cf_protection)
9288 {
9289 /* Register used in indirect jump must be in word_mode. But
9290 Pmode may not be the same as word_mode for x32. */
9291 rtx ecx = gen_rtx_REG (word_mode, CX_REG);
9292 rtx_insn *insn;
9293
9294 insn = emit_insn (gen_pop (ecx));
9295 m->fs.cfa_offset -= UNITS_PER_WORD;
9296 m->fs.sp_offset -= UNITS_PER_WORD;
9297
9298 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
9299 x = gen_rtx_SET (stack_pointer_rtx, x);
9300 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9301 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
9302 RTX_FRAME_RELATED_P (insn) = 1;
9303
9304 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
9305 }
9306 else
9307 emit_jump_insn (gen_simple_return_internal ());
9308 }
9309
9310 /* Restore the state back to the state from the prologue,
9311 so that it's correct for the next epilogue. */
9312 m->fs = frame_state_save;
9313 }
9314
9315 /* Reset from the function's potential modifications. */
9316
9317 static void
9318 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
9319 {
9320 if (pic_offset_table_rtx
9321 && !ix86_use_pseudo_pic_reg ())
9322 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
9323
9324 if (TARGET_MACHO)
9325 {
9326 rtx_insn *insn = get_last_insn ();
9327 rtx_insn *deleted_debug_label = NULL;
9328
9329 /* Mach-O doesn't support labels at the end of objects, so if
9330 it looks like we might want one, take special action.
9331 First, collect any sequence of deleted debug labels. */
9332 while (insn
9333 && NOTE_P (insn)
9334 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
9335 {
9336 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
9337 notes only, instead set their CODE_LABEL_NUMBER to -1,
9338 otherwise there would be code generation differences
9339 in between -g and -g0. */
9340 if (NOTE_P (insn) && NOTE_KIND (insn)
9341 == NOTE_INSN_DELETED_DEBUG_LABEL)
9342 deleted_debug_label = insn;
9343 insn = PREV_INSN (insn);
9344 }
9345
9346 /* If we have:
9347 label:
9348 barrier
9349 then this needs to be detected, so skip past the barrier. */
9350
9351 if (insn && BARRIER_P (insn))
9352 insn = PREV_INSN (insn);
9353
9354 /* Up to now we've only seen notes or barriers. */
9355 if (insn)
9356 {
9357 if (LABEL_P (insn)
9358 || (NOTE_P (insn)
9359 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
9360 /* Trailing label. */
9361 fputs ("\tnop\n", file);
9362 else if (cfun && ! cfun->is_thunk)
9363 {
9364 /* See if we have a completely empty function body, skipping
9365 the special case of the picbase thunk emitted as asm. */
9366 while (insn && ! INSN_P (insn))
9367 insn = PREV_INSN (insn);
9368 /* If we don't find any insns, we've got an empty function body;
9369 	     i.e. completely empty - without a return or branch.  This is
9370 taken as the case where a function body has been removed
9371 because it contains an inline __builtin_unreachable(). GCC
9372 declares that reaching __builtin_unreachable() means UB so
9373 we're not obliged to do anything special; however, we want
9374 non-zero-sized function bodies. To meet this, and help the
9375 user out, let's trap the case. */
9376 if (insn == NULL)
9377 fputs ("\tud2\n", file);
9378 }
9379 }
9380 else if (deleted_debug_label)
9381 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
9382 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
9383 CODE_LABEL_NUMBER (insn) = -1;
9384 }
9385 }
9386
9387 /* Return a scratch register to use in the split stack prologue. The
9388 split stack prologue is used for -fsplit-stack. It is the first
9389 instructions in the function, even before the regular prologue.
9390 The scratch register can be any caller-saved register which is not
9391 used for parameters or for the static chain. */
9392
9393 static unsigned int
9394 split_stack_prologue_scratch_regno (void)
9395 {
9396 if (TARGET_64BIT)
9397 return R11_REG;
9398 else
9399 {
9400 bool is_fastcall, is_thiscall;
9401 int regparm;
9402
9403 is_fastcall = (lookup_attribute ("fastcall",
9404 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
9405 != NULL);
9406 is_thiscall = (lookup_attribute ("thiscall",
9407 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
9408 != NULL);
9409 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
9410
9411 if (is_fastcall)
9412 {
9413 if (DECL_STATIC_CHAIN (cfun->decl))
9414 {
9415 sorry ("%<-fsplit-stack%> does not support fastcall with "
9416 "nested function");
9417 return INVALID_REGNUM;
9418 }
9419 return AX_REG;
9420 }
9421 else if (is_thiscall)
9422 {
9423 if (!DECL_STATIC_CHAIN (cfun->decl))
9424 return DX_REG;
9425 return AX_REG;
9426 }
9427 else if (regparm < 3)
9428 {
9429 if (!DECL_STATIC_CHAIN (cfun->decl))
9430 return CX_REG;
9431 else
9432 {
9433 if (regparm >= 2)
9434 {
9435 sorry ("%<-fsplit-stack%> does not support 2 register "
9436 "parameters for a nested function");
9437 return INVALID_REGNUM;
9438 }
9439 return DX_REG;
9440 }
9441 }
9442 else
9443 {
9444 /* FIXME: We could make this work by pushing a register
9445 around the addition and comparison. */
9446 sorry ("%<-fsplit-stack%> does not support 3 register parameters");
9447 return INVALID_REGNUM;
9448 }
9449 }
9450 }
9451
9452 /* A SYMBOL_REF for the function which allocates new stack space for
9453 -fsplit-stack. */
9454
9455 static GTY(()) rtx split_stack_fn;
9456
9457 /* A SYMBOL_REF for the more stack function when using the large
9458 model. */
9459
9460 static GTY(()) rtx split_stack_fn_large;
9461
9462 /* Return location of the stack guard value in the TLS block. */
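/* As an illustration (assuming a glibc-style target): on 64-bit GNU/Linux
   this is a memory reference in the TLS address space at
   TARGET_THREAD_SPLIT_STACK_OFFSET, printed with a %fs segment override;
   the exact offset comes from the target headers.  */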
9463
9464 rtx
9465 ix86_split_stack_guard (void)
9466 {
9467 int offset;
9468 addr_space_t as = DEFAULT_TLS_SEG_REG;
9469 rtx r;
9470
9471 gcc_assert (flag_split_stack);
9472
9473 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
9474 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
9475 #else
9476 gcc_unreachable ();
9477 #endif
9478
9479 r = GEN_INT (offset);
9480 r = gen_const_mem (Pmode, r);
9481 set_mem_addr_space (r, as);
9482
9483 return r;
9484 }
9485
9486 /* Handle -fsplit-stack. These are the first instructions in the
9487 function, even before the regular prologue. */
9488
9489 void
9490 ix86_expand_split_stack_prologue (void)
9491 {
9492 HOST_WIDE_INT allocate;
9493 unsigned HOST_WIDE_INT args_size;
9494 rtx_code_label *label;
9495 rtx limit, current, allocate_rtx, call_fusage;
9496 rtx_insn *call_insn;
9497 rtx scratch_reg = NULL_RTX;
9498 rtx_code_label *varargs_label = NULL;
9499 rtx fn;
9500
9501 gcc_assert (flag_split_stack && reload_completed);
9502
9503 ix86_finalize_stack_frame_flags ();
9504 struct ix86_frame &frame = cfun->machine->frame;
9505 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
9506
9507 /* This is the label we will branch to if we have enough stack
9508 space. We expect the basic block reordering pass to reverse this
9509 branch if optimizing, so that we branch in the unlikely case. */
9510 label = gen_label_rtx ();
9511
9512 /* We need to compare the stack pointer minus the frame size with
9513 the stack boundary in the TCB. The stack boundary always gives
9514 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
9515 can compare directly. Otherwise we need to do an addition. */
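  /* Roughly, for illustration (64-bit, frame larger than
     SPLIT_STACK_AVAILABLE):
	 lea   -ALLOCATE(%rsp), %r11
	 cmp   %fs:<guard offset>, %r11
	 jae   .Lenough_stack
	 ... pass the sizes and call __morestack ...
     .Lenough_stack:
     where the jae branch is taken in the common case of enough stack.  */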
9516
9517 limit = ix86_split_stack_guard ();
9518
9519 if (allocate < SPLIT_STACK_AVAILABLE)
9520 current = stack_pointer_rtx;
9521 else
9522 {
9523 unsigned int scratch_regno;
9524 rtx offset;
9525
9526 /* We need a scratch register to hold the stack pointer minus
9527 the required frame size. Since this is the very start of the
9528 function, the scratch register can be any caller-saved
9529 register which is not used for parameters. */
9530 offset = GEN_INT (- allocate);
9531 scratch_regno = split_stack_prologue_scratch_regno ();
9532 if (scratch_regno == INVALID_REGNUM)
9533 return;
9534 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
9535 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
9536 {
9537 /* We don't use gen_add in this case because it will
9538 want to split to lea, but when not optimizing the insn
9539 will not be split after this point. */
9540 emit_insn (gen_rtx_SET (scratch_reg,
9541 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9542 offset)));
9543 }
9544 else
9545 {
9546 emit_move_insn (scratch_reg, offset);
9547 emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx));
9548 }
9549 current = scratch_reg;
9550 }
9551
9552 ix86_expand_branch (GEU, current, limit, label);
9553 rtx_insn *jump_insn = get_last_insn ();
9554 JUMP_LABEL (jump_insn) = label;
9555
9556 /* Mark the jump as very likely to be taken. */
9557 add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());
9558
9559 if (split_stack_fn == NULL_RTX)
9560 {
9561 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
9562 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
9563 }
9564 fn = split_stack_fn;
9565
9566 /* Get more stack space. We pass in the desired stack space and the
9567 size of the arguments to copy to the new stack. In 32-bit mode
9568 we push the parameters; __morestack will return on a new stack
9569 anyhow. In 64-bit mode we pass the parameters in r10 and
9570 r11. */
9571 allocate_rtx = GEN_INT (allocate);
9572 args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
9573 call_fusage = NULL_RTX;
9574 rtx pop = NULL_RTX;
9575 if (TARGET_64BIT)
9576 {
9577 rtx reg10, reg11;
9578
9579 reg10 = gen_rtx_REG (Pmode, R10_REG);
9580 reg11 = gen_rtx_REG (Pmode, R11_REG);
9581
9582 /* If this function uses a static chain, it will be in %r10.
9583 Preserve it across the call to __morestack. */
9584 if (DECL_STATIC_CHAIN (cfun->decl))
9585 {
9586 rtx rax;
9587
9588 rax = gen_rtx_REG (word_mode, AX_REG);
9589 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
9590 use_reg (&call_fusage, rax);
9591 }
9592
9593 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
9594 && !TARGET_PECOFF)
9595 {
9596 HOST_WIDE_INT argval;
9597
9598 gcc_assert (Pmode == DImode);
9599 /* When using the large model we need to load the address
9600 into a register, and we've run out of registers. So we
9601 switch to a different calling convention, and we call a
9602 different function: __morestack_large. We pass the
9603 argument size in the upper 32 bits of r10 and pass the
9604 frame size in the lower 32 bits. */
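	  /* Illustrative values only: with allocate == 0x1000 and
	     args_size == 0x20, argval computed below is 0x0000002000001000,
	     so the low 32 bits of %r10 carry the frame size and the high
	     32 bits the argument size.  */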
9605 gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
9606 gcc_assert ((args_size & 0xffffffff) == args_size);
9607
9608 if (split_stack_fn_large == NULL_RTX)
9609 {
9610 split_stack_fn_large
9611 = gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
9612 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
9613 }
9614 if (ix86_cmodel == CM_LARGE_PIC)
9615 {
9616 rtx_code_label *label;
9617 rtx x;
9618
9619 label = gen_label_rtx ();
9620 emit_label (label);
9621 LABEL_PRESERVE_P (label) = 1;
9622 emit_insn (gen_set_rip_rex64 (reg10, label));
9623 emit_insn (gen_set_got_offset_rex64 (reg11, label));
9624 emit_insn (gen_add2_insn (reg10, reg11));
9625 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
9626 UNSPEC_GOT);
9627 x = gen_rtx_CONST (Pmode, x);
9628 emit_move_insn (reg11, x);
9629 x = gen_rtx_PLUS (Pmode, reg10, reg11);
9630 x = gen_const_mem (Pmode, x);
9631 emit_move_insn (reg11, x);
9632 }
9633 else
9634 emit_move_insn (reg11, split_stack_fn_large);
9635
9636 fn = reg11;
9637
9638 argval = ((args_size << 16) << 16) + allocate;
9639 emit_move_insn (reg10, GEN_INT (argval));
9640 }
9641 else
9642 {
9643 emit_move_insn (reg10, allocate_rtx);
9644 emit_move_insn (reg11, GEN_INT (args_size));
9645 use_reg (&call_fusage, reg11);
9646 }
9647
9648 use_reg (&call_fusage, reg10);
9649 }
9650 else
9651 {
9652 rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
9653 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
9654 insn = emit_insn (gen_push (allocate_rtx));
9655 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
9656 pop = GEN_INT (2 * UNITS_PER_WORD);
9657 }
9658 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
9659 GEN_INT (UNITS_PER_WORD), constm1_rtx,
9660 pop, false);
9661 add_function_usage_to (call_insn, call_fusage);
9662 if (!TARGET_64BIT)
9663 add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
9664 /* Indicate that this function can't jump to non-local gotos. */
9665 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
9666
9667 /* In order to make call/return prediction work right, we now need
9668 to execute a return instruction. See
9669 libgcc/config/i386/morestack.S for the details on how this works.
9670
9671 For flow purposes gcc must not see this as a return
9672 instruction--we need control flow to continue at the subsequent
9673 label. Therefore, we use an unspec. */
9674 gcc_assert (crtl->args.pops_args < 65536);
9675 rtx_insn *ret_insn
9676 = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
9677
9678 if ((flag_cf_protection & CF_BRANCH))
9679 {
9680 /* Insert ENDBR since __morestack will jump back here via indirect
9681 call. */
9682 rtx cet_eb = gen_nop_endbr ();
9683 emit_insn_after (cet_eb, ret_insn);
9684 }
9685
9686 /* If we are in 64-bit mode and this function uses a static chain,
9687      we saved %r10 in %rax before calling __morestack.  */
9688 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
9689 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
9690 gen_rtx_REG (word_mode, AX_REG));
9691
9692 /* If this function calls va_start, we need to store a pointer to
9693 the arguments on the old stack, because they may not have been
9694 all copied to the new stack. At this point the old stack can be
9695 found at the frame pointer value used by __morestack, because
9696 __morestack has set that up before calling back to us. Here we
9697 store that pointer in a scratch register, and in
9698 ix86_expand_prologue we store the scratch register in a stack
9699 slot. */
9700 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
9701 {
9702 unsigned int scratch_regno;
9703 rtx frame_reg;
9704 int words;
9705
9706 scratch_regno = split_stack_prologue_scratch_regno ();
9707 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
9708 frame_reg = gen_rtx_REG (Pmode, BP_REG);
9709
9710 /* 64-bit:
9711 fp -> old fp value
9712 return address within this function
9713 return address of caller of this function
9714 stack arguments
9715 So we add three words to get to the stack arguments.
9716
9717 32-bit:
9718 fp -> old fp value
9719 return address within this function
9720 first argument to __morestack
9721 second argument to __morestack
9722 return address of caller of this function
9723 stack arguments
9724 So we add five words to get to the stack arguments.
9725 */
9726 words = TARGET_64BIT ? 3 : 5;
9727 emit_insn (gen_rtx_SET (scratch_reg,
9728 gen_rtx_PLUS (Pmode, frame_reg,
9729 GEN_INT (words * UNITS_PER_WORD))));
9730
9731 varargs_label = gen_label_rtx ();
9732 emit_jump_insn (gen_jump (varargs_label));
9733 JUMP_LABEL (get_last_insn ()) = varargs_label;
9734
9735 emit_barrier ();
9736 }
9737
9738 emit_label (label);
9739 LABEL_NUSES (label) = 1;
9740
9741 /* If this function calls va_start, we now have to set the scratch
9742 register for the case where we do not call __morestack. In this
9743 case we need to set it based on the stack pointer. */
9744 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
9745 {
9746 emit_insn (gen_rtx_SET (scratch_reg,
9747 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9748 GEN_INT (UNITS_PER_WORD))));
9749
9750 emit_label (varargs_label);
9751 LABEL_NUSES (varargs_label) = 1;
9752 }
9753 }
9754
9755 /* We may have to tell the dataflow pass that the split stack prologue
9756 is initializing a scratch register. */
9757
9758 static void
9759 ix86_live_on_entry (bitmap regs)
9760 {
9761 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
9762 {
9763 gcc_assert (flag_split_stack);
9764 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
9765 }
9766 }
9767
9768 /* Extract the parts of an RTL expression that is a valid memory address
9769 for an instruction. Return 0 if the structure of the address is
9770 grossly off. Return -1 if the address contains ASHIFT, so it is not
9771 strictly valid, but still used for computing length of lea instruction. */
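/* For example (illustrative), an address such as
     (plus:SI (plus:SI (mult:SI (reg:SI %ecx) (const_int 4)) (reg:SI %ebx))
	      (const_int 12))
   decomposes into base = %ebx, index = %ecx, scale = 4 and disp = 12,
   i.e. the operand 12(%ebx,%ecx,4).  */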
9772
9773 int
9774 ix86_decompose_address (rtx addr, struct ix86_address *out)
9775 {
9776 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
9777 rtx base_reg, index_reg;
9778 HOST_WIDE_INT scale = 1;
9779 rtx scale_rtx = NULL_RTX;
9780 rtx tmp;
9781 int retval = 1;
9782 addr_space_t seg = ADDR_SPACE_GENERIC;
9783
9784   /* Allow zero-extended SImode addresses;
9785      they will be emitted with the addr32 prefix.  */
9786 if (TARGET_64BIT && GET_MODE (addr) == DImode)
9787 {
9788 if (GET_CODE (addr) == ZERO_EXTEND
9789 && GET_MODE (XEXP (addr, 0)) == SImode)
9790 {
9791 addr = XEXP (addr, 0);
9792 if (CONST_INT_P (addr))
9793 return 0;
9794 }
9795 else if (GET_CODE (addr) == AND
9796 && const_32bit_mask (XEXP (addr, 1), DImode))
9797 {
9798 addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
9799 if (addr == NULL_RTX)
9800 return 0;
9801
9802 if (CONST_INT_P (addr))
9803 return 0;
9804 }
9805 }
9806
9807   /* Allow SImode subregs of DImode addresses;
9808      they will be emitted with the addr32 prefix.  */
9809 if (TARGET_64BIT && GET_MODE (addr) == SImode)
9810 {
9811 if (SUBREG_P (addr)
9812 && GET_MODE (SUBREG_REG (addr)) == DImode)
9813 {
9814 addr = SUBREG_REG (addr);
9815 if (CONST_INT_P (addr))
9816 return 0;
9817 }
9818 }
9819
9820 if (REG_P (addr))
9821 base = addr;
9822 else if (SUBREG_P (addr))
9823 {
9824 if (REG_P (SUBREG_REG (addr)))
9825 base = addr;
9826 else
9827 return 0;
9828 }
9829 else if (GET_CODE (addr) == PLUS)
9830 {
9831 rtx addends[4], op;
9832 int n = 0, i;
9833
9834 op = addr;
9835 do
9836 {
9837 if (n >= 4)
9838 return 0;
9839 addends[n++] = XEXP (op, 1);
9840 op = XEXP (op, 0);
9841 }
9842 while (GET_CODE (op) == PLUS);
9843 if (n >= 4)
9844 return 0;
9845 addends[n] = op;
9846
9847 for (i = n; i >= 0; --i)
9848 {
9849 op = addends[i];
9850 switch (GET_CODE (op))
9851 {
9852 case MULT:
9853 if (index)
9854 return 0;
9855 index = XEXP (op, 0);
9856 scale_rtx = XEXP (op, 1);
9857 break;
9858
9859 case ASHIFT:
9860 if (index)
9861 return 0;
9862 index = XEXP (op, 0);
9863 tmp = XEXP (op, 1);
9864 if (!CONST_INT_P (tmp))
9865 return 0;
9866 scale = INTVAL (tmp);
9867 if ((unsigned HOST_WIDE_INT) scale > 3)
9868 return 0;
9869 scale = 1 << scale;
9870 break;
9871
9872 case ZERO_EXTEND:
9873 op = XEXP (op, 0);
9874 if (GET_CODE (op) != UNSPEC)
9875 return 0;
9876 /* FALLTHRU */
9877
9878 case UNSPEC:
9879 if (XINT (op, 1) == UNSPEC_TP
9880 && TARGET_TLS_DIRECT_SEG_REFS
9881 && seg == ADDR_SPACE_GENERIC)
9882 seg = DEFAULT_TLS_SEG_REG;
9883 else
9884 return 0;
9885 break;
9886
9887 case SUBREG:
9888 if (!REG_P (SUBREG_REG (op)))
9889 return 0;
9890 /* FALLTHRU */
9891
9892 case REG:
9893 if (!base)
9894 base = op;
9895 else if (!index)
9896 index = op;
9897 else
9898 return 0;
9899 break;
9900
9901 case CONST:
9902 case CONST_INT:
9903 case SYMBOL_REF:
9904 case LABEL_REF:
9905 if (disp)
9906 return 0;
9907 disp = op;
9908 break;
9909
9910 default:
9911 return 0;
9912 }
9913 }
9914 }
9915 else if (GET_CODE (addr) == MULT)
9916 {
9917 index = XEXP (addr, 0); /* index*scale */
9918 scale_rtx = XEXP (addr, 1);
9919 }
9920 else if (GET_CODE (addr) == ASHIFT)
9921 {
9922 /* We're called for lea too, which implements ashift on occasion. */
9923 index = XEXP (addr, 0);
9924 tmp = XEXP (addr, 1);
9925 if (!CONST_INT_P (tmp))
9926 return 0;
9927 scale = INTVAL (tmp);
9928 if ((unsigned HOST_WIDE_INT) scale > 3)
9929 return 0;
9930 scale = 1 << scale;
9931 retval = -1;
9932 }
9933 else
9934 disp = addr; /* displacement */
9935
9936 if (index)
9937 {
9938 if (REG_P (index))
9939 ;
9940 else if (SUBREG_P (index)
9941 && REG_P (SUBREG_REG (index)))
9942 ;
9943 else
9944 return 0;
9945 }
9946
9947 /* Extract the integral value of scale. */
9948 if (scale_rtx)
9949 {
9950 if (!CONST_INT_P (scale_rtx))
9951 return 0;
9952 scale = INTVAL (scale_rtx);
9953 }
9954
9955 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
9956 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
9957
9958 /* Avoid useless 0 displacement. */
9959 if (disp == const0_rtx && (base || index))
9960 disp = NULL_RTX;
9961
9962   /* Allow arg pointer and stack pointer as index if there is no scaling.  */
9963 if (base_reg && index_reg && scale == 1
9964 && (REGNO (index_reg) == ARG_POINTER_REGNUM
9965 || REGNO (index_reg) == FRAME_POINTER_REGNUM
9966 || REGNO (index_reg) == SP_REG))
9967 {
9968 std::swap (base, index);
9969 std::swap (base_reg, index_reg);
9970 }
9971
9972 /* Special case: %ebp cannot be encoded as a base without a displacement.
9973 Similarly %r13. */
9974 if (!disp && base_reg
9975 && (REGNO (base_reg) == ARG_POINTER_REGNUM
9976 || REGNO (base_reg) == FRAME_POINTER_REGNUM
9977 || REGNO (base_reg) == BP_REG
9978 || REGNO (base_reg) == R13_REG))
9979 disp = const0_rtx;
9980
9981   /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
9982      Avoid this by transforming it to [%esi+0].
9983 Reload calls address legitimization without cfun defined, so we need
9984 to test cfun for being non-NULL. */
9985 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
9986 && base_reg && !index_reg && !disp
9987 && REGNO (base_reg) == SI_REG)
9988 disp = const0_rtx;
9989
9990 /* Special case: encode reg+reg instead of reg*2. */
9991 if (!base && index && scale == 2)
9992 base = index, base_reg = index_reg, scale = 1;
9993
9994 /* Special case: scaling cannot be encoded without base or displacement. */
9995 if (!base && !disp && index && scale != 1)
9996 disp = const0_rtx;
9997
9998 out->base = base;
9999 out->index = index;
10000 out->disp = disp;
10001 out->scale = scale;
10002 out->seg = seg;
10003
10004 return retval;
10005 }
10006
10007 /* Return cost of the memory address x.
10008 For i386, it is better to use a complex address than let gcc copy
10009 the address into a reg and make a new pseudo. But not if the address
10010    requires two regs - that would mean more pseudos with longer
10011 lifetimes. */
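/* For example (illustrative), an address built from two pseudo-register
   operands, such as (plus (reg) (reg)), is costed higher here than one
   using a single pseudo register, nudging the optimizers toward addresses
   that keep fewer registers live.  */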
10012 static int
10013 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
10014 {
10015 struct ix86_address parts;
10016 int cost = 1;
10017 int ok = ix86_decompose_address (x, &parts);
10018
10019 gcc_assert (ok);
10020
10021 if (parts.base && SUBREG_P (parts.base))
10022 parts.base = SUBREG_REG (parts.base);
10023 if (parts.index && SUBREG_P (parts.index))
10024 parts.index = SUBREG_REG (parts.index);
10025
10026 /* Attempt to minimize number of registers in the address by increasing
10027 address cost for each used register. We don't increase address cost
10028    for "pic_offset_table_rtx".  When a mem op with "pic_offset_table_rtx"
10029    is not invariant itself, it most likely means that the base or index is
10030    not invariant.  Therefore only "pic_offset_table_rtx" could be hoisted out,
10031 which is not profitable for x86. */
10032 if (parts.base
10033 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
10034 && (current_pass->type == GIMPLE_PASS
10035 || !pic_offset_table_rtx
10036 || !REG_P (parts.base)
10037 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
10038 cost++;
10039
10040 if (parts.index
10041 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
10042 && (current_pass->type == GIMPLE_PASS
10043 || !pic_offset_table_rtx
10044 || !REG_P (parts.index)
10045 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
10046 cost++;
10047
10048   /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
10049      since its predecode logic can't detect the length of instructions
10050      and decoding degenerates to vector decoding.  Increase the cost of such
10051      addresses here.  The penalty is at least 2 cycles.  It may be worthwhile
10052 to split such addresses or even refuse such addresses at all.
10053
10054 Following addressing modes are affected:
10055 [base+scale*index]
10056 [scale*index+disp]
10057 [base+index]
10058
10059      The first and last case may be avoidable by explicitly coding the zero in
10060      the memory address, but I don't have an AMD-K6 machine handy to check this
10061      theory.  */
10062
10063 if (TARGET_K6
10064 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
10065 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
10066 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
10067 cost += 10;
10068
10069 return cost;
10070 }
10071
10072 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
10073 this is used for to form addresses to local data when -fPIC is in
10074    this is used to form addresses of local data when -fPIC is in
10075
10076 static bool
10077 darwin_local_data_pic (rtx disp)
10078 {
10079 return (GET_CODE (disp) == UNSPEC
10080 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
10081 }
10082
10083 /* True if operand X should be loaded from GOT. */
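/* E.g. (illustrative): with -fno-plt, or a "noplt" attribute on the callee,
   a call to a non-local function foo is made through its GOT slot
   (call *foo@GOTPCREL(%rip) on x86-64) rather than through the PLT.  */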
10084
10085 bool
10086 ix86_force_load_from_GOT_p (rtx x)
10087 {
10088 return ((TARGET_64BIT || HAVE_AS_IX86_GOT32X)
10089 && !TARGET_PECOFF && !TARGET_MACHO
10090 && !flag_pic
10091 && ix86_cmodel != CM_LARGE
10092 && GET_CODE (x) == SYMBOL_REF
10093 && SYMBOL_REF_FUNCTION_P (x)
10094 && (!flag_plt
10095 || (SYMBOL_REF_DECL (x)
10096 && lookup_attribute ("noplt",
10097 DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))
10098 && !SYMBOL_REF_LOCAL_P (x));
10099 }
10100
10101 /* Determine if a given RTX is a valid constant. We already know this
10102 satisfies CONSTANT_P. */
10103
10104 static bool
10105 ix86_legitimate_constant_p (machine_mode mode, rtx x)
10106 {
10107 switch (GET_CODE (x))
10108 {
10109 case CONST:
10110 x = XEXP (x, 0);
10111
10112 if (GET_CODE (x) == PLUS)
10113 {
10114 if (!CONST_INT_P (XEXP (x, 1)))
10115 return false;
10116 x = XEXP (x, 0);
10117 }
10118
10119 if (TARGET_MACHO && darwin_local_data_pic (x))
10120 return true;
10121
10122 /* Only some unspecs are valid as "constants". */
10123 if (GET_CODE (x) == UNSPEC)
10124 switch (XINT (x, 1))
10125 {
10126 case UNSPEC_GOT:
10127 case UNSPEC_GOTOFF:
10128 case UNSPEC_PLTOFF:
10129 return TARGET_64BIT;
10130 case UNSPEC_TPOFF:
10131 case UNSPEC_NTPOFF:
10132 x = XVECEXP (x, 0, 0);
10133 return (GET_CODE (x) == SYMBOL_REF
10134 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10135 case UNSPEC_DTPOFF:
10136 x = XVECEXP (x, 0, 0);
10137 return (GET_CODE (x) == SYMBOL_REF
10138 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
10139 default:
10140 return false;
10141 }
10142
10143 /* We must have drilled down to a symbol. */
10144 if (GET_CODE (x) == LABEL_REF)
10145 return true;
10146 if (GET_CODE (x) != SYMBOL_REF)
10147 return false;
10148 /* FALLTHRU */
10149
10150 case SYMBOL_REF:
10151 /* TLS symbols are never valid. */
10152 if (SYMBOL_REF_TLS_MODEL (x))
10153 return false;
10154
10155 /* DLLIMPORT symbols are never valid. */
10156 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10157 && SYMBOL_REF_DLLIMPORT_P (x))
10158 return false;
10159
10160 #if TARGET_MACHO
10161 /* mdynamic-no-pic */
10162 if (MACHO_DYNAMIC_NO_PIC_P)
10163 return machopic_symbol_defined_p (x);
10164 #endif
10165
10166       /* An external function address should be loaded
10167 	 via the GOT slot to avoid the PLT.  */
10168 if (ix86_force_load_from_GOT_p (x))
10169 return false;
10170
10171 break;
10172
10173 CASE_CONST_SCALAR_INT:
10174 switch (mode)
10175 {
10176 case E_TImode:
10177 if (TARGET_64BIT)
10178 return true;
10179 /* FALLTHRU */
10180 case E_OImode:
10181 case E_XImode:
10182 if (!standard_sse_constant_p (x, mode))
10183 return false;
10184 default:
10185 break;
10186 }
10187 break;
10188
10189 case CONST_VECTOR:
10190 if (!standard_sse_constant_p (x, mode))
10191 return false;
10192
10193 default:
10194 break;
10195 }
10196
10197 /* Otherwise we handle everything else in the move patterns. */
10198 return true;
10199 }
10200
10201 /* Determine if it's legal to put X into the constant pool. This
10202 is not possible for the address of thread-local symbols, which
10203 is checked above. */
10204
10205 static bool
10206 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
10207 {
10208 /* We can put any immediate constant in memory. */
10209 switch (GET_CODE (x))
10210 {
10211 CASE_CONST_ANY:
10212 return false;
10213
10214 default:
10215 break;
10216 }
10217
10218 return !ix86_legitimate_constant_p (mode, x);
10219 }
10220
10221 /* Nonzero if the symbol is marked as dllimport, or as a stub variable,
10222 otherwise zero. */
10223
10224 static bool
10225 is_imported_p (rtx x)
10226 {
10227 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
10228 || GET_CODE (x) != SYMBOL_REF)
10229 return false;
10230
10231 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
10232 }
10233
10234
10235 /* Nonzero if the constant value X is a legitimate general operand
10236 when generating PIC code. It is given that flag_pic is on and
10237 that X satisfies CONSTANT_P. */
10238
10239 bool
10240 legitimate_pic_operand_p (rtx x)
10241 {
10242 rtx inner;
10243
10244 switch (GET_CODE (x))
10245 {
10246 case CONST:
10247 inner = XEXP (x, 0);
10248 if (GET_CODE (inner) == PLUS
10249 && CONST_INT_P (XEXP (inner, 1)))
10250 inner = XEXP (inner, 0);
10251
10252 /* Only some unspecs are valid as "constants". */
10253 if (GET_CODE (inner) == UNSPEC)
10254 switch (XINT (inner, 1))
10255 {
10256 case UNSPEC_GOT:
10257 case UNSPEC_GOTOFF:
10258 case UNSPEC_PLTOFF:
10259 return TARGET_64BIT;
10260 case UNSPEC_TPOFF:
10261 x = XVECEXP (inner, 0, 0);
10262 return (GET_CODE (x) == SYMBOL_REF
10263 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10264 case UNSPEC_MACHOPIC_OFFSET:
10265 return legitimate_pic_address_disp_p (x);
10266 default:
10267 return false;
10268 }
10269 /* FALLTHRU */
10270
10271 case SYMBOL_REF:
10272 case LABEL_REF:
10273 return legitimate_pic_address_disp_p (x);
10274
10275 default:
10276 return true;
10277 }
10278 }
10279
10280 /* Determine if a given CONST RTX is a valid memory displacement
10281 in PIC mode. */
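/* For instance (illustrative), in 32-bit PIC code a displacement of the form
   (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)) is accepted, whereas a
   bare SYMBOL_REF to a possibly-preemptible symbol is not.  */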
10282
10283 bool
10284 legitimate_pic_address_disp_p (rtx disp)
10285 {
10286 bool saw_plus;
10287
10288 /* In 64bit mode we can allow direct addresses of symbols and labels
10289 when they are not dynamic symbols. */
10290 if (TARGET_64BIT)
10291 {
10292 rtx op0 = disp, op1;
10293
10294 switch (GET_CODE (disp))
10295 {
10296 case LABEL_REF:
10297 return true;
10298
10299 case CONST:
10300 if (GET_CODE (XEXP (disp, 0)) != PLUS)
10301 break;
10302 op0 = XEXP (XEXP (disp, 0), 0);
10303 op1 = XEXP (XEXP (disp, 0), 1);
10304 if (!CONST_INT_P (op1))
10305 break;
10306 if (GET_CODE (op0) == UNSPEC
10307 && (XINT (op0, 1) == UNSPEC_DTPOFF
10308 || XINT (op0, 1) == UNSPEC_NTPOFF)
10309 && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
10310 return true;
10311 if (INTVAL (op1) >= 16*1024*1024
10312 || INTVAL (op1) < -16*1024*1024)
10313 break;
10314 if (GET_CODE (op0) == LABEL_REF)
10315 return true;
10316 if (GET_CODE (op0) == CONST
10317 && GET_CODE (XEXP (op0, 0)) == UNSPEC
10318 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
10319 return true;
10320 if (GET_CODE (op0) == UNSPEC
10321 && XINT (op0, 1) == UNSPEC_PCREL)
10322 return true;
10323 if (GET_CODE (op0) != SYMBOL_REF)
10324 break;
10325 /* FALLTHRU */
10326
10327 case SYMBOL_REF:
10328 /* TLS references should always be enclosed in UNSPEC.
10329 	     A dllimported symbol always needs to be resolved.  */
10330 if (SYMBOL_REF_TLS_MODEL (op0)
10331 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
10332 return false;
10333
10334 if (TARGET_PECOFF)
10335 {
10336 if (is_imported_p (op0))
10337 return true;
10338
10339 if (SYMBOL_REF_FAR_ADDR_P (op0) || !SYMBOL_REF_LOCAL_P (op0))
10340 break;
10341
10342 /* Non-external-weak function symbols need to be resolved only
10343 for the large model. Non-external symbols don't need to be
10344 resolved for large and medium models. For the small model,
10345 we don't need to resolve anything here. */
10346 if ((ix86_cmodel != CM_LARGE_PIC
10347 && SYMBOL_REF_FUNCTION_P (op0)
10348 && !(SYMBOL_REF_EXTERNAL_P (op0) && SYMBOL_REF_WEAK (op0)))
10349 || !SYMBOL_REF_EXTERNAL_P (op0)
10350 || ix86_cmodel == CM_SMALL_PIC)
10351 return true;
10352 }
10353 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
10354 && (SYMBOL_REF_LOCAL_P (op0)
10355 || (HAVE_LD_PIE_COPYRELOC
10356 && flag_pie
10357 && !SYMBOL_REF_WEAK (op0)
10358 && !SYMBOL_REF_FUNCTION_P (op0)))
10359 && ix86_cmodel != CM_LARGE_PIC)
10360 return true;
10361 break;
10362
10363 default:
10364 break;
10365 }
10366 }
10367 if (GET_CODE (disp) != CONST)
10368 return false;
10369 disp = XEXP (disp, 0);
10370
10371 if (TARGET_64BIT)
10372 {
10373       /* It is unsafe to allow PLUS expressions; this limits the allowed
10374 	 distance of GOT table references.  We should not need these anyway.  */
10375 if (GET_CODE (disp) != UNSPEC
10376 || (XINT (disp, 1) != UNSPEC_GOTPCREL
10377 && XINT (disp, 1) != UNSPEC_GOTOFF
10378 && XINT (disp, 1) != UNSPEC_PCREL
10379 && XINT (disp, 1) != UNSPEC_PLTOFF))
10380 return false;
10381
10382 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
10383 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
10384 return false;
10385 return true;
10386 }
10387
10388 saw_plus = false;
10389 if (GET_CODE (disp) == PLUS)
10390 {
10391 if (!CONST_INT_P (XEXP (disp, 1)))
10392 return false;
10393 disp = XEXP (disp, 0);
10394 saw_plus = true;
10395 }
10396
10397 if (TARGET_MACHO && darwin_local_data_pic (disp))
10398 return true;
10399
10400 if (GET_CODE (disp) != UNSPEC)
10401 return false;
10402
10403 switch (XINT (disp, 1))
10404 {
10405 case UNSPEC_GOT:
10406 if (saw_plus)
10407 return false;
10408 /* We need to check for both symbols and labels because VxWorks loads
10409 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
10410 details. */
10411 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10412 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
10413 case UNSPEC_GOTOFF:
10414 	  /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
10415 	     While the ABI also specifies a 32bit relocation, we don't produce it
10416 	     in the small PIC model at all.  */
10417 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10418 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
10419 && !TARGET_64BIT)
10420 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
10421 return false;
10422 case UNSPEC_GOTTPOFF:
10423 case UNSPEC_GOTNTPOFF:
10424 case UNSPEC_INDNTPOFF:
10425 if (saw_plus)
10426 return false;
10427 disp = XVECEXP (disp, 0, 0);
10428 return (GET_CODE (disp) == SYMBOL_REF
10429 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
10430 case UNSPEC_NTPOFF:
10431 disp = XVECEXP (disp, 0, 0);
10432 return (GET_CODE (disp) == SYMBOL_REF
10433 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
10434 case UNSPEC_DTPOFF:
10435 disp = XVECEXP (disp, 0, 0);
10436 return (GET_CODE (disp) == SYMBOL_REF
10437 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
10438 }
10439
10440 return false;
10441 }
10442
10443 /* Determine if op is a suitable RTX for an address register.
10444    Return the naked register if a register or a register subreg is
10445 found, otherwise return NULL_RTX. */
10446
10447 static rtx
10448 ix86_validate_address_register (rtx op)
10449 {
10450 machine_mode mode = GET_MODE (op);
10451
10452 /* Only SImode or DImode registers can form the address. */
10453 if (mode != SImode && mode != DImode)
10454 return NULL_RTX;
10455
10456 if (REG_P (op))
10457 return op;
10458 else if (SUBREG_P (op))
10459 {
10460 rtx reg = SUBREG_REG (op);
10461
10462 if (!REG_P (reg))
10463 return NULL_RTX;
10464
10465 mode = GET_MODE (reg);
10466
10467 /* Don't allow SUBREGs that span more than a word. It can
10468 lead to spill failures when the register is one word out
10469 of a two word structure. */
10470 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
10471 return NULL_RTX;
10472
10473 /* Allow only SUBREGs of non-eliminable hard registers. */
10474 if (register_no_elim_operand (reg, mode))
10475 return reg;
10476 }
10477
10478 /* Op is not a register. */
10479 return NULL_RTX;
10480 }
10481
10482 /* Recognizes RTL expressions that are valid memory addresses for an
10483 instruction. The MODE argument is the machine mode for the MEM
10484 expression that wants to use this address.
10485
10486    It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
10487 convert common non-canonical forms to canonical form so that they will
10488 be recognized. */
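/* For example (illustrative), a canonical base + index*scale + disp address
   such as 12(%ebx,%ecx,4) is accepted, while a scale of 3 or an address
   needing two index registers is rejected by the checks below.  */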
10489
10490 static bool
10491 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
10492 {
10493 struct ix86_address parts;
10494 rtx base, index, disp;
10495 HOST_WIDE_INT scale;
10496 addr_space_t seg;
10497
10498 if (ix86_decompose_address (addr, &parts) <= 0)
10499 /* Decomposition failed. */
10500 return false;
10501
10502 base = parts.base;
10503 index = parts.index;
10504 disp = parts.disp;
10505 scale = parts.scale;
10506 seg = parts.seg;
10507
10508 /* Validate base register. */
10509 if (base)
10510 {
10511 rtx reg = ix86_validate_address_register (base);
10512
10513 if (reg == NULL_RTX)
10514 return false;
10515
10516 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
10517 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
10518 /* Base is not valid. */
10519 return false;
10520 }
10521
10522 /* Validate index register. */
10523 if (index)
10524 {
10525 rtx reg = ix86_validate_address_register (index);
10526
10527 if (reg == NULL_RTX)
10528 return false;
10529
10530 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
10531 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
10532 /* Index is not valid. */
10533 return false;
10534 }
10535
10536 /* Index and base should have the same mode. */
10537 if (base && index
10538 && GET_MODE (base) != GET_MODE (index))
10539 return false;
10540
10541 /* Address override works only on the (%reg) part of %fs:(%reg). */
10542 if (seg != ADDR_SPACE_GENERIC
10543 && ((base && GET_MODE (base) != word_mode)
10544 || (index && GET_MODE (index) != word_mode)))
10545 return false;
10546
10547 /* Validate scale factor. */
10548 if (scale != 1)
10549 {
10550 if (!index)
10551 /* Scale without index. */
10552 return false;
10553
10554 if (scale != 2 && scale != 4 && scale != 8)
10555 /* Scale is not a valid multiplier. */
10556 return false;
10557 }
10558
10559 /* Validate displacement. */
10560 if (disp)
10561 {
10562 if (GET_CODE (disp) == CONST
10563 && GET_CODE (XEXP (disp, 0)) == UNSPEC
10564 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
10565 switch (XINT (XEXP (disp, 0), 1))
10566 {
10567 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit
10568 when used. While the ABI also specifies 32bit relocations, we
10569 don't produce them at all and use IP-relative addressing instead.
10570 Allow GOT in 32bit mode for both PIC and non-PIC if the symbol
10571 should be loaded via the GOT. */
10572 case UNSPEC_GOT:
10573 if (!TARGET_64BIT
10574 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
10575 goto is_legitimate_pic;
10576 /* FALLTHRU */
10577 case UNSPEC_GOTOFF:
10578 gcc_assert (flag_pic);
10579 if (!TARGET_64BIT)
10580 goto is_legitimate_pic;
10581
10582 /* 64bit address unspec. */
10583 return false;
10584
10585 case UNSPEC_GOTPCREL:
10586 if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
10587 goto is_legitimate_pic;
10588 /* FALLTHRU */
10589 case UNSPEC_PCREL:
10590 gcc_assert (flag_pic);
10591 goto is_legitimate_pic;
10592
10593 case UNSPEC_GOTTPOFF:
10594 case UNSPEC_GOTNTPOFF:
10595 case UNSPEC_INDNTPOFF:
10596 case UNSPEC_NTPOFF:
10597 case UNSPEC_DTPOFF:
10598 break;
10599
10600 default:
10601 /* Invalid address unspec. */
10602 return false;
10603 }
10604
10605 else if (SYMBOLIC_CONST (disp)
10606 && (flag_pic
10607 || (TARGET_MACHO
10608 #if TARGET_MACHO
10609 && MACHOPIC_INDIRECT
10610 && !machopic_operand_p (disp)
10611 #endif
10612 )))
10613 {
10614
10615 is_legitimate_pic:
10616 if (TARGET_64BIT && (index || base))
10617 {
10618 /* foo@dtpoff(%rX) is ok. */
10619 if (GET_CODE (disp) != CONST
10620 || GET_CODE (XEXP (disp, 0)) != PLUS
10621 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
10622 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
10623 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
10624 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
10625 /* Non-constant pic memory reference. */
10626 return false;
10627 }
10628 else if ((!TARGET_MACHO || flag_pic)
10629 && ! legitimate_pic_address_disp_p (disp))
10630 /* Displacement is an invalid pic construct. */
10631 return false;
10632 #if TARGET_MACHO
10633 else if (MACHO_DYNAMIC_NO_PIC_P
10634 && !ix86_legitimate_constant_p (Pmode, disp))
10635 /* Displacement must be referenced via non_lazy_pointer. */
10636 return false;
10637 #endif
10638
10639 /* This code used to verify that a symbolic pic displacement
10640 includes the pic_offset_table_rtx register.
10641
10642 While this is a good idea, unfortunately these constructs may
10643 be created by the "adds using lea" optimization for incorrect
10644 code like:
10645
10646 int a;
10647 int foo(int i)
10648 {
10649 return *(&a+i);
10650 }
10651
10652 This code is nonsensical, but results in addressing the
10653 GOT table with a pic_offset_table_rtx base. We can't
10654 just refuse it easily, since it gets matched by the
10655 "addsi3" pattern, which later gets split to lea when the
10656 output register differs from the input. While this
10657 could be handled by a separate addsi pattern for this case
10658 that never results in lea, disabling this test seems to be
10659 the easier and correct fix for the crash. */
10660 }
10661 else if (GET_CODE (disp) != LABEL_REF
10662 && !CONST_INT_P (disp)
10663 && (GET_CODE (disp) != CONST
10664 || !ix86_legitimate_constant_p (Pmode, disp))
10665 && (GET_CODE (disp) != SYMBOL_REF
10666 || !ix86_legitimate_constant_p (Pmode, disp)))
10667 /* Displacement is not constant. */
10668 return false;
10669 else if (TARGET_64BIT
10670 && !x86_64_immediate_operand (disp, VOIDmode))
10671 /* Displacement is out of range. */
10672 return false;
10673 /* In x32 mode, constant addresses are sign extended to 64bit, so
10674 we have to prevent addresses from 0x80000000 to 0xffffffff. */
10675 else if (TARGET_X32 && !(index || base)
10676 && CONST_INT_P (disp)
10677 && val_signbit_known_set_p (SImode, INTVAL (disp)))
10678 return false;
10679 }
10680
10681 /* Everything looks valid. */
10682 return true;
10683 }
10684
10685 /* Determine if a given RTX is a valid constant address. */
10686
10687 bool
10688 constant_address_p (rtx x)
10689 {
10690 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
10691 }
10692
10693 /* Return a unique alias set for the GOT. */
10694
10695 alias_set_type
10696 ix86_GOT_alias_set (void)
10697 {
10698 static alias_set_type set = -1;
10699 if (set == -1)
10700 set = new_alias_set ();
10701 return set;
10702 }
10703
10704 /* Return a legitimate reference for ORIG (an address) using the
10705 register REG. If REG is 0, a new pseudo is generated.
10706
10707 There are two types of references that must be handled:
10708
10709 1. Global data references must load the address from the GOT, via
10710 the PIC reg. An insn is emitted to do this load, and the reg is
10711 returned.
10712
10713 2. Static data references, constant pool addresses, and code labels
10714 compute the address as an offset from the GOT, whose base is in
10715 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
10716 differentiate them from global data objects. The returned
10717 address is the PIC reg + an unspec constant.
10718
10719 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
10720 reg also appears in the address. */
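/* As a rough illustration for 32-bit ELF PIC (assuming %ebx holds the
   PIC register): a global `foo' is reached by loading its address from
   the GOT, e.g. "movl foo@GOT(%ebx), %eax", while local/static data is
   addressed relative to the GOT base, e.g. "leal foo@GOTOFF(%ebx), %eax".
   On x86-64 the GOT slot is instead addressed %rip-relatively, e.g.
   "movq foo@GOTPCREL(%rip), %rax".  */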
10721
10722 rtx
10723 legitimize_pic_address (rtx orig, rtx reg)
10724 {
10725 rtx addr = orig;
10726 rtx new_rtx = orig;
10727
10728 #if TARGET_MACHO
10729 if (TARGET_MACHO && !TARGET_64BIT)
10730 {
10731 if (reg == 0)
10732 reg = gen_reg_rtx (Pmode);
10733 /* Use the generic Mach-O PIC machinery. */
10734 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
10735 }
10736 #endif
10737
10738 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10739 {
10740 rtx tmp = legitimize_pe_coff_symbol (addr, true);
10741 if (tmp)
10742 return tmp;
10743 }
10744
10745 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
10746 new_rtx = addr;
10747 else if ((!TARGET_64BIT
10748 || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
10749 && !TARGET_PECOFF
10750 && gotoff_operand (addr, Pmode))
10751 {
10752 /* This symbol may be referenced via a displacement
10753 from the PIC base address (@GOTOFF). */
10754 if (GET_CODE (addr) == CONST)
10755 addr = XEXP (addr, 0);
10756
10757 if (GET_CODE (addr) == PLUS)
10758 {
10759 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
10760 UNSPEC_GOTOFF);
10761 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
10762 }
10763 else
10764 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
10765
10766 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10767
10768 if (TARGET_64BIT)
10769 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
10770
10771 if (reg != 0)
10772 {
10773 gcc_assert (REG_P (reg));
10774 new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
10775 new_rtx, reg, 1, OPTAB_DIRECT);
10776 }
10777 else
10778 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10779 }
10780 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
10781 /* We can't use @GOTOFF for text labels
10782 on VxWorks, see gotoff_operand. */
10783 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
10784 {
10785 rtx tmp = legitimize_pe_coff_symbol (addr, true);
10786 if (tmp)
10787 return tmp;
10788
10789 /* For x64 PE-COFF there is no GOT table,
10790 so we use the address directly. */
10791 if (TARGET_64BIT && TARGET_PECOFF)
10792 {
10793 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
10794 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10795 }
10796 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
10797 {
10798 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
10799 UNSPEC_GOTPCREL);
10800 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10801 new_rtx = gen_const_mem (Pmode, new_rtx);
10802 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10803 }
10804 else
10805 {
10806 /* This symbol must be referenced via a load
10807 from the Global Offset Table (@GOT). */
10808 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
10809 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10810 if (TARGET_64BIT)
10811 new_rtx = force_reg (Pmode, new_rtx);
10812 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10813 new_rtx = gen_const_mem (Pmode, new_rtx);
10814 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10815 }
10816
10817 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
10818 }
10819 else
10820 {
10821 if (CONST_INT_P (addr)
10822 && !x86_64_immediate_operand (addr, VOIDmode))
10823 new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
10824 else if (GET_CODE (addr) == CONST)
10825 {
10826 addr = XEXP (addr, 0);
10827
10828 /* We must match stuff we generate before. Assume the only
10829 unspecs that can get here are ours. Not that we could do
10830 anything with them anyway.... */
10831 if (GET_CODE (addr) == UNSPEC
10832 || (GET_CODE (addr) == PLUS
10833 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
10834 return orig;
10835 gcc_assert (GET_CODE (addr) == PLUS);
10836 }
10837
10838 if (GET_CODE (addr) == PLUS)
10839 {
10840 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
10841
10842 /* Check first to see if this is a constant
10843 offset from a @GOTOFF symbol reference. */
10844 if (!TARGET_PECOFF
10845 && gotoff_operand (op0, Pmode)
10846 && CONST_INT_P (op1))
10847 {
10848 if (!TARGET_64BIT)
10849 {
10850 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
10851 UNSPEC_GOTOFF);
10852 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
10853 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10854
10855 if (reg != 0)
10856 {
10857 gcc_assert (REG_P (reg));
10858 new_rtx = expand_simple_binop (Pmode, PLUS,
10859 pic_offset_table_rtx,
10860 new_rtx, reg, 1,
10861 OPTAB_DIRECT);
10862 }
10863 else
10864 new_rtx
10865 = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10866 }
10867 else
10868 {
10869 if (INTVAL (op1) < -16*1024*1024
10870 || INTVAL (op1) >= 16*1024*1024)
10871 {
10872 if (!x86_64_immediate_operand (op1, Pmode))
10873 op1 = force_reg (Pmode, op1);
10874
10875 new_rtx
10876 = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
10877 }
10878 }
10879 }
10880 else
10881 {
10882 rtx base = legitimize_pic_address (op0, reg);
10883 machine_mode mode = GET_MODE (base);
10884 new_rtx
10885 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
10886
10887 if (CONST_INT_P (new_rtx))
10888 {
10889 if (INTVAL (new_rtx) < -16*1024*1024
10890 || INTVAL (new_rtx) >= 16*1024*1024)
10891 {
10892 if (!x86_64_immediate_operand (new_rtx, mode))
10893 new_rtx = force_reg (mode, new_rtx);
10894
10895 new_rtx
10896 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
10897 }
10898 else
10899 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
10900 }
10901 else
10902 {
10903 /* For %rip addressing, we have to use
10904 just disp32, not base nor index. */
10905 if (TARGET_64BIT
10906 && (GET_CODE (base) == SYMBOL_REF
10907 || GET_CODE (base) == LABEL_REF))
10908 base = force_reg (mode, base);
10909 if (GET_CODE (new_rtx) == PLUS
10910 && CONSTANT_P (XEXP (new_rtx, 1)))
10911 {
10912 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
10913 new_rtx = XEXP (new_rtx, 1);
10914 }
10915 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
10916 }
10917 }
10918 }
10919 }
10920 return new_rtx;
10921 }
10922
10923 /* Load the thread pointer. If TO_REG is true, force it into a register. */
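/* UNSPEC_TP stands for the TLS segment base; on GNU/Linux this is
   typically the %fs base in 64-bit mode and the %gs base in 32-bit
   mode (see DEFAULT_TLS_SEG_REG).  */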
10924
10925 static rtx
10926 get_thread_pointer (machine_mode tp_mode, bool to_reg)
10927 {
10928 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
10929
10930 if (GET_MODE (tp) != tp_mode)
10931 {
10932 gcc_assert (GET_MODE (tp) == SImode);
10933 gcc_assert (tp_mode == DImode);
10934
10935 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
10936 }
10937
10938 if (to_reg)
10939 tp = copy_to_mode_reg (tp_mode, tp);
10940
10941 return tp;
10942 }
10943
10944 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10945
10946 static GTY(()) rtx ix86_tls_symbol;
10947
10948 static rtx
10949 ix86_tls_get_addr (void)
10950 {
10951 if (!ix86_tls_symbol)
10952 {
10953 const char *sym
10954 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
10955 ? "___tls_get_addr" : "__tls_get_addr");
10956
10957 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
10958 }
10959
10960 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
10961 {
10962 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
10963 UNSPEC_PLTOFF);
10964 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
10965 gen_rtx_CONST (Pmode, unspec));
10966 }
10967
10968 return ix86_tls_symbol;
10969 }
10970
10971 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
10972
10973 static GTY(()) rtx ix86_tls_module_base_symbol;
10974
10975 rtx
10976 ix86_tls_module_base (void)
10977 {
10978 if (!ix86_tls_module_base_symbol)
10979 {
10980 ix86_tls_module_base_symbol
10981 = gen_rtx_SYMBOL_REF (ptr_mode, "_TLS_MODULE_BASE_");
10982
10983 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
10984 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
10985 }
10986
10987 return ix86_tls_module_base_symbol;
10988 }
10989
10990 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
10991 false if we expect this to be used for a memory address and true if
10992 we expect to load the address into a register. */
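/* A rough sketch of what the models expand to on 64-bit GNU/Linux:
   local-exec addresses the variable directly as an offset from the
   thread pointer (x@tpoff), initial-exec first loads that offset from
   the GOT ("movq x@gottpoff(%rip), %reg" followed by a %fs-relative
   access), and the dynamic models obtain the address at run time,
   either by calling __tls_get_addr or via GNU2 TLS descriptors.  */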
10993
10994 rtx
10995 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
10996 {
10997 rtx dest, base, off;
10998 rtx pic = NULL_RTX, tp = NULL_RTX;
10999 machine_mode tp_mode = Pmode;
11000 int type;
11001
11002 /* Fall back to global dynamic model if tool chain cannot support local
11003 dynamic. */
11004 if (TARGET_SUN_TLS && !TARGET_64BIT
11005 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
11006 && model == TLS_MODEL_LOCAL_DYNAMIC)
11007 model = TLS_MODEL_GLOBAL_DYNAMIC;
11008
11009 switch (model)
11010 {
11011 case TLS_MODEL_GLOBAL_DYNAMIC:
11012 if (!TARGET_64BIT)
11013 {
11014 if (flag_pic && !TARGET_PECOFF)
11015 pic = pic_offset_table_rtx;
11016 else
11017 {
11018 pic = gen_reg_rtx (Pmode);
11019 emit_insn (gen_set_got (pic));
11020 }
11021 }
11022
11023 if (TARGET_GNU2_TLS)
11024 {
11025 dest = gen_reg_rtx (ptr_mode);
11026 if (TARGET_64BIT)
11027 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, dest, x));
11028 else
11029 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
11030
11031 tp = get_thread_pointer (ptr_mode, true);
11032 dest = gen_rtx_PLUS (ptr_mode, tp, dest);
11033 if (GET_MODE (dest) != Pmode)
11034 dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
11035 dest = force_reg (Pmode, dest);
11036
11037 if (GET_MODE (x) != Pmode)
11038 x = gen_rtx_ZERO_EXTEND (Pmode, x);
11039
11040 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
11041 }
11042 else
11043 {
11044 rtx caddr = ix86_tls_get_addr ();
11045
11046 dest = gen_reg_rtx (Pmode);
11047 if (TARGET_64BIT)
11048 {
11049 rtx rax = gen_rtx_REG (Pmode, AX_REG);
11050 rtx_insn *insns;
11051
11052 start_sequence ();
11053 emit_call_insn
11054 (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr));
11055 insns = get_insns ();
11056 end_sequence ();
11057
11058 if (GET_MODE (x) != Pmode)
11059 x = gen_rtx_ZERO_EXTEND (Pmode, x);
11060
11061 RTL_CONST_CALL_P (insns) = 1;
11062 emit_libcall_block (insns, dest, rax, x);
11063 }
11064 else
11065 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
11066 }
11067 break;
11068
11069 case TLS_MODEL_LOCAL_DYNAMIC:
11070 if (!TARGET_64BIT)
11071 {
11072 if (flag_pic)
11073 pic = pic_offset_table_rtx;
11074 else
11075 {
11076 pic = gen_reg_rtx (Pmode);
11077 emit_insn (gen_set_got (pic));
11078 }
11079 }
11080
11081 if (TARGET_GNU2_TLS)
11082 {
11083 rtx tmp = ix86_tls_module_base ();
11084
11085 base = gen_reg_rtx (ptr_mode);
11086 if (TARGET_64BIT)
11087 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, base, tmp));
11088 else
11089 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
11090
11091 tp = get_thread_pointer (ptr_mode, true);
11092 if (GET_MODE (base) != Pmode)
11093 base = gen_rtx_ZERO_EXTEND (Pmode, base);
11094 base = force_reg (Pmode, base);
11095 }
11096 else
11097 {
11098 rtx caddr = ix86_tls_get_addr ();
11099
11100 base = gen_reg_rtx (Pmode);
11101 if (TARGET_64BIT)
11102 {
11103 rtx rax = gen_rtx_REG (Pmode, AX_REG);
11104 rtx_insn *insns;
11105 rtx eqv;
11106
11107 start_sequence ();
11108 emit_call_insn
11109 (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr));
11110 insns = get_insns ();
11111 end_sequence ();
11112
11113 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
11114 share the LD_BASE result with other LD model accesses. */
11115 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11116 UNSPEC_TLS_LD_BASE);
11117
11118 RTL_CONST_CALL_P (insns) = 1;
11119 emit_libcall_block (insns, base, rax, eqv);
11120 }
11121 else
11122 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
11123 }
11124
11125 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
11126 off = gen_rtx_CONST (Pmode, off);
11127
11128 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
11129
11130 if (TARGET_GNU2_TLS)
11131 {
11132 if (GET_MODE (tp) != Pmode)
11133 {
11134 dest = lowpart_subreg (ptr_mode, dest, Pmode);
11135 dest = gen_rtx_PLUS (ptr_mode, tp, dest);
11136 dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
11137 }
11138 else
11139 dest = gen_rtx_PLUS (Pmode, tp, dest);
11140 dest = force_reg (Pmode, dest);
11141
11142 if (GET_MODE (x) != Pmode)
11143 x = gen_rtx_ZERO_EXTEND (Pmode, x);
11144
11145 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
11146 }
11147 break;
11148
11149 case TLS_MODEL_INITIAL_EXEC:
11150 if (TARGET_64BIT)
11151 {
11152 if (TARGET_SUN_TLS && !TARGET_X32)
11153 {
11154 /* The Sun linker took the AMD64 TLS spec literally
11155 and can only handle %rax as destination of the
11156 initial executable code sequence. */
11157
11158 dest = gen_reg_rtx (DImode);
11159 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
11160 return dest;
11161 }
11162
11163 /* Generate DImode references to avoid %fs:(%reg32)
11164 problems and linker IE->LE relaxation bug. */
11165 tp_mode = DImode;
11166 pic = NULL;
11167 type = UNSPEC_GOTNTPOFF;
11168 }
11169 else if (flag_pic)
11170 {
11171 pic = pic_offset_table_rtx;
11172 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
11173 }
11174 else if (!TARGET_ANY_GNU_TLS)
11175 {
11176 pic = gen_reg_rtx (Pmode);
11177 emit_insn (gen_set_got (pic));
11178 type = UNSPEC_GOTTPOFF;
11179 }
11180 else
11181 {
11182 pic = NULL;
11183 type = UNSPEC_INDNTPOFF;
11184 }
11185
11186 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
11187 off = gen_rtx_CONST (tp_mode, off);
11188 if (pic)
11189 off = gen_rtx_PLUS (tp_mode, pic, off);
11190 off = gen_const_mem (tp_mode, off);
11191 set_mem_alias_set (off, ix86_GOT_alias_set ());
11192
11193 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11194 {
11195 base = get_thread_pointer (tp_mode,
11196 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11197 off = force_reg (tp_mode, off);
11198 dest = gen_rtx_PLUS (tp_mode, base, off);
11199 if (tp_mode != Pmode)
11200 dest = convert_to_mode (Pmode, dest, 1);
11201 }
11202 else
11203 {
11204 base = get_thread_pointer (Pmode, true);
11205 dest = gen_reg_rtx (Pmode);
11206 emit_insn (gen_sub3_insn (dest, base, off));
11207 }
11208 break;
11209
11210 case TLS_MODEL_LOCAL_EXEC:
11211 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
11212 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11213 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
11214 off = gen_rtx_CONST (Pmode, off);
11215
11216 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11217 {
11218 base = get_thread_pointer (Pmode,
11219 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11220 return gen_rtx_PLUS (Pmode, base, off);
11221 }
11222 else
11223 {
11224 base = get_thread_pointer (Pmode, true);
11225 dest = gen_reg_rtx (Pmode);
11226 emit_insn (gen_sub3_insn (dest, base, off));
11227 }
11228 break;
11229
11230 default:
11231 gcc_unreachable ();
11232 }
11233
11234 return dest;
11235 }
11236
11237 /* Return true if OP refers to a TLS address. */
11238 bool
11239 ix86_tls_address_pattern_p (rtx op)
11240 {
11241 subrtx_var_iterator::array_type array;
11242 FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
11243 {
11244 rtx op = *iter;
11245 if (MEM_P (op))
11246 {
11247 rtx *x = &XEXP (op, 0);
11248 while (GET_CODE (*x) == PLUS)
11249 {
11250 int i;
11251 for (i = 0; i < 2; i++)
11252 {
11253 rtx u = XEXP (*x, i);
11254 if (GET_CODE (u) == ZERO_EXTEND)
11255 u = XEXP (u, 0);
11256 if (GET_CODE (u) == UNSPEC
11257 && XINT (u, 1) == UNSPEC_TP)
11258 return true;
11259 }
11260 x = &XEXP (*x, 0);
11261 }
11262
11263 iter.skip_subrtxes ();
11264 }
11265 }
11266
11267 return false;
11268 }
11269
11270 /* Rewrite *LOC so that it refers to a default TLS address space. */
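/* E.g. a MEM whose address is (plus (unspec [(const_int 0)] UNSPEC_TP)
   (reg)) is rewritten into a MEM of just (reg) in the TLS address
   space, which is later printed with a %fs:/%gs: segment override.  */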
11271 void
11272 ix86_rewrite_tls_address_1 (rtx *loc)
11273 {
11274 subrtx_ptr_iterator::array_type array;
11275 FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
11276 {
11277 rtx *loc = *iter;
11278 if (MEM_P (*loc))
11279 {
11280 rtx addr = XEXP (*loc, 0);
11281 rtx *x = &addr;
11282 while (GET_CODE (*x) == PLUS)
11283 {
11284 int i;
11285 for (i = 0; i < 2; i++)
11286 {
11287 rtx u = XEXP (*x, i);
11288 if (GET_CODE (u) == ZERO_EXTEND)
11289 u = XEXP (u, 0);
11290 if (GET_CODE (u) == UNSPEC
11291 && XINT (u, 1) == UNSPEC_TP)
11292 {
11293 addr_space_t as = DEFAULT_TLS_SEG_REG;
11294
11295 *x = XEXP (*x, 1 - i);
11296
11297 *loc = replace_equiv_address_nv (*loc, addr, true);
11298 set_mem_addr_space (*loc, as);
11299 return;
11300 }
11301 }
11302 x = &XEXP (*x, 0);
11303 }
11304
11305 iter.skip_subrtxes ();
11306 }
11307 }
11308 }
11309
11310 /* Rewrite an instruction pattern involving a TLS address
11311 so that it refers to the default TLS address space. */
11312 rtx
11313 ix86_rewrite_tls_address (rtx pattern)
11314 {
11315 pattern = copy_insn (pattern);
11316 ix86_rewrite_tls_address_1 (&pattern);
11317 return pattern;
11318 }
11319
11320 /* Create or return the unique __imp_DECL dllimport symbol corresponding
11321 to symbol DECL if BEIMPORT is true. Otherwise create or return the
11322 unique refptr-DECL symbol corresponding to symbol DECL. */
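/* For example, a dllimported `foo' is not referenced directly; the
   import library provides a pointer-sized `__imp_foo' slot, and
   accesses go through it, roughly "movq __imp_foo(%rip), %rax" followed
   by a dereference on x86-64.  */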
11323
11324 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
11325 {
11326 static inline hashval_t hash (tree_map *m) { return m->hash; }
11327 static inline bool
11328 equal (tree_map *a, tree_map *b)
11329 {
11330 return a->base.from == b->base.from;
11331 }
11332
11333 static int
11334 keep_cache_entry (tree_map *&m)
11335 {
11336 return ggc_marked_p (m->base.from);
11337 }
11338 };
11339
11340 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
11341
11342 static tree
11343 get_dllimport_decl (tree decl, bool beimport)
11344 {
11345 struct tree_map *h, in;
11346 const char *name;
11347 const char *prefix;
11348 size_t namelen, prefixlen;
11349 char *imp_name;
11350 tree to;
11351 rtx rtl;
11352
11353 if (!dllimport_map)
11354 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
11355
11356 in.hash = htab_hash_pointer (decl);
11357 in.base.from = decl;
11358 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
11359 h = *loc;
11360 if (h)
11361 return h->to;
11362
11363 *loc = h = ggc_alloc<tree_map> ();
11364 h->hash = in.hash;
11365 h->base.from = decl;
11366 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
11367 VAR_DECL, NULL, ptr_type_node);
11368 DECL_ARTIFICIAL (to) = 1;
11369 DECL_IGNORED_P (to) = 1;
11370 DECL_EXTERNAL (to) = 1;
11371 TREE_READONLY (to) = 1;
11372
11373 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
11374 name = targetm.strip_name_encoding (name);
11375 if (beimport)
11376 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
11377 ? "*__imp_" : "*__imp__";
11378 else
11379 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
11380 namelen = strlen (name);
11381 prefixlen = strlen (prefix);
11382 imp_name = (char *) alloca (namelen + prefixlen + 1);
11383 memcpy (imp_name, prefix, prefixlen);
11384 memcpy (imp_name + prefixlen, name, namelen + 1);
11385
11386 name = ggc_alloc_string (imp_name, namelen + prefixlen);
11387 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
11388 SET_SYMBOL_REF_DECL (rtl, to);
11389 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
11390 if (!beimport)
11391 {
11392 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
11393 #ifdef SUB_TARGET_RECORD_STUB
11394 SUB_TARGET_RECORD_STUB (name);
11395 #endif
11396 }
11397
11398 rtl = gen_const_mem (Pmode, rtl);
11399 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
11400
11401 SET_DECL_RTL (to, rtl);
11402 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
11403
11404 return to;
11405 }
11406
11407 /* Expand SYMBOL into its corresponding far-address symbol.
11408 WANT_REG is true if we require the result be a register. */
11409
11410 static rtx
11411 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
11412 {
11413 tree imp_decl;
11414 rtx x;
11415
11416 gcc_assert (SYMBOL_REF_DECL (symbol));
11417 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
11418
11419 x = DECL_RTL (imp_decl);
11420 if (want_reg)
11421 x = force_reg (Pmode, x);
11422 return x;
11423 }
11424
11425 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
11426 true if we require the result be a register. */
11427
11428 static rtx
11429 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
11430 {
11431 tree imp_decl;
11432 rtx x;
11433
11434 gcc_assert (SYMBOL_REF_DECL (symbol));
11435 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
11436
11437 x = DECL_RTL (imp_decl);
11438 if (want_reg)
11439 x = force_reg (Pmode, x);
11440 return x;
11441 }
11442
11443 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
11444 is true if we require the result be a register. */
11445
11446 rtx
11447 legitimize_pe_coff_symbol (rtx addr, bool inreg)
11448 {
11449 if (!TARGET_PECOFF)
11450 return NULL_RTX;
11451
11452 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11453 {
11454 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
11455 return legitimize_dllimport_symbol (addr, inreg);
11456 if (GET_CODE (addr) == CONST
11457 && GET_CODE (XEXP (addr, 0)) == PLUS
11458 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11459 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
11460 {
11461 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
11462 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11463 }
11464 }
11465
11466 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
11467 return NULL_RTX;
11468 if (GET_CODE (addr) == SYMBOL_REF
11469 && !is_imported_p (addr)
11470 && SYMBOL_REF_EXTERNAL_P (addr)
11471 && SYMBOL_REF_DECL (addr))
11472 return legitimize_pe_coff_extern_decl (addr, inreg);
11473
11474 if (GET_CODE (addr) == CONST
11475 && GET_CODE (XEXP (addr, 0)) == PLUS
11476 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11477 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
11478 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
11479 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
11480 {
11481 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
11482 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11483 }
11484 return NULL_RTX;
11485 }
11486
11487 /* Try machine-dependent ways of modifying an illegitimate address
11488 to be legitimate. If we find one, return the new, valid address.
11489 This macro is used in only one place: `memory_address' in explow.c.
11490
11491 OLDX is the address as it was before break_out_memory_refs was called.
11492 In some cases it is useful to look at this to decide what needs to be done.
11493
11494 It is always safe for this macro to do nothing. It exists to recognize
11495 opportunities to optimize the output.
11496
11497 For the 80386, we handle X+REG by loading X into a register R and
11498 using R+REG. R will go in a general reg and indexing will be used.
11499 However, if REG is a broken-out memory address or multiplication,
11500 nothing needs to be done because REG can certainly go in a general reg.
11501
11502 When -fpic is used, special handling is needed for symbolic references.
11503 See comments by legitimize_pic_address in i386.c for details. */
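/* As a small example of the canonicalizations done below, an address
   like (plus (ashift (reg) (const_int 2)) (reg)) is rewritten into
   (plus (mult (reg) (const_int 4)) (reg)), which matches the scaled
   index form (%base,%index,4).  */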
11504
11505 static rtx
11506 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
11507 {
11508 bool changed = false;
11509 unsigned log;
11510
11511 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
11512 if (log)
11513 return legitimize_tls_address (x, (enum tls_model) log, false);
11514 if (GET_CODE (x) == CONST
11515 && GET_CODE (XEXP (x, 0)) == PLUS
11516 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11517 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
11518 {
11519 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
11520 (enum tls_model) log, false);
11521 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11522 }
11523
11524 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11525 {
11526 rtx tmp = legitimize_pe_coff_symbol (x, true);
11527 if (tmp)
11528 return tmp;
11529 }
11530
11531 if (flag_pic && SYMBOLIC_CONST (x))
11532 return legitimize_pic_address (x, 0);
11533
11534 #if TARGET_MACHO
11535 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
11536 return machopic_indirect_data_reference (x, 0);
11537 #endif
11538
11539 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
11540 if (GET_CODE (x) == ASHIFT
11541 && CONST_INT_P (XEXP (x, 1))
11542 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
11543 {
11544 changed = true;
11545 log = INTVAL (XEXP (x, 1));
11546 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
11547 GEN_INT (1 << log));
11548 }
11549
11550 if (GET_CODE (x) == PLUS)
11551 {
11552 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11553
11554 if (GET_CODE (XEXP (x, 0)) == ASHIFT
11555 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11556 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
11557 {
11558 changed = true;
11559 log = INTVAL (XEXP (XEXP (x, 0), 1));
11560 XEXP (x, 0) = gen_rtx_MULT (Pmode,
11561 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
11562 GEN_INT (1 << log));
11563 }
11564
11565 if (GET_CODE (XEXP (x, 1)) == ASHIFT
11566 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11567 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
11568 {
11569 changed = true;
11570 log = INTVAL (XEXP (XEXP (x, 1), 1));
11571 XEXP (x, 1) = gen_rtx_MULT (Pmode,
11572 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
11573 GEN_INT (1 << log));
11574 }
11575
11576 /* Put multiply first if it isn't already. */
11577 if (GET_CODE (XEXP (x, 1)) == MULT)
11578 {
11579 std::swap (XEXP (x, 0), XEXP (x, 1));
11580 changed = true;
11581 }
11582
11583 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
11584 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
11585 created by virtual register instantiation, register elimination, and
11586 similar optimizations. */
11587 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
11588 {
11589 changed = true;
11590 x = gen_rtx_PLUS (Pmode,
11591 gen_rtx_PLUS (Pmode, XEXP (x, 0),
11592 XEXP (XEXP (x, 1), 0)),
11593 XEXP (XEXP (x, 1), 1));
11594 }
11595
11596 /* Canonicalize
11597 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
11598 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
11599 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
11600 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11601 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
11602 && CONSTANT_P (XEXP (x, 1)))
11603 {
11604 rtx constant;
11605 rtx other = NULL_RTX;
11606
11607 if (CONST_INT_P (XEXP (x, 1)))
11608 {
11609 constant = XEXP (x, 1);
11610 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
11611 }
11612 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
11613 {
11614 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
11615 other = XEXP (x, 1);
11616 }
11617 else
11618 constant = 0;
11619
11620 if (constant)
11621 {
11622 changed = true;
11623 x = gen_rtx_PLUS (Pmode,
11624 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
11625 XEXP (XEXP (XEXP (x, 0), 1), 0)),
11626 plus_constant (Pmode, other,
11627 INTVAL (constant)));
11628 }
11629 }
11630
11631 if (changed && ix86_legitimate_address_p (mode, x, false))
11632 return x;
11633
11634 if (GET_CODE (XEXP (x, 0)) == MULT)
11635 {
11636 changed = true;
11637 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
11638 }
11639
11640 if (GET_CODE (XEXP (x, 1)) == MULT)
11641 {
11642 changed = true;
11643 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
11644 }
11645
11646 if (changed
11647 && REG_P (XEXP (x, 1))
11648 && REG_P (XEXP (x, 0)))
11649 return x;
11650
11651 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
11652 {
11653 changed = true;
11654 x = legitimize_pic_address (x, 0);
11655 }
11656
11657 if (changed && ix86_legitimate_address_p (mode, x, false))
11658 return x;
11659
11660 if (REG_P (XEXP (x, 0)))
11661 {
11662 rtx temp = gen_reg_rtx (Pmode);
11663 rtx val = force_operand (XEXP (x, 1), temp);
11664 if (val != temp)
11665 {
11666 val = convert_to_mode (Pmode, val, 1);
11667 emit_move_insn (temp, val);
11668 }
11669
11670 XEXP (x, 1) = temp;
11671 return x;
11672 }
11673
11674 else if (REG_P (XEXP (x, 1)))
11675 {
11676 rtx temp = gen_reg_rtx (Pmode);
11677 rtx val = force_operand (XEXP (x, 0), temp);
11678 if (val != temp)
11679 {
11680 val = convert_to_mode (Pmode, val, 1);
11681 emit_move_insn (temp, val);
11682 }
11683
11684 XEXP (x, 0) = temp;
11685 return x;
11686 }
11687 }
11688
11689 return x;
11690 }
11691
11692 /* Print an integer constant expression in assembler syntax. Addition
11693 and subtraction are the only arithmetic that may appear in these
11694 expressions. FILE is the stdio stream to write to, X is the rtx, and
11695 CODE is the operand print code from the output string. */
11696
11697 static void
11698 output_pic_addr_const (FILE *file, rtx x, int code)
11699 {
11700 char buf[256];
11701
11702 switch (GET_CODE (x))
11703 {
11704 case PC:
11705 gcc_assert (flag_pic);
11706 putc ('.', file);
11707 break;
11708
11709 case SYMBOL_REF:
11710 if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS)
11711 output_addr_const (file, x);
11712 else
11713 {
11714 const char *name = XSTR (x, 0);
11715
11716 /* Mark the decl as referenced so that cgraph will
11717 output the function. */
11718 if (SYMBOL_REF_DECL (x))
11719 mark_decl_referenced (SYMBOL_REF_DECL (x));
11720
11721 #if TARGET_MACHO
11722 if (MACHOPIC_INDIRECT
11723 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
11724 name = machopic_indirection_name (x, /*stub_p=*/true);
11725 #endif
11726 assemble_name (file, name);
11727 }
11728 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
11729 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
11730 fputs ("@PLT", file);
11731 break;
11732
11733 case LABEL_REF:
11734 x = XEXP (x, 0);
11735 /* FALLTHRU */
11736 case CODE_LABEL:
11737 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
11738 assemble_name (asm_out_file, buf);
11739 break;
11740
11741 case CONST_INT:
11742 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11743 break;
11744
11745 case CONST:
11746 /* This used to output parentheses around the expression,
11747 but that does not work on the 386 (either ATT or BSD assembler). */
11748 output_pic_addr_const (file, XEXP (x, 0), code);
11749 break;
11750
11751 case CONST_DOUBLE:
11752 /* We can't handle floating point constants;
11753 TARGET_PRINT_OPERAND must handle them. */
11754 output_operand_lossage ("floating constant misused");
11755 break;
11756
11757 case PLUS:
11758 /* Some assemblers need integer constants to appear first. */
11759 if (CONST_INT_P (XEXP (x, 0)))
11760 {
11761 output_pic_addr_const (file, XEXP (x, 0), code);
11762 putc ('+', file);
11763 output_pic_addr_const (file, XEXP (x, 1), code);
11764 }
11765 else
11766 {
11767 gcc_assert (CONST_INT_P (XEXP (x, 1)));
11768 output_pic_addr_const (file, XEXP (x, 1), code);
11769 putc ('+', file);
11770 output_pic_addr_const (file, XEXP (x, 0), code);
11771 }
11772 break;
11773
11774 case MINUS:
11775 if (!TARGET_MACHO)
11776 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
11777 output_pic_addr_const (file, XEXP (x, 0), code);
11778 putc ('-', file);
11779 output_pic_addr_const (file, XEXP (x, 1), code);
11780 if (!TARGET_MACHO)
11781 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
11782 break;
11783
11784 case UNSPEC:
11785 gcc_assert (XVECLEN (x, 0) == 1);
11786 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
11787 switch (XINT (x, 1))
11788 {
11789 case UNSPEC_GOT:
11790 fputs ("@GOT", file);
11791 break;
11792 case UNSPEC_GOTOFF:
11793 fputs ("@GOTOFF", file);
11794 break;
11795 case UNSPEC_PLTOFF:
11796 fputs ("@PLTOFF", file);
11797 break;
11798 case UNSPEC_PCREL:
11799 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11800 "(%rip)" : "[rip]", file);
11801 break;
11802 case UNSPEC_GOTPCREL:
11803 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11804 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
11805 break;
11806 case UNSPEC_GOTTPOFF:
11807 /* FIXME: This might be @TPOFF in Sun ld too. */
11808 fputs ("@gottpoff", file);
11809 break;
11810 case UNSPEC_TPOFF:
11811 fputs ("@tpoff", file);
11812 break;
11813 case UNSPEC_NTPOFF:
11814 if (TARGET_64BIT)
11815 fputs ("@tpoff", file);
11816 else
11817 fputs ("@ntpoff", file);
11818 break;
11819 case UNSPEC_DTPOFF:
11820 fputs ("@dtpoff", file);
11821 break;
11822 case UNSPEC_GOTNTPOFF:
11823 if (TARGET_64BIT)
11824 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11825 "@gottpoff(%rip)": "@gottpoff[rip]", file);
11826 else
11827 fputs ("@gotntpoff", file);
11828 break;
11829 case UNSPEC_INDNTPOFF:
11830 fputs ("@indntpoff", file);
11831 break;
11832 #if TARGET_MACHO
11833 case UNSPEC_MACHOPIC_OFFSET:
11834 putc ('-', file);
11835 machopic_output_function_base_name (file);
11836 break;
11837 #endif
11838 default:
11839 output_operand_lossage ("invalid UNSPEC as operand");
11840 break;
11841 }
11842 break;
11843
11844 default:
11845 output_operand_lossage ("invalid expression as operand");
11846 }
11847 }
11848
11849 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11850 We need to emit DTP-relative relocations. */
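/* For a 4-byte entry this emits something like ".long sym@dtpoff";
   for an 8-byte entry the upper half is simply zero, i.e.
   ".long sym@dtpoff, 0".  */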
11851
11852 static void ATTRIBUTE_UNUSED
11853 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
11854 {
11855 fputs (ASM_LONG, file);
11856 output_addr_const (file, x);
11857 fputs ("@dtpoff", file);
11858 switch (size)
11859 {
11860 case 4:
11861 break;
11862 case 8:
11863 fputs (", 0", file);
11864 break;
11865 default:
11866 gcc_unreachable ();
11867 }
11868 }
11869
11870 /* Return true if X is a representation of the PIC register. This copes
11871 with calls from ix86_find_base_term, where the register might have
11872 been replaced by a cselib value. */
11873
11874 static bool
11875 ix86_pic_register_p (rtx x)
11876 {
11877 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
11878 return (pic_offset_table_rtx
11879 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
11880 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT)
11881 return true;
11882 else if (!REG_P (x))
11883 return false;
11884 else if (pic_offset_table_rtx)
11885 {
11886 if (REGNO (x) == REGNO (pic_offset_table_rtx))
11887 return true;
11888 if (HARD_REGISTER_P (x)
11889 && !HARD_REGISTER_P (pic_offset_table_rtx)
11890 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
11891 return true;
11892 return false;
11893 }
11894 else
11895 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
11896 }
11897
11898 /* Helper function for ix86_delegitimize_address.
11899 Attempt to delegitimize TLS local-exec accesses. */
11900
11901 static rtx
11902 ix86_delegitimize_tls_address (rtx orig_x)
11903 {
11904 rtx x = orig_x, unspec;
11905 struct ix86_address addr;
11906
11907 if (!TARGET_TLS_DIRECT_SEG_REFS)
11908 return orig_x;
11909 if (MEM_P (x))
11910 x = XEXP (x, 0);
11911 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
11912 return orig_x;
11913 if (ix86_decompose_address (x, &addr) == 0
11914 || addr.seg != DEFAULT_TLS_SEG_REG
11915 || addr.disp == NULL_RTX
11916 || GET_CODE (addr.disp) != CONST)
11917 return orig_x;
11918 unspec = XEXP (addr.disp, 0);
11919 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
11920 unspec = XEXP (unspec, 0);
11921 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
11922 return orig_x;
11923 x = XVECEXP (unspec, 0, 0);
11924 gcc_assert (GET_CODE (x) == SYMBOL_REF);
11925 if (unspec != XEXP (addr.disp, 0))
11926 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
11927 if (addr.index)
11928 {
11929 rtx idx = addr.index;
11930 if (addr.scale != 1)
11931 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
11932 x = gen_rtx_PLUS (Pmode, idx, x);
11933 }
11934 if (addr.base)
11935 x = gen_rtx_PLUS (Pmode, addr.base, x);
11936 if (MEM_P (orig_x))
11937 x = replace_equiv_address_nv (orig_x, x);
11938 return x;
11939 }
11940
11941 /* In the name of slightly smaller debug output, and to cater to
11942 general assembler lossage, recognize PIC+GOTOFF and turn it back
11943 into a direct symbol reference.
11944
11945 On Darwin, this is necessary to avoid a crash, because Darwin
11946 has a different PIC label for each routine but the DWARF debugging
11947 information is not associated with any particular routine, so it's
11948 necessary to remove references to the PIC label from RTL stored by
11949 the DWARF output code.
11950
11951 This helper is used in the normal ix86_delegitimize_address
11952 entrypoint (e.g. used in the target delegitimization hook) and
11953 in ix86_find_base_term. As compile time memory optimization, we
11954 avoid allocating rtxes that will not change anything on the outcome
11955 of the callers (find_base_value and find_base_term). */
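/* For example, a 32-bit PIC reference such as
     (plus (reg:SI %ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))
   is turned back into the bare (symbol_ref "foo"), plus any constant
   addend that was folded into the original address.  */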
11956
11957 static inline rtx
11958 ix86_delegitimize_address_1 (rtx x, bool base_term_p)
11959 {
11960 rtx orig_x = delegitimize_mem_from_attrs (x);
11961 /* addend is NULL or some rtx if x is something+GOTOFF where
11962 something doesn't include the PIC register. */
11963 rtx addend = NULL_RTX;
11964 /* reg_addend is NULL or a multiple of some register. */
11965 rtx reg_addend = NULL_RTX;
11966 /* const_addend is NULL or a const_int. */
11967 rtx const_addend = NULL_RTX;
11968 /* This is the result, or NULL. */
11969 rtx result = NULL_RTX;
11970
11971 x = orig_x;
11972
11973 if (MEM_P (x))
11974 x = XEXP (x, 0);
11975
11976 if (TARGET_64BIT)
11977 {
11978 if (GET_CODE (x) == CONST
11979 && GET_CODE (XEXP (x, 0)) == PLUS
11980 && GET_MODE (XEXP (x, 0)) == Pmode
11981 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11982 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
11983 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
11984 {
11985 /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
11986 base. A CONST can't be arg_pointer_rtx based. */
11987 if (base_term_p && MEM_P (orig_x))
11988 return orig_x;
11989 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
11990 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
11991 if (MEM_P (orig_x))
11992 x = replace_equiv_address_nv (orig_x, x);
11993 return x;
11994 }
11995
11996 if (GET_CODE (x) == CONST
11997 && GET_CODE (XEXP (x, 0)) == UNSPEC
11998 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
11999 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
12000 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
12001 {
12002 x = XVECEXP (XEXP (x, 0), 0, 0);
12003 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
12004 {
12005 x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
12006 if (x == NULL_RTX)
12007 return orig_x;
12008 }
12009 return x;
12010 }
12011
12012 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
12013 return ix86_delegitimize_tls_address (orig_x);
12014
12015 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
12016 and -mcmodel=medium -fpic. */
12017 }
12018
12019 if (GET_CODE (x) != PLUS
12020 || GET_CODE (XEXP (x, 1)) != CONST)
12021 return ix86_delegitimize_tls_address (orig_x);
12022
12023 if (ix86_pic_register_p (XEXP (x, 0)))
12024 /* %ebx + GOT/GOTOFF */
12025 ;
12026 else if (GET_CODE (XEXP (x, 0)) == PLUS)
12027 {
12028 /* %ebx + %reg * scale + GOT/GOTOFF */
12029 reg_addend = XEXP (x, 0);
12030 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
12031 reg_addend = XEXP (reg_addend, 1);
12032 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
12033 reg_addend = XEXP (reg_addend, 0);
12034 else
12035 {
12036 reg_addend = NULL_RTX;
12037 addend = XEXP (x, 0);
12038 }
12039 }
12040 else
12041 addend = XEXP (x, 0);
12042
12043 x = XEXP (XEXP (x, 1), 0);
12044 if (GET_CODE (x) == PLUS
12045 && CONST_INT_P (XEXP (x, 1)))
12046 {
12047 const_addend = XEXP (x, 1);
12048 x = XEXP (x, 0);
12049 }
12050
12051 if (GET_CODE (x) == UNSPEC
12052 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
12053 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
12054 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
12055 && !MEM_P (orig_x) && !addend)))
12056 result = XVECEXP (x, 0, 0);
12057
12058 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
12059 && !MEM_P (orig_x))
12060 result = XVECEXP (x, 0, 0);
12061
12062 if (! result)
12063 return ix86_delegitimize_tls_address (orig_x);
12064
12065 /* For (PLUS something CONST_INT) both find_base_{value,term} just
12066 recurse on the first operand. */
12067 if (const_addend && !base_term_p)
12068 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
12069 if (reg_addend)
12070 result = gen_rtx_PLUS (Pmode, reg_addend, result);
12071 if (addend)
12072 {
12073 /* If the rest of original X doesn't involve the PIC register, add
12074 addend and subtract pic_offset_table_rtx. This can happen e.g.
12075 for code like:
12076 leal (%ebx, %ecx, 4), %ecx
12077 ...
12078 movl foo@GOTOFF(%ecx), %edx
12079 in which case we return (%ecx - %ebx) + foo
12080 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
12081 and reload has completed. Don't do the latter for debug,
12082 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. */
12083 if (pic_offset_table_rtx
12084 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
12085 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
12086 pic_offset_table_rtx),
12087 result);
12088 else if (base_term_p
12089 && pic_offset_table_rtx
12090 && !TARGET_MACHO
12091 && !TARGET_VXWORKS_RTP)
12092 {
12093 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
12094 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
12095 result = gen_rtx_PLUS (Pmode, tmp, result);
12096 }
12097 else
12098 return orig_x;
12099 }
12100 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
12101 {
12102 result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
12103 if (result == NULL_RTX)
12104 return orig_x;
12105 }
12106 return result;
12107 }
12108
12109 /* The normal instantiation of the above template. */
12110
12111 static rtx
12112 ix86_delegitimize_address (rtx x)
12113 {
12114 return ix86_delegitimize_address_1 (x, false);
12115 }
12116
12117 /* If X is a machine specific address (i.e. a symbol or label being
12118 referenced as a displacement from the GOT implemented using an
12119 UNSPEC), then return the base term. Otherwise return X. */
12120
12121 rtx
12122 ix86_find_base_term (rtx x)
12123 {
12124 rtx term;
12125
12126 if (TARGET_64BIT)
12127 {
12128 if (GET_CODE (x) != CONST)
12129 return x;
12130 term = XEXP (x, 0);
12131 if (GET_CODE (term) == PLUS
12132 && CONST_INT_P (XEXP (term, 1)))
12133 term = XEXP (term, 0);
12134 if (GET_CODE (term) != UNSPEC
12135 || (XINT (term, 1) != UNSPEC_GOTPCREL
12136 && XINT (term, 1) != UNSPEC_PCREL))
12137 return x;
12138
12139 return XVECEXP (term, 0, 0);
12140 }
12141
12142 return ix86_delegitimize_address_1 (x, true);
12143 }
12144
12145 /* Return true if X shouldn't be emitted into the debug info.
12146 Disallow UNSPECs other than @gotoff - we can't easily emit the
12147 _GLOBAL_OFFSET_TABLE_ symbol into the .debug_info section, so we do
12148 not delegitimize it, but instead assemble it as @gotoff.
12149 Disallow a _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
12150 assembles that as a _GLOBAL_OFFSET_TABLE_-. expression.  */
12151
12152 static bool
12153 ix86_const_not_ok_for_debug_p (rtx x)
12154 {
12155 if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
12156 return true;
12157
12158 if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
12159 return true;
12160
12161 return false;
12162 }
12163
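/* Output to FILE the condition-code suffix (e.g. "e", "ne", "g", "a")
   corresponding to comparison CODE in CC mode MODE.  If REVERSE is
   true the condition is reversed first; FP selects the suffixes
   appropriate after a floating-point comparison.  */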
12164 static void
12165 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
12166 bool fp, FILE *file)
12167 {
12168 const char *suffix;
12169
12170 if (mode == CCFPmode)
12171 {
12172 code = ix86_fp_compare_code_to_integer (code);
12173 mode = CCmode;
12174 }
12175 if (reverse)
12176 code = reverse_condition (code);
12177
12178 switch (code)
12179 {
12180 case EQ:
12181 gcc_assert (mode != CCGZmode);
12182 switch (mode)
12183 {
12184 case E_CCAmode:
12185 suffix = "a";
12186 break;
12187 case E_CCCmode:
12188 suffix = "c";
12189 break;
12190 case E_CCOmode:
12191 suffix = "o";
12192 break;
12193 case E_CCPmode:
12194 suffix = "p";
12195 break;
12196 case E_CCSmode:
12197 suffix = "s";
12198 break;
12199 default:
12200 suffix = "e";
12201 break;
12202 }
12203 break;
12204 case NE:
12205 gcc_assert (mode != CCGZmode);
12206 switch (mode)
12207 {
12208 case E_CCAmode:
12209 suffix = "na";
12210 break;
12211 case E_CCCmode:
12212 suffix = "nc";
12213 break;
12214 case E_CCOmode:
12215 suffix = "no";
12216 break;
12217 case E_CCPmode:
12218 suffix = "np";
12219 break;
12220 case E_CCSmode:
12221 suffix = "ns";
12222 break;
12223 default:
12224 suffix = "ne";
12225 break;
12226 }
12227 break;
12228 case GT:
12229 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
12230 suffix = "g";
12231 break;
12232 case GTU:
12233 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
12234 Those same assemblers have the same but opposite lossage on cmov. */
12235 if (mode == CCmode)
12236 suffix = fp ? "nbe" : "a";
12237 else
12238 gcc_unreachable ();
12239 break;
12240 case LT:
12241 switch (mode)
12242 {
12243 case E_CCNOmode:
12244 case E_CCGOCmode:
12245 suffix = "s";
12246 break;
12247
12248 case E_CCmode:
12249 case E_CCGCmode:
12250 case E_CCGZmode:
12251 suffix = "l";
12252 break;
12253
12254 default:
12255 gcc_unreachable ();
12256 }
12257 break;
12258 case LTU:
12259 if (mode == CCmode || mode == CCGZmode)
12260 suffix = "b";
12261 else if (mode == CCCmode)
12262 suffix = fp ? "b" : "c";
12263 else
12264 gcc_unreachable ();
12265 break;
12266 case GE:
12267 switch (mode)
12268 {
12269 case E_CCNOmode:
12270 case E_CCGOCmode:
12271 suffix = "ns";
12272 break;
12273
12274 case E_CCmode:
12275 case E_CCGCmode:
12276 case E_CCGZmode:
12277 suffix = "ge";
12278 break;
12279
12280 default:
12281 gcc_unreachable ();
12282 }
12283 break;
12284 case GEU:
12285 if (mode == CCmode || mode == CCGZmode)
12286 suffix = "nb";
12287 else if (mode == CCCmode)
12288 suffix = fp ? "nb" : "nc";
12289 else
12290 gcc_unreachable ();
12291 break;
12292 case LE:
12293 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
12294 suffix = "le";
12295 break;
12296 case LEU:
12297 if (mode == CCmode)
12298 suffix = "be";
12299 else
12300 gcc_unreachable ();
12301 break;
12302 case UNORDERED:
12303 suffix = fp ? "u" : "p";
12304 break;
12305 case ORDERED:
12306 suffix = fp ? "nu" : "np";
12307 break;
12308 default:
12309 gcc_unreachable ();
12310 }
12311 fputs (suffix, file);
12312 }
12313
12314 /* Print the name of register X to FILE based on its machine mode and number.
12315 If CODE is 'w', pretend the mode is HImode.
12316 If CODE is 'b', pretend the mode is QImode.
12317 If CODE is 'k', pretend the mode is SImode.
12318 If CODE is 'q', pretend the mode is DImode.
12319 If CODE is 'x', pretend the mode is V4SFmode.
12320 If CODE is 't', pretend the mode is V8SFmode.
12321 If CODE is 'g', pretend the mode is V16SFmode.
12322 If CODE is 'h', pretend the reg is the 'high' byte register.
12323 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
12324 If CODE is 'd', duplicate the operand for AVX instruction.
12325 If CODE is 'V', print naked full integer register name without %.
12326 */
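/* E.g. with operands[0] being DImode register 0, "%k0" prints "eax",
   "%w0" prints "ax", "%b0" prints "al" and "%h0" prints "ah" (each
   preceded by '%' in AT&T syntax).  */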
12327
12328 void
12329 print_reg (rtx x, int code, FILE *file)
12330 {
12331 const char *reg;
12332 int msize;
12333 unsigned int regno;
12334 bool duplicated;
12335
12336 if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
12337 putc ('%', file);
12338
12339 if (x == pc_rtx)
12340 {
12341 gcc_assert (TARGET_64BIT);
12342 fputs ("rip", file);
12343 return;
12344 }
12345
12346 if (code == 'y' && STACK_TOP_P (x))
12347 {
12348 fputs ("st(0)", file);
12349 return;
12350 }
12351
12352 if (code == 'w')
12353 msize = 2;
12354 else if (code == 'b')
12355 msize = 1;
12356 else if (code == 'k')
12357 msize = 4;
12358 else if (code == 'q')
12359 msize = 8;
12360 else if (code == 'h')
12361 msize = 0;
12362 else if (code == 'x')
12363 msize = 16;
12364 else if (code == 't')
12365 msize = 32;
12366 else if (code == 'g')
12367 msize = 64;
12368 else
12369 msize = GET_MODE_SIZE (GET_MODE (x));
12370
12371 regno = REGNO (x);
12372
12373 if (regno == ARG_POINTER_REGNUM
12374 || regno == FRAME_POINTER_REGNUM
12375 || regno == FPSR_REG)
12376 {
12377 output_operand_lossage
12378 ("invalid use of register '%s'", reg_names[regno]);
12379 return;
12380 }
12381 else if (regno == FLAGS_REG)
12382 {
12383 output_operand_lossage ("invalid use of asm flag output");
12384 return;
12385 }
12386
12387 if (code == 'V')
12388 {
12389 if (GENERAL_REGNO_P (regno))
12390 msize = GET_MODE_SIZE (word_mode);
12391 else
12392 error ("%<V%> modifier on non-integer register");
12393 }
12394
12395 duplicated = code == 'd' && TARGET_AVX;
12396
12397 switch (msize)
12398 {
12399 case 16:
12400 case 12:
12401 case 8:
12402 if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
12403 warning (0, "unsupported size for integer register");
12404 /* FALLTHRU */
12405 case 4:
12406 if (LEGACY_INT_REGNO_P (regno))
12407 putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
12408 /* FALLTHRU */
12409 case 2:
12410 normal:
12411 reg = hi_reg_name[regno];
12412 break;
12413 case 1:
12414 if (regno >= ARRAY_SIZE (qi_reg_name))
12415 goto normal;
12416 if (!ANY_QI_REGNO_P (regno))
12417 error ("unsupported size for integer register");
12418 reg = qi_reg_name[regno];
12419 break;
12420 case 0:
12421 if (regno >= ARRAY_SIZE (qi_high_reg_name))
12422 goto normal;
12423 reg = qi_high_reg_name[regno];
12424 break;
12425 case 32:
12426 case 64:
12427 if (SSE_REGNO_P (regno))
12428 {
12429 gcc_assert (!duplicated);
12430 putc (msize == 32 ? 'y' : 'z', file);
12431 reg = hi_reg_name[regno] + 1;
12432 break;
12433 }
12434 goto normal;
12435 default:
12436 gcc_unreachable ();
12437 }
12438
12439 fputs (reg, file);
12440
12441 /* Irritatingly, AMD extended registers use a
12442 different naming convention: "r%d[bwd]". */
12443 if (REX_INT_REGNO_P (regno))
12444 {
12445 gcc_assert (TARGET_64BIT);
12446 switch (msize)
12447 {
12448 case 0:
12449 error ("extended registers have no high halves");
12450 break;
12451 case 1:
12452 putc ('b', file);
12453 break;
12454 case 2:
12455 putc ('w', file);
12456 break;
12457 case 4:
12458 putc ('d', file);
12459 break;
12460 case 8:
12461 /* no suffix */
12462 break;
12463 default:
12464 error ("unsupported operand size for extended register");
12465 break;
12466 }
12467 return;
12468 }
12469
12470 if (duplicated)
12471 {
12472 if (ASSEMBLER_DIALECT == ASM_ATT)
12473 fprintf (file, ", %%%s", reg);
12474 else
12475 fprintf (file, ", %s", reg);
12476 }
12477 }
12478
12479 /* Meaning of CODE:
12480 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12481 C -- print opcode suffix for set/cmov insn.
12482 c -- like C, but print reversed condition
12483 F,f -- likewise, but for floating-point.
12484 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
12485 otherwise nothing
12486 R -- print embedded rounding and sae.
12487 r -- print only sae.
12488 z -- print the opcode suffix for the size of the current operand.
12489 Z -- likewise, with special suffixes for x87 instructions.
12490 * -- print a star (in certain assembler syntax)
12491 A -- print an absolute memory reference.
12492 E -- print address with DImode register names if TARGET_64BIT.
12493 w -- print the operand as if it's a "word" (HImode) even if it isn't.
12494    s -- print a shift double count, followed by the assembler's argument
12495         delimiter.
12496 b -- print the QImode name of the register for the indicated operand.
12497 %b0 would print %al if operands[0] is reg 0.
12498 w -- likewise, print the HImode name of the register.
12499 k -- likewise, print the SImode name of the register.
12500 q -- likewise, print the DImode name of the register.
12501 x -- likewise, print the V4SFmode name of the register.
12502 t -- likewise, print the V8SFmode name of the register.
12503 g -- likewise, print the V16SFmode name of the register.
12504 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
12505 y -- print "st(0)" instead of "st" as a register.
12506 d -- print duplicated register operand for AVX instruction.
12507 D -- print condition for SSE cmp instruction.
12508 P -- if PIC, print an @PLT suffix.
12509 p -- print raw symbol name.
12510 X -- don't print any sort of PIC '@' suffix for a symbol.
12511 & -- print some in-use local-dynamic symbol name.
12512 H -- print a memory address offset by 8; used for sse high-parts
12513 Y -- print condition for XOP pcom* instruction.
12514 V -- print naked full integer register name without %.
12515 + -- print a branch hint as 'cs' or 'ds' prefix
12516 ; -- print a semicolon (after prefixes due to bug in older gas).
12517 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
12518 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
12519 M -- print addr32 prefix for TARGET_X32 with VSIB address.
12520 ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
12521 */
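/* For example (illustrative), in a template such as "mov%z0\t{%1, %0|%0, %1}"
   the 'z' modifier picks the size suffix from operand 0, so an SImode
   destination produces "movl" in AT&T syntax and plain "mov" in Intel
   syntax, where no suffix is printed.  */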
12522
12523 void
12524 ix86_print_operand (FILE *file, rtx x, int code)
12525 {
12526 if (code)
12527 {
12528 switch (code)
12529 {
12530 case 'A':
12531 switch (ASSEMBLER_DIALECT)
12532 {
12533 case ASM_ATT:
12534 putc ('*', file);
12535 break;
12536
12537 case ASM_INTEL:
12538 	  /* Intel syntax.  For absolute addresses, registers should not
12539 	     be surrounded by brackets.  */
12540 if (!REG_P (x))
12541 {
12542 putc ('[', file);
12543 ix86_print_operand (file, x, 0);
12544 putc (']', file);
12545 return;
12546 }
12547 break;
12548
12549 default:
12550 gcc_unreachable ();
12551 }
12552
12553 ix86_print_operand (file, x, 0);
12554 return;
12555
12556 case 'E':
12557 /* Wrap address in an UNSPEC to declare special handling. */
12558 if (TARGET_64BIT)
12559 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
12560
12561 output_address (VOIDmode, x);
12562 return;
12563
12564 case 'L':
12565 if (ASSEMBLER_DIALECT == ASM_ATT)
12566 putc ('l', file);
12567 return;
12568
12569 case 'W':
12570 if (ASSEMBLER_DIALECT == ASM_ATT)
12571 putc ('w', file);
12572 return;
12573
12574 case 'B':
12575 if (ASSEMBLER_DIALECT == ASM_ATT)
12576 putc ('b', file);
12577 return;
12578
12579 case 'Q':
12580 if (ASSEMBLER_DIALECT == ASM_ATT)
12581 putc ('l', file);
12582 return;
12583
12584 case 'S':
12585 if (ASSEMBLER_DIALECT == ASM_ATT)
12586 putc ('s', file);
12587 return;
12588
12589 case 'T':
12590 if (ASSEMBLER_DIALECT == ASM_ATT)
12591 putc ('t', file);
12592 return;
12593
12594 case 'O':
12595 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12596 if (ASSEMBLER_DIALECT != ASM_ATT)
12597 return;
12598
12599 switch (GET_MODE_SIZE (GET_MODE (x)))
12600 {
12601 case 2:
12602 putc ('w', file);
12603 break;
12604
12605 case 4:
12606 putc ('l', file);
12607 break;
12608
12609 case 8:
12610 putc ('q', file);
12611 break;
12612
12613 default:
12614 output_operand_lossage ("invalid operand size for operand "
12615 "code 'O'");
12616 return;
12617 }
12618
12619 putc ('.', file);
12620 #endif
12621 return;
12622
12623 case 'z':
12624 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12625 {
12626 	  /* Opcodes don't get size suffixes when using Intel syntax.  */
12627 if (ASSEMBLER_DIALECT == ASM_INTEL)
12628 return;
12629
12630 switch (GET_MODE_SIZE (GET_MODE (x)))
12631 {
12632 case 1:
12633 putc ('b', file);
12634 return;
12635
12636 case 2:
12637 putc ('w', file);
12638 return;
12639
12640 case 4:
12641 putc ('l', file);
12642 return;
12643
12644 case 8:
12645 putc ('q', file);
12646 return;
12647
12648 default:
12649 output_operand_lossage ("invalid operand size for operand "
12650 "code 'z'");
12651 return;
12652 }
12653 }
12654
12655 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12656 warning (0, "non-integer operand used with operand code %<z%>");
12657 /* FALLTHRU */
12658
12659 case 'Z':
12660 	  /* 387 opcodes don't get size suffixes when using Intel syntax.  */
12661 if (ASSEMBLER_DIALECT == ASM_INTEL)
12662 return;
12663
12664 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12665 {
12666 switch (GET_MODE_SIZE (GET_MODE (x)))
12667 {
12668 case 2:
12669 #ifdef HAVE_AS_IX86_FILDS
12670 putc ('s', file);
12671 #endif
12672 return;
12673
12674 case 4:
12675 putc ('l', file);
12676 return;
12677
12678 case 8:
12679 #ifdef HAVE_AS_IX86_FILDQ
12680 putc ('q', file);
12681 #else
12682 fputs ("ll", file);
12683 #endif
12684 return;
12685
12686 default:
12687 break;
12688 }
12689 }
12690 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12691 {
12692 /* 387 opcodes don't get size suffixes
12693 if the operands are registers. */
12694 if (STACK_REG_P (x))
12695 return;
12696
12697 switch (GET_MODE_SIZE (GET_MODE (x)))
12698 {
12699 case 4:
12700 putc ('s', file);
12701 return;
12702
12703 case 8:
12704 putc ('l', file);
12705 return;
12706
12707 case 12:
12708 case 16:
12709 putc ('t', file);
12710 return;
12711
12712 default:
12713 break;
12714 }
12715 }
12716 else
12717 {
12718 output_operand_lossage ("invalid operand type used with "
12719 "operand code 'Z'");
12720 return;
12721 }
12722
12723 output_operand_lossage ("invalid operand size for operand code 'Z'");
12724 return;
12725
12726 case 'd':
12727 case 'b':
12728 case 'w':
12729 case 'k':
12730 case 'q':
12731 case 'h':
12732 case 't':
12733 case 'g':
12734 case 'y':
12735 case 'x':
12736 case 'X':
12737 case 'P':
12738 case 'p':
12739 case 'V':
12740 break;
12741
12742 case 's':
12743 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
12744 {
12745 ix86_print_operand (file, x, 0);
12746 fputs (", ", file);
12747 }
12748 return;
12749
12750 case 'Y':
12751 switch (GET_CODE (x))
12752 {
12753 case NE:
12754 fputs ("neq", file);
12755 break;
12756 case EQ:
12757 fputs ("eq", file);
12758 break;
12759 case GE:
12760 case GEU:
12761 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
12762 break;
12763 case GT:
12764 case GTU:
12765 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
12766 break;
12767 case LE:
12768 case LEU:
12769 fputs ("le", file);
12770 break;
12771 case LT:
12772 case LTU:
12773 fputs ("lt", file);
12774 break;
12775 case UNORDERED:
12776 fputs ("unord", file);
12777 break;
12778 case ORDERED:
12779 fputs ("ord", file);
12780 break;
12781 case UNEQ:
12782 fputs ("ueq", file);
12783 break;
12784 case UNGE:
12785 fputs ("nlt", file);
12786 break;
12787 case UNGT:
12788 fputs ("nle", file);
12789 break;
12790 case UNLE:
12791 fputs ("ule", file);
12792 break;
12793 case UNLT:
12794 fputs ("ult", file);
12795 break;
12796 case LTGT:
12797 fputs ("une", file);
12798 break;
12799 default:
12800 output_operand_lossage ("operand is not a condition code, "
12801 "invalid operand code 'Y'");
12802 return;
12803 }
12804 return;
12805
12806 case 'D':
12807 	  /* A little bit of brain damage here.  The SSE compare instructions
12808 	     use completely different names for the comparisons than the
12809 	     fp conditional moves do.  */
12810 switch (GET_CODE (x))
12811 {
12812 case UNEQ:
12813 if (TARGET_AVX)
12814 {
12815 fputs ("eq_us", file);
12816 break;
12817 }
12818 /* FALLTHRU */
12819 case EQ:
12820 fputs ("eq", file);
12821 break;
12822 case UNLT:
12823 if (TARGET_AVX)
12824 {
12825 fputs ("nge", file);
12826 break;
12827 }
12828 /* FALLTHRU */
12829 case LT:
12830 fputs ("lt", file);
12831 break;
12832 case UNLE:
12833 if (TARGET_AVX)
12834 {
12835 fputs ("ngt", file);
12836 break;
12837 }
12838 /* FALLTHRU */
12839 case LE:
12840 fputs ("le", file);
12841 break;
12842 case UNORDERED:
12843 fputs ("unord", file);
12844 break;
12845 case LTGT:
12846 if (TARGET_AVX)
12847 {
12848 fputs ("neq_oq", file);
12849 break;
12850 }
12851 /* FALLTHRU */
12852 case NE:
12853 fputs ("neq", file);
12854 break;
12855 case GE:
12856 if (TARGET_AVX)
12857 {
12858 fputs ("ge", file);
12859 break;
12860 }
12861 /* FALLTHRU */
12862 case UNGE:
12863 fputs ("nlt", file);
12864 break;
12865 case GT:
12866 if (TARGET_AVX)
12867 {
12868 fputs ("gt", file);
12869 break;
12870 }
12871 /* FALLTHRU */
12872 case UNGT:
12873 fputs ("nle", file);
12874 break;
12875 case ORDERED:
12876 fputs ("ord", file);
12877 break;
12878 default:
12879 output_operand_lossage ("operand is not a condition code, "
12880 "invalid operand code 'D'");
12881 return;
12882 }
12883 return;
12884
12885 case 'F':
12886 case 'f':
12887 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12888 if (ASSEMBLER_DIALECT == ASM_ATT)
12889 putc ('.', file);
12890 gcc_fallthrough ();
12891 #endif
12892
12893 case 'C':
12894 case 'c':
12895 if (!COMPARISON_P (x))
12896 {
12897 output_operand_lossage ("operand is not a condition code, "
12898 "invalid operand code '%c'", code);
12899 return;
12900 }
12901 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
12902 code == 'c' || code == 'f',
12903 code == 'F' || code == 'f',
12904 file);
12905 return;
12906
12907 case 'H':
12908 if (!offsettable_memref_p (x))
12909 {
12910 output_operand_lossage ("operand is not an offsettable memory "
12911 "reference, invalid operand code 'H'");
12912 return;
12913 }
12914 /* It doesn't actually matter what mode we use here, as we're
12915 only going to use this for printing. */
12916 x = adjust_address_nv (x, DImode, 8);
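	  /* For example (illustrative), if operand 0 is the memory reference
	     "(%rsp)", "%H0" prints "8(%rsp)" in AT&T syntax, i.e. the same
	     address offset by 8 bytes, addressing the high part of a
	     16-byte value.  */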
12917 /* Output 'qword ptr' for intel assembler dialect. */
12918 if (ASSEMBLER_DIALECT == ASM_INTEL)
12919 code = 'q';
12920 break;
12921
12922 case 'K':
12923 if (!CONST_INT_P (x))
12924 {
12925 output_operand_lossage ("operand is not an integer, invalid "
12926 "operand code 'K'");
12927 return;
12928 }
12929
12930 if (INTVAL (x) & IX86_HLE_ACQUIRE)
12931 #ifdef HAVE_AS_IX86_HLE
12932 fputs ("xacquire ", file);
12933 #else
12934 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
12935 #endif
12936 else if (INTVAL (x) & IX86_HLE_RELEASE)
12937 #ifdef HAVE_AS_IX86_HLE
12938 fputs ("xrelease ", file);
12939 #else
12940 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
12941 #endif
12942 	  /* We do not want to print the value of the operand.  */
12943 return;
12944
12945 case 'N':
12946 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
12947 fputs ("{z}", file);
12948 return;
12949
12950 case 'r':
12951 if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
12952 {
12953 output_operand_lossage ("operand is not a specific integer, "
12954 "invalid operand code 'r'");
12955 return;
12956 }
12957
12958 if (ASSEMBLER_DIALECT == ASM_INTEL)
12959 fputs (", ", file);
12960
12961 fputs ("{sae}", file);
12962
12963 if (ASSEMBLER_DIALECT == ASM_ATT)
12964 fputs (", ", file);
12965
12966 return;
12967
12968 case 'R':
12969 if (!CONST_INT_P (x))
12970 {
12971 output_operand_lossage ("operand is not an integer, invalid "
12972 "operand code 'R'");
12973 return;
12974 }
12975
12976 if (ASSEMBLER_DIALECT == ASM_INTEL)
12977 fputs (", ", file);
12978
12979 switch (INTVAL (x))
12980 {
12981 case ROUND_NEAREST_INT | ROUND_SAE:
12982 fputs ("{rn-sae}", file);
12983 break;
12984 case ROUND_NEG_INF | ROUND_SAE:
12985 fputs ("{rd-sae}", file);
12986 break;
12987 case ROUND_POS_INF | ROUND_SAE:
12988 fputs ("{ru-sae}", file);
12989 break;
12990 case ROUND_ZERO | ROUND_SAE:
12991 fputs ("{rz-sae}", file);
12992 break;
12993 default:
12994 output_operand_lossage ("operand is not a specific integer, "
12995 "invalid operand code 'R'");
12996 }
12997
12998 if (ASSEMBLER_DIALECT == ASM_ATT)
12999 fputs (", ", file);
13000
13001 return;
13002
13003 case '*':
13004 if (ASSEMBLER_DIALECT == ASM_ATT)
13005 putc ('*', file);
13006 return;
13007
13008 case '&':
13009 {
13010 const char *name = get_some_local_dynamic_name ();
13011 if (name == NULL)
13012 output_operand_lossage ("'%%&' used without any "
13013 "local dynamic TLS references");
13014 else
13015 assemble_name (file, name);
13016 return;
13017 }
13018
13019 case '+':
13020 {
13021 rtx x;
13022
13023 if (!optimize
13024 || optimize_function_for_size_p (cfun)
13025 || !TARGET_BRANCH_PREDICTION_HINTS)
13026 return;
13027
13028 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13029 if (x)
13030 {
13031 int pred_val = profile_probability::from_reg_br_prob_note
13032 (XINT (x, 0)).to_reg_br_prob_base ();
13033
13034 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13035 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13036 {
13037 bool taken = pred_val > REG_BR_PROB_BASE / 2;
13038 bool cputaken
13039 = final_forward_branch_p (current_output_insn) == 0;
13040
13041 /* Emit hints only in the case default branch prediction
13042 heuristics would fail. */
13043 if (taken != cputaken)
13044 {
13045 /* We use 3e (DS) prefix for taken branches and
13046 2e (CS) prefix for not taken branches. */
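		  /* For example (illustrative), the hint is emitted
		     immediately before the branch mnemonic, giving output
		     such as "ds ; jne .L5".  */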
13047 if (taken)
13048 fputs ("ds ; ", file);
13049 else
13050 fputs ("cs ; ", file);
13051 }
13052 }
13053 }
13054 return;
13055 }
13056
13057 case ';':
13058 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13059 putc (';', file);
13060 #endif
13061 return;
13062
13063 case '~':
13064 putc (TARGET_AVX2 ? 'i' : 'f', file);
13065 return;
13066
13067 case 'M':
13068 if (TARGET_X32)
13069 {
13070 	      /* NB: 32-bit indices in VSIB address are sign-extended
13071 		 to 64 bits.  In x32, a 32-bit address like 0xf7fa3010 would be
13072 		 sign-extended to 0xfffffffff7fa3010, which is an invalid
13073 		 address.  Add the addr32 prefix if there is no base
13074 		 register and no symbol.  */
13075 bool ok;
13076 struct ix86_address parts;
13077 ok = ix86_decompose_address (x, &parts);
13078 gcc_assert (ok && parts.index == NULL_RTX);
13079 if (parts.base == NULL_RTX
13080 && (parts.disp == NULL_RTX
13081 || !symbolic_operand (parts.disp,
13082 GET_MODE (parts.disp))))
13083 fputs ("addr32 ", file);
13084 }
13085 return;
13086
13087 case '^':
13088 if (TARGET_64BIT && Pmode != word_mode)
13089 fputs ("addr32 ", file);
13090 return;
13091
13092 case '!':
13093 if (ix86_notrack_prefixed_insn_p (current_output_insn))
13094 fputs ("notrack ", file);
13095 return;
13096
13097 default:
13098 output_operand_lossage ("invalid operand code '%c'", code);
13099 }
13100 }
13101
13102 if (REG_P (x))
13103 print_reg (x, code, file);
13104
13105 else if (MEM_P (x))
13106 {
13107 rtx addr = XEXP (x, 0);
13108
13109 /* No `byte ptr' prefix for call instructions ... */
13110 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
13111 {
13112 machine_mode mode = GET_MODE (x);
13113 const char *size;
13114
13115 /* Check for explicit size override codes. */
13116 if (code == 'b')
13117 size = "BYTE";
13118 else if (code == 'w')
13119 size = "WORD";
13120 else if (code == 'k')
13121 size = "DWORD";
13122 else if (code == 'q')
13123 size = "QWORD";
13124 else if (code == 'x')
13125 size = "XMMWORD";
13126 else if (code == 't')
13127 size = "YMMWORD";
13128 else if (code == 'g')
13129 size = "ZMMWORD";
13130 else if (mode == BLKmode)
13131 /* ... or BLKmode operands, when not overridden. */
13132 size = NULL;
13133 else
13134 switch (GET_MODE_SIZE (mode))
13135 {
13136 case 1: size = "BYTE"; break;
13137 case 2: size = "WORD"; break;
13138 case 4: size = "DWORD"; break;
13139 case 8: size = "QWORD"; break;
13140 case 12: size = "TBYTE"; break;
13141 case 16:
13142 if (mode == XFmode)
13143 size = "TBYTE";
13144 else
13145 size = "XMMWORD";
13146 break;
13147 case 32: size = "YMMWORD"; break;
13148 case 64: size = "ZMMWORD"; break;
13149 default:
13150 gcc_unreachable ();
13151 }
13152 if (size)
13153 {
13154 fputs (size, file);
13155 fputs (" PTR ", file);
13156 }
13157 }
13158
13159 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
13160 output_operand_lossage ("invalid constraints for operand");
13161 else
13162 ix86_print_operand_address_as
13163 (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
13164 }
13165
13166 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
13167 {
13168 long l;
13169
13170 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
13171
13172 if (ASSEMBLER_DIALECT == ASM_ATT)
13173 putc ('$', file);
13174 /* Sign extend 32bit SFmode immediate to 8 bytes. */
13175 if (code == 'q')
13176 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
13177 (unsigned long long) (int) l);
13178 else
13179 fprintf (file, "0x%08x", (unsigned int) l);
13180 }
13181
13182 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
13183 {
13184 long l[2];
13185
13186 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
13187
13188 if (ASSEMBLER_DIALECT == ASM_ATT)
13189 putc ('$', file);
13190 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
13191 }
13192
13193 /* These float cases don't actually occur as immediate operands. */
13194 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
13195 {
13196 char dstr[30];
13197
13198 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13199 fputs (dstr, file);
13200 }
13201
13202 else
13203 {
13204 /* We have patterns that allow zero sets of memory, for instance.
13205 In 64-bit mode, we should probably support all 8-byte vectors,
13206 since we can in fact encode that into an immediate. */
13207 if (GET_CODE (x) == CONST_VECTOR)
13208 {
13209 if (x != CONST0_RTX (GET_MODE (x)))
13210 output_operand_lossage ("invalid vector immediate");
13211 x = const0_rtx;
13212 }
13213
13214 if (code != 'P' && code != 'p')
13215 {
13216 if (CONST_INT_P (x))
13217 {
13218 if (ASSEMBLER_DIALECT == ASM_ATT)
13219 putc ('$', file);
13220 }
13221 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
13222 || GET_CODE (x) == LABEL_REF)
13223 {
13224 if (ASSEMBLER_DIALECT == ASM_ATT)
13225 putc ('$', file);
13226 else
13227 fputs ("OFFSET FLAT:", file);
13228 }
13229 }
13230 if (CONST_INT_P (x))
13231 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13232 else if (flag_pic || MACHOPIC_INDIRECT)
13233 output_pic_addr_const (file, x, code);
13234 else
13235 output_addr_const (file, x);
13236 }
13237 }
13238
13239 static bool
13240 ix86_print_operand_punct_valid_p (unsigned char code)
13241 {
13242 return (code == '*' || code == '+' || code == '&' || code == ';'
13243 || code == '~' || code == '^' || code == '!');
13244 }
13245
13246 /* Print a memory operand whose address is ADDR. */
13247
13248 static void
13249 ix86_print_operand_address_as (FILE *file, rtx addr,
13250 addr_space_t as, bool no_rip)
13251 {
13252 struct ix86_address parts;
13253 rtx base, index, disp;
13254 int scale;
13255 int ok;
13256 bool vsib = false;
13257 int code = 0;
13258
13259 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
13260 {
13261 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
13262 gcc_assert (parts.index == NULL_RTX);
13263 parts.index = XVECEXP (addr, 0, 1);
13264 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
13265 addr = XVECEXP (addr, 0, 0);
13266 vsib = true;
13267 }
13268 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
13269 {
13270 gcc_assert (TARGET_64BIT);
13271 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
13272 code = 'q';
13273 }
13274 else
13275 ok = ix86_decompose_address (addr, &parts);
13276
13277 gcc_assert (ok);
13278
13279 base = parts.base;
13280 index = parts.index;
13281 disp = parts.disp;
13282 scale = parts.scale;
13283
13284 if (ADDR_SPACE_GENERIC_P (as))
13285 as = parts.seg;
13286 else
13287 gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
13288
13289 if (!ADDR_SPACE_GENERIC_P (as))
13290 {
13291 if (ASSEMBLER_DIALECT == ASM_ATT)
13292 putc ('%', file);
13293
13294 switch (as)
13295 {
13296 case ADDR_SPACE_SEG_FS:
13297 fputs ("fs:", file);
13298 break;
13299 case ADDR_SPACE_SEG_GS:
13300 fputs ("gs:", file);
13301 break;
13302 default:
13303 gcc_unreachable ();
13304 }
13305 }
13306
13307 /* Use one byte shorter RIP relative addressing for 64bit mode. */
13308 if (TARGET_64BIT && !base && !index && !no_rip)
13309 {
13310 rtx symbol = disp;
13311
13312 if (GET_CODE (disp) == CONST
13313 && GET_CODE (XEXP (disp, 0)) == PLUS
13314 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13315 symbol = XEXP (XEXP (disp, 0), 0);
13316
13317 if (GET_CODE (symbol) == LABEL_REF
13318 || (GET_CODE (symbol) == SYMBOL_REF
13319 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
13320 base = pc_rtx;
13321 }
13322
13323 if (!base && !index)
13324 {
13325 /* Displacement only requires special attention. */
13326 if (CONST_INT_P (disp))
13327 {
13328 if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
13329 fputs ("ds:", file);
13330 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
13331 }
13332 /* Load the external function address via the GOT slot to avoid PLT. */
13333 else if (GET_CODE (disp) == CONST
13334 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13335 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
13336 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
13337 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
13338 output_pic_addr_const (file, disp, 0);
13339 else if (flag_pic)
13340 output_pic_addr_const (file, disp, 0);
13341 else
13342 output_addr_const (file, disp);
13343 }
13344 else
13345 {
13346 /* Print SImode register names to force addr32 prefix. */
13347 if (SImode_address_operand (addr, VOIDmode))
13348 {
13349 if (flag_checking)
13350 {
13351 gcc_assert (TARGET_64BIT);
13352 switch (GET_CODE (addr))
13353 {
13354 case SUBREG:
13355 gcc_assert (GET_MODE (addr) == SImode);
13356 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
13357 break;
13358 case ZERO_EXTEND:
13359 case AND:
13360 gcc_assert (GET_MODE (addr) == DImode);
13361 break;
13362 default:
13363 gcc_unreachable ();
13364 }
13365 }
13366 gcc_assert (!code);
13367 code = 'k';
13368 }
13369 else if (code == 0
13370 && TARGET_X32
13371 && disp
13372 && CONST_INT_P (disp)
13373 && INTVAL (disp) < -16*1024*1024)
13374 {
13375 /* X32 runs in 64-bit mode, where displacement, DISP, in
13376 address DISP(%r64), is encoded as 32-bit immediate sign-
13377 extended from 32-bit to 64-bit. For -0x40000300(%r64),
13378 address is %r64 + 0xffffffffbffffd00. When %r64 <
13379 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
13380 which is invalid for x32. The correct address is %r64
13381 - 0x40000300 == 0xf7ffdd64. To properly encode
13382 -0x40000300(%r64) for x32, we zero-extend negative
13383 displacement by forcing addr32 prefix which truncates
13384 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
13385 zero-extend all negative displacements, including -1(%rsp).
13386 However, for small negative displacements, sign-extension
13387 won't cause overflow. We only zero-extend negative
13388 		 displacements when they are < -16*1024*1024, which is also used
13389 to check legitimate address displacements for PIC. */
13390 code = 'k';
13391 }
13392
13393 /* Since the upper 32 bits of RSP are always zero for x32,
13394 we can encode %esp as %rsp to avoid 0x67 prefix if
13395 there is no index register. */
13396 if (TARGET_X32 && Pmode == SImode
13397 && !index && base && REG_P (base) && REGNO (base) == SP_REG)
13398 code = 'q';
13399
13400 if (ASSEMBLER_DIALECT == ASM_ATT)
13401 {
13402 if (disp)
13403 {
13404 if (flag_pic)
13405 output_pic_addr_const (file, disp, 0);
13406 else if (GET_CODE (disp) == LABEL_REF)
13407 output_asm_label (disp);
13408 else
13409 output_addr_const (file, disp);
13410 }
13411
13412 putc ('(', file);
13413 if (base)
13414 print_reg (base, code, file);
13415 if (index)
13416 {
13417 putc (',', file);
13418 print_reg (index, vsib ? 0 : code, file);
13419 if (scale != 1 || vsib)
13420 fprintf (file, ",%d", scale);
13421 }
13422 putc (')', file);
13423 }
13424 else
13425 {
13426 rtx offset = NULL_RTX;
13427
13428 if (disp)
13429 {
13430 /* Pull out the offset of a symbol; print any symbol itself. */
13431 if (GET_CODE (disp) == CONST
13432 && GET_CODE (XEXP (disp, 0)) == PLUS
13433 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13434 {
13435 offset = XEXP (XEXP (disp, 0), 1);
13436 disp = gen_rtx_CONST (VOIDmode,
13437 XEXP (XEXP (disp, 0), 0));
13438 }
13439
13440 if (flag_pic)
13441 output_pic_addr_const (file, disp, 0);
13442 else if (GET_CODE (disp) == LABEL_REF)
13443 output_asm_label (disp);
13444 else if (CONST_INT_P (disp))
13445 offset = disp;
13446 else
13447 output_addr_const (file, disp);
13448 }
13449
13450 putc ('[', file);
13451 if (base)
13452 {
13453 print_reg (base, code, file);
13454 if (offset)
13455 {
13456 if (INTVAL (offset) >= 0)
13457 putc ('+', file);
13458 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13459 }
13460 }
13461 else if (offset)
13462 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13463 else
13464 putc ('0', file);
13465
13466 if (index)
13467 {
13468 putc ('+', file);
13469 print_reg (index, vsib ? 0 : code, file);
13470 if (scale != 1 || vsib)
13471 fprintf (file, "*%d", scale);
13472 }
13473 putc (']', file);
13474 }
13475 }
13476 }
13477
13478 static void
13479 ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
13480 {
13481 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
13482 output_operand_lossage ("invalid constraints for operand");
13483 else
13484 ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
13485 }
13486
13487 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13488
13489 static bool
13490 i386_asm_output_addr_const_extra (FILE *file, rtx x)
13491 {
13492 rtx op;
13493
13494 if (GET_CODE (x) != UNSPEC)
13495 return false;
13496
13497 op = XVECEXP (x, 0, 0);
13498 switch (XINT (x, 1))
13499 {
13500 case UNSPEC_GOTOFF:
13501 output_addr_const (file, op);
13502 fputs ("@gotoff", file);
13503 break;
13504 case UNSPEC_GOTTPOFF:
13505 output_addr_const (file, op);
13506 /* FIXME: This might be @TPOFF in Sun ld. */
13507 fputs ("@gottpoff", file);
13508 break;
13509 case UNSPEC_TPOFF:
13510 output_addr_const (file, op);
13511 fputs ("@tpoff", file);
13512 break;
13513 case UNSPEC_NTPOFF:
13514 output_addr_const (file, op);
13515 if (TARGET_64BIT)
13516 fputs ("@tpoff", file);
13517 else
13518 fputs ("@ntpoff", file);
13519 break;
13520 case UNSPEC_DTPOFF:
13521 output_addr_const (file, op);
13522 fputs ("@dtpoff", file);
13523 break;
13524 case UNSPEC_GOTNTPOFF:
13525 output_addr_const (file, op);
13526 if (TARGET_64BIT)
13527 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13528 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
13529 else
13530 fputs ("@gotntpoff", file);
13531 break;
13532 case UNSPEC_INDNTPOFF:
13533 output_addr_const (file, op);
13534 fputs ("@indntpoff", file);
13535 break;
13536 #if TARGET_MACHO
13537 case UNSPEC_MACHOPIC_OFFSET:
13538 output_addr_const (file, op);
13539 putc ('-', file);
13540 machopic_output_function_base_name (file);
13541 break;
13542 #endif
13543
13544 default:
13545 return false;
13546 }
13547
13548 return true;
13549 }
13550
13551
13552 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
13553 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
13554 is the expression of the binary operation. The output may either be
13555 emitted here, or returned to the caller, like all output_* functions.
13556
13557 There is no guarantee that the operands are the same mode, as they
13558 might be within FLOAT or FLOAT_EXTEND expressions. */
13559
13560 #ifndef SYSV386_COMPAT
13561 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
13562 wants to fix the assemblers because that causes incompatibility
13563 with gcc. No-one wants to fix gcc because that causes
13564 incompatibility with assemblers... You can use the option of
13565 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
13566 #define SYSV386_COMPAT 1
13567 #endif
13568
13569 const char *
13570 output_387_binary_op (rtx_insn *insn, rtx *operands)
13571 {
13572 static char buf[40];
13573 const char *p;
13574 bool is_sse
13575 = (SSE_REG_P (operands[0])
13576 || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));
13577
13578 if (is_sse)
13579 p = "%v";
13580 else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13581 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13582 p = "fi";
13583 else
13584 p = "f";
13585
13586 strcpy (buf, p);
13587
13588 switch (GET_CODE (operands[3]))
13589 {
13590 case PLUS:
13591 p = "add"; break;
13592 case MINUS:
13593 p = "sub"; break;
13594 case MULT:
13595 p = "mul"; break;
13596 case DIV:
13597 p = "div"; break;
13598 default:
13599 gcc_unreachable ();
13600 }
13601
13602 strcat (buf, p);
13603
13604 if (is_sse)
13605 {
13606 p = (GET_MODE (operands[0]) == SFmode) ? "ss" : "sd";
13607 strcat (buf, p);
13608
13609 if (TARGET_AVX)
13610 p = "\t{%2, %1, %0|%0, %1, %2}";
13611 else
13612 p = "\t{%2, %0|%0, %2}";
13613
13614 strcat (buf, p);
13615 return buf;
13616 }
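  /* For example (illustrative), a DFmode SSE add builds the template
     "%vaddsd\t{%2, %1, %0|%0, %1, %2}"; the leading "%v" is expanded
     elsewhere to the "v" mnemonic prefix when AVX is enabled, giving
     e.g. "vaddsd %xmm2, %xmm1, %xmm0" in AT&T syntax.  */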
13617
13618   /* Even if we do not want to check the inputs, this documents the
13619      input constraints, which helps in understanding the following code.  */
13620 if (flag_checking)
13621 {
13622 if (STACK_REG_P (operands[0])
13623 && ((REG_P (operands[1])
13624 && REGNO (operands[0]) == REGNO (operands[1])
13625 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
13626 || (REG_P (operands[2])
13627 && REGNO (operands[0]) == REGNO (operands[2])
13628 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
13629 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
13630 ; /* ok */
13631 else
13632 gcc_unreachable ();
13633 }
13634
13635 switch (GET_CODE (operands[3]))
13636 {
13637 case MULT:
13638 case PLUS:
13639 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
13640 std::swap (operands[1], operands[2]);
13641
13642       /* We now know operands[0] == operands[1].  */
13643
13644 if (MEM_P (operands[2]))
13645 {
13646 p = "%Z2\t%2";
13647 break;
13648 }
13649
13650 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13651 {
13652 if (STACK_TOP_P (operands[0]))
13653 /* How is it that we are storing to a dead operand[2]?
13654 Well, presumably operands[1] is dead too. We can't
13655 store the result to st(0) as st(0) gets popped on this
13656 instruction. Instead store to operands[2] (which I
13657 think has to be st(1)). st(1) will be popped later.
13658 gcc <= 2.8.1 didn't have this check and generated
13659 assembly code that the Unixware assembler rejected. */
13660 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13661 else
13662 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13663 break;
13664 }
13665
13666 if (STACK_TOP_P (operands[0]))
13667 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13668 else
13669 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13670 break;
13671
13672 case MINUS:
13673 case DIV:
13674 if (MEM_P (operands[1]))
13675 {
13676 p = "r%Z1\t%1";
13677 break;
13678 }
13679
13680 if (MEM_P (operands[2]))
13681 {
13682 p = "%Z2\t%2";
13683 break;
13684 }
13685
13686 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13687 {
13688 #if SYSV386_COMPAT
13689 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
13690 derived assemblers, confusingly reverse the direction of
13691 the operation for fsub{r} and fdiv{r} when the
13692 destination register is not st(0). The Intel assembler
13693 doesn't have this brain damage. Read !SYSV386_COMPAT to
13694 figure out what the hardware really does. */
13695 if (STACK_TOP_P (operands[0]))
13696 p = "{p\t%0, %2|rp\t%2, %0}";
13697 else
13698 p = "{rp\t%2, %0|p\t%0, %2}";
13699 #else
13700 if (STACK_TOP_P (operands[0]))
13701 /* As above for fmul/fadd, we can't store to st(0). */
13702 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13703 else
13704 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13705 #endif
13706 break;
13707 }
13708
13709 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
13710 {
13711 #if SYSV386_COMPAT
13712 if (STACK_TOP_P (operands[0]))
13713 p = "{rp\t%0, %1|p\t%1, %0}";
13714 else
13715 p = "{p\t%1, %0|rp\t%0, %1}";
13716 #else
13717 if (STACK_TOP_P (operands[0]))
13718 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
13719 else
13720 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
13721 #endif
13722 break;
13723 }
13724
13725 if (STACK_TOP_P (operands[0]))
13726 {
13727 if (STACK_TOP_P (operands[1]))
13728 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13729 else
13730 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
13731 break;
13732 }
13733 else if (STACK_TOP_P (operands[1]))
13734 {
13735 #if SYSV386_COMPAT
13736 p = "{\t%1, %0|r\t%0, %1}";
13737 #else
13738 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
13739 #endif
13740 }
13741 else
13742 {
13743 #if SYSV386_COMPAT
13744 p = "{r\t%2, %0|\t%0, %2}";
13745 #else
13746 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13747 #endif
13748 }
13749 break;
13750
13751 default:
13752 gcc_unreachable ();
13753 }
13754
13755 strcat (buf, p);
13756 return buf;
13757 }
13758
13759 /* Return needed mode for entity in optimize_mode_switching pass. */
13760
13761 static int
13762 ix86_dirflag_mode_needed (rtx_insn *insn)
13763 {
13764 if (CALL_P (insn))
13765 {
13766 if (cfun->machine->func_type == TYPE_NORMAL)
13767 return X86_DIRFLAG_ANY;
13768 else
13769 /* No need to emit CLD in interrupt handler for TARGET_CLD. */
13770 return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
13771 }
13772
13773 if (recog_memoized (insn) < 0)
13774 return X86_DIRFLAG_ANY;
13775
13776 if (get_attr_type (insn) == TYPE_STR)
13777 {
13778 /* Emit cld instruction if stringops are used in the function. */
13779 if (cfun->machine->func_type == TYPE_NORMAL)
13780 return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
13781 else
13782 return X86_DIRFLAG_RESET;
13783 }
13784
13785 return X86_DIRFLAG_ANY;
13786 }
13787
13788 /* Check if a 256bit or 512bit AVX register is referenced inside EXP.  */
13789
13790 static bool
13791 ix86_check_avx_upper_register (const_rtx exp)
13792 {
13793 return SSE_REG_P (exp) && GET_MODE_BITSIZE (GET_MODE (exp)) > 128;
13794 }
13795
13796 /* Return needed mode for entity in optimize_mode_switching pass. */
13797
13798 static int
13799 ix86_avx_u128_mode_needed (rtx_insn *insn)
13800 {
13801 if (CALL_P (insn))
13802 {
13803 rtx link;
13804
13805 /* Needed mode is set to AVX_U128_CLEAN if there are
13806 no 256bit or 512bit modes used in function arguments. */
13807 for (link = CALL_INSN_FUNCTION_USAGE (insn);
13808 link;
13809 link = XEXP (link, 1))
13810 {
13811 if (GET_CODE (XEXP (link, 0)) == USE)
13812 {
13813 rtx arg = XEXP (XEXP (link, 0), 0);
13814
13815 if (ix86_check_avx_upper_register (arg))
13816 return AVX_U128_DIRTY;
13817 }
13818 }
13819
13820 /* If the function is known to preserve some SSE registers,
13821 RA and previous passes can legitimately rely on that for
13822 modes wider than 256 bits. It's only safe to issue a
13823 vzeroupper if all SSE registers are clobbered. */
13824 const function_abi &abi = insn_callee_abi (insn);
13825 if (!hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
13826 abi.mode_clobbers (V4DImode)))
13827 return AVX_U128_ANY;
13828
13829 return AVX_U128_CLEAN;
13830 }
13831
13832   /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
13833      Hardware changes state only when a 256bit register is written to,
13834      but we need to prevent the compiler from moving the optimal insertion
13835      point above an eventual read from a 256bit or 512bit register.  */
13836 subrtx_iterator::array_type array;
13837 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
13838 if (ix86_check_avx_upper_register (*iter))
13839 return AVX_U128_DIRTY;
13840
13841 return AVX_U128_ANY;
13842 }
13843
13844 /* Return mode that i387 must be switched into
13845 prior to the execution of insn. */
13846
13847 static int
13848 ix86_i387_mode_needed (int entity, rtx_insn *insn)
13849 {
13850 enum attr_i387_cw mode;
13851
13852   /* The mode UNINITIALIZED is used to store the control word after a
13853      function call or ASM pattern.  The mode ANY specifies that the
13854      function has no requirements on the control word and makes no
13855      changes to the bits we are interested in.  */
13856
13857 if (CALL_P (insn)
13858 || (NONJUMP_INSN_P (insn)
13859 && (asm_noperands (PATTERN (insn)) >= 0
13860 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
13861 return I387_CW_UNINITIALIZED;
13862
13863 if (recog_memoized (insn) < 0)
13864 return I387_CW_ANY;
13865
13866 mode = get_attr_i387_cw (insn);
13867
13868 switch (entity)
13869 {
13870 case I387_ROUNDEVEN:
13871 if (mode == I387_CW_ROUNDEVEN)
13872 return mode;
13873 break;
13874
13875 case I387_TRUNC:
13876 if (mode == I387_CW_TRUNC)
13877 return mode;
13878 break;
13879
13880 case I387_FLOOR:
13881 if (mode == I387_CW_FLOOR)
13882 return mode;
13883 break;
13884
13885 case I387_CEIL:
13886 if (mode == I387_CW_CEIL)
13887 return mode;
13888 break;
13889
13890 default:
13891 gcc_unreachable ();
13892 }
13893
13894 return I387_CW_ANY;
13895 }
13896
13897 /* Return mode that entity must be switched into
13898 prior to the execution of insn. */
13899
13900 static int
13901 ix86_mode_needed (int entity, rtx_insn *insn)
13902 {
13903 switch (entity)
13904 {
13905 case X86_DIRFLAG:
13906 return ix86_dirflag_mode_needed (insn);
13907 case AVX_U128:
13908 return ix86_avx_u128_mode_needed (insn);
13909 case I387_ROUNDEVEN:
13910 case I387_TRUNC:
13911 case I387_FLOOR:
13912 case I387_CEIL:
13913 return ix86_i387_mode_needed (entity, insn);
13914 default:
13915 gcc_unreachable ();
13916 }
13917 return 0;
13918 }
13919
13920 /* Check if a 256bit or 512bit AVX register is referenced in stores. */
13921
13922 static void
13923 ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
13924 {
13925 if (ix86_check_avx_upper_register (dest))
13926 {
13927 bool *used = (bool *) data;
13928 *used = true;
13929 }
13930 }
13931
13932 /* Calculate mode of upper 128bit AVX registers after the insn. */
13933
13934 static int
13935 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
13936 {
13937 rtx pat = PATTERN (insn);
13938
13939 if (vzeroupper_pattern (pat, VOIDmode)
13940 || vzeroall_pattern (pat, VOIDmode))
13941 return AVX_U128_CLEAN;
13942
13943   /* We know that the state is clean after a CALL insn if there are no
13944      256bit or 512bit modes used in the function return register.  */
13945 if (CALL_P (insn))
13946 {
13947 bool avx_upper_reg_found = false;
13948 note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);
13949
13950 return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
13951 }
13952
13953 /* Otherwise, return current mode. Remember that if insn
13954 references AVX 256bit or 512bit registers, the mode was already
13955 changed to DIRTY from MODE_NEEDED. */
13956 return mode;
13957 }
13958
13959 /* Return the mode that an insn results in. */
13960
13961 static int
13962 ix86_mode_after (int entity, int mode, rtx_insn *insn)
13963 {
13964 switch (entity)
13965 {
13966 case X86_DIRFLAG:
13967 return mode;
13968 case AVX_U128:
13969 return ix86_avx_u128_mode_after (mode, insn);
13970 case I387_ROUNDEVEN:
13971 case I387_TRUNC:
13972 case I387_FLOOR:
13973 case I387_CEIL:
13974 return mode;
13975 default:
13976 gcc_unreachable ();
13977 }
13978 }
13979
13980 static int
13981 ix86_dirflag_mode_entry (void)
13982 {
13983 /* For TARGET_CLD or in the interrupt handler we can't assume
13984 direction flag state at function entry. */
13985 if (TARGET_CLD
13986 || cfun->machine->func_type != TYPE_NORMAL)
13987 return X86_DIRFLAG_ANY;
13988
13989 return X86_DIRFLAG_RESET;
13990 }
13991
13992 static int
13993 ix86_avx_u128_mode_entry (void)
13994 {
13995 tree arg;
13996
13997 /* Entry mode is set to AVX_U128_DIRTY if there are
13998 256bit or 512bit modes used in function arguments. */
13999 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
14000 arg = TREE_CHAIN (arg))
14001 {
14002 rtx incoming = DECL_INCOMING_RTL (arg);
14003
14004 if (incoming && ix86_check_avx_upper_register (incoming))
14005 return AVX_U128_DIRTY;
14006 }
14007
14008 return AVX_U128_CLEAN;
14009 }
14010
14011 /* Return a mode that ENTITY is assumed to be
14012 switched to at function entry. */
14013
14014 static int
14015 ix86_mode_entry (int entity)
14016 {
14017 switch (entity)
14018 {
14019 case X86_DIRFLAG:
14020 return ix86_dirflag_mode_entry ();
14021 case AVX_U128:
14022 return ix86_avx_u128_mode_entry ();
14023 case I387_ROUNDEVEN:
14024 case I387_TRUNC:
14025 case I387_FLOOR:
14026 case I387_CEIL:
14027 return I387_CW_ANY;
14028 default:
14029 gcc_unreachable ();
14030 }
14031 }
14032
14033 static int
14034 ix86_avx_u128_mode_exit (void)
14035 {
14036 rtx reg = crtl->return_rtx;
14037
14038   /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
14039      or 512bit modes used in the function return register.  */
14040 if (reg && ix86_check_avx_upper_register (reg))
14041 return AVX_U128_DIRTY;
14042
14043 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
14044 modes used in function arguments, otherwise return AVX_U128_CLEAN.
14045 */
14046 return ix86_avx_u128_mode_entry ();
14047 }
14048
14049 /* Return a mode that ENTITY is assumed to be
14050 switched to at function exit. */
14051
14052 static int
14053 ix86_mode_exit (int entity)
14054 {
14055 switch (entity)
14056 {
14057 case X86_DIRFLAG:
14058 return X86_DIRFLAG_ANY;
14059 case AVX_U128:
14060 return ix86_avx_u128_mode_exit ();
14061 case I387_ROUNDEVEN:
14062 case I387_TRUNC:
14063 case I387_FLOOR:
14064 case I387_CEIL:
14065 return I387_CW_ANY;
14066 default:
14067 gcc_unreachable ();
14068 }
14069 }
14070
14071 static int
14072 ix86_mode_priority (int, int n)
14073 {
14074 return n;
14075 }
14076
14077 /* Output code to initialize control word copies used by trunc?f?i and
14078 rounding patterns. CURRENT_MODE is set to current control word,
14079 while NEW_MODE is set to new control word. */
14080
14081 static void
14082 emit_i387_cw_initialization (int mode)
14083 {
14084 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14085 rtx new_mode;
14086
14087 enum ix86_stack_slot slot;
14088
14089 rtx reg = gen_reg_rtx (HImode);
14090
14091 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14092 emit_move_insn (reg, copy_rtx (stored_mode));
14093
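  /* Bits 10 and 11 of the x87 control word select the rounding mode:
     00 = round to nearest even, 01 = round down (toward -inf),
     10 = round up (toward +inf), 11 = round toward zero (truncate).
     The masks below clear and/or set this RC field accordingly.  */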
14094 switch (mode)
14095 {
14096 case I387_CW_ROUNDEVEN:
14097 /* round to nearest */
14098 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14099 slot = SLOT_CW_ROUNDEVEN;
14100 break;
14101
14102 case I387_CW_TRUNC:
14103 /* round toward zero (truncate) */
14104 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14105 slot = SLOT_CW_TRUNC;
14106 break;
14107
14108 case I387_CW_FLOOR:
14109 /* round down toward -oo */
14110 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14111 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14112 slot = SLOT_CW_FLOOR;
14113 break;
14114
14115 case I387_CW_CEIL:
14116 /* round up toward +oo */
14117 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14118 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14119 slot = SLOT_CW_CEIL;
14120 break;
14121
14122 default:
14123 gcc_unreachable ();
14124 }
14125
14126 gcc_assert (slot < MAX_386_STACK_LOCALS);
14127
14128 new_mode = assign_386_stack_local (HImode, slot);
14129 emit_move_insn (new_mode, reg);
14130 }
14131
14132 /* Generate one or more insns to set ENTITY to MODE. */
14133
14134 static void
14135 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
14136 HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
14137 {
14138 switch (entity)
14139 {
14140 case X86_DIRFLAG:
14141 if (mode == X86_DIRFLAG_RESET)
14142 emit_insn (gen_cld ());
14143 break;
14144 case AVX_U128:
14145 if (mode == AVX_U128_CLEAN)
14146 emit_insn (gen_avx_vzeroupper ());
14147 break;
14148 case I387_ROUNDEVEN:
14149 case I387_TRUNC:
14150 case I387_FLOOR:
14151 case I387_CEIL:
14152 if (mode != I387_CW_ANY
14153 && mode != I387_CW_UNINITIALIZED)
14154 emit_i387_cw_initialization (mode);
14155 break;
14156 default:
14157 gcc_unreachable ();
14158 }
14159 }
14160
14161 /* Output code for INSN to convert a float to a signed int. OPERANDS
14162 are the insn operands. The output may be [HSD]Imode and the input
14163 operand may be [SDX]Fmode. */
14164
14165 const char *
14166 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
14167 {
14168 bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
14169 bool dimode_p = GET_MODE (operands[0]) == DImode;
14170 int round_mode = get_attr_i387_cw (insn);
14171
14172 static char buf[40];
14173 const char *p;
14174
14175 /* Jump through a hoop or two for DImode, since the hardware has no
14176 non-popping instruction. We used to do this a different way, but
14177 that was somewhat fragile and broke with post-reload splitters. */
14178 if ((dimode_p || fisttp) && !stack_top_dies)
14179 output_asm_insn ("fld\t%y1", operands);
14180
14181 gcc_assert (STACK_TOP_P (operands[1]));
14182 gcc_assert (MEM_P (operands[0]));
14183 gcc_assert (GET_MODE (operands[1]) != TFmode);
14184
14185 if (fisttp)
14186 return "fisttp%Z0\t%0";
14187
14188 strcpy (buf, "fist");
14189
14190 if (round_mode != I387_CW_ANY)
14191 output_asm_insn ("fldcw\t%3", operands);
14192
14193 p = "p%Z0\t%0";
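  /* Skip the leading 'p' so a non-popping "fist" is emitted when the stack
     top survives and the destination is not DImode; DImode always uses the
     popping form (the value was reloaded by the "fld" above when needed).  */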
14194 strcat (buf, p + !(stack_top_dies || dimode_p));
14195
14196 output_asm_insn (buf, operands);
14197
14198 if (round_mode != I387_CW_ANY)
14199 output_asm_insn ("fldcw\t%2", operands);
14200
14201 return "";
14202 }
14203
14204 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14205 have the values zero or one, indicates the ffreep insn's operand
14206 from the OPERANDS array. */
14207
14208 static const char *
14209 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14210 {
14211 if (TARGET_USE_FFREEP)
14212 #ifdef HAVE_AS_IX86_FFREEP
14213 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14214 #else
14215 {
14216 static char retval[32];
14217 int regno = REGNO (operands[opno]);
14218
14219 gcc_assert (STACK_REGNO_P (regno));
14220
14221 regno -= FIRST_STACK_REG;
14222
14223 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
14224 return retval;
14225 }
14226 #endif
14227
14228 return opno ? "fstp\t%y1" : "fstp\t%y0";
14229 }
14230
14231
14232 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14233 should be used. UNORDERED_P is true when fucom should be used. */
14234
14235 const char *
14236 output_fp_compare (rtx_insn *insn, rtx *operands,
14237 bool eflags_p, bool unordered_p)
14238 {
14239 rtx *xops = eflags_p ? &operands[0] : &operands[1];
14240 bool stack_top_dies;
14241
14242 static char buf[40];
14243 const char *p;
14244
14245 gcc_assert (STACK_TOP_P (xops[0]));
14246
14247 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
14248
14249 if (eflags_p)
14250 {
14251 p = unordered_p ? "fucomi" : "fcomi";
14252 strcpy (buf, p);
14253
14254 p = "p\t{%y1, %0|%0, %y1}";
14255 strcat (buf, p + !stack_top_dies);
14256
14257 return buf;
14258 }
14259
14260 if (STACK_REG_P (xops[1])
14261 && stack_top_dies
14262 && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
14263 {
14264 gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);
14265
14266       /* If both the top of the 387 stack and the other stack register
14267 	 operand die, then this must be a `fcompp' float compare.  */
14269 p = unordered_p ? "fucompp" : "fcompp";
14270 strcpy (buf, p);
14271 }
14272 else if (const0_operand (xops[1], VOIDmode))
14273 {
14274 gcc_assert (!unordered_p);
14275 strcpy (buf, "ftst");
14276 }
14277 else
14278 {
14279 if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
14280 {
14281 gcc_assert (!unordered_p);
14282 p = "ficom";
14283 }
14284 else
14285 p = unordered_p ? "fucom" : "fcom";
14286
14287 strcpy (buf, p);
14288
14289 p = "p%Z2\t%y2";
14290 strcat (buf, p + !stack_top_dies);
14291 }
14292
14293 output_asm_insn (buf, operands);
14294 return "fnstsw\t%0";
14295 }
14296
14297 void
14298 ix86_output_addr_vec_elt (FILE *file, int value)
14299 {
14300 const char *directive = ASM_LONG;
14301
14302 #ifdef ASM_QUAD
14303 if (TARGET_LP64)
14304 directive = ASM_QUAD;
14305 #else
14306 gcc_assert (!TARGET_64BIT);
14307 #endif
14308
14309 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
14310 }
14311
14312 void
14313 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
14314 {
14315 const char *directive = ASM_LONG;
14316
14317 #ifdef ASM_QUAD
14318 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
14319 directive = ASM_QUAD;
14320 #else
14321 gcc_assert (!TARGET_64BIT);
14322 #endif
14323 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
14324 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
14325 fprintf (file, "%s%s%d-%s%d\n",
14326 directive, LPREFIX, value, LPREFIX, rel);
14327 #if TARGET_MACHO
14328 else if (TARGET_MACHO)
14329 {
14330 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
14331 machopic_output_function_base_name (file);
14332 putc ('\n', file);
14333 }
14334 #endif
14335 else if (HAVE_AS_GOTOFF_IN_DATA)
14336 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
14337 else
14338 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
14339 GOT_SYMBOL_NAME, LPREFIX, value);
14340 }
14341
14342 #define LEA_MAX_STALL (3)
14343 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
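/* With LEA_MAX_STALL == 3, LEA_SEARCH_THRESHOLD is 6 half-cycles, i.e. the
   backward/forward searches below scan at most 3 full cycles' worth of
   instructions around the LEA.  */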
14344
14345 /* Increase given DISTANCE in half-cycles according to
14346    dependencies between PREV and NEXT instructions.
14347    Add 1 half-cycle if there is no dependency and
14348    go to the next cycle if there is some dependency.  */
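/* For example, with DISTANCE == 3 half-cycles: no dependency gives
   3 + 1 == 4, while a dependency gives 3 + (3 & 1) + 2 == 6, i.e. the
   distance is rounded up to a cycle boundary and one full cycle is added.  */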
14349
14350 static unsigned int
14351 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
14352 {
14353 df_ref def, use;
14354
14355 if (!prev || !next)
14356 return distance + (distance & 1) + 2;
14357
14358 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
14359 return distance + 1;
14360
14361 FOR_EACH_INSN_USE (use, next)
14362 FOR_EACH_INSN_DEF (def, prev)
14363 if (!DF_REF_IS_ARTIFICIAL (def)
14364 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
14365 return distance + (distance & 1) + 2;
14366
14367 return distance + 1;
14368 }
14369
14370 /* Function checks if instruction INSN defines register number
14371 REGNO1 or REGNO2. */
14372
14373 bool
14374 insn_defines_reg (unsigned int regno1, unsigned int regno2,
14375 rtx_insn *insn)
14376 {
14377 df_ref def;
14378
14379 FOR_EACH_INSN_DEF (def, insn)
14380 if (DF_REF_REG_DEF_P (def)
14381 && !DF_REF_IS_ARTIFICIAL (def)
14382 && (regno1 == DF_REF_REGNO (def)
14383 || regno2 == DF_REF_REGNO (def)))
14384 return true;
14385
14386 return false;
14387 }
14388
14389 /* Function checks if instruction INSN uses register number
14390 REGNO as a part of address expression. */
14391
14392 static bool
14393 insn_uses_reg_mem (unsigned int regno, rtx insn)
14394 {
14395 df_ref use;
14396
14397 FOR_EACH_INSN_USE (use, insn)
14398 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
14399 return true;
14400
14401 return false;
14402 }
14403
14404 /* Search backward for non-agu definition of register number REGNO1
14405 or register number REGNO2 in basic block starting from instruction
14406 START up to head of basic block or instruction INSN.
14407
14408 Function puts true value into *FOUND var if definition was found
14409 and false otherwise.
14410
14411 Distance in half-cycles between START and found instruction or head
14412 of BB is added to DISTANCE and returned. */
14413
14414 static int
14415 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
14416 rtx_insn *insn, int distance,
14417 rtx_insn *start, bool *found)
14418 {
14419 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
14420 rtx_insn *prev = start;
14421 rtx_insn *next = NULL;
14422
14423 *found = false;
14424
14425 while (prev
14426 && prev != insn
14427 && distance < LEA_SEARCH_THRESHOLD)
14428 {
14429 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
14430 {
14431 distance = increase_distance (prev, next, distance);
14432 if (insn_defines_reg (regno1, regno2, prev))
14433 {
14434 if (recog_memoized (prev) < 0
14435 || get_attr_type (prev) != TYPE_LEA)
14436 {
14437 *found = true;
14438 return distance;
14439 }
14440 }
14441
14442 next = prev;
14443 }
14444 if (prev == BB_HEAD (bb))
14445 break;
14446
14447 prev = PREV_INSN (prev);
14448 }
14449
14450 return distance;
14451 }
14452
14453 /* Search backward for non-agu definition of register number REGNO1
14454 or register number REGNO2 in INSN's basic block until
14455 1. Pass LEA_SEARCH_THRESHOLD instructions, or
14456 2. Reach neighbor BBs boundary, or
14457 3. Reach agu definition.
14458 Returns the distance between the non-agu definition point and INSN.
14459 If no definition point, returns -1. */
14460
14461 static int
14462 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
14463 rtx_insn *insn)
14464 {
14465 basic_block bb = BLOCK_FOR_INSN (insn);
14466 int distance = 0;
14467 bool found = false;
14468
14469 if (insn != BB_HEAD (bb))
14470 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
14471 distance, PREV_INSN (insn),
14472 &found);
14473
14474 if (!found && distance < LEA_SEARCH_THRESHOLD)
14475 {
14476 edge e;
14477 edge_iterator ei;
14478 bool simple_loop = false;
14479
14480 FOR_EACH_EDGE (e, ei, bb->preds)
14481 if (e->src == bb)
14482 {
14483 simple_loop = true;
14484 break;
14485 }
14486
14487 if (simple_loop)
14488 distance = distance_non_agu_define_in_bb (regno1, regno2,
14489 insn, distance,
14490 BB_END (bb), &found);
14491 else
14492 {
14493 int shortest_dist = -1;
14494 bool found_in_bb = false;
14495
14496 FOR_EACH_EDGE (e, ei, bb->preds)
14497 {
14498 int bb_dist
14499 = distance_non_agu_define_in_bb (regno1, regno2,
14500 insn, distance,
14501 BB_END (e->src),
14502 &found_in_bb);
14503 if (found_in_bb)
14504 {
14505 if (shortest_dist < 0)
14506 shortest_dist = bb_dist;
14507 else if (bb_dist > 0)
14508 shortest_dist = MIN (bb_dist, shortest_dist);
14509
14510 found = true;
14511 }
14512 }
14513
14514 distance = shortest_dist;
14515 }
14516 }
14517
14518 /* get_attr_type may modify recog data. We want to make sure
14519 that recog data is valid for instruction INSN, on which
14520 distance_non_agu_define is called. INSN is unchanged here. */
14521 extract_insn_cached (insn);
14522
14523 if (!found)
14524 return -1;
14525
14526 return distance >> 1;
14527 }
14528
14529 /* Return the distance in half-cycles between INSN and the next
14530 insn that uses register number REGNO in memory address added
14531 to DISTANCE. Return -1 if REGNO is set.
14532
14533 Put true value into *FOUND if register usage was found and
14534 false otherwise.
14535 Put true value into *REDEFINED if register redefinition was
14536 found and false otherwise. */
14537
14538 static int
14539 distance_agu_use_in_bb (unsigned int regno,
14540 rtx_insn *insn, int distance, rtx_insn *start,
14541 bool *found, bool *redefined)
14542 {
14543 basic_block bb = NULL;
14544 rtx_insn *next = start;
14545 rtx_insn *prev = NULL;
14546
14547 *found = false;
14548 *redefined = false;
14549
14550 if (start != NULL_RTX)
14551 {
14552 bb = BLOCK_FOR_INSN (start);
14553 if (start != BB_HEAD (bb))
14554 /* If insn and start belong to the same bb, set prev to insn,
14555 so the call to increase_distance will increase the distance
14556 between insns by 1. */
14557 prev = insn;
14558 }
14559
14560 while (next
14561 && next != insn
14562 && distance < LEA_SEARCH_THRESHOLD)
14563 {
14564 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
14565 {
14566 distance = increase_distance (prev, next, distance);
14567 if (insn_uses_reg_mem (regno, next))
14568 {
14569 /* Return DISTANCE if OP0 is used in memory
14570 address in NEXT. */
14571 *found = true;
14572 return distance;
14573 }
14574
14575 if (insn_defines_reg (regno, INVALID_REGNUM, next))
14576 {
14577 /* Return -1 if OP0 is set in NEXT. */
14578 *redefined = true;
14579 return -1;
14580 }
14581
14582 prev = next;
14583 }
14584
14585 if (next == BB_END (bb))
14586 break;
14587
14588 next = NEXT_INSN (next);
14589 }
14590
14591 return distance;
14592 }
14593
14594 /* Return the distance between INSN and the next insn that uses
14595 register number REGNO0 in a memory address. Return -1 if no such
14596 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
14597
14598 static int
14599 distance_agu_use (unsigned int regno0, rtx_insn *insn)
14600 {
14601 basic_block bb = BLOCK_FOR_INSN (insn);
14602 int distance = 0;
14603 bool found = false;
14604 bool redefined = false;
14605
14606 if (insn != BB_END (bb))
14607 distance = distance_agu_use_in_bb (regno0, insn, distance,
14608 NEXT_INSN (insn),
14609 &found, &redefined);
14610
14611 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
14612 {
14613 edge e;
14614 edge_iterator ei;
14615 bool simple_loop = false;
14616
14617 FOR_EACH_EDGE (e, ei, bb->succs)
14618 if (e->dest == bb)
14619 {
14620 simple_loop = true;
14621 break;
14622 }
14623
14624 if (simple_loop)
14625 distance = distance_agu_use_in_bb (regno0, insn,
14626 distance, BB_HEAD (bb),
14627 &found, &redefined);
14628 else
14629 {
14630 int shortest_dist = -1;
14631 bool found_in_bb = false;
14632 bool redefined_in_bb = false;
14633
14634 FOR_EACH_EDGE (e, ei, bb->succs)
14635 {
14636 int bb_dist
14637 = distance_agu_use_in_bb (regno0, insn,
14638 distance, BB_HEAD (e->dest),
14639 &found_in_bb, &redefined_in_bb);
14640 if (found_in_bb)
14641 {
14642 if (shortest_dist < 0)
14643 shortest_dist = bb_dist;
14644 else if (bb_dist > 0)
14645 shortest_dist = MIN (bb_dist, shortest_dist);
14646
14647 found = true;
14648 }
14649 }
14650
14651 distance = shortest_dist;
14652 }
14653 }
14654
14655 if (!found || redefined)
14656 return -1;
14657
14658 return distance >> 1;
14659 }
14660
14661 /* Define this macro to tune LEA priority vs ADD; it takes effect when
14662 there is a dilemma of choosing LEA or ADD.
14663 Negative value: ADD is more preferred than LEA
14664 Zero: Neutral
14665 Positive value: LEA is more preferred than ADD. */
14666 #define IX86_LEA_PRIORITY 0
14667
14668 /* Return true if using the lea INSN has a performance advantage
14669 over the equivalent sequence of instructions. The sequence has
14670 SPLIT_COST cycles higher latency than the lea. */
14671
14672 static bool
14673 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
14674 unsigned int regno2, int split_cost, bool has_scale)
14675 {
14676 int dist_define, dist_use;
14677
14678 /* For Atom processors newer than Bonnell, if a 2-source or
14679 3-source LEA is used for a non-destructive destination, or because
14680 the ability to use SCALE is wanted, the use of LEA is justified. */
14681 if (!TARGET_BONNELL)
14682 {
14683 if (has_scale)
14684 return true;
14685 if (split_cost < 1)
14686 return false;
14687 if (regno0 == regno1 || regno0 == regno2)
14688 return false;
14689 return true;
14690 }
14691
14692 rtx_insn *rinsn = recog_data.insn;
14693
14694 dist_define = distance_non_agu_define (regno1, regno2, insn);
14695 dist_use = distance_agu_use (regno0, insn);
14696
14697 /* distance_non_agu_define can call extract_insn_cached. If this function
14698 is called from define_split conditions, that can break insn splitting,
14699 because split_insns works by clearing recog_data.insn and then modifying
14700 the recog_data.operand array to match the various split conditions. */
14701 if (recog_data.insn != rinsn)
14702 recog_data.insn = NULL;
14703
14704 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
14705 {
14706 /* If there is no non-AGU operand definition, no AGU
14707 operand usage, and the split cost is 0, then both the lea
14708 and non-lea variants have the same priority. Currently
14709 we prefer lea for 64-bit code and non-lea for 32-bit
14710 code. */
14711 if (dist_use < 0 && split_cost == 0)
14712 return TARGET_64BIT || IX86_LEA_PRIORITY;
14713 else
14714 return true;
14715 }
14716
14717 /* The longer the definition distance, the more preferable lea is.
14718 Here we adjust it to take into account the splitting cost and
14719 lea priority. */
14720 dist_define += split_cost + IX86_LEA_PRIORITY;
14721
14722 /* If there is no use in a memory address then we just check
14723 that the split cost exceeds the AGU stall. */
14724 if (dist_use < 0)
14725 return dist_define > LEA_MAX_STALL;
14726
14727 /* If this insn has both backward non-agu dependence and forward
14728 agu dependence, the one with the shorter distance takes effect. */
14729 return dist_define >= dist_use;
14730 }
14731
14732 /* Return true if it is legal to clobber flags by INSN and
14733 false otherwise. */
14734
14735 static bool
14736 ix86_ok_to_clobber_flags (rtx_insn *insn)
14737 {
14738 basic_block bb = BLOCK_FOR_INSN (insn);
14739 df_ref use;
14740 bitmap live;
14741
14742 while (insn)
14743 {
14744 if (NONDEBUG_INSN_P (insn))
14745 {
14746 FOR_EACH_INSN_USE (use, insn)
14747 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
14748 return false;
14749
14750 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
14751 return true;
14752 }
14753
14754 if (insn == BB_END (bb))
14755 break;
14756
14757 insn = NEXT_INSN (insn);
14758 }
14759
14760 live = df_get_live_out (bb);
14761 return !REGNO_REG_SET_P (live, FLAGS_REG);
14762 }
14763
14764 /* Return true if we need to split op0 = op1 + op2 into a sequence of
14765 move and add to avoid AGU stalls. */
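/* Illustrative sketch (not from the original source): an addition of the
   form a = b + c that would otherwise be emitted as
     lea (%rbx,%rcx), %rax
   may instead be split into
     mov %rbx, %rax
     add %rcx, %rax
   when the lea would stall waiting on the address-generation unit.  */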
14766
14767 bool
14768 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
14769 {
14770 unsigned int regno0, regno1, regno2;
14771
14772 /* Check if we need to optimize. */
14773 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
14774 return false;
14775
14776 /* Check it is correct to split here. */
14777 if (!ix86_ok_to_clobber_flags (insn))
14778 return false;
14779
14780 regno0 = true_regnum (operands[0]);
14781 regno1 = true_regnum (operands[1]);
14782 regno2 = true_regnum (operands[2]);
14783
14784 /* We need to split only adds with a non-destructive
14785 destination operand. */
14786 if (regno0 == regno1 || regno0 == regno2)
14787 return false;
14788 else
14789 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
14790 }
14791
14792 /* Return true if we should emit lea instruction instead of mov
14793 instruction. */
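/* Illustrative sketch (assumption, not from the original source): a plain
   register copy "mov %rbx, %rax" can be emitted as "lea (%rbx), %rax" so
   that, on AGU-oriented tuning targets, the move executes on the
   address-generation unit; ix86_lea_outperforms decides whether this
   pays off.  */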
14794
14795 bool
14796 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
14797 {
14798 unsigned int regno0, regno1;
14799
14800 /* Check if we need to optimize. */
14801 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
14802 return false;
14803
14804 /* Use lea for reg to reg moves only. */
14805 if (!REG_P (operands[0]) || !REG_P (operands[1]))
14806 return false;
14807
14808 regno0 = true_regnum (operands[0]);
14809 regno1 = true_regnum (operands[1]);
14810
14811 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
14812 }
14813
14814 /* Return true if we need to split lea into a sequence of
14815 instructions to avoid AGU stalls. */
14816
14817 bool
14818 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
14819 {
14820 unsigned int regno0, regno1, regno2;
14821 int split_cost;
14822 struct ix86_address parts;
14823 int ok;
14824
14825 /* The "at least two components" test below might not catch simple
14826 move or zero extension insns if parts.base is non-NULL and parts.disp
14827 is const0_rtx as the only components in the address, e.g. if the
14828 register is %rbp or %r13. As this test is much cheaper and moves or
14829 zero extensions are the common case, do this check first. */
14830 if (REG_P (operands[1])
14831 || (SImode_address_operand (operands[1], VOIDmode)
14832 && REG_P (XEXP (operands[1], 0))))
14833 return false;
14834
14835 /* Check if it is OK to split here. */
14836 if (!ix86_ok_to_clobber_flags (insn))
14837 return false;
14838
14839 ok = ix86_decompose_address (operands[1], &parts);
14840 gcc_assert (ok);
14841
14842 /* There should be at least two components in the address. */
14843 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
14844 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
14845 return false;
14846
14847 /* We should not split into add if a non-legitimate pic
14848 operand is used as the displacement. */
14849 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
14850 return false;
14851
14852 regno0 = true_regnum (operands[0]);
14853 regno1 = INVALID_REGNUM;
14854 regno2 = INVALID_REGNUM;
14855
14856 if (parts.base)
14857 regno1 = true_regnum (parts.base);
14858 if (parts.index)
14859 regno2 = true_regnum (parts.index);
14860
14861 /* Use add for a = a + b and a = b + a since it is faster and shorter
14862 than lea for most processors. For the processors like BONNELL, if
14863 the destination register of LEA holds an actual address which will
14864 be used soon, LEA is better and otherwise ADD is better. */
14865 if (!TARGET_BONNELL
14866 && parts.scale == 1
14867 && (!parts.disp || parts.disp == const0_rtx)
14868 && (regno0 == regno1 || regno0 == regno2))
14869 return true;
14870
14871 /* Check we need to optimize. */
14872 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
14873 return false;
14874
14875 split_cost = 0;
14876
14877 /* Compute how many cycles we will add to the execution time
14878 if we split the lea into a sequence of instructions. */
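/* Rough, illustrative example (not from the original source): splitting a
   hypothetical "lea 8(%rbx,%rcx,4), %rax" needs approximately
     mov %rcx, %rax    (non-destructive destination)
     shl $2, %rax      (scale > 1)
     add %rbx, %rax    (base and index both present)
     add $8, %rax      (non-zero displacement)
   minus the one lea being replaced, giving a split cost of 3 by the
   rules below.  */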
14879 if (parts.base || parts.index)
14880 {
14881 /* Have to use a mov instruction if the non-destructive
14882 destination form is used. */
14883 if (regno1 != regno0 && regno2 != regno0)
14884 split_cost += 1;
14885
14886 /* Have to add index to base if both exist. */
14887 if (parts.base && parts.index)
14888 split_cost += 1;
14889
14890 /* Have to use shift and adds if scale is 2 or greater. */
14891 if (parts.scale > 1)
14892 {
14893 if (regno0 != regno1)
14894 split_cost += 1;
14895 else if (regno2 == regno0)
14896 split_cost += 4;
14897 else
14898 split_cost += parts.scale;
14899 }
14900
14901 /* Have to use add instruction with immediate if
14902 disp is non zero. */
14903 if (parts.disp && parts.disp != const0_rtx)
14904 split_cost += 1;
14905
14906 /* Subtract the price of lea. */
14907 split_cost -= 1;
14908 }
14909
14910 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
14911 parts.scale > 1);
14912 }
14913
14914 /* Return true if it is ok to optimize an ADD operation to LEA
14915 operation to avoid flag register consumption. For most processors,
14916 ADD is faster than LEA. For processors like BONNELL, if the
14917 destination register of LEA holds an actual address which will be
14918 used soon, LEA is better and otherwise ADD is better. */
14919
14920 bool
14921 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
14922 {
14923 unsigned int regno0 = true_regnum (operands[0]);
14924 unsigned int regno1 = true_regnum (operands[1]);
14925 unsigned int regno2 = true_regnum (operands[2]);
14926
14927 /* If a = b + c, (a!=b && a!=c), must use lea form. */
14928 if (regno0 != regno1 && regno0 != regno2)
14929 return true;
14930
14931 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
14932 return false;
14933
14934 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
14935 }
14936
14937 /* Return true if destination reg of SET_BODY is shift count of
14938 USE_BODY. */
14939
14940 static bool
14941 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
14942 {
14943 rtx set_dest;
14944 rtx shift_rtx;
14945 int i;
14946
14947 /* Retrieve destination of SET_BODY. */
14948 switch (GET_CODE (set_body))
14949 {
14950 case SET:
14951 set_dest = SET_DEST (set_body);
14952 if (!set_dest || !REG_P (set_dest))
14953 return false;
14954 break;
14955 case PARALLEL:
14956 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
14957 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
14958 use_body))
14959 return true;
14960 /* FALLTHROUGH */
14961 default:
14962 return false;
14963 }
14964
14965 /* Retrieve shift count of USE_BODY. */
14966 switch (GET_CODE (use_body))
14967 {
14968 case SET:
14969 shift_rtx = XEXP (use_body, 1);
14970 break;
14971 case PARALLEL:
14972 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
14973 if (ix86_dep_by_shift_count_body (set_body,
14974 XVECEXP (use_body, 0, i)))
14975 return true;
14976 /* FALLTHROUGH */
14977 default:
14978 return false;
14979 }
14980
14981 if (shift_rtx
14982 && (GET_CODE (shift_rtx) == ASHIFT
14983 || GET_CODE (shift_rtx) == LSHIFTRT
14984 || GET_CODE (shift_rtx) == ASHIFTRT
14985 || GET_CODE (shift_rtx) == ROTATE
14986 || GET_CODE (shift_rtx) == ROTATERT))
14987 {
14988 rtx shift_count = XEXP (shift_rtx, 1);
14989
14990 /* Return true if shift count is dest of SET_BODY. */
14991 if (REG_P (shift_count))
14992 {
14993 /* Add check since it can be invoked before register
14994 allocation in pre-reload schedule. */
14995 if (reload_completed
14996 && true_regnum (set_dest) == true_regnum (shift_count))
14997 return true;
14998 else if (REGNO (set_dest) == REGNO (shift_count))
14999 return true;
15000 }
15001 }
15002
15003 return false;
15004 }
15005
15006 /* Return true if destination reg of SET_INSN is shift count of
15007 USE_INSN. */
15008
15009 bool
15010 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
15011 {
15012 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
15013 PATTERN (use_insn));
15014 }
15015
15016 /* Return TRUE or FALSE depending on whether the unary operator meets the
15017 appropriate constraints. */
15018
15019 bool
15020 ix86_unary_operator_ok (enum rtx_code,
15021 machine_mode,
15022 rtx operands[2])
15023 {
15024 /* If one of operands is memory, source and destination must match. */
15025 if ((MEM_P (operands[0])
15026 || MEM_P (operands[1]))
15027 && ! rtx_equal_p (operands[0], operands[1]))
15028 return false;
15029 return true;
15030 }
15031
15032 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
15033 are ok, keeping in mind the possible movddup alternative. */
15034
15035 bool
15036 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
15037 {
15038 if (MEM_P (operands[0]))
15039 return rtx_equal_p (operands[0], operands[1 + high]);
15040 if (MEM_P (operands[1]) && MEM_P (operands[2]))
15041 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
15042 return true;
15043 }
15044
15045 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
15046 then replicate the value for all elements of the vector
15047 register. */
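/* For example (illustrative): for V4SFmode and VALUE 1.0 this builds
   { 1.0, 1.0, 1.0, 1.0 } when VECT is true and { 1.0, 0.0, 0.0, 0.0 }
   when VECT is false; integer vector modes require VECT.  */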
15048
15049 rtx
15050 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
15051 {
15052 int i, n_elt;
15053 rtvec v;
15054 machine_mode scalar_mode;
15055
15056 switch (mode)
15057 {
15058 case E_V64QImode:
15059 case E_V32QImode:
15060 case E_V16QImode:
15061 case E_V32HImode:
15062 case E_V16HImode:
15063 case E_V8HImode:
15064 case E_V16SImode:
15065 case E_V8SImode:
15066 case E_V4SImode:
15067 case E_V8DImode:
15068 case E_V4DImode:
15069 case E_V2DImode:
15070 gcc_assert (vect);
15071 /* FALLTHRU */
15072 case E_V16SFmode:
15073 case E_V8SFmode:
15074 case E_V4SFmode:
15075 case E_V8DFmode:
15076 case E_V4DFmode:
15077 case E_V2DFmode:
15078 n_elt = GET_MODE_NUNITS (mode);
15079 v = rtvec_alloc (n_elt);
15080 scalar_mode = GET_MODE_INNER (mode);
15081
15082 RTVEC_ELT (v, 0) = value;
15083
15084 for (i = 1; i < n_elt; ++i)
15085 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
15086
15087 return gen_rtx_CONST_VECTOR (mode, v);
15088
15089 default:
15090 gcc_unreachable ();
15091 }
15092 }
15093
15094 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
15095 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
15096 for an SSE register. If VECT is true, then replicate the mask for
15097 all elements of the vector register. If INVERT is true, then create
15098 a mask excluding the sign bit. */
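/* For example (illustrative): for V4SFmode with VECT true this yields
   { 0x80000000, 0x80000000, 0x80000000, 0x80000000 } reinterpreted as
   floats, or the complement { 0x7fffffff, ... } when INVERT is true.  */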
15099
15100 rtx
15101 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
15102 {
15103 machine_mode vec_mode, imode;
15104 wide_int w;
15105 rtx mask, v;
15106
15107 switch (mode)
15108 {
15109 case E_V16SImode:
15110 case E_V16SFmode:
15111 case E_V8SImode:
15112 case E_V4SImode:
15113 case E_V8SFmode:
15114 case E_V4SFmode:
15115 vec_mode = mode;
15116 imode = SImode;
15117 break;
15118
15119 case E_V8DImode:
15120 case E_V4DImode:
15121 case E_V2DImode:
15122 case E_V8DFmode:
15123 case E_V4DFmode:
15124 case E_V2DFmode:
15125 vec_mode = mode;
15126 imode = DImode;
15127 break;
15128
15129 case E_TImode:
15130 case E_TFmode:
15131 vec_mode = VOIDmode;
15132 imode = TImode;
15133 break;
15134
15135 default:
15136 gcc_unreachable ();
15137 }
15138
15139 machine_mode inner_mode = GET_MODE_INNER (mode);
15140 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
15141 GET_MODE_BITSIZE (inner_mode));
15142 if (invert)
15143 w = wi::bit_not (w);
15144
15145 /* Force this value into the low part of a fp vector constant. */
15146 mask = immed_wide_int_const (w, imode);
15147 mask = gen_lowpart (inner_mode, mask);
15148
15149 if (vec_mode == VOIDmode)
15150 return force_reg (inner_mode, mask);
15151
15152 v = ix86_build_const_vector (vec_mode, vect, mask);
15153 return force_reg (vec_mode, v);
15154 }
15155
15156 /* Return TRUE or FALSE depending on whether the first SET in INSN
15157 has source and destination with matching CC modes, and that the
15158 CC mode is at least as constrained as REQ_MODE. */
15159
15160 bool
15161 ix86_match_ccmode (rtx insn, machine_mode req_mode)
15162 {
15163 rtx set;
15164 machine_mode set_mode;
15165
15166 set = PATTERN (insn);
15167 if (GET_CODE (set) == PARALLEL)
15168 set = XVECEXP (set, 0, 0);
15169 gcc_assert (GET_CODE (set) == SET);
15170 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
15171
15172 set_mode = GET_MODE (SET_DEST (set));
15173 switch (set_mode)
15174 {
15175 case E_CCNOmode:
15176 if (req_mode != CCNOmode
15177 && (req_mode != CCmode
15178 || XEXP (SET_SRC (set), 1) != const0_rtx))
15179 return false;
15180 break;
15181 case E_CCmode:
15182 if (req_mode == CCGCmode)
15183 return false;
15184 /* FALLTHRU */
15185 case E_CCGCmode:
15186 if (req_mode == CCGOCmode || req_mode == CCNOmode)
15187 return false;
15188 /* FALLTHRU */
15189 case E_CCGOCmode:
15190 if (req_mode == CCZmode)
15191 return false;
15192 /* FALLTHRU */
15193 case E_CCZmode:
15194 break;
15195
15196 case E_CCGZmode:
15197
15198 case E_CCAmode:
15199 case E_CCCmode:
15200 case E_CCOmode:
15201 case E_CCPmode:
15202 case E_CCSmode:
15203 if (set_mode != req_mode)
15204 return false;
15205 break;
15206
15207 default:
15208 gcc_unreachable ();
15209 }
15210
15211 return GET_MODE (SET_SRC (set)) == set_mode;
15212 }
15213
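/* Return the CC mode needed for comparison CODE on OP0 and OP1.  As a
   sketch of the cases handled below: EQ/NE only need ZF (CCZmode);
   GEU/LTU against "a + b" is a carry/overflow check (CCCmode); signed
   GE/LT against zero can use CCGOCmode, otherwise CCGCmode.  */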
15214 machine_mode
15215 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
15216 {
15217 machine_mode mode = GET_MODE (op0);
15218
15219 if (SCALAR_FLOAT_MODE_P (mode))
15220 {
15221 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15222 return CCFPmode;
15223 }
15224
15225 switch (code)
15226 {
15227 /* Only zero flag is needed. */
15228 case EQ: /* ZF=0 */
15229 case NE: /* ZF!=0 */
15230 return CCZmode;
15231 /* Codes needing carry flag. */
15232 case GEU: /* CF=0 */
15233 case LTU: /* CF=1 */
15234 /* Detect overflow checks. They need just the carry flag. */
15235 if (GET_CODE (op0) == PLUS
15236 && (rtx_equal_p (op1, XEXP (op0, 0))
15237 || rtx_equal_p (op1, XEXP (op0, 1))))
15238 return CCCmode;
15239 else
15240 return CCmode;
15241 case GTU: /* CF=0 & ZF=0 */
15242 case LEU: /* CF=1 | ZF=1 */
15243 return CCmode;
15244 /* Codes possibly doable only with sign flag when
15245 comparing against zero. */
15246 case GE: /* SF=OF or SF=0 */
15247 case LT: /* SF<>OF or SF=1 */
15248 if (op1 == const0_rtx)
15249 return CCGOCmode;
15250 else
15251 /* For other cases Carry flag is not required. */
15252 return CCGCmode;
15253 /* Codes doable only with the sign flag when comparing
15254 against zero, but we miss the jump instruction for it,
15255 so we need to use relational tests against overflow,
15256 which thus needs to be zero. */
15257 case GT: /* ZF=0 & SF=OF */
15258 case LE: /* ZF=1 | SF<>OF */
15259 if (op1 == const0_rtx)
15260 return CCNOmode;
15261 else
15262 return CCGCmode;
15263 /* The strcmp pattern does (use flags), and combine may ask us for the
15264 proper mode. */
15265 case USE:
15266 return CCmode;
15267 default:
15268 gcc_unreachable ();
15269 }
15270 }
15271
15272 /* Return the fixed registers used for condition codes. */
15273
15274 static bool
15275 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
15276 {
15277 *p1 = FLAGS_REG;
15278 *p2 = INVALID_REGNUM;
15279 return true;
15280 }
15281
15282 /* If two condition code modes are compatible, return a condition code
15283 mode which is compatible with both. Otherwise, return
15284 VOIDmode. */
15285
15286 static machine_mode
15287 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
15288 {
15289 if (m1 == m2)
15290 return m1;
15291
15292 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
15293 return VOIDmode;
15294
15295 if ((m1 == CCGCmode && m2 == CCGOCmode)
15296 || (m1 == CCGOCmode && m2 == CCGCmode))
15297 return CCGCmode;
15298
15299 if ((m1 == CCNOmode && m2 == CCGOCmode)
15300 || (m1 == CCGOCmode && m2 == CCNOmode))
15301 return CCNOmode;
15302
15303 if (m1 == CCZmode
15304 && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
15305 return m2;
15306 else if (m2 == CCZmode
15307 && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
15308 return m1;
15309
15310 switch (m1)
15311 {
15312 default:
15313 gcc_unreachable ();
15314
15315 case E_CCmode:
15316 case E_CCGCmode:
15317 case E_CCGOCmode:
15318 case E_CCNOmode:
15319 case E_CCAmode:
15320 case E_CCCmode:
15321 case E_CCOmode:
15322 case E_CCPmode:
15323 case E_CCSmode:
15324 case E_CCZmode:
15325 switch (m2)
15326 {
15327 default:
15328 return VOIDmode;
15329
15330 case E_CCmode:
15331 case E_CCGCmode:
15332 case E_CCGOCmode:
15333 case E_CCNOmode:
15334 case E_CCAmode:
15335 case E_CCCmode:
15336 case E_CCOmode:
15337 case E_CCPmode:
15338 case E_CCSmode:
15339 case E_CCZmode:
15340 return CCmode;
15341 }
15342
15343 case E_CCFPmode:
15344 /* These are only compatible with themselves, which we already
15345 checked above. */
15346 return VOIDmode;
15347 }
15348 }
15349
15350 /* Return strategy to use for floating-point. We assume that fcomi is always
15351 preferable where available, since that is also true when looking at size
15352 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
15353
15354 enum ix86_fpcmp_strategy
15355 ix86_fp_comparison_strategy (enum rtx_code)
15356 {
15357 /* Do fcomi/sahf based test when profitable. */
15358
15359 if (TARGET_CMOVE)
15360 return IX86_FPCMP_COMI;
15361
15362 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
15363 return IX86_FPCMP_SAHF;
15364
15365 return IX86_FPCMP_ARITH;
15366 }
15367
15368 /* Convert comparison codes we use to represent FP comparison to integer
15369 code that will result in proper branch. Return UNKNOWN if no such code
15370 is available. */
15371
15372 enum rtx_code
15373 ix86_fp_compare_code_to_integer (enum rtx_code code)
15374 {
15375 switch (code)
15376 {
15377 case GT:
15378 return GTU;
15379 case GE:
15380 return GEU;
15381 case ORDERED:
15382 case UNORDERED:
15383 return code;
15384 case UNEQ:
15385 return EQ;
15386 case UNLT:
15387 return LTU;
15388 case UNLE:
15389 return LEU;
15390 case LTGT:
15391 return NE;
15392 default:
15393 return UNKNOWN;
15394 }
15395 }
15396
15397 /* Zero extend possibly SImode EXP to Pmode register. */
15398 rtx
15399 ix86_zero_extend_to_Pmode (rtx exp)
15400 {
15401 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
15402 }
15403
15404 /* Return true if the function being called was marked with attribute
15405 "noplt" or using -fno-plt and we are compiling for non-PIC. We need
15406 to handle the non-PIC case in the backend because there is no easy
15407 interface for the front-end to force non-PLT calls to use the GOT.
15408 This is currently used only with 64-bit or 32-bit GOT32X ELF targets
15409 to call the function marked "noplt" indirectly. */
15410
15411 static bool
15412 ix86_nopic_noplt_attribute_p (rtx call_op)
15413 {
15414 if (flag_pic || ix86_cmodel == CM_LARGE
15415 || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
15416 || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
15417 || SYMBOL_REF_LOCAL_P (call_op))
15418 return false;
15419
15420 tree symbol_decl = SYMBOL_REF_DECL (call_op);
15421
15422 if (!flag_plt
15423 || (symbol_decl != NULL_TREE
15424 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
15425 return true;
15426
15427 return false;
15428 }
15429
15430 /* Helper to output the jmp/call. */
15431 static void
15432 ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
15433 {
15434 if (thunk_name != NULL)
15435 {
15436 fprintf (asm_out_file, "\tjmp\t");
15437 assemble_name (asm_out_file, thunk_name);
15438 putc ('\n', asm_out_file);
15439 }
15440 else
15441 output_indirect_thunk (regno);
15442 }
15443
15444 /* Output indirect branch via a call and return thunk. CALL_OP is a
15445 register which contains the branch target. XASM is the assembly
15446 template for CALL_OP. Branch is a tail call if SIBCALL_P is true.
15447 A normal call is converted to:
15448
15449 call __x86_indirect_thunk_reg
15450
15451 and a tail call is converted to:
15452
15453 jmp __x86_indirect_thunk_reg
15454 */
15455
15456 static void
15457 ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
15458 {
15459 char thunk_name_buf[32];
15460 char *thunk_name;
15461 enum indirect_thunk_prefix need_prefix
15462 = indirect_thunk_need_prefix (current_output_insn);
15463 int regno = REGNO (call_op);
15464
15465 if (cfun->machine->indirect_branch_type
15466 != indirect_branch_thunk_inline)
15467 {
15468 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
15469 {
15470 int i = regno;
15471 if (i >= FIRST_REX_INT_REG)
15472 i -= (FIRST_REX_INT_REG - LAST_INT_REG - 1);
15473 indirect_thunks_used |= 1 << i;
15474 }
15475 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
15476 thunk_name = thunk_name_buf;
15477 }
15478 else
15479 thunk_name = NULL;
15480
15481 if (sibcall_p)
15482 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15483 else
15484 {
15485 if (thunk_name != NULL)
15486 {
15487 fprintf (asm_out_file, "\tcall\t");
15488 assemble_name (asm_out_file, thunk_name);
15489 putc ('\n', asm_out_file);
15490 return;
15491 }
15492
15493 char indirectlabel1[32];
15494 char indirectlabel2[32];
15495
15496 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
15497 INDIRECT_LABEL,
15498 indirectlabelno++);
15499 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
15500 INDIRECT_LABEL,
15501 indirectlabelno++);
15502
15503 /* Jump. */
15504 fputs ("\tjmp\t", asm_out_file);
15505 assemble_name_raw (asm_out_file, indirectlabel2);
15506 fputc ('\n', asm_out_file);
15507
15508 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
15509
15510 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15511
15512 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
15513
15514 /* Call. */
15515 fputs ("\tcall\t", asm_out_file);
15516 assemble_name_raw (asm_out_file, indirectlabel1);
15517 fputc ('\n', asm_out_file);
15518 }
15519 }
15520
15521 /* Output indirect branch via a call and return thunk. CALL_OP is
15522 the branch target. XASM is the assembly template for CALL_OP.
15523 Branch is a tail call if SIBCALL_P is true. A normal call is
15524 converted to:
15525
15526 jmp L2
15527 L1:
15528 push CALL_OP
15529 jmp __x86_indirect_thunk
15530 L2:
15531 call L1
15532
15533 and a tail call is converted to:
15534
15535 push CALL_OP
15536 jmp __x86_indirect_thunk
15537 */
15538
15539 static void
15540 ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
15541 bool sibcall_p)
15542 {
15543 char thunk_name_buf[32];
15544 char *thunk_name;
15545 char push_buf[64];
15546 enum indirect_thunk_prefix need_prefix
15547 = indirect_thunk_need_prefix (current_output_insn);
15548 int regno = -1;
15549
15550 if (cfun->machine->indirect_branch_type
15551 != indirect_branch_thunk_inline)
15552 {
15553 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
15554 indirect_thunk_needed = true;
15555 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
15556 thunk_name = thunk_name_buf;
15557 }
15558 else
15559 thunk_name = NULL;
15560
15561 snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
15562 TARGET_64BIT ? 'q' : 'l', xasm);
15563
15564 if (sibcall_p)
15565 {
15566 output_asm_insn (push_buf, &call_op);
15567 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15568 }
15569 else
15570 {
15571 char indirectlabel1[32];
15572 char indirectlabel2[32];
15573
15574 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
15575 INDIRECT_LABEL,
15576 indirectlabelno++);
15577 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
15578 INDIRECT_LABEL,
15579 indirectlabelno++);
15580
15581 /* Jump. */
15582 fputs ("\tjmp\t", asm_out_file);
15583 assemble_name_raw (asm_out_file, indirectlabel2);
15584 fputc ('\n', asm_out_file);
15585
15586 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
15587
15588 /* An external function may be called via GOT, instead of PLT. */
15589 if (MEM_P (call_op))
15590 {
15591 struct ix86_address parts;
15592 rtx addr = XEXP (call_op, 0);
15593 if (ix86_decompose_address (addr, &parts)
15594 && parts.base == stack_pointer_rtx)
15595 {
15596 /* Since call will adjust stack by -UNITS_PER_WORD,
15597 we must convert "disp(stack, index, scale)" to
15598 "disp+UNITS_PER_WORD(stack, index, scale)". */
15599 if (parts.index)
15600 {
15601 addr = gen_rtx_MULT (Pmode, parts.index,
15602 GEN_INT (parts.scale));
15603 addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
15604 addr);
15605 }
15606 else
15607 addr = stack_pointer_rtx;
15608
15609 rtx disp;
15610 if (parts.disp != NULL_RTX)
15611 disp = plus_constant (Pmode, parts.disp,
15612 UNITS_PER_WORD);
15613 else
15614 disp = GEN_INT (UNITS_PER_WORD);
15615
15616 addr = gen_rtx_PLUS (Pmode, addr, disp);
15617 call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
15618 }
15619 }
15620
15621 output_asm_insn (push_buf, &call_op);
15622
15623 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15624
15625 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
15626
15627 /* Call. */
15628 fputs ("\tcall\t", asm_out_file);
15629 assemble_name_raw (asm_out_file, indirectlabel1);
15630 fputc ('\n', asm_out_file);
15631 }
15632 }
15633
15634 /* Output indirect branch via a call and return thunk. CALL_OP is
15635 the branch target. XASM is the assembly template for CALL_OP.
15636 Branch is a tail call if SIBCALL_P is true. */
15637
15638 static void
15639 ix86_output_indirect_branch (rtx call_op, const char *xasm,
15640 bool sibcall_p)
15641 {
15642 if (REG_P (call_op))
15643 ix86_output_indirect_branch_via_reg (call_op, sibcall_p);
15644 else
15645 ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p);
15646 }
15647
15648 /* Output indirect jump. CALL_OP is the jump target. */
15649
15650 const char *
15651 ix86_output_indirect_jmp (rtx call_op)
15652 {
15653 if (cfun->machine->indirect_branch_type != indirect_branch_keep)
15654 {
15655 /* We can't have a red zone since the "call" in the indirect thunk
15656 pushes the return address onto the stack, destroying the red zone. */
15657 if (ix86_red_zone_size != 0)
15658 gcc_unreachable ();
15659
15660 ix86_output_indirect_branch (call_op, "%0", true);
15661 return "";
15662 }
15663 else
15664 return "%!jmp\t%A0";
15665 }
15666
15667 /* Output return instrumentation for current function if needed. */
15668
15669 static void
15670 output_return_instrumentation (void)
15671 {
15672 if (ix86_instrument_return != instrument_return_none
15673 && flag_fentry
15674 && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
15675 {
15676 if (ix86_flag_record_return)
15677 fprintf (asm_out_file, "1:\n");
15678 switch (ix86_instrument_return)
15679 {
15680 case instrument_return_call:
15681 fprintf (asm_out_file, "\tcall\t__return__\n");
15682 break;
15683 case instrument_return_nop5:
15684 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
15685 fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
15686 break;
15687 case instrument_return_none:
15688 break;
15689 }
15690
15691 if (ix86_flag_record_return)
15692 {
15693 fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
15694 fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
15695 fprintf (asm_out_file, "\t.previous\n");
15696 }
15697 }
15698 }
15699
15700 /* Output the function return. Add a REP
15701 prefix to RET if LONG_P is true and the function return is kept. */
15702
15703 const char *
15704 ix86_output_function_return (bool long_p)
15705 {
15706 output_return_instrumentation ();
15707
15708 if (cfun->machine->function_return_type != indirect_branch_keep)
15709 {
15710 char thunk_name[32];
15711 enum indirect_thunk_prefix need_prefix
15712 = indirect_thunk_need_prefix (current_output_insn);
15713
15714 if (cfun->machine->function_return_type
15715 != indirect_branch_thunk_inline)
15716 {
15717 bool need_thunk = (cfun->machine->function_return_type
15718 == indirect_branch_thunk);
15719 indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
15720 true);
15721 indirect_return_needed |= need_thunk;
15722 fprintf (asm_out_file, "\tjmp\t");
15723 assemble_name (asm_out_file, thunk_name);
15724 putc ('\n', asm_out_file);
15725 }
15726 else
15727 output_indirect_thunk (INVALID_REGNUM);
15728
15729 return "";
15730 }
15731
15732 if (!long_p)
15733 return "%!ret";
15734
15735 return "rep%; ret";
15736 }
15737
15738 /* Output indirect function return. RET_OP is the function return
15739 target. */
15740
15741 const char *
15742 ix86_output_indirect_function_return (rtx ret_op)
15743 {
15744 if (cfun->machine->function_return_type != indirect_branch_keep)
15745 {
15746 char thunk_name[32];
15747 enum indirect_thunk_prefix need_prefix
15748 = indirect_thunk_need_prefix (current_output_insn);
15749 unsigned int regno = REGNO (ret_op);
15750 gcc_assert (regno == CX_REG);
15751
15752 if (cfun->machine->function_return_type
15753 != indirect_branch_thunk_inline)
15754 {
15755 bool need_thunk = (cfun->machine->function_return_type
15756 == indirect_branch_thunk);
15757 indirect_thunk_name (thunk_name, regno, need_prefix, true);
15758
15759 if (need_thunk)
15760 {
15761 indirect_return_via_cx = true;
15762 indirect_thunks_used |= 1 << CX_REG;
15763 }
15764 fprintf (asm_out_file, "\tjmp\t");
15765 assemble_name (asm_out_file, thunk_name);
15766 putc ('\n', asm_out_file);
15767 }
15768 else
15769 output_indirect_thunk (regno);
15770
15771 return "";
15772 }
15773 else
15774 return "%!jmp\t%A0";
15775 }
15776
15777 /* Output the assembly for a call instruction. */
15778
15779 const char *
15780 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
15781 {
15782 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
15783 bool output_indirect_p
15784 = (!TARGET_SEH
15785 && cfun->machine->indirect_branch_type != indirect_branch_keep);
15786 bool seh_nop_p = false;
15787 const char *xasm;
15788
15789 if (SIBLING_CALL_P (insn))
15790 {
15791 output_return_instrumentation ();
15792 if (direct_p)
15793 {
15794 if (ix86_nopic_noplt_attribute_p (call_op))
15795 {
15796 direct_p = false;
15797 if (TARGET_64BIT)
15798 {
15799 if (output_indirect_p)
15800 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15801 else
15802 xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15803 }
15804 else
15805 {
15806 if (output_indirect_p)
15807 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
15808 else
15809 xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
15810 }
15811 }
15812 else
15813 xasm = "%!jmp\t%P0";
15814 }
15815 /* SEH epilogue detection requires the indirect branch case
15816 to include REX.W. */
15817 else if (TARGET_SEH)
15818 xasm = "%!rex.W jmp\t%A0";
15819 else
15820 {
15821 if (output_indirect_p)
15822 xasm = "%0";
15823 else
15824 xasm = "%!jmp\t%A0";
15825 }
15826
15827 if (output_indirect_p && !direct_p)
15828 ix86_output_indirect_branch (call_op, xasm, true);
15829 else
15830 output_asm_insn (xasm, &call_op);
15831 return "";
15832 }
15833
15834 /* SEH unwinding can require an extra nop to be emitted in several
15835 circumstances. Determine if we have one of those. */
15836 if (TARGET_SEH)
15837 {
15838 rtx_insn *i;
15839
15840 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
15841 {
15842 /* Prevent a catch region from being adjacent to a jump that would
15843 be interpreted as an epilogue sequence by the unwinder. */
15844 if (JUMP_P (i) && CROSSING_JUMP_P (i))
15845 {
15846 seh_nop_p = true;
15847 break;
15848 }
15849
15850 /* If we get to another real insn, we don't need the nop. */
15851 if (INSN_P (i))
15852 break;
15853
15854 /* If we get to the epilogue note, prevent a catch region from
15855 being adjacent to the standard epilogue sequence. Note that,
15856 if non-call exceptions are enabled, we already did it during
15857 epilogue expansion, or else, if the insn can throw internally,
15858 we already did it during the reorg pass. */
15859 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
15860 && !flag_non_call_exceptions
15861 && !can_throw_internal (insn))
15862 {
15863 seh_nop_p = true;
15864 break;
15865 }
15866 }
15867
15868 /* If we didn't find a real insn following the call, prevent the
15869 unwinder from looking into the next function. */
15870 if (i == NULL)
15871 seh_nop_p = true;
15872 }
15873
15874 if (direct_p)
15875 {
15876 if (ix86_nopic_noplt_attribute_p (call_op))
15877 {
15878 direct_p = false;
15879 if (TARGET_64BIT)
15880 {
15881 if (output_indirect_p)
15882 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15883 else
15884 xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15885 }
15886 else
15887 {
15888 if (output_indirect_p)
15889 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
15890 else
15891 xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
15892 }
15893 }
15894 else
15895 xasm = "%!call\t%P0";
15896 }
15897 else
15898 {
15899 if (output_indirect_p)
15900 xasm = "%0";
15901 else
15902 xasm = "%!call\t%A0";
15903 }
15904
15905 if (output_indirect_p && !direct_p)
15906 ix86_output_indirect_branch (call_op, xasm, false);
15907 else
15908 output_asm_insn (xasm, &call_op);
15909
15910 if (seh_nop_p)
15911 return "nop";
15912
15913 return "";
15914 }
15915
15916 /* Return a MEM corresponding to a stack slot with mode MODE.
15917 Allocate a new slot if necessary.
15918
15919 The RTL for a function can have several slots available: N is
15920 which slot to use. */
15921
15922 rtx
15923 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
15924 {
15925 struct stack_local_entry *s;
15926
15927 gcc_assert (n < MAX_386_STACK_LOCALS);
15928
15929 for (s = ix86_stack_locals; s; s = s->next)
15930 if (s->mode == mode && s->n == n)
15931 return validize_mem (copy_rtx (s->rtl));
15932
15933 s = ggc_alloc<stack_local_entry> ();
15934 s->n = n;
15935 s->mode = mode;
15936 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15937
15938 s->next = ix86_stack_locals;
15939 ix86_stack_locals = s;
15940 return validize_mem (copy_rtx (s->rtl));
15941 }
15942
15943 static void
15944 ix86_instantiate_decls (void)
15945 {
15946 struct stack_local_entry *s;
15947
15948 for (s = ix86_stack_locals; s; s = s->next)
15949 if (s->rtl != NULL_RTX)
15950 instantiate_decl_rtl (s->rtl);
15951 }
15952
15953 /* Check whether x86 address PARTS is a pc-relative address. */
15954
15955 bool
15956 ix86_rip_relative_addr_p (struct ix86_address *parts)
15957 {
15958 rtx base, index, disp;
15959
15960 base = parts->base;
15961 index = parts->index;
15962 disp = parts->disp;
15963
15964 if (disp && !base && !index)
15965 {
15966 if (TARGET_64BIT)
15967 {
15968 rtx symbol = disp;
15969
15970 if (GET_CODE (disp) == CONST)
15971 symbol = XEXP (disp, 0);
15972 if (GET_CODE (symbol) == PLUS
15973 && CONST_INT_P (XEXP (symbol, 1)))
15974 symbol = XEXP (symbol, 0);
15975
15976 if (GET_CODE (symbol) == LABEL_REF
15977 || (GET_CODE (symbol) == SYMBOL_REF
15978 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
15979 || (GET_CODE (symbol) == UNSPEC
15980 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
15981 || XINT (symbol, 1) == UNSPEC_PCREL
15982 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
15983 return true;
15984 }
15985 }
15986 return false;
15987 }
15988
15989 /* Calculate the length of the memory address in the instruction encoding.
15990 Includes addr32 prefix, does not include the one-byte modrm, opcode,
15991 or other prefixes. We never generate addr32 prefix for LEA insn. */
15992
15993 int
15994 memory_address_length (rtx addr, bool lea)
15995 {
15996 struct ix86_address parts;
15997 rtx base, index, disp;
15998 int len;
15999 int ok;
16000
16001 if (GET_CODE (addr) == PRE_DEC
16002 || GET_CODE (addr) == POST_INC
16003 || GET_CODE (addr) == PRE_MODIFY
16004 || GET_CODE (addr) == POST_MODIFY)
16005 return 0;
16006
16007 ok = ix86_decompose_address (addr, &parts);
16008 gcc_assert (ok);
16009
16010 len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
16011
16012 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
16013 if (TARGET_64BIT && !lea
16014 && (SImode_address_operand (addr, VOIDmode)
16015 || (parts.base && GET_MODE (parts.base) == SImode)
16016 || (parts.index && GET_MODE (parts.index) == SImode)))
16017 len++;
16018
16019 base = parts.base;
16020 index = parts.index;
16021 disp = parts.disp;
16022
16023 if (base && SUBREG_P (base))
16024 base = SUBREG_REG (base);
16025 if (index && SUBREG_P (index))
16026 index = SUBREG_REG (index);
16027
16028 gcc_assert (base == NULL_RTX || REG_P (base));
16029 gcc_assert (index == NULL_RTX || REG_P (index));
16030
16031 /* Rule of thumb:
16032 - esp as the base always wants an index,
16033 - ebp as the base always wants a displacement,
16034 - r12 as the base always wants an index,
16035 - r13 as the base always wants a displacement. */
16036
16037 /* Register Indirect. */
16038 if (base && !index && !disp)
16039 {
16040 /* esp (for its index) and ebp (for its displacement) need
16041 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
16042 code. */
16043 if (base == arg_pointer_rtx
16044 || base == frame_pointer_rtx
16045 || REGNO (base) == SP_REG
16046 || REGNO (base) == BP_REG
16047 || REGNO (base) == R12_REG
16048 || REGNO (base) == R13_REG)
16049 len++;
16050 }
16051
16052 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
16053 is not disp32, but disp32(%rip), so for disp32
16054 SIB byte is needed, unless print_operand_address
16055 optimizes it into disp32(%rip) or (%rip) is implied
16056 by UNSPEC. */
16057 else if (disp && !base && !index)
16058 {
16059 len += 4;
16060 if (!ix86_rip_relative_addr_p (&parts))
16061 len++;
16062 }
16063 else
16064 {
16065 /* Find the length of the displacement constant. */
16066 if (disp)
16067 {
16068 if (base && satisfies_constraint_K (disp))
16069 len += 1;
16070 else
16071 len += 4;
16072 }
16073 /* ebp always wants a displacement. Similarly r13. */
16074 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
16075 len++;
16076
16077 /* An index requires the two-byte modrm form.... */
16078 if (index
16079 /* ...like esp (or r12), which always wants an index. */
16080 || base == arg_pointer_rtx
16081 || base == frame_pointer_rtx
16082 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
16083 len++;
16084 }
16085
16086 return len;
16087 }
16088
16089 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
16090 is set, expect that the insn has an 8-bit immediate alternative. */
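/* As a sketch: with SHORTFORM set, "add $5, %eax" fits the sign-extended
   8-bit immediate form and yields length 1, whereas "add $0x12345678, %eax"
   needs a full 4-byte immediate and yields length 4.  */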
16091 int
16092 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
16093 {
16094 int len = 0;
16095 int i;
16096 extract_insn_cached (insn);
16097 for (i = recog_data.n_operands - 1; i >= 0; --i)
16098 if (CONSTANT_P (recog_data.operand[i]))
16099 {
16100 enum attr_mode mode = get_attr_mode (insn);
16101
16102 gcc_assert (!len);
16103 if (shortform && CONST_INT_P (recog_data.operand[i]))
16104 {
16105 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
16106 switch (mode)
16107 {
16108 case MODE_QI:
16109 len = 1;
16110 continue;
16111 case MODE_HI:
16112 ival = trunc_int_for_mode (ival, HImode);
16113 break;
16114 case MODE_SI:
16115 ival = trunc_int_for_mode (ival, SImode);
16116 break;
16117 default:
16118 break;
16119 }
16120 if (IN_RANGE (ival, -128, 127))
16121 {
16122 len = 1;
16123 continue;
16124 }
16125 }
16126 switch (mode)
16127 {
16128 case MODE_QI:
16129 len = 1;
16130 break;
16131 case MODE_HI:
16132 len = 2;
16133 break;
16134 case MODE_SI:
16135 len = 4;
16136 break;
16137 /* Immediates for DImode instructions are encoded
16138 as 32bit sign extended values. */
16139 case MODE_DI:
16140 len = 4;
16141 break;
16142 default:
16143 fatal_insn ("unknown insn mode", insn);
16144 }
16145 }
16146 return len;
16147 }
16148
16149 /* Compute default value for "length_address" attribute. */
16150 int
16151 ix86_attr_length_address_default (rtx_insn *insn)
16152 {
16153 int i;
16154
16155 if (get_attr_type (insn) == TYPE_LEA)
16156 {
16157 rtx set = PATTERN (insn), addr;
16158
16159 if (GET_CODE (set) == PARALLEL)
16160 set = XVECEXP (set, 0, 0);
16161
16162 gcc_assert (GET_CODE (set) == SET);
16163
16164 addr = SET_SRC (set);
16165
16166 return memory_address_length (addr, true);
16167 }
16168
16169 extract_insn_cached (insn);
16170 for (i = recog_data.n_operands - 1; i >= 0; --i)
16171 {
16172 rtx op = recog_data.operand[i];
16173 if (MEM_P (op))
16174 {
16175 constrain_operands_cached (insn, reload_completed);
16176 if (which_alternative != -1)
16177 {
16178 const char *constraints = recog_data.constraints[i];
16179 int alt = which_alternative;
16180
16181 while (*constraints == '=' || *constraints == '+')
16182 constraints++;
16183 while (alt-- > 0)
16184 while (*constraints++ != ',')
16185 ;
16186 /* Skip ignored operands. */
16187 if (*constraints == 'X')
16188 continue;
16189 }
16190
16191 int len = memory_address_length (XEXP (op, 0), false);
16192
16193 /* Account for segment prefix for non-default addr spaces. */
16194 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
16195 len++;
16196
16197 return len;
16198 }
16199 }
16200 return 0;
16201 }
16202
16203 /* Compute default value for "length_vex" attribute. It includes
16204 2 or 3 byte VEX prefix and 1 opcode byte. */
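/* For example (illustrative): "vaddps %xmm1, %xmm2, %xmm3" fits the 2-byte
   VEX form, so the default length is 3; requiring VEX.W, or mentioning an
   extended register in a memory operand (REX.X/REX.B), forces the 3-byte
   form and a length of 4.  */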
16205
16206 int
16207 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
16208 bool has_vex_w)
16209 {
16210 int i;
16211
16212 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX.W bit
16213 requires the 3-byte VEX prefix. */
16214 if (!has_0f_opcode || has_vex_w)
16215 return 3 + 1;
16216
16217 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
16218 if (!TARGET_64BIT)
16219 return 2 + 1;
16220
16221 extract_insn_cached (insn);
16222
16223 for (i = recog_data.n_operands - 1; i >= 0; --i)
16224 if (REG_P (recog_data.operand[i]))
16225 {
16226 /* REX.W bit uses 3 byte VEX prefix. */
16227 if (GET_MODE (recog_data.operand[i]) == DImode
16228 && GENERAL_REG_P (recog_data.operand[i]))
16229 return 3 + 1;
16230 }
16231 else
16232 {
16233 /* REX.X or REX.B bits use 3 byte VEX prefix. */
16234 if (MEM_P (recog_data.operand[i])
16235 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
16236 return 3 + 1;
16237 }
16238
16239 return 2 + 1;
16240 }
16241
16242
16243 static bool
16244 ix86_class_likely_spilled_p (reg_class_t);
16245
16246 /* Return true if the lhs of INSN is a HW function argument register; set
16247 *IS_SPILLED to true if it is a likely-spilled HW register. */
16248 static bool
16249 insn_is_function_arg (rtx insn, bool* is_spilled)
16250 {
16251 rtx dst;
16252
16253 if (!NONDEBUG_INSN_P (insn))
16254 return false;
16255 /* Call instructions are not movable, so ignore them. */
16256 if (CALL_P (insn))
16257 return false;
16258 insn = PATTERN (insn);
16259 if (GET_CODE (insn) == PARALLEL)
16260 insn = XVECEXP (insn, 0, 0);
16261 if (GET_CODE (insn) != SET)
16262 return false;
16263 dst = SET_DEST (insn);
16264 if (REG_P (dst) && HARD_REGISTER_P (dst)
16265 && ix86_function_arg_regno_p (REGNO (dst)))
16266 {
16267 /* Is it likely spilled HW register? */
16268 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
16269 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
16270 *is_spilled = true;
16271 return true;
16272 }
16273 return false;
16274 }
16275
16276 /* Add output dependencies for a chain of adjacent function arguments, but
16277 only if there is a move to a likely-spilled HW register. Return the first
16278 argument if at least one dependence was added, or NULL otherwise. */
16279 static rtx_insn *
16280 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
16281 {
16282 rtx_insn *insn;
16283 rtx_insn *last = call;
16284 rtx_insn *first_arg = NULL;
16285 bool is_spilled = false;
16286
16287 head = PREV_INSN (head);
16288
16289 /* Find the argument-passing instruction nearest to the call. */
16290 while (true)
16291 {
16292 last = PREV_INSN (last);
16293 if (last == head)
16294 return NULL;
16295 if (!NONDEBUG_INSN_P (last))
16296 continue;
16297 if (insn_is_function_arg (last, &is_spilled))
16298 break;
16299 return NULL;
16300 }
16301
16302 first_arg = last;
16303 while (true)
16304 {
16305 insn = PREV_INSN (last);
16306 if (!INSN_P (insn))
16307 break;
16308 if (insn == head)
16309 break;
16310 if (!NONDEBUG_INSN_P (insn))
16311 {
16312 last = insn;
16313 continue;
16314 }
16315 if (insn_is_function_arg (insn, &is_spilled))
16316 {
16317 /* Add an output dependence between two function arguments if the chain
16318 of output arguments contains likely-spilled HW registers. */
16319 if (is_spilled)
16320 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
16321 first_arg = last = insn;
16322 }
16323 else
16324 break;
16325 }
16326 if (!is_spilled)
16327 return NULL;
16328 return first_arg;
16329 }
16330
16331 /* Add output or anti dependency from insn to first_arg to restrict its code
16332 motion. */
16333 static void
16334 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
16335 {
16336 rtx set;
16337 rtx tmp;
16338
16339 set = single_set (insn);
16340 if (!set)
16341 return;
16342 tmp = SET_DEST (set);
16343 if (REG_P (tmp))
16344 {
16345 /* Add output dependency to the first function argument. */
16346 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
16347 return;
16348 }
16349 /* Add anti dependency. */
16350 add_dependence (first_arg, insn, REG_DEP_ANTI);
16351 }
16352
16353 /* Avoid cross-block motion of a function argument by adding a dependency
16354 from the first non-jump instruction in BB. */
16355 static void
16356 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
16357 {
16358 rtx_insn *insn = BB_END (bb);
16359
16360 while (insn)
16361 {
16362 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
16363 {
16364 rtx set = single_set (insn);
16365 if (set)
16366 {
16367 avoid_func_arg_motion (arg, insn);
16368 return;
16369 }
16370 }
16371 if (insn == BB_HEAD (bb))
16372 return;
16373 insn = PREV_INSN (insn);
16374 }
16375 }
16376
16377 /* Hook for pre-reload schedule - avoid motion of function arguments
16378 passed in likely spilled HW registers. */
16379 static void
16380 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
16381 {
16382 rtx_insn *insn;
16383 rtx_insn *first_arg = NULL;
16384 if (reload_completed)
16385 return;
16386 while (head != tail && DEBUG_INSN_P (head))
16387 head = NEXT_INSN (head);
16388 for (insn = tail; insn != head; insn = PREV_INSN (insn))
16389 if (INSN_P (insn) && CALL_P (insn))
16390 {
16391 first_arg = add_parameter_dependencies (insn, head);
16392 if (first_arg)
16393 {
16394 /* Add a dependee for the first argument to predecessors, but only
16395 if the region contains more than one block. */
16396 basic_block bb = BLOCK_FOR_INSN (insn);
16397 int rgn = CONTAINING_RGN (bb->index);
16398 int nr_blks = RGN_NR_BLOCKS (rgn);
16399 /* Skip trivial regions and region head blocks that can have
16400 predecessors outside of region. */
16401 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
16402 {
16403 edge e;
16404 edge_iterator ei;
16405
16406 /* Regions are SCCs with the exception of selective
16407 scheduling with pipelining of outer blocks enabled.
16408 So also check that immediate predecessors of a non-head
16409 block are in the same region. */
16410 FOR_EACH_EDGE (e, ei, bb->preds)
16411 {
16412 /* Avoid creating loop-carried dependencies by using the
16413 topological ordering in the region. */
16414 if (rgn == CONTAINING_RGN (e->src->index)
16415 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
16416 add_dependee_for_func_arg (first_arg, e->src);
16417 }
16418 }
16419 insn = first_arg;
16420 if (insn == head)
16421 break;
16422 }
16423 }
16424 else if (first_arg)
16425 avoid_func_arg_motion (first_arg, insn);
16426 }
16427
16428 /* Hook for pre-reload schedule - set priority of moves from likely spilled
16429    HW registers to the maximum, to schedule them as soon as possible.  These are
16430 moves from function argument registers at the top of the function entry
16431 and moves from function return value registers after call. */
16432 static int
16433 ix86_adjust_priority (rtx_insn *insn, int priority)
16434 {
16435 rtx set;
16436
16437 if (reload_completed)
16438 return priority;
16439
16440 if (!NONDEBUG_INSN_P (insn))
16441 return priority;
16442
16443 set = single_set (insn);
16444 if (set)
16445 {
16446 rtx tmp = SET_SRC (set);
16447 if (REG_P (tmp)
16448 && HARD_REGISTER_P (tmp)
16449 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
16450 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
16451 return current_sched_info->sched_max_insns_priority;
16452 }
16453
16454 return priority;
16455 }
16456
16457 /* Prepare for scheduling pass. */
16458 static void
16459 ix86_sched_init_global (FILE *, int, int)
16460 {
16461 /* Install scheduling hooks for current CPU. Some of these hooks are used
16462 in time-critical parts of the scheduler, so we only set them up when
16463 they are actually used. */
16464 switch (ix86_tune)
16465 {
16466 case PROCESSOR_CORE2:
16467 case PROCESSOR_NEHALEM:
16468 case PROCESSOR_SANDYBRIDGE:
16469 case PROCESSOR_HASWELL:
16470 case PROCESSOR_GENERIC:
16471 /* Do not perform multipass scheduling for pre-reload schedule
16472 to save compile time. */
16473 if (reload_completed)
16474 {
16475 ix86_core2i7_init_hooks ();
16476 break;
16477 }
16478 /* Fall through. */
16479 default:
16480 targetm.sched.dfa_post_advance_cycle = NULL;
16481 targetm.sched.first_cycle_multipass_init = NULL;
16482 targetm.sched.first_cycle_multipass_begin = NULL;
16483 targetm.sched.first_cycle_multipass_issue = NULL;
16484 targetm.sched.first_cycle_multipass_backtrack = NULL;
16485 targetm.sched.first_cycle_multipass_end = NULL;
16486 targetm.sched.first_cycle_multipass_fini = NULL;
16487 break;
16488 }
16489 }
16490
16491
16492 /* Implement TARGET_STATIC_RTX_ALIGNMENT. */
16493
16494 static HOST_WIDE_INT
16495 ix86_static_rtx_alignment (machine_mode mode)
16496 {
16497 if (mode == DFmode)
16498 return 64;
16499 if (ALIGN_MODE_128 (mode))
16500 return MAX (128, GET_MODE_ALIGNMENT (mode));
16501 return GET_MODE_ALIGNMENT (mode);
16502 }
16503
16504 /* Implement TARGET_CONSTANT_ALIGNMENT. */
16505
16506 static HOST_WIDE_INT
16507 ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
16508 {
16509 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
16510 || TREE_CODE (exp) == INTEGER_CST)
16511 {
16512 machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
16513 HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
16514 return MAX (mode_align, align);
16515 }
16516 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16517 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16518 return BITS_PER_WORD;
16519
16520 return align;
16521 }
16522
16523 /* Implement TARGET_EMPTY_RECORD_P. */
16524
16525 static bool
16526 ix86_is_empty_record (const_tree type)
16527 {
16528 if (!TARGET_64BIT)
16529 return false;
16530 return default_is_empty_record (type);
16531 }
16532
16533 /* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */
16534
16535 static void
16536 ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
16537 {
16538 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
16539
16540 if (!cum->warn_empty)
16541 return;
16542
16543 if (!TYPE_EMPTY_P (type))
16544 return;
16545
16546 /* Don't warn if the function isn't visible outside of the TU. */
16547 if (cum->decl && !TREE_PUBLIC (cum->decl))
16548 return;
16549
16550 const_tree ctx = get_ultimate_context (cum->decl);
16551 if (ctx != NULL_TREE
16552 && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
16553 return;
16554
16555 /* If the actual size of the type is zero, then there is no change
16556 in how objects of this size are passed. */
16557 if (int_size_in_bytes (type) == 0)
16558 return;
16559
16560 warning (OPT_Wabi, "empty class %qT parameter passing ABI "
16561 "changes in %<-fabi-version=12%> (GCC 8)", type);
16562
16563 /* Only warn once. */
16564 cum->warn_empty = false;
16565 }
16566
16567 /* This hook returns name of multilib ABI. */
16568
16569 static const char *
16570 ix86_get_multilib_abi_name (void)
16571 {
16572 if (!(TARGET_64BIT_P (ix86_isa_flags)))
16573 return "i386";
16574 else if (TARGET_X32_P (ix86_isa_flags))
16575 return "x32";
16576 else
16577 return "x86_64";
16578 }
16579
16580 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
16581 the data type, and ALIGN is the alignment that the object would
16582 ordinarily have. */
16583
16584 static int
16585 iamcu_alignment (tree type, int align)
16586 {
16587 machine_mode mode;
16588
16589 if (align < 32 || TYPE_USER_ALIGN (type))
16590 return align;
16591
16592 /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
16593 bytes. */
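  /* Illustrative note (added; not taken from the psABI text): with this
     rule a 'double' or 'long long', whose natural alignment would be
     64 bits, is capped at 32 bits under -miamcu, while aggregates fall
     through to the default case below and keep their ordinary alignment.  */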
16594 mode = TYPE_MODE (strip_array_types (type));
16595 switch (GET_MODE_CLASS (mode))
16596 {
16597 case MODE_INT:
16598 case MODE_COMPLEX_INT:
16599 case MODE_COMPLEX_FLOAT:
16600 case MODE_FLOAT:
16601 case MODE_DECIMAL_FLOAT:
16602 return 32;
16603 default:
16604 return align;
16605 }
16606 }
16607
16608 /* Compute the alignment for a static variable.
16609 TYPE is the data type, and ALIGN is the alignment that
16610 the object would ordinarily have. The value of this function is used
16611 instead of that alignment to align the object. */
16612
16613 int
16614 ix86_data_alignment (tree type, unsigned int align, bool opt)
16615 {
16616 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
16617 for symbols from other compilation units or symbols that don't need
16618 to bind locally. In order to preserve some ABI compatibility with
16619 those compilers, ensure we don't decrease alignment from what we
16620 used to assume. */
16621
16622 unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
16623
16624   /* A data structure equal to or greater than the size of a cache line
16625      (64 bytes in the Pentium 4 and other recent Intel processors, including
16626      processors based on Intel Core microarchitecture) should be aligned
16627      so that its base address is a multiple of the cache line size.  */
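  /* For example (illustrative; the exact figure depends on the tuned
     prefetch_block): with a 64-byte cache line, max_align below becomes
     512 bits, so a 64-byte or larger aggregate is given 64-byte alignment
     when OPT is true.  */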
16628
16629 unsigned int max_align
16630 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
16631
16632 if (max_align < BITS_PER_WORD)
16633 max_align = BITS_PER_WORD;
16634
16635 switch (ix86_align_data_type)
16636 {
16637 case ix86_align_data_type_abi: opt = false; break;
16638 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
16639 case ix86_align_data_type_cacheline: break;
16640 }
16641
16642 if (TARGET_IAMCU)
16643 align = iamcu_alignment (type, align);
16644
16645 if (opt
16646 && AGGREGATE_TYPE_P (type)
16647 && TYPE_SIZE (type)
16648 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
16649 {
16650 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
16651 && align < max_align_compat)
16652 align = max_align_compat;
16653 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
16654 && align < max_align)
16655 align = max_align;
16656 }
16657
16658 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16659      to a 16-byte boundary.  */
16660 if (TARGET_64BIT)
16661 {
16662 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
16663 && TYPE_SIZE (type)
16664 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16665 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
16666 && align < 128)
16667 return 128;
16668 }
16669
16670 if (!opt)
16671 return align;
16672
16673 if (TREE_CODE (type) == ARRAY_TYPE)
16674 {
16675 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16676 return 64;
16677 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16678 return 128;
16679 }
16680 else if (TREE_CODE (type) == COMPLEX_TYPE)
16681 {
16682
16683 if (TYPE_MODE (type) == DCmode && align < 64)
16684 return 64;
16685 if ((TYPE_MODE (type) == XCmode
16686 || TYPE_MODE (type) == TCmode) && align < 128)
16687 return 128;
16688 }
16689 else if ((TREE_CODE (type) == RECORD_TYPE
16690 || TREE_CODE (type) == UNION_TYPE
16691 || TREE_CODE (type) == QUAL_UNION_TYPE)
16692 && TYPE_FIELDS (type))
16693 {
16694 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16695 return 64;
16696 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16697 return 128;
16698 }
16699 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16700 || TREE_CODE (type) == INTEGER_TYPE)
16701 {
16702 if (TYPE_MODE (type) == DFmode && align < 64)
16703 return 64;
16704 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16705 return 128;
16706 }
16707
16708 return align;
16709 }
16710
16711 /* Compute the alignment for a local variable or a stack slot. EXP is
16712 the data type or decl itself, MODE is the widest mode available and
16713 ALIGN is the alignment that the object would ordinarily have. The
16714 value of this macro is used instead of that alignment to align the
16715 object. */
16716
16717 unsigned int
16718 ix86_local_alignment (tree exp, machine_mode mode,
16719 unsigned int align)
16720 {
16721 tree type, decl;
16722
16723 if (exp && DECL_P (exp))
16724 {
16725 type = TREE_TYPE (exp);
16726 decl = exp;
16727 }
16728 else
16729 {
16730 type = exp;
16731 decl = NULL;
16732 }
16733
16734 /* Don't do dynamic stack realignment for long long objects with
16735 -mpreferred-stack-boundary=2. */
16736 if (!TARGET_64BIT
16737 && align == 64
16738 && ix86_preferred_stack_boundary < 64
16739 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
16740 && (!type || !TYPE_USER_ALIGN (type))
16741 && (!decl || !DECL_USER_ALIGN (decl)))
16742 align = 32;
16743
16744   /* If TYPE is NULL, we are allocating a stack slot for a caller-save
16745 register in MODE. We will return the largest alignment of XF
16746 and DF. */
16747 if (!type)
16748 {
16749 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
16750 align = GET_MODE_ALIGNMENT (DFmode);
16751 return align;
16752 }
16753
16754 /* Don't increase alignment for Intel MCU psABI. */
16755 if (TARGET_IAMCU)
16756 return align;
16757
16758 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16759      to a 16-byte boundary.  The exact wording is:
16760
16761 An array uses the same alignment as its elements, except that a local or
16762 global array variable of length at least 16 bytes or
16763 a C99 variable-length array variable always has alignment of at least 16 bytes.
16764
16765      This was added to allow use of aligned SSE instructions on arrays.  This
16766      rule is meant for static storage (where the compiler cannot do the
16767      analysis by itself).  We follow it for automatic variables only when
16768      convenient.  We fully control everything in the function being compiled,
16769      and functions from other units cannot rely on the alignment.
16770
16771      Exclude the va_list type.  It is the common case of a local array where
16772      we cannot benefit from the alignment.
16773
16774 TODO: Probably one should optimize for size only when var is not escaping. */
16775 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
16776 && TARGET_SSE)
16777 {
16778 if (AGGREGATE_TYPE_P (type)
16779 && (va_list_type_node == NULL_TREE
16780 || (TYPE_MAIN_VARIANT (type)
16781 != TYPE_MAIN_VARIANT (va_list_type_node)))
16782 && TYPE_SIZE (type)
16783 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16784 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
16785 && align < 128)
16786 return 128;
16787 }
16788 if (TREE_CODE (type) == ARRAY_TYPE)
16789 {
16790 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16791 return 64;
16792 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16793 return 128;
16794 }
16795 else if (TREE_CODE (type) == COMPLEX_TYPE)
16796 {
16797 if (TYPE_MODE (type) == DCmode && align < 64)
16798 return 64;
16799 if ((TYPE_MODE (type) == XCmode
16800 || TYPE_MODE (type) == TCmode) && align < 128)
16801 return 128;
16802 }
16803 else if ((TREE_CODE (type) == RECORD_TYPE
16804 || TREE_CODE (type) == UNION_TYPE
16805 || TREE_CODE (type) == QUAL_UNION_TYPE)
16806 && TYPE_FIELDS (type))
16807 {
16808 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16809 return 64;
16810 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16811 return 128;
16812 }
16813 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16814 || TREE_CODE (type) == INTEGER_TYPE)
16815 {
16816
16817 if (TYPE_MODE (type) == DFmode && align < 64)
16818 return 64;
16819 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16820 return 128;
16821 }
16822 return align;
16823 }
16824
16825 /* Compute the minimum required alignment for dynamic stack realignment
16826 purposes for a local variable, parameter or a stack slot. EXP is
16827 the data type or decl itself, MODE is its mode and ALIGN is the
16828 alignment that the object would ordinarily have. */
16829
16830 unsigned int
16831 ix86_minimum_alignment (tree exp, machine_mode mode,
16832 unsigned int align)
16833 {
16834 tree type, decl;
16835
16836 if (exp && DECL_P (exp))
16837 {
16838 type = TREE_TYPE (exp);
16839 decl = exp;
16840 }
16841 else
16842 {
16843 type = exp;
16844 decl = NULL;
16845 }
16846
16847 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
16848 return align;
16849
16850 /* Don't do dynamic stack realignment for long long objects with
16851 -mpreferred-stack-boundary=2. */
16852 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
16853 && (!type || !TYPE_USER_ALIGN (type))
16854 && (!decl || !DECL_USER_ALIGN (decl)))
16855 {
16856 gcc_checking_assert (!TARGET_STV);
16857 return 32;
16858 }
16859
16860 return align;
16861 }
16862
16863 /* Find a location for the static chain incoming to a nested function.
16864 This is a register, unless all free registers are used by arguments. */
16865
16866 static rtx
16867 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
16868 {
16869 unsigned regno;
16870
16871 if (TARGET_64BIT)
16872 {
16873 /* We always use R10 in 64-bit mode. */
16874 regno = R10_REG;
16875 }
16876 else
16877 {
16878 const_tree fntype, fndecl;
16879 unsigned int ccvt;
16880
16881 /* By default in 32-bit mode we use ECX to pass the static chain. */
16882 regno = CX_REG;
16883
16884 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
16885 {
16886 fntype = TREE_TYPE (fndecl_or_type);
16887 fndecl = fndecl_or_type;
16888 }
16889 else
16890 {
16891 fntype = fndecl_or_type;
16892 fndecl = NULL;
16893 }
16894
16895 ccvt = ix86_get_callcvt (fntype);
16896 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
16897 {
16898 /* Fastcall functions use ecx/edx for arguments, which leaves
16899 us with EAX for the static chain.
16900 Thiscall functions use ecx for arguments, which also
16901 leaves us with EAX for the static chain. */
16902 regno = AX_REG;
16903 }
16904 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
16905 {
16906 /* Thiscall functions use ecx for arguments, which leaves
16907 us with EAX and EDX for the static chain.
16908 		 We use EAX for ABI compatibility.  */
16909 regno = AX_REG;
16910 }
16911 else if (ix86_function_regparm (fntype, fndecl) == 3)
16912 {
16913 /* For regparm 3, we have no free call-clobbered registers in
16914 which to store the static chain. In order to implement this,
16915 we have the trampoline push the static chain to the stack.
16916 However, we can't push a value below the return address when
16917 we call the nested function directly, so we have to use an
16918 alternate entry point. For this we use ESI, and have the
16919 alternate entry point push ESI, so that things appear the
16920 same once we're executing the nested function. */
16921 if (incoming_p)
16922 {
16923 if (fndecl == current_function_decl
16924 && !ix86_static_chain_on_stack)
16925 {
16926 gcc_assert (!reload_completed);
16927 ix86_static_chain_on_stack = true;
16928 }
16929 return gen_frame_mem (SImode,
16930 plus_constant (Pmode,
16931 arg_pointer_rtx, -8));
16932 }
16933 regno = SI_REG;
16934 }
16935 }
16936
16937 return gen_rtx_REG (Pmode, regno);
16938 }
16939
16940 /* Emit RTL insns to initialize the variable parts of a trampoline.
16941 FNDECL is the decl of the target address; M_TRAMP is a MEM for
16942 the trampoline, and CHAIN_VALUE is an RTX for the static chain
16943 to be passed to the target function. */
16944
16945 static void
16946 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
16947 {
16948 rtx mem, fnaddr;
16949 int opcode;
16950 int offset = 0;
16951 bool need_endbr = (flag_cf_protection & CF_BRANCH);
16952
16953 fnaddr = XEXP (DECL_RTL (fndecl), 0);
16954
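  /* A sketch of the 64-bit trampoline emitted below (added illustration;
     byte values are derived from the constants used, and the movabs forms
     are used when the target address does not fit a zero-extended 32-bit
     immediate):

       [endbr64]                      f3 0f 1e fa
       movl/movabs $fnaddr, %r11      41 bb imm32  /  49 bb imm64
       movl/movabs $chain,  %r10      41 ba imm32  /  49 ba imm64
       jmp *%r11; nop                 49 ff e3 90  */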
16955 if (TARGET_64BIT)
16956 {
16957 int size;
16958
16959 if (need_endbr)
16960 {
16961 /* Insert ENDBR64. */
16962 mem = adjust_address (m_tramp, SImode, offset);
16963 emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
16964 offset += 4;
16965 }
16966
16967 /* Load the function address to r11. Try to load address using
16968 the shorter movl instead of movabs. We may want to support
16969 	     movq for kernel mode, but the kernel does not use trampolines at
16970 	     the moment.  FNADDR is a 32-bit address and may not be in
16971 DImode when ptr_mode == SImode. Always use movl in this
16972 case. */
16973 if (ptr_mode == SImode
16974 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
16975 {
16976 fnaddr = copy_addr_to_reg (fnaddr);
16977
16978 mem = adjust_address (m_tramp, HImode, offset);
16979 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
16980
16981 mem = adjust_address (m_tramp, SImode, offset + 2);
16982 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
16983 offset += 6;
16984 }
16985 else
16986 {
16987 mem = adjust_address (m_tramp, HImode, offset);
16988 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
16989
16990 mem = adjust_address (m_tramp, DImode, offset + 2);
16991 emit_move_insn (mem, fnaddr);
16992 offset += 10;
16993 }
16994
16995 /* Load static chain using movabs to r10. Use the shorter movl
16996 instead of movabs when ptr_mode == SImode. */
16997 if (ptr_mode == SImode)
16998 {
16999 opcode = 0xba41;
17000 size = 6;
17001 }
17002 else
17003 {
17004 opcode = 0xba49;
17005 size = 10;
17006 }
17007
17008 mem = adjust_address (m_tramp, HImode, offset);
17009 emit_move_insn (mem, gen_int_mode (opcode, HImode));
17010
17011 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
17012 emit_move_insn (mem, chain_value);
17013 offset += size;
17014
17015 /* Jump to r11; the last (unused) byte is a nop, only there to
17016 pad the write out to a single 32-bit store. */
17017 mem = adjust_address (m_tramp, SImode, offset);
17018 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
17019 offset += 4;
17020 }
17021 else
17022 {
17023 rtx disp, chain;
17024
17025 /* Depending on the static chain location, either load a register
17026 with a constant, or push the constant to the stack. All of the
17027 instructions are the same size. */
17028 chain = ix86_static_chain (fndecl, true);
17029 if (REG_P (chain))
17030 {
17031 switch (REGNO (chain))
17032 {
17033 case AX_REG:
17034 opcode = 0xb8; break;
17035 case CX_REG:
17036 opcode = 0xb9; break;
17037 default:
17038 gcc_unreachable ();
17039 }
17040 }
17041 else
17042 opcode = 0x68;
17043
17044 if (need_endbr)
17045 {
17046 /* Insert ENDBR32. */
17047 mem = adjust_address (m_tramp, SImode, offset);
17048 emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
17049 offset += 4;
17050 }
17051
17052 mem = adjust_address (m_tramp, QImode, offset);
17053 emit_move_insn (mem, gen_int_mode (opcode, QImode));
17054
17055 mem = adjust_address (m_tramp, SImode, offset + 1);
17056 emit_move_insn (mem, chain_value);
17057 offset += 5;
17058
17059 mem = adjust_address (m_tramp, QImode, offset);
17060 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
17061
17062 mem = adjust_address (m_tramp, SImode, offset + 1);
17063
17064 /* Compute offset from the end of the jmp to the target function.
17065 In the case in which the trampoline stores the static chain on
17066 the stack, we need to skip the first insn which pushes the
17067 (call-saved) register static chain; this push is 1 byte. */
17068 offset += 5;
17069 int skip = MEM_P (chain) ? 1 : 0;
17070 /* Skip ENDBR32 at the entry of the target function. */
17071 if (need_endbr
17072 && !cgraph_node::get (fndecl)->only_called_directly_p ())
17073 skip += 4;
17074 disp = expand_binop (SImode, sub_optab, fnaddr,
17075 plus_constant (Pmode, XEXP (m_tramp, 0),
17076 offset - skip),
17077 NULL_RTX, 1, OPTAB_DIRECT);
17078 emit_move_insn (mem, disp);
17079 }
17080
17081 gcc_assert (offset <= TRAMPOLINE_SIZE);
17082
17083 #ifdef HAVE_ENABLE_EXECUTE_STACK
17084 #ifdef CHECK_EXECUTE_STACK_ENABLED
17085 if (CHECK_EXECUTE_STACK_ENABLED)
17086 #endif
17087 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
17088 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
17089 #endif
17090 }
17091
17092 static bool
17093 ix86_allocate_stack_slots_for_args (void)
17094 {
17095 /* Naked functions should not allocate stack slots for arguments. */
17096 return !ix86_function_naked (current_function_decl);
17097 }
17098
17099 static bool
17100 ix86_warn_func_return (tree decl)
17101 {
17102 /* Naked functions are implemented entirely in assembly, including the
17103 return sequence, so suppress warnings about this. */
17104 return !ix86_function_naked (decl);
17105 }
17106
17107 /* Return the shift count of a vector-by-scalar shift builtin given its
17108    second argument ARG1.  */
17109 static tree
17110 ix86_vector_shift_count (tree arg1)
17111 {
17112 if (tree_fits_uhwi_p (arg1))
17113 return arg1;
17114 else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
17115 {
17116       /* The count argument is oddly passed in as one of various 128-bit
17117 	 (or 64-bit) vector types; its low 64 bits are the count.  */
17118 unsigned char buf[16];
17119 int len = native_encode_expr (arg1, buf, 16);
17120 if (len == 0)
17121 return NULL_TREE;
17122 tree t = native_interpret_expr (uint64_type_node, buf, len);
17123 if (t && tree_fits_uhwi_p (t))
17124 return t;
17125 }
17126 return NULL_TREE;
17127 }
17128
17129 static tree
17130 ix86_fold_builtin (tree fndecl, int n_args,
17131 tree *args, bool ignore ATTRIBUTE_UNUSED)
17132 {
17133 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
17134 {
17135 enum ix86_builtins fn_code
17136 = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
17137 enum rtx_code rcode;
17138 bool is_vshift;
17139 unsigned HOST_WIDE_INT mask;
17140
17141 switch (fn_code)
17142 {
17143 case IX86_BUILTIN_CPU_IS:
17144 case IX86_BUILTIN_CPU_SUPPORTS:
17145 gcc_assert (n_args == 1);
17146 return fold_builtin_cpu (fndecl, args);
17147
17148 case IX86_BUILTIN_NANQ:
17149 case IX86_BUILTIN_NANSQ:
17150 {
17151 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17152 const char *str = c_getstr (*args);
17153 int quiet = fn_code == IX86_BUILTIN_NANQ;
17154 REAL_VALUE_TYPE real;
17155
17156 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
17157 return build_real (type, real);
17158 return NULL_TREE;
17159 }
17160
17161 case IX86_BUILTIN_INFQ:
17162 case IX86_BUILTIN_HUGE_VALQ:
17163 {
17164 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17165 REAL_VALUE_TYPE inf;
17166 real_inf (&inf);
17167 return build_real (type, inf);
17168 }
17169
17170 case IX86_BUILTIN_TZCNT16:
17171 case IX86_BUILTIN_CTZS:
17172 case IX86_BUILTIN_TZCNT32:
17173 case IX86_BUILTIN_TZCNT64:
17174 gcc_assert (n_args == 1);
17175 if (TREE_CODE (args[0]) == INTEGER_CST)
17176 {
17177 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17178 tree arg = args[0];
17179 if (fn_code == IX86_BUILTIN_TZCNT16
17180 || fn_code == IX86_BUILTIN_CTZS)
17181 arg = fold_convert (short_unsigned_type_node, arg);
17182 if (integer_zerop (arg))
17183 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
17184 else
17185 return fold_const_call (CFN_CTZ, type, arg);
17186 }
17187 break;
17188
17189 case IX86_BUILTIN_LZCNT16:
17190 case IX86_BUILTIN_CLZS:
17191 case IX86_BUILTIN_LZCNT32:
17192 case IX86_BUILTIN_LZCNT64:
17193 gcc_assert (n_args == 1);
17194 if (TREE_CODE (args[0]) == INTEGER_CST)
17195 {
17196 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17197 tree arg = args[0];
17198 if (fn_code == IX86_BUILTIN_LZCNT16
17199 || fn_code == IX86_BUILTIN_CLZS)
17200 arg = fold_convert (short_unsigned_type_node, arg);
17201 if (integer_zerop (arg))
17202 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
17203 else
17204 return fold_const_call (CFN_CLZ, type, arg);
17205 }
17206 break;
17207
17208 case IX86_BUILTIN_BEXTR32:
17209 case IX86_BUILTIN_BEXTR64:
17210 case IX86_BUILTIN_BEXTRI32:
17211 case IX86_BUILTIN_BEXTRI64:
17212 gcc_assert (n_args == 2);
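	  /* Constant-fold BEXTR: bits 7:0 of args[1] select the start bit
	     and bits 15:8 the field length.  For example (illustration
	     only), 0x12345678 with selector 0x0804 (len 8, start 4) folds
	     to 0x67.  */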
17213 if (tree_fits_uhwi_p (args[1]))
17214 {
17215 unsigned HOST_WIDE_INT res = 0;
17216 unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
17217 unsigned int start = tree_to_uhwi (args[1]);
17218 unsigned int len = (start & 0xff00) >> 8;
17219 start &= 0xff;
17220 if (start >= prec || len == 0)
17221 res = 0;
17222 else if (!tree_fits_uhwi_p (args[0]))
17223 break;
17224 else
17225 res = tree_to_uhwi (args[0]) >> start;
17226 if (len > prec)
17227 len = prec;
17228 if (len < HOST_BITS_PER_WIDE_INT)
17229 res &= (HOST_WIDE_INT_1U << len) - 1;
17230 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
17231 }
17232 break;
17233
17234 case IX86_BUILTIN_BZHI32:
17235 case IX86_BUILTIN_BZHI64:
17236 gcc_assert (n_args == 2);
17237 if (tree_fits_uhwi_p (args[1]))
17238 {
17239 unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
17240 if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
17241 return args[0];
17242 if (idx == 0)
17243 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), 0);
17244 if (!tree_fits_uhwi_p (args[0]))
17245 break;
17246 unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
17247 res &= ~(HOST_WIDE_INT_M1U << idx);
17248 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
17249 }
17250 break;
17251
17252 case IX86_BUILTIN_PDEP32:
17253 case IX86_BUILTIN_PDEP64:
17254 gcc_assert (n_args == 2);
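	  /* Constant-fold PDEP: deposit the low bits of args[0] into the
	     set bit positions of args[1].  For example (illustration only),
	     _pdep_u32 (0b101, 0b11010) yields 0b10010.  */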
17255 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
17256 {
17257 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
17258 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
17259 unsigned HOST_WIDE_INT res = 0;
17260 unsigned HOST_WIDE_INT m, k = 1;
17261 for (m = 1; m; m <<= 1)
17262 if ((mask & m) != 0)
17263 {
17264 if ((src & k) != 0)
17265 res |= m;
17266 k <<= 1;
17267 }
17268 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
17269 }
17270 break;
17271
17272 case IX86_BUILTIN_PEXT32:
17273 case IX86_BUILTIN_PEXT64:
17274 gcc_assert (n_args == 2);
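	  /* Constant-fold PEXT: gather the bits of args[0] selected by the
	     set bits of args[1] into the low bits of the result.  For
	     example (illustration only), _pext_u32 (0b10010, 0b11010)
	     yields 0b101.  */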
17275 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
17276 {
17277 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
17278 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
17279 unsigned HOST_WIDE_INT res = 0;
17280 unsigned HOST_WIDE_INT m, k = 1;
17281 for (m = 1; m; m <<= 1)
17282 if ((mask & m) != 0)
17283 {
17284 if ((src & m) != 0)
17285 res |= k;
17286 k <<= 1;
17287 }
17288 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
17289 }
17290 break;
17291
17292 case IX86_BUILTIN_MOVMSKPS:
17293 case IX86_BUILTIN_PMOVMSKB:
17294 case IX86_BUILTIN_MOVMSKPD:
17295 case IX86_BUILTIN_PMOVMSKB128:
17296 case IX86_BUILTIN_MOVMSKPD256:
17297 case IX86_BUILTIN_MOVMSKPS256:
17298 case IX86_BUILTIN_PMOVMSKB256:
17299 gcc_assert (n_args == 1);
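	  /* Constant-fold the move-mask builtins by collecting the sign bit
	     of each element.  For example (illustration only), movmskps of
	     { -1.0f, 2.0f, -3.0f, 4.0f } folds to 0b0101 = 5.  */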
17300 if (TREE_CODE (args[0]) == VECTOR_CST)
17301 {
17302 HOST_WIDE_INT res = 0;
17303 for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
17304 {
17305 tree e = VECTOR_CST_ELT (args[0], i);
17306 if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
17307 {
17308 if (wi::neg_p (wi::to_wide (e)))
17309 res |= HOST_WIDE_INT_1 << i;
17310 }
17311 else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
17312 {
17313 if (TREE_REAL_CST (e).sign)
17314 res |= HOST_WIDE_INT_1 << i;
17315 }
17316 else
17317 return NULL_TREE;
17318 }
17319 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
17320 }
17321 break;
17322
17323 case IX86_BUILTIN_PSLLD:
17324 case IX86_BUILTIN_PSLLD128:
17325 case IX86_BUILTIN_PSLLD128_MASK:
17326 case IX86_BUILTIN_PSLLD256:
17327 case IX86_BUILTIN_PSLLD256_MASK:
17328 case IX86_BUILTIN_PSLLD512:
17329 case IX86_BUILTIN_PSLLDI:
17330 case IX86_BUILTIN_PSLLDI128:
17331 case IX86_BUILTIN_PSLLDI128_MASK:
17332 case IX86_BUILTIN_PSLLDI256:
17333 case IX86_BUILTIN_PSLLDI256_MASK:
17334 case IX86_BUILTIN_PSLLDI512:
17335 case IX86_BUILTIN_PSLLQ:
17336 case IX86_BUILTIN_PSLLQ128:
17337 case IX86_BUILTIN_PSLLQ128_MASK:
17338 case IX86_BUILTIN_PSLLQ256:
17339 case IX86_BUILTIN_PSLLQ256_MASK:
17340 case IX86_BUILTIN_PSLLQ512:
17341 case IX86_BUILTIN_PSLLQI:
17342 case IX86_BUILTIN_PSLLQI128:
17343 case IX86_BUILTIN_PSLLQI128_MASK:
17344 case IX86_BUILTIN_PSLLQI256:
17345 case IX86_BUILTIN_PSLLQI256_MASK:
17346 case IX86_BUILTIN_PSLLQI512:
17347 case IX86_BUILTIN_PSLLW:
17348 case IX86_BUILTIN_PSLLW128:
17349 case IX86_BUILTIN_PSLLW128_MASK:
17350 case IX86_BUILTIN_PSLLW256:
17351 case IX86_BUILTIN_PSLLW256_MASK:
17352 case IX86_BUILTIN_PSLLW512_MASK:
17353 case IX86_BUILTIN_PSLLWI:
17354 case IX86_BUILTIN_PSLLWI128:
17355 case IX86_BUILTIN_PSLLWI128_MASK:
17356 case IX86_BUILTIN_PSLLWI256:
17357 case IX86_BUILTIN_PSLLWI256_MASK:
17358 case IX86_BUILTIN_PSLLWI512_MASK:
17359 rcode = ASHIFT;
17360 is_vshift = false;
17361 goto do_shift;
17362 case IX86_BUILTIN_PSRAD:
17363 case IX86_BUILTIN_PSRAD128:
17364 case IX86_BUILTIN_PSRAD128_MASK:
17365 case IX86_BUILTIN_PSRAD256:
17366 case IX86_BUILTIN_PSRAD256_MASK:
17367 case IX86_BUILTIN_PSRAD512:
17368 case IX86_BUILTIN_PSRADI:
17369 case IX86_BUILTIN_PSRADI128:
17370 case IX86_BUILTIN_PSRADI128_MASK:
17371 case IX86_BUILTIN_PSRADI256:
17372 case IX86_BUILTIN_PSRADI256_MASK:
17373 case IX86_BUILTIN_PSRADI512:
17374 case IX86_BUILTIN_PSRAQ128_MASK:
17375 case IX86_BUILTIN_PSRAQ256_MASK:
17376 case IX86_BUILTIN_PSRAQ512:
17377 case IX86_BUILTIN_PSRAQI128_MASK:
17378 case IX86_BUILTIN_PSRAQI256_MASK:
17379 case IX86_BUILTIN_PSRAQI512:
17380 case IX86_BUILTIN_PSRAW:
17381 case IX86_BUILTIN_PSRAW128:
17382 case IX86_BUILTIN_PSRAW128_MASK:
17383 case IX86_BUILTIN_PSRAW256:
17384 case IX86_BUILTIN_PSRAW256_MASK:
17385 case IX86_BUILTIN_PSRAW512:
17386 case IX86_BUILTIN_PSRAWI:
17387 case IX86_BUILTIN_PSRAWI128:
17388 case IX86_BUILTIN_PSRAWI128_MASK:
17389 case IX86_BUILTIN_PSRAWI256:
17390 case IX86_BUILTIN_PSRAWI256_MASK:
17391 case IX86_BUILTIN_PSRAWI512:
17392 rcode = ASHIFTRT;
17393 is_vshift = false;
17394 goto do_shift;
17395 case IX86_BUILTIN_PSRLD:
17396 case IX86_BUILTIN_PSRLD128:
17397 case IX86_BUILTIN_PSRLD128_MASK:
17398 case IX86_BUILTIN_PSRLD256:
17399 case IX86_BUILTIN_PSRLD256_MASK:
17400 case IX86_BUILTIN_PSRLD512:
17401 case IX86_BUILTIN_PSRLDI:
17402 case IX86_BUILTIN_PSRLDI128:
17403 case IX86_BUILTIN_PSRLDI128_MASK:
17404 case IX86_BUILTIN_PSRLDI256:
17405 case IX86_BUILTIN_PSRLDI256_MASK:
17406 case IX86_BUILTIN_PSRLDI512:
17407 case IX86_BUILTIN_PSRLQ:
17408 case IX86_BUILTIN_PSRLQ128:
17409 case IX86_BUILTIN_PSRLQ128_MASK:
17410 case IX86_BUILTIN_PSRLQ256:
17411 case IX86_BUILTIN_PSRLQ256_MASK:
17412 case IX86_BUILTIN_PSRLQ512:
17413 case IX86_BUILTIN_PSRLQI:
17414 case IX86_BUILTIN_PSRLQI128:
17415 case IX86_BUILTIN_PSRLQI128_MASK:
17416 case IX86_BUILTIN_PSRLQI256:
17417 case IX86_BUILTIN_PSRLQI256_MASK:
17418 case IX86_BUILTIN_PSRLQI512:
17419 case IX86_BUILTIN_PSRLW:
17420 case IX86_BUILTIN_PSRLW128:
17421 case IX86_BUILTIN_PSRLW128_MASK:
17422 case IX86_BUILTIN_PSRLW256:
17423 case IX86_BUILTIN_PSRLW256_MASK:
17424 case IX86_BUILTIN_PSRLW512:
17425 case IX86_BUILTIN_PSRLWI:
17426 case IX86_BUILTIN_PSRLWI128:
17427 case IX86_BUILTIN_PSRLWI128_MASK:
17428 case IX86_BUILTIN_PSRLWI256:
17429 case IX86_BUILTIN_PSRLWI256_MASK:
17430 case IX86_BUILTIN_PSRLWI512:
17431 rcode = LSHIFTRT;
17432 is_vshift = false;
17433 goto do_shift;
17434 case IX86_BUILTIN_PSLLVV16HI:
17435 case IX86_BUILTIN_PSLLVV16SI:
17436 case IX86_BUILTIN_PSLLVV2DI:
17437 case IX86_BUILTIN_PSLLVV2DI_MASK:
17438 case IX86_BUILTIN_PSLLVV32HI:
17439 case IX86_BUILTIN_PSLLVV4DI:
17440 case IX86_BUILTIN_PSLLVV4DI_MASK:
17441 case IX86_BUILTIN_PSLLVV4SI:
17442 case IX86_BUILTIN_PSLLVV4SI_MASK:
17443 case IX86_BUILTIN_PSLLVV8DI:
17444 case IX86_BUILTIN_PSLLVV8HI:
17445 case IX86_BUILTIN_PSLLVV8SI:
17446 case IX86_BUILTIN_PSLLVV8SI_MASK:
17447 rcode = ASHIFT;
17448 is_vshift = true;
17449 goto do_shift;
17450 case IX86_BUILTIN_PSRAVQ128:
17451 case IX86_BUILTIN_PSRAVQ256:
17452 case IX86_BUILTIN_PSRAVV16HI:
17453 case IX86_BUILTIN_PSRAVV16SI:
17454 case IX86_BUILTIN_PSRAVV32HI:
17455 case IX86_BUILTIN_PSRAVV4SI:
17456 case IX86_BUILTIN_PSRAVV4SI_MASK:
17457 case IX86_BUILTIN_PSRAVV8DI:
17458 case IX86_BUILTIN_PSRAVV8HI:
17459 case IX86_BUILTIN_PSRAVV8SI:
17460 case IX86_BUILTIN_PSRAVV8SI_MASK:
17461 rcode = ASHIFTRT;
17462 is_vshift = true;
17463 goto do_shift;
17464 case IX86_BUILTIN_PSRLVV16HI:
17465 case IX86_BUILTIN_PSRLVV16SI:
17466 case IX86_BUILTIN_PSRLVV2DI:
17467 case IX86_BUILTIN_PSRLVV2DI_MASK:
17468 case IX86_BUILTIN_PSRLVV32HI:
17469 case IX86_BUILTIN_PSRLVV4DI:
17470 case IX86_BUILTIN_PSRLVV4DI_MASK:
17471 case IX86_BUILTIN_PSRLVV4SI:
17472 case IX86_BUILTIN_PSRLVV4SI_MASK:
17473 case IX86_BUILTIN_PSRLVV8DI:
17474 case IX86_BUILTIN_PSRLVV8HI:
17475 case IX86_BUILTIN_PSRLVV8SI:
17476 case IX86_BUILTIN_PSRLVV8SI_MASK:
17477 rcode = LSHIFTRT;
17478 is_vshift = true;
17479 goto do_shift;
17480
17481 do_shift:
17482 gcc_assert (n_args >= 2);
17483 if (TREE_CODE (args[0]) != VECTOR_CST)
17484 break;
17485 mask = HOST_WIDE_INT_M1U;
17486 if (n_args > 2)
17487 {
17488 	      /* This is a masked shift.  */
17489 if (!tree_fits_uhwi_p (args[n_args - 1])
17490 || TREE_SIDE_EFFECTS (args[n_args - 2]))
17491 break;
17492 mask = tree_to_uhwi (args[n_args - 1]);
17493 unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
17494 mask |= HOST_WIDE_INT_M1U << elems;
17495 if (mask != HOST_WIDE_INT_M1U
17496 && TREE_CODE (args[n_args - 2]) != VECTOR_CST)
17497 break;
17498 if (mask == (HOST_WIDE_INT_M1U << elems))
17499 return args[n_args - 2];
17500 }
17501 if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
17502 break;
17503 if (tree tem = (is_vshift ? integer_one_node
17504 : ix86_vector_shift_count (args[1])))
17505 {
17506 unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
17507 unsigned HOST_WIDE_INT prec
17508 = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
17509 if (count == 0 && mask == HOST_WIDE_INT_M1U)
17510 return args[0];
17511 if (count >= prec)
17512 {
17513 if (rcode == ASHIFTRT)
17514 count = prec - 1;
17515 else if (mask == HOST_WIDE_INT_M1U)
17516 return build_zero_cst (TREE_TYPE (args[0]));
17517 }
17518 tree countt = NULL_TREE;
17519 if (!is_vshift)
17520 {
17521 if (count >= prec)
17522 countt = integer_zero_node;
17523 else
17524 countt = build_int_cst (integer_type_node, count);
17525 }
17526 tree_vector_builder builder;
17527 if (mask != HOST_WIDE_INT_M1U || is_vshift)
17528 builder.new_vector (TREE_TYPE (args[0]),
17529 TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])),
17530 1);
17531 else
17532 builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
17533 false);
17534 unsigned int cnt = builder.encoded_nelts ();
17535 for (unsigned int i = 0; i < cnt; ++i)
17536 {
17537 tree elt = VECTOR_CST_ELT (args[0], i);
17538 if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
17539 return NULL_TREE;
17540 tree type = TREE_TYPE (elt);
17541 if (rcode == LSHIFTRT)
17542 elt = fold_convert (unsigned_type_for (type), elt);
17543 if (is_vshift)
17544 {
17545 countt = VECTOR_CST_ELT (args[1], i);
17546 if (TREE_CODE (countt) != INTEGER_CST
17547 || TREE_OVERFLOW (countt))
17548 return NULL_TREE;
17549 if (wi::neg_p (wi::to_wide (countt))
17550 || wi::to_widest (countt) >= prec)
17551 {
17552 if (rcode == ASHIFTRT)
17553 countt = build_int_cst (TREE_TYPE (countt),
17554 prec - 1);
17555 else
17556 {
17557 elt = build_zero_cst (TREE_TYPE (elt));
17558 countt = build_zero_cst (TREE_TYPE (countt));
17559 }
17560 }
17561 }
17562 else if (count >= prec)
17563 elt = build_zero_cst (TREE_TYPE (elt));
17564 elt = const_binop (rcode == ASHIFT
17565 ? LSHIFT_EXPR : RSHIFT_EXPR,
17566 TREE_TYPE (elt), elt, countt);
17567 if (!elt || TREE_CODE (elt) != INTEGER_CST)
17568 return NULL_TREE;
17569 if (rcode == LSHIFTRT)
17570 elt = fold_convert (type, elt);
17571 if ((mask & (HOST_WIDE_INT_1U << i)) == 0)
17572 {
17573 elt = VECTOR_CST_ELT (args[n_args - 2], i);
17574 if (TREE_CODE (elt) != INTEGER_CST
17575 || TREE_OVERFLOW (elt))
17576 return NULL_TREE;
17577 }
17578 builder.quick_push (elt);
17579 }
17580 return builder.build ();
17581 }
17582 break;
17583
17584 default:
17585 break;
17586 }
17587 }
17588
17589 #ifdef SUBTARGET_FOLD_BUILTIN
17590 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
17591 #endif
17592
17593 return NULL_TREE;
17594 }
17595
17596 /* Fold an MD builtin (use ix86_fold_builtin for folding into
17597    a constant) in GIMPLE.  */
17598
17599 bool
17600 ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
17601 {
17602 gimple *stmt = gsi_stmt (*gsi);
17603 tree fndecl = gimple_call_fndecl (stmt);
17604 gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
17605 int n_args = gimple_call_num_args (stmt);
17606 enum ix86_builtins fn_code
17607 = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
17608 tree decl = NULL_TREE;
17609 tree arg0, arg1, arg2;
17610 enum rtx_code rcode;
17611 unsigned HOST_WIDE_INT count;
17612 bool is_vshift;
17613
17614 switch (fn_code)
17615 {
17616 case IX86_BUILTIN_TZCNT32:
17617 decl = builtin_decl_implicit (BUILT_IN_CTZ);
17618 goto fold_tzcnt_lzcnt;
17619
17620 case IX86_BUILTIN_TZCNT64:
17621 decl = builtin_decl_implicit (BUILT_IN_CTZLL);
17622 goto fold_tzcnt_lzcnt;
17623
17624 case IX86_BUILTIN_LZCNT32:
17625 decl = builtin_decl_implicit (BUILT_IN_CLZ);
17626 goto fold_tzcnt_lzcnt;
17627
17628 case IX86_BUILTIN_LZCNT64:
17629 decl = builtin_decl_implicit (BUILT_IN_CLZLL);
17630 goto fold_tzcnt_lzcnt;
17631
17632 fold_tzcnt_lzcnt:
17633 gcc_assert (n_args == 1);
17634 arg0 = gimple_call_arg (stmt, 0);
17635 if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
17636 {
17637 int prec = TYPE_PRECISION (TREE_TYPE (arg0));
17638 	  /* If arg0 is provably non-zero, optimize into the generic
17639 	     __builtin_c[tl]z{,ll} functions, which the middle-end
17640 	     handles better.  */
17641 if (!expr_not_equal_to (arg0, wi::zero (prec)))
17642 return false;
17643
17644 location_t loc = gimple_location (stmt);
17645 gimple *g = gimple_build_call (decl, 1, arg0);
17646 gimple_set_location (g, loc);
17647 tree lhs = make_ssa_name (integer_type_node);
17648 gimple_call_set_lhs (g, lhs);
17649 gsi_insert_before (gsi, g, GSI_SAME_STMT);
17650 g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
17651 gimple_set_location (g, loc);
17652 gsi_replace (gsi, g, false);
17653 return true;
17654 }
17655 break;
17656
17657 case IX86_BUILTIN_BZHI32:
17658 case IX86_BUILTIN_BZHI64:
17659 gcc_assert (n_args == 2);
17660 arg1 = gimple_call_arg (stmt, 1);
17661 if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
17662 {
17663 unsigned int idx = tree_to_uhwi (arg1) & 0xff;
17664 arg0 = gimple_call_arg (stmt, 0);
17665 if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
17666 break;
17667 location_t loc = gimple_location (stmt);
17668 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
17669 gimple_set_location (g, loc);
17670 gsi_replace (gsi, g, false);
17671 return true;
17672 }
17673 break;
17674
17675 case IX86_BUILTIN_PDEP32:
17676 case IX86_BUILTIN_PDEP64:
17677 case IX86_BUILTIN_PEXT32:
17678 case IX86_BUILTIN_PEXT64:
17679 gcc_assert (n_args == 2);
17680 arg1 = gimple_call_arg (stmt, 1);
17681 if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
17682 {
17683 location_t loc = gimple_location (stmt);
17684 arg0 = gimple_call_arg (stmt, 0);
17685 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
17686 gimple_set_location (g, loc);
17687 gsi_replace (gsi, g, false);
17688 return true;
17689 }
17690 break;
17691
17692 case IX86_BUILTIN_PSLLD:
17693 case IX86_BUILTIN_PSLLD128:
17694 case IX86_BUILTIN_PSLLD128_MASK:
17695 case IX86_BUILTIN_PSLLD256:
17696 case IX86_BUILTIN_PSLLD256_MASK:
17697 case IX86_BUILTIN_PSLLD512:
17698 case IX86_BUILTIN_PSLLDI:
17699 case IX86_BUILTIN_PSLLDI128:
17700 case IX86_BUILTIN_PSLLDI128_MASK:
17701 case IX86_BUILTIN_PSLLDI256:
17702 case IX86_BUILTIN_PSLLDI256_MASK:
17703 case IX86_BUILTIN_PSLLDI512:
17704 case IX86_BUILTIN_PSLLQ:
17705 case IX86_BUILTIN_PSLLQ128:
17706 case IX86_BUILTIN_PSLLQ128_MASK:
17707 case IX86_BUILTIN_PSLLQ256:
17708 case IX86_BUILTIN_PSLLQ256_MASK:
17709 case IX86_BUILTIN_PSLLQ512:
17710 case IX86_BUILTIN_PSLLQI:
17711 case IX86_BUILTIN_PSLLQI128:
17712 case IX86_BUILTIN_PSLLQI128_MASK:
17713 case IX86_BUILTIN_PSLLQI256:
17714 case IX86_BUILTIN_PSLLQI256_MASK:
17715 case IX86_BUILTIN_PSLLQI512:
17716 case IX86_BUILTIN_PSLLW:
17717 case IX86_BUILTIN_PSLLW128:
17718 case IX86_BUILTIN_PSLLW128_MASK:
17719 case IX86_BUILTIN_PSLLW256:
17720 case IX86_BUILTIN_PSLLW256_MASK:
17721 case IX86_BUILTIN_PSLLW512_MASK:
17722 case IX86_BUILTIN_PSLLWI:
17723 case IX86_BUILTIN_PSLLWI128:
17724 case IX86_BUILTIN_PSLLWI128_MASK:
17725 case IX86_BUILTIN_PSLLWI256:
17726 case IX86_BUILTIN_PSLLWI256_MASK:
17727 case IX86_BUILTIN_PSLLWI512_MASK:
17728 rcode = ASHIFT;
17729 is_vshift = false;
17730 goto do_shift;
17731 case IX86_BUILTIN_PSRAD:
17732 case IX86_BUILTIN_PSRAD128:
17733 case IX86_BUILTIN_PSRAD128_MASK:
17734 case IX86_BUILTIN_PSRAD256:
17735 case IX86_BUILTIN_PSRAD256_MASK:
17736 case IX86_BUILTIN_PSRAD512:
17737 case IX86_BUILTIN_PSRADI:
17738 case IX86_BUILTIN_PSRADI128:
17739 case IX86_BUILTIN_PSRADI128_MASK:
17740 case IX86_BUILTIN_PSRADI256:
17741 case IX86_BUILTIN_PSRADI256_MASK:
17742 case IX86_BUILTIN_PSRADI512:
17743 case IX86_BUILTIN_PSRAQ128_MASK:
17744 case IX86_BUILTIN_PSRAQ256_MASK:
17745 case IX86_BUILTIN_PSRAQ512:
17746 case IX86_BUILTIN_PSRAQI128_MASK:
17747 case IX86_BUILTIN_PSRAQI256_MASK:
17748 case IX86_BUILTIN_PSRAQI512:
17749 case IX86_BUILTIN_PSRAW:
17750 case IX86_BUILTIN_PSRAW128:
17751 case IX86_BUILTIN_PSRAW128_MASK:
17752 case IX86_BUILTIN_PSRAW256:
17753 case IX86_BUILTIN_PSRAW256_MASK:
17754 case IX86_BUILTIN_PSRAW512:
17755 case IX86_BUILTIN_PSRAWI:
17756 case IX86_BUILTIN_PSRAWI128:
17757 case IX86_BUILTIN_PSRAWI128_MASK:
17758 case IX86_BUILTIN_PSRAWI256:
17759 case IX86_BUILTIN_PSRAWI256_MASK:
17760 case IX86_BUILTIN_PSRAWI512:
17761 rcode = ASHIFTRT;
17762 is_vshift = false;
17763 goto do_shift;
17764 case IX86_BUILTIN_PSRLD:
17765 case IX86_BUILTIN_PSRLD128:
17766 case IX86_BUILTIN_PSRLD128_MASK:
17767 case IX86_BUILTIN_PSRLD256:
17768 case IX86_BUILTIN_PSRLD256_MASK:
17769 case IX86_BUILTIN_PSRLD512:
17770 case IX86_BUILTIN_PSRLDI:
17771 case IX86_BUILTIN_PSRLDI128:
17772 case IX86_BUILTIN_PSRLDI128_MASK:
17773 case IX86_BUILTIN_PSRLDI256:
17774 case IX86_BUILTIN_PSRLDI256_MASK:
17775 case IX86_BUILTIN_PSRLDI512:
17776 case IX86_BUILTIN_PSRLQ:
17777 case IX86_BUILTIN_PSRLQ128:
17778 case IX86_BUILTIN_PSRLQ128_MASK:
17779 case IX86_BUILTIN_PSRLQ256:
17780 case IX86_BUILTIN_PSRLQ256_MASK:
17781 case IX86_BUILTIN_PSRLQ512:
17782 case IX86_BUILTIN_PSRLQI:
17783 case IX86_BUILTIN_PSRLQI128:
17784 case IX86_BUILTIN_PSRLQI128_MASK:
17785 case IX86_BUILTIN_PSRLQI256:
17786 case IX86_BUILTIN_PSRLQI256_MASK:
17787 case IX86_BUILTIN_PSRLQI512:
17788 case IX86_BUILTIN_PSRLW:
17789 case IX86_BUILTIN_PSRLW128:
17790 case IX86_BUILTIN_PSRLW128_MASK:
17791 case IX86_BUILTIN_PSRLW256:
17792 case IX86_BUILTIN_PSRLW256_MASK:
17793 case IX86_BUILTIN_PSRLW512:
17794 case IX86_BUILTIN_PSRLWI:
17795 case IX86_BUILTIN_PSRLWI128:
17796 case IX86_BUILTIN_PSRLWI128_MASK:
17797 case IX86_BUILTIN_PSRLWI256:
17798 case IX86_BUILTIN_PSRLWI256_MASK:
17799 case IX86_BUILTIN_PSRLWI512:
17800 rcode = LSHIFTRT;
17801 is_vshift = false;
17802 goto do_shift;
17803 case IX86_BUILTIN_PSLLVV16HI:
17804 case IX86_BUILTIN_PSLLVV16SI:
17805 case IX86_BUILTIN_PSLLVV2DI:
17806 case IX86_BUILTIN_PSLLVV2DI_MASK:
17807 case IX86_BUILTIN_PSLLVV32HI:
17808 case IX86_BUILTIN_PSLLVV4DI:
17809 case IX86_BUILTIN_PSLLVV4DI_MASK:
17810 case IX86_BUILTIN_PSLLVV4SI:
17811 case IX86_BUILTIN_PSLLVV4SI_MASK:
17812 case IX86_BUILTIN_PSLLVV8DI:
17813 case IX86_BUILTIN_PSLLVV8HI:
17814 case IX86_BUILTIN_PSLLVV8SI:
17815 case IX86_BUILTIN_PSLLVV8SI_MASK:
17816 rcode = ASHIFT;
17817 is_vshift = true;
17818 goto do_shift;
17819 case IX86_BUILTIN_PSRAVQ128:
17820 case IX86_BUILTIN_PSRAVQ256:
17821 case IX86_BUILTIN_PSRAVV16HI:
17822 case IX86_BUILTIN_PSRAVV16SI:
17823 case IX86_BUILTIN_PSRAVV32HI:
17824 case IX86_BUILTIN_PSRAVV4SI:
17825 case IX86_BUILTIN_PSRAVV4SI_MASK:
17826 case IX86_BUILTIN_PSRAVV8DI:
17827 case IX86_BUILTIN_PSRAVV8HI:
17828 case IX86_BUILTIN_PSRAVV8SI:
17829 case IX86_BUILTIN_PSRAVV8SI_MASK:
17830 rcode = ASHIFTRT;
17831 is_vshift = true;
17832 goto do_shift;
17833 case IX86_BUILTIN_PSRLVV16HI:
17834 case IX86_BUILTIN_PSRLVV16SI:
17835 case IX86_BUILTIN_PSRLVV2DI:
17836 case IX86_BUILTIN_PSRLVV2DI_MASK:
17837 case IX86_BUILTIN_PSRLVV32HI:
17838 case IX86_BUILTIN_PSRLVV4DI:
17839 case IX86_BUILTIN_PSRLVV4DI_MASK:
17840 case IX86_BUILTIN_PSRLVV4SI:
17841 case IX86_BUILTIN_PSRLVV4SI_MASK:
17842 case IX86_BUILTIN_PSRLVV8DI:
17843 case IX86_BUILTIN_PSRLVV8HI:
17844 case IX86_BUILTIN_PSRLVV8SI:
17845 case IX86_BUILTIN_PSRLVV8SI_MASK:
17846 rcode = LSHIFTRT;
17847 is_vshift = true;
17848 goto do_shift;
17849
17850 do_shift:
17851 gcc_assert (n_args >= 2);
17852 if (!gimple_call_lhs (stmt))
17853 break;
17854 arg0 = gimple_call_arg (stmt, 0);
17855 arg1 = gimple_call_arg (stmt, 1);
17856 if (n_args > 2)
17857 {
17858 	  /* This is a masked shift.  Only optimize if the mask is all ones.  */
17859 tree argl = gimple_call_arg (stmt, n_args - 1);
17860 if (!tree_fits_uhwi_p (argl))
17861 break;
17862 unsigned HOST_WIDE_INT mask = tree_to_uhwi (argl);
17863 unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
17864 if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
17865 break;
17866 }
17867 if (is_vshift)
17868 {
17869 if (TREE_CODE (arg1) != VECTOR_CST)
17870 break;
17871 count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)));
17872 if (integer_zerop (arg1))
17873 count = 0;
17874 else if (rcode == ASHIFTRT)
17875 break;
17876 else
17877 for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i)
17878 {
17879 tree elt = VECTOR_CST_ELT (arg1, i);
17880 if (!wi::neg_p (wi::to_wide (elt))
17881 && wi::to_widest (elt) < count)
17882 return false;
17883 }
17884 }
17885 else
17886 {
17887 arg1 = ix86_vector_shift_count (arg1);
17888 if (!arg1)
17889 break;
17890 count = tree_to_uhwi (arg1);
17891 }
17892 if (count == 0)
17893 {
17894 /* Just return the first argument for shift by 0. */
17895 location_t loc = gimple_location (stmt);
17896 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
17897 gimple_set_location (g, loc);
17898 gsi_replace (gsi, g, false);
17899 return true;
17900 }
17901 if (rcode != ASHIFTRT
17902 && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
17903 {
17904 	  /* For shift counts equal to or greater than the precision, the
17905 	     result is zero, except for arithmetic right shift.  */
17906 location_t loc = gimple_location (stmt);
17907 gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
17908 build_zero_cst (TREE_TYPE (arg0)));
17909 gimple_set_location (g, loc);
17910 gsi_replace (gsi, g, false);
17911 return true;
17912 }
17913 break;
17914
17915 case IX86_BUILTIN_SHUFPD:
17916 arg2 = gimple_call_arg (stmt, 2);
17917 if (TREE_CODE (arg2) == INTEGER_CST && gimple_call_lhs (stmt))
17918 {
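	  /* Rewrite the shuffle as a generic VEC_PERM_EXPR.  For example
	     (illustration only), __builtin_ia32_shufpd (a, b, 1) becomes a
	     VEC_PERM_EXPR with selector { 1, 2 }, i.e. { a[1], b[0] }.  */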
17919 location_t loc = gimple_location (stmt);
17920 unsigned HOST_WIDE_INT imask = TREE_INT_CST_LOW (arg2);
17921 arg0 = gimple_call_arg (stmt, 0);
17922 arg1 = gimple_call_arg (stmt, 1);
17923 tree itype = long_long_integer_type_node;
17924 tree vtype = build_vector_type (itype, 2); /* V2DI */
17925 tree_vector_builder elts (vtype, 2, 1);
17926 /* Ignore bits other than the lowest 2. */
17927 elts.quick_push (build_int_cst (itype, imask & 1));
17928 imask >>= 1;
17929 elts.quick_push (build_int_cst (itype, 2 + (imask & 1)));
17930 tree omask = elts.build ();
17931 gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
17932 VEC_PERM_EXPR,
17933 arg0, arg1, omask);
17934 gimple_set_location (g, loc);
17935 gsi_replace (gsi, g, false);
17936 return true;
17937 }
17938 // Do not error yet, the constant could be propagated later?
17939 break;
17940
17941 default:
17942 break;
17943 }
17944
17945 return false;
17946 }
17947
17948 /* Handler for an SVML-style interface to
17949 a library with vectorized intrinsics. */
17950
17951 tree
17952 ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
17953 {
17954 char name[20];
17955 tree fntype, new_fndecl, args;
17956 unsigned arity;
17957 const char *bname;
17958 machine_mode el_mode, in_mode;
17959 int n, in_n;
17960
17961 /* The SVML is suitable for unsafe math only. */
17962 if (!flag_unsafe_math_optimizations)
17963 return NULL_TREE;
17964
17965 el_mode = TYPE_MODE (TREE_TYPE (type_out));
17966 n = TYPE_VECTOR_SUBPARTS (type_out);
17967 in_mode = TYPE_MODE (TREE_TYPE (type_in));
17968 in_n = TYPE_VECTOR_SUBPARTS (type_in);
17969 if (el_mode != in_mode
17970 || n != in_n)
17971 return NULL_TREE;
17972
17973 switch (fn)
17974 {
17975 CASE_CFN_EXP:
17976 CASE_CFN_LOG:
17977 CASE_CFN_LOG10:
17978 CASE_CFN_POW:
17979 CASE_CFN_TANH:
17980 CASE_CFN_TAN:
17981 CASE_CFN_ATAN:
17982 CASE_CFN_ATAN2:
17983 CASE_CFN_ATANH:
17984 CASE_CFN_CBRT:
17985 CASE_CFN_SINH:
17986 CASE_CFN_SIN:
17987 CASE_CFN_ASINH:
17988 CASE_CFN_ASIN:
17989 CASE_CFN_COSH:
17990 CASE_CFN_COS:
17991 CASE_CFN_ACOSH:
17992 CASE_CFN_ACOS:
17993 if ((el_mode != DFmode || n != 2)
17994 && (el_mode != SFmode || n != 4))
17995 return NULL_TREE;
17996 break;
17997
17998 default:
17999 return NULL_TREE;
18000 }
18001
18002 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
18003 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
18004
18005 if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
18006 strcpy (name, "vmlsLn4");
18007 else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
18008 strcpy (name, "vmldLn2");
18009 else if (n == 4)
18010 {
18011 sprintf (name, "vmls%s", bname+10);
18012 name[strlen (name)-1] = '4';
18013 }
18014 else
18015 sprintf (name, "vmld%s2", bname+10);
18016
18017 /* Convert to uppercase. */
18018 name[4] &= ~0x20;
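  /* For instance (illustration derived from the mangling above), sinf on
     a 4-element SFmode vector yields "vmlsSin4" and sin on a 2-element
     DFmode vector yields "vmldSin2".  */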
18019
18020 arity = 0;
18021 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
18022 arity++;
18023
18024 if (arity == 1)
18025 fntype = build_function_type_list (type_out, type_in, NULL);
18026 else
18027 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
18028
18029 /* Build a function declaration for the vectorized function. */
18030 new_fndecl = build_decl (BUILTINS_LOCATION,
18031 FUNCTION_DECL, get_identifier (name), fntype);
18032 TREE_PUBLIC (new_fndecl) = 1;
18033 DECL_EXTERNAL (new_fndecl) = 1;
18034 DECL_IS_NOVOPS (new_fndecl) = 1;
18035 TREE_READONLY (new_fndecl) = 1;
18036
18037 return new_fndecl;
18038 }
18039
18040 /* Handler for an ACML-style interface to
18041 a library with vectorized intrinsics. */
18042
18043 tree
18044 ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
18045 {
18046 char name[20] = "__vr.._";
18047 tree fntype, new_fndecl, args;
18048 unsigned arity;
18049 const char *bname;
18050 machine_mode el_mode, in_mode;
18051 int n, in_n;
18052
18053   /* The ACML is 64-bit only and suitable for unsafe math only, as
18054      it does not correctly support parts of IEEE with the required
18055      precision, such as denormals.  */
18056 if (!TARGET_64BIT
18057 || !flag_unsafe_math_optimizations)
18058 return NULL_TREE;
18059
18060 el_mode = TYPE_MODE (TREE_TYPE (type_out));
18061 n = TYPE_VECTOR_SUBPARTS (type_out);
18062 in_mode = TYPE_MODE (TREE_TYPE (type_in));
18063 in_n = TYPE_VECTOR_SUBPARTS (type_in);
18064 if (el_mode != in_mode
18065 || n != in_n)
18066 return NULL_TREE;
18067
18068 switch (fn)
18069 {
18070 CASE_CFN_SIN:
18071 CASE_CFN_COS:
18072 CASE_CFN_EXP:
18073 CASE_CFN_LOG:
18074 CASE_CFN_LOG2:
18075 CASE_CFN_LOG10:
18076 if (el_mode == DFmode && n == 2)
18077 {
18078 name[4] = 'd';
18079 name[5] = '2';
18080 }
18081 else if (el_mode == SFmode && n == 4)
18082 {
18083 name[4] = 's';
18084 name[5] = '4';
18085 }
18086 else
18087 return NULL_TREE;
18088 break;
18089
18090 default:
18091 return NULL_TREE;
18092 }
18093
18094 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
18095 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
18096 sprintf (name + 7, "%s", bname+10);
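  /* For instance (illustration derived from the mangling above), this
     produces "__vrd2_sin" for sin on V2DF and "__vrs4_sinf" for sinf
     on V4SF.  */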
18097
18098 arity = 0;
18099 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
18100 arity++;
18101
18102 if (arity == 1)
18103 fntype = build_function_type_list (type_out, type_in, NULL);
18104 else
18105 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
18106
18107 /* Build a function declaration for the vectorized function. */
18108 new_fndecl = build_decl (BUILTINS_LOCATION,
18109 FUNCTION_DECL, get_identifier (name), fntype);
18110 TREE_PUBLIC (new_fndecl) = 1;
18111 DECL_EXTERNAL (new_fndecl) = 1;
18112 DECL_IS_NOVOPS (new_fndecl) = 1;
18113 TREE_READONLY (new_fndecl) = 1;
18114
18115 return new_fndecl;
18116 }
18117
18118 /* Returns the decl of a function that implements a scatter store with
18119    register type VECTYPE, index type INDEX_TYPE and scale SCALE.
18120    Return NULL_TREE if it is not available.  */
18121
18122 static tree
18123 ix86_vectorize_builtin_scatter (const_tree vectype,
18124 const_tree index_type, int scale)
18125 {
18126 bool si;
18127 enum ix86_builtins code;
18128
18129 if (!TARGET_AVX512F)
18130 return NULL_TREE;
18131
18132 if ((TREE_CODE (index_type) != INTEGER_TYPE
18133 && !POINTER_TYPE_P (index_type))
18134 || (TYPE_MODE (index_type) != SImode
18135 && TYPE_MODE (index_type) != DImode))
18136 return NULL_TREE;
18137
18138 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
18139 return NULL_TREE;
18140
18141 /* v*scatter* insn sign extends index to pointer mode. */
18142 if (TYPE_PRECISION (index_type) < POINTER_SIZE
18143 && TYPE_UNSIGNED (index_type))
18144 return NULL_TREE;
18145
18146 /* Scale can be 1, 2, 4 or 8. */
18147 if (scale <= 0
18148 || scale > 8
18149 || (scale & (scale - 1)) != 0)
18150 return NULL_TREE;
18151
18152 si = TYPE_MODE (index_type) == SImode;
18153 switch (TYPE_MODE (vectype))
18154 {
18155 case E_V8DFmode:
18156 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
18157 break;
18158 case E_V8DImode:
18159 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
18160 break;
18161 case E_V16SFmode:
18162 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
18163 break;
18164 case E_V16SImode:
18165 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
18166 break;
18167 case E_V4DFmode:
18168 if (TARGET_AVX512VL)
18169 code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
18170 else
18171 return NULL_TREE;
18172 break;
18173 case E_V4DImode:
18174 if (TARGET_AVX512VL)
18175 code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
18176 else
18177 return NULL_TREE;
18178 break;
18179 case E_V8SFmode:
18180 if (TARGET_AVX512VL)
18181 code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
18182 else
18183 return NULL_TREE;
18184 break;
18185 case E_V8SImode:
18186 if (TARGET_AVX512VL)
18187 code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
18188 else
18189 return NULL_TREE;
18190 break;
18191 case E_V2DFmode:
18192 if (TARGET_AVX512VL)
18193 code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
18194 else
18195 return NULL_TREE;
18196 break;
18197 case E_V2DImode:
18198 if (TARGET_AVX512VL)
18199 code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
18200 else
18201 return NULL_TREE;
18202 break;
18203 case E_V4SFmode:
18204 if (TARGET_AVX512VL)
18205 code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
18206 else
18207 return NULL_TREE;
18208 break;
18209 case E_V4SImode:
18210 if (TARGET_AVX512VL)
18211 code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
18212 else
18213 return NULL_TREE;
18214 break;
18215 default:
18216 return NULL_TREE;
18217 }
18218
18219 return get_ix86_builtin (code);
18220 }
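
/* For illustration: with TARGET_AVX512F, a V8DF destination vector and a
   DImode index type select IX86_BUILTIN_SCATTERDIV8DF above, while an
   SImode index selects IX86_BUILTIN_SCATTERALTSIV8DF; the 256-bit and
   128-bit variants additionally require TARGET_AVX512VL.  */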
18221
18222 /* Return true if it is safe to use the rsqrt optabs to optimize
18223 1.0/sqrt. */
18224
18225 static bool
18226 use_rsqrt_p ()
18227 {
18228 return (TARGET_SSE && TARGET_SSE_MATH
18229 && flag_finite_math_only
18230 && !flag_trapping_math
18231 && flag_unsafe_math_optimizations);
18232 }
18233
18234 /* Helper for avx_vpermilps256_operand et al. This is also used by
18235 the expansion functions to turn the parallel back into a mask.
18236 The return value is 0 for no match and the imm8+1 for a match. */
18237
18238 int
18239 avx_vpermilp_parallel (rtx par, machine_mode mode)
18240 {
18241 unsigned i, nelt = GET_MODE_NUNITS (mode);
18242 unsigned mask = 0;
18243 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
18244
18245 if (XVECLEN (par, 0) != (int) nelt)
18246 return 0;
18247
18248 /* Validate that all of the elements are constants, and not totally
18249 out of range. Copy the data into an integral array to make the
18250 subsequent checks easier. */
18251 for (i = 0; i < nelt; ++i)
18252 {
18253 rtx er = XVECEXP (par, 0, i);
18254 unsigned HOST_WIDE_INT ei;
18255
18256 if (!CONST_INT_P (er))
18257 return 0;
18258 ei = INTVAL (er);
18259 if (ei >= nelt)
18260 return 0;
18261 ipar[i] = ei;
18262 }
18263
18264 switch (mode)
18265 {
18266 case E_V8DFmode:
18267 /* In the 512-bit DFmode case, we can only move elements within
18268 a 128-bit lane. First fill the second part of the mask,
18269 then fallthru. */
18270 for (i = 4; i < 6; ++i)
18271 {
18272 if (ipar[i] < 4 || ipar[i] >= 6)
18273 return 0;
18274 mask |= (ipar[i] - 4) << i;
18275 }
18276 for (i = 6; i < 8; ++i)
18277 {
18278 if (ipar[i] < 6)
18279 return 0;
18280 mask |= (ipar[i] - 6) << i;
18281 }
18282 /* FALLTHRU */
18283
18284 case E_V4DFmode:
18285 /* In the 256-bit DFmode case, we can only move elements within
18286 a 128-bit lane. */
18287 for (i = 0; i < 2; ++i)
18288 {
18289 if (ipar[i] >= 2)
18290 return 0;
18291 mask |= ipar[i] << i;
18292 }
18293 for (i = 2; i < 4; ++i)
18294 {
18295 if (ipar[i] < 2)
18296 return 0;
18297 mask |= (ipar[i] - 2) << i;
18298 }
18299 break;
18300
18301 case E_V16SFmode:
18302 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
18303 must mirror the permutation in the lower 256 bits. */
18304 for (i = 0; i < 8; ++i)
18305 if (ipar[i] + 8 != ipar[i + 8])
18306 return 0;
18307 /* FALLTHRU */
18308
18309 case E_V8SFmode:
18310 /* In the 256-bit SFmode case, we have full freedom of
18311 movement within the low 128-bit lane, but the high 128-bit
18312 lane must mirror the exact same pattern. */
18313 for (i = 0; i < 4; ++i)
18314 if (ipar[i] + 4 != ipar[i + 4])
18315 return 0;
18316 nelt = 4;
18317 /* FALLTHRU */
18318
18319 case E_V2DFmode:
18320 case E_V4SFmode:
18321 /* In the 128-bit case, we have full freedom in the placement of
18322 the elements from the source operand. */
18323 for (i = 0; i < nelt; ++i)
18324 mask |= ipar[i] << (i * (nelt / 2));
18325 break;
18326
18327 default:
18328 gcc_unreachable ();
18329 }
18330
18331 /* Make sure success has a non-zero value by adding one. */
18332 return mask + 1;
18333 }
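
/* Worked example (V4DFmode): the parallel [1, 0, 3, 2] passes the lane
   checks above and yields mask = 1<<0 | 0<<1 | (3-2)<<2 | (2-2)<<3 = 0b0101,
   so the function returns 0b0101 + 1 = 6, i.e. imm8 5.  */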
18334
18335 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
18336 the expansion functions to turn the parallel back into a mask.
18337 The return value is 0 for no match and the imm8+1 for a match. */
18338
18339 int
18340 avx_vperm2f128_parallel (rtx par, machine_mode mode)
18341 {
18342 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
18343 unsigned mask = 0;
18344 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
18345
18346 if (XVECLEN (par, 0) != (int) nelt)
18347 return 0;
18348
18349 /* Validate that all of the elements are constants, and not totally
18350 out of range. Copy the data into an integral array to make the
18351 subsequent checks easier. */
18352 for (i = 0; i < nelt; ++i)
18353 {
18354 rtx er = XVECEXP (par, 0, i);
18355 unsigned HOST_WIDE_INT ei;
18356
18357 if (!CONST_INT_P (er))
18358 return 0;
18359 ei = INTVAL (er);
18360 if (ei >= 2 * nelt)
18361 return 0;
18362 ipar[i] = ei;
18363 }
18364
18365 /* Validate that each half of the permute selects consecutive elements. */
18366 for (i = 0; i < nelt2 - 1; ++i)
18367 if (ipar[i] + 1 != ipar[i + 1])
18368 return 0;
18369 for (i = nelt2; i < nelt - 1; ++i)
18370 if (ipar[i] + 1 != ipar[i + 1])
18371 return 0;
18372
18373 /* Reconstruct the mask. */
18374 for (i = 0; i < 2; ++i)
18375 {
18376 unsigned e = ipar[i * nelt2];
18377 if (e % nelt2)
18378 return 0;
18379 e /= nelt2;
18380 mask |= e << (i * 4);
18381 }
18382
18383 /* Make sure success has a non-zero value by adding one. */
18384 return mask + 1;
18385 }
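
/* Worked example (V8SFmode, nelt = 8, nelt2 = 4): the parallel
   [8, 9, 10, 11, 4, 5, 6, 7] has contiguous halves; ipar[0] = 8 gives
   8 / 4 = 2 for the low nibble and ipar[4] = 4 gives 4 / 4 = 1 for the
   high nibble, so mask = 0x12 and the function returns 0x13.  */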
18386
18387 /* Return a register priority for hard reg REGNO. */
18388 static int
18389 ix86_register_priority (int hard_regno)
18390 {
18391 /* ebp and r13 as a base register always want a displacement, and r12 as a
18392 base always wants an index. So discourage their usage in an
18393 address. */
18394 if (hard_regno == R12_REG || hard_regno == R13_REG)
18395 return 0;
18396 if (hard_regno == BP_REG)
18397 return 1;
18398 /* New x86-64 int registers result in bigger code size. Discourage
18399 them. */
18400 if (IN_RANGE (hard_regno, FIRST_REX_INT_REG, LAST_REX_INT_REG))
18401 return 2;
18402 /* New x86-64 SSE registers result in bigger code size. Discourage
18403 them. */
18404 if (IN_RANGE (hard_regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG))
18405 return 2;
18406 if (IN_RANGE (hard_regno, FIRST_EXT_REX_SSE_REG, LAST_EXT_REX_SSE_REG))
18407 return 1;
18408 /* Usage of AX register results in smaller code. Prefer it. */
18409 if (hard_regno == AX_REG)
18410 return 4;
18411 return 3;
18412 }
18413
18414 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
18415
18416 Put float CONST_DOUBLE in the constant pool instead of fp regs.
18417 QImode must go into class Q_REGS.
18418 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
18419 movdf to do mem-to-mem moves through integer regs. */
18420
18421 static reg_class_t
18422 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
18423 {
18424 machine_mode mode = GET_MODE (x);
18425
18426 /* We're only allowed to return a subclass of CLASS. Many of the
18427 following checks fail for NO_REGS, so eliminate that early. */
18428 if (regclass == NO_REGS)
18429 return NO_REGS;
18430
18431 /* All classes can load zeros. */
18432 if (x == CONST0_RTX (mode))
18433 return regclass;
18434
18435 /* Force constants into memory if we are loading a (nonzero) constant into
18436 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
18437 instructions to load from a constant. */
18438 if (CONSTANT_P (x)
18439 && (MAYBE_MMX_CLASS_P (regclass)
18440 || MAYBE_SSE_CLASS_P (regclass)
18441 || MAYBE_MASK_CLASS_P (regclass)))
18442 return NO_REGS;
18443
18444 /* Floating-point constants need more complex checks. */
18445 if (CONST_DOUBLE_P (x))
18446 {
18447 /* General regs can load everything. */
18448 if (INTEGER_CLASS_P (regclass))
18449 return regclass;
18450
18451 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18452 zero above. We only want to wind up preferring 80387 registers if
18453 we plan on doing computation with them. */
18454 if (IS_STACK_MODE (mode)
18455 && standard_80387_constant_p (x) > 0)
18456 {
18457 /* Limit class to FP regs. */
18458 if (FLOAT_CLASS_P (regclass))
18459 return FLOAT_REGS;
18460 }
18461
18462 return NO_REGS;
18463 }
18464
18465 /* Prefer SSE regs only, if we can use them for math. */
18466 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
18467 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
18468
18469 /* Generally when we see PLUS here, it's the function invariant
18470 (plus soft-fp const_int). Which can only be computed into general
18471 regs. */
18472 if (GET_CODE (x) == PLUS)
18473 return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
18474
18475 /* QImode constants are easy to load, but non-constant QImode data
18476 must go into Q_REGS. */
18477 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
18478 {
18479 if (Q_CLASS_P (regclass))
18480 return regclass;
18481 else if (reg_class_subset_p (Q_REGS, regclass))
18482 return Q_REGS;
18483 else
18484 return NO_REGS;
18485 }
18486
18487 return regclass;
18488 }
18489
18490 /* Discourage putting floating-point values in SSE registers unless
18491 SSE math is being used, and likewise for the 387 registers. */
18492 static reg_class_t
18493 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
18494 {
18495 /* Restrict the output reload class to the register bank that we are doing
18496 math on. If we would like not to return a subset of CLASS, reject this
18497 alternative: if reload cannot do this, it will still use its choice. */
18498 machine_mode mode = GET_MODE (x);
18499 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
18500 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
18501
18502 if (IS_STACK_MODE (mode))
18503 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
18504
18505 return regclass;
18506 }
18507
18508 static reg_class_t
18509 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
18510 machine_mode mode, secondary_reload_info *sri)
18511 {
18512 /* Double-word spills from general registers to non-offsettable memory
18513 references (zero-extended addresses) require special handling. */
18514 if (TARGET_64BIT
18515 && MEM_P (x)
18516 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
18517 && INTEGER_CLASS_P (rclass)
18518 && !offsettable_memref_p (x))
18519 {
18520 sri->icode = (in_p
18521 ? CODE_FOR_reload_noff_load
18522 : CODE_FOR_reload_noff_store);
18523 /* Add the cost of moving address to a temporary. */
18524 sri->extra_cost = 1;
18525
18526 return NO_REGS;
18527 }
18528
18529 /* QImode spills from non-QI registers require
18530 an intermediate register on 32-bit targets. */
18531 if (mode == QImode
18532 && ((!TARGET_64BIT && !in_p
18533 && INTEGER_CLASS_P (rclass)
18534 && MAYBE_NON_Q_CLASS_P (rclass))
18535 || (!TARGET_AVX512DQ
18536 && MAYBE_MASK_CLASS_P (rclass))))
18537 {
18538 int regno = true_regnum (x);
18539
18540 /* Return Q_REGS if the operand is in memory. */
18541 if (regno == -1)
18542 return Q_REGS;
18543
18544 return NO_REGS;
18545 }
18546
18547 /* This condition handles the corner case where an expression involving
18548 pointers gets vectorized. We're trying to use the address of a
18549 stack slot as a vector initializer.
18550
18551 (set (reg:V2DI 74 [ vect_cst_.2 ])
18552 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
18553
18554 Eventually frame gets turned into sp+offset like this:
18555
18556 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18557 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
18558 (const_int 392 [0x188]))))
18559
18560 That later gets turned into:
18561
18562 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18563 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
18564 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
18565
18566 We'll have the following reload recorded:
18567
18568 Reload 0: reload_in (DI) =
18569 (plus:DI (reg/f:DI 7 sp)
18570 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
18571 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18572 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
18573 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
18574 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18575 reload_reg_rtx: (reg:V2DI 22 xmm1)
18576
18577 Which isn't going to work since SSE instructions can't handle scalar
18578 additions. Returning GENERAL_REGS forces the addition into integer
18579 register and reload can handle subsequent reloads without problems. */
18580
18581 if (in_p && GET_CODE (x) == PLUS
18582 && SSE_CLASS_P (rclass)
18583 && SCALAR_INT_MODE_P (mode))
18584 return GENERAL_REGS;
18585
18586 return NO_REGS;
18587 }
18588
18589 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
18590
18591 static bool
18592 ix86_class_likely_spilled_p (reg_class_t rclass)
18593 {
18594 switch (rclass)
18595 {
18596 case AREG:
18597 case DREG:
18598 case CREG:
18599 case BREG:
18600 case AD_REGS:
18601 case SIREG:
18602 case DIREG:
18603 case SSE_FIRST_REG:
18604 case FP_TOP_REG:
18605 case FP_SECOND_REG:
18606 return true;
18607
18608 default:
18609 break;
18610 }
18611
18612 return false;
18613 }
18614
18615 /* If we are copying between registers from different register sets
18616 (e.g. FP and integer), we may need a memory location.
18617
18618 The function can't work reliably when one of the CLASSES is a class
18619 containing registers from multiple sets. We avoid this by never combining
18620 different sets in a single alternative in the machine description.
18621 Ensure that this constraint holds to avoid unexpected surprises.
18622
18623 When STRICT is false, we are being called from REGISTER_MOVE_COST,
18624 so do not enforce these sanity checks.
18625
18626 To optimize register_move_cost performance, define inline variant. */
18627
18628 static inline bool
18629 inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
18630 reg_class_t class2, int strict)
18631 {
18632 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
18633 return false;
18634
18635 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
18636 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
18637 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
18638 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
18639 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
18640 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
18641 || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
18642 || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
18643 {
18644 gcc_assert (!strict || lra_in_progress);
18645 return true;
18646 }
18647
18648 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
18649 return true;
18650
18651 /* ??? This is a lie. We do have moves between mmx/general, and for
18652 mmx/sse2. But by saying we need secondary memory we discourage the
18653 register allocator from using the mmx registers unless needed. */
18654 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18655 return true;
18656
18657 /* Between mask and general, we have moves no larger than word size. */
18658 if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
18659 {
18660 if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
18661 || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
18662 return true;
18663 }
18664
18665 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18666 {
18667 /* SSE1 doesn't have any direct moves from other classes. */
18668 if (!TARGET_SSE2)
18669 return true;
18670
18671 /* Between SSE and general, we have moves no larger than word size. */
18672 if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
18673 || GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode)
18674 || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
18675 return true;
18676
18677 /* If the target says that inter-unit moves are more expensive
18678 than moving through memory, then don't generate them. */
18679 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
18680 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
18681 return true;
18682 }
18683
18684 return false;
18685 }
18686
18687 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
18688
18689 static bool
18690 ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
18691 reg_class_t class2)
18692 {
18693 return inline_secondary_memory_needed (mode, class1, class2, true);
18694 }
18695
18696 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
18697
18698 get_secondary_mem widens integral modes to BITS_PER_WORD.
18699 There is no need to emit a full 64-bit move on 64-bit targets
18700 for integral modes that can be moved using a 32-bit move. */
18701
18702 static machine_mode
18703 ix86_secondary_memory_needed_mode (machine_mode mode)
18704 {
18705 if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
18706 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
18707 return mode;
18708 }
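
/* For example, a QImode or HImode value being spilled through memory is
   widened to SImode here, so the secondary memory slot is accessed with a
   32-bit move even on 64-bit targets.  */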
18709
18710 /* Implement the TARGET_CLASS_MAX_NREGS hook.
18711
18712 On the 80386, this is the size of MODE in words,
18713 except in the FP regs, where a single reg is always enough. */
18714
18715 static unsigned char
18716 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
18717 {
18718 if (MAYBE_INTEGER_CLASS_P (rclass))
18719 {
18720 if (mode == XFmode)
18721 return (TARGET_64BIT ? 2 : 3);
18722 else if (mode == XCmode)
18723 return (TARGET_64BIT ? 4 : 6);
18724 else
18725 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
18726 }
18727 else
18728 {
18729 if (COMPLEX_MODE_P (mode))
18730 return 2;
18731 else
18732 return 1;
18733 }
18734 }
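
/* For example, an XFmode value needs three word-sized registers in the
   integer classes on a 32-bit target (two on a 64-bit target), but only a
   single register in the FP stack classes.  */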
18735
18736 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
18737
18738 static bool
18739 ix86_can_change_mode_class (machine_mode from, machine_mode to,
18740 reg_class_t regclass)
18741 {
18742 if (from == to)
18743 return true;
18744
18745 /* x87 registers can't do subreg at all, as all values are reformatted
18746 to extended precision. */
18747 if (MAYBE_FLOAT_CLASS_P (regclass))
18748 return false;
18749
18750 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
18751 {
18752 /* Vector registers do not support QI or HImode loads. If we don't
18753 disallow a change to these modes, reload will assume it's ok to
18754 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18755 the vec_dupv4hi pattern. */
18756 if (GET_MODE_SIZE (from) < 4)
18757 return false;
18758 }
18759
18760 return true;
18761 }
18762
18763 /* Return index of MODE in the sse load/store tables. */
18764
18765 static inline int
18766 sse_store_index (machine_mode mode)
18767 {
18768 switch (GET_MODE_SIZE (mode))
18769 {
18770 case 4:
18771 return 0;
18772 case 8:
18773 return 1;
18774 case 16:
18775 return 2;
18776 case 32:
18777 return 3;
18778 case 64:
18779 return 4;
18780 default:
18781 return -1;
18782 }
18783 }
18784
18785 /* Return the cost of moving data of mode M between a
18786 register and memory. A value of 2 is the default; this cost is
18787 relative to those in `REGISTER_MOVE_COST'.
18788
18789 This function is used extensively by register_move_cost that is used to
18790 build tables at startup. Make it inline in this case.
18791 When IN is 2, return maximum of in and out move cost.
18792
18793 If moving between registers and memory is more expensive than
18794 between two registers, you should define this macro to express the
18795 relative cost.
18796
18797 Also model the increased cost of moving QImode values in non-Q_REGS
18798 classes.
18799 */
18800 static inline int
18801 inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
18802 {
18803 int cost;
18804 if (FLOAT_CLASS_P (regclass))
18805 {
18806 int index;
18807 switch (mode)
18808 {
18809 case E_SFmode:
18810 index = 0;
18811 break;
18812 case E_DFmode:
18813 index = 1;
18814 break;
18815 case E_XFmode:
18816 index = 2;
18817 break;
18818 default:
18819 return 100;
18820 }
18821 if (in == 2)
18822 return MAX (ix86_cost->hard_register.fp_load [index],
18823 ix86_cost->hard_register.fp_store [index]);
18824 return in ? ix86_cost->hard_register.fp_load [index]
18825 : ix86_cost->hard_register.fp_store [index];
18826 }
18827 if (SSE_CLASS_P (regclass))
18828 {
18829 int index = sse_store_index (mode);
18830 if (index == -1)
18831 return 100;
18832 if (in == 2)
18833 return MAX (ix86_cost->hard_register.sse_load [index],
18834 ix86_cost->hard_register.sse_store [index]);
18835 return in ? ix86_cost->hard_register.sse_load [index]
18836 : ix86_cost->hard_register.sse_store [index];
18837 }
18838 if (MMX_CLASS_P (regclass))
18839 {
18840 int index;
18841 switch (GET_MODE_SIZE (mode))
18842 {
18843 case 4:
18844 index = 0;
18845 break;
18846 case 8:
18847 index = 1;
18848 break;
18849 default:
18850 return 100;
18851 }
18852 if (in == 2)
18853 return MAX (ix86_cost->hard_register.mmx_load [index],
18854 ix86_cost->hard_register.mmx_store [index]);
18855 return in ? ix86_cost->hard_register.mmx_load [index]
18856 : ix86_cost->hard_register.mmx_store [index];
18857 }
18858 switch (GET_MODE_SIZE (mode))
18859 {
18860 case 1:
18861 if (Q_CLASS_P (regclass) || TARGET_64BIT)
18862 {
18863 if (!in)
18864 return ix86_cost->hard_register.int_store[0];
18865 if (TARGET_PARTIAL_REG_DEPENDENCY
18866 && optimize_function_for_speed_p (cfun))
18867 cost = ix86_cost->hard_register.movzbl_load;
18868 else
18869 cost = ix86_cost->hard_register.int_load[0];
18870 if (in == 2)
18871 return MAX (cost, ix86_cost->hard_register.int_store[0]);
18872 return cost;
18873 }
18874 else
18875 {
18876 if (in == 2)
18877 return MAX (ix86_cost->hard_register.movzbl_load,
18878 ix86_cost->hard_register.int_store[0] + 4);
18879 if (in)
18880 return ix86_cost->hard_register.movzbl_load;
18881 else
18882 return ix86_cost->hard_register.int_store[0] + 4;
18883 }
18884 break;
18885 case 2:
18886 if (in == 2)
18887 return MAX (ix86_cost->hard_register.int_load[1],
18888 ix86_cost->hard_register.int_store[1]);
18889 return in ? ix86_cost->hard_register.int_load[1]
18890 : ix86_cost->hard_register.int_store[1];
18891 default:
18892 if (in == 2)
18893 cost = MAX (ix86_cost->hard_register.int_load[2],
18894 ix86_cost->hard_register.int_store[2]);
18895 else if (in)
18896 cost = ix86_cost->hard_register.int_load[2];
18897 else
18898 cost = ix86_cost->hard_register.int_store[2];
18899 /* Multiply with the number of GPR moves needed. */
18900 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
18901 }
18902 }
18903
18904 static int
18905 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
18906 {
18907 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
18908 }
18909
18910
18911 /* Return the cost of moving data from a register in class CLASS1 to
18912 one in class CLASS2.
18913
18914 It is not required that the cost always equal 2 when FROM is the same as TO;
18915 on some machines it is expensive to move between registers if they are not
18916 general registers. */
18917
18918 static int
18919 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
18920 reg_class_t class2_i)
18921 {
18922 enum reg_class class1 = (enum reg_class) class1_i;
18923 enum reg_class class2 = (enum reg_class) class2_i;
18924
18925 /* In case we require secondary memory, compute cost of the store followed
18926 by load. In order to avoid bad register allocation choices, we need
18927 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
18928
18929 if (inline_secondary_memory_needed (mode, class1, class2, false))
18930 {
18931 int cost = 1;
18932
18933 cost += inline_memory_move_cost (mode, class1, 2);
18934 cost += inline_memory_move_cost (mode, class2, 2);
18935
18936 /* When copying from a general purpose register we may emit multiple
18937 stores followed by a single load, causing a memory size mismatch stall.
18938 Count this as an arbitrarily high cost of 20. */
18939 if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
18940 && TARGET_MEMORY_MISMATCH_STALL
18941 && targetm.class_max_nregs (class1, mode)
18942 > targetm.class_max_nregs (class2, mode))
18943 cost += 20;
18944
18945 /* In the case of FP/MMX moves, the registers actually overlap, and we
18946 have to switch modes in order to treat them differently. */
18947 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
18948 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
18949 cost += 20;
18950
18951 return cost;
18952 }
18953
18954 /* Moves between MMX and non-MMX units require secondary memory. */
18955 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18956 gcc_unreachable ();
18957
18958 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18959 return (SSE_CLASS_P (class1)
18960 ? ix86_cost->hard_register.sse_to_integer
18961 : ix86_cost->hard_register.integer_to_sse);
18962
18963 if (MAYBE_FLOAT_CLASS_P (class1))
18964 return ix86_cost->hard_register.fp_move;
18965 if (MAYBE_SSE_CLASS_P (class1))
18966 {
18967 if (GET_MODE_BITSIZE (mode) <= 128)
18968 return ix86_cost->hard_register.xmm_move;
18969 if (GET_MODE_BITSIZE (mode) <= 256)
18970 return ix86_cost->hard_register.ymm_move;
18971 return ix86_cost->hard_register.zmm_move;
18972 }
18973 if (MAYBE_MMX_CLASS_P (class1))
18974 return ix86_cost->hard_register.mmx_move;
18975 return 2;
18976 }
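
/* For instance, a 128-bit move within the SSE classes is costed at
   xmm_move, a word-size move between SSE_REGS and GENERAL_REGS (when no
   secondary memory is needed) at sse_to_integer or integer_to_sse, and a
   move that does need secondary memory as the store cost plus the load
   cost computed above.  */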
18977
18978 /* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in
18979 words of a value of mode MODE but can be less for certain modes in
18980 special long registers.
18981
18982 Actually there are no two word move instructions for consecutive
18983 registers. And only registers 0-3 may have mov byte instructions
18984 applied to them. */
18985
18986 static unsigned int
18987 ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
18988 {
18989 if (GENERAL_REGNO_P (regno))
18990 {
18991 if (mode == XFmode)
18992 return TARGET_64BIT ? 2 : 3;
18993 if (mode == XCmode)
18994 return TARGET_64BIT ? 4 : 6;
18995 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
18996 }
18997 if (COMPLEX_MODE_P (mode))
18998 return 2;
18999 /* Register pair for mask registers. */
19000 if (mode == P2QImode || mode == P2HImode)
19001 return 2;
19002 if (mode == V64SFmode || mode == V64SImode)
19003 return 4;
19004 return 1;
19005 }
19006
19007 /* Implement REGMODE_NATURAL_SIZE(MODE). */
19008 unsigned int
19009 ix86_regmode_natural_size (machine_mode mode)
19010 {
19011 if (mode == P2HImode || mode == P2QImode)
19012 return GET_MODE_SIZE (mode) / 2;
19013 return UNITS_PER_WORD;
19014 }
19015
19016 /* Implement TARGET_HARD_REGNO_MODE_OK. */
19017
19018 static bool
19019 ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
19020 {
19021 /* The flags register, and only the flags register, can hold CCmode values. */
19022 if (CC_REGNO_P (regno))
19023 return GET_MODE_CLASS (mode) == MODE_CC;
19024 if (GET_MODE_CLASS (mode) == MODE_CC
19025 || GET_MODE_CLASS (mode) == MODE_RANDOM)
19026 return false;
19027 if (STACK_REGNO_P (regno))
19028 return VALID_FP_MODE_P (mode);
19029 if (MASK_REGNO_P (regno))
19030 {
19031 /* Register pair only starts at even register number. */
19032 if ((mode == P2QImode || mode == P2HImode))
19033 return MASK_PAIR_REGNO_P(regno);
19034
19035 return (VALID_MASK_REG_MODE (mode)
19036 || (TARGET_AVX512BW
19037 && VALID_MASK_AVX512BW_MODE (mode)));
19038 }
19039
19040 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
19041 return false;
19042
19043 if (SSE_REGNO_P (regno))
19044 {
19045 /* We implement the move patterns for all vector modes into and
19046 out of SSE registers, even when no operation instructions
19047 are available. */
19048
19049 /* For AVX-512 we allow, regardless of regno:
19050 - XI mode
19051 - any 512-bit wide vector mode
19052 - any scalar mode. */
19053 if (TARGET_AVX512F
19054 && (mode == XImode
19055 || VALID_AVX512F_REG_MODE (mode)
19056 || VALID_AVX512F_SCALAR_MODE (mode)))
19057 return true;
19058
19059 /* For AVX-5124FMAPS or AVX-5124VNNIW
19060 allow V64SF and V64SI modes for special regnos. */
19061 if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
19062 && (mode == V64SFmode || mode == V64SImode)
19063 && MOD4_SSE_REGNO_P (regno))
19064 return true;
19065
19066 /* TODO check for QI/HI scalars. */
19067 /* AVX512VL allows SSE regs 16+ for 128/256-bit modes. */
19068 if (TARGET_AVX512VL
19069 && (mode == OImode
19070 || mode == TImode
19071 || VALID_AVX256_REG_MODE (mode)
19072 || VALID_AVX512VL_128_REG_MODE (mode)))
19073 return true;
19074
19075 /* xmm16-xmm31 are only available for AVX-512. */
19076 if (EXT_REX_SSE_REGNO_P (regno))
19077 return false;
19078
19079 /* OImode and AVX modes are available only when AVX is enabled. */
19080 return ((TARGET_AVX
19081 && VALID_AVX256_REG_OR_OI_MODE (mode))
19082 || VALID_SSE_REG_MODE (mode)
19083 || VALID_SSE2_REG_MODE (mode)
19084 || VALID_MMX_REG_MODE (mode)
19085 || VALID_MMX_REG_MODE_3DNOW (mode));
19086 }
19087 if (MMX_REGNO_P (regno))
19088 {
19089 /* We implement the move patterns for 3DNOW modes even in MMX mode,
19090 so if the register is available at all, then we can move data of
19091 the given mode into or out of it. */
19092 return (VALID_MMX_REG_MODE (mode)
19093 || VALID_MMX_REG_MODE_3DNOW (mode));
19094 }
19095
19096 if (mode == QImode)
19097 {
19098 /* Take care for QImode values - they can be in non-QI regs,
19099 but then they do cause partial register stalls. */
19100 if (ANY_QI_REGNO_P (regno))
19101 return true;
19102 if (!TARGET_PARTIAL_REG_STALL)
19103 return true;
19104 /* LRA checks if the hard register is OK for the given mode.
19105 QImode values can live in non-QI regs, so we allow all
19106 registers here. */
19107 if (lra_in_progress)
19108 return true;
19109 return !can_create_pseudo_p ();
19110 }
19111 /* We handle both integer and floats in the general purpose registers. */
19112 else if (VALID_INT_MODE_P (mode))
19113 return true;
19114 else if (VALID_FP_MODE_P (mode))
19115 return true;
19116 else if (VALID_DFP_MODE_P (mode))
19117 return true;
19118 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
19119 on to use that value in smaller contexts, this can easily force a
19120 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
19121 supporting DImode, allow it. */
19122 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
19123 return true;
19124
19125 return false;
19126 }
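
/* For example, xmm16 (an extended REX SSE register) can hold V4SFmode
   only when TARGET_AVX512VL is enabled; without AVX512VL the 128-bit and
   256-bit modes are rejected in xmm16-xmm31, while the plain SSE
   registers still accept them.  */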
19127
19128 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that
19129 saves SSE registers across calls is Win64 (thus no need to check the
19130 current ABI here), and with AVX enabled Win64 only guarantees that
19131 the low 16 bytes are saved. */
19132
19133 static bool
19134 ix86_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
19135 machine_mode mode)
19136 {
19137 return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
19138 }
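
/* For example, a 256-bit V8SFmode value living in an SSE register is
   considered partially clobbered by a call, since only the low 16 bytes
   are preserved by the Win64 ABI with AVX.  */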
19139
19140 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
19141 tieable integer mode. */
19142
19143 static bool
19144 ix86_tieable_integer_mode_p (machine_mode mode)
19145 {
19146 switch (mode)
19147 {
19148 case E_HImode:
19149 case E_SImode:
19150 return true;
19151
19152 case E_QImode:
19153 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
19154
19155 case E_DImode:
19156 return TARGET_64BIT;
19157
19158 default:
19159 return false;
19160 }
19161 }
19162
19163 /* Implement TARGET_MODES_TIEABLE_P.
19164
19165 Return true if MODE1 is accessible in a register that can hold MODE2
19166 without copying. That is, all register classes that can hold MODE2
19167 can also hold MODE1. */
19168
19169 static bool
19170 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
19171 {
19172 if (mode1 == mode2)
19173 return true;
19174
19175 if (ix86_tieable_integer_mode_p (mode1)
19176 && ix86_tieable_integer_mode_p (mode2))
19177 return true;
19178
19179 /* MODE2 being XFmode implies fp stack or general regs, which means we
19180 can tie any smaller floating point modes to it. Note that we do not
19181 tie this with TFmode. */
19182 if (mode2 == XFmode)
19183 return mode1 == SFmode || mode1 == DFmode;
19184
19185 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
19186 that we can tie it with SFmode. */
19187 if (mode2 == DFmode)
19188 return mode1 == SFmode;
19189
19190 /* If MODE2 is only appropriate for an SSE register, then tie with
19191 any other mode acceptable to SSE registers. */
19192 if (GET_MODE_SIZE (mode2) == 64
19193 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
19194 return (GET_MODE_SIZE (mode1) == 64
19195 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
19196 if (GET_MODE_SIZE (mode2) == 32
19197 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
19198 return (GET_MODE_SIZE (mode1) == 32
19199 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
19200 if (GET_MODE_SIZE (mode2) == 16
19201 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
19202 return (GET_MODE_SIZE (mode1) == 16
19203 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
19204
19205 /* If MODE2 is appropriate for an MMX register, then tie
19206 with any other mode acceptable to MMX registers. */
19207 if (GET_MODE_SIZE (mode2) == 8
19208 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
19209 return (GET_MODE_SIZE (mode1) == 8
19210 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
19211
19212 return false;
19213 }
19214
19215 /* Return the cost of moving between two registers of mode MODE. */
19216
19217 static int
19218 ix86_set_reg_reg_cost (machine_mode mode)
19219 {
19220 unsigned int units = UNITS_PER_WORD;
19221
19222 switch (GET_MODE_CLASS (mode))
19223 {
19224 default:
19225 break;
19226
19227 case MODE_CC:
19228 units = GET_MODE_SIZE (CCmode);
19229 break;
19230
19231 case MODE_FLOAT:
19232 if ((TARGET_SSE && mode == TFmode)
19233 || (TARGET_80387 && mode == XFmode)
19234 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
19235 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
19236 units = GET_MODE_SIZE (mode);
19237 break;
19238
19239 case MODE_COMPLEX_FLOAT:
19240 if ((TARGET_SSE && mode == TCmode)
19241 || (TARGET_80387 && mode == XCmode)
19242 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
19243 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
19244 units = GET_MODE_SIZE (mode);
19245 break;
19246
19247 case MODE_VECTOR_INT:
19248 case MODE_VECTOR_FLOAT:
19249 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
19250 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
19251 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
19252 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
19253 || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
19254 && VALID_MMX_REG_MODE (mode)))
19255 units = GET_MODE_SIZE (mode);
19256 }
19257
19258 /* Return the cost of moving between two registers of mode MODE,
19259 assuming that the move will be in pieces of at most UNITS bytes. */
19260 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
19261 }
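
/* For example, a 32-byte V8SFmode register-to-register copy is costed at
   COSTS_N_INSNS (1) when TARGET_AVX provides a single 256-bit move, but at
   CEIL (32, UNITS_PER_WORD) insns when it has to be done in word-size
   pieces.  */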
19262
19263 /* Return cost of vector operation in MODE given that scalar version has
19264 COST. */
19265
19266 static int
19267 ix86_vec_cost (machine_mode mode, int cost)
19268 {
19269 if (!VECTOR_MODE_P (mode))
19270 return cost;
19271
19272 if (GET_MODE_BITSIZE (mode) == 128
19273 && TARGET_SSE_SPLIT_REGS)
19274 return cost * 2;
19275 if (GET_MODE_BITSIZE (mode) > 128
19276 && TARGET_AVX256_SPLIT_REGS)
19277 return cost * GET_MODE_BITSIZE (mode) / 128;
19278 return cost;
19279 }
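
/* For example, on a target with TARGET_AVX256_SPLIT_REGS a 256-bit vector
   operation is costed at twice the scalar COST, reflecting the internal
   split into two 128-bit halves.  */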
19280
19281 /* Return cost of multiplication in MODE. */
19282
19283 static int
19284 ix86_multiplication_cost (const struct processor_costs *cost,
19285 enum machine_mode mode)
19286 {
19287 machine_mode inner_mode = mode;
19288 if (VECTOR_MODE_P (mode))
19289 inner_mode = GET_MODE_INNER (mode);
19290
19291 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19292 return inner_mode == DFmode ? cost->mulsd : cost->mulss;
19293 else if (X87_FLOAT_MODE_P (mode))
19294 return cost->fmul;
19295 else if (FLOAT_MODE_P (mode))
19296 return ix86_vec_cost (mode,
19297 inner_mode == DFmode ? cost->mulsd : cost->mulss);
19298 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19299 {
19300 /* vpmullq is used in this case. No emulation is needed. */
19301 if (TARGET_AVX512DQ)
19302 return ix86_vec_cost (mode, cost->mulss);
19303
19304 /* V*QImode is emulated with 7-13 insns. */
19305 if (mode == V16QImode || mode == V32QImode)
19306 {
19307 int extra = 11;
19308 if (TARGET_XOP && mode == V16QImode)
19309 extra = 5;
19310 else if (TARGET_SSSE3)
19311 extra = 6;
19312 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * extra);
19313 }
19314 /* V*DImode is emulated with 5-8 insns. */
19315 else if (mode == V2DImode || mode == V4DImode)
19316 {
19317 if (TARGET_XOP && mode == V2DImode)
19318 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 3);
19319 else
19320 return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5);
19321 }
19322 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
19323 insns, including two PMULUDQ. */
19324 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
19325 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
19326 else
19327 return ix86_vec_cost (mode, cost->mulss);
19328 }
19329 else
19330 return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
19331 }
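
/* For example, a V16QImode multiply on an SSSE3 target (without XOP or
   AVX512DQ) is costed as two scalar multiplies plus six SSE ops,
   reflecting the insn sequence used to emulate the missing byte
   multiply.  */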
19332
19333 /* Return cost of division in MODE. */
19334
19335 static int
19336 ix86_division_cost (const struct processor_costs *cost,
19337 enum machine_mode mode)
19338 {
19339 machine_mode inner_mode = mode;
19340 if (VECTOR_MODE_P (mode))
19341 inner_mode = GET_MODE_INNER (mode);
19342
19343 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19344 return inner_mode == DFmode ? cost->divsd : cost->divss;
19345 else if (X87_FLOAT_MODE_P (mode))
19346 return cost->fdiv;
19347 else if (FLOAT_MODE_P (mode))
19348 return ix86_vec_cost (mode,
19349 inner_mode == DFmode ? cost->divsd : cost->divss);
19350 else
19351 return cost->divide[MODE_INDEX (mode)];
19352 }
19353
19354 #define COSTS_N_BYTES(N) ((N) * 2)
19355
19356 /* Return cost of shift in MODE.
19357 If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
19358 AND_IN_OP1 specifies whether op1 is the result of an AND, and
19359 SHIFT_AND_TRUNCATE whether op1 is a SUBREG of such an AND.
19360
19361 SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored. */
19362
19363 static int
19364 ix86_shift_rotate_cost (const struct processor_costs *cost,
19365 enum machine_mode mode, bool constant_op1,
19366 HOST_WIDE_INT op1_val,
19367 bool speed,
19368 bool and_in_op1,
19369 bool shift_and_truncate,
19370 bool *skip_op0, bool *skip_op1)
19371 {
19372 if (skip_op0)
19373 *skip_op0 = *skip_op1 = false;
19374 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19375 {
19376 /* V*QImode is emulated with 1-11 insns. */
19377 if (mode == V16QImode || mode == V32QImode)
19378 {
19379 int count = 11;
19380 if (TARGET_XOP && mode == V16QImode)
19381 {
19382 /* For XOP we use vpshab, which requires a broadcast of the
19383 value to the variable shift insn. For constants this
19384 means a V16Q const in mem; even when we can perform the
19385 shift with one insn set the cost to prefer paddb. */
19386 if (constant_op1)
19387 {
19388 if (skip_op1)
19389 *skip_op1 = true;
19390 return ix86_vec_cost (mode,
19391 cost->sse_op
19392 + (speed
19393 ? 2
19394 : COSTS_N_BYTES
19395 (GET_MODE_UNIT_SIZE (mode))));
19396 }
19397 count = 3;
19398 }
19399 else if (TARGET_SSSE3)
19400 count = 7;
19401 return ix86_vec_cost (mode, cost->sse_op * count);
19402 }
19403 else
19404 return ix86_vec_cost (mode, cost->sse_op);
19405 }
19406 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
19407 {
19408 if (constant_op1)
19409 {
19410 if (op1_val > 32)
19411 return cost->shift_const + COSTS_N_INSNS (2);
19412 else
19413 return cost->shift_const * 2;
19414 }
19415 else
19416 {
19417 if (and_in_op1)
19418 return cost->shift_var * 2;
19419 else
19420 return cost->shift_var * 6 + COSTS_N_INSNS (2);
19421 }
19422 }
19423 else
19424 {
19425 if (constant_op1)
19426 return cost->shift_const;
19427 else if (shift_and_truncate)
19428 {
19429 if (skip_op0)
19430 *skip_op0 = *skip_op1 = true;
19431 /* Return the cost after shift-and truncation. */
19432 return cost->shift_var;
19433 }
19434 else
19435 return cost->shift_var;
19436 }
19437 return cost->shift_const;
19438 }
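
/* For example, a DImode shift on a 32-bit target (mode wider than a word)
   by a constant amount above 32 is costed at shift_const plus
   COSTS_N_INSNS (2), while a constant shift that fits in a single word
   costs just shift_const.  */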
19439
19440 /* Compute a (partial) cost for rtx X. Return true if the complete
19441 cost has been computed, and false if subexpressions should be
19442 scanned. In either case, *TOTAL contains the cost result. */
19443
19444 static bool
19445 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
19446 int *total, bool speed)
19447 {
19448 rtx mask;
19449 enum rtx_code code = GET_CODE (x);
19450 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
19451 const struct processor_costs *cost
19452 = speed ? ix86_tune_cost : &ix86_size_cost;
19453 int src_cost;
19454
19455 switch (code)
19456 {
19457 case SET:
19458 if (register_operand (SET_DEST (x), VOIDmode)
19459 && register_operand (SET_SRC (x), VOIDmode))
19460 {
19461 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
19462 return true;
19463 }
19464
19465 if (register_operand (SET_SRC (x), VOIDmode))
19466 /* Avoid potentially incorrect high cost from rtx_costs
19467 for non-tieable SUBREGs. */
19468 src_cost = 0;
19469 else
19470 {
19471 src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
19472
19473 if (CONSTANT_P (SET_SRC (x)))
19474 /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
19475 a small value, possibly zero for cheap constants. */
19476 src_cost += COSTS_N_INSNS (1);
19477 }
19478
19479 *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
19480 return true;
19481
19482 case CONST_INT:
19483 case CONST:
19484 case LABEL_REF:
19485 case SYMBOL_REF:
19486 if (x86_64_immediate_operand (x, VOIDmode))
19487 *total = 0;
19488 else
19489 *total = 1;
19490 return true;
19491
19492 case CONST_DOUBLE:
19493 if (IS_STACK_MODE (mode))
19494 switch (standard_80387_constant_p (x))
19495 {
19496 case -1:
19497 case 0:
19498 break;
19499 case 1: /* 0.0 */
19500 *total = 1;
19501 return true;
19502 default: /* Other constants */
19503 *total = 2;
19504 return true;
19505 }
19506 /* FALLTHRU */
19507
19508 case CONST_VECTOR:
19509 switch (standard_sse_constant_p (x, mode))
19510 {
19511 case 0:
19512 break;
19513 case 1: /* 0: xor eliminates false dependency */
19514 *total = 0;
19515 return true;
19516 default: /* -1: cmp contains false dependency */
19517 *total = 1;
19518 return true;
19519 }
19520 /* FALLTHRU */
19521
19522 case CONST_WIDE_INT:
19523 /* Fall back to (MEM (SYMBOL_REF)), since that's where
19524 it'll probably end up. Add a penalty for size. */
19525 *total = (COSTS_N_INSNS (1)
19526 + (!TARGET_64BIT && flag_pic)
19527 + (GET_MODE_SIZE (mode) <= 4
19528 ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
19529 return true;
19530
19531 case ZERO_EXTEND:
19532 /* The zero extension is often completely free on x86_64, so make
19533 it as cheap as possible. */
19534 if (TARGET_64BIT && mode == DImode
19535 && GET_MODE (XEXP (x, 0)) == SImode)
19536 *total = 1;
19537 else if (TARGET_ZERO_EXTEND_WITH_AND)
19538 *total = cost->add;
19539 else
19540 *total = cost->movzx;
19541 return false;
19542
19543 case SIGN_EXTEND:
19544 *total = cost->movsx;
19545 return false;
19546
19547 case ASHIFT:
19548 if (SCALAR_INT_MODE_P (mode)
19549 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
19550 && CONST_INT_P (XEXP (x, 1)))
19551 {
19552 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19553 if (value == 1)
19554 {
19555 *total = cost->add;
19556 return false;
19557 }
19558 if ((value == 2 || value == 3)
19559 && cost->lea <= cost->shift_const)
19560 {
19561 *total = cost->lea;
19562 return false;
19563 }
19564 }
19565 /* FALLTHRU */
19566
19567 case ROTATE:
19568 case ASHIFTRT:
19569 case LSHIFTRT:
19570 case ROTATERT:
19571 bool skip_op0, skip_op1;
19572 *total = ix86_shift_rotate_cost (cost, mode, CONSTANT_P (XEXP (x, 1)),
19573 CONST_INT_P (XEXP (x, 1))
19574 ? INTVAL (XEXP (x, 1)) : -1,
19575 speed,
19576 GET_CODE (XEXP (x, 1)) == AND,
19577 SUBREG_P (XEXP (x, 1))
19578 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND,
19579 &skip_op0, &skip_op1);
19580 if (skip_op0 || skip_op1)
19581 {
19582 if (!skip_op0)
19583 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
19584 if (!skip_op1)
19585 *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
19586 return true;
19587 }
19588 return false;
19589
19590 case FMA:
19591 {
19592 rtx sub;
19593
19594 gcc_assert (FLOAT_MODE_P (mode));
19595 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
19596
19597 *total = ix86_vec_cost (mode,
19598 GET_MODE_INNER (mode) == SFmode
19599 ? cost->fmass : cost->fmasd);
19600 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
19601
19602 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
19603 sub = XEXP (x, 0);
19604 if (GET_CODE (sub) == NEG)
19605 sub = XEXP (sub, 0);
19606 *total += rtx_cost (sub, mode, FMA, 0, speed);
19607
19608 sub = XEXP (x, 2);
19609 if (GET_CODE (sub) == NEG)
19610 sub = XEXP (sub, 0);
19611 *total += rtx_cost (sub, mode, FMA, 2, speed);
19612 return true;
19613 }
19614
19615 case MULT:
19616 if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
19617 {
19618 rtx op0 = XEXP (x, 0);
19619 rtx op1 = XEXP (x, 1);
19620 int nbits;
19621 if (CONST_INT_P (XEXP (x, 1)))
19622 {
19623 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19624 for (nbits = 0; value != 0; value &= value - 1)
19625 nbits++;
19626 }
19627 else
19628 /* This is arbitrary. */
19629 nbits = 7;
19630
19631 /* Compute costs correctly for widening multiplication. */
19632 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
19633 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
19634 == GET_MODE_SIZE (mode))
19635 {
19636 int is_mulwiden = 0;
19637 machine_mode inner_mode = GET_MODE (op0);
19638
19639 if (GET_CODE (op0) == GET_CODE (op1))
19640 is_mulwiden = 1, op1 = XEXP (op1, 0);
19641 else if (CONST_INT_P (op1))
19642 {
19643 if (GET_CODE (op0) == SIGN_EXTEND)
19644 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
19645 == INTVAL (op1);
19646 else
19647 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
19648 }
19649
19650 if (is_mulwiden)
19651 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
19652 }
19653
19654 *total = (cost->mult_init[MODE_INDEX (mode)]
19655 + nbits * cost->mult_bit
19656 + rtx_cost (op0, mode, outer_code, opno, speed)
19657 + rtx_cost (op1, mode, outer_code, opno, speed));
19658
19659 return true;
19660 }
19661 *total = ix86_multiplication_cost (cost, mode);
19662 return false;
19663
19664 case DIV:
19665 case UDIV:
19666 case MOD:
19667 case UMOD:
19668 *total = ix86_division_cost (cost, mode);
19669 return false;
19670
19671 case PLUS:
19672 if (GET_MODE_CLASS (mode) == MODE_INT
19673 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
19674 {
19675 if (GET_CODE (XEXP (x, 0)) == PLUS
19676 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
19677 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
19678 && CONSTANT_P (XEXP (x, 1)))
19679 {
19680 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
19681 if (val == 2 || val == 4 || val == 8)
19682 {
19683 *total = cost->lea;
19684 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
19685 outer_code, opno, speed);
19686 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
19687 outer_code, opno, speed);
19688 *total += rtx_cost (XEXP (x, 1), mode,
19689 outer_code, opno, speed);
19690 return true;
19691 }
19692 }
19693 else if (GET_CODE (XEXP (x, 0)) == MULT
19694 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
19695 {
19696 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
19697 if (val == 2 || val == 4 || val == 8)
19698 {
19699 *total = cost->lea;
19700 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
19701 outer_code, opno, speed);
19702 *total += rtx_cost (XEXP (x, 1), mode,
19703 outer_code, opno, speed);
19704 return true;
19705 }
19706 }
19707 else if (GET_CODE (XEXP (x, 0)) == PLUS)
19708 {
19709 /* Add with carry, ignore the cost of adding a carry flag. */
19710 if (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 0), mode))
19711 *total = cost->add;
19712 else
19713 {
19714 *total = cost->lea;
19715 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
19716 outer_code, opno, speed);
19717 }
19718
19719 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
19720 outer_code, opno, speed);
19721 *total += rtx_cost (XEXP (x, 1), mode,
19722 outer_code, opno, speed);
19723 return true;
19724 }
19725 }
19726 /* FALLTHRU */
19727
19728 case MINUS:
19729 /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
19730 if (GET_MODE_CLASS (mode) == MODE_INT
19731 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
19732 && GET_CODE (XEXP (x, 0)) == MINUS
19733 && ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode))
19734 {
19735 *total = cost->add;
19736 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
19737 outer_code, opno, speed);
19738 *total += rtx_cost (XEXP (x, 1), mode,
19739 outer_code, opno, speed);
19740 return true;
19741 }
19742
19743 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19744 {
19745 *total = cost->addss;
19746 return false;
19747 }
19748 else if (X87_FLOAT_MODE_P (mode))
19749 {
19750 *total = cost->fadd;
19751 return false;
19752 }
19753 else if (FLOAT_MODE_P (mode))
19754 {
19755 *total = ix86_vec_cost (mode, cost->addss);
19756 return false;
19757 }
19758 /* FALLTHRU */
19759
19760 case AND:
19761 case IOR:
19762 case XOR:
19763 if (GET_MODE_CLASS (mode) == MODE_INT
19764 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
19765 {
19766 *total = (cost->add * 2
19767 + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
19768 << (GET_MODE (XEXP (x, 0)) != DImode))
19769 + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
19770 << (GET_MODE (XEXP (x, 1)) != DImode)));
19771 return true;
19772 }
19773 /* FALLTHRU */
19774
19775 case NEG:
19776 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19777 {
19778 *total = cost->sse_op;
19779 return false;
19780 }
19781 else if (X87_FLOAT_MODE_P (mode))
19782 {
19783 *total = cost->fchs;
19784 return false;
19785 }
19786 else if (FLOAT_MODE_P (mode))
19787 {
19788 *total = ix86_vec_cost (mode, cost->sse_op);
19789 return false;
19790 }
19791 /* FALLTHRU */
19792
19793 case NOT:
19794 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19795 *total = ix86_vec_cost (mode, cost->sse_op);
19796 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
19797 *total = cost->add * 2;
19798 else
19799 *total = cost->add;
19800 return false;
19801
19802 case COMPARE:
19803 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
19804 && XEXP (XEXP (x, 0), 1) == const1_rtx
19805 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
19806 && XEXP (x, 1) == const0_rtx)
19807 {
19808 /* This kind of construct is implemented using test[bwl].
19809 Treat it as if we had an AND. */
19810 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
19811 *total = (cost->add
19812 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code,
19813 opno, speed)
19814 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
19815 return true;
19816 }
19817
19818 if (GET_CODE (XEXP (x, 0)) == PLUS
19819 && rtx_equal_p (XEXP (XEXP (x, 0), 0), XEXP (x, 1)))
19820 {
19821 /* This is an overflow detection, count it as a normal compare. */
19822 *total = rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)),
19823 COMPARE, 0, speed);
19824 return true;
19825 }
19826
19827 /* The embedded comparison operand is completely free. */
19828 if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
19829 && XEXP (x, 1) == const0_rtx)
19830 *total = 0;
19831
19832 return false;
19833
19834 case FLOAT_EXTEND:
19835 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
19836 *total = 0;
19837 else
19838 *total = ix86_vec_cost (mode, cost->addss);
19839 return false;
19840
19841 case FLOAT_TRUNCATE:
19842 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
19843 *total = cost->fadd;
19844 else
19845 *total = ix86_vec_cost (mode, cost->addss);
19846 return false;
19847
19848 case ABS:
19849 /* SSE requires memory load for the constant operand. It may make
19850 sense to account for this. Of course the constant operand may or
19851 may not be reused. */
19852 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19853 *total = cost->sse_op;
19854 else if (X87_FLOAT_MODE_P (mode))
19855 *total = cost->fabs;
19856 else if (FLOAT_MODE_P (mode))
19857 *total = ix86_vec_cost (mode, cost->sse_op);
19858 return false;
19859
19860 case SQRT:
19861 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19862 *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
19863 else if (X87_FLOAT_MODE_P (mode))
19864 *total = cost->fsqrt;
19865 else if (FLOAT_MODE_P (mode))
19866 *total = ix86_vec_cost (mode,
19867 mode == SFmode ? cost->sqrtss : cost->sqrtsd);
19868 return false;
19869
19870 case UNSPEC:
19871 if (XINT (x, 1) == UNSPEC_TP)
19872 *total = 0;
19873 return false;
19874
19875 case VEC_SELECT:
19876 case VEC_CONCAT:
19877 case VEC_DUPLICATE:
19878 /* ??? Assume all of these vector manipulation patterns are
19879 recognizable. In which case they all pretty much have the
19880 same cost. */
19881 *total = cost->sse_op;
19882 return true;
19883 case VEC_MERGE:
19884 mask = XEXP (x, 2);
19885 /* This is a masked instruction; assume the same cost
19886 as the nonmasked variant. */
19887 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
19888 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
19889 else
19890 *total = cost->sse_op;
19891 return true;
19892
19893 default:
19894 return false;
19895 }
19896 }
19897
19898 #if TARGET_MACHO
19899
19900 static int current_machopic_label_num;
19901
19902 /* Given a symbol name and its associated stub, write out the
19903 definition of the stub. */
19904
19905 void
19906 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19907 {
19908 unsigned int length;
19909 char *binder_name, *symbol_name, lazy_ptr_name[32];
19910 int label = ++current_machopic_label_num;
19911
19912 /* For 64-bit we shouldn't get here. */
19913 gcc_assert (!TARGET_64BIT);
19914
19915 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19916 symb = targetm.strip_name_encoding (symb);
19917
19918 length = strlen (stub);
19919 binder_name = XALLOCAVEC (char, length + 32);
19920 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
19921
19922 length = strlen (symb);
19923 symbol_name = XALLOCAVEC (char, length + 32);
19924 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19925
19926 sprintf (lazy_ptr_name, "L%d$lz", label);
19927
19928 if (MACHOPIC_ATT_STUB)
19929 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
19930 else if (MACHOPIC_PURE)
19931 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
19932 else
19933 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
19934
19935 fprintf (file, "%s:\n", stub);
19936 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19937
19938 if (MACHOPIC_ATT_STUB)
19939 {
19940 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
19941 }
19942 else if (MACHOPIC_PURE)
19943 {
19944 /* PIC stub. */
19945 /* 25-byte PIC stub using "CALL get_pc_thunk". */
19946 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
19947 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
19948 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
19949 label, lazy_ptr_name, label);
19950 fprintf (file, "\tjmp\t*%%ecx\n");
19951 }
19952 else
19953 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
19954
19955 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
19956 it needs no stub-binding-helper. */
19957 if (MACHOPIC_ATT_STUB)
19958 return;
19959
19960 fprintf (file, "%s:\n", binder_name);
19961
19962 if (MACHOPIC_PURE)
19963 {
19964 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
19965 fprintf (file, "\tpushl\t%%ecx\n");
19966 }
19967 else
19968 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
19969
19970 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
19971
19972 /* N.B. Keep the correspondence of these
19973 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
19974 old-pic/new-pic/non-pic stubs; altering this will break
19975 compatibility with existing dylibs. */
19976 if (MACHOPIC_PURE)
19977 {
19978 /* 25-byte PIC stub using "CALL get_pc_thunk". */
19979 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
19980 }
19981 else
19982 /* 16-byte -mdynamic-no-pic stub. */
19983     switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
19984
19985 fprintf (file, "%s:\n", lazy_ptr_name);
19986 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19987 fprintf (file, ASM_LONG "%s\n", binder_name);
19988 }
19989 #endif /* TARGET_MACHO */
19990
19991 /* Order the registers for register allocator. */
19992
19993 void
19994 x86_order_regs_for_local_alloc (void)
19995 {
19996 int pos = 0;
19997 int i;
19998
19999 /* First allocate the local general purpose registers. */
20000 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
20001 if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (i))
20002 reg_alloc_order [pos++] = i;
20003
20004 /* Global general purpose registers. */
20005 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
20006 if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (i))
20007 reg_alloc_order [pos++] = i;
20008
20009 /* x87 registers come first in case we are doing FP math
20010 using them. */
20011 if (!TARGET_SSE_MATH)
20012 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
20013 reg_alloc_order [pos++] = i;
20014
20015 /* SSE registers. */
20016 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
20017 reg_alloc_order [pos++] = i;
20018 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
20019 reg_alloc_order [pos++] = i;
20020
20021 /* Extended REX SSE registers. */
20022 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
20023 reg_alloc_order [pos++] = i;
20024
20025 /* Mask register. */
20026 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
20027 reg_alloc_order [pos++] = i;
20028
20029 /* x87 registers. */
20030 if (TARGET_SSE_MATH)
20031 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
20032 reg_alloc_order [pos++] = i;
20033
20034 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
20035 reg_alloc_order [pos++] = i;
20036
20037   /* Initialize the rest of the array, as we do not allocate some registers
20038      at all.  */
20039 while (pos < FIRST_PSEUDO_REGISTER)
20040 reg_alloc_order [pos++] = 0;
20041 }
20042
20043 static bool
20044 ix86_ms_bitfield_layout_p (const_tree record_type)
20045 {
20046 return ((TARGET_MS_BITFIELD_LAYOUT
20047 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
20048 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
20049 }
20050
20051 /* Returns an expression indicating where the this parameter is
20052 located on entry to the FUNCTION. */
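/* For instance (an illustrative summary only, not an exhaustive list of the
   cases handled below): for the 64-bit SysV ABI the `this' pointer arrives
   in %rdi, or in %rsi when the return value is passed in memory and %rdi
   carries the hidden return-slot pointer; the 32-bit cases may instead
   return a register (regparm/fastcall/thiscall) or a stack slot.  */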
20053
20054 static rtx
20055 x86_this_parameter (tree function)
20056 {
20057 tree type = TREE_TYPE (function);
20058 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
20059 int nregs;
20060
20061 if (TARGET_64BIT)
20062 {
20063 const int *parm_regs;
20064
20065 if (ix86_function_type_abi (type) == MS_ABI)
20066 parm_regs = x86_64_ms_abi_int_parameter_registers;
20067 else
20068 parm_regs = x86_64_int_parameter_registers;
20069 return gen_rtx_REG (Pmode, parm_regs[aggr]);
20070 }
20071
20072 nregs = ix86_function_regparm (type, function);
20073
20074 if (nregs > 0 && !stdarg_p (type))
20075 {
20076 int regno;
20077 unsigned int ccvt = ix86_get_callcvt (type);
20078
20079 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
20080 regno = aggr ? DX_REG : CX_REG;
20081 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
20082 {
20083 regno = CX_REG;
20084 if (aggr)
20085 return gen_rtx_MEM (SImode,
20086 plus_constant (Pmode, stack_pointer_rtx, 4));
20087 }
20088 else
20089 {
20090 regno = AX_REG;
20091 if (aggr)
20092 {
20093 regno = DX_REG;
20094 if (nregs == 1)
20095 return gen_rtx_MEM (SImode,
20096 plus_constant (Pmode,
20097 stack_pointer_rtx, 4));
20098 }
20099 }
20100 return gen_rtx_REG (SImode, regno);
20101 }
20102
20103 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
20104 aggr ? 8 : 4));
20105 }
20106
20107 /* Determine whether x86_output_mi_thunk can succeed. */
20108
20109 static bool
20110 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
20111 const_tree function)
20112 {
20113 /* 64-bit can handle anything. */
20114 if (TARGET_64BIT)
20115 return true;
20116
20117 /* For 32-bit, everything's fine if we have one free register. */
20118 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
20119 return true;
20120
20121 /* Need a free register for vcall_offset. */
20122 if (vcall_offset)
20123 return false;
20124
20125 /* Need a free register for GOT references. */
20126 if (flag_pic && !targetm.binds_local_p (function))
20127 return false;
20128
20129 /* Otherwise ok. */
20130 return true;
20131 }
20132
20133 /* Output the assembler code for a thunk function. THUNK_DECL is the
20134 declaration for the thunk function itself, FUNCTION is the decl for
20135 the target function. DELTA is an immediate constant offset to be
20136 added to THIS. If VCALL_OFFSET is nonzero, the word at
20137 *(*this + vcall_offset) should be added to THIS. */
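/* A rough C-level sketch of what the emitted thunk does (illustrative
   pseudo-code only; the real sequence below is emitted as RTL):

     this = (char *) this + DELTA;
     if (VCALL_OFFSET != 0)
       this = (char *) this + *(ptrdiff_t *) (*(char **) this + VCALL_OFFSET);
     goto FUNCTION;    (tail call; argument registers are left untouched)

   i.e. the incoming `this' pointer is adjusted and control is transferred
   to FUNCTION without setting up a new frame.  */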
20138
20139 static void
20140 x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
20141 HOST_WIDE_INT vcall_offset, tree function)
20142 {
20143 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
20144 rtx this_param = x86_this_parameter (function);
20145 rtx this_reg, tmp, fnaddr;
20146 unsigned int tmp_regno;
20147 rtx_insn *insn;
20148
20149 if (TARGET_64BIT)
20150 tmp_regno = R10_REG;
20151 else
20152 {
20153 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
20154 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
20155 tmp_regno = AX_REG;
20156 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
20157 tmp_regno = DX_REG;
20158 else
20159 tmp_regno = CX_REG;
20160 }
20161
20162 emit_note (NOTE_INSN_PROLOGUE_END);
20163
20164   /* If CET is enabled, insert an ENDBR instruction.  */
20165 if ((flag_cf_protection & CF_BRANCH))
20166 emit_insn (gen_nop_endbr ());
20167
20168 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
20169 pull it in now and let DELTA benefit. */
20170 if (REG_P (this_param))
20171 this_reg = this_param;
20172 else if (vcall_offset)
20173 {
20174 /* Put the this parameter into %eax. */
20175 this_reg = gen_rtx_REG (Pmode, AX_REG);
20176 emit_move_insn (this_reg, this_param);
20177 }
20178 else
20179 this_reg = NULL_RTX;
20180
20181 /* Adjust the this parameter by a fixed constant. */
20182 if (delta)
20183 {
20184 rtx delta_rtx = GEN_INT (delta);
20185 rtx delta_dst = this_reg ? this_reg : this_param;
20186
20187 if (TARGET_64BIT)
20188 {
20189 if (!x86_64_general_operand (delta_rtx, Pmode))
20190 {
20191 tmp = gen_rtx_REG (Pmode, tmp_regno);
20192 emit_move_insn (tmp, delta_rtx);
20193 delta_rtx = tmp;
20194 }
20195 }
20196
20197 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
20198 }
20199
20200 /* Adjust the this parameter by a value stored in the vtable. */
20201 if (vcall_offset)
20202 {
20203 rtx vcall_addr, vcall_mem, this_mem;
20204
20205 tmp = gen_rtx_REG (Pmode, tmp_regno);
20206
20207 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
20208 if (Pmode != ptr_mode)
20209 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
20210 emit_move_insn (tmp, this_mem);
20211
20212 /* Adjust the this parameter. */
20213 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
20214 if (TARGET_64BIT
20215 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
20216 {
20217 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
20218 emit_move_insn (tmp2, GEN_INT (vcall_offset));
20219 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
20220 }
20221
20222 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
20223 if (Pmode != ptr_mode)
20224 emit_insn (gen_addsi_1_zext (this_reg,
20225 gen_rtx_REG (ptr_mode,
20226 REGNO (this_reg)),
20227 vcall_mem));
20228 else
20229 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
20230 }
20231
20232 /* If necessary, drop THIS back to its stack slot. */
20233 if (this_reg && this_reg != this_param)
20234 emit_move_insn (this_param, this_reg);
20235
20236 fnaddr = XEXP (DECL_RTL (function), 0);
20237 if (TARGET_64BIT)
20238 {
20239 if (!flag_pic || targetm.binds_local_p (function)
20240 || TARGET_PECOFF)
20241 ;
20242 else
20243 {
20244 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
20245 tmp = gen_rtx_CONST (Pmode, tmp);
20246 fnaddr = gen_const_mem (Pmode, tmp);
20247 }
20248 }
20249 else
20250 {
20251 if (!flag_pic || targetm.binds_local_p (function))
20252 ;
20253 #if TARGET_MACHO
20254 else if (TARGET_MACHO)
20255 {
20256 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
20257 fnaddr = XEXP (fnaddr, 0);
20258 }
20259 #endif /* TARGET_MACHO */
20260 else
20261 {
20262 tmp = gen_rtx_REG (Pmode, CX_REG);
20263 output_set_got (tmp, NULL_RTX);
20264
20265 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
20266 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
20267 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
20268 fnaddr = gen_const_mem (Pmode, fnaddr);
20269 }
20270 }
20271
20272 /* Our sibling call patterns do not allow memories, because we have no
20273 predicate that can distinguish between frame and non-frame memory.
20274 For our purposes here, we can get away with (ab)using a jump pattern,
20275 because we're going to do no optimization. */
20276 if (MEM_P (fnaddr))
20277 {
20278 if (sibcall_insn_operand (fnaddr, word_mode))
20279 {
20280 fnaddr = XEXP (DECL_RTL (function), 0);
20281 tmp = gen_rtx_MEM (QImode, fnaddr);
20282 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
20283 tmp = emit_call_insn (tmp);
20284 SIBLING_CALL_P (tmp) = 1;
20285 }
20286 else
20287 emit_jump_insn (gen_indirect_jump (fnaddr));
20288 }
20289 else
20290 {
20291 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
20292 {
20293 // CM_LARGE_PIC always uses pseudo PIC register which is
20294 // uninitialized. Since FUNCTION is local and calling it
20295 // doesn't go through PLT, we use scratch register %r11 as
20296 // PIC register and initialize it here.
20297 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
20298 ix86_init_large_pic_reg (tmp_regno);
20299 fnaddr = legitimize_pic_address (fnaddr,
20300 gen_rtx_REG (Pmode, tmp_regno));
20301 }
20302
20303 if (!sibcall_insn_operand (fnaddr, word_mode))
20304 {
20305 tmp = gen_rtx_REG (word_mode, tmp_regno);
20306 if (GET_MODE (fnaddr) != word_mode)
20307 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
20308 emit_move_insn (tmp, fnaddr);
20309 fnaddr = tmp;
20310 }
20311
20312 tmp = gen_rtx_MEM (QImode, fnaddr);
20313 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
20314 tmp = emit_call_insn (tmp);
20315 SIBLING_CALL_P (tmp) = 1;
20316 }
20317 emit_barrier ();
20318
20319 /* Emit just enough of rest_of_compilation to get the insns emitted. */
20320 insn = get_insns ();
20321 shorten_branches (insn);
20322 assemble_start_function (thunk_fndecl, fnname);
20323 final_start_function (insn, file, 1);
20324 final (insn, file, 1);
20325 final_end_function ();
20326 assemble_end_function (thunk_fndecl, fnname);
20327 }
20328
20329 static void
20330 x86_file_start (void)
20331 {
20332 default_file_start ();
20333 if (TARGET_16BIT)
20334 fputs ("\t.code16gcc\n", asm_out_file);
20335 #if TARGET_MACHO
20336 darwin_file_start ();
20337 #endif
20338 if (X86_FILE_START_VERSION_DIRECTIVE)
20339 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
20340 if (X86_FILE_START_FLTUSED)
20341 fputs ("\t.global\t__fltused\n", asm_out_file);
20342 if (ix86_asm_dialect == ASM_INTEL)
20343 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
20344 }
20345
20346 int
20347 x86_field_alignment (tree type, int computed)
20348 {
20349 machine_mode mode;
20350
20351 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
20352 return computed;
20353 if (TARGET_IAMCU)
20354 return iamcu_alignment (type, computed);
20355 mode = TYPE_MODE (strip_array_types (type));
20356 if (mode == DFmode || mode == DCmode
20357 || GET_MODE_CLASS (mode) == MODE_INT
20358 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
20359 return MIN (32, computed);
20360 return computed;
20361 }
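/* For example (illustrative, 32-bit code without -malign-double):

     struct s { char c; double d; };    (d is aligned to 4 bytes, not 8)

   On 64-bit targets, and with -malign-double, the computed alignment is
   kept as-is.  */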
20362
20363 /* Print call to TARGET to FILE. */
20364
20365 static void
20366 x86_print_call_or_nop (FILE *file, const char *target)
20367 {
20368 if (flag_nop_mcount || !strcmp (target, "nop"))
20369 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
20370 fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
20371 else
20372 fprintf (file, "1:\tcall\t%s\n", target);
20373 }
20374
20375 static bool
20376 current_fentry_name (const char **name)
20377 {
20378 tree attr = lookup_attribute ("fentry_name",
20379 DECL_ATTRIBUTES (current_function_decl));
20380 if (!attr)
20381 return false;
20382 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
20383 return true;
20384 }
20385
20386 static bool
20387 current_fentry_section (const char **name)
20388 {
20389 tree attr = lookup_attribute ("fentry_section",
20390 DECL_ATTRIBUTES (current_function_decl));
20391 if (!attr)
20392 return false;
20393 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
20394 return true;
20395 }
20396
20397 /* Output assembler code to FILE to increment profiler label # LABELNO
20398 for profiling a function entry. */
20399 void
20400 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
20401 {
20402 if (cfun->machine->endbr_queued_at_entrance)
20403 fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32");
20404
20405 const char *mcount_name = MCOUNT_NAME;
20406
20407 if (current_fentry_name (&mcount_name))
20408 ;
20409 else if (fentry_name)
20410 mcount_name = fentry_name;
20411 else if (flag_fentry)
20412 mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;
20413
20414 if (TARGET_64BIT)
20415 {
20416 #ifndef NO_PROFILE_COUNTERS
20417 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
20418 #endif
20419
20420 if (!TARGET_PECOFF && flag_pic)
20421 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
20422 else
20423 x86_print_call_or_nop (file, mcount_name);
20424 }
20425 else if (flag_pic)
20426 {
20427 #ifndef NO_PROFILE_COUNTERS
20428 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
20429 LPREFIX, labelno);
20430 #endif
20431 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
20432 }
20433 else
20434 {
20435 #ifndef NO_PROFILE_COUNTERS
20436 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
20437 LPREFIX, labelno);
20438 #endif
20439 x86_print_call_or_nop (file, mcount_name);
20440 }
20441
20442 if (flag_record_mcount
20443 || lookup_attribute ("fentry_section",
20444 DECL_ATTRIBUTES (current_function_decl)))
20445 {
20446 const char *sname = "__mcount_loc";
20447
20448 if (current_fentry_section (&sname))
20449 ;
20450 else if (fentry_section)
20451 sname = fentry_section;
20452
20453 fprintf (file, "\t.section %s, \"a\",@progbits\n", sname);
20454 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
20455 fprintf (file, "\t.previous\n");
20456 }
20457 }
20458
20459 /* We don't have exact information about the insn sizes, but we may assume
20460 quite safely that we are informed about all 1 byte insns and memory
20461 address sizes. This is enough to eliminate unnecessary padding in
20462 99% of cases. */
20463
20464 int
20465 ix86_min_insn_size (rtx_insn *insn)
20466 {
20467 int l = 0, len;
20468
20469 if (!INSN_P (insn) || !active_insn_p (insn))
20470 return 0;
20471
20472   /* Discard alignments we've emitted and jump instructions.  */
20473 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
20474 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
20475 return 0;
20476
20477 /* Important case - calls are always 5 bytes.
20478      It is common to have many calls in a row.  */
20479 if (CALL_P (insn)
20480 && symbolic_reference_mentioned_p (PATTERN (insn))
20481 && !SIBLING_CALL_P (insn))
20482 return 5;
20483 len = get_attr_length (insn);
20484 if (len <= 1)
20485 return 1;
20486
20487 /* For normal instructions we rely on get_attr_length being exact,
20488 with a few exceptions. */
20489 if (!JUMP_P (insn))
20490 {
20491 enum attr_type type = get_attr_type (insn);
20492
20493 switch (type)
20494 {
20495 case TYPE_MULTI:
20496 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
20497 || asm_noperands (PATTERN (insn)) >= 0)
20498 return 0;
20499 break;
20500 case TYPE_OTHER:
20501 case TYPE_FCMP:
20502 break;
20503 default:
20504 /* Otherwise trust get_attr_length. */
20505 return len;
20506 }
20507
20508 l = get_attr_length_address (insn);
20509 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
20510 l = 4;
20511 }
20512 if (l)
20513 return 1+l;
20514 else
20515 return 2;
20516 }
20517
20518 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
20519
20520 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
20521 window. */
20522
20523 static void
20524 ix86_avoid_jump_mispredicts (void)
20525 {
20526 rtx_insn *insn, *start = get_insns ();
20527 int nbytes = 0, njumps = 0;
20528 bool isjump = false;
20529
20530 /* Look for all minimal intervals of instructions containing 4 jumps.
20531 The intervals are bounded by START and INSN. NBYTES is the total
20532 size of instructions in the interval including INSN and not including
20533      START.  When NBYTES is smaller than 16 bytes, it is possible
20534      that the end of START and INSN ends up in the same 16-byte page.
20535
20536 The smallest offset in the page INSN can start is the case where START
20537 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
20538 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
20539
20540 Don't consider asm goto as jump, while it can contain a jump, it doesn't
20541 have to, control transfer to label(s) can be performed through other
20542 means, and also we estimate minimum length of all asm stmts as 0. */
20543 for (insn = start; insn; insn = NEXT_INSN (insn))
20544 {
20545 int min_size;
20546
20547 if (LABEL_P (insn))
20548 {
20549 align_flags alignment = label_to_alignment (insn);
20550 int align = alignment.levels[0].log;
20551 int max_skip = alignment.levels[0].maxskip;
20552
20553 if (max_skip > 15)
20554 max_skip = 15;
20555 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
20556 already in the current 16 byte page, because otherwise
20557 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
20558 bytes to reach 16 byte boundary. */
20559 if (align <= 0
20560 || (align <= 3 && max_skip != (1 << align) - 1))
20561 max_skip = 0;
20562 if (dump_file)
20563 fprintf (dump_file, "Label %i with max_skip %i\n",
20564 INSN_UID (insn), max_skip);
20565 if (max_skip)
20566 {
20567 while (nbytes + max_skip >= 16)
20568 {
20569 start = NEXT_INSN (start);
20570 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
20571 || CALL_P (start))
20572 njumps--, isjump = true;
20573 else
20574 isjump = false;
20575 nbytes -= ix86_min_insn_size (start);
20576 }
20577 }
20578 continue;
20579 }
20580
20581 min_size = ix86_min_insn_size (insn);
20582 nbytes += min_size;
20583 if (dump_file)
20584 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
20585 INSN_UID (insn), min_size);
20586 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
20587 || CALL_P (insn))
20588 njumps++;
20589 else
20590 continue;
20591
20592 while (njumps > 3)
20593 {
20594 start = NEXT_INSN (start);
20595 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
20596 || CALL_P (start))
20597 njumps--, isjump = true;
20598 else
20599 isjump = false;
20600 nbytes -= ix86_min_insn_size (start);
20601 }
20602 gcc_assert (njumps >= 0);
20603 if (dump_file)
20604 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
20605 INSN_UID (start), INSN_UID (insn), nbytes);
20606
20607 if (njumps == 3 && isjump && nbytes < 16)
20608 {
20609 int padsize = 15 - nbytes + ix86_min_insn_size (insn);
20610
20611 if (dump_file)
20612 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
20613 INSN_UID (insn), padsize);
20614 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
20615 }
20616 }
20617 }
20618 #endif
20619
20620 /* AMD Athlon works faster
20621    when RET is not the destination of a conditional jump or directly preceded
20622    by another jump instruction.  We avoid the penalty by inserting a NOP just
20623    before the RET instructions in such cases.  */
20624 static void
20625 ix86_pad_returns (void)
20626 {
20627 edge e;
20628 edge_iterator ei;
20629
20630 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20631 {
20632 basic_block bb = e->src;
20633 rtx_insn *ret = BB_END (bb);
20634 rtx_insn *prev;
20635 bool replace = false;
20636
20637 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
20638 || optimize_bb_for_size_p (bb))
20639 continue;
20640 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
20641 if (active_insn_p (prev) || LABEL_P (prev))
20642 break;
20643 if (prev && LABEL_P (prev))
20644 {
20645 edge e;
20646 edge_iterator ei;
20647
20648 FOR_EACH_EDGE (e, ei, bb->preds)
20649 if (EDGE_FREQUENCY (e) && e->src->index >= 0
20650 && !(e->flags & EDGE_FALLTHRU))
20651 {
20652 replace = true;
20653 break;
20654 }
20655 }
20656 if (!replace)
20657 {
20658 prev = prev_active_insn (ret);
20659 if (prev
20660 && ((JUMP_P (prev) && any_condjump_p (prev))
20661 || CALL_P (prev)))
20662 replace = true;
20663 /* Empty functions get branch mispredict even when
20664 the jump destination is not visible to us. */
20665 if (!prev && !optimize_function_for_size_p (cfun))
20666 replace = true;
20667 }
20668 if (replace)
20669 {
20670 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
20671 delete_insn (ret);
20672 }
20673 }
20674 }
20675
20676 /* Count the minimum number of instructions in BB. Return 4 if the
20677 number of instructions >= 4. */
20678
20679 static int
20680 ix86_count_insn_bb (basic_block bb)
20681 {
20682 rtx_insn *insn;
20683 int insn_count = 0;
20684
20685 /* Count number of instructions in this block. Return 4 if the number
20686 of instructions >= 4. */
20687 FOR_BB_INSNS (bb, insn)
20688 {
20689       /* This only happens in exit blocks.  */
20690 if (JUMP_P (insn)
20691 && ANY_RETURN_P (PATTERN (insn)))
20692 break;
20693
20694 if (NONDEBUG_INSN_P (insn)
20695 && GET_CODE (PATTERN (insn)) != USE
20696 && GET_CODE (PATTERN (insn)) != CLOBBER)
20697 {
20698 insn_count++;
20699 if (insn_count >= 4)
20700 return insn_count;
20701 }
20702 }
20703
20704 return insn_count;
20705 }
20706
20707
20708 /* Count the minimum number of instructions in code path in BB.
20709 Return 4 if the number of instructions >= 4. */
20710
20711 static int
20712 ix86_count_insn (basic_block bb)
20713 {
20714 edge e;
20715 edge_iterator ei;
20716 int min_prev_count;
20717
20718 /* Only bother counting instructions along paths with no
20719 more than 2 basic blocks between entry and exit. Given
20720 that BB has an edge to exit, determine if a predecessor
20721 of BB has an edge from entry. If so, compute the number
20722 of instructions in the predecessor block. If there
20723 happen to be multiple such blocks, compute the minimum. */
20724 min_prev_count = 4;
20725 FOR_EACH_EDGE (e, ei, bb->preds)
20726 {
20727 edge prev_e;
20728 edge_iterator prev_ei;
20729
20730 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
20731 {
20732 min_prev_count = 0;
20733 break;
20734 }
20735 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
20736 {
20737 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
20738 {
20739 int count = ix86_count_insn_bb (e->src);
20740 if (count < min_prev_count)
20741 min_prev_count = count;
20742 break;
20743 }
20744 }
20745 }
20746
20747 if (min_prev_count < 4)
20748 min_prev_count += ix86_count_insn_bb (bb);
20749
20750 return min_prev_count;
20751 }
20752
20753 /* Pad short function to 4 instructions. */
20754
20755 static void
20756 ix86_pad_short_function (void)
20757 {
20758 edge e;
20759 edge_iterator ei;
20760
20761 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20762 {
20763 rtx_insn *ret = BB_END (e->src);
20764 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
20765 {
20766 int insn_count = ix86_count_insn (e->src);
20767
20768 /* Pad short function. */
20769 if (insn_count < 4)
20770 {
20771 rtx_insn *insn = ret;
20772
20773 /* Find epilogue. */
20774 while (insn
20775 && (!NOTE_P (insn)
20776 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
20777 insn = PREV_INSN (insn);
20778
20779 if (!insn)
20780 insn = ret;
20781
20782 /* Two NOPs count as one instruction. */
20783 insn_count = 2 * (4 - insn_count);
20784 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
20785 }
20786 }
20787 }
20788 }
20789
20790 /* Fix up a Windows system unwinder issue. If an EH region falls through into
20791 the epilogue, the Windows system unwinder will apply epilogue logic and
20792 produce incorrect offsets. This can be avoided by adding a nop between
20793 the last insn that can throw and the first insn of the epilogue. */
20794
20795 static void
20796 ix86_seh_fixup_eh_fallthru (void)
20797 {
20798 edge e;
20799 edge_iterator ei;
20800
20801 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20802 {
20803 rtx_insn *insn, *next;
20804
20805 /* Find the beginning of the epilogue. */
20806 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
20807 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
20808 break;
20809 if (insn == NULL)
20810 continue;
20811
20812 /* We only care about preceding insns that can throw. */
20813 insn = prev_active_insn (insn);
20814 if (insn == NULL || !can_throw_internal (insn))
20815 continue;
20816
20817 /* Do not separate calls from their debug information. */
20818 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
20819 if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
20820 insn = next;
20821 else
20822 break;
20823
20824 emit_insn_after (gen_nops (const1_rtx), insn);
20825 }
20826 }
20827
20828 /* Implement machine specific optimizations. We implement padding of returns
20829    for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window.  */
20830 static void
20831 ix86_reorg (void)
20832 {
20833 /* We are freeing block_for_insn in the toplev to keep compatibility
20834 with old MDEP_REORGS that are not CFG based. Recompute it now. */
20835 compute_bb_for_insn ();
20836
20837 if (TARGET_SEH && current_function_has_exception_handlers ())
20838 ix86_seh_fixup_eh_fallthru ();
20839
20840 if (optimize && optimize_function_for_speed_p (cfun))
20841 {
20842 if (TARGET_PAD_SHORT_FUNCTION)
20843 ix86_pad_short_function ();
20844 else if (TARGET_PAD_RETURNS)
20845 ix86_pad_returns ();
20846 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
20847 if (TARGET_FOUR_JUMP_LIMIT)
20848 ix86_avoid_jump_mispredicts ();
20849 #endif
20850 }
20851 }
20852
20853 /* Return nonzero when a QImode register that must be represented via a REX
20854    prefix is used.  */
20855 bool
20856 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
20857 {
20858 int i;
20859 extract_insn_cached (insn);
20860 for (i = 0; i < recog_data.n_operands; i++)
20861 if (GENERAL_REG_P (recog_data.operand[i])
20862 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
20863 return true;
20864 return false;
20865 }
20866
20867 /* Return true when INSN mentions a register that must be encoded using a
20868    REX prefix.  */
20869 bool
20870 x86_extended_reg_mentioned_p (rtx insn)
20871 {
20872 subrtx_iterator::array_type array;
20873 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
20874 {
20875 const_rtx x = *iter;
20876 if (REG_P (x)
20877 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
20878 return true;
20879 }
20880 return false;
20881 }
20882
20883 /* If profitable, negate (without causing overflow) integer constant
20884 of mode MODE at location LOC. Return true in this case. */
20885 bool
20886 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
20887 {
20888 HOST_WIDE_INT val;
20889
20890 if (!CONST_INT_P (*loc))
20891 return false;
20892
20893 switch (mode)
20894 {
20895 case E_DImode:
20896 /* DImode x86_64 constants must fit in 32 bits. */
20897 gcc_assert (x86_64_immediate_operand (*loc, mode));
20898
20899 mode = SImode;
20900 break;
20901
20902 case E_SImode:
20903 case E_HImode:
20904 case E_QImode:
20905 break;
20906
20907 default:
20908 gcc_unreachable ();
20909 }
20910
20911 /* Avoid overflows. */
20912 if (mode_signbit_p (mode, *loc))
20913 return false;
20914
20915 val = INTVAL (*loc);
20916
20917 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
20918 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
20919 if ((val < 0 && val != -128)
20920 || val == 128)
20921 {
20922 *loc = GEN_INT (-val);
20923 return true;
20924 }
20925
20926 return false;
20927 }
20928
20929 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
20930 optabs would emit if we didn't have TFmode patterns. */
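/* A rough C-level sketch of the sequence emitted below, for a 64-bit
   unsigned input converted to double (illustrative only; the variable
   names are not taken from the surrounding code):

     if ((int64_t) in >= 0)
       out = (double) in;                        (value fits; plain convert)
     else
       {
         uint64_t half = (in >> 1) | (in & 1);   (halve, keep a sticky bit)
         out = (double) half;
         out = out + out;                        (double back to full value)
       }
*/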
20931
20932 void
20933 x86_emit_floatuns (rtx operands[2])
20934 {
20935 rtx_code_label *neglab, *donelab;
20936 rtx i0, i1, f0, in, out;
20937 machine_mode mode, inmode;
20938
20939 inmode = GET_MODE (operands[1]);
20940 gcc_assert (inmode == SImode || inmode == DImode);
20941
20942 out = operands[0];
20943 in = force_reg (inmode, operands[1]);
20944 mode = GET_MODE (out);
20945 neglab = gen_label_rtx ();
20946 donelab = gen_label_rtx ();
20947 f0 = gen_reg_rtx (mode);
20948
20949 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
20950
20951 expand_float (out, in, 0);
20952
20953 emit_jump_insn (gen_jump (donelab));
20954 emit_barrier ();
20955
20956 emit_label (neglab);
20957
20958 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
20959 1, OPTAB_DIRECT);
20960 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
20961 1, OPTAB_DIRECT);
20962 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
20963
20964 expand_float (f0, i0, 0);
20965
20966 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
20967
20968 emit_label (donelab);
20969 }
20970
20971 /* Target hook for scalar_mode_supported_p. */
20972 static bool
20973 ix86_scalar_mode_supported_p (scalar_mode mode)
20974 {
20975 if (DECIMAL_FLOAT_MODE_P (mode))
20976 return default_decimal_float_supported_p ();
20977 else if (mode == TFmode)
20978 return true;
20979 else
20980 return default_scalar_mode_supported_p (mode);
20981 }
20982
20983 /* Implements target hook vector_mode_supported_p. */
20984 static bool
20985 ix86_vector_mode_supported_p (machine_mode mode)
20986 {
20987 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
20988 return true;
20989 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
20990 return true;
20991 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
20992 return true;
20993 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
20994 return true;
20995 if ((TARGET_MMX || TARGET_MMX_WITH_SSE) && VALID_MMX_REG_MODE (mode))
20996 return true;
20997 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
20998 return true;
20999 return false;
21000 }
21001
21002 /* Target hook for c_mode_for_suffix. */
21003 static machine_mode
21004 ix86_c_mode_for_suffix (char suffix)
21005 {
21006 if (suffix == 'q')
21007 return TFmode;
21008 if (suffix == 'w')
21009 return XFmode;
21010
21011 return VOIDmode;
21012 }
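/* For instance (illustrative user code):

     __float128 q = 1.0q;    (the 'q' suffix selects TFmode)
     __float80  w = 1.0w;    (the 'w' suffix selects XFmode)

   All other suffixes are left to the generic code.  */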
21013
21014 /* Worker function for TARGET_MD_ASM_ADJUST.
21015
21016 We implement asm flag outputs, and maintain source compatibility
21017 with the old cc0-based compiler. */
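/* For example, user code can ask for a condition flag directly as an asm
   output (a minimal sketch; the variable names are illustrative):

     unsigned int x, y;
     unsigned char carry;
     asm ("addl %2, %0" : "+r" (x), "=@ccc" (carry) : "r" (y));

   The "=@ccc" constraint binds CARRY to the carry flag after the asm; this
   hook rewrites such outputs into reads of the flags register.  */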
21018
21019 static rtx_insn *
21020 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
21021 vec<const char *> &constraints,
21022 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
21023 {
21024 bool saw_asm_flag = false;
21025
21026 start_sequence ();
21027 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
21028 {
21029 const char *con = constraints[i];
21030 if (strncmp (con, "=@cc", 4) != 0)
21031 continue;
21032 con += 4;
21033 if (strchr (con, ',') != NULL)
21034 {
21035 error ("alternatives not allowed in %<asm%> flag output");
21036 continue;
21037 }
21038
21039 bool invert = false;
21040 if (con[0] == 'n')
21041 invert = true, con++;
21042
21043 machine_mode mode = CCmode;
21044 rtx_code code = UNKNOWN;
21045
21046 switch (con[0])
21047 {
21048 case 'a':
21049 if (con[1] == 0)
21050 mode = CCAmode, code = EQ;
21051 else if (con[1] == 'e' && con[2] == 0)
21052 mode = CCCmode, code = NE;
21053 break;
21054 case 'b':
21055 if (con[1] == 0)
21056 mode = CCCmode, code = EQ;
21057 else if (con[1] == 'e' && con[2] == 0)
21058 mode = CCAmode, code = NE;
21059 break;
21060 case 'c':
21061 if (con[1] == 0)
21062 mode = CCCmode, code = EQ;
21063 break;
21064 case 'e':
21065 if (con[1] == 0)
21066 mode = CCZmode, code = EQ;
21067 break;
21068 case 'g':
21069 if (con[1] == 0)
21070 mode = CCGCmode, code = GT;
21071 else if (con[1] == 'e' && con[2] == 0)
21072 mode = CCGCmode, code = GE;
21073 break;
21074 case 'l':
21075 if (con[1] == 0)
21076 mode = CCGCmode, code = LT;
21077 else if (con[1] == 'e' && con[2] == 0)
21078 mode = CCGCmode, code = LE;
21079 break;
21080 case 'o':
21081 if (con[1] == 0)
21082 mode = CCOmode, code = EQ;
21083 break;
21084 case 'p':
21085 if (con[1] == 0)
21086 mode = CCPmode, code = EQ;
21087 break;
21088 case 's':
21089 if (con[1] == 0)
21090 mode = CCSmode, code = EQ;
21091 break;
21092 case 'z':
21093 if (con[1] == 0)
21094 mode = CCZmode, code = EQ;
21095 break;
21096 }
21097 if (code == UNKNOWN)
21098 {
21099 error ("unknown %<asm%> flag output %qs", constraints[i]);
21100 continue;
21101 }
21102 if (invert)
21103 code = reverse_condition (code);
21104
21105 rtx dest = outputs[i];
21106 if (!saw_asm_flag)
21107 {
21108 /* This is the first asm flag output. Here we put the flags
21109 register in as the real output and adjust the condition to
21110 allow it. */
21111 constraints[i] = "=Bf";
21112 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
21113 saw_asm_flag = true;
21114 }
21115 else
21116 {
21117 /* We don't need the flags register as output twice. */
21118 constraints[i] = "=X";
21119 outputs[i] = gen_rtx_SCRATCH (SImode);
21120 }
21121
21122 rtx x = gen_rtx_REG (mode, FLAGS_REG);
21123 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
21124
21125 machine_mode dest_mode = GET_MODE (dest);
21126 if (!SCALAR_INT_MODE_P (dest_mode))
21127 {
21128 error ("invalid type for %<asm%> flag output");
21129 continue;
21130 }
21131
21132 if (dest_mode == QImode)
21133 emit_insn (gen_rtx_SET (dest, x));
21134 else
21135 {
21136 rtx reg = gen_reg_rtx (QImode);
21137 emit_insn (gen_rtx_SET (reg, x));
21138
21139 reg = convert_to_mode (dest_mode, reg, 1);
21140 emit_move_insn (dest, reg);
21141 }
21142 }
21143
21144 rtx_insn *seq = get_insns ();
21145 end_sequence ();
21146
21147 if (saw_asm_flag)
21148 return seq;
21149 else
21150 {
21151 /* If we had no asm flag outputs, clobber the flags. */
21152 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
21153 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
21154 return NULL;
21155 }
21156 }
21157
21158 /* Implements the target hook targetm.asm.encode_section_info.  */
21159
21160 static void ATTRIBUTE_UNUSED
21161 ix86_encode_section_info (tree decl, rtx rtl, int first)
21162 {
21163 default_encode_section_info (decl, rtl, first);
21164
21165 if (ix86_in_large_data_p (decl))
21166 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
21167 }
21168
21169 /* Worker function for REVERSE_CONDITION. */
21170
21171 enum rtx_code
21172 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
21173 {
21174 return (mode == CCFPmode
21175 ? reverse_condition_maybe_unordered (code)
21176 : reverse_condition (code));
21177 }
21178
21179 /* Output code to perform an x87 FP register move, from OPERANDS[1]
21180 to OPERANDS[0]. */
21181
21182 const char *
21183 output_387_reg_move (rtx_insn *insn, rtx *operands)
21184 {
21185 if (REG_P (operands[0]))
21186 {
21187 if (REG_P (operands[1])
21188 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
21189 {
21190 if (REGNO (operands[0]) == FIRST_STACK_REG)
21191 return output_387_ffreep (operands, 0);
21192 return "fstp\t%y0";
21193 }
21194 if (STACK_TOP_P (operands[0]))
21195 return "fld%Z1\t%y1";
21196 return "fst\t%y0";
21197 }
21198 else if (MEM_P (operands[0]))
21199 {
21200 gcc_assert (REG_P (operands[1]));
21201 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
21202 return "fstp%Z0\t%y0";
21203 else
21204 {
21205 /* There is no non-popping store to memory for XFmode.
21206 So if we need one, follow the store with a load. */
21207 if (GET_MODE (operands[0]) == XFmode)
21208 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
21209 else
21210 return "fst%Z0\t%y0";
21211 }
21212 }
21213 else
21214     gcc_unreachable ();
21215 }
21216 #ifdef TARGET_SOLARIS
21217 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
21218
21219 static void
21220 i386_solaris_elf_named_section (const char *name, unsigned int flags,
21221 tree decl)
21222 {
21223 /* With Binutils 2.15, the "@unwind" marker must be specified on
21224 every occurrence of the ".eh_frame" section, not just the first
21225 one. */
21226 if (TARGET_64BIT
21227 && strcmp (name, ".eh_frame") == 0)
21228 {
21229 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
21230 flags & SECTION_WRITE ? "aw" : "a");
21231 return;
21232 }
21233
21234 #ifndef USE_GAS
21235 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
21236 {
21237 solaris_elf_asm_comdat_section (name, flags, decl);
21238 return;
21239 }
21240
21241 /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
21242 SPARC assembler. One cannot mix single-letter flags and #exclude, so
21243 only emit the latter here. */
21244 if (flags & SECTION_EXCLUDE)
21245 {
21246 fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
21247 return;
21248 }
21249 #endif
21250
21251 default_elf_asm_named_section (name, flags, decl);
21252 }
21253 #endif /* TARGET_SOLARIS */
21254
21255 /* Return the mangling of TYPE if it is an extended fundamental type. */
21256
21257 static const char *
21258 ix86_mangle_type (const_tree type)
21259 {
21260 type = TYPE_MAIN_VARIANT (type);
21261
21262 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
21263 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
21264 return NULL;
21265
21266 switch (TYPE_MODE (type))
21267 {
21268 case E_TFmode:
21269 /* __float128 is "g". */
21270 return "g";
21271 case E_XFmode:
21272 /* "long double" or __float80 is "e". */
21273 return "e";
21274 default:
21275 return NULL;
21276 }
21277 }
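/* For example, under the Itanium C++ ABI a declaration such as
   void f (__float128) mangles as _Z1fg, while void f (__float80)
   mangles as _Z1fe (illustrative of the codes returned above).  */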
21278
21279 static GTY(()) tree ix86_tls_stack_chk_guard_decl;
21280
21281 static tree
21282 ix86_stack_protect_guard (void)
21283 {
21284 if (TARGET_SSP_TLS_GUARD)
21285 {
21286 tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
21287 int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
21288 tree type = build_qualified_type (type_node, qual);
21289 tree t;
21290
21291 if (global_options_set.x_ix86_stack_protector_guard_symbol_str)
21292 {
21293 t = ix86_tls_stack_chk_guard_decl;
21294
21295 if (t == NULL)
21296 {
21297 rtx x;
21298
21299 t = build_decl
21300 (UNKNOWN_LOCATION, VAR_DECL,
21301 get_identifier (ix86_stack_protector_guard_symbol_str),
21302 type);
21303 TREE_STATIC (t) = 1;
21304 TREE_PUBLIC (t) = 1;
21305 DECL_EXTERNAL (t) = 1;
21306 TREE_USED (t) = 1;
21307 TREE_THIS_VOLATILE (t) = 1;
21308 DECL_ARTIFICIAL (t) = 1;
21309 DECL_IGNORED_P (t) = 1;
21310
21311 /* Do not share RTL as the declaration is visible outside of
21312 current function. */
21313 x = DECL_RTL (t);
21314 RTX_FLAG (x, used) = 1;
21315
21316 ix86_tls_stack_chk_guard_decl = t;
21317 }
21318 }
21319 else
21320 {
21321 tree asptrtype = build_pointer_type (type);
21322
21323 t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
21324 t = build2 (MEM_REF, asptrtype, t,
21325 build_int_cst (asptrtype, 0));
21326 TREE_THIS_VOLATILE (t) = 1;
21327 }
21328
21329 return t;
21330 }
21331
21332 return default_stack_protect_guard ();
21333 }
21334
21335 /* For 32-bit code we can save PIC register setup by using
21336 __stack_chk_fail_local hidden function instead of calling
21337 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
21338 register, so it is better to call __stack_chk_fail directly. */
21339
21340 static tree ATTRIBUTE_UNUSED
21341 ix86_stack_protect_fail (void)
21342 {
21343 return TARGET_64BIT
21344 ? default_external_stack_protect_fail ()
21345 : default_hidden_stack_protect_fail ();
21346 }
21347
21348 /* Select a format to encode pointers in exception handling data. CODE
21349 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
21350 true if the symbol may be affected by dynamic relocations.
21351
21352 ??? All x86 object file formats are capable of representing this.
21353 After all, the relocation needed is the same as for the call insn.
21354 Whether or not a particular assembler allows us to enter such, I
21355 guess we'll have to see. */
21356
21357 int
21358 asm_preferred_eh_data_format (int code, int global)
21359 {
21360 /* PE-COFF is effectively always -fPIC because of the .reloc section. */
21361 if (flag_pic || TARGET_PECOFF)
21362 {
21363 int type = DW_EH_PE_sdata8;
21364 if (!TARGET_64BIT
21365 || ix86_cmodel == CM_SMALL_PIC
21366 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
21367 type = DW_EH_PE_sdata4;
21368 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
21369 }
21370
21371 if (ix86_cmodel == CM_SMALL
21372 || (ix86_cmodel == CM_MEDIUM && code))
21373 return DW_EH_PE_udata4;
21374
21375 return DW_EH_PE_absptr;
21376 }
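/* For example (illustrative): x86-64 -fPIC code with the default small PIC
   model encodes a global symbol as
   DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4, whereas non-PIC
   small-model code simply uses DW_EH_PE_udata4.  */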
21377
21378 /* Implement targetm.vectorize.builtin_vectorization_cost. */
21379 static int
21380 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
21381 tree vectype, int)
21382 {
21383 bool fp = false;
21384 machine_mode mode = TImode;
21385 int index;
21386 if (vectype != NULL)
21387 {
21388 fp = FLOAT_TYPE_P (vectype);
21389 mode = TYPE_MODE (vectype);
21390 }
21391
21392 switch (type_of_cost)
21393 {
21394 case scalar_stmt:
21395 return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
21396
21397 case scalar_load:
21398       /* Load/store costs are relative to the register move cost, which is 2.
21399 	 Rescale to COSTS_N_INSNS so everything has the same base.  */
21400 return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
21401 : ix86_cost->int_load [2]) / 2;
21402
21403 case scalar_store:
21404 return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
21405 : ix86_cost->int_store [2]) / 2;
21406
21407 case vector_stmt:
21408 return ix86_vec_cost (mode,
21409 fp ? ix86_cost->addss : ix86_cost->sse_op);
21410
21411 case vector_load:
21412 index = sse_store_index (mode);
21413 /* See PR82713 - we may end up being called on non-vector type. */
21414 if (index < 0)
21415 index = 2;
21416 return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;
21417
21418 case vector_store:
21419 index = sse_store_index (mode);
21420 /* See PR82713 - we may end up being called on non-vector type. */
21421 if (index < 0)
21422 index = 2;
21423 return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;
21424
21425 case vec_to_scalar:
21426 case scalar_to_vec:
21427 return ix86_vec_cost (mode, ix86_cost->sse_op);
21428
21429 /* We should have separate costs for unaligned loads and gather/scatter.
21430 Do that incrementally. */
21431 case unaligned_load:
21432 index = sse_store_index (mode);
21433 /* See PR82713 - we may end up being called on non-vector type. */
21434 if (index < 0)
21435 index = 2;
21436 return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;
21437
21438 case unaligned_store:
21439 index = sse_store_index (mode);
21440 /* See PR82713 - we may end up being called on non-vector type. */
21441 if (index < 0)
21442 index = 2;
21443 return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;
21444
21445 case vector_gather_load:
21446 return ix86_vec_cost (mode,
21447 COSTS_N_INSNS
21448 (ix86_cost->gather_static
21449 + ix86_cost->gather_per_elt
21450 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
21451
21452 case vector_scatter_store:
21453 return ix86_vec_cost (mode,
21454 COSTS_N_INSNS
21455 (ix86_cost->scatter_static
21456 + ix86_cost->scatter_per_elt
21457 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
21458
21459 case cond_branch_taken:
21460 return ix86_cost->cond_taken_branch_cost;
21461
21462 case cond_branch_not_taken:
21463 return ix86_cost->cond_not_taken_branch_cost;
21464
21465 case vec_perm:
21466 case vec_promote_demote:
21467 return ix86_vec_cost (mode, ix86_cost->sse_op);
21468
21469 case vec_construct:
21470 {
21471 /* N element inserts into SSE vectors. */
21472 int cost = TYPE_VECTOR_SUBPARTS (vectype) * ix86_cost->sse_op;
21473 /* One vinserti128 for combining two SSE vectors for AVX256. */
21474 if (GET_MODE_BITSIZE (mode) == 256)
21475 cost += ix86_vec_cost (mode, ix86_cost->addss);
21476 /* One vinserti64x4 and two vinserti128 for combining SSE
21477 and AVX256 vectors to AVX512. */
21478 else if (GET_MODE_BITSIZE (mode) == 512)
21479 cost += 3 * ix86_vec_cost (mode, ix86_cost->addss);
21480 return cost;
21481 }
21482
21483 default:
21484 gcc_unreachable ();
21485 }
21486 }
21487
21488
21489 /* This function returns the calling abi specific va_list type node.
21490 It returns the FNDECL specific va_list type. */
21491
21492 static tree
21493 ix86_fn_abi_va_list (tree fndecl)
21494 {
21495 if (!TARGET_64BIT)
21496 return va_list_type_node;
21497 gcc_assert (fndecl != NULL_TREE);
21498
21499 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
21500 return ms_va_list_type_node;
21501 else
21502 return sysv_va_list_type_node;
21503 }
21504
21505 /* Returns the canonical va_list type specified by TYPE.  If there
21506    is no valid TYPE provided, it returns NULL_TREE.  */
21507
21508 static tree
21509 ix86_canonical_va_list_type (tree type)
21510 {
21511 if (TARGET_64BIT)
21512 {
21513 if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
21514 return ms_va_list_type_node;
21515
21516 if ((TREE_CODE (type) == ARRAY_TYPE
21517 && integer_zerop (array_type_nelts (type)))
21518 || POINTER_TYPE_P (type))
21519 {
21520 tree elem_type = TREE_TYPE (type);
21521 if (TREE_CODE (elem_type) == RECORD_TYPE
21522 && lookup_attribute ("sysv_abi va_list",
21523 TYPE_ATTRIBUTES (elem_type)))
21524 return sysv_va_list_type_node;
21525 }
21526
21527 return NULL_TREE;
21528 }
21529
21530 return std_canonical_va_list_type (type);
21531 }
21532
21533 /* Iterate through the target-specific builtin types for va_list.
21534 IDX denotes the iterator, *PTREE is set to the result type of
21535 the va_list builtin, and *PNAME to its internal type.
21536 Returns zero if there is no element for this index, otherwise
21537 IDX should be increased upon the next call.
21538 Note, do not iterate a base builtin's name like __builtin_va_list.
21539 Used from c_common_nodes_and_builtins. */
21540
21541 static int
21542 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
21543 {
21544 if (TARGET_64BIT)
21545 {
21546 switch (idx)
21547 {
21548 default:
21549 break;
21550
21551 case 0:
21552 *ptree = ms_va_list_type_node;
21553 *pname = "__builtin_ms_va_list";
21554 return 1;
21555
21556 case 1:
21557 *ptree = sysv_va_list_type_node;
21558 *pname = "__builtin_sysv_va_list";
21559 return 1;
21560 }
21561 }
21562
21563 return 0;
21564 }
21565
21566 #undef TARGET_SCHED_DISPATCH
21567 #define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
21568 #undef TARGET_SCHED_DISPATCH_DO
21569 #define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
21570 #undef TARGET_SCHED_REASSOCIATION_WIDTH
21571 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
21572 #undef TARGET_SCHED_REORDER
21573 #define TARGET_SCHED_REORDER ix86_atom_sched_reorder
21574 #undef TARGET_SCHED_ADJUST_PRIORITY
21575 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
21576 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
21577 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
21578 ix86_dependencies_evaluation_hook
21579
21580
21581 /* Implementation of the reassociation_width target hook, used by the
21582    reassoc phase to identify the level of parallelism in a reassociated
21583    tree.  The statement's tree_code is passed in OP.  The arguments' type
21584    is passed in MODE.  */
21585
21586 static int
21587 ix86_reassociation_width (unsigned int op, machine_mode mode)
21588 {
21589 int width = 1;
21590 /* Vector part. */
21591 if (VECTOR_MODE_P (mode))
21592 {
21593 int div = 1;
21594 if (INTEGRAL_MODE_P (mode))
21595 width = ix86_cost->reassoc_vec_int;
21596 else if (FLOAT_MODE_P (mode))
21597 width = ix86_cost->reassoc_vec_fp;
21598
21599 if (width == 1)
21600 return 1;
21601
21602 /* Integer vector instructions execute in FP unit
21603 and can execute 3 additions and one multiplication per cycle. */
21604 if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2
21605 || ix86_tune == PROCESSOR_ZNVER3)
21606 && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
21607 return 1;
21608
21609       /* Account for targets that split wide vectors into multiple parts.  */
21610 if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128)
21611 div = GET_MODE_BITSIZE (mode) / 128;
21612 else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
21613 div = GET_MODE_BITSIZE (mode) / 64;
21614 width = (width + div - 1) / div;
21615 }
21616 /* Scalar part. */
21617 else if (INTEGRAL_MODE_P (mode))
21618 width = ix86_cost->reassoc_int;
21619 else if (FLOAT_MODE_P (mode))
21620 width = ix86_cost->reassoc_fp;
21621
21622 /* Avoid using too many registers in 32bit mode. */
21623 if (!TARGET_64BIT && width > 2)
21624 width = 2;
21625 return width;
21626 }
21627
21628 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
21629 place emms and femms instructions. */
21630
21631 static machine_mode
21632 ix86_preferred_simd_mode (scalar_mode mode)
21633 {
21634 if (!TARGET_SSE)
21635 return word_mode;
21636
21637 switch (mode)
21638 {
21639 case E_QImode:
21640 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
21641 return V64QImode;
21642 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21643 return V32QImode;
21644 else
21645 return V16QImode;
21646
21647 case E_HImode:
21648 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
21649 return V32HImode;
21650 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21651 return V16HImode;
21652 else
21653 return V8HImode;
21654
21655 case E_SImode:
21656 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
21657 return V16SImode;
21658 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21659 return V8SImode;
21660 else
21661 return V4SImode;
21662
21663 case E_DImode:
21664 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
21665 return V8DImode;
21666 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21667 return V4DImode;
21668 else
21669 return V2DImode;
21670
21671 case E_SFmode:
21672 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
21673 return V16SFmode;
21674 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21675 return V8SFmode;
21676 else
21677 return V4SFmode;
21678
21679 case E_DFmode:
21680 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
21681 return V8DFmode;
21682 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21683 return V4DFmode;
21684 else if (TARGET_SSE2)
21685 return V2DFmode;
21686 /* FALLTHRU */
21687
21688 default:
21689 return word_mode;
21690 }
21691 }
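/* For example (illustrative): with -mavx2 and default tuning, SImode
   elements prefer V8SImode and SFmode elements prefer V8SFmode; with only
   SSE2 enabled they prefer V4SImode and V4SFmode respectively.  */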
21692
21693 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
21694 vectors. If AVX512F is enabled then try vectorizing with 512bit,
21695 256bit and 128bit vectors. */
21696
21697 static unsigned int
21698 ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
21699 {
21700 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
21701 {
21702 modes->safe_push (V64QImode);
21703 modes->safe_push (V32QImode);
21704 modes->safe_push (V16QImode);
21705 }
21706 else if (TARGET_AVX512F && all)
21707 {
21708 modes->safe_push (V32QImode);
21709 modes->safe_push (V16QImode);
21710 modes->safe_push (V64QImode);
21711 }
21712 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21713 {
21714 modes->safe_push (V32QImode);
21715 modes->safe_push (V16QImode);
21716 }
21717 else if (TARGET_AVX && all)
21718 {
21719 modes->safe_push (V16QImode);
21720 modes->safe_push (V32QImode);
21721 }
21722 else if (TARGET_MMX_WITH_SSE)
21723 modes->safe_push (V16QImode);
21724
21725 if (TARGET_MMX_WITH_SSE)
21726 modes->safe_push (V8QImode);
21727
21728 return 0;
21729 }
21730
21731 /* Implementation of targetm.vectorize.get_mask_mode.  */
21732
21733 static opt_machine_mode
21734 ix86_get_mask_mode (machine_mode data_mode)
21735 {
21736 unsigned vector_size = GET_MODE_SIZE (data_mode);
21737 unsigned nunits = GET_MODE_NUNITS (data_mode);
21738 unsigned elem_size = vector_size / nunits;
21739
21740 /* Scalar mask case. */
21741 if ((TARGET_AVX512F && vector_size == 64)
21742 || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
21743 {
21744 if (elem_size == 4
21745 || elem_size == 8
21746 || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2)))
21747 return smallest_int_mode_for_size (nunits);
21748 }
21749
21750 scalar_int_mode elem_mode
21751 = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT);
21752
21753 gcc_assert (elem_size * nunits == vector_size);
21754
21755 return mode_for_vector (elem_mode, nunits);
21756 }
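/* For example (illustrative): with AVX512F enabled, a V16SFmode vector
   (64 bytes, 16 four-byte elements) gets an HImode mask with one bit per
   element, while a V4SFmode vector without AVX512VL falls back to a
   V4SImode vector mask.  */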
21757
21758
21759
21760 /* Return class of registers which could be used for pseudo of MODE
21761 and of class RCLASS for spilling instead of memory. Return NO_REGS
21762 if it is not possible or non-profitable. */
21763
21764 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
21765
21766 static reg_class_t
21767 ix86_spill_class (reg_class_t rclass, machine_mode mode)
21768 {
21769 if (0 && TARGET_GENERAL_REGS_SSE_SPILL
21770 && TARGET_SSE2
21771 && TARGET_INTER_UNIT_MOVES_TO_VEC
21772 && TARGET_INTER_UNIT_MOVES_FROM_VEC
21773 && (mode == SImode || (TARGET_64BIT && mode == DImode))
21774 && INTEGER_CLASS_P (rclass))
21775 return ALL_SSE_REGS;
21776 return NO_REGS;
21777 }
21778
21779 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
21780 but returns a lower bound. */
21781
21782 static unsigned int
21783 ix86_max_noce_ifcvt_seq_cost (edge e)
21784 {
21785 bool predictable_p = predictable_edge_p (e);
21786 if (predictable_p)
21787 {
21788 if (global_options_set.x_param_max_rtl_if_conversion_predictable_cost)
21789 return param_max_rtl_if_conversion_predictable_cost;
21790 }
21791 else
21792 {
21793 if (global_options_set.x_param_max_rtl_if_conversion_unpredictable_cost)
21794 return param_max_rtl_if_conversion_unpredictable_cost;
21795 }
21796
21797 return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2);
21798 }
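
/* Rough numbers (illustrative): unless the corresponding --param is given
   explicitly, a tuning with a branch cost of 3 caps the if-converted
   sequence at 3 * COSTS_N_INSNS (2) = 24 cost units; the
   max-rtl-if-conversion-predictable-cost and
   max-rtl-if-conversion-unpredictable-cost params override this cap when
   they are set.  */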
21799
21800 /* Return true if SEQ is a good candidate as a replacement for the
21801 if-convertible sequence described in IF_INFO. */
21802
21803 static bool
21804 ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
21805 {
21806 if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
21807 {
21808 int cmov_cnt = 0;
21809 /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
21810 Maybe we should allow even more conditional moves as long as they
21811 are used far enough not to stall the CPU, or also consider
21812 IF_INFO->TEST_BB succ edge probabilities. */
21813 for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
21814 {
21815 rtx set = single_set (insn);
21816 if (!set)
21817 continue;
21818 if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
21819 continue;
21820 rtx src = SET_SRC (set);
21821 machine_mode mode = GET_MODE (src);
21822 if (GET_MODE_CLASS (mode) != MODE_INT
21823 && GET_MODE_CLASS (mode) != MODE_FLOAT)
21824 continue;
21825 if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
21826 || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
21827 continue;
21828 /* insn is CMOV or FCMOV. */
21829 if (++cmov_cnt > 1)
21830 return false;
21831 }
21832 }
21833 return default_noce_conversion_profitable_p (seq, if_info);
21834 }
21835
21836 /* Implement targetm.vectorize.init_cost. */
21837
21838 static void *
21839 ix86_init_cost (class loop *)
21840 {
21841 unsigned *cost = XNEWVEC (unsigned, 3);
21842 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
21843 return cost;
21844 }
21845
21846 /* Implement targetm.vectorize.add_stmt_cost. */
21847
21848 static unsigned
21849 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
21850 class _stmt_vec_info *stmt_info, int misalign,
21851 enum vect_cost_model_location where)
21852 {
21853 unsigned *cost = (unsigned *) data;
21854 unsigned retval = 0;
21855 bool scalar_p
21856 = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
21857
21858 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
21859   int stmt_cost = -1;
21860
21861 bool fp = false;
21862 machine_mode mode = scalar_p ? SImode : TImode;
21863
21864 if (vectype != NULL)
21865 {
21866 fp = FLOAT_TYPE_P (vectype);
21867 mode = TYPE_MODE (vectype);
21868 if (scalar_p)
21869 mode = TYPE_MODE (TREE_TYPE (vectype));
21870 }
21871
21872 if ((kind == vector_stmt || kind == scalar_stmt)
21873 && stmt_info
21874 && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
21875 {
21876 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
21877 /*machine_mode inner_mode = mode;
21878 if (VECTOR_MODE_P (mode))
21879 inner_mode = GET_MODE_INNER (mode);*/
21880
21881 switch (subcode)
21882 {
21883 case PLUS_EXPR:
21884 case POINTER_PLUS_EXPR:
21885 case MINUS_EXPR:
21886 if (kind == scalar_stmt)
21887 {
21888 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21889 stmt_cost = ix86_cost->addss;
21890 else if (X87_FLOAT_MODE_P (mode))
21891 stmt_cost = ix86_cost->fadd;
21892 else
21893 stmt_cost = ix86_cost->add;
21894 }
21895 else
21896 stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
21897 : ix86_cost->sse_op);
21898 break;
21899
21900 case MULT_EXPR:
21901 case WIDEN_MULT_EXPR:
21902 case MULT_HIGHPART_EXPR:
21903 stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
21904 break;
21905 case NEGATE_EXPR:
21906 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21907 stmt_cost = ix86_cost->sse_op;
21908 else if (X87_FLOAT_MODE_P (mode))
21909 stmt_cost = ix86_cost->fchs;
21910 else if (VECTOR_MODE_P (mode))
21911 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
21912 else
21913 stmt_cost = ix86_cost->add;
21914 break;
21915 case TRUNC_DIV_EXPR:
21916 case CEIL_DIV_EXPR:
21917 case FLOOR_DIV_EXPR:
21918 case ROUND_DIV_EXPR:
21919 case TRUNC_MOD_EXPR:
21920 case CEIL_MOD_EXPR:
21921 case FLOOR_MOD_EXPR:
21922 case RDIV_EXPR:
21923 case ROUND_MOD_EXPR:
21924 case EXACT_DIV_EXPR:
21925 stmt_cost = ix86_division_cost (ix86_cost, mode);
21926 break;
21927
21928 case RSHIFT_EXPR:
21929 case LSHIFT_EXPR:
21930 case LROTATE_EXPR:
21931 case RROTATE_EXPR:
21932 {
21933 tree op2 = gimple_assign_rhs2 (stmt_info->stmt);
21934 stmt_cost = ix86_shift_rotate_cost
21935 (ix86_cost, mode,
21936 TREE_CODE (op2) == INTEGER_CST,
21937 cst_and_fits_in_hwi (op2) ? int_cst_value (op2) : -1,
21938 true, false, false, NULL, NULL);
21939 }
21940 break;
21941 case NOP_EXPR:
21942 /* Only sign-conversions are free. */
21943 if (tree_nop_conversion_p
21944 (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
21945 TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
21946 stmt_cost = 0;
21947 break;
21948
21949 case BIT_IOR_EXPR:
21950 case ABS_EXPR:
21951 case ABSU_EXPR:
21952 case MIN_EXPR:
21953 case MAX_EXPR:
21954 case BIT_XOR_EXPR:
21955 case BIT_AND_EXPR:
21956 case BIT_NOT_EXPR:
21957 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21958 stmt_cost = ix86_cost->sse_op;
21959 else if (VECTOR_MODE_P (mode))
21960 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
21961 else
21962 stmt_cost = ix86_cost->add;
21963 break;
21964 default:
21965 break;
21966 }
21967 }
21968
21969 combined_fn cfn;
21970 if ((kind == vector_stmt || kind == scalar_stmt)
21971 && stmt_info
21972 && stmt_info->stmt
21973 && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
21974 switch (cfn)
21975 {
21976 case CFN_FMA:
21977 stmt_cost = ix86_vec_cost (mode,
21978 mode == SFmode ? ix86_cost->fmass
21979 : ix86_cost->fmasd);
21980 break;
21981 default:
21982 break;
21983 }
21984
21985 /* If we do elementwise loads into a vector then we are bound by
21986 latency and execution resources for the many scalar loads
21987 (AGU and load ports). Try to account for this by scaling the
21988 construction cost by the number of elements involved. */
21989 if ((kind == vec_construct || kind == vec_to_scalar)
21990 && stmt_info
21991 && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
21992 || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
21993 && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
21994 && TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info))) != INTEGER_CST)
21995 {
21996 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
21997 stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
21998 }
21999 if (stmt_cost == -1)
22000 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
22001
22002 /* Penalize DFmode vector operations for Bonnell. */
22003 if (TARGET_BONNELL && kind == vector_stmt
22004 && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
22005 stmt_cost *= 5; /* FIXME: The value here is arbitrary. */
22006
22007 /* Statements in an inner loop relative to the loop being
22008 vectorized are weighted more heavily. The value here is
22009 arbitrary and could potentially be improved with analysis. */
22010 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
22011 count *= 50; /* FIXME. */
22012
22013 retval = (unsigned) (count * stmt_cost);
22014
22015   /* We need to multiply all vector statement costs by 1.7 (estimated cost)
22016      for Silvermont, as it has an out-of-order integer pipeline and can execute
22017      2 scalar instructions per tick, but has an in-order SIMD pipeline.  */
22018 if ((TARGET_SILVERMONT || TARGET_GOLDMONT || TARGET_GOLDMONT_PLUS
22019 || TARGET_TREMONT || TARGET_INTEL) && stmt_info && stmt_info->stmt)
22020 {
22021 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
22022 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
22023 retval = (retval * 17) / 10;
22024 }
22025
22026 cost[where] += retval;
22027
22028 return retval;
22029 }
22030
22031 /* Implement targetm.vectorize.finish_cost. */
22032
22033 static void
22034 ix86_finish_cost (void *data, unsigned *prologue_cost,
22035 unsigned *body_cost, unsigned *epilogue_cost)
22036 {
22037 unsigned *cost = (unsigned *) data;
22038 *prologue_cost = cost[vect_prologue];
22039 *body_cost = cost[vect_body];
22040 *epilogue_cost = cost[vect_epilogue];
22041 }
22042
22043 /* Implement targetm.vectorize.destroy_cost_data. */
22044
22045 static void
22046 ix86_destroy_cost_data (void *data)
22047 {
22048 free (data);
22049 }
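
/* Lifecycle sketch (illustrative): the vectorizer calls ix86_init_cost once
   per candidate loop or basic block, feeds every scalar and vector statement
   it considers through ix86_add_stmt_cost (which accumulates into the
   prologue/body/epilogue buckets above), reads the totals back with
   ix86_finish_cost to compare the scalar and vectorized versions, and
   finally releases the accumulator with ix86_destroy_cost_data.  */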
22050
22051 /* Validate target specific memory model bits in VAL. */
22052
22053 static unsigned HOST_WIDE_INT
22054 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
22055 {
22056 enum memmodel model = memmodel_from_int (val);
22057 bool strong;
22058
22059 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
22060 |MEMMODEL_MASK)
22061 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
22062 {
22063 warning (OPT_Winvalid_memory_model,
22064 "unknown architecture specific memory model");
22065 return MEMMODEL_SEQ_CST;
22066 }
22067 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
22068 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
22069 {
22070 warning (OPT_Winvalid_memory_model,
22071 "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
22072 "memory model");
22073 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
22074 }
22075 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
22076 {
22077 warning (OPT_Winvalid_memory_model,
22078 "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
22079 "memory model");
22080 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
22081 }
22082 return val;
22083 }
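
/* Usage sketch (illustrative, not from this file): user code requests
   hardware lock elision by or-ing the HLE bits into the memory model, e.g.
     __atomic_exchange_n (&lock, 1, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);
   which this hook accepts unchanged.  Pairing __ATOMIC_HLE_ACQUIRE with a
   weaker model such as __ATOMIC_RELAXED, or setting both HLE bits at once,
   is diagnosed with -Winvalid-memory-model and the model is forced to
   sequential consistency as shown above.  */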
22084
22085 /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
22086 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
22087 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
22088 or number of vecsize_mangle variants that should be emitted. */
22089
22090 static int
22091 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
22092 struct cgraph_simd_clone *clonei,
22093 tree base_type, int num)
22094 {
22095 int ret = 1;
22096
22097 if (clonei->simdlen
22098 && (clonei->simdlen < 2
22099 || clonei->simdlen > 1024
22100 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
22101 {
22102 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
22103 "unsupported simdlen %d", clonei->simdlen);
22104 return 0;
22105 }
22106
22107 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
22108 if (TREE_CODE (ret_type) != VOID_TYPE)
22109 switch (TYPE_MODE (ret_type))
22110 {
22111 case E_QImode:
22112 case E_HImode:
22113 case E_SImode:
22114 case E_DImode:
22115 case E_SFmode:
22116 case E_DFmode:
22117 /* case E_SCmode: */
22118 /* case E_DCmode: */
22119 if (!AGGREGATE_TYPE_P (ret_type))
22120 break;
22121 /* FALLTHRU */
22122 default:
22123 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
22124 "unsupported return type %qT for simd", ret_type);
22125 return 0;
22126 }
22127
22128 tree t;
22129 int i;
22130 tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
22131 bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);
22132
22133 for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
22134 t && t != void_list_node; t = TREE_CHAIN (t), i++)
22135 {
22136 tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
22137 switch (TYPE_MODE (arg_type))
22138 {
22139 case E_QImode:
22140 case E_HImode:
22141 case E_SImode:
22142 case E_DImode:
22143 case E_SFmode:
22144 case E_DFmode:
22145 /* case E_SCmode: */
22146 /* case E_DCmode: */
22147 if (!AGGREGATE_TYPE_P (arg_type))
22148 break;
22149 /* FALLTHRU */
22150 default:
22151 if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
22152 break;
22153 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
22154 "unsupported argument type %qT for simd", arg_type);
22155 return 0;
22156 }
22157 }
22158
22159 if (!TREE_PUBLIC (node->decl))
22160 {
22161 /* If the function isn't exported, we can pick up just one ISA
22162 for the clones. */
22163 if (TARGET_AVX512F)
22164 clonei->vecsize_mangle = 'e';
22165 else if (TARGET_AVX2)
22166 clonei->vecsize_mangle = 'd';
22167 else if (TARGET_AVX)
22168 clonei->vecsize_mangle = 'c';
22169 else
22170 clonei->vecsize_mangle = 'b';
22171 ret = 1;
22172 }
22173 else
22174 {
22175 clonei->vecsize_mangle = "bcde"[num];
22176 ret = 4;
22177 }
22178 clonei->mask_mode = VOIDmode;
22179 switch (clonei->vecsize_mangle)
22180 {
22181 case 'b':
22182 clonei->vecsize_int = 128;
22183 clonei->vecsize_float = 128;
22184 break;
22185 case 'c':
22186 clonei->vecsize_int = 128;
22187 clonei->vecsize_float = 256;
22188 break;
22189 case 'd':
22190 clonei->vecsize_int = 256;
22191 clonei->vecsize_float = 256;
22192 break;
22193 case 'e':
22194 clonei->vecsize_int = 512;
22195 clonei->vecsize_float = 512;
22196 if (TYPE_MODE (base_type) == QImode)
22197 clonei->mask_mode = DImode;
22198 else
22199 clonei->mask_mode = SImode;
22200 break;
22201 }
22202 if (clonei->simdlen == 0)
22203 {
22204 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
22205 clonei->simdlen = clonei->vecsize_int;
22206 else
22207 clonei->simdlen = clonei->vecsize_float;
22208 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
22209 }
22210 else if (clonei->simdlen > 16)
22211 {
22212       /* For compatibility with ICC, use the same upper bounds
22213 	 for simdlen.  In particular, for CTYPE below, use the return type,
22214 	 unless the function returns void, in which case use the characteristic
22215 	 type.  If it is possible for the given SIMDLEN to pass a CTYPE value
22216 	 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
22217 	 for 64-bit code), accept that SIMDLEN, otherwise warn and don't
22218 	 emit the corresponding clone.  */
22219 tree ctype = ret_type;
22220 if (TREE_CODE (ret_type) == VOID_TYPE)
22221 ctype = base_type;
22222 int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
22223 if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
22224 cnt /= clonei->vecsize_int;
22225 else
22226 cnt /= clonei->vecsize_float;
22227 if (cnt > (TARGET_64BIT ? 16 : 8))
22228 {
22229 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
22230 "unsupported simdlen %d", clonei->simdlen);
22231 return 0;
22232 }
22233 }
22234 return ret;
22235 }
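
/* Example (illustrative, assuming the usual x86 vector ABI conventions): for
   an exported function declared as
     #pragma omp declare simd
     float f (float x);
   this hook is called with NUM 0..3, returns 4, and the resulting clones use
   vecsize_mangle 'b', 'c', 'd' and 'e' with simdlen 4, 8, 8 and 16
   respectively (128-, 256-, 256- and 512-bit float vectors).  */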
22236
22237 /* If SIMD clone NODE can't be used in a vectorized loop
22238 in current function, return -1, otherwise return a badness of using it
22239 (0 if it is most desirable from vecsize_mangle point of view, 1
22240 slightly less desirable, etc.). */
22241
22242 static int
22243 ix86_simd_clone_usable (struct cgraph_node *node)
22244 {
22245 switch (node->simdclone->vecsize_mangle)
22246 {
22247 case 'b':
22248 if (!TARGET_SSE2)
22249 return -1;
22250 if (!TARGET_AVX)
22251 return 0;
22252 return TARGET_AVX2 ? 2 : 1;
22253 case 'c':
22254 if (!TARGET_AVX)
22255 return -1;
22256 return TARGET_AVX2 ? 1 : 0;
22257 case 'd':
22258 if (!TARGET_AVX2)
22259 return -1;
22260 return 0;
22261 case 'e':
22262 if (!TARGET_AVX512F)
22263 return -1;
22264 return 0;
22265 default:
22266 gcc_unreachable ();
22267 }
22268 }
22269
22270 /* This function adjusts the unroll factor based on
22271    the hardware capabilities.  For example, bdver3 has
22272    a loop buffer which makes unrolling of smaller
22273    loops less important.  This function decides the
22274    unroll factor using the number of memory references
22275    (the value 32 is used) as a heuristic.  */
22276
22277 static unsigned
22278 ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop)
22279 {
22280 basic_block *bbs;
22281 rtx_insn *insn;
22282 unsigned i;
22283 unsigned mem_count = 0;
22284
22285 if (!TARGET_ADJUST_UNROLL)
22286 return nunroll;
22287
22288 /* Count the number of memory references within the loop body.
22289 This value determines the unrolling factor for bdver3 and bdver4
22290 architectures. */
22291 subrtx_iterator::array_type array;
22292 bbs = get_loop_body (loop);
22293 for (i = 0; i < loop->num_nodes; i++)
22294 FOR_BB_INSNS (bbs[i], insn)
22295 if (NONDEBUG_INSN_P (insn))
22296 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
22297 if (const_rtx x = *iter)
22298 if (MEM_P (x))
22299 {
22300 machine_mode mode = GET_MODE (x);
22301 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
22302 if (n_words > 4)
22303 mem_count += 2;
22304 else
22305 mem_count += 1;
22306 }
22307 free (bbs);
22308
22309   if (mem_count && mem_count <= 32)
22310 return MIN (nunroll, 32 / mem_count);
22311
22312 return nunroll;
22313 }
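
/* Worked example (illustrative): on a tuning with TARGET_ADJUST_UNROLL set
   (bdver3/bdver4), a loop body containing 8 word-sized memory references
   gets its unroll factor capped at MIN (nunroll, 32 / 8) = 4; a loop with
   more than 32 such references keeps the caller-requested factor.  */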
22314
22315
22316 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
22317
22318 static bool
22319 ix86_float_exceptions_rounding_supported_p (void)
22320 {
22321 /* For x87 floating point with standard excess precision handling,
22322 there is no adddf3 pattern (since x87 floating point only has
22323 XFmode operations) so the default hook implementation gets this
22324 wrong. */
22325 return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH);
22326 }
22327
22328 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
22329
22330 static void
22331 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
22332 {
22333 if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH))
22334 return;
22335 tree exceptions_var = create_tmp_var_raw (integer_type_node);
22336 if (TARGET_80387)
22337 {
22338 tree fenv_index_type = build_index_type (size_int (6));
22339 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
22340 tree fenv_var = create_tmp_var_raw (fenv_type);
22341 TREE_ADDRESSABLE (fenv_var) = 1;
22342 tree fenv_ptr = build_pointer_type (fenv_type);
22343 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
22344 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
22345 tree fnstenv = get_ix86_builtin (IX86_BUILTIN_FNSTENV);
22346 tree fldenv = get_ix86_builtin (IX86_BUILTIN_FLDENV);
22347 tree fnstsw = get_ix86_builtin (IX86_BUILTIN_FNSTSW);
22348 tree fnclex = get_ix86_builtin (IX86_BUILTIN_FNCLEX);
22349 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
22350 tree hold_fnclex = build_call_expr (fnclex, 0);
22351 fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
22352 NULL_TREE, NULL_TREE);
22353 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
22354 hold_fnclex);
22355 *clear = build_call_expr (fnclex, 0);
22356 tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
22357 tree fnstsw_call = build_call_expr (fnstsw, 0);
22358 tree sw_mod = build4 (TARGET_EXPR, short_unsigned_type_node, sw_var,
22359 fnstsw_call, NULL_TREE, NULL_TREE);
22360 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
22361 tree update_mod = build4 (TARGET_EXPR, integer_type_node,
22362 exceptions_var, exceptions_x87,
22363 NULL_TREE, NULL_TREE);
22364 *update = build2 (COMPOUND_EXPR, integer_type_node,
22365 sw_mod, update_mod);
22366 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
22367 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
22368 }
22369 if (TARGET_SSE && TARGET_SSE_MATH)
22370 {
22371 tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
22372 tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
22373 tree stmxcsr = get_ix86_builtin (IX86_BUILTIN_STMXCSR);
22374 tree ldmxcsr = get_ix86_builtin (IX86_BUILTIN_LDMXCSR);
22375 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
22376 tree hold_assign_orig = build4 (TARGET_EXPR, unsigned_type_node,
22377 mxcsr_orig_var, stmxcsr_hold_call,
22378 NULL_TREE, NULL_TREE);
22379 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
22380 mxcsr_orig_var,
22381 build_int_cst (unsigned_type_node, 0x1f80));
22382 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
22383 build_int_cst (unsigned_type_node, 0xffffffc0));
22384 tree hold_assign_mod = build4 (TARGET_EXPR, unsigned_type_node,
22385 mxcsr_mod_var, hold_mod_val,
22386 NULL_TREE, NULL_TREE);
22387 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
22388 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
22389 hold_assign_orig, hold_assign_mod);
22390 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
22391 ldmxcsr_hold_call);
22392 if (*hold)
22393 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
22394 else
22395 *hold = hold_all;
22396 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
22397 if (*clear)
22398 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
22399 ldmxcsr_clear_call);
22400 else
22401 *clear = ldmxcsr_clear_call;
22402 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
22403 tree exceptions_sse = fold_convert (integer_type_node,
22404 stxmcsr_update_call);
22405 if (*update)
22406 {
22407 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
22408 exceptions_var, exceptions_sse);
22409 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
22410 exceptions_var, exceptions_mod);
22411 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
22412 exceptions_assign);
22413 }
22414 else
22415 *update = build4 (TARGET_EXPR, integer_type_node, exceptions_var,
22416 exceptions_sse, NULL_TREE, NULL_TREE);
22417 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
22418 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
22419 ldmxcsr_update_call);
22420 }
22421 tree atomic_feraiseexcept
22422 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
22423 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
22424 1, exceptions_var);
22425 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
22426 atomic_feraiseexcept_call);
22427 }
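
/* Overall shape (illustrative summary of the trees built above): for an
   atomic floating-point compound assignment such as
     _Atomic double d;  d += 1.0;
   HOLD saves the x87 environment and/or MXCSR and masks all exception bits,
   CLEAR rearms that cleared state before each compare-and-swap retry, and
   UPDATE restores the saved state and raises the exceptions accumulated in
   EXCEPTIONS_VAR exactly once via __atomic_feraiseexcept.  */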
22428
22429 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
22430 /* For i386, a common symbol is local only in non-PIE binaries.  For
22431    x86-64, a common symbol is local only in non-PIE binaries, or when the
22432    linker supports copy relocations in PIE binaries.  */
22433
22434 static bool
22435 ix86_binds_local_p (const_tree exp)
22436 {
22437 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
22438 (!flag_pic
22439 || (TARGET_64BIT
22440 && HAVE_LD_PIE_COPYRELOC != 0)));
22441 }
22442 #endif
22443
22444 /* If MEM is in the form of [base+offset], extract the two parts
22445    of the address into BASE and OFFSET, otherwise return false.  */
22446
22447 static bool
22448 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
22449 {
22450 rtx addr;
22451
22452 gcc_assert (MEM_P (mem));
22453
22454 addr = XEXP (mem, 0);
22455
22456 if (GET_CODE (addr) == CONST)
22457 addr = XEXP (addr, 0);
22458
22459 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
22460 {
22461 *base = addr;
22462 *offset = const0_rtx;
22463 return true;
22464 }
22465
22466 if (GET_CODE (addr) == PLUS
22467 && (REG_P (XEXP (addr, 0))
22468 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
22469 && CONST_INT_P (XEXP (addr, 1)))
22470 {
22471 *base = XEXP (addr, 0);
22472 *offset = XEXP (addr, 1);
22473 return true;
22474 }
22475
22476 return false;
22477 }
22478
22479 /* Given OPERANDS of consecutive load/store, check if we can merge
22480 them into move multiple. LOAD is true if they are load instructions.
22481 MODE is the mode of memory operands. */
22482
22483 bool
22484 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
22485 machine_mode mode)
22486 {
22487 HOST_WIDE_INT offval_1, offval_2, msize;
22488 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
22489
22490 if (load)
22491 {
22492 mem_1 = operands[1];
22493 mem_2 = operands[3];
22494 reg_1 = operands[0];
22495 reg_2 = operands[2];
22496 }
22497 else
22498 {
22499 mem_1 = operands[0];
22500 mem_2 = operands[2];
22501 reg_1 = operands[1];
22502 reg_2 = operands[3];
22503 }
22504
22505 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
22506
22507 if (REGNO (reg_1) != REGNO (reg_2))
22508 return false;
22509
22510 /* Check if the addresses are in the form of [base+offset]. */
22511 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
22512 return false;
22513 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
22514 return false;
22515
22516 /* Check if the bases are the same. */
22517 if (!rtx_equal_p (base_1, base_2))
22518 return false;
22519
22520 offval_1 = INTVAL (offset_1);
22521 offval_2 = INTVAL (offset_2);
22522 msize = GET_MODE_SIZE (mode);
22523 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
22524 if (offval_1 + msize != offval_2)
22525 return false;
22526
22527 return true;
22528 }
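
/* Example (illustrative RTL): for a store pair the check accepts operands
   corresponding to
     (set (mem:SI (plus (reg:SI bx) (const_int 4))) (reg:SI ax))
     (set (mem:SI (plus (reg:SI bx) (const_int 8))) (reg:SI ax))
   because both addresses share the same base register, the same source
   register is used, and the second offset equals the first plus
   GET_MODE_SIZE (SImode).  */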
22529
22530 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
22531
22532 static bool
22533 ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
22534 optimization_type opt_type)
22535 {
22536 switch (op)
22537 {
22538 case asin_optab:
22539 case acos_optab:
22540 case log1p_optab:
22541 case exp_optab:
22542 case exp10_optab:
22543 case exp2_optab:
22544 case expm1_optab:
22545 case ldexp_optab:
22546 case scalb_optab:
22547 case round_optab:
22548 return opt_type == OPTIMIZE_FOR_SPEED;
22549
22550 case rint_optab:
22551 if (SSE_FLOAT_MODE_P (mode1)
22552 && TARGET_SSE_MATH
22553 && !flag_trapping_math
22554 && !TARGET_SSE4_1)
22555 return opt_type == OPTIMIZE_FOR_SPEED;
22556 return true;
22557
22558 case floor_optab:
22559 case ceil_optab:
22560 case btrunc_optab:
22561 if (SSE_FLOAT_MODE_P (mode1)
22562 && TARGET_SSE_MATH
22563 && !flag_trapping_math
22564 && TARGET_SSE4_1)
22565 return true;
22566 return opt_type == OPTIMIZE_FOR_SPEED;
22567
22568 case rsqrt_optab:
22569 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
22570
22571 default:
22572 return true;
22573 }
22574 }
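
/* Concrete effect (illustrative): with SSE math, SSE4.1 and
   -fno-trapping-math, __builtin_floor of a double is expanded inline (via
   the rounding instructions) even when optimizing for size, because the hook
   returns true unconditionally for floor_optab in that case; without SSE4.1
   the inline sequence is only used when optimizing for speed, and a libm
   call is emitted otherwise.  */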
22575
22576 /* Address space support.
22577
22578 This is not "far pointers" in the 16-bit sense, but an easy way
22579 to use %fs and %gs segment prefixes. Therefore:
22580
22581 (a) All address spaces have the same modes,
22582    (b) All address spaces have the same address forms,
22583 (c) While %fs and %gs are technically subsets of the generic
22584 address space, they are probably not subsets of each other.
22585 (d) Since we have no access to the segment base register values
22586 without resorting to a system call, we cannot convert a
22587 non-default address space to a default address space.
22588 Therefore we do not claim %fs or %gs are subsets of generic.
22589
22590 Therefore we can (mostly) use the default hooks. */
22591
22592 /* All use of segmentation is assumed to make address 0 valid. */
22593
22594 static bool
22595 ix86_addr_space_zero_address_valid (addr_space_t as)
22596 {
22597 return as != ADDR_SPACE_GENERIC;
22598 }
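
/* Example (illustrative): with the named address spaces exposed to C as
     int __seg_gs *p = (int __seg_gs *) 0;
     int v = *p;      // loads from %gs:0
   the null dereference is not assumed to trap or be undefined, because the
   hook above reports that only the generic address space treats address 0
   as invalid.  */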
22599
22600 static void
22601 ix86_init_libfuncs (void)
22602 {
22603 if (TARGET_64BIT)
22604 {
22605 set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
22606 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
22607 }
22608 else
22609 {
22610 set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
22611 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
22612 }
22613
22614 #if TARGET_MACHO
22615 darwin_rename_builtins ();
22616 #endif
22617 }
22618
22619 /* Set the value of FLT_EVAL_METHOD in float.h. When using only the
22620 FPU, assume that the fpcw is set to extended precision; when using
22621 only SSE, rounding is correct; when using both SSE and the FPU,
22622 the rounding precision is indeterminate, since either may be chosen
22623 apparently at random. */
22624
22625 static enum flt_eval_method
22626 ix86_get_excess_precision (enum excess_precision_type type)
22627 {
22628 switch (type)
22629 {
22630 case EXCESS_PRECISION_TYPE_FAST:
22631 /* The fastest type to promote to will always be the native type,
22632 whether that occurs with implicit excess precision or
22633 otherwise. */
22634 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
22635 case EXCESS_PRECISION_TYPE_STANDARD:
22636 case EXCESS_PRECISION_TYPE_IMPLICIT:
22637 /* Otherwise, the excess precision we want when we are
22638 in a standards compliant mode, and the implicit precision we
22639 provide would be identical were it not for the unpredictable
22640 cases. */
22641 if (!TARGET_80387)
22642 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
22643 else if (!TARGET_MIX_SSE_I387)
22644 {
22645 if (!(TARGET_SSE && TARGET_SSE_MATH))
22646 return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
22647 else if (TARGET_SSE2)
22648 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
22649 }
22650
22651 /* If we are in standards compliant mode, but we know we will
22652 calculate in unpredictable precision, return
22653 FLT_EVAL_METHOD_FLOAT. There is no reason to introduce explicit
22654 excess precision if the target can't guarantee it will honor
22655 it. */
22656 return (type == EXCESS_PRECISION_TYPE_STANDARD
22657 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
22658 : FLT_EVAL_METHOD_UNPREDICTABLE);
22659 default:
22660 gcc_unreachable ();
22661 }
22662
22663 return FLT_EVAL_METHOD_UNPREDICTABLE;
22664 }
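
/* Concrete mapping (illustrative): in a standards-compliant mode,
   -m32 -mfpmath=387 yields FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE
   (FLT_EVAL_METHOD == 2 in <float.h>), while -mfpmath=sse together with SSE2
   yields FLT_EVAL_METHOD_PROMOTE_TO_FLOAT (FLT_EVAL_METHOD == 0).  */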
22665
22666 /* Implement PUSH_ROUNDING.  On 386, we have a pushw instruction that
22667    decrements the stack pointer by exactly 2, no matter what the position was;
22668    there is no pushb.  But as the CIE data alignment factor on this arch is -4
22669    for 32-bit targets and -8 for 64-bit targets, we need to make sure all
22670    stack pointer adjustments are multiples of 4 for 32-bit targets and 8 for
22671    64-bit targets.  */
22672
22673 poly_int64
22674 ix86_push_rounding (poly_int64 bytes)
22675 {
22676 return ROUND_UP (bytes, UNITS_PER_WORD);
22677 }
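
/* Worked example (illustrative): pushing a 2-byte operand therefore reserves
   ROUND_UP (2, 4) = 4 bytes of stack on 32-bit targets and
   ROUND_UP (2, 8) = 8 bytes on 64-bit targets, keeping the CFA adjustments
   representable with the CIE data alignment factor described above.  */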
22678
22679 /* Target-specific selftests. */
22680
22681 #if CHECKING_P
22682
22683 namespace selftest {
22684
22685 /* Verify that hard regs are dumped as expected (in compact mode). */
22686
22687 static void
22688 ix86_test_dumping_hard_regs ()
22689 {
22690 ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
22691 ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
22692 }
22693
22694 /* Test dumping an insn with repeated references to the same SCRATCH,
22695 to verify the rtx_reuse code. */
22696
22697 static void
22698 ix86_test_dumping_memory_blockage ()
22699 {
22700 set_new_first_and_last_insn (NULL, NULL);
22701
22702 rtx pat = gen_memory_blockage ();
22703 rtx_reuse_manager r;
22704 r.preprocess (pat);
22705
22706   /* Verify that the repeated references to the SCRATCH show the use of
22707      reuse IDs.  The first should be prefixed with a reuse ID,
22708      and the second should be dumped as a "reuse_rtx" of that ID.
22709      The expected string assumes Pmode == DImode.  */
22710 if (Pmode == DImode)
22711 ASSERT_RTL_DUMP_EQ_WITH_REUSE
22712 ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
22713 " (unspec:BLK [\n"
22714 " (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
22715 " ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
22716 }
22717
22718 /* Verify loading an RTL dump; specifically a dump of copying
22719 a param on x86_64 from a hard reg into the frame.
22720 This test is target-specific since the dump contains target-specific
22721 hard reg names. */
22722
22723 static void
22724 ix86_test_loading_dump_fragment_1 ()
22725 {
22726 rtl_dump_test t (SELFTEST_LOCATION,
22727 locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));
22728
22729 rtx_insn *insn = get_insn_by_uid (1);
22730
22731 /* The block structure and indentation here is purely for
22732 readability; it mirrors the structure of the rtx. */
22733 tree mem_expr;
22734 {
22735 rtx pat = PATTERN (insn);
22736 ASSERT_EQ (SET, GET_CODE (pat));
22737 {
22738 rtx dest = SET_DEST (pat);
22739 ASSERT_EQ (MEM, GET_CODE (dest));
22740 /* Verify the "/c" was parsed. */
22741 ASSERT_TRUE (RTX_FLAG (dest, call));
22742 ASSERT_EQ (SImode, GET_MODE (dest));
22743 {
22744 rtx addr = XEXP (dest, 0);
22745 ASSERT_EQ (PLUS, GET_CODE (addr));
22746 ASSERT_EQ (DImode, GET_MODE (addr));
22747 {
22748 rtx lhs = XEXP (addr, 0);
22749 /* Verify that the "frame" REG was consolidated. */
22750 ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
22751 }
22752 {
22753 rtx rhs = XEXP (addr, 1);
22754 ASSERT_EQ (CONST_INT, GET_CODE (rhs));
22755 ASSERT_EQ (-4, INTVAL (rhs));
22756 }
22757 }
22758 /* Verify the "[1 i+0 S4 A32]" was parsed. */
22759 ASSERT_EQ (1, MEM_ALIAS_SET (dest));
22760 /* "i" should have been handled by synthesizing a global int
22761 variable named "i". */
22762 mem_expr = MEM_EXPR (dest);
22763 ASSERT_NE (mem_expr, NULL);
22764 ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
22765 ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
22766 ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
22767 ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
22768 /* "+0". */
22769 ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
22770 ASSERT_EQ (0, MEM_OFFSET (dest));
22771 /* "S4". */
22772 ASSERT_EQ (4, MEM_SIZE (dest));
22773 /* "A32. */
22774 ASSERT_EQ (32, MEM_ALIGN (dest));
22775 }
22776 {
22777 rtx src = SET_SRC (pat);
22778 ASSERT_EQ (REG, GET_CODE (src));
22779 ASSERT_EQ (SImode, GET_MODE (src));
22780 ASSERT_EQ (5, REGNO (src));
22781 tree reg_expr = REG_EXPR (src);
22782 /* "i" here should point to the same var as for the MEM_EXPR. */
22783 ASSERT_EQ (reg_expr, mem_expr);
22784 }
22785 }
22786 }
22787
22788 /* Verify that the RTL loader copes with a call_insn dump.
22789 This test is target-specific since the dump contains a target-specific
22790 hard reg name. */
22791
22792 static void
22793 ix86_test_loading_call_insn ()
22794 {
22795   /* The test dump includes register "xmm0", which requires TARGET_SSE
22796      to exist.  */
22797 if (!TARGET_SSE)
22798 return;
22799
22800 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));
22801
22802 rtx_insn *insn = get_insns ();
22803 ASSERT_EQ (CALL_INSN, GET_CODE (insn));
22804
22805 /* "/j". */
22806 ASSERT_TRUE (RTX_FLAG (insn, jump));
22807
22808 rtx pat = PATTERN (insn);
22809 ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));
22810
22811 /* Verify REG_NOTES. */
22812 {
22813 /* "(expr_list:REG_CALL_DECL". */
22814 ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
22815 rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
22816 ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));
22817
22818 /* "(expr_list:REG_EH_REGION (const_int 0 [0])". */
22819 rtx_expr_list *note1 = note0->next ();
22820 ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));
22821
22822 ASSERT_EQ (NULL, note1->next ());
22823 }
22824
22825 /* Verify CALL_INSN_FUNCTION_USAGE. */
22826 {
22827 /* "(expr_list:DF (use (reg:DF 21 xmm0))". */
22828 rtx_expr_list *usage
22829 = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
22830 ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
22831 ASSERT_EQ (DFmode, GET_MODE (usage));
22832 ASSERT_EQ (USE, GET_CODE (usage->element ()));
22833 ASSERT_EQ (NULL, usage->next ());
22834 }
22835 }
22836
22837 /* Verify that the RTL loader copes with a dump from print_rtx_function.
22838 This test is target-specific since the dump contains target-specific
22839 hard reg names. */
22840
22841 static void
22842 ix86_test_loading_full_dump ()
22843 {
22844 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));
22845
22846 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
22847
22848 rtx_insn *insn_1 = get_insn_by_uid (1);
22849 ASSERT_EQ (NOTE, GET_CODE (insn_1));
22850
22851 rtx_insn *insn_7 = get_insn_by_uid (7);
22852 ASSERT_EQ (INSN, GET_CODE (insn_7));
22853 ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));
22854
22855 rtx_insn *insn_15 = get_insn_by_uid (15);
22856 ASSERT_EQ (INSN, GET_CODE (insn_15));
22857 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
22858
22859 /* Verify crtl->return_rtx. */
22860 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
22861 ASSERT_EQ (0, REGNO (crtl->return_rtx));
22862 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
22863 }
22864
22865 /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
22866 In particular, verify that it correctly loads the 2nd operand.
22867 This test is target-specific since these are machine-specific
22868 operands (and enums). */
22869
22870 static void
22871 ix86_test_loading_unspec ()
22872 {
22873 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));
22874
22875 ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
22876
22877 ASSERT_TRUE (cfun);
22878
22879 /* Test of an UNSPEC. */
22880 rtx_insn *insn = get_insns ();
22881 ASSERT_EQ (INSN, GET_CODE (insn));
22882 rtx set = single_set (insn);
22883 ASSERT_NE (NULL, set);
22884 rtx dst = SET_DEST (set);
22885 ASSERT_EQ (MEM, GET_CODE (dst));
22886 rtx src = SET_SRC (set);
22887 ASSERT_EQ (UNSPEC, GET_CODE (src));
22888 ASSERT_EQ (BLKmode, GET_MODE (src));
22889 ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));
22890
22891 rtx v0 = XVECEXP (src, 0, 0);
22892
22893 /* Verify that the two uses of the first SCRATCH have pointer
22894 equality. */
22895 rtx scratch_a = XEXP (dst, 0);
22896 ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));
22897
22898 rtx scratch_b = XEXP (v0, 0);
22899 ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));
22900
22901 ASSERT_EQ (scratch_a, scratch_b);
22902
22903 /* Verify that the two mems are thus treated as equal. */
22904 ASSERT_TRUE (rtx_equal_p (dst, v0));
22905
22906 /* Verify that the insn is recognized. */
22907   ASSERT_NE (-1, recog_memoized (insn));
22908
22909 /* Test of an UNSPEC_VOLATILE, which has its own enum values. */
22910 insn = NEXT_INSN (insn);
22911 ASSERT_EQ (INSN, GET_CODE (insn));
22912
22913 set = single_set (insn);
22914 ASSERT_NE (NULL, set);
22915
22916 src = SET_SRC (set);
22917 ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
22918 ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
22919 }
22920
22921 /* Run all target-specific selftests. */
22922
22923 static void
22924 ix86_run_selftests (void)
22925 {
22926 ix86_test_dumping_hard_regs ();
22927 ix86_test_dumping_memory_blockage ();
22928
22929 /* Various tests of loading RTL dumps, here because they contain
22930 ix86-isms (e.g. names of hard regs). */
22931 ix86_test_loading_dump_fragment_1 ();
22932 ix86_test_loading_call_insn ();
22933 ix86_test_loading_full_dump ();
22934 ix86_test_loading_unspec ();
22935 }
22936
22937 } // namespace selftest
22938
22939 #endif /* CHECKING_P */
22940
22941 /* Initialize the GCC target structure. */
22942 #undef TARGET_RETURN_IN_MEMORY
22943 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
22944
22945 #undef TARGET_LEGITIMIZE_ADDRESS
22946 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
22947
22948 #undef TARGET_ATTRIBUTE_TABLE
22949 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
22950 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
22951 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
22952 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22953 # undef TARGET_MERGE_DECL_ATTRIBUTES
22954 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
22955 #endif
22956
22957 #undef TARGET_COMP_TYPE_ATTRIBUTES
22958 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
22959
22960 #undef TARGET_INIT_BUILTINS
22961 #define TARGET_INIT_BUILTINS ix86_init_builtins
22962 #undef TARGET_BUILTIN_DECL
22963 #define TARGET_BUILTIN_DECL ix86_builtin_decl
22964 #undef TARGET_EXPAND_BUILTIN
22965 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
22966
22967 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
22968 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
22969 ix86_builtin_vectorized_function
22970
22971 #undef TARGET_VECTORIZE_BUILTIN_GATHER
22972 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
22973
22974 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
22975 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
22976
22977 #undef TARGET_BUILTIN_RECIPROCAL
22978 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
22979
22980 #undef TARGET_ASM_FUNCTION_EPILOGUE
22981 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
22982
22983 #undef TARGET_ENCODE_SECTION_INFO
22984 #ifndef SUBTARGET_ENCODE_SECTION_INFO
22985 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
22986 #else
22987 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
22988 #endif
22989
22990 #undef TARGET_ASM_OPEN_PAREN
22991 #define TARGET_ASM_OPEN_PAREN ""
22992 #undef TARGET_ASM_CLOSE_PAREN
22993 #define TARGET_ASM_CLOSE_PAREN ""
22994
22995 #undef TARGET_ASM_BYTE_OP
22996 #define TARGET_ASM_BYTE_OP ASM_BYTE
22997
22998 #undef TARGET_ASM_ALIGNED_HI_OP
22999 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
23000 #undef TARGET_ASM_ALIGNED_SI_OP
23001 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
23002 #ifdef ASM_QUAD
23003 #undef TARGET_ASM_ALIGNED_DI_OP
23004 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
23005 #endif
23006
23007 #undef TARGET_PROFILE_BEFORE_PROLOGUE
23008 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
23009
23010 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
23011 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
23012
23013 #undef TARGET_ASM_UNALIGNED_HI_OP
23014 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
23015 #undef TARGET_ASM_UNALIGNED_SI_OP
23016 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
23017 #undef TARGET_ASM_UNALIGNED_DI_OP
23018 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
23019
23020 #undef TARGET_PRINT_OPERAND
23021 #define TARGET_PRINT_OPERAND ix86_print_operand
23022 #undef TARGET_PRINT_OPERAND_ADDRESS
23023 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
23024 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
23025 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
23026 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
23027 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
23028
23029 #undef TARGET_SCHED_INIT_GLOBAL
23030 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
23031 #undef TARGET_SCHED_ADJUST_COST
23032 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
23033 #undef TARGET_SCHED_ISSUE_RATE
23034 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
23035 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
23036 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
23037 ia32_multipass_dfa_lookahead
23038 #undef TARGET_SCHED_MACRO_FUSION_P
23039 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
23040 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
23041 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
23042
23043 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
23044 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
23045
23046 #undef TARGET_MEMMODEL_CHECK
23047 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
23048
23049 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
23050 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
23051
23052 #ifdef HAVE_AS_TLS
23053 #undef TARGET_HAVE_TLS
23054 #define TARGET_HAVE_TLS true
23055 #endif
23056 #undef TARGET_CANNOT_FORCE_CONST_MEM
23057 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
23058 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
23059 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
23060
23061 #undef TARGET_DELEGITIMIZE_ADDRESS
23062 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
23063
23064 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
23065 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
23066
23067 #undef TARGET_MS_BITFIELD_LAYOUT_P
23068 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
23069
23070 #if TARGET_MACHO
23071 #undef TARGET_BINDS_LOCAL_P
23072 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
23073 #else
23074 #undef TARGET_BINDS_LOCAL_P
23075 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
23076 #endif
23077 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23078 #undef TARGET_BINDS_LOCAL_P
23079 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
23080 #endif
23081
23082 #undef TARGET_ASM_OUTPUT_MI_THUNK
23083 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
23084 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
23085 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
23086
23087 #undef TARGET_ASM_FILE_START
23088 #define TARGET_ASM_FILE_START x86_file_start
23089
23090 #undef TARGET_OPTION_OVERRIDE
23091 #define TARGET_OPTION_OVERRIDE ix86_option_override
23092
23093 #undef TARGET_REGISTER_MOVE_COST
23094 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
23095 #undef TARGET_MEMORY_MOVE_COST
23096 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
23097 #undef TARGET_RTX_COSTS
23098 #define TARGET_RTX_COSTS ix86_rtx_costs
23099 #undef TARGET_ADDRESS_COST
23100 #define TARGET_ADDRESS_COST ix86_address_cost
23101
23102 #undef TARGET_FLAGS_REGNUM
23103 #define TARGET_FLAGS_REGNUM FLAGS_REG
23104 #undef TARGET_FIXED_CONDITION_CODE_REGS
23105 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
23106 #undef TARGET_CC_MODES_COMPATIBLE
23107 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
23108
23109 #undef TARGET_MACHINE_DEPENDENT_REORG
23110 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
23111
23112 #undef TARGET_BUILD_BUILTIN_VA_LIST
23113 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
23114
23115 #undef TARGET_FOLD_BUILTIN
23116 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
23117
23118 #undef TARGET_GIMPLE_FOLD_BUILTIN
23119 #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
23120
23121 #undef TARGET_COMPARE_VERSION_PRIORITY
23122 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
23123
23124 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
23125 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
23126 ix86_generate_version_dispatcher_body
23127
23128 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
23129 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
23130 ix86_get_function_versions_dispatcher
23131
23132 #undef TARGET_ENUM_VA_LIST_P
23133 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
23134
23135 #undef TARGET_FN_ABI_VA_LIST
23136 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
23137
23138 #undef TARGET_CANONICAL_VA_LIST_TYPE
23139 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
23140
23141 #undef TARGET_EXPAND_BUILTIN_VA_START
23142 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
23143
23144 #undef TARGET_MD_ASM_ADJUST
23145 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
23146
23147 #undef TARGET_C_EXCESS_PRECISION
23148 #define TARGET_C_EXCESS_PRECISION ix86_get_excess_precision
23149 #undef TARGET_PROMOTE_PROTOTYPES
23150 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
23151 #undef TARGET_SETUP_INCOMING_VARARGS
23152 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
23153 #undef TARGET_MUST_PASS_IN_STACK
23154 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
23155 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
23156 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
23157 #undef TARGET_FUNCTION_ARG_ADVANCE
23158 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
23159 #undef TARGET_FUNCTION_ARG
23160 #define TARGET_FUNCTION_ARG ix86_function_arg
23161 #undef TARGET_INIT_PIC_REG
23162 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
23163 #undef TARGET_USE_PSEUDO_PIC_REG
23164 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
23165 #undef TARGET_FUNCTION_ARG_BOUNDARY
23166 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
23167 #undef TARGET_PASS_BY_REFERENCE
23168 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
23169 #undef TARGET_INTERNAL_ARG_POINTER
23170 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
23171 #undef TARGET_UPDATE_STACK_BOUNDARY
23172 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
23173 #undef TARGET_GET_DRAP_RTX
23174 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
23175 #undef TARGET_STRICT_ARGUMENT_NAMING
23176 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
23177 #undef TARGET_STATIC_CHAIN
23178 #define TARGET_STATIC_CHAIN ix86_static_chain
23179 #undef TARGET_TRAMPOLINE_INIT
23180 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
23181 #undef TARGET_RETURN_POPS_ARGS
23182 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
23183
23184 #undef TARGET_WARN_FUNC_RETURN
23185 #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
23186
23187 #undef TARGET_LEGITIMATE_COMBINED_INSN
23188 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
23189
23190 #undef TARGET_ASAN_SHADOW_OFFSET
23191 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
23192
23193 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
23194 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
23195
23196 #undef TARGET_SCALAR_MODE_SUPPORTED_P
23197 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
23198
23199 #undef TARGET_VECTOR_MODE_SUPPORTED_P
23200 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
23201
23202 #undef TARGET_C_MODE_FOR_SUFFIX
23203 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
23204
23205 #ifdef HAVE_AS_TLS
23206 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
23207 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
23208 #endif
23209
23210 #ifdef SUBTARGET_INSERT_ATTRIBUTES
23211 #undef TARGET_INSERT_ATTRIBUTES
23212 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
23213 #endif
23214
23215 #undef TARGET_MANGLE_TYPE
23216 #define TARGET_MANGLE_TYPE ix86_mangle_type
23217
23218 #undef TARGET_STACK_PROTECT_GUARD
23219 #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
23220
23221 #if !TARGET_MACHO
23222 #undef TARGET_STACK_PROTECT_FAIL
23223 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
23224 #endif
23225
23226 #undef TARGET_FUNCTION_VALUE
23227 #define TARGET_FUNCTION_VALUE ix86_function_value
23228
23229 #undef TARGET_FUNCTION_VALUE_REGNO_P
23230 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
23231
23232 #undef TARGET_PROMOTE_FUNCTION_MODE
23233 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
23234
23235 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
23236 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
23237
23238 #undef TARGET_MEMBER_TYPE_FORCES_BLK
23239 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
23240
23241 #undef TARGET_INSTANTIATE_DECLS
23242 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
23243
23244 #undef TARGET_SECONDARY_RELOAD
23245 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
23246 #undef TARGET_SECONDARY_MEMORY_NEEDED
23247 #define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
23248 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
23249 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
23250
23251 #undef TARGET_CLASS_MAX_NREGS
23252 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
23253
23254 #undef TARGET_PREFERRED_RELOAD_CLASS
23255 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
23256 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
23257 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
23258 #undef TARGET_CLASS_LIKELY_SPILLED_P
23259 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
23260
23261 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
23262 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
23263 ix86_builtin_vectorization_cost
23264 #undef TARGET_VECTORIZE_VEC_PERM_CONST
23265 #define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
23266 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
23267 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
23268 ix86_preferred_simd_mode
23269 #undef TARGET_VECTORIZE_SPLIT_REDUCTION
23270 #define TARGET_VECTORIZE_SPLIT_REDUCTION \
23271 ix86_split_reduction
23272 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
23273 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
23274 ix86_autovectorize_vector_modes
23275 #undef TARGET_VECTORIZE_GET_MASK_MODE
23276 #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
23277 #undef TARGET_VECTORIZE_INIT_COST
23278 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
23279 #undef TARGET_VECTORIZE_ADD_STMT_COST
23280 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
23281 #undef TARGET_VECTORIZE_FINISH_COST
23282 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
23283 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
23284 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
23285
23286 #undef TARGET_SET_CURRENT_FUNCTION
23287 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
23288
23289 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
23290 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
23291
#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_POST_STREAM_IN
#define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.  */
#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class

#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable

#undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
#define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p

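/* Hooks for the mode-switching (optimize_mode_switching) pass; on x86
   these drive, for example, vzeroupper insertion for the AVX upper-state
   entity and x87 control-word changes.  */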
#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS \
  ix86_offload_options

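/* The biggest alignment, in bits, that any type or variable can require
   on x86; 512 bits corresponds to the 64-byte ZMM registers.  */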
#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p

#undef TARGET_HARD_REGNO_SCRATCH_OK
#define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
#define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS ix86_init_libfuncs

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc

#undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
#define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost

#undef TARGET_NOCE_CONVERSION_PROFITABLE_P
#define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  ix86_hard_regno_call_part_clobbered

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class

#undef TARGET_STATIC_RTX_ALIGNMENT
#define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment

#undef TARGET_EMPTY_RECORD_P
#define TARGET_EMPTY_RECORD_P ix86_is_empty_record

#undef TARGET_WARN_PARAMETER_PASSING_ABI
#define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi

#undef TARGET_GET_MULTILIB_ABI_NAME
#define TARGET_GET_MULTILIB_ABI_NAME \
  ix86_get_multilib_abi_name

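/* Implement TARGET_LIBC_HAS_FAST_FUNCTION: return true if the C library
   is known to provide a fast implementation of the built-in function
   FCODE.  Currently only mempcpy is reported as fast, and only when
   targeting glibc.  */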
static bool
ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
{
#ifdef OPTION_GLIBC
  if (OPTION_GLIBC)
    return (built_in_function)fcode == BUILT_IN_MEMPCPY;
  else
    return false;
#else
  return false;
#endif
}

#undef TARGET_LIBC_HAS_FAST_FUNCTION
#define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function

#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
#endif /* #if CHECKING_P */

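/* Instantiate the target hook vector from the TARGET_* macros defined
   above; the rest of the compiler reaches the i386 back end through
   this structure.  */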
struct gcc_target targetm = TARGET_INITIALIZER;

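/* Include the garbage-collector root tables generated by gengtype for
   this file.  */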
#include "gt-i386.h"
