xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/config/i386/i386.c (revision 8feb0f0b7eaff0608f8350bbfa3098827b4bb91b)
1 /* Subroutines used for code generation on IA-32.
2    Copyright (C) 1988-2020 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10 
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #define IN_TARGET_CODE 1
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "memmodel.h"
29 #include "gimple.h"
30 #include "cfghooks.h"
31 #include "cfgloop.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "expmed.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic.h"
42 #include "cfgbuild.h"
43 #include "alias.h"
44 #include "fold-const.h"
45 #include "attribs.h"
46 #include "calls.h"
47 #include "stor-layout.h"
48 #include "varasm.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "flags.h"
52 #include "except.h"
53 #include "explow.h"
54 #include "expr.h"
55 #include "cfgrtl.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "gimplify.h"
60 #include "dwarf2.h"
61 #include "tm-constrs.h"
62 #include "cselib.h"
63 #include "sched-int.h"
64 #include "opts.h"
65 #include "tree-pass.h"
66 #include "context.h"
67 #include "pass_manager.h"
68 #include "target-globals.h"
69 #include "gimple-iterator.h"
70 #include "tree-vectorizer.h"
71 #include "shrink-wrap.h"
72 #include "builtins.h"
73 #include "rtl-iter.h"
74 #include "tree-iterator.h"
75 #include "dbgcnt.h"
76 #include "case-cfn-macros.h"
77 #include "dojump.h"
78 #include "fold-const-call.h"
79 #include "tree-vrp.h"
80 #include "tree-ssanames.h"
81 #include "selftest.h"
82 #include "selftest-rtl.h"
83 #include "print-rtl.h"
84 #include "intl.h"
85 #include "ifcvt.h"
86 #include "symbol-summary.h"
87 #include "ipa-prop.h"
88 #include "ipa-fnsummary.h"
89 #include "wide-int-bitmask.h"
90 #include "tree-vector-builder.h"
91 #include "debug.h"
92 #include "dwarf2out.h"
93 #include "i386-options.h"
94 #include "i386-builtins.h"
95 #include "i386-expand.h"
96 #include "i386-features.h"
97 #include "function-abi.h"
98 
99 /* This file should be included last.  */
100 #include "target-def.h"
101 
102 static rtx legitimize_dllimport_symbol (rtx, bool);
103 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
104 static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
105 static void ix86_emit_restore_reg_using_pop (rtx);
106 
107 
108 #ifndef CHECK_STACK_LIMIT
109 #define CHECK_STACK_LIMIT (-1)
110 #endif
111 
112 /* Return index of given mode in mult and division cost tables.  */
113 #define MODE_INDEX(mode)					\
114   ((mode) == QImode ? 0						\
115    : (mode) == HImode ? 1					\
116    : (mode) == SImode ? 2					\
117    : (mode) == DImode ? 3					\
118    : 4)
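
/* Illustrative usage (a sketch, not from the original sources): the index
   returned by MODE_INDEX selects the per-mode entry of the processor cost
   tables, so a cost lookup elsewhere in the backend reads roughly like

       cost->mult_init[MODE_INDEX (SImode)]    /- == cost->mult_init[2] -/

   where "mult_init" merely stands in for whichever struct processor_costs
   field is being consulted.  */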
119 
120 
121 /* Set by -mtune.  */
122 const struct processor_costs *ix86_tune_cost = NULL;
123 
124 /* Set by -mtune or -Os.  */
125 const struct processor_costs *ix86_cost = NULL;
126 
127 /* In case the average insn count for single function invocation is
128    lower than this constant, emit fast (but longer) prologue and
129    epilogue code.  */
130 #define FAST_PROLOGUE_INSN_COUNT 20
131 
132 /* Names for 8-bit (low), 8-bit (high), and 16-bit registers, respectively.  */
133 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
134 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
135 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
136 
137 /* Array of the smallest class containing reg number REGNO, indexed by
138    REGNO.  Used by REGNO_REG_CLASS in i386.h.  */
139 
140 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
141 {
142   /* ax, dx, cx, bx */
143   AREG, DREG, CREG, BREG,
144   /* si, di, bp, sp */
145   SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
146   /* FP registers */
147   FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
148   FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
149   /* arg pointer, flags, fpsr, frame */
150   NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
151   /* SSE registers */
152   SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
153   SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
154   /* MMX registers */
155   MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
156   MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
157   /* REX registers */
158   GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
159   GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
160   /* SSE REX registers */
161   SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
162   SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
163   /* AVX-512 SSE registers */
164   ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
165   ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
166   ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
167   ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
168   /* Mask registers.  */
169   ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
170   MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS
171 };
172 
173 /* The "default" register map used in 32bit mode.  */
174 
175 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
176 {
177   /* general regs */
178   0, 2, 1, 3, 6, 7, 4, 5,
179   /* fp regs */
180   12, 13, 14, 15, 16, 17, 18, 19,
181   /* arg, flags, fpsr, frame */
182   IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
183   IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
184   /* SSE */
185   21, 22, 23, 24, 25, 26, 27, 28,
186   /* MMX */
187   29, 30, 31, 32, 33, 34, 35, 36,
188   /* extended integer registers */
189   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
190   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
191   /* extended sse registers */
192   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
193   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
194   /* AVX-512 registers 16-23 */
195   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
196   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
197   /* AVX-512 registers 24-31 */
198   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
199   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
200   /* Mask registers */
201   93, 94, 95, 96, 97, 98, 99, 100
202 };
203 
204 /* The "default" register map used in 64bit mode.  */
205 
206 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
207 {
208   /* general regs */
209   0, 1, 2, 3, 4, 5, 6, 7,
210   /* fp regs */
211   33, 34, 35, 36, 37, 38, 39, 40,
212   /* arg, flags, fpsr, frame */
213   IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
214   IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
215   /* SSE */
216   17, 18, 19, 20, 21, 22, 23, 24,
217   /* MMX */
218   41, 42, 43, 44, 45, 46, 47, 48,
219   /* extended integer registers */
220   8, 9, 10, 11, 12, 13, 14, 15,
221   /* extended SSE registers */
222   25, 26, 27, 28, 29, 30, 31, 32,
223   /* AVX-512 registers 16-23 */
224   67, 68, 69, 70, 71, 72, 73, 74,
225   /* AVX-512 registers 24-31 */
226   75, 76, 77, 78, 79, 80, 81, 82,
227   /* Mask registers */
228   118, 119, 120, 121, 122, 123, 124, 125
229 };
230 
231 /* Define the register numbers to be used in Dwarf debugging information.
232    The SVR4 reference port C compiler uses the following register numbers
233    in its Dwarf output code:
234 	0 for %eax (gcc regno = 0)
235 	1 for %ecx (gcc regno = 2)
236 	2 for %edx (gcc regno = 1)
237 	3 for %ebx (gcc regno = 3)
238 	4 for %esp (gcc regno = 7)
239 	5 for %ebp (gcc regno = 6)
240 	6 for %esi (gcc regno = 4)
241 	7 for %edi (gcc regno = 5)
242    The following three DWARF register numbers are never generated by
243    the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
244    believed these numbers have these meanings.
245 	8  for %eip    (no gcc equivalent)
246 	9  for %eflags (gcc regno = 17)
247 	10 for %trapno (no gcc equivalent)
248    It is not at all clear how we should number the FP stack registers
249    for the x86 architecture.  If the version of SDB on x86/svr4 were
250    a bit less brain dead with respect to floating-point then we would
251    have a precedent to follow with respect to DWARF register numbers
252    for x86 FP registers, but the SDB on x86/svr4 was so completely
253    broken with respect to FP registers that it is hardly worth thinking
254    of it as something to strive for compatibility with.
255    The version of x86/svr4 SDB I had does (partially)
256    seem to believe that DWARF register number 11 is associated with
257    the x86 register %st(0), but that's about all.  Higher DWARF
258    register numbers don't seem to be associated with anything in
259    particular, and even for DWARF regno 11, SDB only seemed to under-
260    stand that it should say that a variable lives in %st(0) (when
261    asked via an `=' command) if we said it was in DWARF regno 11,
262    but SDB still printed garbage when asked for the value of the
263    variable in question (via a `/' command).
264    (Also note that the labels SDB printed for various FP stack regs
265    when doing an `x' command were all wrong.)
266    Note that these problems generally don't affect the native SVR4
267    C compiler because it doesn't allow the use of -O with -g and
268    because when it is *not* optimizing, it allocates a memory
269    location for each floating-point variable, and the memory
270    location is what gets described in the DWARF AT_location
271    attribute for the variable in question.
272    Regardless of the severe mental illness of the x86/svr4 SDB, we
273    do something sensible here and we use the following DWARF
274    register numbers.  Note that these are all stack-top-relative
275    numbers.
276 	11 for %st(0) (gcc regno = 8)
277 	12 for %st(1) (gcc regno = 9)
278 	13 for %st(2) (gcc regno = 10)
279 	14 for %st(3) (gcc regno = 11)
280 	15 for %st(4) (gcc regno = 12)
281 	16 for %st(5) (gcc regno = 13)
282 	17 for %st(6) (gcc regno = 14)
283 	18 for %st(7) (gcc regno = 15)
284 */
285 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
286 {
287   /* general regs */
288   0, 2, 1, 3, 6, 7, 5, 4,
289   /* fp regs */
290   11, 12, 13, 14, 15, 16, 17, 18,
291   /* arg, flags, fpsr, frame */
292   IGNORED_DWARF_REGNUM, 9,
293   IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
294   /* SSE registers */
295   21, 22, 23, 24, 25, 26, 27, 28,
296   /* MMX registers */
297   29, 30, 31, 32, 33, 34, 35, 36,
298   /* extended integer registers */
299   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
300   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
301   /* extended sse registers */
302   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
303   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
304   /* AVX-512 registers 16-23 */
305   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
306   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
307   /* AVX-512 registers 24-31 */
308   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
309   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
310   /* Mask registers */
311   93, 94, 95, 96, 97, 98, 99, 100
312 };
313 
314 /* Define parameter passing and return registers.  */
315 
316 static int const x86_64_int_parameter_registers[6] =
317 {
318   DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
319 };
320 
321 static int const x86_64_ms_abi_int_parameter_registers[4] =
322 {
323   CX_REG, DX_REG, R8_REG, R9_REG
324 };
325 
326 static int const x86_64_int_return_registers[4] =
327 {
328   AX_REG, DX_REG, DI_REG, SI_REG
329 };
330 
331 /* Define the structure for the machine field in struct function.  */
332 
333 struct GTY(()) stack_local_entry {
334   unsigned short mode;
335   unsigned short n;
336   rtx rtl;
337   struct stack_local_entry *next;
338 };
339 
340 /* Which cpu are we scheduling for.  */
341 enum attr_cpu ix86_schedule;
342 
343 /* Which cpu are we optimizing for.  */
344 enum processor_type ix86_tune;
345 
346 /* Which instruction set architecture to use.  */
347 enum processor_type ix86_arch;
348 
349 /* True if processor has SSE prefetch instruction.  */
350 unsigned char x86_prefetch_sse;
351 
352 /* Preferred alignment for stack boundary in bits.  */
353 unsigned int ix86_preferred_stack_boundary;
354 
355 /* Alignment for incoming stack boundary in bits specified at
356    command line.  */
357 unsigned int ix86_user_incoming_stack_boundary;
358 
359 /* Default alignment for incoming stack boundary in bits.  */
360 unsigned int ix86_default_incoming_stack_boundary;
361 
362 /* Alignment for incoming stack boundary in bits.  */
363 unsigned int ix86_incoming_stack_boundary;
364 
365 /* Calling abi specific va_list type nodes.  */
366 tree sysv_va_list_type_node;
367 tree ms_va_list_type_node;
368 
369 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
370 char internal_label_prefix[16];
371 int internal_label_prefix_len;
372 
373 /* Fence to use after loop using movnt.  */
374 tree x86_mfence;
375 
376 /* Register class used for passing a given 64-bit part of the argument.
377    These represent classes as documented by the psABI, with the exception
378    of the SSESF and SSEDF classes, which are basically the SSE class, except
379    that gcc uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
380 
381    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
382    whenever possible (upper half does contain padding).  */
383 enum x86_64_reg_class
384   {
385     X86_64_NO_CLASS,
386     X86_64_INTEGER_CLASS,
387     X86_64_INTEGERSI_CLASS,
388     X86_64_SSE_CLASS,
389     X86_64_SSESF_CLASS,
390     X86_64_SSEDF_CLASS,
391     X86_64_SSEUP_CLASS,
392     X86_64_X87_CLASS,
393     X86_64_X87UP_CLASS,
394     X86_64_COMPLEX_X87_CLASS,
395     X86_64_MEMORY_CLASS
396   };
397 
398 #define MAX_CLASSES 8
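
/* A rough psABI illustration of these classes (example values only): a
   "double" argument is classified X86_64_SSEDF_CLASS and passed in an SSE
   register, a 64-bit "long" is X86_64_INTEGER_CLASS and goes in a general
   register, and a 32-byte struct of plain ints is X86_64_MEMORY_CLASS and is
   passed on the stack.  Aggregates are split into eightbytes, each of which
   is classified separately, using at most MAX_CLASSES entries.  */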
399 
400 /* Table of constants used by fldpi, fldln2, etc....  */
401 static REAL_VALUE_TYPE ext_80387_constants_table [5];
402 static bool ext_80387_constants_init;
403 
404 
405 static rtx ix86_function_value (const_tree, const_tree, bool);
406 static bool ix86_function_value_regno_p (const unsigned int);
407 static unsigned int ix86_function_arg_boundary (machine_mode,
408 						const_tree);
409 static rtx ix86_static_chain (const_tree, bool);
410 static int ix86_function_regparm (const_tree, const_tree);
411 static void ix86_compute_frame_layout (void);
412 static tree ix86_canonical_va_list_type (tree);
413 static unsigned int split_stack_prologue_scratch_regno (void);
414 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
415 
416 static bool ix86_can_inline_p (tree, tree);
417 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
418 
419 
420 /* Whether -mtune= or -march= were specified */
421 int ix86_tune_defaulted;
422 int ix86_arch_specified;
423 
424 /* Return true if a red-zone is in use.  We can't use a red-zone when
425    there are local indirect jumps, like "indirect_jump" or "tablejump",
426    which jump to another place in the function, since "call" in the
427    indirect thunk pushes the return address onto the stack, destroying
428    the red-zone.
429 
430    TODO: If we can reserve the first 2 WORDs of the red-zone, one for
431    PUSH and another for CALL, we can allow local indirect jumps with an
432    indirect thunk.  */
433 
434 bool
435 ix86_using_red_zone (void)
436 {
437   return (TARGET_RED_ZONE
438 	  && !TARGET_64BIT_MS_ABI
439 	  && (!cfun->machine->has_local_indirect_jump
440 	      || cfun->machine->indirect_branch_type == indirect_branch_keep));
441 }
442 
443 /* Return true if profiling code should be emitted before the
444    prologue, and false otherwise.
445    Note: For x86 with "hotfix" it is sorried (a sorry () diagnostic is issued).  */
446 static bool
447 ix86_profile_before_prologue (void)
448 {
449   return flag_fentry != 0;
450 }
451 
452 /* Update register usage after having seen the compiler flags.  */
453 
454 static void
455 ix86_conditional_register_usage (void)
456 {
457   int i, c_mask;
458 
459   /* If there are no caller-saved registers, preserve all registers.
460      except fixed_regs and registers used for function return value
461      since aggregate_value_p checks call_used_regs[regno] on return
462      value.  */
463   if (cfun && cfun->machine->no_caller_saved_registers)
464     for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
465       if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
466 	call_used_regs[i] = 0;
467 
468   /* For 32-bit targets, disable the REX registers.  */
469   if (! TARGET_64BIT)
470     {
471       for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
472 	CLEAR_HARD_REG_BIT (accessible_reg_set, i);
473       for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
474 	CLEAR_HARD_REG_BIT (accessible_reg_set, i);
475       for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
476 	CLEAR_HARD_REG_BIT (accessible_reg_set, i);
477     }
478 
479   /*  See the definition of CALL_USED_REGISTERS in i386.h.  */
480   c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
481 
482   CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
483 
484   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
485     {
486       /* Set/reset conditionally defined registers from
487 	 CALL_USED_REGISTERS initializer.  */
488       if (call_used_regs[i] > 1)
489 	call_used_regs[i] = !!(call_used_regs[i] & c_mask);
490 
491       /* Calculate registers of CLOBBERED_REGS register set
492 	 as call used registers from GENERAL_REGS register set.  */
493       if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
494 	  && call_used_regs[i])
495 	SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
496     }
497 
498   /* If MMX is disabled, disable the registers.  */
499   if (! TARGET_MMX)
500     accessible_reg_set &= ~reg_class_contents[MMX_REGS];
501 
502   /* If SSE is disabled, disable the registers.  */
503   if (! TARGET_SSE)
504     accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS];
505 
506   /* If the FPU is disabled, disable the registers.  */
507   if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
508     accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];
509 
510   /* If AVX512F is disabled, disable the registers.  */
511   if (! TARGET_AVX512F)
512     {
513       for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
514 	CLEAR_HARD_REG_BIT (accessible_reg_set, i);
515 
516       accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS];
517     }
518 }
519 
520 /* Canonicalize a comparison from one we don't have to one we do have.  */
521 
522 static void
523 ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
524 			      bool op0_preserve_value)
525 {
526   /* The order of operands in x87 ficom compare is forced by combine in
527      simplify_comparison () function. Float operator is treated as RTX_OBJ
528      with a precedence over other operators and is always put in the first
529      place. Swap condition and operands to match ficom instruction.  */
530   if (!op0_preserve_value
531       && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
532     {
533       enum rtx_code scode = swap_condition ((enum rtx_code) *code);
534 
535       /* We are called only for compares that are split to SAHF instruction.
536 	 Ensure that we have setcc/jcc insn for the swapped condition.  */
537       if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
538 	{
539 	  std::swap (*op0, *op1);
540 	  *code = (int) scode;
541 	}
542     }
543 }
544 
545 
546 /* Hook to determine if one function can safely inline another.  */
547 
548 static bool
549 ix86_can_inline_p (tree caller, tree callee)
550 {
551   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
552   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
553 
554   /* Changes of those flags can be tolerated for always_inline functions.
555      Let's hope the user knows what they are doing.  */
556   const unsigned HOST_WIDE_INT always_inline_safe_mask
557 	 = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
558 	    | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
559 	    | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
560 	    | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
561 	    | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
562 	    | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
563 	    | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);
564 
565 
566   if (!callee_tree)
567     callee_tree = target_option_default_node;
568   if (!caller_tree)
569     caller_tree = target_option_default_node;
570   if (callee_tree == caller_tree)
571     return true;
572 
573   struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
574   struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
575   bool ret = false;
576   bool always_inline
577     = (DECL_DISREGARD_INLINE_LIMITS (callee)
578        && lookup_attribute ("always_inline",
579 			    DECL_ATTRIBUTES (callee)));
580 
581   cgraph_node *callee_node = cgraph_node::get (callee);
582   /* The callee's ISA options should be a subset of the caller's, i.e. an SSE4
583      function can inline an SSE2 function, but an SSE2 function can't inline
584      an SSE4 function.  */
585   if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
586        != callee_opts->x_ix86_isa_flags)
587       || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
588 	  != callee_opts->x_ix86_isa_flags2))
589     ret = false;
590 
591   /* See if we have the same non-isa options.  */
592   else if ((!always_inline
593 	    && caller_opts->x_target_flags != callee_opts->x_target_flags)
594 	   || (caller_opts->x_target_flags & ~always_inline_safe_mask)
595 	       != (callee_opts->x_target_flags & ~always_inline_safe_mask))
596     ret = false;
597 
598   /* See if arch, tune, etc. are the same.  */
599   else if (caller_opts->arch != callee_opts->arch)
600     ret = false;
601 
602   else if (!always_inline && caller_opts->tune != callee_opts->tune)
603     ret = false;
604 
605   else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
606 	   /* If the callee doesn't use FP expressions, differences in
607 	      ix86_fpmath can be ignored.  We are called from FEs
608 	      for multi-versioning call optimization, so beware that
609 	      ipa_fn_summaries may not be available.  */
610 	   && (! ipa_fn_summaries
611 	       || ipa_fn_summaries->get (callee_node) == NULL
612 	       || ipa_fn_summaries->get (callee_node)->fp_expressions))
613     ret = false;
614 
615   else if (!always_inline
616 	   && caller_opts->branch_cost != callee_opts->branch_cost)
617     ret = false;
618 
619   else
620     ret = true;
621 
622   return ret;
623 }
624 
625 /* Return true if this goes in large data/bss.  */
626 
627 static bool
628 ix86_in_large_data_p (tree exp)
629 {
630   if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
631     return false;
632 
633   if (exp == NULL_TREE)
634     return false;
635 
636   /* Functions are never large data.  */
637   if (TREE_CODE (exp) == FUNCTION_DECL)
638     return false;
639 
640   /* Automatic variables are never large data.  */
641   if (VAR_P (exp) && !is_global_var (exp))
642     return false;
643 
644   if (VAR_P (exp) && DECL_SECTION_NAME (exp))
645     {
646       const char *section = DECL_SECTION_NAME (exp);
647       if (strcmp (section, ".ldata") == 0
648 	  || strcmp (section, ".lbss") == 0)
649 	return true;
650       return false;
651     }
652   else
653     {
654       HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
655 
656       /* If this is an incomplete type with size 0, then we can't put it
657 	 in data because it might be too big when completed.  Also,
658 	 int_size_in_bytes returns -1 if size can vary or is larger than
659 	 an integer in which case also it is safer to assume that it goes in
660 	 large data.  */
661       if (size <= 0 || size > ix86_section_threshold)
662 	return true;
663     }
664 
665   return false;
666 }
667 
668 /* i386-specific section flag to mark large sections.  */
669 #define SECTION_LARGE SECTION_MACH_DEP
670 
671 /* Switch to the appropriate section for output of DECL.
672    DECL is either a `VAR_DECL' node or a constant of some sort.
673    RELOC indicates whether forming the initial value of DECL requires
674    link-time relocations.  */
675 
676 ATTRIBUTE_UNUSED static section *
677 x86_64_elf_select_section (tree decl, int reloc,
678 			   unsigned HOST_WIDE_INT align)
679 {
680   if (ix86_in_large_data_p (decl))
681     {
682       const char *sname = NULL;
683       unsigned int flags = SECTION_WRITE | SECTION_LARGE;
684       switch (categorize_decl_for_section (decl, reloc))
685 	{
686 	case SECCAT_DATA:
687 	  sname = ".ldata";
688 	  break;
689 	case SECCAT_DATA_REL:
690 	  sname = ".ldata.rel";
691 	  break;
692 	case SECCAT_DATA_REL_LOCAL:
693 	  sname = ".ldata.rel.local";
694 	  break;
695 	case SECCAT_DATA_REL_RO:
696 	  sname = ".ldata.rel.ro";
697 	  break;
698 	case SECCAT_DATA_REL_RO_LOCAL:
699 	  sname = ".ldata.rel.ro.local";
700 	  break;
701 	case SECCAT_BSS:
702 	  sname = ".lbss";
703 	  flags |= SECTION_BSS;
704 	  break;
705 	case SECCAT_RODATA:
706 	case SECCAT_RODATA_MERGE_STR:
707 	case SECCAT_RODATA_MERGE_STR_INIT:
708 	case SECCAT_RODATA_MERGE_CONST:
709 	  sname = ".lrodata";
710 	  flags &= ~SECTION_WRITE;
711 	  break;
712 	case SECCAT_SRODATA:
713 	case SECCAT_SDATA:
714 	case SECCAT_SBSS:
715 	  gcc_unreachable ();
716 	case SECCAT_TEXT:
717 	case SECCAT_TDATA:
718 	case SECCAT_TBSS:
719 	  /* We don't split these for medium model.  Place them into
720 	     default sections and hope for best.  */
721 	  break;
722 	}
723       if (sname)
724 	{
725 	  /* We might get called with string constants, but get_named_section
726 	     doesn't like them as they are not DECLs.  Also, we need to set
727 	     flags in that case.  */
728 	  if (!DECL_P (decl))
729 	    return get_section (sname, flags, NULL);
730 	  return get_named_section (decl, sname, reloc);
731 	}
732     }
733   return default_elf_select_section (decl, reloc, align);
734 }
735 
736 /* Select a set of attributes for section NAME based on the properties
737    of DECL and whether or not RELOC indicates that DECL's initializer
738    might contain runtime relocations.  */
739 
740 static unsigned int ATTRIBUTE_UNUSED
741 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
742 {
743   unsigned int flags = default_section_type_flags (decl, name, reloc);
744 
745   if (ix86_in_large_data_p (decl))
746     flags |= SECTION_LARGE;
747 
748   if (decl == NULL_TREE
749       && (strcmp (name, ".ldata.rel.ro") == 0
750 	  || strcmp (name, ".ldata.rel.ro.local") == 0))
751     flags |= SECTION_RELRO;
752 
753   if (strcmp (name, ".lbss") == 0
754       || strncmp (name, ".lbss.", sizeof (".lbss.") - 1) == 0
755       || strncmp (name, ".gnu.linkonce.lb.",
756 		  sizeof (".gnu.linkonce.lb.") - 1) == 0)
757     flags |= SECTION_BSS;
758 
759   return flags;
760 }
761 
762 /* Build up a unique section name, expressed as a
763    STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
764    RELOC indicates whether the initial value of EXP requires
765    link-time relocations.  */
766 
767 static void ATTRIBUTE_UNUSED
768 x86_64_elf_unique_section (tree decl, int reloc)
769 {
770   if (ix86_in_large_data_p (decl))
771     {
772       const char *prefix = NULL;
773       /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
774       bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
775 
776       switch (categorize_decl_for_section (decl, reloc))
777 	{
778 	case SECCAT_DATA:
779 	case SECCAT_DATA_REL:
780 	case SECCAT_DATA_REL_LOCAL:
781 	case SECCAT_DATA_REL_RO:
782 	case SECCAT_DATA_REL_RO_LOCAL:
783           prefix = one_only ? ".ld" : ".ldata";
784 	  break;
785 	case SECCAT_BSS:
786           prefix = one_only ? ".lb" : ".lbss";
787 	  break;
788 	case SECCAT_RODATA:
789 	case SECCAT_RODATA_MERGE_STR:
790 	case SECCAT_RODATA_MERGE_STR_INIT:
791 	case SECCAT_RODATA_MERGE_CONST:
792           prefix = one_only ? ".lr" : ".lrodata";
793 	  break;
794 	case SECCAT_SRODATA:
795 	case SECCAT_SDATA:
796 	case SECCAT_SBSS:
797 	  gcc_unreachable ();
798 	case SECCAT_TEXT:
799 	case SECCAT_TDATA:
800 	case SECCAT_TBSS:
801 	  /* We don't split these for medium model.  Place them into
802 	     default sections and hope for best.  */
803 	  break;
804 	}
805       if (prefix)
806 	{
807 	  const char *name, *linkonce;
808 	  char *string;
809 
810 	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
811 	  name = targetm.strip_name_encoding (name);
812 
813 	  /* If we're using one_only, then there needs to be a .gnu.linkonce
814      	     prefix to the section name.  */
815 	  linkonce = one_only ? ".gnu.linkonce" : "";
816 
817 	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));
818 
819 	  set_decl_section_name (decl, string);
820 	  return;
821 	}
822     }
823   default_unique_section (decl, reloc);
824 }
825 
826 #ifdef COMMON_ASM_OP
827 
828 #ifndef LARGECOMM_SECTION_ASM_OP
829 #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
830 #endif
831 
832 /* This says how to output assembler code to declare an
833    uninitialized external linkage data object.
834 
835    For medium-model x86-64 we need to use the LARGECOMM_SECTION_ASM_OP directive for
836    large objects.  */
837 void
838 x86_elf_aligned_decl_common (FILE *file, tree decl,
839 			const char *name, unsigned HOST_WIDE_INT size,
840 			int align)
841 {
842   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
843       && size > (unsigned int)ix86_section_threshold)
844     {
845       switch_to_section (get_named_section (decl, ".lbss", 0));
846       fputs (LARGECOMM_SECTION_ASM_OP, file);
847     }
848   else
849     fputs (COMMON_ASM_OP, file);
850   assemble_name (file, name);
851   fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
852 	   size, align / BITS_PER_UNIT);
853 }
854 #endif
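
/* As an illustration (object name and size made up here), a large
   uninitialized object under -mcmodel=medium is announced as

       .largecomm	big_buffer,1048576,32

   after switching to .lbss, while a small object keeps the usual

       .comm	small_var,4,4  */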
855 
856 /* Utility function for targets to use in implementing
857    ASM_OUTPUT_ALIGNED_BSS.  */
858 
859 void
860 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
861 		       	unsigned HOST_WIDE_INT size, int align)
862 {
863   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
864       && size > (unsigned int)ix86_section_threshold)
865     switch_to_section (get_named_section (decl, ".lbss", 0));
866   else
867     switch_to_section (bss_section);
868   ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
869 #ifdef ASM_DECLARE_OBJECT_NAME
870   last_assemble_variable_decl = decl;
871   ASM_DECLARE_OBJECT_NAME (file, name, decl);
872 #else
873   /* Standard thing is just output label for the object.  */
874   ASM_OUTPUT_LABEL (file, name);
875 #endif /* ASM_DECLARE_OBJECT_NAME */
876   ASM_OUTPUT_SKIP (file, size ? size : 1);
877 }
878 
879 /* Decide whether we must probe the stack before any space allocation
880    on this target.  It's essentially TARGET_STACK_PROBE except when
881    -fstack-check causes the stack to be already probed differently.  */
882 
883 bool
884 ix86_target_stack_probe (void)
885 {
886   /* Do not probe the stack twice if static stack checking is enabled.  */
887   if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
888     return false;
889 
890   return TARGET_STACK_PROBE;
891 }
892 
893 /* Decide whether we can make a sibling call to a function.  DECL is the
894    declaration of the function being targeted by the call and EXP is the
895    CALL_EXPR representing the call.  */
896 
897 static bool
898 ix86_function_ok_for_sibcall (tree decl, tree exp)
899 {
900   tree type, decl_or_type;
901   rtx a, b;
902   bool bind_global = decl && !targetm.binds_local_p (decl);
903 
904   if (ix86_function_naked (current_function_decl))
905     return false;
906 
907   /* Sibling call isn't OK if there are no caller-saved registers
908      since all registers must be preserved before return.  */
909   if (cfun->machine->no_caller_saved_registers)
910     return false;
911 
912   /* If we are generating position-independent code, we cannot sibcall
913      optimize direct calls to global functions, as the PLT requires
914      %ebx be live. (Darwin does not have a PLT.)  */
915   if (!TARGET_MACHO
916       && !TARGET_64BIT
917       && flag_pic
918       && flag_plt
919       && bind_global)
920     return false;
921 
922   /* If we need to align the outgoing stack, then sibcalling would
923      unalign the stack, which may break the called function.  */
924   if (ix86_minimum_incoming_stack_boundary (true)
925       < PREFERRED_STACK_BOUNDARY)
926     return false;
927 
928   if (decl)
929     {
930       decl_or_type = decl;
931       type = TREE_TYPE (decl);
932     }
933   else
934     {
935       /* We're looking at the CALL_EXPR, we need the type of the function.  */
936       type = CALL_EXPR_FN (exp);		/* pointer expression */
937       type = TREE_TYPE (type);			/* pointer type */
938       type = TREE_TYPE (type);			/* function type */
939       decl_or_type = type;
940     }
941 
942   /* Check that the return value locations are the same.  Like
943      if we are returning floats on the 80387 register stack, we cannot
944      make a sibcall from a function that doesn't return a float to a
945      function that does or, conversely, from a function that does return
946      a float to a function that doesn't; the necessary stack adjustment
947      would not be executed.  This is also the place we notice
948      differences in the return value ABI.  Note that it is ok for one
949      of the functions to have void return type as long as the return
950      value of the other is passed in a register.  */
951   a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
952   b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
953 			   cfun->decl, false);
954   if (STACK_REG_P (a) || STACK_REG_P (b))
955     {
956       if (!rtx_equal_p (a, b))
957 	return false;
958     }
959   else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
960     ;
961   else if (!rtx_equal_p (a, b))
962     return false;
963 
964   if (TARGET_64BIT)
965     {
966       /* The SYSV ABI has more call-clobbered registers;
967 	 disallow sibcalls from MS to SYSV.  */
968       if (cfun->machine->call_abi == MS_ABI
969 	  && ix86_function_type_abi (type) == SYSV_ABI)
970 	return false;
971     }
972   else
973     {
974       /* If this call is indirect, we'll need to be able to use a
975 	 call-clobbered register for the address of the target function.
976 	 Make sure that all such registers are not used for passing
977 	 parameters.  Note that DLLIMPORT functions and call to global
978 	 function via GOT slot are indirect.  */
979       if (!decl
980 	  || (bind_global && flag_pic && !flag_plt)
981 	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
982 	  || flag_force_indirect_call)
983 	{
984 	  /* Check if regparm >= 3 since arg_reg_available is set to
985 	     false if regparm == 0.  If regparm is 1 or 2, there is
986 	     always a call-clobbered register available.
987 
988 	     ??? The symbol indirect call doesn't need a call-clobbered
989 	     register.  But we don't know if this is a symbol indirect
990 	     call or not here.  */
991 	  if (ix86_function_regparm (type, decl) >= 3
992 	      && !cfun->machine->arg_reg_available)
993 	    return false;
994 	}
995     }
996 
997   /* Otherwise okay.  That also includes certain types of indirect calls.  */
998   return true;
999 }
1000 
1001 /* This function determines from TYPE the calling-convention.  */
1002 
1003 unsigned int
1004 ix86_get_callcvt (const_tree type)
1005 {
1006   unsigned int ret = 0;
1007   bool is_stdarg;
1008   tree attrs;
1009 
1010   if (TARGET_64BIT)
1011     return IX86_CALLCVT_CDECL;
1012 
1013   attrs = TYPE_ATTRIBUTES (type);
1014   if (attrs != NULL_TREE)
1015     {
1016       if (lookup_attribute ("cdecl", attrs))
1017 	ret |= IX86_CALLCVT_CDECL;
1018       else if (lookup_attribute ("stdcall", attrs))
1019 	ret |= IX86_CALLCVT_STDCALL;
1020       else if (lookup_attribute ("fastcall", attrs))
1021 	ret |= IX86_CALLCVT_FASTCALL;
1022       else if (lookup_attribute ("thiscall", attrs))
1023 	ret |= IX86_CALLCVT_THISCALL;
1024 
1025       /* Regparm isn't allowed for thiscall and fastcall.  */
1026       if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
1027 	{
1028 	  if (lookup_attribute ("regparm", attrs))
1029 	    ret |= IX86_CALLCVT_REGPARM;
1030 	  if (lookup_attribute ("sseregparm", attrs))
1031 	    ret |= IX86_CALLCVT_SSEREGPARM;
1032 	}
1033 
1034       if (IX86_BASE_CALLCVT(ret) != 0)
1035 	return ret;
1036     }
1037 
1038   is_stdarg = stdarg_p (type);
1039   if (TARGET_RTD && !is_stdarg)
1040     return IX86_CALLCVT_STDCALL | ret;
1041 
1042   if (ret != 0
1043       || is_stdarg
1044       || TREE_CODE (type) != METHOD_TYPE
1045       || ix86_function_type_abi (type) != MS_ABI)
1046     return IX86_CALLCVT_CDECL | ret;
1047 
1048   return IX86_CALLCVT_THISCALL;
1049 }
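
/* A sketch of how the result is typically consumed (see e.g.
   ix86_function_regparm and ix86_return_pops_args below):

     unsigned int ccvt = ix86_get_callcvt (funtype);
     if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
       ... first two integer arguments go in ECX/EDX ...
     else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
       ... the "this" pointer goes in ECX ...  */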
1050 
1051 /* Return 0 if the attributes for two types are incompatible, 1 if they
1052    are compatible, and 2 if they are nearly compatible (which causes a
1053    warning to be generated).  */
1054 
1055 static int
1056 ix86_comp_type_attributes (const_tree type1, const_tree type2)
1057 {
1058   unsigned int ccvt1, ccvt2;
1059 
1060   if (TREE_CODE (type1) != FUNCTION_TYPE
1061       && TREE_CODE (type1) != METHOD_TYPE)
1062     return 1;
1063 
1064   ccvt1 = ix86_get_callcvt (type1);
1065   ccvt2 = ix86_get_callcvt (type2);
1066   if (ccvt1 != ccvt2)
1067     return 0;
1068   if (ix86_function_regparm (type1, NULL)
1069       != ix86_function_regparm (type2, NULL))
1070     return 0;
1071 
1072   return 1;
1073 }
1074 
1075 /* Return the regparm value for a function with the indicated TYPE and DECL.
1076    DECL may be NULL when calling function indirectly
1077    or considering a libcall.  */
1078 
1079 static int
1080 ix86_function_regparm (const_tree type, const_tree decl)
1081 {
1082   tree attr;
1083   int regparm;
1084   unsigned int ccvt;
1085 
1086   if (TARGET_64BIT)
1087     return (ix86_function_type_abi (type) == SYSV_ABI
1088 	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
1089   ccvt = ix86_get_callcvt (type);
1090   regparm = ix86_regparm;
1091 
1092   if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
1093     {
1094       attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1095       if (attr)
1096 	{
1097 	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1098 	  return regparm;
1099 	}
1100     }
1101   else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1102     return 2;
1103   else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1104     return 1;
1105 
1106   /* Use register calling convention for local functions when possible.  */
1107   if (decl
1108       && TREE_CODE (decl) == FUNCTION_DECL)
1109     {
1110       cgraph_node *target = cgraph_node::get (decl);
1111       if (target)
1112 	target = target->function_symbol ();
1113 
1114       /* Caller and callee must agree on the calling convention, so
1115 	 checking just the optimize flag here would mean that with
1116 	 __attribute__((optimize (...))) the caller could use the regparm
1117 	 convention and the callee not, or vice versa.  Instead look at
1118 	 whether the callee is optimized or not.  */
1119       if (target && opt_for_fn (target->decl, optimize)
1120 	  && !(profile_flag && !flag_fentry))
1121 	{
1122 	  if (target->local && target->can_change_signature)
1123 	    {
1124 	      int local_regparm, globals = 0, regno;
1125 
1126 	      /* Make sure no regparm register is taken by a
1127 		 fixed register variable.  */
1128 	      for (local_regparm = 0; local_regparm < REGPARM_MAX;
1129 		   local_regparm++)
1130 		if (fixed_regs[local_regparm])
1131 		  break;
1132 
1133 	      /* We don't want to use regparm(3) for nested functions as
1134 		 these use a static chain pointer in the third argument.  */
1135 	      if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
1136 		local_regparm = 2;
1137 
1138 	      /* Save a register for the split stack.  */
1139 	      if (flag_split_stack)
1140 		{
1141 		  if (local_regparm == 3)
1142 		    local_regparm = 2;
1143 		  else if (local_regparm == 2
1144 			   && DECL_STATIC_CHAIN (target->decl))
1145 		    local_regparm = 1;
1146 		}
1147 
1148 	      /* Each fixed register usage increases register pressure,
1149 		 so fewer registers should be used for argument passing.
1150 		 This functionality can be overridden by an explicit
1151 		 regparm value.  */
1152 	      for (regno = AX_REG; regno <= DI_REG; regno++)
1153 		if (fixed_regs[regno])
1154 		  globals++;
1155 
1156 	      local_regparm
1157 		= globals < local_regparm ? local_regparm - globals : 0;
1158 
1159 	      if (local_regparm > regparm)
1160 		regparm = local_regparm;
1161 	    }
1162 	}
1163     }
1164 
1165   return regparm;
1166 }
1167 
1168 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
1169    DFmode (2) arguments in SSE registers for a function with the
1170    indicated TYPE and DECL.  DECL may be NULL when calling a function
1171    indirectly or considering a libcall.  Return -1 if any FP parameter
1172    should be rejected with an error.  This is used in situations where we
1173    imply the SSE calling convention but the function is called from another
1174    function with SSE disabled.  Otherwise return 0.  */
1175 
1176 static int
1177 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
1178 {
1179   gcc_assert (!TARGET_64BIT);
1180 
1181   /* Use SSE registers to pass SFmode and DFmode arguments if requested
1182      by the sseregparm attribute.  */
1183   if (TARGET_SSEREGPARM
1184       || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
1185     {
1186       if (!TARGET_SSE)
1187 	{
1188 	  if (warn)
1189 	    {
1190 	      if (decl)
1191 		error ("calling %qD with attribute sseregparm without "
1192 		       "SSE/SSE2 enabled", decl);
1193 	      else
1194 		error ("calling %qT with attribute sseregparm without "
1195 		       "SSE/SSE2 enabled", type);
1196 	    }
1197 	  return 0;
1198 	}
1199 
1200       return 2;
1201     }
1202 
1203   if (!decl)
1204     return 0;
1205 
1206   cgraph_node *target = cgraph_node::get (decl);
1207   if (target)
1208     target = target->function_symbol ();
1209 
1210   /* For local functions, pass up to SSE_REGPARM_MAX SFmode
1211      (and DFmode for SSE2) arguments in SSE registers.  */
1212   if (target
1213       /* TARGET_SSE_MATH */
1214       && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
1215       && opt_for_fn (target->decl, optimize)
1216       && !(profile_flag && !flag_fentry))
1217     {
1218       if (target->local && target->can_change_signature)
1219 	{
1220 	  /* Refuse to produce wrong code when local function with SSE enabled
1221 	     is called from SSE disabled function.
1222 	     FIXME: We need a way to detect these cases cross-ltrans partition
1223 	     and avoid using SSE calling conventions on local functions called
1224 	     from function with SSE disabled.  For now at least delay the
1225 	     warning until we know we are going to produce wrong code.
1226 	     See PR66047  */
1227 	  if (!TARGET_SSE && warn)
1228 	    return -1;
1229 	  return TARGET_SSE2_P (target_opts_for_fn (target->decl)
1230 				->x_ix86_isa_flags) ? 2 : 1;
1231 	}
1232     }
1233 
1234   return 0;
1235 }
1236 
1237 /* Return true if EAX is live at the start of the function.  Used by
1238    ix86_expand_prologue to determine if we need special help before
1239    calling allocate_stack_worker.  */
1240 
1241 static bool
1242 ix86_eax_live_at_start_p (void)
1243 {
1244   /* Cheat.  Don't bother working forward from ix86_function_regparm
1245      to the function type to whether an actual argument is located in
1246      eax.  Instead just look at cfg info, which is still close enough
1247      to correct at this point.  This gives false positives for broken
1248      functions that might use uninitialized data that happens to be
1249      allocated in eax, but who cares?  */
1250   return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
1251 }
1252 
1253 static bool
1254 ix86_keep_aggregate_return_pointer (tree fntype)
1255 {
1256   tree attr;
1257 
1258   if (!TARGET_64BIT)
1259     {
1260       attr = lookup_attribute ("callee_pop_aggregate_return",
1261 			       TYPE_ATTRIBUTES (fntype));
1262       if (attr)
1263 	return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
1264 
1265       /* For 32-bit MS-ABI the default is to keep aggregate
1266          return pointer.  */
1267       if (ix86_function_type_abi (fntype) == MS_ABI)
1268 	return true;
1269     }
1270   return KEEP_AGGREGATE_RETURN_POINTER != 0;
1271 }
1272 
1273 /* Value is the number of bytes of arguments automatically
1274    popped when returning from a subroutine call.
1275    FUNDECL is the declaration node of the function (as a tree),
1276    FUNTYPE is the data type of the function (as a tree),
1277    or for a library call it is an identifier node for the subroutine name.
1278    SIZE is the number of bytes of arguments passed on the stack.
1279 
1280    On the 80386, the RTD insn may be used to pop them if the number
1281      of args is fixed, but if the number is variable then the caller
1282      must pop them all.  RTD can't be used for library calls now
1283      because the library is compiled with the Unix compiler.
1284    Use of RTD is a selectable option, since it is incompatible with
1285    standard Unix calling sequences.  If the option is not selected,
1286    the caller must always pop the args.
1287 
1288    The attribute stdcall is equivalent to RTD on a per module basis.  */
1289 
1290 static poly_int64
1291 ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
1292 {
1293   unsigned int ccvt;
1294 
1295   /* None of the 64-bit ABIs pop arguments.  */
1296   if (TARGET_64BIT)
1297     return 0;
1298 
1299   ccvt = ix86_get_callcvt (funtype);
1300 
1301   if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
1302 	       | IX86_CALLCVT_THISCALL)) != 0
1303       && ! stdarg_p (funtype))
1304     return size;
1305 
1306   /* Lose any fake structure return argument if it is passed on the stack.  */
1307   if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1308       && !ix86_keep_aggregate_return_pointer (funtype))
1309     {
1310       int nregs = ix86_function_regparm (funtype, fundecl);
1311       if (nregs == 0)
1312 	return GET_MODE_SIZE (Pmode);
1313     }
1314 
1315   return 0;
1316 }
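
/* Concretely, for a 32-bit function declared (illustrative declaration)
     int __attribute__((stdcall)) f (int a, int b);
   the hook above returns 8, so the callee pops its own arguments with
   "ret $8", whereas a cdecl function returns 0 and leaves the stack
   cleanup to the caller.  */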
1317 
1318 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.  */
1319 
1320 static bool
1321 ix86_legitimate_combined_insn (rtx_insn *insn)
1322 {
1323   int i;
1324 
1325   /* Check operand constraints in case hard registers were propagated
1326      into insn pattern.  This check prevents combine pass from
1327      generating insn patterns with invalid hard register operands.
1328      These invalid insns can eventually confuse reload to error out
1329      with a spill failure.  See also PRs 46829 and 46843.  */
1330 
1331   gcc_assert (INSN_CODE (insn) >= 0);
1332 
1333   extract_insn (insn);
1334   preprocess_constraints (insn);
1335 
1336   int n_operands = recog_data.n_operands;
1337   int n_alternatives = recog_data.n_alternatives;
1338   for (i = 0; i < n_operands; i++)
1339     {
1340       rtx op = recog_data.operand[i];
1341       machine_mode mode = GET_MODE (op);
1342       const operand_alternative *op_alt;
1343       int offset = 0;
1344       bool win;
1345       int j;
1346 
1347       /* A unary operator may be accepted by the predicate, but it
1348 	 is irrelevant for matching constraints.  */
1349       if (UNARY_P (op))
1350 	op = XEXP (op, 0);
1351 
1352       if (SUBREG_P (op))
1353 	{
1354 	  if (REG_P (SUBREG_REG (op))
1355 	      && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
1356 	    offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
1357 					  GET_MODE (SUBREG_REG (op)),
1358 					  SUBREG_BYTE (op),
1359 					  GET_MODE (op));
1360 	  op = SUBREG_REG (op);
1361 	}
1362 
1363       if (!(REG_P (op) && HARD_REGISTER_P (op)))
1364 	continue;
1365 
1366       op_alt = recog_op_alt;
1367 
1368       /* Operand has no constraints, anything is OK.  */
1369       win = !n_alternatives;
1370 
1371       alternative_mask preferred = get_preferred_alternatives (insn);
1372       for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
1373 	{
1374 	  if (!TEST_BIT (preferred, j))
1375 	    continue;
1376 	  if (op_alt[i].anything_ok
1377 	      || (op_alt[i].matches != -1
1378 		  && operands_match_p
1379 		  (recog_data.operand[i],
1380 		   recog_data.operand[op_alt[i].matches]))
1381 	      || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
1382 	    {
1383 	      win = true;
1384 	      break;
1385 	    }
1386 	}
1387 
1388       if (!win)
1389 	return false;
1390     }
1391 
1392   return true;
1393 }
1394 
1395 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
1396 
1397 static unsigned HOST_WIDE_INT
1398 ix86_asan_shadow_offset (void)
1399 {
1400   return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
1401 				     : HOST_WIDE_INT_C (0x7fff8000))
1402 		     : (HOST_WIDE_INT_1 << X86_32_ASAN_BIT_OFFSET);
1403 }
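
/* The constant above is the offset used by AddressSanitizer's shadow
   mapping, roughly shadow = (addr >> 3) + offset, e.g. 0x7fff8000 for
   64-bit Linux (TARGET_LP64 and not TARGET_MACHO).  */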
1404 
1405 /* Argument support functions.  */
1406 
1407 /* Return true when register may be used to pass function parameters.  */
1408 bool
1409 ix86_function_arg_regno_p (int regno)
1410 {
1411   int i;
1412   enum calling_abi call_abi;
1413   const int *parm_regs;
1414 
1415   if (!TARGET_64BIT)
1416     {
1417       if (TARGET_MACHO)
1418         return (regno < REGPARM_MAX
1419                 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1420       else
1421         return (regno < REGPARM_MAX
1422 	        || (TARGET_MMX && MMX_REGNO_P (regno)
1423 	  	    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
1424 	        || (TARGET_SSE && SSE_REGNO_P (regno)
1425 		    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
1426     }
1427 
1428   if (TARGET_SSE && SSE_REGNO_P (regno)
1429       && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
1430     return true;
1431 
1432   /* TODO: The function should depend on current function ABI but
1433      builtins.c would need updating then. Therefore we use the
1434      default ABI.  */
1435   call_abi = ix86_cfun_abi ();
1436 
1437   /* RAX is used as hidden argument to va_arg functions.  */
1438   if (call_abi == SYSV_ABI && regno == AX_REG)
1439     return true;
1440 
1441   if (call_abi == MS_ABI)
1442     parm_regs = x86_64_ms_abi_int_parameter_registers;
1443   else
1444     parm_regs = x86_64_int_parameter_registers;
1445 
1446   for (i = 0; i < (call_abi == MS_ABI
1447 		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
1448     if (regno == parm_regs[i])
1449       return true;
1450   return false;
1451 }
1452 
1453 /* Return true if we do not know how to pass ARG solely in registers.  */
1454 
1455 static bool
1456 ix86_must_pass_in_stack (const function_arg_info &arg)
1457 {
1458   if (must_pass_in_stack_var_size_or_pad (arg))
1459     return true;
1460 
1461   /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
1462      The layout_type routine is crafty and tries to trick us into passing
1463      currently unsupported vector types on the stack by using TImode.  */
1464   return (!TARGET_64BIT && arg.mode == TImode
1465 	  && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE);
1466 }
1467 
1468 /* Return the size, in bytes, of the area reserved for arguments passed
1469    in registers for the function represented by fndecl, depending on the
1470    ABI used.  */
1471 int
1472 ix86_reg_parm_stack_space (const_tree fndecl)
1473 {
1474   enum calling_abi call_abi = SYSV_ABI;
1475   if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
1476     call_abi = ix86_function_abi (fndecl);
1477   else
1478     call_abi = ix86_function_type_abi (fndecl);
1479   if (TARGET_64BIT && call_abi == MS_ABI)
1480     return 32;
1481   return 0;
1482 }
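
/* The 32 bytes returned for the 64-bit MS ABI are the "home" (shadow) area
   the caller reserves above the return address for the four register
   arguments (RCX, RDX, R8 and R9); SysV targets reserve no such area, hence
   the return value of 0.  */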
1483 
1484 /* We add this as a workaround in order to use libc_has_function
1485    hook in i386.md.  */
1486 bool
1487 ix86_libc_has_function (enum function_class fn_class)
1488 {
1489   return targetm.libc_has_function (fn_class);
1490 }
1491 
1492 /* Return SYSV_ABI or MS_ABI, depending on fntype,
1493    specifying the calling ABI used.  */
1494 enum calling_abi
1495 ix86_function_type_abi (const_tree fntype)
1496 {
1497   enum calling_abi abi = ix86_abi;
1498 
1499   if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
1500     return abi;
1501 
1502   if (abi == SYSV_ABI
1503       && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
1504     {
1505       static int warned;
1506       if (TARGET_X32 && !warned)
1507 	{
1508 	  error ("X32 does not support %<ms_abi%> attribute");
1509 	  warned = 1;
1510 	}
1511 
1512       abi = MS_ABI;
1513     }
1514   else if (abi == MS_ABI
1515 	   && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
1516     abi = SYSV_ABI;
1517 
1518   return abi;
1519 }
1520 
1521 enum calling_abi
1522 ix86_function_abi (const_tree fndecl)
1523 {
1524   return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
1525 }
1526 
1527 /* Return SYSV_ABI or MS_ABI, depending on cfun,
1528    specifying the calling ABI used.  */
1529 enum calling_abi
1530 ix86_cfun_abi (void)
1531 {
1532   return cfun ? cfun->machine->call_abi : ix86_abi;
1533 }
1534 
1535 bool
1536 ix86_function_ms_hook_prologue (const_tree fn)
1537 {
1538   if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
1539     {
1540       if (decl_function_context (fn) != NULL_TREE)
1541 	error_at (DECL_SOURCE_LOCATION (fn),
1542 		  "%<ms_hook_prologue%> attribute is not compatible "
1543 		  "with nested function");
1544       else
1545         return true;
1546     }
1547   return false;
1548 }
1549 
1550 bool
1551 ix86_function_naked (const_tree fn)
1552 {
1553   if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
1554     return true;
1555 
1556   return false;
1557 }
1558 
1559 /* Write the extra assembler code needed to declare a function properly.  */
1560 
1561 void
1562 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
1563 				tree decl)
1564 {
1565   bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
1566 
1567   if (is_ms_hook)
1568     {
1569       int i, filler_count = (TARGET_64BIT ? 32 : 16);
1570       unsigned int filler_cc = 0xcccccccc;
1571 
1572       for (i = 0; i < filler_count; i += 4)
1573         fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
1574     }
1575 
1576 #ifdef SUBTARGET_ASM_UNWIND_INIT
1577   SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
1578 #endif
1579 
1580   ASM_OUTPUT_LABEL (asm_out_file, fname);
1581 
1582   /* Output magic byte marker, if hot-patch attribute is set.  */
1583   if (is_ms_hook)
1584     {
1585       if (TARGET_64BIT)
1586 	{
1587 	  /* leaq [%rsp + 0], %rsp  */
1588 	  fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
1589 		 asm_out_file);
1590 	}
1591       else
1592 	{
1593           /* movl.s %edi, %edi
1594 	     push   %ebp
1595 	     movl.s %esp, %ebp */
1596 	  fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", asm_out_file);
1597 	}
1598     }
1599 }
1600 
1601 /* Implementation of the call ABI switching target hook.  The call
1602    register sets specific to FNDECL are selected.  See also
1603    ix86_conditional_register_usage for more details.  */
1604 void
1605 ix86_call_abi_override (const_tree fndecl)
1606 {
1607   cfun->machine->call_abi = ix86_function_abi (fndecl);
1608 }
1609 
1610 /* Return true if a pseudo register should be created and used to hold
1611    the GOT address for PIC code.  */
1612 bool
1613 ix86_use_pseudo_pic_reg (void)
1614 {
1615   if ((TARGET_64BIT
1616        && (ix86_cmodel == CM_SMALL_PIC
1617 	   || TARGET_PECOFF))
1618       || !flag_pic)
1619     return false;
1620   return true;
1621 }
1622 
1623 /* Initialize large model PIC register.  */
1624 
1625 static void
1626 ix86_init_large_pic_reg (unsigned int tmp_regno)
1627 {
1628   rtx_code_label *label;
1629   rtx tmp_reg;
1630 
1631   gcc_assert (Pmode == DImode);
1632   label = gen_label_rtx ();
1633   emit_label (label);
1634   LABEL_PRESERVE_P (label) = 1;
1635   tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
1636   gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
1637   emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
1638 				label));
1639   emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
1640   emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
1641   const char *name = LABEL_NAME (label);
1642   PUT_CODE (label, NOTE);
1643   NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
1644   NOTE_DELETED_LABEL_NAME (label) = name;
1645 }
1646 
1647 /* Create and initialize PIC register if required.  */
1648 static void
1649 ix86_init_pic_reg (void)
1650 {
1651   edge entry_edge;
1652   rtx_insn *seq;
1653 
1654   if (!ix86_use_pseudo_pic_reg ())
1655     return;
1656 
1657   start_sequence ();
1658 
1659   if (TARGET_64BIT)
1660     {
1661       if (ix86_cmodel == CM_LARGE_PIC)
1662 	ix86_init_large_pic_reg (R11_REG);
1663       else
1664 	emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
1665     }
1666   else
1667     {
1668       /* If there is a future mcount call in the function, it is more profitable
1669 	  to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM.  */
1670       rtx reg = crtl->profile
1671 		? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
1672 		: pic_offset_table_rtx;
1673       rtx_insn *insn = emit_insn (gen_set_got (reg));
1674       RTX_FRAME_RELATED_P (insn) = 1;
1675       if (crtl->profile)
1676         emit_move_insn (pic_offset_table_rtx, reg);
1677       add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
1678     }
1679 
1680   seq = get_insns ();
1681   end_sequence ();
1682 
1683   entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
1684   insert_insn_on_edge (seq, entry_edge);
1685   commit_one_edge_insertion (entry_edge);
1686 }
1687 
1688 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1689    for a call to a function whose data type is FNTYPE.
1690    For a library call, FNTYPE is 0.  */
1691 
1692 void
1693 init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
1694 		      tree fntype,	/* tree ptr for function decl */
1695 		      rtx libname,	/* SYMBOL_REF of library name or 0 */
1696 		      tree fndecl,
1697 		      int caller)
1698 {
1699   struct cgraph_node *local_info_node = NULL;
1700   struct cgraph_node *target = NULL;
1701 
1702   memset (cum, 0, sizeof (*cum));
1703 
1704   if (fndecl)
1705     {
1706       target = cgraph_node::get (fndecl);
1707       if (target)
1708 	{
1709 	  target = target->function_symbol ();
1710 	  local_info_node = cgraph_node::local_info_node (target->decl);
1711 	  cum->call_abi = ix86_function_abi (target->decl);
1712 	}
1713       else
1714 	cum->call_abi = ix86_function_abi (fndecl);
1715     }
1716   else
1717     cum->call_abi = ix86_function_type_abi (fntype);
1718 
1719   cum->caller = caller;
1720 
1721   /* Set up the number of registers to use for passing arguments.  */
1722   cum->nregs = ix86_regparm;
1723   if (TARGET_64BIT)
1724     {
1725       cum->nregs = (cum->call_abi == SYSV_ABI
1726                    ? X86_64_REGPARM_MAX
1727                    : X86_64_MS_REGPARM_MAX);
1728     }
1729   if (TARGET_SSE)
1730     {
1731       cum->sse_nregs = SSE_REGPARM_MAX;
1732       if (TARGET_64BIT)
1733         {
1734           cum->sse_nregs = (cum->call_abi == SYSV_ABI
1735                            ? X86_64_SSE_REGPARM_MAX
1736                            : X86_64_MS_SSE_REGPARM_MAX);
1737         }
1738     }
1739   if (TARGET_MMX)
1740     cum->mmx_nregs = MMX_REGPARM_MAX;
1741   cum->warn_avx512f = true;
1742   cum->warn_avx = true;
1743   cum->warn_sse = true;
1744   cum->warn_mmx = true;
1745 
1746   /* Because the type might mismatch between caller and callee, we need to
1747      use the actual type of the function for local calls.
1748      FIXME: cgraph_analyze can be told to actually record whether a function
1749      uses va_start, so for local functions maybe_vaarg can be made more
1750      aggressive, helping K&R code.
1751      FIXME: once the type system is fixed, we won't need this code anymore.  */
1752   if (local_info_node && local_info_node->local
1753       && local_info_node->can_change_signature)
1754     fntype = TREE_TYPE (target->decl);
1755   cum->stdarg = stdarg_p (fntype);
1756   cum->maybe_vaarg = (fntype
1757 		      ? (!prototype_p (fntype) || stdarg_p (fntype))
1758 		      : !libname);
1759 
1760   cum->decl = fndecl;
1761 
1762   cum->warn_empty = !warn_abi || cum->stdarg;
1763   if (!cum->warn_empty && fntype)
1764     {
1765       function_args_iterator iter;
1766       tree argtype;
1767       bool seen_empty_type = false;
1768       FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
1769 	{
1770 	  if (argtype == error_mark_node || VOID_TYPE_P (argtype))
1771 	    break;
1772 	  if (TYPE_EMPTY_P (argtype))
1773 	    seen_empty_type = true;
1774 	  else if (seen_empty_type)
1775 	    {
1776 	      cum->warn_empty = true;
1777 	      break;
1778 	    }
1779 	}
1780     }
1781 
1782   if (!TARGET_64BIT)
1783     {
1784       /* If there are variable arguments, then we won't pass anything
1785          in registers in 32-bit mode. */
1786       if (stdarg_p (fntype))
1787 	{
1788 	  cum->nregs = 0;
1789 	  /* Since in 32-bit mode variable arguments are always passed on
1790 	     the stack, there is a scratch register available for an
1791 	     indirect sibcall.  */
1792 	  cfun->machine->arg_reg_available = true;
1793 	  cum->sse_nregs = 0;
1794 	  cum->mmx_nregs = 0;
1795 	  cum->warn_avx512f = false;
1796 	  cum->warn_avx = false;
1797 	  cum->warn_sse = false;
1798 	  cum->warn_mmx = false;
1799 	  return;
1800 	}
1801 
1802       /* Use the ecx and edx registers if the function has the fastcall
1803 	 attribute, else look for regparm information.  */
1804       if (fntype)
1805 	{
1806 	  unsigned int ccvt = ix86_get_callcvt (fntype);
1807 	  if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1808 	    {
1809 	      cum->nregs = 1;
1810 	      cum->fastcall = 1; /* Same first register as in fastcall.  */
1811 	    }
1812 	  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1813 	    {
1814 	      cum->nregs = 2;
1815 	      cum->fastcall = 1;
1816 	    }
1817 	  else
1818 	    cum->nregs = ix86_function_regparm (fntype, fndecl);
1819 	}
1820 
1821       /* Set up the number of SSE registers used for passing SFmode
1822 	 and DFmode arguments.  Warn for mismatching ABI.  */
1823       cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
1824     }
1825 
1826   cfun->machine->arg_reg_available = (cum->nregs > 0);
1827 }
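
/* Illustrative example (editorial note, not part of the original source):
   for 32-bit code the register counts set up above mean that

     void __attribute__ ((fastcall)) f (int a, int b, int c);

   gets CUM->nregs == 2 (A in %ecx, B in %edx, C on the stack), a thiscall
   method gets CUM->nregs == 1 (only the this pointer in %ecx), and a
   variadic function gets CUM->nregs == 0, so everything goes on the
   stack.  */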
1828 
1829 /* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
1830    But in the case of vector types, it is some vector mode.
1831 
1832    When we have only some of our vector isa extensions enabled, then there
1833    are some modes for which vector_mode_supported_p is false.  For these
1834    modes, the generic vector support in gcc will choose some non-vector mode
1835    in order to implement the type.  By computing the natural mode, we'll
1836    select the proper ABI location for the operand and not depend on whatever
1837    the middle-end decides to do with these vector types.
1838 
1839    The middle-end can't deal with vector types > 16 bytes.  In this
1840    case, we return the original mode and warn about the ABI change if
1841    CUM isn't NULL.
1842 
1843    If IN_RETURN is true, warn about the ABI change if the vector mode
1844    isn't available for the function return value.  */
1845 
1846 static machine_mode
1847 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
1848 		   bool in_return)
1849 {
1850   machine_mode mode = TYPE_MODE (type);
1851 
1852   if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
1853     {
1854       HOST_WIDE_INT size = int_size_in_bytes (type);
1855       if ((size == 8 || size == 16 || size == 32 || size == 64)
1856 	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
1857 	  && TYPE_VECTOR_SUBPARTS (type) > 1)
1858 	{
1859 	  machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
1860 
1861 	  /* There are no XFmode vector modes.  */
1862 	  if (innermode == XFmode)
1863 	    return mode;
1864 
1865 	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
1866 	    mode = MIN_MODE_VECTOR_FLOAT;
1867 	  else
1868 	    mode = MIN_MODE_VECTOR_INT;
1869 
1870 	  /* Get the mode which has this inner mode and number of units.  */
1871 	  FOR_EACH_MODE_FROM (mode, mode)
1872 	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
1873 		&& GET_MODE_INNER (mode) == innermode)
1874 	      {
1875 		if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
1876 		  {
1877 		    static bool warnedavx512f;
1878 		    static bool warnedavx512f_ret;
1879 
1880 		    if (cum && cum->warn_avx512f && !warnedavx512f)
1881 		      {
1882 			if (warning (OPT_Wpsabi, "AVX512F vector argument "
1883 				     "without AVX512F enabled changes the ABI"))
1884 			  warnedavx512f = true;
1885 		      }
1886 		    else if (in_return && !warnedavx512f_ret)
1887 		      {
1888 			if (warning (OPT_Wpsabi, "AVX512F vector return "
1889 				     "without AVX512F enabled changes the ABI"))
1890 			  warnedavx512f_ret = true;
1891 		      }
1892 
1893 		    return TYPE_MODE (type);
1894 		  }
1895 		else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
1896 		  {
1897 		    static bool warnedavx;
1898 		    static bool warnedavx_ret;
1899 
1900 		    if (cum && cum->warn_avx && !warnedavx)
1901 		      {
1902 			if (warning (OPT_Wpsabi, "AVX vector argument "
1903 				     "without AVX enabled changes the ABI"))
1904 			  warnedavx = true;
1905 		      }
1906 		    else if (in_return && !warnedavx_ret)
1907 		      {
1908 			if (warning (OPT_Wpsabi, "AVX vector return "
1909 				     "without AVX enabled changes the ABI"))
1910 			  warnedavx_ret = true;
1911 		      }
1912 
1913 		    return TYPE_MODE (type);
1914 		  }
1915 		else if (((size == 8 && TARGET_64BIT) || size == 16)
1916 			 && !TARGET_SSE
1917 			 && !TARGET_IAMCU)
1918 		  {
1919 		    static bool warnedsse;
1920 		    static bool warnedsse_ret;
1921 
1922 		    if (cum && cum->warn_sse && !warnedsse)
1923 		      {
1924 			if (warning (OPT_Wpsabi, "SSE vector argument "
1925 				     "without SSE enabled changes the ABI"))
1926 			  warnedsse = true;
1927 		      }
1928 		    else if (!TARGET_64BIT && in_return && !warnedsse_ret)
1929 		      {
1930 			if (warning (OPT_Wpsabi, "SSE vector return "
1931 				     "without SSE enabled changes the ABI"))
1932 			  warnedsse_ret = true;
1933 		      }
1934 		  }
1935 		else if ((size == 8 && !TARGET_64BIT)
1936 			 && (!cfun
1937 			     || cfun->machine->func_type == TYPE_NORMAL)
1938 			 && !TARGET_MMX
1939 			 && !TARGET_IAMCU)
1940 		  {
1941 		    static bool warnedmmx;
1942 		    static bool warnedmmx_ret;
1943 
1944 		    if (cum && cum->warn_mmx && !warnedmmx)
1945 		      {
1946 			if (warning (OPT_Wpsabi, "MMX vector argument "
1947 				     "without MMX enabled changes the ABI"))
1948 			  warnedmmx = true;
1949 		      }
1950 		    else if (in_return && !warnedmmx_ret)
1951 		      {
1952 			if (warning (OPT_Wpsabi, "MMX vector return "
1953 				     "without MMX enabled changes the ABI"))
1954 			  warnedmmx_ret = true;
1955 		      }
1956 		  }
1957 		return mode;
1958 	      }
1959 
1960 	  gcc_unreachable ();
1961 	}
1962     }
1963 
1964   return mode;
1965 }
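
/* Illustrative example (editorial note, not part of the original source):

     typedef float v8sf __attribute__ ((vector_size (32)));

   has the natural mode V8SFmode when AVX is enabled.  Without -mavx the
   function above falls back to the type's original mode and emits the
   -Wpsabi warning "AVX vector argument without AVX enabled changes the
   ABI" shown in the code.  */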
1966 
1967 /* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
1968    this may not agree with the mode that the type system has chosen for the
1969    register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
1970    go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */
1971 
1972 static rtx
1973 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
1974 		     unsigned int regno)
1975 {
1976   rtx tmp;
1977 
1978   if (orig_mode != BLKmode)
1979     tmp = gen_rtx_REG (orig_mode, regno);
1980   else
1981     {
1982       tmp = gen_rtx_REG (mode, regno);
1983       tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
1984       tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
1985     }
1986 
1987   return tmp;
1988 }
1989 
1990 /* x86-64 register passing implementation.  See the x86-64 ABI for details.
1991    The goal of this code is to classify each 8-byte chunk of an incoming
1992    argument by register class and assign registers accordingly.  */
1993 
1994 /* Return the union class of CLASS1 and CLASS2.
1995    See the x86-64 PS ABI for details.  */
1996 
1997 static enum x86_64_reg_class
1998 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1999 {
2000   /* Rule #1: If both classes are equal, this is the resulting class.  */
2001   if (class1 == class2)
2002     return class1;
2003 
2004   /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2005      the other class.  */
2006   if (class1 == X86_64_NO_CLASS)
2007     return class2;
2008   if (class2 == X86_64_NO_CLASS)
2009     return class1;
2010 
2011   /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
2012   if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2013     return X86_64_MEMORY_CLASS;
2014 
2015   /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
2016   if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2017       || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2018     return X86_64_INTEGERSI_CLASS;
2019   if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2020       || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2021     return X86_64_INTEGER_CLASS;
2022 
2023   /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2024      MEMORY is used.  */
2025   if (class1 == X86_64_X87_CLASS
2026       || class1 == X86_64_X87UP_CLASS
2027       || class1 == X86_64_COMPLEX_X87_CLASS
2028       || class2 == X86_64_X87_CLASS
2029       || class2 == X86_64_X87UP_CLASS
2030       || class2 == X86_64_COMPLEX_X87_CLASS)
2031     return X86_64_MEMORY_CLASS;
2032 
2033   /* Rule #6: Otherwise class SSE is used.  */
2034   return X86_64_SSE_CLASS;
2035 }
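
/* Illustrative example (editorial note, not part of the original source):
   for

     union u { int i; float f; };

   the int member classifies as X86_64_INTEGERSI_CLASS and the float member
   as X86_64_SSESF_CLASS; rule #4 above merges them to
   X86_64_INTEGERSI_CLASS, so the union is passed in a general-purpose
   register rather than an SSE register.  */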
2036 
2037 /* Classify the argument of type TYPE and mode MODE.
2038    CLASSES will be filled by the register class used to pass each word
2039    of the operand.  The number of words is returned.  In case the parameter
2040    should be passed in memory, 0 is returned. As a special case for zero
2041    sized containers, classes[0] will be NO_CLASS and 1 is returned.
2042 
2043    BIT_OFFSET is used internally for handling records and specifies the
2044    offset in bits modulo 512 to avoid overflow cases.
2045 
2046    See the x86-64 PS ABI for details.
2047 */
2048 
2049 static int
2050 classify_argument (machine_mode mode, const_tree type,
2051 		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2052 {
2053   HOST_WIDE_INT bytes
2054     = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2055   int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
2056 
2057   /* Variable sized entities are always passed/returned in memory.  */
2058   if (bytes < 0)
2059     return 0;
2060 
2061   if (mode != VOIDmode)
2062     {
2063       /* The value of "named" doesn't matter.  */
2064       function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true);
2065       if (targetm.calls.must_pass_in_stack (arg))
2066 	return 0;
2067     }
2068 
2069   if (type && AGGREGATE_TYPE_P (type))
2070     {
2071       int i;
2072       tree field;
2073       enum x86_64_reg_class subclasses[MAX_CLASSES];
2074 
2075       /* On x86-64 we pass structures larger than 64 bytes on the stack.  */
2076       if (bytes > 64)
2077 	return 0;
2078 
2079       for (i = 0; i < words; i++)
2080 	classes[i] = X86_64_NO_CLASS;
2081 
2082       /* Zero-sized arrays or structures are NO_CLASS.  We return 0 to
2083 	 signal the memory class, so handle this as a special case.  */
2084       if (!words)
2085 	{
2086 	  classes[0] = X86_64_NO_CLASS;
2087 	  return 1;
2088 	}
2089 
2090       /* Classify each field of record and merge classes.  */
2091       switch (TREE_CODE (type))
2092 	{
2093 	case RECORD_TYPE:
2094 	  /* And now merge the fields of structure.  */
2095 	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2096 	    {
2097 	      if (TREE_CODE (field) == FIELD_DECL)
2098 		{
2099 		  int num;
2100 
2101 		  if (TREE_TYPE (field) == error_mark_node)
2102 		    continue;
2103 
2104 		  /* Bitfields are always classified as integer.  Handle them
2105 		     early, since later code would consider them to be
2106 		     misaligned integers.  */
2107 		  if (DECL_BIT_FIELD (field))
2108 		    {
2109 		      for (i = (int_bit_position (field)
2110 				+ (bit_offset % 64)) / 8 / 8;
2111 			   i < ((int_bit_position (field) + (bit_offset % 64))
2112 			        + tree_to_shwi (DECL_SIZE (field))
2113 				+ 63) / 8 / 8; i++)
2114 			classes[i]
2115 			  = merge_classes (X86_64_INTEGER_CLASS, classes[i]);
2116 		    }
2117 		  else
2118 		    {
2119 		      int pos;
2120 
2121 		      type = TREE_TYPE (field);
2122 
2123 		      /* Flexible array member is ignored.  */
2124 		      if (TYPE_MODE (type) == BLKmode
2125 			  && TREE_CODE (type) == ARRAY_TYPE
2126 			  && TYPE_SIZE (type) == NULL_TREE
2127 			  && TYPE_DOMAIN (type) != NULL_TREE
2128 			  && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
2129 			      == NULL_TREE))
2130 			{
2131 			  static bool warned;
2132 
2133 			  if (!warned && warn_psabi)
2134 			    {
2135 			      warned = true;
2136 			      inform (input_location,
2137 				      "the ABI of passing struct with"
2138 				      " a flexible array member has"
2139 				      " changed in GCC 4.4");
2140 			    }
2141 			  continue;
2142 			}
2143 		      num = classify_argument (TYPE_MODE (type), type,
2144 					       subclasses,
2145 					       (int_bit_position (field)
2146 						+ bit_offset) % 512);
2147 		      if (!num)
2148 			return 0;
2149 		      pos = (int_bit_position (field)
2150 			     + (bit_offset % 64)) / 8 / 8;
2151 		      for (i = 0; i < num && (i + pos) < words; i++)
2152 			classes[i + pos]
2153 			  = merge_classes (subclasses[i], classes[i + pos]);
2154 		    }
2155 		}
2156 	    }
2157 	  break;
2158 
2159 	case ARRAY_TYPE:
2160 	  /* Arrays are handled as small records.  */
2161 	  {
2162 	    int num;
2163 	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2164 				     TREE_TYPE (type), subclasses, bit_offset);
2165 	    if (!num)
2166 	      return 0;
2167 
2168 	    /* The partial classes are now full classes.  */
2169 	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2170 	      subclasses[0] = X86_64_SSE_CLASS;
2171 	    if (subclasses[0] == X86_64_INTEGERSI_CLASS
2172 		&& !((bit_offset % 64) == 0 && bytes == 4))
2173 	      subclasses[0] = X86_64_INTEGER_CLASS;
2174 
2175 	    for (i = 0; i < words; i++)
2176 	      classes[i] = subclasses[i % num];
2177 
2178 	    break;
2179 	  }
2180 	case UNION_TYPE:
2181 	case QUAL_UNION_TYPE:
2182 	  /* Unions are similar to RECORD_TYPE but the offset is always
2183 	     0.  */
2184 	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2185 	    {
2186 	      if (TREE_CODE (field) == FIELD_DECL)
2187 		{
2188 		  int num;
2189 
2190 		  if (TREE_TYPE (field) == error_mark_node)
2191 		    continue;
2192 
2193 		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2194 					   TREE_TYPE (field), subclasses,
2195 					   bit_offset);
2196 		  if (!num)
2197 		    return 0;
2198 		  for (i = 0; i < num && i < words; i++)
2199 		    classes[i] = merge_classes (subclasses[i], classes[i]);
2200 		}
2201 	    }
2202 	  break;
2203 
2204 	default:
2205 	  gcc_unreachable ();
2206 	}
2207 
2208       if (words > 2)
2209 	{
2210 	  /* When the size is > 16 bytes, if the first class isn't
2211 	     X86_64_SSE_CLASS or any of the others isn't
2212 	     X86_64_SSEUP_CLASS, everything should be passed in
2213 	     memory.  */
2214 	  if (classes[0] != X86_64_SSE_CLASS)
2215 	    return 0;
2216 
2217 	  for (i = 1; i < words; i++)
2218 	    if (classes[i] != X86_64_SSEUP_CLASS)
2219 	      return 0;
2220 	}
2221 
2222       /* Final merger cleanup.  */
2223       for (i = 0; i < words; i++)
2224 	{
2225 	  /* If one class is MEMORY, everything should be passed in
2226 	     memory.  */
2227 	  if (classes[i] == X86_64_MEMORY_CLASS)
2228 	    return 0;
2229 
2230 	  /* The X86_64_SSEUP_CLASS should be always preceded by
2231 	     X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
2232 	  if (classes[i] == X86_64_SSEUP_CLASS
2233 	      && classes[i - 1] != X86_64_SSE_CLASS
2234 	      && classes[i - 1] != X86_64_SSEUP_CLASS)
2235 	    {
2236 	      /* The first one should never be X86_64_SSEUP_CLASS.  */
2237 	      gcc_assert (i != 0);
2238 	      classes[i] = X86_64_SSE_CLASS;
2239 	    }
2240 
2241 	  /*  If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
2242 	       everything should be passed in memory.  */
2243 	  if (classes[i] == X86_64_X87UP_CLASS
2244 	      && (classes[i - 1] != X86_64_X87_CLASS))
2245 	    {
2246 	      static bool warned;
2247 
2248 	      /* The first one should never be X86_64_X87UP_CLASS.  */
2249 	      gcc_assert (i != 0);
2250 	      if (!warned && warn_psabi)
2251 		{
2252 		  warned = true;
2253 		  inform (input_location,
2254 			  "the ABI of passing union with %<long double%>"
2255 			  " has changed in GCC 4.4");
2256 		}
2257 	      return 0;
2258 	    }
2259 	}
2260       return words;
2261     }
2262 
2263   /* Compute the alignment needed.  We align all types to their natural
2264      boundaries, with the exception of XFmode, which is aligned to 64 bits.  */
2265   if (mode != VOIDmode && mode != BLKmode)
2266     {
2267       int mode_alignment = GET_MODE_BITSIZE (mode);
2268 
2269       if (mode == XFmode)
2270 	mode_alignment = 128;
2271       else if (mode == XCmode)
2272 	mode_alignment = 256;
2273       if (COMPLEX_MODE_P (mode))
2274 	mode_alignment /= 2;
2275       /* Misaligned fields are always returned in memory.  */
2276       if (bit_offset % mode_alignment)
2277 	return 0;
2278     }
2279 
2280   /* For V1xx modes, just use the base mode.  */
2281   if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
2282       && GET_MODE_UNIT_SIZE (mode) == bytes)
2283     mode = GET_MODE_INNER (mode);
2284 
2285   /* Classification of atomic types.  */
2286   switch (mode)
2287     {
2288     case E_SDmode:
2289     case E_DDmode:
2290       classes[0] = X86_64_SSE_CLASS;
2291       return 1;
2292     case E_TDmode:
2293       classes[0] = X86_64_SSE_CLASS;
2294       classes[1] = X86_64_SSEUP_CLASS;
2295       return 2;
2296     case E_DImode:
2297     case E_SImode:
2298     case E_HImode:
2299     case E_QImode:
2300     case E_CSImode:
2301     case E_CHImode:
2302     case E_CQImode:
2303       {
2304 	int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
2305 
2306 	/* Analyze last 128 bits only.  */
2307 	size = (size - 1) & 0x7f;
2308 
2309 	if (size < 32)
2310 	  {
2311 	    classes[0] = X86_64_INTEGERSI_CLASS;
2312 	    return 1;
2313 	  }
2314 	else if (size < 64)
2315 	  {
2316 	    classes[0] = X86_64_INTEGER_CLASS;
2317 	    return 1;
2318 	  }
2319 	else if (size < 64+32)
2320 	  {
2321 	    classes[0] = X86_64_INTEGER_CLASS;
2322 	    classes[1] = X86_64_INTEGERSI_CLASS;
2323 	    return 2;
2324 	  }
2325 	else if (size < 64+64)
2326 	  {
2327 	    classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2328 	    return 2;
2329 	  }
2330 	else
2331 	  gcc_unreachable ();
2332       }
2333     case E_CDImode:
2334     case E_TImode:
2335       classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2336       return 2;
2337     case E_COImode:
2338     case E_OImode:
2339       /* OImode shouldn't be used directly.  */
2340       gcc_unreachable ();
2341     case E_CTImode:
2342       return 0;
2343     case E_SFmode:
2344       if (!(bit_offset % 64))
2345 	classes[0] = X86_64_SSESF_CLASS;
2346       else
2347 	classes[0] = X86_64_SSE_CLASS;
2348       return 1;
2349     case E_DFmode:
2350       classes[0] = X86_64_SSEDF_CLASS;
2351       return 1;
2352     case E_XFmode:
2353       classes[0] = X86_64_X87_CLASS;
2354       classes[1] = X86_64_X87UP_CLASS;
2355       return 2;
2356     case E_TFmode:
2357       classes[0] = X86_64_SSE_CLASS;
2358       classes[1] = X86_64_SSEUP_CLASS;
2359       return 2;
2360     case E_SCmode:
2361       classes[0] = X86_64_SSE_CLASS;
2362       if (!(bit_offset % 64))
2363 	return 1;
2364       else
2365 	{
2366 	  static bool warned;
2367 
2368 	  if (!warned && warn_psabi)
2369 	    {
2370 	      warned = true;
2371 	      inform (input_location,
2372 		      "the ABI of passing structure with %<complex float%>"
2373 		      " member has changed in GCC 4.4");
2374 	    }
2375 	  classes[1] = X86_64_SSESF_CLASS;
2376 	  return 2;
2377 	}
2378     case E_DCmode:
2379       classes[0] = X86_64_SSEDF_CLASS;
2380       classes[1] = X86_64_SSEDF_CLASS;
2381       return 2;
2382     case E_XCmode:
2383       classes[0] = X86_64_COMPLEX_X87_CLASS;
2384       return 1;
2385     case E_TCmode:
2386       /* This mode is larger than 16 bytes.  */
2387       return 0;
2388     case E_V8SFmode:
2389     case E_V8SImode:
2390     case E_V32QImode:
2391     case E_V16HImode:
2392     case E_V4DFmode:
2393     case E_V4DImode:
2394       classes[0] = X86_64_SSE_CLASS;
2395       classes[1] = X86_64_SSEUP_CLASS;
2396       classes[2] = X86_64_SSEUP_CLASS;
2397       classes[3] = X86_64_SSEUP_CLASS;
2398       return 4;
2399     case E_V8DFmode:
2400     case E_V16SFmode:
2401     case E_V8DImode:
2402     case E_V16SImode:
2403     case E_V32HImode:
2404     case E_V64QImode:
2405       classes[0] = X86_64_SSE_CLASS;
2406       classes[1] = X86_64_SSEUP_CLASS;
2407       classes[2] = X86_64_SSEUP_CLASS;
2408       classes[3] = X86_64_SSEUP_CLASS;
2409       classes[4] = X86_64_SSEUP_CLASS;
2410       classes[5] = X86_64_SSEUP_CLASS;
2411       classes[6] = X86_64_SSEUP_CLASS;
2412       classes[7] = X86_64_SSEUP_CLASS;
2413       return 8;
2414     case E_V4SFmode:
2415     case E_V4SImode:
2416     case E_V16QImode:
2417     case E_V8HImode:
2418     case E_V2DFmode:
2419     case E_V2DImode:
2420       classes[0] = X86_64_SSE_CLASS;
2421       classes[1] = X86_64_SSEUP_CLASS;
2422       return 2;
2423     case E_V1TImode:
2424     case E_V1DImode:
2425     case E_V2SFmode:
2426     case E_V2SImode:
2427     case E_V4HImode:
2428     case E_V8QImode:
2429       classes[0] = X86_64_SSE_CLASS;
2430       return 1;
2431     case E_BLKmode:
2432     case E_VOIDmode:
2433       return 0;
2434     default:
2435       gcc_assert (VECTOR_MODE_P (mode));
2436 
2437       if (bytes > 16)
2438 	return 0;
2439 
2440       gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
2441 
2442       if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2443 	classes[0] = X86_64_INTEGERSI_CLASS;
2444       else
2445 	classes[0] = X86_64_INTEGER_CLASS;
2446       classes[1] = X86_64_INTEGER_CLASS;
2447       return 1 + (bytes > 8);
2448     }
2449 }
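
/* Illustrative examples (editorial note, not part of the original source),
   assuming the 64-bit SysV classification implemented above:

     struct s1 { double d; long l; };   -- { SSEDF, INTEGER }, 2 words
     struct s2 { long a, b, c; };       -- memory (0): > 16 bytes and
                                            classes[0] is not SSE
     struct s3 { };                     -- { NO_CLASS }, 1 word (empty)  */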
2450 
2451 /* Examine the argument and set the number of registers required in each
2452    class.  Return true iff the parameter should be passed in memory.  */
2453 
2454 static bool
2455 examine_argument (machine_mode mode, const_tree type, int in_return,
2456 		  int *int_nregs, int *sse_nregs)
2457 {
2458   enum x86_64_reg_class regclass[MAX_CLASSES];
2459   int n = classify_argument (mode, type, regclass, 0);
2460 
2461   *int_nregs = 0;
2462   *sse_nregs = 0;
2463 
2464   if (!n)
2465     return true;
2466   for (n--; n >= 0; n--)
2467     switch (regclass[n])
2468       {
2469       case X86_64_INTEGER_CLASS:
2470       case X86_64_INTEGERSI_CLASS:
2471 	(*int_nregs)++;
2472 	break;
2473       case X86_64_SSE_CLASS:
2474       case X86_64_SSESF_CLASS:
2475       case X86_64_SSEDF_CLASS:
2476 	(*sse_nregs)++;
2477 	break;
2478       case X86_64_NO_CLASS:
2479       case X86_64_SSEUP_CLASS:
2480 	break;
2481       case X86_64_X87_CLASS:
2482       case X86_64_X87UP_CLASS:
2483       case X86_64_COMPLEX_X87_CLASS:
2484 	if (!in_return)
2485 	  return true;
2486 	break;
2487       case X86_64_MEMORY_CLASS:
2488 	gcc_unreachable ();
2489       }
2490 
2491   return false;
2492 }
2493 
2494 /* Construct container for the argument used by GCC interface.  See
2495    FUNCTION_ARG for the detailed description.  */
2496 
2497 static rtx
2498 construct_container (machine_mode mode, machine_mode orig_mode,
2499 		     const_tree type, int in_return, int nintregs, int nsseregs,
2500 		     const int *intreg, int sse_regno)
2501 {
2502   /* The following variables hold the static issued_error state.  */
2503   static bool issued_sse_arg_error;
2504   static bool issued_sse_ret_error;
2505   static bool issued_x87_ret_error;
2506 
2507   machine_mode tmpmode;
2508   int bytes
2509     = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2510   enum x86_64_reg_class regclass[MAX_CLASSES];
2511   int n;
2512   int i;
2513   int nexps = 0;
2514   int needed_sseregs, needed_intregs;
2515   rtx exp[MAX_CLASSES];
2516   rtx ret;
2517 
2518   n = classify_argument (mode, type, regclass, 0);
2519   if (!n)
2520     return NULL;
2521   if (examine_argument (mode, type, in_return, &needed_intregs,
2522 			&needed_sseregs))
2523     return NULL;
2524   if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2525     return NULL;
2526 
2527   /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
2528      some less clueful developer tries to use floating-point anyway.  */
2529   if (needed_sseregs && !TARGET_SSE)
2530     {
2531       if (in_return)
2532 	{
2533 	  if (!issued_sse_ret_error)
2534 	    {
2535 	      error ("SSE register return with SSE disabled");
2536 	      issued_sse_ret_error = true;
2537 	    }
2538 	}
2539       else if (!issued_sse_arg_error)
2540 	{
2541 	  error ("SSE register argument with SSE disabled");
2542 	  issued_sse_arg_error = true;
2543 	}
2544       return NULL;
2545     }
2546 
2547   /* Likewise, error if the ABI requires us to return values in the
2548      x87 registers and the user specified -mno-80387.  */
2549   if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
2550     for (i = 0; i < n; i++)
2551       if (regclass[i] == X86_64_X87_CLASS
2552 	  || regclass[i] == X86_64_X87UP_CLASS
2553 	  || regclass[i] == X86_64_COMPLEX_X87_CLASS)
2554 	{
2555 	  if (!issued_x87_ret_error)
2556 	    {
2557 	      error ("x87 register return with x87 disabled");
2558 	      issued_x87_ret_error = true;
2559 	    }
2560 	  return NULL;
2561 	}
2562 
2563   /* First construct the simple cases.  Avoid SCmode, since we want to use
2564      a single register to pass this type.  */
2565   if (n == 1 && mode != SCmode)
2566     switch (regclass[0])
2567       {
2568       case X86_64_INTEGER_CLASS:
2569       case X86_64_INTEGERSI_CLASS:
2570 	return gen_rtx_REG (mode, intreg[0]);
2571       case X86_64_SSE_CLASS:
2572       case X86_64_SSESF_CLASS:
2573       case X86_64_SSEDF_CLASS:
2574 	if (mode != BLKmode)
2575 	  return gen_reg_or_parallel (mode, orig_mode,
2576 				      GET_SSE_REGNO (sse_regno));
2577 	break;
2578       case X86_64_X87_CLASS:
2579       case X86_64_COMPLEX_X87_CLASS:
2580 	return gen_rtx_REG (mode, FIRST_STACK_REG);
2581       case X86_64_NO_CLASS:
2582 	/* Zero sized array, struct or class.  */
2583 	return NULL;
2584       default:
2585 	gcc_unreachable ();
2586       }
2587   if (n == 2
2588       && regclass[0] == X86_64_SSE_CLASS
2589       && regclass[1] == X86_64_SSEUP_CLASS
2590       && mode != BLKmode)
2591     return gen_reg_or_parallel (mode, orig_mode,
2592 				GET_SSE_REGNO (sse_regno));
2593   if (n == 4
2594       && regclass[0] == X86_64_SSE_CLASS
2595       && regclass[1] == X86_64_SSEUP_CLASS
2596       && regclass[2] == X86_64_SSEUP_CLASS
2597       && regclass[3] == X86_64_SSEUP_CLASS
2598       && mode != BLKmode)
2599     return gen_reg_or_parallel (mode, orig_mode,
2600 				GET_SSE_REGNO (sse_regno));
2601   if (n == 8
2602       && regclass[0] == X86_64_SSE_CLASS
2603       && regclass[1] == X86_64_SSEUP_CLASS
2604       && regclass[2] == X86_64_SSEUP_CLASS
2605       && regclass[3] == X86_64_SSEUP_CLASS
2606       && regclass[4] == X86_64_SSEUP_CLASS
2607       && regclass[5] == X86_64_SSEUP_CLASS
2608       && regclass[6] == X86_64_SSEUP_CLASS
2609       && regclass[7] == X86_64_SSEUP_CLASS
2610       && mode != BLKmode)
2611     return gen_reg_or_parallel (mode, orig_mode,
2612 				GET_SSE_REGNO (sse_regno));
2613   if (n == 2
2614       && regclass[0] == X86_64_X87_CLASS
2615       && regclass[1] == X86_64_X87UP_CLASS)
2616     return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2617 
2618   if (n == 2
2619       && regclass[0] == X86_64_INTEGER_CLASS
2620       && regclass[1] == X86_64_INTEGER_CLASS
2621       && (mode == CDImode || mode == TImode || mode == BLKmode)
2622       && intreg[0] + 1 == intreg[1])
2623     {
2624       if (mode == BLKmode)
2625 	{
2626 	  /* Use TImode for BLKmode values in 2 integer registers.  */
2627 	  exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
2628 				      gen_rtx_REG (TImode, intreg[0]),
2629 				      GEN_INT (0));
2630 	  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
2631 	  XVECEXP (ret, 0, 0) = exp[0];
2632 	  return ret;
2633 	}
2634       else
2635 	return gen_rtx_REG (mode, intreg[0]);
2636     }
2637 
2638   /* Otherwise figure out the entries of the PARALLEL.  */
2639   for (i = 0; i < n; i++)
2640     {
2641       int pos;
2642 
2643       switch (regclass[i])
2644         {
2645 	  case X86_64_NO_CLASS:
2646 	    break;
2647 	  case X86_64_INTEGER_CLASS:
2648 	  case X86_64_INTEGERSI_CLASS:
2649 	    /* Merge TImodes on aligned occasions here too.  */
2650 	    if (i * 8 + 8 > bytes)
2651 	      {
2652 		unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
2653 		if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
2654 		  /* We've requested 24 bytes we
2655 		     don't have mode for.  Use DImode.  */
2656 		  tmpmode = DImode;
2657 	      }
2658 	    else if (regclass[i] == X86_64_INTEGERSI_CLASS)
2659 	      tmpmode = SImode;
2660 	    else
2661 	      tmpmode = DImode;
2662 	    exp [nexps++]
2663 	      = gen_rtx_EXPR_LIST (VOIDmode,
2664 				   gen_rtx_REG (tmpmode, *intreg),
2665 				   GEN_INT (i*8));
2666 	    intreg++;
2667 	    break;
2668 	  case X86_64_SSESF_CLASS:
2669 	    exp [nexps++]
2670 	      = gen_rtx_EXPR_LIST (VOIDmode,
2671 				   gen_rtx_REG (SFmode,
2672 						GET_SSE_REGNO (sse_regno)),
2673 				   GEN_INT (i*8));
2674 	    sse_regno++;
2675 	    break;
2676 	  case X86_64_SSEDF_CLASS:
2677 	    exp [nexps++]
2678 	      = gen_rtx_EXPR_LIST (VOIDmode,
2679 				   gen_rtx_REG (DFmode,
2680 						GET_SSE_REGNO (sse_regno)),
2681 				   GEN_INT (i*8));
2682 	    sse_regno++;
2683 	    break;
2684 	  case X86_64_SSE_CLASS:
2685 	    pos = i;
2686 	    switch (n)
2687 	      {
2688 	      case 1:
2689 		tmpmode = DImode;
2690 		break;
2691 	      case 2:
2692 		if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
2693 		  {
2694 		    tmpmode = TImode;
2695 		    i++;
2696 		  }
2697 		else
2698 		  tmpmode = DImode;
2699 		break;
2700 	      case 4:
2701 		gcc_assert (i == 0
2702 			    && regclass[1] == X86_64_SSEUP_CLASS
2703 			    && regclass[2] == X86_64_SSEUP_CLASS
2704 			    && regclass[3] == X86_64_SSEUP_CLASS);
2705 		tmpmode = OImode;
2706 		i += 3;
2707 		break;
2708 	      case 8:
2709 		gcc_assert (i == 0
2710 			    && regclass[1] == X86_64_SSEUP_CLASS
2711 			    && regclass[2] == X86_64_SSEUP_CLASS
2712 			    && regclass[3] == X86_64_SSEUP_CLASS
2713 			    && regclass[4] == X86_64_SSEUP_CLASS
2714 			    && regclass[5] == X86_64_SSEUP_CLASS
2715 			    && regclass[6] == X86_64_SSEUP_CLASS
2716 			    && regclass[7] == X86_64_SSEUP_CLASS);
2717 		tmpmode = XImode;
2718 		i += 7;
2719 		break;
2720 	      default:
2721 		gcc_unreachable ();
2722 	      }
2723 	    exp [nexps++]
2724 	      = gen_rtx_EXPR_LIST (VOIDmode,
2725 				   gen_rtx_REG (tmpmode,
2726 						GET_SSE_REGNO (sse_regno)),
2727 				   GEN_INT (pos*8));
2728 	    sse_regno++;
2729 	    break;
2730 	  default:
2731 	    gcc_unreachable ();
2732 	}
2733     }
2734 
2735   /* Empty aligned struct, union or class.  */
2736   if (nexps == 0)
2737     return NULL;
2738 
2739   ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2740   for (i = 0; i < nexps; i++)
2741     XVECEXP (ret, 0, i) = exp [i];
2742   return ret;
2743 }
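
/* Illustrative example (editorial note, not part of the original source):
   for struct s1 { double d; long l; } passed as the first argument, the
   container built above is roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di)   (const_int 8))])

   i.e. the double travels in an SSE register at offset 0 and the long in
   an integer register at offset 8.  */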
2744 
2745 /* Update the data in CUM to advance over an argument of mode MODE
2746    and data type TYPE.  (TYPE is null for libcalls where that information
2747    may not be available.)
2748 
2749    Return the number of integer registers advanced over.  */
2750 
2751 static int
2752 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
2753 			 const_tree type, HOST_WIDE_INT bytes,
2754 			 HOST_WIDE_INT words)
2755 {
2756   int res = 0;
2757   bool error_p = false;
2758 
2759   if (TARGET_IAMCU)
2760     {
2761       /* Intel MCU psABI passes scalars and aggregates no larger than 8
2762 	 bytes in registers.  */
2763       if (!VECTOR_MODE_P (mode) && bytes <= 8)
2764 	goto pass_in_reg;
2765       return res;
2766     }
2767 
2768   switch (mode)
2769     {
2770     default:
2771       break;
2772 
2773     case E_BLKmode:
2774       if (bytes < 0)
2775 	break;
2776       /* FALLTHRU */
2777 
2778     case E_DImode:
2779     case E_SImode:
2780     case E_HImode:
2781     case E_QImode:
2782 pass_in_reg:
2783       cum->words += words;
2784       cum->nregs -= words;
2785       cum->regno += words;
2786       if (cum->nregs >= 0)
2787 	res = words;
2788       if (cum->nregs <= 0)
2789 	{
2790 	  cum->nregs = 0;
2791 	  cfun->machine->arg_reg_available = false;
2792 	  cum->regno = 0;
2793 	}
2794       break;
2795 
2796     case E_OImode:
2797       /* OImode shouldn't be used directly.  */
2798       gcc_unreachable ();
2799 
2800     case E_DFmode:
2801       if (cum->float_in_sse == -1)
2802 	error_p = true;
2803       if (cum->float_in_sse < 2)
2804 	break;
2805       /* FALLTHRU */
2806     case E_SFmode:
2807       if (cum->float_in_sse == -1)
2808 	error_p = true;
2809       if (cum->float_in_sse < 1)
2810 	break;
2811       /* FALLTHRU */
2812 
2813     case E_V8SFmode:
2814     case E_V8SImode:
2815     case E_V64QImode:
2816     case E_V32HImode:
2817     case E_V16SImode:
2818     case E_V8DImode:
2819     case E_V16SFmode:
2820     case E_V8DFmode:
2821     case E_V32QImode:
2822     case E_V16HImode:
2823     case E_V4DFmode:
2824     case E_V4DImode:
2825     case E_TImode:
2826     case E_V16QImode:
2827     case E_V8HImode:
2828     case E_V4SImode:
2829     case E_V2DImode:
2830     case E_V4SFmode:
2831     case E_V2DFmode:
2832       if (!type || !AGGREGATE_TYPE_P (type))
2833 	{
2834 	  cum->sse_words += words;
2835 	  cum->sse_nregs -= 1;
2836 	  cum->sse_regno += 1;
2837 	  if (cum->sse_nregs <= 0)
2838 	    {
2839 	      cum->sse_nregs = 0;
2840 	      cum->sse_regno = 0;
2841 	    }
2842 	}
2843       break;
2844 
2845     case E_V8QImode:
2846     case E_V4HImode:
2847     case E_V2SImode:
2848     case E_V2SFmode:
2849     case E_V1TImode:
2850     case E_V1DImode:
2851       if (!type || !AGGREGATE_TYPE_P (type))
2852 	{
2853 	  cum->mmx_words += words;
2854 	  cum->mmx_nregs -= 1;
2855 	  cum->mmx_regno += 1;
2856 	  if (cum->mmx_nregs <= 0)
2857 	    {
2858 	      cum->mmx_nregs = 0;
2859 	      cum->mmx_regno = 0;
2860 	    }
2861 	}
2862       break;
2863     }
2864   if (error_p)
2865     {
2866       cum->float_in_sse = 0;
2867       error ("calling %qD with SSE calling convention without "
2868 	     "SSE/SSE2 enabled", cum->decl);
2869       sorry ("this is a GCC bug that can be worked around by adding "
2870 	     "attribute used to function called");
2871     }
2872 
2873   return res;
2874 }
2875 
2876 static int
2877 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
2878 			 const_tree type, HOST_WIDE_INT words, bool named)
2879 {
2880   int int_nregs, sse_nregs;
2881 
2882   /* Unnamed 512 and 256bit vector mode parameters are passed on stack.  */
2883   if (!named && (VALID_AVX512F_REG_MODE (mode)
2884 		 || VALID_AVX256_REG_MODE (mode)))
2885     return 0;
2886 
2887   if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
2888       && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2889     {
2890       cum->nregs -= int_nregs;
2891       cum->sse_nregs -= sse_nregs;
2892       cum->regno += int_nregs;
2893       cum->sse_regno += sse_nregs;
2894       return int_nregs;
2895     }
2896   else
2897     {
2898       int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
2899       cum->words = ROUND_UP (cum->words, align);
2900       cum->words += words;
2901       return 0;
2902     }
2903 }
2904 
2905 static int
2906 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
2907 			    HOST_WIDE_INT words)
2908 {
2909   /* Otherwise, this should be passed indirectly.  */
2910   gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
2911 
2912   cum->words += words;
2913   if (cum->nregs > 0)
2914     {
2915       cum->nregs -= 1;
2916       cum->regno += 1;
2917       return 1;
2918     }
2919   return 0;
2920 }
2921 
2922 /* Update the data in CUM to advance over argument ARG.  */
2923 
2924 static void
2925 ix86_function_arg_advance (cumulative_args_t cum_v,
2926 			   const function_arg_info &arg)
2927 {
2928   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2929   machine_mode mode = arg.mode;
2930   HOST_WIDE_INT bytes, words;
2931   int nregs;
2932 
2933   /* The argument of an interrupt handler is a special case and is
2934      handled in ix86_function_arg.  */
2935   if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
2936     return;
2937 
2938   bytes = arg.promoted_size_in_bytes ();
2939   words = CEIL (bytes, UNITS_PER_WORD);
2940 
2941   if (arg.type)
2942     mode = type_natural_mode (arg.type, NULL, false);
2943 
2944   if (TARGET_64BIT)
2945     {
2946       enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
2947 
2948       if (call_abi == MS_ABI)
2949 	nregs = function_arg_advance_ms_64 (cum, bytes, words);
2950       else
2951 	nregs = function_arg_advance_64 (cum, mode, arg.type, words,
2952 					 arg.named);
2953     }
2954   else
2955     nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words);
2956 
2957   if (!nregs)
2958     {
2959       /* Track if there are outgoing arguments on stack.  */
2960       if (cum->caller)
2961 	cfun->machine->outgoing_args_on_stack = true;
2962     }
2963 }
2964 
2965 /* Define where to put the arguments to a function.
2966    Value is zero to push the argument on the stack,
2967    or a hard register in which to store the argument.
2968 
2969    MODE is the argument's machine mode.
2970    TYPE is the data type of the argument (as a tree).
2971     This is null for libcalls where that information may
2972     not be available.
2973    CUM is a variable of type CUMULATIVE_ARGS which gives info about
2974     the preceding args and about the function being called.
2975    NAMED is nonzero if this argument is a named parameter
2976     (otherwise it is an extra parameter matching an ellipsis).  */
2977 
2978 static rtx
2979 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
2980 		 machine_mode orig_mode, const_tree type,
2981 		 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
2982 {
2983   bool error_p = false;
2984 
2985   /* Avoid the AL settings for the Unix64 ABI.  */
2986   if (mode == VOIDmode)
2987     return constm1_rtx;
2988 
2989   if (TARGET_IAMCU)
2990     {
2991       /* Intel MCU psABI passes scalars and aggregates no larger than 8
2992 	 bytes in registers.  */
2993       if (!VECTOR_MODE_P (mode) && bytes <= 8)
2994 	goto pass_in_reg;
2995       return NULL_RTX;
2996     }
2997 
2998   switch (mode)
2999     {
3000     default:
3001       break;
3002 
3003     case E_BLKmode:
3004       if (bytes < 0)
3005 	break;
3006       /* FALLTHRU */
3007     case E_DImode:
3008     case E_SImode:
3009     case E_HImode:
3010     case E_QImode:
3011 pass_in_reg:
3012       if (words <= cum->nregs)
3013 	{
3014 	  int regno = cum->regno;
3015 
3016 	  /* Fastcall allocates the first two DWORD (SImode) or
3017 	     smaller arguments to ECX and EDX if they aren't of
3018 	     aggregate type.  */
3019 	  if (cum->fastcall)
3020 	    {
3021 	      if (mode == BLKmode
3022 		  || mode == DImode
3023 		  || (type && AGGREGATE_TYPE_P (type)))
3024 	        break;
3025 
3026 	      /* ECX not EAX is the first allocated register.  */
3027 	      if (regno == AX_REG)
3028 		regno = CX_REG;
3029 	    }
3030 	  return gen_rtx_REG (mode, regno);
3031 	}
3032       break;
3033 
3034     case E_DFmode:
3035       if (cum->float_in_sse == -1)
3036 	error_p = true;
3037       if (cum->float_in_sse < 2)
3038 	break;
3039       /* FALLTHRU */
3040     case E_SFmode:
3041       if (cum->float_in_sse == -1)
3042 	error_p = true;
3043       if (cum->float_in_sse < 1)
3044 	break;
3045       /* FALLTHRU */
3046     case E_TImode:
3047       /* In 32bit, we pass TImode in xmm registers.  */
3048     case E_V16QImode:
3049     case E_V8HImode:
3050     case E_V4SImode:
3051     case E_V2DImode:
3052     case E_V4SFmode:
3053     case E_V2DFmode:
3054       if (!type || !AGGREGATE_TYPE_P (type))
3055 	{
3056 	  if (cum->sse_nregs)
3057 	    return gen_reg_or_parallel (mode, orig_mode,
3058 				        cum->sse_regno + FIRST_SSE_REG);
3059 	}
3060       break;
3061 
3062     case E_OImode:
3063     case E_XImode:
3064       /* OImode and XImode shouldn't be used directly.  */
3065       gcc_unreachable ();
3066 
3067     case E_V64QImode:
3068     case E_V32HImode:
3069     case E_V16SImode:
3070     case E_V8DImode:
3071     case E_V16SFmode:
3072     case E_V8DFmode:
3073     case E_V8SFmode:
3074     case E_V8SImode:
3075     case E_V32QImode:
3076     case E_V16HImode:
3077     case E_V4DFmode:
3078     case E_V4DImode:
3079       if (!type || !AGGREGATE_TYPE_P (type))
3080 	{
3081 	  if (cum->sse_nregs)
3082 	    return gen_reg_or_parallel (mode, orig_mode,
3083 				        cum->sse_regno + FIRST_SSE_REG);
3084 	}
3085       break;
3086 
3087     case E_V8QImode:
3088     case E_V4HImode:
3089     case E_V2SImode:
3090     case E_V2SFmode:
3091     case E_V1TImode:
3092     case E_V1DImode:
3093       if (!type || !AGGREGATE_TYPE_P (type))
3094 	{
3095 	  if (cum->mmx_nregs)
3096 	    return gen_reg_or_parallel (mode, orig_mode,
3097 				        cum->mmx_regno + FIRST_MMX_REG);
3098 	}
3099       break;
3100     }
3101   if (error_p)
3102     {
3103       cum->float_in_sse = 0;
3104       error ("calling %qD with SSE calling convention without "
3105 	     "SSE/SSE2 enabled", cum->decl);
3106       sorry ("this is a GCC bug that can be worked around by adding "
3107 	     "attribute used to function called");
3108     }
3109 
3110   return NULL_RTX;
3111 }
3112 
3113 static rtx
3114 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3115 		 machine_mode orig_mode, const_tree type, bool named)
3116 {
3117   /* Handle the hidden AL argument containing the number of registers
3118      for varargs x86-64 functions.  */
3119   if (mode == VOIDmode)
3120     return GEN_INT (cum->maybe_vaarg
3121 		    ? (cum->sse_nregs < 0
3122 		       ? X86_64_SSE_REGPARM_MAX
3123 		       : cum->sse_regno)
3124 		    : -1);
3125 
3126   switch (mode)
3127     {
3128     default:
3129       break;
3130 
3131     case E_V8SFmode:
3132     case E_V8SImode:
3133     case E_V32QImode:
3134     case E_V16HImode:
3135     case E_V4DFmode:
3136     case E_V4DImode:
3137     case E_V16SFmode:
3138     case E_V16SImode:
3139     case E_V64QImode:
3140     case E_V32HImode:
3141     case E_V8DFmode:
3142     case E_V8DImode:
3143       /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
3144       if (!named)
3145 	return NULL;
3146       break;
3147     }
3148 
3149   return construct_container (mode, orig_mode, type, 0, cum->nregs,
3150 			      cum->sse_nregs,
3151 			      &x86_64_int_parameter_registers [cum->regno],
3152 			      cum->sse_regno);
3153 }
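
/* Editorial note (not part of the original source): the VOIDmode case above
   implements the SysV rule that a call to a variadic or unprototyped
   function sets %al to an upper bound on the number of vector registers
   actually used, e.g.

     movl  $1, %eax      # one XMM register carries an argument
     call  printf

   so the callee's va_start code knows how many SSE registers to spill.  */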
3154 
3155 static rtx
3156 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3157 		    machine_mode orig_mode, bool named, const_tree type,
3158 		    HOST_WIDE_INT bytes)
3159 {
3160   unsigned int regno;
3161 
3162   /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
3163      We use the value -2 to specify that the current function call is MSABI.  */
3164   if (mode == VOIDmode)
3165     return GEN_INT (-2);
3166 
3167   /* If we've run out of registers, it goes on the stack.  */
3168   if (cum->nregs == 0)
3169     return NULL_RTX;
3170 
3171   regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
3172 
3173   /* Only floating point modes are passed in anything but integer regs.  */
3174   if (TARGET_SSE && (mode == SFmode || mode == DFmode))
3175     {
3176       if (named)
3177 	{
3178 	  if (type == NULL_TREE || !AGGREGATE_TYPE_P (type))
3179 	    regno = cum->regno + FIRST_SSE_REG;
3180 	}
3181       else
3182 	{
3183 	  rtx t1, t2;
3184 
3185 	  /* Unnamed floating parameters are passed in both the
3186 	     SSE and integer registers.  */
3187 	  t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
3188 	  t2 = gen_rtx_REG (mode, regno);
3189 	  t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
3190 	  t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
3191 	  return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
3192 	}
3193     }
3194   /* Handle aggregate types passed in registers.  */
3195   if (orig_mode == BLKmode)
3196     {
3197       if (bytes > 0 && bytes <= 8)
3198         mode = (bytes > 4 ? DImode : SImode);
3199       if (mode == BLKmode)
3200         mode = DImode;
3201     }
3202 
3203   return gen_reg_or_parallel (mode, orig_mode, regno);
3204 }
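
/* Editorial note (not part of the original source): the unnamed-argument
   PARALLEL above reflects the Windows x64 varargs rule that floating-point
   arguments are duplicated in both the XMM register and the corresponding
   integer register (e.g. the first argument in both %xmm0 and %rcx), so a
   callee using va_arg can find the value in the register save area.  */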
3205 
3206 /* Return where to put the arguments to a function.
3207    Return zero to push the argument on the stack, or a hard register in which to store the argument.
3208 
3209    ARG describes the argument while CUM gives information about the
3210    preceding args and about the function being called.  */
3211 
3212 static rtx
3213 ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
3214 {
3215   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3216   machine_mode mode = arg.mode;
3217   HOST_WIDE_INT bytes, words;
3218   rtx reg;
3219 
3220   if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
3221     {
3222       gcc_assert (arg.type != NULL_TREE);
3223       if (POINTER_TYPE_P (arg.type))
3224 	{
3225 	  /* This is the pointer argument.  */
3226 	  gcc_assert (TYPE_MODE (arg.type) == ptr_mode);
3227 	  /* It is at -WORD(AP) in the current frame in interrupt and
3228 	     exception handlers.  */
3229 	  reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
3230 	}
3231       else
3232 	{
3233 	  gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
3234 		      && TREE_CODE (arg.type) == INTEGER_TYPE
3235 		      && TYPE_MODE (arg.type) == word_mode);
3236 	  /* The error code is the word-mode integer argument at
3237 	     -2 * WORD(AP) in the current frame of the exception
3238 	     handler.  */
3239 	  reg = gen_rtx_MEM (word_mode,
3240 			     plus_constant (Pmode,
3241 					    arg_pointer_rtx,
3242 					    -2 * UNITS_PER_WORD));
3243 	}
3244       return reg;
3245     }
3246 
3247   bytes = arg.promoted_size_in_bytes ();
3248   words = CEIL (bytes, UNITS_PER_WORD);
3249 
3250   /* To simplify the code below, represent vector types with a vector mode
3251      even if MMX/SSE are not active.  */
3252   if (arg.type && TREE_CODE (arg.type) == VECTOR_TYPE)
3253     mode = type_natural_mode (arg.type, cum, false);
3254 
3255   if (TARGET_64BIT)
3256     {
3257       enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3258 
3259       if (call_abi == MS_ABI)
3260 	reg = function_arg_ms_64 (cum, mode, arg.mode, arg.named,
3261 				  arg.type, bytes);
3262       else
3263 	reg = function_arg_64 (cum, mode, arg.mode, arg.type, arg.named);
3264     }
3265   else
3266     reg = function_arg_32 (cum, mode, arg.mode, arg.type, bytes, words);
3267 
3268   /* Track if there are outgoing arguments on stack.  */
3269   if (reg == NULL_RTX && cum->caller)
3270     cfun->machine->outgoing_args_on_stack = true;
3271 
3272   return reg;
3273 }
3274 
3275 /* A C expression that indicates when an argument must be passed by
3276    reference.  If nonzero for an argument, a copy of that argument is
3277    made in memory and a pointer to the argument is passed instead of
3278    the argument itself.  The pointer is passed in whatever way is
3279    appropriate for passing a pointer to that type.  */
3280 
3281 static bool
3282 ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
3283 {
3284   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3285 
3286   if (TARGET_64BIT)
3287     {
3288       enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3289 
3290       /* See Windows x64 Software Convention.  */
3291       if (call_abi == MS_ABI)
3292 	{
3293 	  HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode);
3294 
3295 	  if (tree type = arg.type)
3296 	    {
3297 	      /* Arrays are passed by reference.  */
3298 	      if (TREE_CODE (type) == ARRAY_TYPE)
3299 		return true;
3300 
3301 	      if (RECORD_OR_UNION_TYPE_P (type))
3302 		{
3303 		  /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
3304 		     are passed by reference.  */
3305 		  msize = int_size_in_bytes (type);
3306 		}
3307 	    }
3308 
3309 	  /* __m128 is passed by reference.  */
3310 	  return msize != 1 && msize != 2 && msize != 4 && msize != 8;
3311 	}
3312       else if (arg.type && int_size_in_bytes (arg.type) == -1)
3313 	return true;
3314     }
3315 
3316   return false;
3317 }
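
/* For instance, under the Windows x64 convention checked above, a
   16-byte aggregate such as

     struct pair { double x, y; };

   has a size outside the 1/2/4/8-byte set and is therefore passed by
   reference (the caller materializes a copy and passes its address),
   whereas an 8-byte struct travels by value in a single integer
   register.  */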
3318 
3319 /* Return true when TYPE should be 128bit aligned for 32bit argument
3320    passing ABI.  XXX: This function is obsolete and is only used for
3321    checking psABI compatibility with previous versions of GCC.  */
3322 
3323 static bool
3324 ix86_compat_aligned_value_p (const_tree type)
3325 {
3326   machine_mode mode = TYPE_MODE (type);
3327   if (((TARGET_SSE && SSE_REG_MODE_P (mode))
3328        || mode == TDmode
3329        || mode == TFmode
3330        || mode == TCmode)
3331       && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3332     return true;
3333   if (TYPE_ALIGN (type) < 128)
3334     return false;
3335 
3336   if (AGGREGATE_TYPE_P (type))
3337     {
3338       /* Walk the aggregates recursively.  */
3339       switch (TREE_CODE (type))
3340 	{
3341 	case RECORD_TYPE:
3342 	case UNION_TYPE:
3343 	case QUAL_UNION_TYPE:
3344 	  {
3345 	    tree field;
3346 
3347 	    /* Walk all the structure fields.  */
3348 	    for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3349 	      {
3350 		if (TREE_CODE (field) == FIELD_DECL
3351 		    && ix86_compat_aligned_value_p (TREE_TYPE (field)))
3352 		  return true;
3353 	      }
3354 	    break;
3355 	  }
3356 
3357 	case ARRAY_TYPE:
3358 	  /* Just for use if some languages pass arrays by value.  */
3359 	  if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
3360 	    return true;
3361 	  break;
3362 
3363 	default:
3364 	  gcc_unreachable ();
3365 	}
3366     }
3367   return false;
3368 }
3369 
3370 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
3371    XXX: This function is obsolete and is only used for checking psABI
3372    compatibility with previous versions of GCC.  */
3373 
3374 static unsigned int
3375 ix86_compat_function_arg_boundary (machine_mode mode,
3376 				   const_tree type, unsigned int align)
3377 {
3378   /* In 32bit, only _Decimal128 and __float128 are aligned to their
3379      natural boundaries.  */
3380   if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
3381     {
3382       /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
3383 	 make an exception for SSE modes since these require 128bit
3384 	 alignment.
3385 
3386 	 The handling here differs from field_alignment.  ICC aligns MMX
3387 	 arguments to 4 byte boundaries, while structure fields are aligned
3388 	 to 8 byte boundaries.  */
3389       if (!type)
3390 	{
3391 	  if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
3392 	    align = PARM_BOUNDARY;
3393 	}
3394       else
3395 	{
3396 	  if (!ix86_compat_aligned_value_p (type))
3397 	    align = PARM_BOUNDARY;
3398 	}
3399     }
3400   if (align > BIGGEST_ALIGNMENT)
3401     align = BIGGEST_ALIGNMENT;
3402   return align;
3403 }
3404 
3405 /* Return true when TYPE should be 128bit aligned for 32bit argument
3406    passing ABI.  */
3407 
3408 static bool
3409 ix86_contains_aligned_value_p (const_tree type)
3410 {
3411   machine_mode mode = TYPE_MODE (type);
3412 
3413   if (mode == XFmode || mode == XCmode)
3414     return false;
3415 
3416   if (TYPE_ALIGN (type) < 128)
3417     return false;
3418 
3419   if (AGGREGATE_TYPE_P (type))
3420     {
3421       /* Walk the aggregates recursively.  */
3422       switch (TREE_CODE (type))
3423 	{
3424 	case RECORD_TYPE:
3425 	case UNION_TYPE:
3426 	case QUAL_UNION_TYPE:
3427 	  {
3428 	    tree field;
3429 
3430 	    /* Walk all the structure fields.  */
3431 	    for (field = TYPE_FIELDS (type);
3432 		 field;
3433 		 field = DECL_CHAIN (field))
3434 	      {
3435 		if (TREE_CODE (field) == FIELD_DECL
3436 		    && ix86_contains_aligned_value_p (TREE_TYPE (field)))
3437 		  return true;
3438 	      }
3439 	    break;
3440 	  }
3441 
3442 	case ARRAY_TYPE:
3443 	  /* Just for use if some languages pass arrays by value.  */
3444 	  if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
3445 	    return true;
3446 	  break;
3447 
3448 	default:
3449 	  gcc_unreachable ();
3450 	}
3451     }
3452   else
3453     return TYPE_ALIGN (type) >= 128;
3454 
3455   return false;
3456 }
3457 
3458 /* Gives the alignment boundary, in bits, of an argument with the
3459    specified mode and type.  */
3460 
3461 static unsigned int
3462 ix86_function_arg_boundary (machine_mode mode, const_tree type)
3463 {
3464   unsigned int align;
3465   if (type)
3466     {
3467       /* Since the main variant type is used for the call, convert the
3468 	 type to its main variant.  */
3469       type = TYPE_MAIN_VARIANT (type);
3470       align = TYPE_ALIGN (type);
3471       if (TYPE_EMPTY_P (type))
3472 	return PARM_BOUNDARY;
3473     }
3474   else
3475     align = GET_MODE_ALIGNMENT (mode);
3476   if (align < PARM_BOUNDARY)
3477     align = PARM_BOUNDARY;
3478   else
3479     {
3480       static bool warned;
3481       unsigned int saved_align = align;
3482 
3483       if (!TARGET_64BIT)
3484 	{
3485 	  /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
3486 	  if (!type)
3487 	    {
3488 	      if (mode == XFmode || mode == XCmode)
3489 		align = PARM_BOUNDARY;
3490 	    }
3491 	  else if (!ix86_contains_aligned_value_p (type))
3492 	    align = PARM_BOUNDARY;
3493 
3494 	  if (align < 128)
3495 	    align = PARM_BOUNDARY;
3496 	}
3497 
3498       if (warn_psabi
3499 	  && !warned
3500 	  && align != ix86_compat_function_arg_boundary (mode, type,
3501 							 saved_align))
3502 	{
3503 	  warned = true;
3504 	  inform (input_location,
3505 		  "the ABI for passing parameters with %d-byte"
3506 		  " alignment has changed in GCC 4.6",
3507 		  align / BITS_PER_UNIT);
3508 	}
3509     }
3510 
3511   return align;
3512 }
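
/* Concretely, in the 32-bit ABI a __m128 argument (or an aggregate
   containing one) is placed on a 16-byte boundary on the stack, while a
   plain double or long long only gets the 4-byte PARM_BOUNDARY; the
   warn_psabi note above flags types whose boundary changed relative to
   the pre-4.6 ix86_compat_function_arg_boundary computation.  */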
3513 
3514 /* Return true if N is a possible register number of function value.  */
3515 
3516 static bool
3517 ix86_function_value_regno_p (const unsigned int regno)
3518 {
3519   switch (regno)
3520     {
3521     case AX_REG:
3522       return true;
3523     case DX_REG:
3524       return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
3525     case DI_REG:
3526     case SI_REG:
3527       return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
3528 
3529       /* Complex values are returned in %st(0)/%st(1) pair.  */
3530     case ST0_REG:
3531     case ST1_REG:
3532       /* TODO: The function should depend on the current function ABI, but
3533        builtins.c would need updating then.  Therefore we use the
3534        default ABI.  */
3535       if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3536 	return false;
3537       return TARGET_FLOAT_RETURNS_IN_80387;
3538 
3539       /* Complex values are returned in %xmm0/%xmm1 pair.  */
3540     case XMM0_REG:
3541     case XMM1_REG:
3542       return TARGET_SSE;
3543 
3544     case MM0_REG:
3545       if (TARGET_MACHO || TARGET_64BIT)
3546 	return false;
3547       return TARGET_MMX;
3548     }
3549 
3550   return false;
3551 }
3552 
3553 /* Define how to find the value returned by a function.
3554    VALTYPE is the data type of the value (as a tree).
3555    If the precise function being called is known, FUNC is its FUNCTION_DECL;
3556    otherwise, FUNC is 0.  */
3557 
3558 static rtx
3559 function_value_32 (machine_mode orig_mode, machine_mode mode,
3560 		   const_tree fntype, const_tree fn)
3561 {
3562   unsigned int regno;
3563 
3564   /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3565      we normally prevent this case when mmx is not available.  However
3566      some ABIs may require the result to be returned like DImode.  */
3567   if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3568     regno = FIRST_MMX_REG;
3569 
3570   /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
3571      we prevent this case when sse is not available.  However some ABIs
3572      may require the result to be returned like integer TImode.  */
3573   else if (mode == TImode
3574 	   || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3575     regno = FIRST_SSE_REG;
3576 
3577   /* 32-byte vector modes in %ymm0.   */
3578   else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
3579     regno = FIRST_SSE_REG;
3580 
3581   /* 64-byte vector modes in %zmm0.   */
3582   else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
3583     regno = FIRST_SSE_REG;
3584 
3585   /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
3586   else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
3587     regno = FIRST_FLOAT_REG;
3588   else
3589     /* Most things go in %eax.  */
3590     regno = AX_REG;
3591 
3592   /* Override FP return register with %xmm0 for local functions when
3593      SSE math is enabled or for functions with sseregparm attribute.  */
3594   if ((fn || fntype) && (mode == SFmode || mode == DFmode))
3595     {
3596       int sse_level = ix86_function_sseregparm (fntype, fn, false);
3597       if (sse_level == -1)
3598 	{
3599 	  error ("calling %qD with SSE calling convention without "
3600 		 "SSE/SSE2 enabled", fn);
3601 	  sorry ("this is a GCC bug that can be worked around by adding "
3602 		 "attribute used to function called");
3603 	}
3604       else if ((sse_level >= 1 && mode == SFmode)
3605 	       || (sse_level == 2 && mode == DFmode))
3606 	regno = FIRST_SSE_REG;
3607     }
3608 
3609   /* OImode shouldn't be used directly.  */
3610   gcc_assert (mode != OImode);
3611 
3612   return gen_rtx_REG (orig_mode, regno);
3613 }
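
/* So, for example, a 32-bit function returning float or double normally
   uses %st(0), but local functions with SSE math enabled (or functions
   carrying the sseregparm attribute) may have the result redirected to
   %xmm0 by the override above, and 8/16/32/64-byte vector results go to
   %mm0, %xmm0, %ymm0 and %zmm0 respectively.  */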
3614 
3615 static rtx
3616 function_value_64 (machine_mode orig_mode, machine_mode mode,
3617 		   const_tree valtype)
3618 {
3619   rtx ret;
3620 
3621   /* Handle libcalls, which don't provide a type node.  */
3622   if (valtype == NULL)
3623     {
3624       unsigned int regno;
3625 
3626       switch (mode)
3627 	{
3628 	case E_SFmode:
3629 	case E_SCmode:
3630 	case E_DFmode:
3631 	case E_DCmode:
3632 	case E_TFmode:
3633 	case E_SDmode:
3634 	case E_DDmode:
3635 	case E_TDmode:
3636 	  regno = FIRST_SSE_REG;
3637 	  break;
3638 	case E_XFmode:
3639 	case E_XCmode:
3640 	  regno = FIRST_FLOAT_REG;
3641 	  break;
3642 	case E_TCmode:
3643 	  return NULL;
3644 	default:
3645 	  regno = AX_REG;
3646 	}
3647 
3648       return gen_rtx_REG (mode, regno);
3649     }
3650   else if (POINTER_TYPE_P (valtype))
3651     {
3652       /* Pointers are always returned in word_mode.  */
3653       mode = word_mode;
3654     }
3655 
3656   ret = construct_container (mode, orig_mode, valtype, 1,
3657 			     X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
3658 			     x86_64_int_return_registers, 0);
3659 
3660   /* For zero-sized structures, construct_container returns NULL, but we
3661      need to keep the rest of the compiler happy by returning a meaningful value.  */
3662   if (!ret)
3663     ret = gen_rtx_REG (orig_mode, AX_REG);
3664 
3665   return ret;
3666 }
3667 
3668 static rtx
3669 function_value_ms_32 (machine_mode orig_mode, machine_mode mode,
3670 		      const_tree fntype, const_tree fn, const_tree valtype)
3671 {
3672   unsigned int regno;
3673 
3674   /* Floating point return values in %st(0)
3675      (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes).  */
3676   if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387
3677       && (GET_MODE_SIZE (mode) > 8
3678 	  || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype)))
3679     {
3680       regno = FIRST_FLOAT_REG;
3681       return gen_rtx_REG (orig_mode, regno);
3682     }
3683   else
3684     return function_value_32 (orig_mode, mode, fntype, fn);
3685 }
3686 
3687 static rtx
3688 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
3689 		      const_tree valtype)
3690 {
3691   unsigned int regno = AX_REG;
3692 
3693   if (TARGET_SSE)
3694     {
3695       switch (GET_MODE_SIZE (mode))
3696 	{
3697 	case 16:
3698 	  if (valtype != NULL_TREE
3699 	      && !VECTOR_INTEGER_TYPE_P (valtype)
3700 	      && !VECTOR_INTEGER_TYPE_P (valtype)
3701 	      && !INTEGRAL_TYPE_P (valtype)
3702 	      && !VECTOR_FLOAT_TYPE_P (valtype))
3703 	    break;
3704 	  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
3705 	      && !COMPLEX_MODE_P (mode))
3706 	    regno = FIRST_SSE_REG;
3707 	  break;
3708 	case 8:
3709 	case 4:
3710 	  if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype))
3711 	    break;
3712 	  if (mode == SFmode || mode == DFmode)
3713 	    regno = FIRST_SSE_REG;
3714 	  break;
3715 	default:
3716 	  break;
3717         }
3718     }
3719   return gen_rtx_REG (orig_mode, regno);
3720 }
3721 
3722 static rtx
3723 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
3724 		       machine_mode orig_mode, machine_mode mode)
3725 {
3726   const_tree fn, fntype;
3727 
3728   fn = NULL_TREE;
3729   if (fntype_or_decl && DECL_P (fntype_or_decl))
3730     fn = fntype_or_decl;
3731   fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3732 
3733   if (ix86_function_type_abi (fntype) == MS_ABI)
3734     {
3735       if (TARGET_64BIT)
3736 	return function_value_ms_64 (orig_mode, mode, valtype);
3737       else
3738 	return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype);
3739     }
3740   else if (TARGET_64BIT)
3741     return function_value_64 (orig_mode, mode, valtype);
3742   else
3743     return function_value_32 (orig_mode, mode, fntype, fn);
3744 }
3745 
3746 static rtx
3747 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
3748 {
3749   machine_mode mode, orig_mode;
3750 
3751   orig_mode = TYPE_MODE (valtype);
3752   mode = type_natural_mode (valtype, NULL, true);
3753   return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
3754 }
3755 
3756 /* Pointer function arguments and return values are promoted to
3757    word_mode for normal functions.  */
3758 
3759 static machine_mode
3760 ix86_promote_function_mode (const_tree type, machine_mode mode,
3761 			    int *punsignedp, const_tree fntype,
3762 			    int for_return)
3763 {
3764   if (cfun->machine->func_type == TYPE_NORMAL
3765       && type != NULL_TREE
3766       && POINTER_TYPE_P (type))
3767     {
3768       *punsignedp = POINTERS_EXTEND_UNSIGNED;
3769       return word_mode;
3770     }
3771   return default_promote_function_mode (type, mode, punsignedp, fntype,
3772 					for_return);
3773 }
3774 
3775 /* Return true if a structure, union or array with MODE containing FIELD
3776    should be accessed using BLKmode.  */
3777 
3778 static bool
3779 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
3780 {
3781   /* Union with XFmode must be in BLKmode.  */
3782   return (mode == XFmode
3783 	  && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
3784 	      || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
3785 }
3786 
3787 rtx
3788 ix86_libcall_value (machine_mode mode)
3789 {
3790   return ix86_function_value_1 (NULL, NULL, mode, mode);
3791 }
3792 
3793 /* Return true iff type is returned in memory.  */
3794 
3795 static bool
3796 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
3797 {
3798 #ifdef SUBTARGET_RETURN_IN_MEMORY
3799   return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
3800 #else
3801   const machine_mode mode = type_natural_mode (type, NULL, true);
3802   HOST_WIDE_INT size;
3803 
3804   if (TARGET_64BIT)
3805     {
3806       if (ix86_function_type_abi (fntype) == MS_ABI)
3807 	{
3808 	  size = int_size_in_bytes (type);
3809 
3810 	  /* __m128 is returned in xmm0.  */
3811 	  if ((!type || VECTOR_INTEGER_TYPE_P (type)
3812 	       || INTEGRAL_TYPE_P (type)
3813 	       || VECTOR_FLOAT_TYPE_P (type))
3814 	      && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
3815 	      && !COMPLEX_MODE_P (mode)
3816 	      && (GET_MODE_SIZE (mode) == 16 || size == 16))
3817 	    return false;
3818 
3819 	  /* Otherwise, the size must be exactly in [1248]. */
3820 	  return size != 1 && size != 2 && size != 4 && size != 8;
3821 	}
3822       else
3823 	{
3824 	  int needed_intregs, needed_sseregs;
3825 
3826 	  return examine_argument (mode, type, 1,
3827 				   &needed_intregs, &needed_sseregs);
3828 	}
3829     }
3830   else
3831     {
3832       size = int_size_in_bytes (type);
3833 
3834       /* Intel MCU psABI returns scalars and aggregates no larger than 8
3835 	 bytes in registers.  */
3836       if (TARGET_IAMCU)
3837 	return VECTOR_MODE_P (mode) || size < 0 || size > 8;
3838 
3839       if (mode == BLKmode)
3840 	return true;
3841 
3842       if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3843 	return false;
3844 
3845       if (VECTOR_MODE_P (mode) || mode == TImode)
3846 	{
3847 	  /* User-created vectors small enough to fit in EAX.  */
3848 	  if (size < 8)
3849 	    return false;
3850 
3851 	  /* Unless the ABI prescribes otherwise,
3852 	     MMX/3dNow values are returned in MM0 if available.  */
3853 
3854 	  if (size == 8)
3855 	    return TARGET_VECT8_RETURNS || !TARGET_MMX;
3856 
3857 	  /* SSE values are returned in XMM0 if available.  */
3858 	  if (size == 16)
3859 	    return !TARGET_SSE;
3860 
3861 	  /* AVX values are returned in YMM0 if available.  */
3862 	  if (size == 32)
3863 	    return !TARGET_AVX;
3864 
3865 	  /* AVX512F values are returned in ZMM0 if available.  */
3866 	  if (size == 64)
3867 	    return !TARGET_AVX512F;
3868 	}
3869 
3870       if (mode == XFmode)
3871 	return false;
3872 
3873       if (size > 12)
3874 	return true;
3875 
3876       /* OImode shouldn't be used directly.  */
3877       gcc_assert (mode != OImode);
3878 
3879       return false;
3880     }
3881 #endif
3882 }
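
/* A few concrete cases of the rules above: on 64-bit SysV a 32-byte
   struct of four doubles exceeds the return registers that
   examine_argument can allocate and is returned in memory, while
   struct { double a, b; } comes back in %xmm0/%xmm1; on 32-bit targets
   a 16-byte vector is forced into memory only when SSE is disabled.  */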
3883 
3884 
3885 /* Create the va_list data type.  */
3886 
3887 static tree
3888 ix86_build_builtin_va_list_64 (void)
3889 {
3890   tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3891 
3892   record = lang_hooks.types.make_type (RECORD_TYPE);
3893   type_decl = build_decl (BUILTINS_LOCATION,
3894 			  TYPE_DECL, get_identifier ("__va_list_tag"), record);
3895 
3896   f_gpr = build_decl (BUILTINS_LOCATION,
3897 		      FIELD_DECL, get_identifier ("gp_offset"),
3898 		      unsigned_type_node);
3899   f_fpr = build_decl (BUILTINS_LOCATION,
3900 		      FIELD_DECL, get_identifier ("fp_offset"),
3901 		      unsigned_type_node);
3902   f_ovf = build_decl (BUILTINS_LOCATION,
3903 		      FIELD_DECL, get_identifier ("overflow_arg_area"),
3904 		      ptr_type_node);
3905   f_sav = build_decl (BUILTINS_LOCATION,
3906 		      FIELD_DECL, get_identifier ("reg_save_area"),
3907 		      ptr_type_node);
3908 
3909   va_list_gpr_counter_field = f_gpr;
3910   va_list_fpr_counter_field = f_fpr;
3911 
3912   DECL_FIELD_CONTEXT (f_gpr) = record;
3913   DECL_FIELD_CONTEXT (f_fpr) = record;
3914   DECL_FIELD_CONTEXT (f_ovf) = record;
3915   DECL_FIELD_CONTEXT (f_sav) = record;
3916 
3917   TYPE_STUB_DECL (record) = type_decl;
3918   TYPE_NAME (record) = type_decl;
3919   TYPE_FIELDS (record) = f_gpr;
3920   DECL_CHAIN (f_gpr) = f_fpr;
3921   DECL_CHAIN (f_fpr) = f_ovf;
3922   DECL_CHAIN (f_ovf) = f_sav;
3923 
3924   layout_type (record);
3925 
3926   TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
3927 					NULL_TREE, TYPE_ATTRIBUTES (record));
3928 
3929   /* The correct type is an array type of one element.  */
3930   return build_array_type (record, build_index_type (size_zero_node));
3931 }
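
/* The record built above corresponds to the familiar SysV x86-64
   va_list layout, roughly:

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;      // offset of the next GPR slot
       unsigned int fp_offset;      // offset of the next XMM slot
       void *overflow_arg_area;     // next stack-passed argument
       void *reg_save_area;         // base of the register save area
     } __va_list_tag, va_list[1];  */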
3932 
3933 /* Set up the builtin va_list data type and, for 64-bit, the additional
3934    calling-convention-specific va_list data types.  */
3935 
3936 static tree
3937 ix86_build_builtin_va_list (void)
3938 {
3939   if (TARGET_64BIT)
3940     {
3941       /* Initialize ABI specific va_list builtin types.
3942 
3943 	 In lto1, we can encounter two va_list types:
3944 	 - one as a result of the type-merge across TUs, and
3945 	 - the one constructed here.
3946 	 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
3947 	 a type identity check in canonical_va_list_type based on
3948 	 TYPE_MAIN_VARIANT (which we used to have) will not work.
3949 	 Instead, we tag each va_list_type_node with its unique attribute, and
3950 	 look for the attribute in the type identity check in
3951 	 canonical_va_list_type.
3952 
3953 	 Tagging sysv_va_list_type_node directly with the attribute is
3954 	 problematic since it's an array of one record, which will decay into a
3955 	 pointer to the record when used as a parameter (see build_va_arg comments for
3956 	 an example), dropping the attribute in the process.  So we tag the
3957 	 record instead.  */
3958 
3959       /* For SYSV_ABI we use an array of one record.  */
3960       sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
3961 
3962       /* For MS_ABI we use plain pointer to argument area.  */
3963       tree char_ptr_type = build_pointer_type (char_type_node);
3964       tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
3965 			     TYPE_ATTRIBUTES (char_ptr_type));
3966       ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
3967 
3968       return ((ix86_abi == MS_ABI)
3969 	      ? ms_va_list_type_node
3970 	      : sysv_va_list_type_node);
3971     }
3972   else
3973     {
3974       /* For i386 we use plain pointer to argument area.  */
3975       return build_pointer_type (char_type_node);
3976     }
3977 }
3978 
3979 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */
3980 
3981 static void
3982 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
3983 {
3984   rtx save_area, mem;
3985   alias_set_type set;
3986   int i, max;
3987 
3988   /* GPR size of varargs save area.  */
3989   if (cfun->va_list_gpr_size)
3990     ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
3991   else
3992     ix86_varargs_gpr_size = 0;
3993 
3994   /* FPR size of varargs save area.  We don't need it if we don't pass
3995      anything in SSE registers.  */
3996   if (TARGET_SSE && cfun->va_list_fpr_size)
3997     ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
3998   else
3999     ix86_varargs_fpr_size = 0;
4000 
4001   if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
4002     return;
4003 
4004   save_area = frame_pointer_rtx;
4005   set = get_varargs_alias_set ();
4006 
4007   max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4008   if (max > X86_64_REGPARM_MAX)
4009     max = X86_64_REGPARM_MAX;
4010 
4011   for (i = cum->regno; i < max; i++)
4012     {
4013       mem = gen_rtx_MEM (word_mode,
4014 			 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
4015       MEM_NOTRAP_P (mem) = 1;
4016       set_mem_alias_set (mem, set);
4017       emit_move_insn (mem,
4018 		      gen_rtx_REG (word_mode,
4019 				   x86_64_int_parameter_registers[i]));
4020     }
4021 
4022   if (ix86_varargs_fpr_size)
4023     {
4024       machine_mode smode;
4025       rtx_code_label *label;
4026       rtx test;
4027 
4028       /* Now emit code to save SSE registers.  The AX parameter contains number
4029 	 of SSE parameter registers used to call this function, though all we
4030 	 actually check here is the zero/non-zero status.  */
4031 
4032       label = gen_label_rtx ();
4033       test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
4034       emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
4035 				      label));
4036 
4037       /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
4038 	 we used movdqa (i.e. TImode) instead?  Perhaps even better would
4039 	 be if we could determine the real mode of the data, via a hook
4040 	 into pass_stdarg.  Ignore all that for now.  */
4041       smode = V4SFmode;
4042       if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
4043 	crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
4044 
4045       max = cum->sse_regno + cfun->va_list_fpr_size / 16;
4046       if (max > X86_64_SSE_REGPARM_MAX)
4047 	max = X86_64_SSE_REGPARM_MAX;
4048 
4049       for (i = cum->sse_regno; i < max; ++i)
4050 	{
4051 	  mem = plus_constant (Pmode, save_area,
4052 			       i * 16 + ix86_varargs_gpr_size);
4053 	  mem = gen_rtx_MEM (smode, mem);
4054 	  MEM_NOTRAP_P (mem) = 1;
4055 	  set_mem_alias_set (mem, set);
4056 	  set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
4057 
4058 	  emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
4059 	}
4060 
4061       emit_label (label);
4062     }
4063 }
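
/* The varargs register save area set up above therefore looks roughly
   like this when both halves are needed:

     reg_save_area +   0: %rdi %rsi %rdx %rcx %r8 %r9    (6 x 8 bytes)
     reg_save_area +  48: %xmm0 ... %xmm7                (8 x 16 bytes)

   and the SSE half is skipped at run time when %al is zero, i.e. when
   the caller passed nothing in SSE registers.  */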
4064 
4065 static void
4066 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4067 {
4068   alias_set_type set = get_varargs_alias_set ();
4069   int i;
4070 
4071   /* Reset to zero, as there might be a sysv va_arg used
4072      before.  */
4073   ix86_varargs_gpr_size = 0;
4074   ix86_varargs_fpr_size = 0;
4075 
4076   for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
4077     {
4078       rtx reg, mem;
4079 
4080       mem = gen_rtx_MEM (Pmode,
4081 			 plus_constant (Pmode, virtual_incoming_args_rtx,
4082 					i * UNITS_PER_WORD));
4083       MEM_NOTRAP_P (mem) = 1;
4084       set_mem_alias_set (mem, set);
4085 
4086       reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4087       emit_move_insn (mem, reg);
4088     }
4089 }
4090 
4091 static void
4092 ix86_setup_incoming_varargs (cumulative_args_t cum_v,
4093 			     const function_arg_info &arg,
4094 			     int *, int no_rtl)
4095 {
4096   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4097   CUMULATIVE_ARGS next_cum;
4098   tree fntype;
4099 
4100   /* This argument doesn't appear to be used anymore.  Which is good,
4101      because the old code here didn't suppress rtl generation.  */
4102   gcc_assert (!no_rtl);
4103 
4104   if (!TARGET_64BIT)
4105     return;
4106 
4107   fntype = TREE_TYPE (current_function_decl);
4108 
4109   /* For varargs, we do not want to skip the dummy va_dcl argument.
4110      For stdargs, we do want to skip the last named argument.  */
4111   next_cum = *cum;
4112   if (stdarg_p (fntype))
4113     ix86_function_arg_advance (pack_cumulative_args (&next_cum), arg);
4114 
4115   if (cum->call_abi == MS_ABI)
4116     setup_incoming_varargs_ms_64 (&next_cum);
4117   else
4118     setup_incoming_varargs_64 (&next_cum);
4119 }
4120 
4121 /* Return true if TYPE is a va_list of kind char *.  */
4122 
4123 static bool
4124 is_va_list_char_pointer (tree type)
4125 {
4126   tree canonic;
4127 
4128   /* For 32-bit it is always true.  */
4129   if (!TARGET_64BIT)
4130     return true;
4131   canonic = ix86_canonical_va_list_type (type);
4132   return (canonic == ms_va_list_type_node
4133           || (ix86_abi == MS_ABI && canonic == va_list_type_node));
4134 }
4135 
4136 /* Implement va_start.  */
4137 
4138 static void
4139 ix86_va_start (tree valist, rtx nextarg)
4140 {
4141   HOST_WIDE_INT words, n_gpr, n_fpr;
4142   tree f_gpr, f_fpr, f_ovf, f_sav;
4143   tree gpr, fpr, ovf, sav, t;
4144   tree type;
4145   rtx ovf_rtx;
4146 
4147   if (flag_split_stack
4148       && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4149     {
4150       unsigned int scratch_regno;
4151 
4152       /* When we are splitting the stack, we can't refer to the stack
4153 	 arguments using internal_arg_pointer, because they may be on
4154 	 the old stack.  The split stack prologue will arrange to
4155 	 leave a pointer to the old stack arguments in a scratch
4156 	 register, which we here copy to a pseudo-register.  The split
4157 	 stack prologue can't set the pseudo-register directly because
4158 	 it (the prologue) runs before any registers have been saved.  */
4159 
4160       scratch_regno = split_stack_prologue_scratch_regno ();
4161       if (scratch_regno != INVALID_REGNUM)
4162 	{
4163 	  rtx reg;
4164 	  rtx_insn *seq;
4165 
4166 	  reg = gen_reg_rtx (Pmode);
4167 	  cfun->machine->split_stack_varargs_pointer = reg;
4168 
4169 	  start_sequence ();
4170 	  emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
4171 	  seq = get_insns ();
4172 	  end_sequence ();
4173 
4174 	  push_topmost_sequence ();
4175 	  emit_insn_after (seq, entry_of_function ());
4176 	  pop_topmost_sequence ();
4177 	}
4178     }
4179 
4180   /* Only 64bit target needs something special.  */
4181   if (is_va_list_char_pointer (TREE_TYPE (valist)))
4182     {
4183       if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4184 	std_expand_builtin_va_start (valist, nextarg);
4185       else
4186 	{
4187 	  rtx va_r, next;
4188 
4189 	  va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
4190 	  next = expand_binop (ptr_mode, add_optab,
4191 			       cfun->machine->split_stack_varargs_pointer,
4192 			       crtl->args.arg_offset_rtx,
4193 			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
4194 	  convert_move (va_r, next, 0);
4195 	}
4196       return;
4197     }
4198 
4199   f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4200   f_fpr = DECL_CHAIN (f_gpr);
4201   f_ovf = DECL_CHAIN (f_fpr);
4202   f_sav = DECL_CHAIN (f_ovf);
4203 
4204   valist = build_simple_mem_ref (valist);
4205   TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
4206   /* The following should be folded into the MEM_REF offset.  */
4207   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
4208 		f_gpr, NULL_TREE);
4209   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
4210 		f_fpr, NULL_TREE);
4211   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
4212 		f_ovf, NULL_TREE);
4213   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
4214 		f_sav, NULL_TREE);
4215 
4216   /* Count number of gp and fp argument registers used.  */
4217   words = crtl->args.info.words;
4218   n_gpr = crtl->args.info.regno;
4219   n_fpr = crtl->args.info.sse_regno;
4220 
4221   if (cfun->va_list_gpr_size)
4222     {
4223       type = TREE_TYPE (gpr);
4224       t = build2 (MODIFY_EXPR, type,
4225 		  gpr, build_int_cst (type, n_gpr * 8));
4226       TREE_SIDE_EFFECTS (t) = 1;
4227       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4228     }
4229 
4230   if (TARGET_SSE && cfun->va_list_fpr_size)
4231     {
4232       type = TREE_TYPE (fpr);
4233       t = build2 (MODIFY_EXPR, type, fpr,
4234 		  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
4235       TREE_SIDE_EFFECTS (t) = 1;
4236       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4237     }
4238 
4239   /* Find the overflow area.  */
4240   type = TREE_TYPE (ovf);
4241   if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4242     ovf_rtx = crtl->args.internal_arg_pointer;
4243   else
4244     ovf_rtx = cfun->machine->split_stack_varargs_pointer;
4245   t = make_tree (type, ovf_rtx);
4246   if (words != 0)
4247     t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
4248 
4249   t = build2 (MODIFY_EXPR, type, ovf, t);
4250   TREE_SIDE_EFFECTS (t) = 1;
4251   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4252 
4253   if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
4254     {
4255       /* Find the register save area.
4256 	 The function prologue saves it right above the stack frame.  */
4257       type = TREE_TYPE (sav);
4258       t = make_tree (type, frame_pointer_rtx);
4259       if (!ix86_varargs_gpr_size)
4260 	t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
4261 
4262       t = build2 (MODIFY_EXPR, type, sav, t);
4263       TREE_SIDE_EFFECTS (t) = 1;
4264       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4265     }
4266 }
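
/* As an example of the initialization above, a variadic function such
   as

     int sum (int count, ...)

   that consumed one integer register for COUNT starts out with
   gp_offset = 8, fp_offset = 48 (no SSE registers used by named
   arguments), overflow_arg_area pointing at the first stack-passed
   argument and reg_save_area pointing at the block laid out by
   setup_incoming_varargs_64.  */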
4267 
4268 /* Implement va_arg.  */
4269 
4270 static tree
4271 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4272 		      gimple_seq *post_p)
4273 {
4274   static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4275   tree f_gpr, f_fpr, f_ovf, f_sav;
4276   tree gpr, fpr, ovf, sav, t;
4277   int size, rsize;
4278   tree lab_false, lab_over = NULL_TREE;
4279   tree addr, t2;
4280   rtx container;
4281   int indirect_p = 0;
4282   tree ptrtype;
4283   machine_mode nat_mode;
4284   unsigned int arg_boundary;
4285   unsigned int type_align;
4286 
4287   /* Only 64bit target needs something special.  */
4288   if (is_va_list_char_pointer (TREE_TYPE (valist)))
4289     return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4290 
4291   f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4292   f_fpr = DECL_CHAIN (f_gpr);
4293   f_ovf = DECL_CHAIN (f_fpr);
4294   f_sav = DECL_CHAIN (f_ovf);
4295 
4296   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
4297 		valist, f_gpr, NULL_TREE);
4298 
4299   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4300   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4301   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4302 
4303   indirect_p = pass_va_arg_by_reference (type);
4304   if (indirect_p)
4305     type = build_pointer_type (type);
4306   size = arg_int_size_in_bytes (type);
4307   rsize = CEIL (size, UNITS_PER_WORD);
4308 
4309   nat_mode = type_natural_mode (type, NULL, false);
4310   switch (nat_mode)
4311     {
4312     case E_V8SFmode:
4313     case E_V8SImode:
4314     case E_V32QImode:
4315     case E_V16HImode:
4316     case E_V4DFmode:
4317     case E_V4DImode:
4318     case E_V16SFmode:
4319     case E_V16SImode:
4320     case E_V64QImode:
4321     case E_V32HImode:
4322     case E_V8DFmode:
4323     case E_V8DImode:
4324       /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack.  */
4325       if (!TARGET_64BIT_MS_ABI)
4326 	{
4327 	  container = NULL;
4328 	  break;
4329 	}
4330       /* FALLTHRU */
4331 
4332     default:
4333       container = construct_container (nat_mode, TYPE_MODE (type),
4334 				       type, 0, X86_64_REGPARM_MAX,
4335 				       X86_64_SSE_REGPARM_MAX, intreg,
4336 				       0);
4337       break;
4338     }
4339 
4340   /* Pull the value out of the saved registers.  */
4341 
4342   addr = create_tmp_var (ptr_type_node, "addr");
4343   type_align = TYPE_ALIGN (type);
4344 
4345   if (container)
4346     {
4347       int needed_intregs, needed_sseregs;
4348       bool need_temp;
4349       tree int_addr, sse_addr;
4350 
4351       lab_false = create_artificial_label (UNKNOWN_LOCATION);
4352       lab_over = create_artificial_label (UNKNOWN_LOCATION);
4353 
4354       examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4355 
4356       need_temp = (!REG_P (container)
4357 		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
4358 		       || TYPE_ALIGN (type) > 128));
4359 
4360       /* If we are passing a structure, verify that it is a consecutive block
4361          in the register save area.  If not, we need to do moves.  */
4362       if (!need_temp && !REG_P (container))
4363 	{
4364 	  /* Verify that all registers are strictly consecutive.  */
4365 	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4366 	    {
4367 	      int i;
4368 
4369 	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4370 		{
4371 		  rtx slot = XVECEXP (container, 0, i);
4372 		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4373 		      || INTVAL (XEXP (slot, 1)) != i * 16)
4374 		    need_temp = true;
4375 		}
4376 	    }
4377 	  else
4378 	    {
4379 	      int i;
4380 
4381 	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4382 		{
4383 		  rtx slot = XVECEXP (container, 0, i);
4384 		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4385 		      || INTVAL (XEXP (slot, 1)) != i * 8)
4386 		    need_temp = true;
4387 		}
4388 	    }
4389 	}
4390       if (!need_temp)
4391 	{
4392 	  int_addr = addr;
4393 	  sse_addr = addr;
4394 	}
4395       else
4396 	{
4397 	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
4398 	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4399 	}
4400 
4401       /* First ensure that we fit completely in registers.  */
4402       if (needed_intregs)
4403 	{
4404 	  t = build_int_cst (TREE_TYPE (gpr),
4405 			     (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
4406 	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4407 	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4408 	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4409 	  gimplify_and_add (t, pre_p);
4410 	}
4411       if (needed_sseregs)
4412 	{
4413 	  t = build_int_cst (TREE_TYPE (fpr),
4414 			     (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4415 			     + X86_64_REGPARM_MAX * 8);
4416 	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4417 	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4418 	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4419 	  gimplify_and_add (t, pre_p);
4420 	}
4421 
4422       /* Compute index to start of area used for integer regs.  */
4423       if (needed_intregs)
4424 	{
4425 	  /* int_addr = gpr + sav; */
4426 	  t = fold_build_pointer_plus (sav, gpr);
4427 	  gimplify_assign (int_addr, t, pre_p);
4428 	}
4429       if (needed_sseregs)
4430 	{
4431 	  /* sse_addr = fpr + sav; */
4432 	  t = fold_build_pointer_plus (sav, fpr);
4433 	  gimplify_assign (sse_addr, t, pre_p);
4434 	}
4435       if (need_temp)
4436 	{
4437 	  int i, prev_size = 0;
4438 	  tree temp = create_tmp_var (type, "va_arg_tmp");
4439 
4440 	  /* addr = &temp; */
4441 	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4442 	  gimplify_assign (addr, t, pre_p);
4443 
4444 	  for (i = 0; i < XVECLEN (container, 0); i++)
4445 	    {
4446 	      rtx slot = XVECEXP (container, 0, i);
4447 	      rtx reg = XEXP (slot, 0);
4448 	      machine_mode mode = GET_MODE (reg);
4449 	      tree piece_type;
4450 	      tree addr_type;
4451 	      tree daddr_type;
4452 	      tree src_addr, src;
4453 	      int src_offset;
4454 	      tree dest_addr, dest;
4455 	      int cur_size = GET_MODE_SIZE (mode);
4456 
4457 	      gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
4458 	      prev_size = INTVAL (XEXP (slot, 1));
4459 	      if (prev_size + cur_size > size)
4460 		{
4461 		  cur_size = size - prev_size;
4462 		  unsigned int nbits = cur_size * BITS_PER_UNIT;
4463 		  if (!int_mode_for_size (nbits, 1).exists (&mode))
4464 		    mode = QImode;
4465 		}
4466 	      piece_type = lang_hooks.types.type_for_mode (mode, 1);
4467 	      if (mode == GET_MODE (reg))
4468 		addr_type = build_pointer_type (piece_type);
4469 	      else
4470 		addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
4471 							 true);
4472 	      daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
4473 							true);
4474 
4475 	      if (SSE_REGNO_P (REGNO (reg)))
4476 		{
4477 		  src_addr = sse_addr;
4478 		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4479 		}
4480 	      else
4481 		{
4482 		  src_addr = int_addr;
4483 		  src_offset = REGNO (reg) * 8;
4484 		}
4485 	      src_addr = fold_convert (addr_type, src_addr);
4486 	      src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
4487 
4488 	      dest_addr = fold_convert (daddr_type, addr);
4489 	      dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
4490 	      if (cur_size == GET_MODE_SIZE (mode))
4491 		{
4492 		  src = build_va_arg_indirect_ref (src_addr);
4493 		  dest = build_va_arg_indirect_ref (dest_addr);
4494 
4495 		  gimplify_assign (dest, src, pre_p);
4496 		}
4497 	      else
4498 		{
4499 		  tree copy
4500 		    = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
4501 				       3, dest_addr, src_addr,
4502 				       size_int (cur_size));
4503 		  gimplify_and_add (copy, pre_p);
4504 		}
4505 	      prev_size += cur_size;
4506 	    }
4507 	}
4508 
4509       if (needed_intregs)
4510 	{
4511 	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4512 		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4513 	  gimplify_assign (gpr, t, pre_p);
4514 	  /* The GPR save area guarantees only 8-byte alignment.  */
4515 	  if (!need_temp)
4516 	    type_align = MIN (type_align, 64);
4517 	}
4518 
4519       if (needed_sseregs)
4520 	{
4521 	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4522 		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4523 	  gimplify_assign (unshare_expr (fpr), t, pre_p);
4524 	}
4525 
4526       gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
4527 
4528       gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
4529     }
4530 
4531   /* ... otherwise out of the overflow area.  */
4532 
4533   /* When we align a parameter on the stack for the caller, if its
4534      alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
4535      aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We match the callee
4536      here with the caller.  */
4537   arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
4538   if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
4539     arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
4540 
4541   /* Care for on-stack alignment if needed.  */
4542   if (arg_boundary <= 64 || size == 0)
4543     t = ovf;
4544   else
4545     {
4546       HOST_WIDE_INT align = arg_boundary / 8;
4547       t = fold_build_pointer_plus_hwi (ovf, align - 1);
4548       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4549 		  build_int_cst (TREE_TYPE (t), -align));
4550     }
4551 
4552   gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4553   gimplify_assign (addr, t, pre_p);
4554 
4555   t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
4556   gimplify_assign (unshare_expr (ovf), t, pre_p);
4557 
4558   if (container)
4559     gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
4560 
4561   type = build_aligned_type (type, type_align);
4562   ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
4563   addr = fold_convert (ptrtype, addr);
4564 
4565   if (indirect_p)
4566     addr = build_va_arg_indirect_ref (addr);
4567   return build_va_arg_indirect_ref (addr);
4568 }
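
/* The GIMPLE produced above for something like va_arg (ap, int)
   follows the usual SysV shape, roughly:

     if (ap->gp_offset >= 48) goto overflow;
     addr = ap->reg_save_area + ap->gp_offset;
     ap->gp_offset += 8;
     goto done;
   overflow:
     addr = ap->overflow_arg_area;
     ap->overflow_arg_area += 8;
   done:
     result = *(int *) addr;  */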
4569 
4570 /* Return true if OPNUM's MEM should be matched
4571    in movabs* patterns.  */
4572 
4573 bool
4574 ix86_check_movabs (rtx insn, int opnum)
4575 {
4576   rtx set, mem;
4577 
4578   set = PATTERN (insn);
4579   if (GET_CODE (set) == PARALLEL)
4580     set = XVECEXP (set, 0, 0);
4581   gcc_assert (GET_CODE (set) == SET);
4582   mem = XEXP (set, opnum);
4583   while (SUBREG_P (mem))
4584     mem = SUBREG_REG (mem);
4585   gcc_assert (MEM_P (mem));
4586   return volatile_ok || !MEM_VOLATILE_P (mem);
4587 }
4588 
4589 /* Return false if INSN contains a MEM with a non-default address space.  */
4590 bool
4591 ix86_check_no_addr_space (rtx insn)
4592 {
4593   subrtx_var_iterator::array_type array;
4594   FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
4595     {
4596       rtx x = *iter;
4597       if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
4598 	return false;
4599     }
4600   return true;
4601 }
4602 
4603 /* Initialize the table of extra 80387 mathematical constants.  */
4604 
4605 static void
4606 init_ext_80387_constants (void)
4607 {
4608   static const char * cst[5] =
4609   {
4610     "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
4611     "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
4612     "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
4613     "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
4614     "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
4615   };
4616   int i;
4617 
4618   for (i = 0; i < 5; i++)
4619     {
4620       real_from_string (&ext_80387_constants_table[i], cst[i]);
4621       /* Ensure each constant is rounded to XFmode precision.  */
4622       real_convert (&ext_80387_constants_table[i],
4623 		    XFmode, &ext_80387_constants_table[i]);
4624     }
4625 
4626   ext_80387_constants_init = 1;
4627 }
4628 
4629 /* Return non-zero if the constant is something that
4630    can be loaded with a special instruction.  */
4631 
4632 int
4633 standard_80387_constant_p (rtx x)
4634 {
4635   machine_mode mode = GET_MODE (x);
4636 
4637   const REAL_VALUE_TYPE *r;
4638 
4639   if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
4640     return -1;
4641 
4642   if (x == CONST0_RTX (mode))
4643     return 1;
4644   if (x == CONST1_RTX (mode))
4645     return 2;
4646 
4647   r = CONST_DOUBLE_REAL_VALUE (x);
4648 
4649   /* For XFmode constants, try to find a special 80387 instruction when
4650      optimizing for size or on those CPUs that benefit from them.  */
4651   if (mode == XFmode
4652       && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)
4653       && !flag_rounding_math)
4654     {
4655       int i;
4656 
4657       if (! ext_80387_constants_init)
4658 	init_ext_80387_constants ();
4659 
4660       for (i = 0; i < 5; i++)
4661         if (real_identical (r, &ext_80387_constants_table[i]))
4662 	  return i + 3;
4663     }
4664 
4665   /* Load of the constant -0.0 or -1.0 will be split as
4666      fldz;fchs or fld1;fchs sequence.  */
4667   if (real_isnegzero (r))
4668     return 8;
4669   if (real_identical (r, &dconstm1))
4670     return 9;
4671 
4672   return 0;
4673 }
4674 
4675 /* Return the opcode of the special instruction to be used to load
4676    the constant X.  */
4677 
4678 const char *
4679 standard_80387_constant_opcode (rtx x)
4680 {
4681   switch (standard_80387_constant_p (x))
4682     {
4683     case 1:
4684       return "fldz";
4685     case 2:
4686       return "fld1";
4687     case 3:
4688       return "fldlg2";
4689     case 4:
4690       return "fldln2";
4691     case 5:
4692       return "fldl2e";
4693     case 6:
4694       return "fldl2t";
4695     case 7:
4696       return "fldpi";
4697     case 8:
4698     case 9:
4699       return "#";
4700     default:
4701       gcc_unreachable ();
4702     }
4703 }
4704 
4705 /* Return the CONST_DOUBLE representing the 80387 constant that is
4706    loaded by the specified special instruction.  The argument IDX
4707    matches the return value from standard_80387_constant_p.  */
4708 
4709 rtx
4710 standard_80387_constant_rtx (int idx)
4711 {
4712   int i;
4713 
4714   if (! ext_80387_constants_init)
4715     init_ext_80387_constants ();
4716 
4717   switch (idx)
4718     {
4719     case 3:
4720     case 4:
4721     case 5:
4722     case 6:
4723     case 7:
4724       i = idx - 3;
4725       break;
4726 
4727     default:
4728       gcc_unreachable ();
4729     }
4730 
4731   return const_double_from_real_value (ext_80387_constants_table[i],
4732 				       XFmode);
4733 }
4734 
4735 /* Return 1 if X is all bits 0, and 2 if X is all bits 1,
4736    in a supported SSE/AVX vector mode.  */
4737 
4738 int
4739 standard_sse_constant_p (rtx x, machine_mode pred_mode)
4740 {
4741   machine_mode mode;
4742 
4743   if (!TARGET_SSE)
4744     return 0;
4745 
4746   mode = GET_MODE (x);
4747 
4748   if (x == const0_rtx || const0_operand (x, mode))
4749     return 1;
4750 
4751   if (x == constm1_rtx || vector_all_ones_operand (x, mode))
4752     {
4753       /* VOIDmode integer constant, get mode from the predicate.  */
4754       if (mode == VOIDmode)
4755 	mode = pred_mode;
4756 
4757       switch (GET_MODE_SIZE (mode))
4758 	{
4759 	case 64:
4760 	  if (TARGET_AVX512F)
4761 	    return 2;
4762 	  break;
4763 	case 32:
4764 	  if (TARGET_AVX2)
4765 	    return 2;
4766 	  break;
4767 	case 16:
4768 	  if (TARGET_SSE2)
4769 	    return 2;
4770 	  break;
4771 	case 0:
4772 	  /* VOIDmode */
4773 	  gcc_unreachable ();
4774 	default:
4775 	  break;
4776 	}
4777     }
4778 
4779   return 0;
4780 }
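
/* In practice this lets the move patterns materialize, say, a V4SImode
   vector of all zeros with a single pxor/vpxor and a vector of all ones
   with pcmpeqd (or vpternlogd on AVX512) instead of loading the
   constant from memory; the returned 1 or 2 selects between those two
   cases in standard_sse_constant_opcode below.  */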
4781 
4782 /* Return the opcode of the special instruction to be used to load
4783    the constant operands[1] into operands[0].  */
4784 
4785 const char *
4786 standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
4787 {
4788   machine_mode mode;
4789   rtx x = operands[1];
4790 
4791   gcc_assert (TARGET_SSE);
4792 
4793   mode = GET_MODE (x);
4794 
4795   if (x == const0_rtx || const0_operand (x, mode))
4796     {
4797       switch (get_attr_mode (insn))
4798 	{
4799 	case MODE_TI:
4800 	  if (!EXT_REX_SSE_REG_P (operands[0]))
4801 	    return "%vpxor\t%0, %d0";
4802 	  /* FALLTHRU */
4803 	case MODE_XI:
4804 	case MODE_OI:
4805 	  if (EXT_REX_SSE_REG_P (operands[0]))
4806 	    return (TARGET_AVX512VL
4807 		    ? "vpxord\t%x0, %x0, %x0"
4808 		    : "vpxord\t%g0, %g0, %g0");
4809 	  return "vpxor\t%x0, %x0, %x0";
4810 
4811 	case MODE_V2DF:
4812 	  if (!EXT_REX_SSE_REG_P (operands[0]))
4813 	    return "%vxorpd\t%0, %d0";
4814 	  /* FALLTHRU */
4815 	case MODE_V8DF:
4816 	case MODE_V4DF:
4817 	  if (!EXT_REX_SSE_REG_P (operands[0]))
4818 	    return "vxorpd\t%x0, %x0, %x0";
4819 	  else if (TARGET_AVX512DQ)
4820 	    return (TARGET_AVX512VL
4821 		    ? "vxorpd\t%x0, %x0, %x0"
4822 		    : "vxorpd\t%g0, %g0, %g0");
4823 	  else
4824 	    return (TARGET_AVX512VL
4825 		    ? "vpxorq\t%x0, %x0, %x0"
4826 		    : "vpxorq\t%g0, %g0, %g0");
4827 
4828 	case MODE_V4SF:
4829 	  if (!EXT_REX_SSE_REG_P (operands[0]))
4830 	    return "%vxorps\t%0, %d0";
4831 	  /* FALLTHRU */
4832 	case MODE_V16SF:
4833 	case MODE_V8SF:
4834 	  if (!EXT_REX_SSE_REG_P (operands[0]))
4835 	    return "vxorps\t%x0, %x0, %x0";
4836 	  else if (TARGET_AVX512DQ)
4837 	    return (TARGET_AVX512VL
4838 		    ? "vxorps\t%x0, %x0, %x0"
4839 		    : "vxorps\t%g0, %g0, %g0");
4840 	  else
4841 	    return (TARGET_AVX512VL
4842 		    ? "vpxord\t%x0, %x0, %x0"
4843 		    : "vpxord\t%g0, %g0, %g0");
4844 
4845 	default:
4846 	  gcc_unreachable ();
4847 	}
4848     }
4849   else if (x == constm1_rtx || vector_all_ones_operand (x, mode))
4850     {
4851       enum attr_mode insn_mode = get_attr_mode (insn);
4852 
4853       switch (insn_mode)
4854 	{
4855 	case MODE_XI:
4856 	case MODE_V8DF:
4857 	case MODE_V16SF:
4858 	  gcc_assert (TARGET_AVX512F);
4859 	  return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
4860 
4861 	case MODE_OI:
4862 	case MODE_V4DF:
4863 	case MODE_V8SF:
4864 	  gcc_assert (TARGET_AVX2);
4865 	  /* FALLTHRU */
4866 	case MODE_TI:
4867 	case MODE_V2DF:
4868 	case MODE_V4SF:
4869 	  gcc_assert (TARGET_SSE2);
4870 	  if (!EXT_REX_SSE_REG_P (operands[0]))
4871 	    return (TARGET_AVX
4872 		    ? "vpcmpeqd\t%0, %0, %0"
4873 		    : "pcmpeqd\t%0, %0");
4874 	  else if (TARGET_AVX512VL)
4875 	    return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
4876 	  else
4877 	    return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
4878 
4879 	default:
4880 	  gcc_unreachable ();
4881 	}
4882    }
4883 
4884   gcc_unreachable ();
4885 }
4886 
4887 /* Returns true if INSN can be transformed from a memory load
4888    to a supported FP constant load.  */
4889 
4890 bool
4891 ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
4892 {
4893   rtx src = find_constant_src (insn);
4894 
4895   gcc_assert (REG_P (dst));
4896 
4897   if (src == NULL
4898       || (SSE_REGNO_P (REGNO (dst))
4899 	  && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
4900       || (STACK_REGNO_P (REGNO (dst))
4901 	   && standard_80387_constant_p (src) < 1))
4902     return false;
4903 
4904   return true;
4905 }
4906 
4907 /* Predicate for pre-reload splitters with associated instructions,
4908    which can match any time before the split1 pass (usually combine),
4909    then are unconditionally split in that pass and should not be
4910    matched again afterwards.  */
4911 
4912 bool
4913 ix86_pre_reload_split (void)
4914 {
4915   return (can_create_pseudo_p ()
4916 	  && !(cfun->curr_properties & PROP_rtl_split_insns));
4917 }
4918 
4919 /* Return the opcode of the TYPE_SSEMOV instruction.  To move from
4920    or to xmm16-xmm31/ymm16-ymm31 registers, we either require
4921    TARGET_AVX512VL or it must be a register-to-register move, which can
4922    be done with a zmm register move.  */
4923 
4924 static const char *
4925 ix86_get_ssemov (rtx *operands, unsigned size,
4926 		 enum attr_mode insn_mode, machine_mode mode)
4927 {
4928   char buf[128];
4929   bool misaligned_p = (misaligned_operand (operands[0], mode)
4930 		       || misaligned_operand (operands[1], mode));
4931   bool evex_reg_p = (size == 64
4932 		     || EXT_REX_SSE_REG_P (operands[0])
4933 		     || EXT_REX_SSE_REG_P (operands[1]));
4934   machine_mode scalar_mode;
4935 
4936   const char *opcode = NULL;
4937   enum
4938     {
4939       opcode_int,
4940       opcode_float,
4941       opcode_double
4942     } type = opcode_int;
4943 
4944   switch (insn_mode)
4945     {
4946     case MODE_V16SF:
4947     case MODE_V8SF:
4948     case MODE_V4SF:
4949       scalar_mode = E_SFmode;
4950       type = opcode_float;
4951       break;
4952     case MODE_V8DF:
4953     case MODE_V4DF:
4954     case MODE_V2DF:
4955       scalar_mode = E_DFmode;
4956       type = opcode_double;
4957       break;
4958     case MODE_XI:
4959     case MODE_OI:
4960     case MODE_TI:
4961       scalar_mode = GET_MODE_INNER (mode);
4962       break;
4963     default:
4964       gcc_unreachable ();
4965     }
4966 
4967   /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
4968      we can only use zmm register move without memory operand.  */
4969   if (evex_reg_p
4970       && !TARGET_AVX512VL
4971       && GET_MODE_SIZE (mode) < 64)
4972     {
4973       /* NB: Even though ix86_hard_regno_mode_ok doesn't allow
4974 	 xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when
4975 	 AVX512VL is disabled, LRA can still generate reg to
4976 	 reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit
4977 	 modes.  */
4978       if (memory_operand (operands[0], mode)
4979 	  || memory_operand (operands[1], mode))
4980 	gcc_unreachable ();
4981       size = 64;
4982       switch (type)
4983 	{
4984 	case opcode_int:
4985 	  opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
4986 	  break;
4987 	case opcode_float:
4988 	  opcode = misaligned_p ? "vmovups" : "vmovaps";
4989 	  break;
4990 	case opcode_double:
4991 	  opcode = misaligned_p ? "vmovupd" : "vmovapd";
4992 	  break;
4993 	}
4994     }
4995   else if (SCALAR_FLOAT_MODE_P (scalar_mode))
4996     {
4997       switch (scalar_mode)
4998 	{
4999 	case E_SFmode:
5000 	  opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5001 	  break;
5002 	case E_DFmode:
5003 	  opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
5004 	  break;
5005 	case E_TFmode:
5006 	  if (evex_reg_p)
5007 	    opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5008 	  else
5009 	    opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5010 	  break;
5011 	default:
5012 	  gcc_unreachable ();
5013 	}
5014     }
5015   else if (SCALAR_INT_MODE_P (scalar_mode))
5016     {
5017       switch (scalar_mode)
5018 	{
5019 	case E_QImode:
5020 	  if (evex_reg_p)
5021 	    opcode = (misaligned_p
5022 		      ? (TARGET_AVX512BW
5023 			 ? "vmovdqu8"
5024 			 : "vmovdqu64")
5025 		      : "vmovdqa64");
5026 	  else
5027 	    opcode = (misaligned_p
5028 		      ? (TARGET_AVX512BW
5029 			 ? "vmovdqu8"
5030 			 : "%vmovdqu")
5031 		      : "%vmovdqa");
5032 	  break;
5033 	case E_HImode:
5034 	  if (evex_reg_p)
5035 	    opcode = (misaligned_p
5036 		      ? (TARGET_AVX512BW
5037 			 ? "vmovdqu16"
5038 			 : "vmovdqu64")
5039 		      : "vmovdqa64");
5040 	  else
5041 	    opcode = (misaligned_p
5042 		      ? (TARGET_AVX512BW
5043 			 ? "vmovdqu16"
5044 			 : "%vmovdqu")
5045 		      : "%vmovdqa");
5046 	  break;
5047 	case E_SImode:
5048 	  if (evex_reg_p)
5049 	    opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
5050 	  else
5051 	    opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5052 	  break;
5053 	case E_DImode:
5054 	case E_TImode:
5055 	case E_OImode:
5056 	  if (evex_reg_p)
5057 	    opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5058 	  else
5059 	    opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5060 	  break;
5061 	case E_XImode:
5062 	  opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5063 	  break;
5064 	default:
5065 	  gcc_unreachable ();
5066 	}
5067     }
5068   else
5069     gcc_unreachable ();
5070 
5071   switch (size)
5072     {
5073     case 64:
5074       snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}",
5075 		opcode);
5076       break;
5077     case 32:
5078       snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}",
5079 		opcode);
5080       break;
5081     case 16:
5082       snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}",
5083 		opcode);
5084       break;
5085     default:
5086       gcc_unreachable ();
5087     }
5088   output_asm_insn (buf, operands);
5089   return "";
5090 }
5091 
5092 /* Return the template of the TYPE_SSEMOV instruction to move
5093    operands[1] into operands[0].  */
5094 
5095 const char *
5096 ix86_output_ssemov (rtx_insn *insn, rtx *operands)
5097 {
5098   machine_mode mode = GET_MODE (operands[0]);
5099   if (get_attr_type (insn) != TYPE_SSEMOV
5100       || mode != GET_MODE (operands[1]))
5101     gcc_unreachable ();
5102 
5103   enum attr_mode insn_mode = get_attr_mode (insn);
5104 
5105   switch (insn_mode)
5106     {
5107     case MODE_XI:
5108     case MODE_V8DF:
5109     case MODE_V16SF:
5110       return ix86_get_ssemov (operands, 64, insn_mode, mode);
5111 
5112     case MODE_OI:
5113     case MODE_V4DF:
5114     case MODE_V8SF:
5115       return ix86_get_ssemov (operands, 32, insn_mode, mode);
5116 
5117     case MODE_TI:
5118     case MODE_V2DF:
5119     case MODE_V4SF:
5120       return ix86_get_ssemov (operands, 16, insn_mode, mode);
5121 
5122     case MODE_DI:
5123       /* Handle broken assemblers that require movd instead of movq. */
5124       if (!HAVE_AS_IX86_INTERUNIT_MOVQ
5125 	  && (GENERAL_REG_P (operands[0])
5126 	      || GENERAL_REG_P (operands[1])))
5127 	return "%vmovd\t{%1, %0|%0, %1}";
5128       else
5129 	return "%vmovq\t{%1, %0|%0, %1}";
5130 
5131     case MODE_SI:
5132       return "%vmovd\t{%1, %0|%0, %1}";
5133 
5134     case MODE_DF:
5135       if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5136 	return "vmovsd\t{%d1, %0|%0, %d1}";
5137       else
5138 	return "%vmovsd\t{%1, %0|%0, %1}";
5139 
5140     case MODE_SF:
5141       if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5142 	return "vmovss\t{%d1, %0|%0, %d1}";
5143       else
5144 	return "%vmovss\t{%1, %0|%0, %1}";
5145 
5146     case MODE_V1DF:
5147       gcc_assert (!TARGET_AVX);
5148       return "movlpd\t{%1, %0|%0, %1}";
5149 
5150     case MODE_V2SF:
5151       if (TARGET_AVX && REG_P (operands[0]))
5152 	return "vmovlps\t{%1, %d0|%d0, %1}";
5153       else
5154 	return "%vmovlps\t{%1, %0|%0, %1}";
5155 
5156     default:
5157       gcc_unreachable ();
5158     }
5159 }
5160 
5161 /* Returns true if OP contains a symbol reference.  */
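/* For example (illustrative only): (mem (symbol_ref "foo")) and
   (plus (reg) (label_ref 42)) both mention a symbolic reference, while
   (plus (reg) (const_int 8)) does not.  */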
5162 
5163 bool
5164 symbolic_reference_mentioned_p (rtx op)
5165 {
5166   const char *fmt;
5167   int i;
5168 
5169   if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5170     return true;
5171 
5172   fmt = GET_RTX_FORMAT (GET_CODE (op));
5173   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5174     {
5175       if (fmt[i] == 'E')
5176 	{
5177 	  int j;
5178 
5179 	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5180 	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5181 	      return true;
5182 	}
5183 
5184       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5185 	return true;
5186     }
5187 
5188   return false;
5189 }
5190 
5191 /* Return true if it is appropriate to emit `ret' instructions in the
5192    body of a function.  Do this only if the epilogue is simple, needing a
5193    couple of insns.  Prior to reloading, we can't tell how many registers
5194    must be saved, so return false then.  Return false if there is no frame
5195    marker to de-allocate.  */
5196 
5197 bool
5198 ix86_can_use_return_insn_p (void)
5199 {
5200   if (ix86_function_naked (current_function_decl))
5201     return false;
5202 
5203   /* Don't use `ret' instruction in interrupt handler.  */
5204   if (! reload_completed
5205       || frame_pointer_needed
5206       || cfun->machine->func_type != TYPE_NORMAL)
5207     return false;
5208 
5209   /* Don't allow more than 32k pop, since that's all we can do
5210      with one instruction.  */
5211   if (crtl->args.pops_args && crtl->args.size >= 32768)
5212     return false;
5213 
5214   struct ix86_frame &frame = cfun->machine->frame;
5215   return (frame.stack_pointer_offset == UNITS_PER_WORD
5216 	  && (frame.nregs + frame.nsseregs) == 0);
5217 }
5218 
5219 /* Return stack frame size.  get_frame_size () returns used stack slots
5220    during compilation, which may be optimized out later.  If stack frame
5221    is needed, stack_frame_required should be true.  */
5222 
5223 static HOST_WIDE_INT
5224 ix86_get_frame_size (void)
5225 {
5226   if (cfun->machine->stack_frame_required)
5227     return get_frame_size ();
5228   else
5229     return 0;
5230 }
5231 
5232 /* Value should be nonzero if functions must have frame pointers.
5233    Zero means the frame pointer need not be set up (and parms may
5234    be accessed via the stack pointer) in functions that seem suitable.  */
5235 
5236 static bool
5237 ix86_frame_pointer_required (void)
5238 {
5239   /* If we accessed previous frames, then the generated code expects
5240      to be able to access the saved ebp value in our frame.  */
5241   if (cfun->machine->accesses_prev_frame)
5242     return true;
5243 
5244   /* Several x86 OSes need a frame pointer for other reasons,
5245      usually pertaining to setjmp.  */
5246   if (SUBTARGET_FRAME_POINTER_REQUIRED)
5247     return true;
5248 
5249   /* For older 32-bit runtimes setjmp requires valid frame-pointer.  */
5250   if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
5251     return true;
5252 
5253   /* Win64 SEH, very large frames need a frame-pointer as maximum stack
5254      allocation is 4GB.  */
5255   if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
5256     return true;
5257 
5258   /* SSE saves require frame-pointer when stack is misaligned.  */
5259   if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
5260     return true;
5261 
5262   /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
5263      turns off the frame pointer by default.  Turn it back on now if
5264      we've not got a leaf function.  */
5265   if (TARGET_OMIT_LEAF_FRAME_POINTER
5266       && (!crtl->is_leaf
5267 	  || ix86_current_function_calls_tls_descriptor))
5268     return true;
5269 
5270   if (crtl->profile && !flag_fentry)
5271     return true;
5272 
5273   return false;
5274 }
5275 
5276 /* Record that the current function accesses previous call frames.  */
5277 
5278 void
5279 ix86_setup_frame_addresses (void)
5280 {
5281   cfun->machine->accesses_prev_frame = 1;
5282 }
5283 
5284 #ifndef USE_HIDDEN_LINKONCE
5285 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
5286 #  define USE_HIDDEN_LINKONCE 1
5287 # else
5288 #  define USE_HIDDEN_LINKONCE 0
5289 # endif
5290 #endif
5291 
5292 /* Label count for call and return thunks.  It is used to make unique
5293    labels in call and return thunks.  */
5294 static int indirectlabelno;
5295 
5296 /* True if call thunk function is needed.  */
5297 static bool indirect_thunk_needed = false;
5298 
5299 /* Bit masks of integer registers, which contain branch target, used
5300    by call thunk functions.  */
5301 static int indirect_thunks_used;
5302 
5303 /* True if return thunk function is needed.  */
5304 static bool indirect_return_needed = false;
5305 
5306 /* True if return thunk function via CX is needed.  */
5307 static bool indirect_return_via_cx;
5308 
5309 #ifndef INDIRECT_LABEL
5310 # define INDIRECT_LABEL "LIND"
5311 #endif
5312 
5313 /* Indicate what prefix is needed for an indirect branch.  */
5314 enum indirect_thunk_prefix
5315 {
5316   indirect_thunk_prefix_none,
5317   indirect_thunk_prefix_nt
5318 };
5319 
5320 /* Return the prefix needed for an indirect branch INSN.  */
5321 
5322 enum indirect_thunk_prefix
5323 indirect_thunk_need_prefix (rtx_insn *insn)
5324 {
5325   enum indirect_thunk_prefix need_prefix;
5326   if ((cfun->machine->indirect_branch_type
5327 	    == indirect_branch_thunk_extern)
5328 	   && ix86_notrack_prefixed_insn_p (insn))
5329     {
5330       /* NOTRACK prefix is only used with external thunk so that it
5331 	 can be properly updated to support CET at run-time.  */
5332       need_prefix = indirect_thunk_prefix_nt;
5333     }
5334   else
5335     need_prefix = indirect_thunk_prefix_none;
5336   return need_prefix;
5337 }
5338 
5339 /* Fills in the label name that should be used for the indirect thunk.  */
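/* For example (illustrative, assuming USE_HIDDEN_LINKONCE and 64-bit
   register names): the register-based indirect thunk through %rax is named
   "__x86_indirect_thunk_rax", the plain return thunk is
   "__x86_return_thunk", and the external NOTRACK variant through %rax
   would be "__x86_indirect_thunk_nt_rax".  */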
5340 
5341 static void
5342 indirect_thunk_name (char name[32], unsigned int regno,
5343 		     enum indirect_thunk_prefix need_prefix,
5344 		     bool ret_p)
5345 {
5346   if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
5347     gcc_unreachable ();
5348 
5349   if (USE_HIDDEN_LINKONCE)
5350     {
5351       const char *prefix;
5352 
5353       if (need_prefix == indirect_thunk_prefix_nt
5354 	  && regno != INVALID_REGNUM)
5355 	{
5356 	  /* NOTRACK prefix is only used with external thunk via
5357 	     register so that NOTRACK prefix can be added to indirect
5358 	     branch via register to support CET at run-time.  */
5359 	  prefix = "_nt";
5360 	}
5361       else
5362 	prefix = "";
5363 
5364       const char *ret = ret_p ? "return" : "indirect";
5365 
5366       if (regno != INVALID_REGNUM)
5367 	{
5368 	  const char *reg_prefix;
5369 	  if (LEGACY_INT_REGNO_P (regno))
5370 	    reg_prefix = TARGET_64BIT ? "r" : "e";
5371 	  else
5372 	    reg_prefix = "";
5373 	  sprintf (name, "__x86_%s_thunk%s_%s%s",
5374 		   ret, prefix, reg_prefix, reg_names[regno]);
5375 	}
5376       else
5377 	sprintf (name, "__x86_%s_thunk%s", ret, prefix);
5378     }
5379   else
5380     {
5381       if (regno != INVALID_REGNUM)
5382 	ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
5383       else
5384 	{
5385 	  if (ret_p)
5386 	    ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
5387 	  else
5388 	    ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
5389 	}
5390     }
5391 }
5392 
5393 /* Output a call and return thunk for indirect branch.  If REGNO != -1,
5394    the function address is in REGNO and the call and return thunk looks like:
5395 
5396 	call	L2
5397    L1:
5398 	pause
5399 	lfence
5400 	jmp	L1
5401    L2:
5402 	mov	%REG, (%sp)
5403 	ret
5404 
5405    Otherwise, the function address is on the top of stack and the
5406    call and return thunk looks like:
5407 
5408 	call L2
5409   L1:
5410 	pause
5411 	lfence
5412 	jmp L1
5413   L2:
5414 	lea WORD_SIZE(%sp), %sp
5415 	ret
5416  */
5417 
5418 static void
5419 output_indirect_thunk (unsigned int regno)
5420 {
5421   char indirectlabel1[32];
5422   char indirectlabel2[32];
5423 
5424   ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
5425 			       indirectlabelno++);
5426   ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
5427 			       indirectlabelno++);
5428 
5429   /* Call */
5430   fputs ("\tcall\t", asm_out_file);
5431   assemble_name_raw (asm_out_file, indirectlabel2);
5432   fputc ('\n', asm_out_file);
5433 
5434   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
5435 
5436   /* AMD and Intel CPUs each prefer a different instruction as the loop
5437      filler.  Using both pause + lfence is a compromise solution.  */
5438   fprintf (asm_out_file, "\tpause\n\tlfence\n");
5439 
5440   /* Jump.  */
5441   fputs ("\tjmp\t", asm_out_file);
5442   assemble_name_raw (asm_out_file, indirectlabel1);
5443   fputc ('\n', asm_out_file);
5444 
5445   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
5446 
5447   /* The above call insn pushed a word to stack.  Adjust CFI info.  */
5448   if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
5449     {
5450       if (! dwarf2out_do_cfi_asm ())
5451 	{
5452 	  dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
5453 	  xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
5454 	  xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
5455 	  vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
5456 	}
5457       dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
5458       xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
5459       xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
5460       vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
5461       dwarf2out_emit_cfi (xcfi);
5462     }
5463 
5464   if (regno != INVALID_REGNUM)
5465     {
5466       /* MOV.  */
5467       rtx xops[2];
5468       xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
5469       xops[1] = gen_rtx_REG (word_mode, regno);
5470       output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
5471     }
5472   else
5473     {
5474       /* LEA.  */
5475       rtx xops[2];
5476       xops[0] = stack_pointer_rtx;
5477       xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
5478       output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
5479     }
5480 
5481   fputs ("\tret\n", asm_out_file);
5482 }
5483 
5484 /* Output a function with a call and return thunk for indirect branch.
5485    If REGNO != INVALID_REGNUM, the function address is in REGNO.
5486    Otherwise, the function address is on the top of stack.  Thunk is
5487    used for function return if RET_P is true.  */
5488 
5489 static void
5490 output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
5491 				unsigned int regno, bool ret_p)
5492 {
5493   char name[32];
5494   tree decl;
5495 
5496   /* Create __x86_indirect_thunk.  */
5497   indirect_thunk_name (name, regno, need_prefix, ret_p);
5498   decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
5499 		     get_identifier (name),
5500 		     build_function_type_list (void_type_node, NULL_TREE));
5501   DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
5502 				   NULL_TREE, void_type_node);
5503   TREE_PUBLIC (decl) = 1;
5504   TREE_STATIC (decl) = 1;
5505   DECL_IGNORED_P (decl) = 1;
5506 
5507 #if TARGET_MACHO
5508   if (TARGET_MACHO)
5509     {
5510       switch_to_section (darwin_sections[picbase_thunk_section]);
5511       fputs ("\t.weak_definition\t", asm_out_file);
5512       assemble_name (asm_out_file, name);
5513       fputs ("\n\t.private_extern\t", asm_out_file);
5514       assemble_name (asm_out_file, name);
5515       putc ('\n', asm_out_file);
5516       ASM_OUTPUT_LABEL (asm_out_file, name);
5517       DECL_WEAK (decl) = 1;
5518     }
5519   else
5520 #endif
5521     if (USE_HIDDEN_LINKONCE)
5522       {
5523 	cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
5524 
5525 	targetm.asm_out.unique_section (decl, 0);
5526 	switch_to_section (get_named_section (decl, NULL, 0));
5527 
5528 	targetm.asm_out.globalize_label (asm_out_file, name);
5529 	fputs ("\t.hidden\t", asm_out_file);
5530 	assemble_name (asm_out_file, name);
5531 	putc ('\n', asm_out_file);
5532 	ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5533       }
5534     else
5535       {
5536 	switch_to_section (text_section);
5537 	ASM_OUTPUT_LABEL (asm_out_file, name);
5538       }
5539 
5540   DECL_INITIAL (decl) = make_node (BLOCK);
5541   current_function_decl = decl;
5542   allocate_struct_function (decl, false);
5543   init_function_start (decl);
5544   /* We're about to hide the function body from callees of final_* by
5545      emitting it directly; tell them we're a thunk, if they care.  */
5546   cfun->is_thunk = true;
5547   first_function_block_is_cold = false;
5548   /* Make sure unwind info is emitted for the thunk if needed.  */
5549   final_start_function (emit_barrier (), asm_out_file, 1);
5550 
5551   output_indirect_thunk (regno);
5552 
5553   final_end_function ();
5554   init_insn_lengths ();
5555   free_after_compilation (cfun);
5556   set_cfun (NULL);
5557   current_function_decl = NULL;
5558 }
5559 
5560 static int pic_labels_used;
5561 
5562 /* Fills in the label name that should be used for a pc thunk for
5563    the given register.  */
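/* For example (illustrative): with USE_HIDDEN_LINKONCE, the thunk that
   loads the PC into %ebx is named "__x86.get_pc_thunk.bx".  */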
5564 
5565 static void
5566 get_pc_thunk_name (char name[32], unsigned int regno)
5567 {
5568   gcc_assert (!TARGET_64BIT);
5569 
5570   if (USE_HIDDEN_LINKONCE)
5571     sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
5572   else
5573     ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5574 }
5575 
5576 
5577 /* This function generates code for -fpic that loads %ebx with
5578    the return address of the caller and then returns.  */
5579 
5580 static void
5581 ix86_code_end (void)
5582 {
5583   rtx xops[2];
5584   unsigned int regno;
5585 
5586   if (indirect_return_needed)
5587     output_indirect_thunk_function (indirect_thunk_prefix_none,
5588 				    INVALID_REGNUM, true);
5589   if (indirect_return_via_cx)
5590     output_indirect_thunk_function (indirect_thunk_prefix_none,
5591 				    CX_REG, true);
5592   if (indirect_thunk_needed)
5593     output_indirect_thunk_function (indirect_thunk_prefix_none,
5594 				    INVALID_REGNUM, false);
5595 
5596   for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
5597     {
5598       unsigned int i = regno - FIRST_REX_INT_REG + LAST_INT_REG + 1;
5599       if ((indirect_thunks_used & (1 << i)))
5600 	output_indirect_thunk_function (indirect_thunk_prefix_none,
5601 					regno, false);
5602     }
5603 
5604   for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
5605     {
5606       char name[32];
5607       tree decl;
5608 
5609       if ((indirect_thunks_used & (1 << regno)))
5610 	output_indirect_thunk_function (indirect_thunk_prefix_none,
5611 					regno, false);
5612 
5613       if (!(pic_labels_used & (1 << regno)))
5614 	continue;
5615 
5616       get_pc_thunk_name (name, regno);
5617 
5618       decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
5619 			 get_identifier (name),
5620 			 build_function_type_list (void_type_node, NULL_TREE));
5621       DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
5622 				       NULL_TREE, void_type_node);
5623       TREE_PUBLIC (decl) = 1;
5624       TREE_STATIC (decl) = 1;
5625       DECL_IGNORED_P (decl) = 1;
5626 
5627 #if TARGET_MACHO
5628       if (TARGET_MACHO)
5629 	{
5630 	  switch_to_section (darwin_sections[picbase_thunk_section]);
5631 	  fputs ("\t.weak_definition\t", asm_out_file);
5632 	  assemble_name (asm_out_file, name);
5633 	  fputs ("\n\t.private_extern\t", asm_out_file);
5634 	  assemble_name (asm_out_file, name);
5635 	  putc ('\n', asm_out_file);
5636 	  ASM_OUTPUT_LABEL (asm_out_file, name);
5637 	  DECL_WEAK (decl) = 1;
5638 	}
5639       else
5640 #endif
5641       if (USE_HIDDEN_LINKONCE)
5642 	{
5643 	  cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
5644 
5645 	  targetm.asm_out.unique_section (decl, 0);
5646 	  switch_to_section (get_named_section (decl, NULL, 0));
5647 
5648 	  targetm.asm_out.globalize_label (asm_out_file, name);
5649 	  fputs ("\t.hidden\t", asm_out_file);
5650 	  assemble_name (asm_out_file, name);
5651 	  putc ('\n', asm_out_file);
5652 	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5653 	}
5654       else
5655 	{
5656 	  switch_to_section (text_section);
5657 	  ASM_OUTPUT_LABEL (asm_out_file, name);
5658 	}
5659 
5660       DECL_INITIAL (decl) = make_node (BLOCK);
5661       current_function_decl = decl;
5662       allocate_struct_function (decl, false);
5663       init_function_start (decl);
5664       /* We're about to hide the function body from callees of final_* by
5665 	 emitting it directly; tell them we're a thunk, if they care.  */
5666       cfun->is_thunk = true;
5667       first_function_block_is_cold = false;
5668       /* Make sure unwind info is emitted for the thunk if needed.  */
5669       final_start_function (emit_barrier (), asm_out_file, 1);
5670 
5671       /* Pad stack IP move with 4 instructions (two NOPs count
5672 	 as one instruction).  */
5673       if (TARGET_PAD_SHORT_FUNCTION)
5674 	{
5675 	  int i = 8;
5676 
5677 	  while (i--)
5678 	    fputs ("\tnop\n", asm_out_file);
5679 	}
5680 
5681       xops[0] = gen_rtx_REG (Pmode, regno);
5682       xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
5683       output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
5684       output_asm_insn ("%!ret", NULL);
5685       final_end_function ();
5686       init_insn_lengths ();
5687       free_after_compilation (cfun);
5688       set_cfun (NULL);
5689       current_function_decl = NULL;
5690     }
5691 
5692   if (flag_split_stack)
5693     file_end_indicate_split_stack ();
5694 }
5695 
5696 /* Emit code for the SET_GOT patterns.  */
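/* Illustrative sketch of the common 32-bit PIC case (roughly, not an
   exact dump): for DEST = %ebx this emits

	call	__x86.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   where the thunk (emitted later by ix86_code_end) loads the return
   address, i.e. the address following the call, into %ebx.  */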
5697 
5698 const char *
5699 output_set_got (rtx dest, rtx label)
5700 {
5701   rtx xops[3];
5702 
5703   xops[0] = dest;
5704 
5705   if (TARGET_VXWORKS_RTP && flag_pic)
5706     {
5707       /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
5708       xops[2] = gen_rtx_MEM (Pmode,
5709 			     gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5710       output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5711 
5712       /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5713 	 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5714 	 an unadorned address.  */
5715       xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5716       SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5717       output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5718       return "";
5719     }
5720 
5721   xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5722 
5723   if (flag_pic)
5724     {
5725       char name[32];
5726       get_pc_thunk_name (name, REGNO (dest));
5727       pic_labels_used |= 1 << REGNO (dest);
5728 
5729       xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5730       xops[2] = gen_rtx_MEM (QImode, xops[2]);
5731       output_asm_insn ("%!call\t%X2", xops);
5732 
5733 #if TARGET_MACHO
5734       /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
5735          This is what will be referenced by the Mach-O PIC subsystem.  */
5736       if (machopic_should_output_picbase_label () || !label)
5737 	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
5738 
5739       /* When we are restoring the pic base at the site of a nonlocal label,
5740          and we decided to emit the pic base above, we will still output a
5741          local label used for calculating the correction offset (even though
5742          the offset will be 0 in that case).  */
5743       if (label)
5744         targetm.asm_out.internal_label (asm_out_file, "L",
5745 					   CODE_LABEL_NUMBER (label));
5746 #endif
5747     }
5748   else
5749     {
5750       if (TARGET_MACHO)
5751 	/* We don't need a pic base, we're not producing pic.  */
5752 	gcc_unreachable ();
5753 
5754       xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5755       output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
5756       targetm.asm_out.internal_label (asm_out_file, "L",
5757 				      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5758     }
5759 
5760   if (!TARGET_MACHO)
5761     output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
5762 
5763   return "";
5764 }
5765 
5766 /* Generate a "push" pattern for input ARG.  */
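/* For example (illustrative): on x86-64, gen_push of %rbx builds
   (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI bx)), i.e. the RTL
   form of "pushq %rbx", and bumps the recorded CFA/SP offsets by
   UNITS_PER_WORD.  */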
5767 
5768 rtx
5769 gen_push (rtx arg)
5770 {
5771   struct machine_function *m = cfun->machine;
5772 
5773   if (m->fs.cfa_reg == stack_pointer_rtx)
5774     m->fs.cfa_offset += UNITS_PER_WORD;
5775   m->fs.sp_offset += UNITS_PER_WORD;
5776 
5777   if (REG_P (arg) && GET_MODE (arg) != word_mode)
5778     arg = gen_rtx_REG (word_mode, REGNO (arg));
5779 
5780   return gen_rtx_SET (gen_rtx_MEM (word_mode,
5781 				   gen_rtx_PRE_DEC (Pmode,
5782 						    stack_pointer_rtx)),
5783 		      arg);
5784 }
5785 
5786 /* Generate a "pop" pattern for input ARG.  */
5787 
5788 rtx
5789 gen_pop (rtx arg)
5790 {
5791   if (REG_P (arg) && GET_MODE (arg) != word_mode)
5792     arg = gen_rtx_REG (word_mode, REGNO (arg));
5793 
5794   return gen_rtx_SET (arg,
5795 		      gen_rtx_MEM (word_mode,
5796 				   gen_rtx_POST_INC (Pmode,
5797 						     stack_pointer_rtx)));
5798 }
5799 
5800 /* Return >= 0 if there is an unused call-clobbered register available
5801    for the entire function.  */
5802 
5803 static unsigned int
5804 ix86_select_alt_pic_regnum (void)
5805 {
5806   if (ix86_use_pseudo_pic_reg ())
5807     return INVALID_REGNUM;
5808 
5809   if (crtl->is_leaf
5810       && !crtl->profile
5811       && !ix86_current_function_calls_tls_descriptor)
5812     {
5813       int i, drap;
5814       /* Can't use the same register for both PIC and DRAP.  */
5815       if (crtl->drap_reg)
5816 	drap = REGNO (crtl->drap_reg);
5817       else
5818 	drap = -1;
5819       for (i = 2; i >= 0; --i)
5820         if (i != drap && !df_regs_ever_live_p (i))
5821 	  return i;
5822     }
5823 
5824   return INVALID_REGNUM;
5825 }
5826 
5827 /* Return true if REGNO is used by the epilogue.  */
5828 
5829 bool
5830 ix86_epilogue_uses (int regno)
5831 {
5832   /* If there are no caller-saved registers, we preserve all registers,
5833      except for MMX and x87 registers which aren't supported when saving
5834      and restoring registers.  Don't explicitly save SP register since
5835      it is always preserved.  */
5836   return (epilogue_completed
5837 	  && cfun->machine->no_caller_saved_registers
5838 	  && !fixed_regs[regno]
5839 	  && !STACK_REGNO_P (regno)
5840 	  && !MMX_REGNO_P (regno));
5841 }
5842 
5843 /* Return nonzero if register REGNO can be used as a scratch register
5844    in peephole2.  */
5845 
5846 static bool
5847 ix86_hard_regno_scratch_ok (unsigned int regno)
5848 {
5849   /* If there are no caller-saved registers, we can't use any register
5850      as a scratch register after epilogue and use REGNO as scratch
5851      register only if it has been used before to avoid saving and
5852      restoring it.  */
5853   return (!cfun->machine->no_caller_saved_registers
5854 	  || (!epilogue_completed
5855 	      && df_regs_ever_live_p (regno)));
5856 }
5857 
5858 /* Return TRUE if we need to save REGNO.  */
5859 
5860 bool
5861 ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
5862 {
5863   /* If there are no caller-saved registers, we preserve all registers,
5864      except for MMX and x87 registers which aren't supported when saving
5865      and restoring registers.  Don't explicitly save SP register since
5866      it is always preserved.  */
5867   if (cfun->machine->no_caller_saved_registers)
5868     {
5869       /* Don't preserve registers used for function return value.  */
5870       rtx reg = crtl->return_rtx;
5871       if (reg)
5872 	{
5873 	  unsigned int i = REGNO (reg);
5874 	  unsigned int nregs = REG_NREGS (reg);
5875 	  while (nregs-- > 0)
5876 	    if ((i + nregs) == regno)
5877 	      return false;
5878 	}
5879 
5880       return (df_regs_ever_live_p (regno)
5881 	      && !fixed_regs[regno]
5882 	      && !STACK_REGNO_P (regno)
5883 	      && !MMX_REGNO_P (regno)
5884 	      && (regno != HARD_FRAME_POINTER_REGNUM
5885 		  || !frame_pointer_needed));
5886     }
5887 
5888   if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
5889       && pic_offset_table_rtx)
5890     {
5891       if (ix86_use_pseudo_pic_reg ())
5892 	{
5893 	  /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
5894 	  _mcount in prologue.  */
5895 	  if (!TARGET_64BIT && flag_pic && crtl->profile)
5896 	    return true;
5897 	}
5898       else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
5899 	       || crtl->profile
5900 	       || crtl->calls_eh_return
5901 	       || crtl->uses_const_pool
5902 	       || cfun->has_nonlocal_label)
5903         return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
5904     }
5905 
5906   if (crtl->calls_eh_return && maybe_eh_return)
5907     {
5908       unsigned i;
5909       for (i = 0; ; i++)
5910 	{
5911 	  unsigned test = EH_RETURN_DATA_REGNO (i);
5912 	  if (test == INVALID_REGNUM)
5913 	    break;
5914 	  if (test == regno)
5915 	    return true;
5916 	}
5917     }
5918 
5919   if (ignore_outlined && cfun->machine->call_ms2sysv)
5920     {
5921       unsigned count = cfun->machine->call_ms2sysv_extra_regs
5922 		       + xlogue_layout::MIN_REGS;
5923       if (xlogue_layout::is_stub_managed_reg (regno, count))
5924 	return false;
5925     }
5926 
5927   if (crtl->drap_reg
5928       && regno == REGNO (crtl->drap_reg)
5929       && !cfun->machine->no_drap_save_restore)
5930     return true;
5931 
5932   return (df_regs_ever_live_p (regno)
5933 	  && !call_used_or_fixed_reg_p (regno)
5934 	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5935 }
5936 
5937 /* Return the number of saved general purpose registers.  */
5938 
5939 static int
5940 ix86_nsaved_regs (void)
5941 {
5942   int nregs = 0;
5943   int regno;
5944 
5945   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5946     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
5947       nregs ++;
5948   return nregs;
5949 }
5950 
5951 /* Return number of saved SSE registers.  */
5952 
5953 static int
5954 ix86_nsaved_sseregs (void)
5955 {
5956   int nregs = 0;
5957   int regno;
5958 
5959   if (!TARGET_64BIT_MS_ABI)
5960     return 0;
5961   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5962     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
5963       nregs ++;
5964   return nregs;
5965 }
5966 
5967 /* Given FROM and TO register numbers, say whether this elimination is
5968    allowed.  If stack alignment is needed, we can only replace argument
5969    pointer with hard frame pointer, or replace frame pointer with stack
5970    pointer.  Otherwise, frame pointer elimination is automatically
5971    handled and all other eliminations are valid.  */
5972 
5973 static bool
5974 ix86_can_eliminate (const int from, const int to)
5975 {
5976   if (stack_realign_fp)
5977     return ((from == ARG_POINTER_REGNUM
5978 	     && to == HARD_FRAME_POINTER_REGNUM)
5979 	    || (from == FRAME_POINTER_REGNUM
5980 		&& to == STACK_POINTER_REGNUM));
5981   else
5982     return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
5983 }
5984 
5985 /* Return the offset between two registers, one to be eliminated, and the other
5986    its replacement, at the start of a routine.  */
5987 
5988 HOST_WIDE_INT
5989 ix86_initial_elimination_offset (int from, int to)
5990 {
5991   struct ix86_frame &frame = cfun->machine->frame;
5992 
5993   if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5994     return frame.hard_frame_pointer_offset;
5995   else if (from == FRAME_POINTER_REGNUM
5996 	   && to == HARD_FRAME_POINTER_REGNUM)
5997     return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5998   else
5999     {
6000       gcc_assert (to == STACK_POINTER_REGNUM);
6001 
6002       if (from == ARG_POINTER_REGNUM)
6003 	return frame.stack_pointer_offset;
6004 
6005       gcc_assert (from == FRAME_POINTER_REGNUM);
6006       return frame.stack_pointer_offset - frame.frame_pointer_offset;
6007     }
6008 }
6009 
6010 /* Emits a warning for unsupported msabi to sysv pro/epilogues.  */
6011 void warn_once_call_ms2sysv_xlogues (const char *feature)
6012 {
6013   static bool warned_once = false;
6014   if (!warned_once)
6015     {
6016       warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
6017 	       feature);
6018       warned_once = true;
6019     }
6020 }
6021 
6022 /* Return the probing interval for -fstack-clash-protection.  */
6023 
6024 static HOST_WIDE_INT
6025 get_probe_interval (void)
6026 {
6027   if (flag_stack_clash_protection)
6028     return (HOST_WIDE_INT_1U
6029 	    << param_stack_clash_protection_probe_interval);
6030   else
6031     return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
6032 }
6033 
6034 /* When using -fsplit-stack, the allocation routines set a field in
6035    the TCB to the bottom of the stack plus this much space, measured
6036    in bytes.  */
6037 
6038 #define SPLIT_STACK_AVAILABLE 256
6039 
6040 /* Fill in the ix86_frame structure describing the frame of the currently
     compiled function.  */
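/* A rough sketch of the layout computed below, from the incoming stack
   pointer downwards (illustrative only; the exact offsets depend on the
   ABI, realignment and the options in effect):

	return address (and error code in exception handlers)
	[pushed static chain]
	[saved frame pointer]			<- hard_frame_pointer_offset
	saved general registers			<- reg_save_offset
	[realignment / SSE register save area]	<- sse_reg_save_offset
	va-arg register save area
	local stack frame			<- frame_pointer_offset
	outgoing arguments
						<- stack_pointer_offset  */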
6041 
6042 static void
6043 ix86_compute_frame_layout (void)
6044 {
6045   struct ix86_frame *frame = &cfun->machine->frame;
6046   struct machine_function *m = cfun->machine;
6047   unsigned HOST_WIDE_INT stack_alignment_needed;
6048   HOST_WIDE_INT offset;
6049   unsigned HOST_WIDE_INT preferred_alignment;
6050   HOST_WIDE_INT size = ix86_get_frame_size ();
6051   HOST_WIDE_INT to_allocate;
6052 
6053   /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
6054    * ms_abi functions that call a sysv function.  We now need to prune away
6055    * cases where it should be disabled.  */
6056   if (TARGET_64BIT && m->call_ms2sysv)
6057     {
6058       gcc_assert (TARGET_64BIT_MS_ABI);
6059       gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
6060       gcc_assert (!TARGET_SEH);
6061       gcc_assert (TARGET_SSE);
6062       gcc_assert (!ix86_using_red_zone ());
6063 
6064       if (crtl->calls_eh_return)
6065 	{
6066 	  gcc_assert (!reload_completed);
6067 	  m->call_ms2sysv = false;
6068 	  warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
6069 	}
6070 
6071       else if (ix86_static_chain_on_stack)
6072 	{
6073 	  gcc_assert (!reload_completed);
6074 	  m->call_ms2sysv = false;
6075 	  warn_once_call_ms2sysv_xlogues ("static call chains");
6076 	}
6077 
6078       /* Finally, compute which registers the stub will manage.  */
6079       else
6080 	{
6081 	  unsigned count = xlogue_layout::count_stub_managed_regs ();
6082 	  m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
6083 	  m->call_ms2sysv_pad_in = 0;
6084 	}
6085     }
6086 
6087   frame->nregs = ix86_nsaved_regs ();
6088   frame->nsseregs = ix86_nsaved_sseregs ();
6089 
6090   /* The 64-bit MS ABI seems to require stack alignment to always be 16,
6091      except for function prologues, leaf functions, and when the default
6092      incoming stack boundary is overridden at the command line or via the
6093      force_align_arg_pointer attribute.
6094 
6095      Darwin's ABI specifies 128-bit alignment for both the 32- and 64-bit
6096      variants at call sites, including profile function calls.
6097  */
6098   if (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
6099         && crtl->preferred_stack_boundary < 128)
6100       && (!crtl->is_leaf || cfun->calls_alloca != 0
6101 	  || ix86_current_function_calls_tls_descriptor
6102 	  || (TARGET_MACHO && crtl->profile)
6103 	  || ix86_incoming_stack_boundary < 128))
6104     {
6105       crtl->preferred_stack_boundary = 128;
6106       crtl->stack_alignment_needed = 128;
6107     }
6108 
6109   stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
6110   preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
6111 
6112   gcc_assert (!size || stack_alignment_needed);
6113   gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6114   gcc_assert (preferred_alignment <= stack_alignment_needed);
6115 
6116   /* The only ABI saving SSE regs should be 64-bit ms_abi.  */
6117   gcc_assert (TARGET_64BIT || !frame->nsseregs);
6118   if (TARGET_64BIT && m->call_ms2sysv)
6119     {
6120       gcc_assert (stack_alignment_needed >= 16);
6121       gcc_assert (!frame->nsseregs);
6122     }
6123 
6124   /* For SEH we have to limit the amount of code movement into the prologue.
6125      At present we do this via a BLOCKAGE, at which point there's very little
6126      scheduling that can be done, which means that there's very little point
6127      in doing anything except PUSHs.  */
6128   if (TARGET_SEH)
6129     m->use_fast_prologue_epilogue = false;
6130   else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
6131     {
6132       int count = frame->nregs;
6133       struct cgraph_node *node = cgraph_node::get (current_function_decl);
6134 
6135       /* The fast prologue uses move instead of push to save registers.  This
6136          is significantly longer, but also executes faster as modern hardware
6137          can execute the moves in parallel, but can't do that for push/pop.
6138 
6139 	 Be careful about choosing which prologue to emit: when the function
6140 	 takes many instructions to execute, we may use the slow version, as
6141 	 well as when the function is known to be outside a hot spot (this is
6142 	 known with feedback only).  Weight the size of the function by the
6143 	 number of registers to save, as it is cheap to use one or two push
6144 	 instructions but very slow to use many of them.
6145 
6146 	 Calling this hook multiple times with the same frame requirements
6147 	 must produce the same layout, since the RA might otherwise be
6148 	 unable to reach a fixed point or might fail its final sanity checks.
6149 	 This means that once we've assumed that a function does or doesn't
6150 	 have a particular size, we have to stick to that assumption
6151 	 regardless of how the function has changed since.  */
6152       if (count)
6153 	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
6154       if (node->frequency < NODE_FREQUENCY_NORMAL
6155 	  || (flag_branch_probabilities
6156 	      && node->frequency < NODE_FREQUENCY_HOT))
6157 	m->use_fast_prologue_epilogue = false;
6158       else
6159 	{
6160 	  if (count != frame->expensive_count)
6161 	    {
6162 	      frame->expensive_count = count;
6163 	      frame->expensive_p = expensive_function_p (count);
6164 	    }
6165 	  m->use_fast_prologue_epilogue = !frame->expensive_p;
6166 	}
6167     }
6168 
6169   frame->save_regs_using_mov
6170     = (TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue
6171        /* If static stack checking is enabled and done with probes,
6172 	  the registers need to be saved before allocating the frame.  */
6173        && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
6174 
6175   /* Skip return address and error code in exception handler.  */
6176   offset = INCOMING_FRAME_SP_OFFSET;
6177 
6178   /* Skip pushed static chain.  */
6179   if (ix86_static_chain_on_stack)
6180     offset += UNITS_PER_WORD;
6181 
6182   /* Skip saved base pointer.  */
6183   if (frame_pointer_needed)
6184     offset += UNITS_PER_WORD;
6185   frame->hfp_save_offset = offset;
6186 
6187   /* The traditional frame pointer location is at the top of the frame.  */
6188   frame->hard_frame_pointer_offset = offset;
6189 
6190   /* Register save area */
6191   offset += frame->nregs * UNITS_PER_WORD;
6192   frame->reg_save_offset = offset;
6193 
6194   /* Calculate the size of the va-arg area (not including padding, if any).  */
6195   frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
6196 
6197   /* Also adjust stack_realign_offset for the largest alignment of
6198      stack slot actually used.  */
6199   if (stack_realign_fp
6200       || (cfun->machine->max_used_stack_alignment != 0
6201 	  && (offset % cfun->machine->max_used_stack_alignment) != 0))
6202     {
6203       /* We may need a 16-byte aligned stack for the remainder of the
6204 	 register save area, but the stack frame for the local function
6205 	 may require a greater alignment if using AVX/2/512.  In order
6206 	 to avoid wasting space, we first calculate the space needed for
6207 	 the rest of the register saves, add that to the stack pointer,
6208 	 and then realign the stack to the boundary of the start of the
6209 	 frame for the local function.  */
6210       HOST_WIDE_INT space_needed = 0;
6211       HOST_WIDE_INT sse_reg_space_needed = 0;
6212 
6213       if (TARGET_64BIT)
6214 	{
6215 	  if (m->call_ms2sysv)
6216 	    {
6217 	      m->call_ms2sysv_pad_in = 0;
6218 	      space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
6219 	    }
6220 
6221 	  else if (frame->nsseregs)
6222 	    /* The only ABI that has saved SSE registers (Win64) also has a
6223 	       16-byte aligned default stack.  However, many programs violate
6224 	       the ABI, and Wine64 forces stack realignment to compensate.  */
6225 	    space_needed = frame->nsseregs * 16;
6226 
6227 	  sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
6228 
6229 	  /* 64-bit frame->va_arg_size should always be a multiple of 16, but
6230 	     round anyway to be pedantic.  */
6231 	  space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
6232 	}
6233       else
6234 	space_needed = frame->va_arg_size;
6235 
6236       /* Record the allocation size required prior to the realignment AND.  */
6237       frame->stack_realign_allocate = space_needed;
6238 
6239       /* The re-aligned stack starts at frame->stack_realign_offset.  Values
6240 	 before this point are not directly comparable with values below
6241 	 this point.  Use sp_valid_at to determine if the stack pointer is
6242 	 valid for a given offset, fp_valid_at for the frame pointer, or
6243 	 choose_baseaddr to have a base register chosen for you.
6244 
6245 	 Note that the result of (frame->stack_realign_offset
6246 	 & (stack_alignment_needed - 1)) may not equal zero.  */
6247       offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
6248       frame->stack_realign_offset = offset - space_needed;
6249       frame->sse_reg_save_offset = frame->stack_realign_offset
6250 							+ sse_reg_space_needed;
6251     }
6252   else
6253     {
6254       frame->stack_realign_offset = offset;
6255 
6256       if (TARGET_64BIT && m->call_ms2sysv)
6257 	{
6258 	  m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
6259 	  offset += xlogue_layout::get_instance ().get_stack_space_used ();
6260 	}
6261 
6262       /* Align and set SSE register save area.  */
6263       else if (frame->nsseregs)
6264 	{
6265 	  /* If the incoming stack boundary is at least 16 bytes, or DRAP is
6266 	     required and the DRAP re-alignment boundary is at least 16 bytes,
6267 	     then we want the SSE register save area properly aligned.  */
6268 	  if (ix86_incoming_stack_boundary >= 128
6269 		  || (stack_realign_drap && stack_alignment_needed >= 16))
6270 	    offset = ROUND_UP (offset, 16);
6271 	  offset += frame->nsseregs * 16;
6272 	}
6273       frame->sse_reg_save_offset = offset;
6274       offset += frame->va_arg_size;
6275     }
6276 
6277   /* Align start of frame for local function.  When a function call
6278      is removed, it may become a leaf function.  But if argument may
6279      is removed, it may become a leaf function.  But if arguments may
6280      be passed on the stack, we need to align the stack when there is no
6281   if (m->call_ms2sysv
6282       || frame->va_arg_size != 0
6283       || size != 0
6284       || !crtl->is_leaf
6285       || (!crtl->tail_call_emit
6286 	  && cfun->machine->outgoing_args_on_stack)
6287       || cfun->calls_alloca
6288       || ix86_current_function_calls_tls_descriptor)
6289     offset = ROUND_UP (offset, stack_alignment_needed);
6290 
6291   /* Frame pointer points here.  */
6292   frame->frame_pointer_offset = offset;
6293 
6294   offset += size;
6295 
6296   /* Add outgoing arguments area.  Can be skipped if we eliminated
6297      all the function calls as dead code.
6298      Skipping is however impossible when function calls alloca.  Alloca
6299      expander assumes that last crtl->outgoing_args_size
6300      of stack frame are unused.  */
6301   if (ACCUMULATE_OUTGOING_ARGS
6302       && (!crtl->is_leaf || cfun->calls_alloca
6303 	  || ix86_current_function_calls_tls_descriptor))
6304     {
6305       offset += crtl->outgoing_args_size;
6306       frame->outgoing_arguments_size = crtl->outgoing_args_size;
6307     }
6308   else
6309     frame->outgoing_arguments_size = 0;
6310 
6311   /* Align stack boundary.  Only needed if we're calling another function
6312      or using alloca.  */
6313   if (!crtl->is_leaf || cfun->calls_alloca
6314       || ix86_current_function_calls_tls_descriptor)
6315     offset = ROUND_UP (offset, preferred_alignment);
6316 
6317   /* We've reached end of stack frame.  */
6318   frame->stack_pointer_offset = offset;
6319 
6320   /* Size prologue needs to allocate.  */
6321   to_allocate = offset - frame->sse_reg_save_offset;
6322 
6323   if ((!to_allocate && frame->nregs <= 1)
6324       || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
6325       /* If stack clash probing needs a loop, then it needs a
6326 	 scratch register.  But the returned register is only guaranteed
6327 	 to be safe to use after register saves are complete.  So if
6328 	 stack clash protections are enabled and the allocated frame is
6329 	 larger than the probe interval, then use pushes to save
6330 	 callee saved registers.  */
6331       || (flag_stack_clash_protection && to_allocate > get_probe_interval ()))
6332     frame->save_regs_using_mov = false;
6333 
6334   if (ix86_using_red_zone ()
6335       && crtl->sp_is_unchanging
6336       && crtl->is_leaf
6337       && !ix86_pc_thunk_call_expanded
6338       && !ix86_current_function_calls_tls_descriptor)
6339     {
6340       frame->red_zone_size = to_allocate;
6341       if (frame->save_regs_using_mov)
6342 	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6343       if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6344 	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6345     }
6346   else
6347     frame->red_zone_size = 0;
6348   frame->stack_pointer_offset -= frame->red_zone_size;
6349 
6350   /* The SEH frame pointer location is near the bottom of the frame.
6351      This is enforced by the fact that the difference between the
6352      stack pointer and the frame pointer is limited to 240 bytes in
6353      the unwind data structure.  */
6354   if (TARGET_SEH)
6355     {
6356       /* Force the frame pointer to point at or below the lowest register save
6357 	 area, see the SEH code in config/i386/winnt.c for the rationale.  */
6358       frame->hard_frame_pointer_offset = frame->sse_reg_save_offset;
6359 
6360       /* If we can leave the frame pointer where it is, do so; however return
6361 	 the establisher frame for __builtin_frame_address (0) or else if the
6362 	 frame overflows the SEH maximum frame size.
6363 
6364 	 Note that the value returned by __builtin_frame_address (0) is quite
6365 	 constrained, because setjmp is piggybacked on the SEH machinery with
6366 	 recent versions of MinGW:
6367 
6368 	  #    elif defined(__SEH__)
6369 	  #     if defined(__aarch64__) || defined(_ARM64_)
6370 	  #      define setjmp(BUF) _setjmp((BUF), __builtin_sponentry())
6371 	  #     elif (__MINGW_GCC_VERSION < 40702)
6372 	  #      define setjmp(BUF) _setjmp((BUF), mingw_getsp())
6373 	  #     else
6374 	  #      define setjmp(BUF) _setjmp((BUF), __builtin_frame_address (0))
6375 	  #     endif
6376 
6377 	 and the second argument passed to _setjmp, if not null, is forwarded
6378 	 to the TargetFrame parameter of RtlUnwindEx by longjmp (after it has
6379 	 built an ExceptionRecord on the fly describing the setjmp buffer).  */
6380       const HOST_WIDE_INT diff
6381 	= frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
6382       if (diff <= 255 && !crtl->accesses_prior_frames)
6383 	{
6384 	  /* The resulting diff will be a multiple of 16 lower than 255,
6385 	     i.e. at most 240 as required by the unwind data structure.  */
6386 	  frame->hard_frame_pointer_offset += (diff & 15);
6387 	}
6388       else if (diff <= SEH_MAX_FRAME_SIZE && !crtl->accesses_prior_frames)
6389 	{
6390 	  /* Ideally we'd determine what portion of the local stack frame
6391 	     (within the constraint of the lowest 240) is most heavily used.
6392 	     But without that complication, simply bias the frame pointer
6393 	     by 128 bytes so as to maximize the amount of the local stack
6394 	     frame that is addressable with 8-bit offsets.  */
6395 	  frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
6396 	}
6397       else
6398 	frame->hard_frame_pointer_offset = frame->hfp_save_offset;
6399     }
6400 }
6401 
6402 /* This is semi-inlined memory_address_length, but simplified
6403    since we know that we're always dealing with reg+offset, and
6404    to avoid having to create and discard all that rtl.  */
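/* For example (illustrative): %rbp or %r13 with a zero offset still need a
   one-byte displacement, so the length is 1, while other registers need no
   displacement at all; any register with an offset in [-128, 127] costs one
   byte, otherwise four; and %rsp or %r12 add one more byte for the
   mandatory SIB byte.  */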
6405 
6406 static inline int
6407 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
6408 {
6409   int len = 4;
6410 
6411   if (offset == 0)
6412     {
6413       /* EBP and R13 cannot be encoded without an offset.  */
6414       len = (regno == BP_REG || regno == R13_REG);
6415     }
6416   else if (IN_RANGE (offset, -128, 127))
6417     len = 1;
6418 
6419   /* ESP and R12 must be encoded with a SIB byte.  */
6420   if (regno == SP_REG || regno == R12_REG)
6421     len++;
6422 
6423   return len;
6424 }
6425 
6426 /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
6427    the frame save area.  The register is saved at CFA - CFA_OFFSET.  */
6428 
6429 static bool
6430 sp_valid_at (HOST_WIDE_INT cfa_offset)
6431 {
6432   const struct machine_frame_state &fs = cfun->machine->fs;
6433   if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
6434     {
6435       /* Validate that the cfa_offset isn't in a "no-man's land".  */
6436       gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
6437       return false;
6438     }
6439   return fs.sp_valid;
6440 }
6441 
6442 /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
6443    the frame save area.  The register is saved at CFA - CFA_OFFSET.  */
6444 
6445 static inline bool
6446 fp_valid_at (HOST_WIDE_INT cfa_offset)
6447 {
6448   const struct machine_frame_state &fs = cfun->machine->fs;
6449   if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
6450     {
6451       /* Validate that the cfa_offset isn't in a "no-man's land".  */
6452       gcc_assert (cfa_offset >= fs.sp_realigned_offset);
6453       return false;
6454     }
6455   return fs.fp_valid;
6456 }
6457 
6458 /* Choose a base register based upon alignment requested, speed and/or
6459    size.  */
6460 
6461 static void
6462 choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
6463 		HOST_WIDE_INT &base_offset,
6464 		unsigned int align_reqested, unsigned int *align)
6465 {
6466   const struct machine_function *m = cfun->machine;
6467   unsigned int hfp_align;
6468   unsigned int drap_align;
6469   unsigned int sp_align;
6470   bool hfp_ok  = fp_valid_at (cfa_offset);
6471   bool drap_ok = m->fs.drap_valid;
6472   bool sp_ok   = sp_valid_at (cfa_offset);
6473 
6474   hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
6475 
6476   /* Filter out any registers that don't meet the requested alignment
6477      criteria.  */
6478   if (align_reqested)
6479     {
6480       if (m->fs.realigned)
6481 	hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
6482       /* SEH unwind code does do not currently support REG_CFA_EXPRESSION
6483       /* SEH unwind code does not currently support REG_CFA_EXPRESSION
6484 	 notes (which we would need in order to use a realigned stack pointer),
6485       else if (m->fs.sp_realigned)
6486 	sp_align = crtl->stack_alignment_needed;
6487 
6488       hfp_ok = hfp_ok && hfp_align >= align_reqested;
6489       drap_ok = drap_ok && drap_align >= align_reqested;
6490       sp_ok = sp_ok && sp_align >= align_reqested;
6491     }
6492 
6493   if (m->use_fast_prologue_epilogue)
6494     {
6495       /* Choose the base register most likely to allow the most scheduling
6496          opportunities.  Generally FP is valid throughout the function,
6497          while DRAP must be reloaded within the epilogue.  But choose either
6498          over the SP due to increased encoding size.  */
6499 
6500       if (hfp_ok)
6501 	{
6502 	  base_reg = hard_frame_pointer_rtx;
6503 	  base_offset = m->fs.fp_offset - cfa_offset;
6504 	}
6505       else if (drap_ok)
6506 	{
6507 	  base_reg = crtl->drap_reg;
6508 	  base_offset = 0 - cfa_offset;
6509 	}
6510       else if (sp_ok)
6511 	{
6512 	  base_reg = stack_pointer_rtx;
6513 	  base_offset = m->fs.sp_offset - cfa_offset;
6514 	}
6515     }
6516   else
6517     {
6518       HOST_WIDE_INT toffset;
6519       int len = 16, tlen;
6520 
6521       /* Choose the base register with the smallest address encoding.
6522          With a tie, choose FP > DRAP > SP.  */
6523       if (sp_ok)
6524 	{
6525 	  base_reg = stack_pointer_rtx;
6526 	  base_offset = m->fs.sp_offset - cfa_offset;
6527           len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
6528 	}
6529       if (drap_ok)
6530 	{
6531 	  toffset = 0 - cfa_offset;
6532 	  tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
6533 	  if (tlen <= len)
6534 	    {
6535 	      base_reg = crtl->drap_reg;
6536 	      base_offset = toffset;
6537 	      len = tlen;
6538 	    }
6539 	}
6540       if (hfp_ok)
6541 	{
6542 	  toffset = m->fs.fp_offset - cfa_offset;
6543 	  tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
6544 	  if (tlen <= len)
6545 	    {
6546 	      base_reg = hard_frame_pointer_rtx;
6547 	      base_offset = toffset;
6548 	    }
6549 	}
6550     }
6551 
6552     /* Set the align return value.  */
6553     if (align)
6554       {
6555 	if (base_reg == stack_pointer_rtx)
6556 	  *align = sp_align;
6557 	else if (base_reg == crtl->drap_reg)
6558 	  *align = drap_align;
6559 	else if (base_reg == hard_frame_pointer_rtx)
6560 	  *align = hfp_align;
6561       }
6562 }
6563 
6564 /* Return an RTX that points to CFA_OFFSET within the stack frame and
6565    the alignment of address.  If ALIGN is non-null, it should point to
6566    an alignment value (in bits) that is preferred or zero and will
6567    recieve the alignment of the base register that was selected,
6568    receive the alignment of the base register that was selected,
6569    irrespective of whether or not CFA_OFFSET is a multiple of that
6570    non-immediate then SCRATCH_REGNO should specify a scratch register to
6571    use.
6572 
6573    The valid base registers are taken from CFUN->MACHINE->FS.  */
6574 
6575 static rtx
6576 choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
6577 		 unsigned int scratch_regno = INVALID_REGNUM)
6578 {
6579   rtx base_reg = NULL;
6580   HOST_WIDE_INT base_offset = 0;
6581 
6582   /* If a specific alignment is requested, try to get a base register
6583      with that alignment first.  */
6584   if (align && *align)
6585     choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
6586 
6587   if (!base_reg)
6588     choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
6589 
6590   gcc_assert (base_reg != NULL);
6591 
6592   rtx base_offset_rtx = GEN_INT (base_offset);
6593 
6594   if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
6595     {
6596       gcc_assert (scratch_regno != INVALID_REGNUM);
6597 
6598       rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
6599       emit_move_insn (scratch_reg, base_offset_rtx);
6600 
6601       return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
6602     }
6603 
6604   return plus_constant (Pmode, base_reg, base_offset);
6605 }
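
/* Illustrative sketch (not part of GCC): choose_baseaddr falls back to the
   SCRATCH_REGNO path whenever the selected base offset cannot be encoded as
   an addressing-mode displacement, which for 64-bit code means a value that
   does not fit in a sign-extended 32-bit immediate (what
   x86_64_immediate_operand checks for a CONST_INT).  A minimal model of that
   decision, using plain long long in place of HOST_WIDE_INT:  */
#if 0
static int
offset_fits_simm32 (long long offset)
{
  /* x86-64 displacements are sign-extended 32-bit values.  */
  return offset >= -2147483648LL && offset < 2147483648LL;
}

/* offset_fits_simm32 (0x7fffffffLL) -> 1: take the plus_constant path.
   offset_fits_simm32 (0x80000000LL) -> 0: move the offset into the scratch
   register and address the slot as base_reg + scratch_reg.  */
#endif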
6606 
6607 /* Emit code to save registers in the prologue.  */
6608 
6609 static void
6610 ix86_emit_save_regs (void)
6611 {
6612   unsigned int regno;
6613   rtx_insn *insn;
6614 
6615   for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
6616     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6617       {
6618 	insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
6619 	RTX_FRAME_RELATED_P (insn) = 1;
6620       }
6621 }
6622 
6623 /* Emit a single register save at CFA - CFA_OFFSET.  */
6624 
6625 static void
6626 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
6627 			      HOST_WIDE_INT cfa_offset)
6628 {
6629   struct machine_function *m = cfun->machine;
6630   rtx reg = gen_rtx_REG (mode, regno);
6631   rtx mem, addr, base, insn;
6632   unsigned int align = GET_MODE_ALIGNMENT (mode);
6633 
6634   addr = choose_baseaddr (cfa_offset, &align);
6635   mem = gen_frame_mem (mode, addr);
6636 
6637   /* The location alignment depends upon the base register.  */
6638   align = MIN (GET_MODE_ALIGNMENT (mode), align);
6639   gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
6640   set_mem_align (mem, align);
6641 
6642   insn = emit_insn (gen_rtx_SET (mem, reg));
6643   RTX_FRAME_RELATED_P (insn) = 1;
6644 
6645   base = addr;
6646   if (GET_CODE (base) == PLUS)
6647     base = XEXP (base, 0);
6648   gcc_checking_assert (REG_P (base));
6649 
6650   /* When saving registers into a re-aligned local stack frame, avoid
6651      any tricky guessing by dwarf2out.  */
6652   if (m->fs.realigned)
6653     {
6654       gcc_checking_assert (stack_realign_drap);
6655 
6656       if (regno == REGNO (crtl->drap_reg))
6657 	{
6658 	  /* A bit of a hack.  We force the DRAP register to be saved in
6659 	     the re-aligned stack frame, which provides us with a copy
6660 	     of the CFA that will last past the prologue.  Install it.  */
6661 	  gcc_checking_assert (cfun->machine->fs.fp_valid);
6662 	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
6663 				cfun->machine->fs.fp_offset - cfa_offset);
6664 	  mem = gen_rtx_MEM (mode, addr);
6665 	  add_reg_note (insn, REG_CFA_DEF_CFA, mem);
6666 	}
6667       else
6668 	{
6669 	  /* The frame pointer is a stable reference within the
6670 	     aligned frame.  Use it.  */
6671 	  gcc_checking_assert (cfun->machine->fs.fp_valid);
6672 	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
6673 				cfun->machine->fs.fp_offset - cfa_offset);
6674 	  mem = gen_rtx_MEM (mode, addr);
6675 	  add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
6676 	}
6677     }
6678 
6679   else if (base == stack_pointer_rtx && m->fs.sp_realigned
6680 	   && cfa_offset >= m->fs.sp_realigned_offset)
6681     {
6682       gcc_checking_assert (stack_realign_fp);
6683       add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
6684     }
6685 
6686   /* The memory may not be relative to the current CFA register,
6687      which means that we may need to generate a new pattern for
6688      use by the unwind info.  */
6689   else if (base != m->fs.cfa_reg)
6690     {
6691       addr = plus_constant (Pmode, m->fs.cfa_reg,
6692 			    m->fs.cfa_offset - cfa_offset);
6693       mem = gen_rtx_MEM (mode, addr);
6694       add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
6695     }
6696 }
6697 
6698 /* Emit code to save registers using MOV insns.
6699    First register is stored at CFA - CFA_OFFSET.  */
6700 static void
6701 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
6702 {
6703   unsigned int regno;
6704 
6705   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6706     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6707       {
6708         ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
6709 	cfa_offset -= UNITS_PER_WORD;
6710       }
6711 }
6712 
6713 /* Emit code to save SSE registers using MOV insns.
6714    First register is stored at CFA - CFA_OFFSET.  */
6715 static void
6716 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
6717 {
6718   unsigned int regno;
6719 
6720   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6721     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6722       {
6723 	ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
6724 	cfa_offset -= GET_MODE_SIZE (V4SFmode);
6725       }
6726 }
6727 
6728 static GTY(()) rtx queued_cfa_restores;
6729 
6730 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
6731    manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
6732    Don't add the note if the previously saved value will be left untouched
6733    within the stack red-zone until return, as unwinders can find the same
6734    value in the register and on the stack.  */
6735 
6736 static void
6737 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
6738 {
6739   if (!crtl->shrink_wrapped
6740       && cfa_offset <= cfun->machine->fs.red_zone_offset)
6741     return;
6742 
6743   if (insn)
6744     {
6745       add_reg_note (insn, REG_CFA_RESTORE, reg);
6746       RTX_FRAME_RELATED_P (insn) = 1;
6747     }
6748   else
6749     queued_cfa_restores
6750       = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
6751 }
6752 
6753 /* Add queued REG_CFA_RESTORE notes if any to INSN.  */
6754 
6755 static void
6756 ix86_add_queued_cfa_restore_notes (rtx insn)
6757 {
6758   rtx last;
6759   if (!queued_cfa_restores)
6760     return;
6761   for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
6762     ;
6763   XEXP (last, 1) = REG_NOTES (insn);
6764   REG_NOTES (insn) = queued_cfa_restores;
6765   queued_cfa_restores = NULL_RTX;
6766   RTX_FRAME_RELATED_P (insn) = 1;
6767 }
6768 
6769 /* Expand prologue or epilogue stack adjustment.
6770    The pattern exists to put a dependency on all ebp-based memory accesses.
6771    STYLE should be negative if instructions should be marked as frame related,
6772    zero if the %r11 register is live and cannot be freely used, and positive
6773    otherwise.  */
6774 
6775 static rtx
6776 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
6777 			   int style, bool set_cfa)
6778 {
6779   struct machine_function *m = cfun->machine;
6780   rtx addend = offset;
6781   rtx insn;
6782   bool add_frame_related_expr = false;
6783 
6784   if (!x86_64_immediate_operand (offset, Pmode))
6785     {
6786       /* r11 is used by indirect sibcall return as well, set before the
6787 	 epilogue and used after the epilogue.  */
6788       if (style)
6789         addend = gen_rtx_REG (Pmode, R11_REG);
6790       else
6791 	{
6792 	  gcc_assert (src != hard_frame_pointer_rtx
6793 		      && dest != hard_frame_pointer_rtx);
6794 	  addend = hard_frame_pointer_rtx;
6795 	}
6796       emit_insn (gen_rtx_SET (addend, offset));
6797       if (style < 0)
6798 	add_frame_related_expr = true;
6799     }
6800 
6801   insn = emit_insn (gen_pro_epilogue_adjust_stack_add
6802 		    (Pmode, dest, src, addend));
6803   if (style >= 0)
6804     ix86_add_queued_cfa_restore_notes (insn);
6805 
6806   if (set_cfa)
6807     {
6808       rtx r;
6809 
6810       gcc_assert (m->fs.cfa_reg == src);
6811       m->fs.cfa_offset += INTVAL (offset);
6812       m->fs.cfa_reg = dest;
6813 
6814       r = gen_rtx_PLUS (Pmode, src, offset);
6815       r = gen_rtx_SET (dest, r);
6816       add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
6817       RTX_FRAME_RELATED_P (insn) = 1;
6818     }
6819   else if (style < 0)
6820     {
6821       RTX_FRAME_RELATED_P (insn) = 1;
6822       if (add_frame_related_expr)
6823 	{
6824 	  rtx r = gen_rtx_PLUS (Pmode, src, offset);
6825 	  r = gen_rtx_SET (dest, r);
6826 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
6827 	}
6828     }
6829 
6830   if (dest == stack_pointer_rtx)
6831     {
6832       HOST_WIDE_INT ooffset = m->fs.sp_offset;
6833       bool valid = m->fs.sp_valid;
6834       bool realigned = m->fs.sp_realigned;
6835 
6836       if (src == hard_frame_pointer_rtx)
6837 	{
6838 	  valid = m->fs.fp_valid;
6839 	  realigned = false;
6840 	  ooffset = m->fs.fp_offset;
6841 	}
6842       else if (src == crtl->drap_reg)
6843 	{
6844 	  valid = m->fs.drap_valid;
6845 	  realigned = false;
6846 	  ooffset = 0;
6847 	}
6848       else
6849 	{
6850 	  /* Else there are two possibilities: SP itself, which we set
6851 	     up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
6852 	     taken care of by hand along the eh_return path.  */
6853 	  gcc_checking_assert (src == stack_pointer_rtx
6854 			       || offset == const0_rtx);
6855 	}
6856 
6857       m->fs.sp_offset = ooffset - INTVAL (offset);
6858       m->fs.sp_valid = valid;
6859       m->fs.sp_realigned = realigned;
6860     }
6861   return insn;
6862 }
6863 
6864 /* Find an available register to be used as dynamic realign argument
6865    pointer register.  Such a register will be written in the prologue and
6866    used at the beginning of the body, so it must not be
6867 	1. parameter passing register.
6868 	2. GOT pointer.
6869    We reuse static-chain register if it is available.  Otherwise, we
6870    use DI for i386 and R13 for x86-64.  We chose R13 since it has
6871    shorter encoding.
6872 
6873    Return: the regno of chosen register.  */
6874 
6875 static unsigned int
6876 find_drap_reg (void)
6877 {
6878   tree decl = cfun->decl;
6879 
6880   /* Always use callee-saved register if there are no caller-saved
6881      registers.  */
6882   if (TARGET_64BIT)
6883     {
6884       /* Use R13 for a nested function or a function that needs a static
6885 	 chain.  Since a function with a tail call may use any caller-saved
6886 	 register in the epilogue, DRAP must not use a caller-saved
6887 	 register in that case.  */
6888       if (DECL_STATIC_CHAIN (decl)
6889 	  || cfun->machine->no_caller_saved_registers
6890 	  || crtl->tail_call_emit)
6891 	return R13_REG;
6892 
6893       return R10_REG;
6894     }
6895   else
6896     {
6897       /* Use DI for a nested function or a function that needs a static
6898 	 chain.  Since a function with a tail call may use any caller-saved
6899 	 register in the epilogue, DRAP must not use a caller-saved
6900 	 register in that case.  */
6901       if (DECL_STATIC_CHAIN (decl)
6902 	  || cfun->machine->no_caller_saved_registers
6903 	  || crtl->tail_call_emit)
6904 	return DI_REG;
6905 
6906       /* Reuse static chain register if it isn't used for parameter
6907          passing.  */
6908       if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
6909 	{
6910 	  unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
6911 	  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
6912 	    return CX_REG;
6913 	}
6914       return DI_REG;
6915     }
6916 }
6917 
6918 /* Return minimum incoming stack alignment.  */
6919 
6920 static unsigned int
6921 ix86_minimum_incoming_stack_boundary (bool sibcall)
6922 {
6923   unsigned int incoming_stack_boundary;
6924 
6925   /* The stack of an interrupt handler is aligned to 128 bits in 64-bit mode.  */
6926   if (cfun->machine->func_type != TYPE_NORMAL)
6927     incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
6928   /* Prefer the one specified at command line. */
6929   else if (ix86_user_incoming_stack_boundary)
6930     incoming_stack_boundary = ix86_user_incoming_stack_boundary;
6931   /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
6932      if -mstackrealign is used, this isn't a sibcall check, and the
6933      estimated stack alignment is 128 bits.  */
6934   else if (!sibcall
6935 	   && ix86_force_align_arg_pointer
6936 	   && crtl->stack_alignment_estimated == 128)
6937     incoming_stack_boundary = MIN_STACK_BOUNDARY;
6938   else
6939     incoming_stack_boundary = ix86_default_incoming_stack_boundary;
6940 
6941   /* Incoming stack alignment can be changed on individual functions
6942      via force_align_arg_pointer attribute.  We use the smallest
6943      incoming stack boundary.  */
6944   if (incoming_stack_boundary > MIN_STACK_BOUNDARY
6945       && lookup_attribute ("force_align_arg_pointer",
6946 			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
6947     incoming_stack_boundary = MIN_STACK_BOUNDARY;
6948 
6949   /* The incoming stack frame has to be aligned at least at
6950      parm_stack_boundary.  */
6951   if (incoming_stack_boundary < crtl->parm_stack_boundary)
6952     incoming_stack_boundary = crtl->parm_stack_boundary;
6953 
6954   /* The stack at the entry of main is aligned by the runtime.  We use the
6955      smallest incoming stack boundary.  */
6956   if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
6957       && DECL_NAME (current_function_decl)
6958       && MAIN_NAME_P (DECL_NAME (current_function_decl))
6959       && DECL_FILE_SCOPE_P (current_function_decl))
6960     incoming_stack_boundary = MAIN_STACK_BOUNDARY;
6961 
6962   return incoming_stack_boundary;
6963 }
6964 
6965 /* Update incoming stack boundary and estimated stack alignment.  */
6966 
6967 static void
6968 ix86_update_stack_boundary (void)
6969 {
6970   ix86_incoming_stack_boundary
6971     = ix86_minimum_incoming_stack_boundary (false);
6972 
6973   /* x86_64 varargs need 16-byte stack alignment for the register save area.  */
6974   if (TARGET_64BIT
6975       && cfun->stdarg
6976       && crtl->stack_alignment_estimated < 128)
6977     crtl->stack_alignment_estimated = 128;
6978 
6979   /* __tls_get_addr needs to be called with 16-byte aligned stack.  */
6980   if (ix86_tls_descriptor_calls_expanded_in_cfun
6981       && crtl->preferred_stack_boundary < 128)
6982     crtl->preferred_stack_boundary = 128;
6983 }
6984 
6985 /* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
6986    needed or an rtx for DRAP otherwise.  */
6987 
6988 static rtx
6989 ix86_get_drap_rtx (void)
6990 {
6991   /* We must use DRAP if there are outgoing arguments on stack or
6992   /* We must use DRAP if there are outgoing arguments on the stack or
6993      the stack pointer register is clobbered by an asm statement and
6994   if (ix86_force_drap
6995       || ((cfun->machine->outgoing_args_on_stack
6996 	   || crtl->sp_is_clobbered_by_asm)
6997 	  && !ACCUMULATE_OUTGOING_ARGS))
6998     crtl->need_drap = true;
6999 
7000   if (stack_realign_drap)
7001     {
7002       /* Assign DRAP to vDRAP and return vDRAP.  */
7003       unsigned int regno = find_drap_reg ();
7004       rtx drap_vreg;
7005       rtx arg_ptr;
7006       rtx_insn *seq, *insn;
7007 
7008       arg_ptr = gen_rtx_REG (Pmode, regno);
7009       crtl->drap_reg = arg_ptr;
7010 
7011       start_sequence ();
7012       drap_vreg = copy_to_reg (arg_ptr);
7013       seq = get_insns ();
7014       end_sequence ();
7015 
7016       insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
7017       if (!optimize)
7018 	{
7019 	  add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
7020 	  RTX_FRAME_RELATED_P (insn) = 1;
7021 	}
7022       return drap_vreg;
7023     }
7024   else
7025     return NULL;
7026 }
7027 
7028 /* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
7029 
7030 static rtx
7031 ix86_internal_arg_pointer (void)
7032 {
7033   return virtual_incoming_args_rtx;
7034 }
7035 
7036 struct scratch_reg {
7037   rtx reg;
7038   bool saved;
7039 };
7040 
7041 /* Return a short-lived scratch register for use on function entry.
7042    In 32-bit mode, it is valid only after the registers are saved
7043    in the prologue.  This register must be released by means of
7044    release_scratch_register_on_entry once it is dead.  */
7045 
7046 static void
7047 get_scratch_register_on_entry (struct scratch_reg *sr)
7048 {
7049   int regno;
7050 
7051   sr->saved = false;
7052 
7053   if (TARGET_64BIT)
7054     {
7055       /* We always use R11 in 64-bit mode.  */
7056       regno = R11_REG;
7057     }
7058   else
7059     {
7060       tree decl = current_function_decl, fntype = TREE_TYPE (decl);
7061       bool fastcall_p
7062 	= lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
7063       bool thiscall_p
7064 	= lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
7065       bool static_chain_p = DECL_STATIC_CHAIN (decl);
7066       int regparm = ix86_function_regparm (fntype, decl);
7067       int drap_regno
7068 	= crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
7069 
7070       /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
7071 	  for the static chain register.  */
7072       if ((regparm < 1 || (fastcall_p && !static_chain_p))
7073 	  && drap_regno != AX_REG)
7074 	regno = AX_REG;
7075       /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
7076 	  for the static chain register.  */
7077       else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
7078         regno = AX_REG;
7079       else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
7080 	regno = DX_REG;
7081       /* ecx is the static chain register.  */
7082       else if (regparm < 3 && !fastcall_p && !thiscall_p
7083 	       && !static_chain_p
7084 	       && drap_regno != CX_REG)
7085 	regno = CX_REG;
7086       else if (ix86_save_reg (BX_REG, true, false))
7087 	regno = BX_REG;
7088       /* esi is the static chain register.  */
7089       else if (!(regparm == 3 && static_chain_p)
7090 	       && ix86_save_reg (SI_REG, true, false))
7091 	regno = SI_REG;
7092       else if (ix86_save_reg (DI_REG, true, false))
7093 	regno = DI_REG;
7094       else
7095 	{
7096 	  regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
7097 	  sr->saved = true;
7098 	}
7099     }
7100 
7101   sr->reg = gen_rtx_REG (Pmode, regno);
7102   if (sr->saved)
7103     {
7104       rtx_insn *insn = emit_insn (gen_push (sr->reg));
7105       RTX_FRAME_RELATED_P (insn) = 1;
7106     }
7107 }
7108 
7109 /* Release a scratch register obtained from the preceding function.
7110 
7111    If RELEASE_VIA_POP is true, we just pop the register off the stack
7112    to release it.  This is what non-Linux systems use with -fstack-check.
7113 
7114    Otherwise we use OFFSET to locate the saved register and the
7115    allocated stack space becomes part of the local frame and is
7116    deallocated by the epilogue.  */
7117 
7118 static void
7119 release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
7120 				   bool release_via_pop)
7121 {
7122   if (sr->saved)
7123     {
7124       if (release_via_pop)
7125 	{
7126 	  struct machine_function *m = cfun->machine;
7127 	  rtx x, insn = emit_insn (gen_pop (sr->reg));
7128 
7129 	  /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop.  */
7130 	  RTX_FRAME_RELATED_P (insn) = 1;
7131 	  x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
7132 	  x = gen_rtx_SET (stack_pointer_rtx, x);
7133 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
7134 	  m->fs.sp_offset -= UNITS_PER_WORD;
7135 	}
7136       else
7137 	{
7138 	  rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset));
7139 	  x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
7140 	  emit_insn (x);
7141 	}
7142     }
7143 }
7144 
7145 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
7146 
7147    This differs from the next routine in that it tries hard to prevent
7148    attacks that jump the stack guard.  Thus it is never allowed to allocate
7149    more than PROBE_INTERVAL bytes of stack space without a suitable
7150    probe.
7151 
7152    INT_REGISTERS_SAVED is true if integer registers have already been
7153    pushed on the stack.  */
7154 
7155 static void
7156 ix86_adjust_stack_and_probe_stack_clash (HOST_WIDE_INT size,
7157 					 const bool int_registers_saved)
7158 {
7159   struct machine_function *m = cfun->machine;
7160 
7161   /* If this function does not statically allocate stack space, then
7162      no probes are needed.  */
7163   if (!size)
7164     {
7165       /* However, the allocation of space via pushes for register
7166 	 saves could be viewed as allocating space, but without the
7167 	 need to probe.  */
7168       if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
7169         dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
7170       else
7171 	dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
7172       return;
7173     }
7174 
7175   /* If we are a noreturn function, then we have to consider the
7176      possibility that we're called via a jump rather than a call.
7177 
7178      Thus we don't have the implicit probe generated by saving the
7179      return address into the stack at the call.  Thus, the stack
7180      pointer could be anywhere in the guard page.  The safe thing
7181      to do is emit a probe now.
7182 
7183      The probe can be avoided if we have already emitted any callee
7184      register saves into the stack or have a frame pointer (which will
7185      have been saved as well).  Those saves will function as implicit
7186      probes.
7187 
7188      ?!? This should be revamped to work like aarch64 and s390 where
7189      we track the offset from the most recent probe.  Normally that
7190      offset would be zero.  For a noreturn function we would reset
7191      it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT).   Then
7192      we just probe when we cross PROBE_INTERVAL.  */
7193   if (TREE_THIS_VOLATILE (cfun->decl)
7194       && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
7195     {
7196       /* We can safely use any register here since we're just going to push
7197 	 its value and immediately pop it back.  But we do try and avoid
7198 	 argument passing registers so as not to introduce dependencies in
7199 	 the pipeline.  For 32 bit we use %esi and for 64 bit we use %rax.  */
7200       rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
7201       rtx_insn *insn_push = emit_insn (gen_push (dummy_reg));
7202       rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg));
7203       m->fs.sp_offset -= UNITS_PER_WORD;
7204       if (m->fs.cfa_reg == stack_pointer_rtx)
7205 	{
7206 	  m->fs.cfa_offset -= UNITS_PER_WORD;
7207 	  rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
7208 	  x = gen_rtx_SET (stack_pointer_rtx, x);
7209 	  add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
7210 	  RTX_FRAME_RELATED_P (insn_push) = 1;
7211 	  x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
7212 	  x = gen_rtx_SET (stack_pointer_rtx, x);
7213 	  add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
7214 	  RTX_FRAME_RELATED_P (insn_pop) = 1;
7215 	}
7216       emit_insn (gen_blockage ());
7217     }
7218 
7219   /* If we allocate less than the size of the guard statically,
7220      then no probing is necessary, but we do need to allocate
7221      the stack.  */
7222   if (size < (1 << param_stack_clash_protection_guard_size))
7223     {
7224       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7225 			         GEN_INT (-size), -1,
7226 			         m->fs.cfa_reg == stack_pointer_rtx);
7227       dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
7228       return;
7229     }
7230 
7231   /* We're allocating a large enough stack frame that we need to
7232      emit probes.  Either emit them inline or in a loop depending
7233      on the size.  */
7234   HOST_WIDE_INT probe_interval = get_probe_interval ();
7235   if (size <= 4 * probe_interval)
7236     {
7237       HOST_WIDE_INT i;
7238       for (i = probe_interval; i <= size; i += probe_interval)
7239 	{
7240 	  /* Allocate PROBE_INTERVAL bytes.  */
7241 	  rtx insn
7242 	    = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7243 					 GEN_INT (-probe_interval), -1,
7244 					 m->fs.cfa_reg == stack_pointer_rtx);
7245 	  add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
7246 
7247 	  /* And probe at *sp.  */
7248 	  emit_stack_probe (stack_pointer_rtx);
7249 	  emit_insn (gen_blockage ());
7250 	}
7251 
7252       /* We need to allocate space for the residual, but we do not need
7253 	 to probe the residual.  */
7254       HOST_WIDE_INT residual = (i - probe_interval - size);
7255       if (residual)
7256 	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7257 				   GEN_INT (residual), -1,
7258 				   m->fs.cfa_reg == stack_pointer_rtx);
7259       dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
7260     }
7261   else
7262     {
7263       /* We expect the GP registers to be saved when probes are used
7264 	 as the probing sequences might need a scratch register and
7265 	 the routine to allocate one assumes the integer registers
7266 	 have already been saved.  */
7267       gcc_assert (int_registers_saved);
7268 
7269       struct scratch_reg sr;
7270       get_scratch_register_on_entry (&sr);
7271 
7272       /* If we needed to save a register, then account for any space
7273 	 that was pushed (we are not going to pop the register when
7274 	 we do the restore).  */
7275       if (sr.saved)
7276 	size -= UNITS_PER_WORD;
7277 
7278       /* Step 1: round SIZE down to a multiple of the interval.  */
7279       HOST_WIDE_INT rounded_size = size & -probe_interval;
7280 
7281       /* Step 2: compute final value of the loop counter.  Use lea if
7282 	 possible.  */
7283       rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
7284       rtx insn;
7285       if (address_no_seg_operand (addr, Pmode))
7286 	insn = emit_insn (gen_rtx_SET (sr.reg, addr));
7287       else
7288 	{
7289 	  emit_move_insn (sr.reg, GEN_INT (-rounded_size));
7290 	  insn = emit_insn (gen_rtx_SET (sr.reg,
7291 					 gen_rtx_PLUS (Pmode, sr.reg,
7292 						       stack_pointer_rtx)));
7293 	}
7294       if (m->fs.cfa_reg == stack_pointer_rtx)
7295 	{
7296 	  add_reg_note (insn, REG_CFA_DEF_CFA,
7297 			plus_constant (Pmode, sr.reg,
7298 				       m->fs.cfa_offset + rounded_size));
7299 	  RTX_FRAME_RELATED_P (insn) = 1;
7300 	}
7301 
7302       /* Step 3: the loop.  */
7303       rtx size_rtx = GEN_INT (rounded_size);
7304       insn = emit_insn (gen_adjust_stack_and_probe (Pmode, sr.reg, sr.reg,
7305 						    size_rtx));
7306       if (m->fs.cfa_reg == stack_pointer_rtx)
7307 	{
7308 	  m->fs.cfa_offset += rounded_size;
7309 	  add_reg_note (insn, REG_CFA_DEF_CFA,
7310 			plus_constant (Pmode, stack_pointer_rtx,
7311 				       m->fs.cfa_offset));
7312 	  RTX_FRAME_RELATED_P (insn) = 1;
7313 	}
7314       m->fs.sp_offset += rounded_size;
7315       emit_insn (gen_blockage ());
7316 
7317       /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
7318 	 is equal to ROUNDED_SIZE.  */
7319 
7320       if (size != rounded_size)
7321 	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7322 				   GEN_INT (rounded_size - size), -1,
7323 				   m->fs.cfa_reg == stack_pointer_rtx);
7324       dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
7325 
7326       /* This does not deallocate the space reserved for the scratch
7327 	 register.  That will be deallocated in the epilogue.  */
7328       release_scratch_register_on_entry (&sr, size, false);
7329     }
7330 
7331   /* Make sure nothing is scheduled before we are done.  */
7332   emit_insn (gen_blockage ());
7333 }
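
/* Illustrative sketch (not part of GCC): the arithmetic behind the probing
   loop above.  Assuming the default 4096-byte probe interval, a frame larger
   than 4 * probe_interval is split into a rounded-down portion that is
   allocated and probed one interval at a time, plus an unprobed residual
   smaller than one interval.  Plain long long stands in for HOST_WIDE_INT:  */
#if 0
static void
stack_clash_split (long long size, long long probe_interval,
		   long long *rounded_size, long long *residual)
{
  /* Step 1 above: round SIZE down to a multiple of the interval.  */
  *rounded_size = size & -probe_interval;
  /* Step 4 above: the leftover is allocated but needs no probe.  */
  *residual = size - *rounded_size;
}

/* Example: stack_clash_split (70000, 4096, ...) yields rounded_size = 69632
   (17 loop iterations, each allocating and probing 4096 bytes) and
   residual = 368, which is adjusted for without a probe.  */
#endif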
7334 
7335 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
7336 
7337    INT_REGISTERS_SAVED is true if integer registers have already been
7338    pushed on the stack.  */
7339 
7340 static void
7341 ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
7342 			     const bool int_registers_saved)
7343 {
7344   /* We skip the probe for the first interval + a small dope of 4 words and
7345      probe that many bytes past the specified size to maintain a protection
7346      area at the bottom of the stack.  */
7347   const int dope = 4 * UNITS_PER_WORD;
7348   rtx size_rtx = GEN_INT (size), last;
7349 
7350   /* See if we have a constant small number of probes to generate.  If so,
7351      that's the easy case.  The run-time loop is made up of 9 insns in the
7352      generic case while the compile-time loop is made up of 3+2*(n-1) insns
7353      for n # of intervals.  */
7354   if (size <= 4 * get_probe_interval ())
7355     {
7356       HOST_WIDE_INT i, adjust;
7357       bool first_probe = true;
7358 
7359       /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
7360 	 values of N from 1 until it exceeds SIZE.  If only one probe is
7361 	 needed, this will not generate any code.  Then adjust and probe
7362 	 to PROBE_INTERVAL + SIZE.  */
7363       for (i = get_probe_interval (); i < size; i += get_probe_interval ())
7364 	{
7365 	  if (first_probe)
7366 	    {
7367 	      adjust = 2 * get_probe_interval () + dope;
7368 	      first_probe = false;
7369 	    }
7370 	  else
7371 	    adjust = get_probe_interval ();
7372 
7373 	  emit_insn (gen_rtx_SET (stack_pointer_rtx,
7374 				  plus_constant (Pmode, stack_pointer_rtx,
7375 						 -adjust)));
7376 	  emit_stack_probe (stack_pointer_rtx);
7377 	}
7378 
7379       if (first_probe)
7380 	adjust = size + get_probe_interval () + dope;
7381       else
7382         adjust = size + get_probe_interval () - i;
7383 
7384       emit_insn (gen_rtx_SET (stack_pointer_rtx,
7385 			      plus_constant (Pmode, stack_pointer_rtx,
7386 					     -adjust)));
7387       emit_stack_probe (stack_pointer_rtx);
7388 
7389       /* Adjust back to account for the additional first interval.  */
7390       last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
7391 				     plus_constant (Pmode, stack_pointer_rtx,
7392 						    (get_probe_interval ()
7393 						     + dope))));
7394     }
7395 
7396   /* Otherwise, do the same as above, but in a loop.  Note that we must be
7397      extra careful with variables wrapping around because we might be at
7398      the very top (or the very bottom) of the address space and we have
7399      to be able to handle this case properly; in particular, we use an
7400      equality test for the loop condition.  */
7401   else
7402     {
7403       /* We expect the GP registers to be saved when probes are used
7404 	 as the probing sequences might need a scratch register and
7405 	 the routine to allocate one assumes the integer registers
7406 	 have already been saved.  */
7407       gcc_assert (int_registers_saved);
7408 
7409       HOST_WIDE_INT rounded_size;
7410       struct scratch_reg sr;
7411 
7412       get_scratch_register_on_entry (&sr);
7413 
7414       /* If we needed to save a register, then account for any space
7415 	 that was pushed (we are not going to pop the register when
7416 	 we do the restore).  */
7417       if (sr.saved)
7418 	size -= UNITS_PER_WORD;
7419 
7420       /* Step 1: round SIZE to the previous multiple of the interval.  */
7421 
7422       rounded_size = ROUND_DOWN (size, get_probe_interval ());
7423 
7424 
7425       /* Step 2: compute initial and final value of the loop counter.  */
7426 
7427       /* SP = SP_0 + PROBE_INTERVAL.  */
7428       emit_insn (gen_rtx_SET (stack_pointer_rtx,
7429 			      plus_constant (Pmode, stack_pointer_rtx,
7430 					     - (get_probe_interval () + dope))));
7431 
7432       /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE.  */
7433       if (rounded_size <= (HOST_WIDE_INT_1 << 31))
7434 	emit_insn (gen_rtx_SET (sr.reg,
7435 				plus_constant (Pmode, stack_pointer_rtx,
7436 					       -rounded_size)));
7437       else
7438 	{
7439 	  emit_move_insn (sr.reg, GEN_INT (-rounded_size));
7440 	  emit_insn (gen_rtx_SET (sr.reg,
7441 				  gen_rtx_PLUS (Pmode, sr.reg,
7442 						stack_pointer_rtx)));
7443 	}
7444 
7445 
7446       /* Step 3: the loop
7447 
7448 	 do
7449 	   {
7450 	     SP = SP + PROBE_INTERVAL
7451 	     probe at SP
7452 	   }
7453 	 while (SP != LAST_ADDR)
7454 
7455 	 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
7456 	 values of N from 1 until it is equal to ROUNDED_SIZE.  */
7457 
7458       emit_insn (gen_adjust_stack_and_probe (Pmode, sr.reg, sr.reg, size_rtx));
7459 
7460 
7461       /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
7462 	 assert at compile-time that SIZE is equal to ROUNDED_SIZE.  */
7463 
7464       if (size != rounded_size)
7465 	{
7466 	  emit_insn (gen_rtx_SET (stack_pointer_rtx,
7467 			          plus_constant (Pmode, stack_pointer_rtx,
7468 						 rounded_size - size)));
7469 	  emit_stack_probe (stack_pointer_rtx);
7470 	}
7471 
7472       /* Adjust back to account for the additional first interval.  */
7473       last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
7474 				     plus_constant (Pmode, stack_pointer_rtx,
7475 						    (get_probe_interval ()
7476 						     + dope))));
7477 
7478       /* This does not deallocate the space reserved for the scratch
7479 	 register.  That will be deallocated in the epilogue.  */
7480       release_scratch_register_on_entry (&sr, size, false);
7481     }
7482 
7483   /* Even if the stack pointer isn't the CFA register, we need to correctly
7484      describe the adjustments made to it, in particular differentiate the
7485      frame-related ones from the frame-unrelated ones.  */
7486   if (size > 0)
7487     {
7488       rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
7489       XVECEXP (expr, 0, 0)
7490 	= gen_rtx_SET (stack_pointer_rtx,
7491 		       plus_constant (Pmode, stack_pointer_rtx, -size));
7492       XVECEXP (expr, 0, 1)
7493 	= gen_rtx_SET (stack_pointer_rtx,
7494 		       plus_constant (Pmode, stack_pointer_rtx,
7495 				      get_probe_interval () + dope + size));
7496       add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
7497       RTX_FRAME_RELATED_P (last) = 1;
7498 
7499       cfun->machine->fs.sp_offset += size;
7500     }
7501 
7502   /* Make sure nothing is scheduled before we are done.  */
7503   emit_insn (gen_blockage ());
7504 }
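
/* Illustrative sketch (not part of GCC): why the "adjust back" step above
   leaves the stack pointer exactly SIZE bytes lower.  The inline case first
   over-allocates by probe_interval + dope so the probes land past the
   requested area, then returns that slack at the end.  A minimal model of
   the bookkeeping, with probe_interval and dope passed in as plain numbers
   (mirroring get_probe_interval () and the 4-word dope):  */
#if 0
static long long
net_inline_adjustment (long long size, long long probe_interval, long long dope)
{
  long long i, moved = 0;
  int first_probe = 1;

  for (i = probe_interval; i < size; i += probe_interval)
    {
      moved += first_probe ? 2 * probe_interval + dope : probe_interval;
      first_probe = 0;
    }
  moved += first_probe ? size + probe_interval + dope
		       : size + probe_interval - i;
  /* The "adjust back" step gives the extra interval + dope back.  */
  moved -= probe_interval + dope;
  return moved;		/* Always equals SIZE.  */
}
#endif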
7505 
7506 /* Adjust the stack pointer up to REG while probing it.  */
7507 
7508 const char *
7509 output_adjust_stack_and_probe (rtx reg)
7510 {
7511   static int labelno = 0;
7512   char loop_lab[32];
7513   rtx xops[2];
7514 
7515   ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
7516 
7517   /* Loop.  */
7518   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
7519 
7520   /* SP = SP + PROBE_INTERVAL.  */
7521   xops[0] = stack_pointer_rtx;
7522   xops[1] = GEN_INT (get_probe_interval ());
7523   output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
7524 
7525   /* Probe at SP.  */
7526   xops[1] = const0_rtx;
7527   output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
7528 
7529   /* Test if SP == LAST_ADDR.  */
7530   xops[0] = stack_pointer_rtx;
7531   xops[1] = reg;
7532   output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
7533 
7534   /* Branch.  */
7535   fputs ("\tjne\t", asm_out_file);
7536   assemble_name_raw (asm_out_file, loop_lab);
7537   fputc ('\n', asm_out_file);
7538 
7539   return "";
7540 }
7541 
7542 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
7543    inclusive.  These are offsets from the current stack pointer.
7544 
7545    INT_REGISTERS_SAVED is true if integer registers have already been
7546    pushed on the stack.  */
7547 
7548 static void
7549 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
7550 			     const bool int_registers_saved)
7551 {
7552   /* See if we have a constant small number of probes to generate.  If so,
7553      that's the easy case.  The run-time loop is made up of 6 insns in the
7554      generic case while the compile-time loop is made up of n insns for n #
7555      of intervals.  */
7556   if (size <= 6 * get_probe_interval ())
7557     {
7558       HOST_WIDE_INT i;
7559 
7560       /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
7561 	 it exceeds SIZE.  If only one probe is needed, this will not
7562 	 generate any code.  Then probe at FIRST + SIZE.  */
7563       for (i = get_probe_interval (); i < size; i += get_probe_interval ())
7564 	emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
7565 					 -(first + i)));
7566 
7567       emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
7568 				       -(first + size)));
7569     }
7570 
7571   /* Otherwise, do the same as above, but in a loop.  Note that we must be
7572      extra careful with variables wrapping around because we might be at
7573      the very top (or the very bottom) of the address space and we have
7574      to be able to handle this case properly; in particular, we use an
7575      equality test for the loop condition.  */
7576   else
7577     {
7578       /* We expect the GP registers to be saved when probes are used
7579 	 as the probing sequences might need a scratch register and
7580 	 the routine to allocate one assumes the integer registers
7581 	 have already been saved.  */
7582       gcc_assert (int_registers_saved);
7583 
7584       HOST_WIDE_INT rounded_size, last;
7585       struct scratch_reg sr;
7586 
7587       get_scratch_register_on_entry (&sr);
7588 
7589 
7590       /* Step 1: round SIZE to the previous multiple of the interval.  */
7591 
7592       rounded_size = ROUND_DOWN (size, get_probe_interval ());
7593 
7594 
7595       /* Step 2: compute initial and final value of the loop counter.  */
7596 
7597       /* TEST_OFFSET = FIRST.  */
7598       emit_move_insn (sr.reg, GEN_INT (-first));
7599 
7600       /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
7601       last = first + rounded_size;
7602 
7603 
7604       /* Step 3: the loop
7605 
7606 	 do
7607 	   {
7608 	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
7609 	     probe at TEST_ADDR
7610 	   }
7611 	 while (TEST_ADDR != LAST_ADDR)
7612 
7613          probes at FIRST + N * PROBE_INTERVAL for values of N from 1
7614          until it is equal to ROUNDED_SIZE.  */
7615 
7616       emit_insn
7617 	(gen_probe_stack_range (Pmode, sr.reg, sr.reg, GEN_INT (-last)));
7618 
7619 
7620       /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
7621 	 that SIZE is equal to ROUNDED_SIZE.  */
7622 
7623       if (size != rounded_size)
7624 	emit_stack_probe (plus_constant (Pmode,
7625 					 gen_rtx_PLUS (Pmode,
7626 						       stack_pointer_rtx,
7627 						       sr.reg),
7628 					 rounded_size - size));
7629 
7630       release_scratch_register_on_entry (&sr, size, true);
7631     }
7632 
7633   /* Make sure nothing is scheduled before we are done.  */
7634   emit_insn (gen_blockage ());
7635 }
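
/* Illustrative sketch (not part of GCC): the displacements touched by the
   inline case of ix86_emit_probe_stack_range.  PROBE_AT is a stand-in for
   emit_stack_probe and receives offsets relative to the stack pointer:  */
#if 0
static void
probe_offsets_inline (long long first, long long size, long long probe_interval,
		      void (*probe_at) (long long sp_displacement))
{
  long long i;

  /* Probe at FIRST + N * PROBE_INTERVAL for N = 1, 2, ... while the offset
     stays below SIZE, then once more at FIRST + SIZE.  */
  for (i = probe_interval; i < size; i += probe_interval)
    probe_at (-(first + i));
  probe_at (-(first + size));
}

/* Example: first = 8192, size = 10000, probe_interval = 4096 probes at
   sp - 12288, sp - 16384, and finally sp - 18192.  */
#endif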
7636 
7637 /* Probe a range of stack addresses from REG to END, inclusive.  These are
7638    offsets from the current stack pointer.  */
7639 
7640 const char *
7641 output_probe_stack_range (rtx reg, rtx end)
7642 {
7643   static int labelno = 0;
7644   char loop_lab[32];
7645   rtx xops[3];
7646 
7647   ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
7648 
7649   /* Loop.  */
7650   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
7651 
7652   /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
7653   xops[0] = reg;
7654   xops[1] = GEN_INT (get_probe_interval ());
7655   output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
7656 
7657   /* Probe at TEST_ADDR.  */
7658   xops[0] = stack_pointer_rtx;
7659   xops[1] = reg;
7660   xops[2] = const0_rtx;
7661   output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
7662 
7663   /* Test if TEST_ADDR == LAST_ADDR.  */
7664   xops[0] = reg;
7665   xops[1] = end;
7666   output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
7667 
7668   /* Branch.  */
7669   fputs ("\tjne\t", asm_out_file);
7670   assemble_name_raw (asm_out_file, loop_lab);
7671   fputc ('\n', asm_out_file);
7672 
7673   return "";
7674 }
7675 
7676 /* Set stack_frame_required to false if a stack frame isn't required.
7677    Update STACK_ALIGNMENT to the largest alignment, in bits, of any stack
7678    slot used, if a stack frame is required and CHECK_STACK_SLOT is true.  */
7679 
7680 static void
7681 ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
7682 				    bool check_stack_slot)
7683 {
7684   HARD_REG_SET set_up_by_prologue, prologue_used;
7685   basic_block bb;
7686 
7687   CLEAR_HARD_REG_SET (prologue_used);
7688   CLEAR_HARD_REG_SET (set_up_by_prologue);
7689   add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
7690   add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
7691   add_to_hard_reg_set (&set_up_by_prologue, Pmode,
7692 		       HARD_FRAME_POINTER_REGNUM);
7693 
7694   /* The preferred stack alignment is the minimum stack alignment.  */
7695   if (stack_alignment > crtl->preferred_stack_boundary)
7696     stack_alignment = crtl->preferred_stack_boundary;
7697 
7698   bool require_stack_frame = false;
7699 
7700   FOR_EACH_BB_FN (bb, cfun)
7701     {
7702       rtx_insn *insn;
7703       FOR_BB_INSNS (bb, insn)
7704 	if (NONDEBUG_INSN_P (insn)
7705 	    && requires_stack_frame_p (insn, prologue_used,
7706 				       set_up_by_prologue))
7707 	  {
7708 	    require_stack_frame = true;
7709 
7710 	    if (check_stack_slot)
7711 	      {
7712 		/* Find the maximum stack alignment.  */
7713 		subrtx_iterator::array_type array;
7714 		FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
7715 		  if (MEM_P (*iter)
7716 		      && (reg_mentioned_p (stack_pointer_rtx,
7717 					   *iter)
7718 			  || reg_mentioned_p (frame_pointer_rtx,
7719 					      *iter)))
7720 		    {
7721 		      unsigned int alignment = MEM_ALIGN (*iter);
7722 		      if (alignment > stack_alignment)
7723 			stack_alignment = alignment;
7724 		    }
7725 	      }
7726 	  }
7727     }
7728 
7729   cfun->machine->stack_frame_required = require_stack_frame;
7730 }
7731 
7732 /* Finalize stack_realign_needed and frame_pointer_needed flags, which
7733    will guide prologue/epilogue to be generated in correct form.  */
7734 
7735 static void
7736 ix86_finalize_stack_frame_flags (void)
7737 {
7738   /* Check if stack realign is really needed after reload, and
7739   /* Check if stack realignment is really needed after reload, and
7740      store the result in cfun.  */
7741     = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
7742        ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
7743   unsigned int stack_alignment
7744     = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
7745        ? crtl->max_used_stack_slot_alignment
7746        : crtl->stack_alignment_needed);
7747   unsigned int stack_realign
7748     = (incoming_stack_boundary < stack_alignment);
7749   bool recompute_frame_layout_p = false;
7750 
7751   if (crtl->stack_realign_finalized)
7752     {
7753       /* After stack_realign_needed is finalized, we can no longer
7754 	 change it.  */
7755       gcc_assert (crtl->stack_realign_needed == stack_realign);
7756       return;
7757     }
7758 
7759   /* It is always safe to compute max_used_stack_alignment.  We
7760      compute it only if 128-bit aligned load/store may be generated
7761      on a misaligned stack slot, which would lead to a segfault.  */
7762   bool check_stack_slot
7763     = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
7764   ix86_find_max_used_stack_alignment (stack_alignment,
7765 				      check_stack_slot);
7766 
7767   /* If the only reason for frame_pointer_needed is that we conservatively
7768      assumed stack realignment might be needed or -fno-omit-frame-pointer
7769      is used, but in the end nothing that needed the stack alignment was
7770      spilled and there is no stack access, clear frame_pointer_needed and
7771      say we don't need stack realignment.  */
7772   if ((stack_realign || (!flag_omit_frame_pointer && optimize))
7773       && frame_pointer_needed
7774       && crtl->is_leaf
7775       && crtl->sp_is_unchanging
7776       && !ix86_current_function_calls_tls_descriptor
7777       && !crtl->accesses_prior_frames
7778       && !cfun->calls_alloca
7779       && !crtl->calls_eh_return
7780       /* See ira_setup_eliminable_regset for the rationale.  */
7781       && !(STACK_CHECK_MOVING_SP
7782 	   && flag_stack_check
7783 	   && flag_exceptions
7784 	   && cfun->can_throw_non_call_exceptions)
7785       && !ix86_frame_pointer_required ()
7786       && ix86_get_frame_size () == 0
7787       && ix86_nsaved_sseregs () == 0
7788       && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
7789     {
7790       if (cfun->machine->stack_frame_required)
7791 	{
7792 	  /* Stack frame is required.  If stack alignment needed is less
7793 	     than incoming stack boundary, don't realign stack.  */
7794 	  stack_realign = incoming_stack_boundary < stack_alignment;
7795 	  if (!stack_realign)
7796 	    {
7797 	      crtl->max_used_stack_slot_alignment
7798 		= incoming_stack_boundary;
7799 	      crtl->stack_alignment_needed
7800 		= incoming_stack_boundary;
7801 	      /* Also update preferred_stack_boundary for leaf
7802 	         functions.  */
7803 	      crtl->preferred_stack_boundary
7804 		= incoming_stack_boundary;
7805 	    }
7806 	}
7807       else
7808 	{
7809 	  /* If drap has been set, but it actually isn't live at the
7810 	     start of the function, there is no reason to set it up.  */
7811 	  if (crtl->drap_reg)
7812 	    {
7813 	      basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
7814 	      if (! REGNO_REG_SET_P (DF_LR_IN (bb),
7815 				     REGNO (crtl->drap_reg)))
7816 		{
7817 		  crtl->drap_reg = NULL_RTX;
7818 		  crtl->need_drap = false;
7819 		}
7820 	    }
7821 	  else
7822 	    cfun->machine->no_drap_save_restore = true;
7823 
7824 	  frame_pointer_needed = false;
7825 	  stack_realign = false;
7826 	  crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
7827 	  crtl->stack_alignment_needed = incoming_stack_boundary;
7828 	  crtl->stack_alignment_estimated = incoming_stack_boundary;
7829 	  if (crtl->preferred_stack_boundary > incoming_stack_boundary)
7830 	    crtl->preferred_stack_boundary = incoming_stack_boundary;
7831 	  df_finish_pass (true);
7832 	  df_scan_alloc (NULL);
7833 	  df_scan_blocks ();
7834 	  df_compute_regs_ever_live (true);
7835 	  df_analyze ();
7836 
7837 	  if (flag_var_tracking)
7838 	    {
7839 	      /* Since frame pointer is no longer available, replace it with
7840 		 stack pointer - UNITS_PER_WORD in debug insns.  */
7841 	      df_ref ref, next;
7842 	      for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
7843 		   ref; ref = next)
7844 		{
7845 		  next = DF_REF_NEXT_REG (ref);
7846 		  if (!DF_REF_INSN_INFO (ref))
7847 		    continue;
7848 
7849 		  /* Make sure the next ref is for a different instruction,
7850 		     so that we're not affected by the rescan.  */
7851 		  rtx_insn *insn = DF_REF_INSN (ref);
7852 		  while (next && DF_REF_INSN (next) == insn)
7853 		    next = DF_REF_NEXT_REG (next);
7854 
7855 		  if (DEBUG_INSN_P (insn))
7856 		    {
7857 		      bool changed = false;
7858 		      for (; ref != next; ref = DF_REF_NEXT_REG (ref))
7859 			{
7860 			  rtx *loc = DF_REF_LOC (ref);
7861 			  if (*loc == hard_frame_pointer_rtx)
7862 			    {
7863 			      *loc = plus_constant (Pmode,
7864 						    stack_pointer_rtx,
7865 						    -UNITS_PER_WORD);
7866 			      changed = true;
7867 			    }
7868 			}
7869 		      if (changed)
7870 			df_insn_rescan (insn);
7871 		    }
7872 		}
7873 	    }
7874 
7875 	  recompute_frame_layout_p = true;
7876 	}
7877     }
7878   else if (crtl->max_used_stack_slot_alignment >= 128
7879 	   && cfun->machine->stack_frame_required)
7880     {
7881       /* We don't need to realign the stack.  max_used_stack_alignment is
7882 	 used to decide how the stack frame should be aligned.  This is
7883 	 independent of any psABI and of 32-bit vs 64-bit.  */
7884       cfun->machine->max_used_stack_alignment
7885 	= stack_alignment / BITS_PER_UNIT;
7886     }
7887 
7888   if (crtl->stack_realign_needed != stack_realign)
7889     recompute_frame_layout_p = true;
7890   crtl->stack_realign_needed = stack_realign;
7891   crtl->stack_realign_finalized = true;
7892   if (recompute_frame_layout_p)
7893     ix86_compute_frame_layout ();
7894 }
7895 
7896 /* Delete SET_GOT right after entry block if it is allocated to reg.  */
7897 
7898 static void
7899 ix86_elim_entry_set_got (rtx reg)
7900 {
7901   basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
7902   rtx_insn *c_insn = BB_HEAD (bb);
7903   if (!NONDEBUG_INSN_P (c_insn))
7904     c_insn = next_nonnote_nondebug_insn (c_insn);
7905   if (c_insn && NONJUMP_INSN_P (c_insn))
7906     {
7907       rtx pat = PATTERN (c_insn);
7908       if (GET_CODE (pat) == PARALLEL)
7909 	{
7910 	  rtx vec = XVECEXP (pat, 0, 0);
7911 	  if (GET_CODE (vec) == SET
7912 	      && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
7913 	      && REGNO (XEXP (vec, 0)) == REGNO (reg))
7914 	    delete_insn (c_insn);
7915 	}
7916     }
7917 }
7918 
7919 static rtx
7920 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
7921 {
7922   rtx addr, mem;
7923 
7924   if (offset)
7925     addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
7926   mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
7927   return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
7928 }
7929 
7930 static inline rtx
7931 gen_frame_load (rtx reg, rtx frame_reg, int offset)
7932 {
7933   return gen_frame_set (reg, frame_reg, offset, false);
7934 }
7935 
7936 static inline rtx
7937 gen_frame_store (rtx reg, rtx frame_reg, int offset)
7938 {
7939   return gen_frame_set (reg, frame_reg, offset, true);
7940 }
7941 
7942 static void
7943 ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
7944 {
7945   struct machine_function *m = cfun->machine;
7946   const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
7947 			  + m->call_ms2sysv_extra_regs;
7948   rtvec v = rtvec_alloc (ncregs + 1);
7949   unsigned int align, i, vi = 0;
7950   rtx_insn *insn;
7951   rtx sym, addr;
7952   rtx rax = gen_rtx_REG (word_mode, AX_REG);
7953   const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
7954 
7955   /* AL should only be live with sysv_abi.  */
7956   gcc_assert (!ix86_eax_live_at_start_p ());
7957   gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);
7958 
7959   /* Setup RAX as the stub's base pointer.  We use stack_realign_offset
7960      whether we've actually realigned the stack or not.  */
7961   align = GET_MODE_ALIGNMENT (V4SFmode);
7962   addr = choose_baseaddr (frame.stack_realign_offset
7963 			  + xlogue.get_stub_ptr_offset (), &align, AX_REG);
7964   gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
7965 
7966   emit_insn (gen_rtx_SET (rax, addr));
7967 
7968   /* Get the stub symbol.  */
7969   sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
7970 						  : XLOGUE_STUB_SAVE);
7971   RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
7972 
7973   for (i = 0; i < ncregs; ++i)
7974     {
7975       const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
7976       rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
7977 			     r.regno);
7978       RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
7979     }
7980 
7981   gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
7982 
7983   insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
7984   RTX_FRAME_RELATED_P (insn) = true;
7985 }
7986 
7987 /* Generate and return an insn body to AND X with Y.  */
7988 
7989 static rtx_insn *
7990 gen_and2_insn (rtx x, rtx y)
7991 {
7992   enum insn_code icode = optab_handler (and_optab, GET_MODE (x));
7993 
7994   gcc_assert (insn_operand_matches (icode, 0, x));
7995   gcc_assert (insn_operand_matches (icode, 1, x));
7996   gcc_assert (insn_operand_matches (icode, 2, y));
7997 
7998   return GEN_FCN (icode) (x, x, y);
7999 }
8000 
8001 /* Expand the prologue into a bunch of separate insns.  */
8002 
8003 void
8004 ix86_expand_prologue (void)
8005 {
8006   struct machine_function *m = cfun->machine;
8007   rtx insn, t;
8008   HOST_WIDE_INT allocate;
8009   bool int_registers_saved;
8010   bool sse_registers_saved;
8011   bool save_stub_call_needed;
8012   rtx static_chain = NULL_RTX;
8013 
8014   if (ix86_function_naked (current_function_decl))
8015     return;
8016 
8017   ix86_finalize_stack_frame_flags ();
8018 
8019   /* DRAP should not coexist with stack_realign_fp */
8020   gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8021 
8022   memset (&m->fs, 0, sizeof (m->fs));
8023 
8024   /* Initialize CFA state for before the prologue.  */
8025   m->fs.cfa_reg = stack_pointer_rtx;
8026   m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
8027 
8028   /* Track SP offset to the CFA.  We continue tracking this after we've
8029      swapped the CFA register away from SP.  In the case of re-alignment
8030      this is fudged; we're interested in offsets within the local frame.  */
8031   m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
8032   m->fs.sp_valid = true;
8033   m->fs.sp_realigned = false;
8034 
8035   const struct ix86_frame &frame = cfun->machine->frame;
8036 
8037   if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
8038     {
8039       /* We should have already generated an error for any use of
8040          ms_hook on a nested function.  */
8041       gcc_checking_assert (!ix86_static_chain_on_stack);
8042 
8043       /* Check if profiling is active and whether we shall use the
8044          profiling-before-prologue variant.  If so, sorry.  */
8045       if (crtl->profile && flag_fentry != 0)
8046 	sorry ("%<ms_hook_prologue%> attribute is not compatible "
8047 	       "with %<-mfentry%> for 32-bit");
8048 
8049       /* In ix86_asm_output_function_label we emitted:
8050 	 8b ff     movl.s %edi,%edi
8051 	 55        push   %ebp
8052 	 8b ec     movl.s %esp,%ebp
8053 
8054 	 This matches the hookable function prologue in Win32 API
8055 	 functions in Microsoft Windows XP Service Pack 2 and newer.
8056 	 Wine uses this to enable Windows apps to hook the Win32 API
8057 	 functions provided by Wine.
8058 
8059 	 What that means is that we've already set up the frame pointer.  */
8060 
8061       if (frame_pointer_needed
8062 	  && !(crtl->drap_reg && crtl->stack_realign_needed))
8063 	{
8064 	  rtx push, mov;
8065 
8066 	  /* We've decided to use the frame pointer already set up.
8067 	     Describe this to the unwinder by pretending that both
8068 	     push and mov insns happen right here.
8069 
8070 	     Putting the unwind info here at the end of the ms_hook
8071 	     is done so that we can make absolutely certain we get
8072 	     the required byte sequence at the start of the function,
8073 	     rather than relying on an assembler that can produce
8074 	     the exact encoding required.
8075 
8076 	     However it does mean (in the unpatched case) that we have
8077 	     a 1 insn window where the asynchronous unwind info is
8078 	     incorrect.  However, if we placed the unwind info at
8079 	     its correct location we would have incorrect unwind info
8080 	     in the patched case.  Which is probably all moot since
8081 	     I don't expect Wine generates dwarf2 unwind info for the
8082 	     system libraries that use this feature.  */
8083 
8084 	  insn = emit_insn (gen_blockage ());
8085 
8086 	  push = gen_push (hard_frame_pointer_rtx);
8087 	  mov = gen_rtx_SET (hard_frame_pointer_rtx,
8088 			     stack_pointer_rtx);
8089 	  RTX_FRAME_RELATED_P (push) = 1;
8090 	  RTX_FRAME_RELATED_P (mov) = 1;
8091 
8092 	  RTX_FRAME_RELATED_P (insn) = 1;
8093 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8094 			gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
8095 
8096 	  /* Note that gen_push incremented m->fs.cfa_offset, even
8097 	     though we didn't emit the push insn here.  */
8098 	  m->fs.cfa_reg = hard_frame_pointer_rtx;
8099 	  m->fs.fp_offset = m->fs.cfa_offset;
8100 	  m->fs.fp_valid = true;
8101 	}
8102       else
8103 	{
8104 	  /* The frame pointer is not needed so pop %ebp again.
8105 	     This leaves us with a pristine state.  */
8106 	  emit_insn (gen_pop (hard_frame_pointer_rtx));
8107 	}
8108     }
8109 
8110   /* The first insn of a function that accepts its static chain on the
8111      stack is to push the register that would be filled in by a direct
8112      call.  This insn will be skipped by the trampoline.  */
8113   else if (ix86_static_chain_on_stack)
8114     {
8115       static_chain = ix86_static_chain (cfun->decl, false);
8116       insn = emit_insn (gen_push (static_chain));
8117       emit_insn (gen_blockage ());
8118 
8119       /* We don't want to interpret this push insn as a register save,
8120 	 only as a stack adjustment.  The real copy of the register as
8121 	 a save will be done later, if needed.  */
8122       t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
8123       t = gen_rtx_SET (stack_pointer_rtx, t);
8124       add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
8125       RTX_FRAME_RELATED_P (insn) = 1;
8126     }
8127 
8128   /* Emit prologue code to adjust stack alignment and set up DRAP, in case
8129      DRAP is needed and stack realignment is really needed after reload.  */
8130   if (stack_realign_drap)
8131     {
8132       int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8133 
8134       /* Can't use DRAP in interrupt function.  */
8135       if (cfun->machine->func_type != TYPE_NORMAL)
8136 	sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
8137 	       "in interrupt service routine.  This may be worked "
8138 	       "around by avoiding functions with aggregate return.");
8139 
8140       /* Only need to push parameter pointer reg if it is caller saved.  */
8141       if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
8142 	{
8143 	  /* Push arg pointer reg */
8144 	  insn = emit_insn (gen_push (crtl->drap_reg));
8145 	  RTX_FRAME_RELATED_P (insn) = 1;
8146 	}
8147 
8148       /* Grab the argument pointer.  */
8149       t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
8150       insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
8151       RTX_FRAME_RELATED_P (insn) = 1;
8152       m->fs.cfa_reg = crtl->drap_reg;
8153       m->fs.cfa_offset = 0;
8154 
8155       /* Align the stack.  */
8156       insn = emit_insn (gen_and2_insn (stack_pointer_rtx,
8157 				       GEN_INT (-align_bytes)));
8158       RTX_FRAME_RELATED_P (insn) = 1;
8159 
8160       /* Replicate the return address on the stack so that return
8161 	 address can be reached via (argp - 1) slot.  This is needed
8162 	 to implement macro RETURN_ADDR_RTX and intrinsic function
8163 	 expand_builtin_return_addr etc.  */
8164       t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
8165       t = gen_frame_mem (word_mode, t);
8166       insn = emit_insn (gen_push (t));
8167       RTX_FRAME_RELATED_P (insn) = 1;
8168 
8169       /* For the purposes of frame and register save area addressing,
8170 	 we've started over with a new frame.  */
8171       m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
8172       m->fs.realigned = true;
8173 
8174       if (static_chain)
8175 	{
8176 	  /* Replicate static chain on the stack so that static chain
8177 	     can be reached via (argp - 2) slot.  This is needed for
8178 	     nested function with stack realignment.  */
8179 	  insn = emit_insn (gen_push (static_chain));
8180 	  RTX_FRAME_RELATED_P (insn) = 1;
8181 	}
8182     }
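  /* Descriptive sketch of what the DRAP block above leaves behind
     (assuming 64-bit): crtl->drap_reg holds the incoming argument
     pointer (the CFA), the stack pointer has been rounded down to the
     requested alignment, and copies of the return address and, for a
     nested function, the static chain have been pushed onto the
     realigned stack so they stay reachable at small fixed offsets from
     the argument pointer.  */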
8183 
8184   int_registers_saved = (frame.nregs == 0);
8185   sse_registers_saved = (frame.nsseregs == 0);
8186   save_stub_call_needed = (m->call_ms2sysv);
8187   gcc_assert (sse_registers_saved || !save_stub_call_needed);
8188 
8189   if (frame_pointer_needed && !m->fs.fp_valid)
8190     {
8191       /* Note: AT&T enter does NOT have reversed args.  Enter is probably
8192          slower on all targets.  Also sdb didn't like it.  */
8193       insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8194       RTX_FRAME_RELATED_P (insn) = 1;
8195 
8196       if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
8197 	{
8198 	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8199 	  RTX_FRAME_RELATED_P (insn) = 1;
8200 
8201 	  if (m->fs.cfa_reg == stack_pointer_rtx)
8202 	    m->fs.cfa_reg = hard_frame_pointer_rtx;
8203 	  m->fs.fp_offset = m->fs.sp_offset;
8204 	  m->fs.fp_valid = true;
8205 	}
8206     }
8207 
8208   if (!int_registers_saved)
8209     {
8210       /* If saving registers via PUSH, do so now.  */
8211       if (!frame.save_regs_using_mov)
8212 	{
8213 	  ix86_emit_save_regs ();
8214 	  int_registers_saved = true;
8215 	  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
8216 	}
8217 
8218       /* When using red zone we may start register saving before allocating
8219 	 the stack frame saving one cycle of the prologue.  However, avoid
8220 	 doing this if we have to probe the stack; at least on x86_64 the
8221 	 stack probe can turn into a call that clobbers a red zone location. */
8222       else if (ix86_using_red_zone ()
8223 	       && (! TARGET_STACK_PROBE
8224 		   || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
8225 	{
8226 	  ix86_emit_save_regs_using_mov (frame.reg_save_offset);
8227 	  int_registers_saved = true;
8228 	}
8229     }
8230 
8231   if (stack_realign_fp)
8232     {
8233       int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8234       gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8235 
8236       /* Record last valid frame pointer offset.  */
8237       m->fs.sp_realigned_fp_last = frame.reg_save_offset;
8238 
8239       /* The computation of the size of the re-aligned stack frame means
8240 	 that we must allocate the size of the register save area before
8241 	 performing the actual alignment.  Otherwise we cannot guarantee
8242 	 that there's enough storage above the realignment point.  */
8243       allocate = frame.reg_save_offset - m->fs.sp_offset
8244 		 + frame.stack_realign_allocate;
8245       if (allocate)
8246         pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8247 				   GEN_INT (-allocate), -1, false);
8248 
8249       /* Align the stack.  */
8250       emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes)));
8251       m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
8252       m->fs.sp_realigned_offset = m->fs.sp_offset
8253 					      - frame.stack_realign_allocate;
8254       /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
8255 	 Beyond this point, stack access should be done via choose_baseaddr or
8256 	 by using sp_valid_at and fp_valid_at to determine the correct base
8257 	 register.  Henceforth, any CFA offset should be thought of as logical
8258 	 and not physical.  */
8259       gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
8260       gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
8261       m->fs.sp_realigned = true;
8262 
8263       /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
8264 	 is needed to describe where a register is saved using a realigned
8265 	 stack pointer, so we need to invalidate the stack pointer for that
8266 	 target.  */
8267       if (TARGET_SEH)
8268 	m->fs.sp_valid = false;
8269 
8270       /* If SP offset is non-immediate after allocation of the stack frame,
8271 	 then emit SSE saves or stub call prior to allocating the rest of the
8272 	 stack frame.  This is less efficient for the out-of-line stub because
8273 	 we can't combine allocations across the call barrier, but it's better
8274 	 than using a scratch register.  */
8275       else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
8276 						   - m->fs.sp_realigned_offset),
8277 					  Pmode))
8278 	{
8279 	  if (!sse_registers_saved)
8280 	    {
8281 	      ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8282 	      sse_registers_saved = true;
8283 	    }
8284 	  else if (save_stub_call_needed)
8285 	    {
8286 	      ix86_emit_outlined_ms2sysv_save (frame);
8287 	      save_stub_call_needed = false;
8288 	    }
8289 	}
8290     }
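  /* A small worked example of the realignment above (illustrative
     numbers only): with m->fs.sp_offset == 16, frame.reg_save_offset
     == 40, frame.stack_realign_allocate == 8 and align_bytes == 32, we
     allocate 40 - 16 + 8 == 32 bytes first and only then AND the stack
     pointer with -32, so the register save area is guaranteed to fit
     above the realignment point even though the AND may move the stack
     pointer down by up to align_bytes - 1 further bytes.  */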
8291 
8292   allocate = frame.stack_pointer_offset - m->fs.sp_offset;
8293 
8294   if (flag_stack_usage_info)
8295     {
8296       /* We start to count from ARG_POINTER.  */
8297       HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
8298 
8299       /* If it was realigned, take into account the fake frame.  */
8300       if (stack_realign_drap)
8301 	{
8302 	  if (ix86_static_chain_on_stack)
8303 	    stack_size += UNITS_PER_WORD;
8304 
8305 	  if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
8306 	    stack_size += UNITS_PER_WORD;
8307 
8308 	  /* This over-estimates by 1 minimal-stack-alignment-unit but
8309 	     mitigates that by counting in the new return address slot.  */
8310 	  current_function_dynamic_stack_size
8311 	    += crtl->stack_alignment_needed / BITS_PER_UNIT;
8312 	}
8313 
8314       current_function_static_stack_size = stack_size;
8315     }
8316 
8317   /* On SEH target with very large frame size, allocate an area to save
8318      SSE registers (as the very large allocation won't be described).  */
8319   if (TARGET_SEH
8320       && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
8321       && !sse_registers_saved)
8322     {
8323       HOST_WIDE_INT sse_size
8324 	= frame.sse_reg_save_offset - frame.reg_save_offset;
8325 
8326       gcc_assert (int_registers_saved);
8327 
8328       /* No need to do stack checking as the area will be immediately
8329 	 written.  */
8330       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8331 			         GEN_INT (-sse_size), -1,
8332 				 m->fs.cfa_reg == stack_pointer_rtx);
8333       allocate -= sse_size;
8334       ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8335       sse_registers_saved = true;
8336     }
8337 
8338   /* The stack has already been decremented by the instruction calling us
8339      so probe if the size is non-negative to preserve the protection area.  */
8340   if (allocate >= 0
8341       && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
8342 	  || flag_stack_clash_protection))
8343     {
8344       if (flag_stack_clash_protection)
8345 	{
8346 	  ix86_adjust_stack_and_probe_stack_clash (allocate,
8347 						   int_registers_saved);
8348 	  allocate = 0;
8349 	}
8350       else if (STACK_CHECK_MOVING_SP)
8351 	{
8352 	  if (!(crtl->is_leaf && !cfun->calls_alloca
8353 		&& allocate <= get_probe_interval ()))
8354 	    {
8355 	      ix86_adjust_stack_and_probe (allocate, int_registers_saved);
8356 	      allocate = 0;
8357 	    }
8358 	}
8359       else
8360 	{
8361 	  HOST_WIDE_INT size = allocate;
8362 
8363 	  if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
8364 	    size = 0x80000000 - get_stack_check_protect () - 1;
8365 
8366 	  if (TARGET_STACK_PROBE)
8367 	    {
8368 	      if (crtl->is_leaf && !cfun->calls_alloca)
8369 		{
8370 		  if (size > get_probe_interval ())
8371 		    ix86_emit_probe_stack_range (0, size, int_registers_saved);
8372 		}
8373 	      else
8374 		ix86_emit_probe_stack_range (0,
8375 					     size + get_stack_check_protect (),
8376 					     int_registers_saved);
8377 	    }
8378 	  else
8379 	    {
8380 	      if (crtl->is_leaf && !cfun->calls_alloca)
8381 		{
8382 		  if (size > get_probe_interval ()
8383 		      && size > get_stack_check_protect ())
8384 		    ix86_emit_probe_stack_range (get_stack_check_protect (),
8385 						 (size
8386 						  - get_stack_check_protect ()),
8387 						 int_registers_saved);
8388 		}
8389 	      else
8390 		ix86_emit_probe_stack_range (get_stack_check_protect (), size,
8391 					     int_registers_saved);
8392 	    }
8393 	}
8394     }
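  /* Summary of the probing strategies chosen above (descriptive only):
     with -fstack-clash-protection the allocation itself is performed in
     probed chunks; with STACK_CHECK_MOVING_SP the stack pointer is
     advanced and touched one probe interval at a time; otherwise a range
     of pages is probed ahead of the allocation, offset by the protection
     area (get_stack_check_protect ()) for non-leaf functions.  */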
8395 
8396   if (allocate == 0)
8397     ;
8398   else if (!ix86_target_stack_probe ()
8399 	   || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
8400     {
8401       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8402 			         GEN_INT (-allocate), -1,
8403 			         m->fs.cfa_reg == stack_pointer_rtx);
8404     }
8405   else
8406     {
8407       rtx eax = gen_rtx_REG (Pmode, AX_REG);
8408       rtx r10 = NULL;
8409       const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
8410       bool eax_live = ix86_eax_live_at_start_p ();
8411       bool r10_live = false;
8412 
8413       if (TARGET_64BIT)
8414         r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
8415 
8416       if (eax_live)
8417 	{
8418 	  insn = emit_insn (gen_push (eax));
8419 	  allocate -= UNITS_PER_WORD;
8420 	  /* Note that SEH directives need to continue tracking the stack
8421 	     pointer even after the frame pointer has been set up.  */
8422 	  if (sp_is_cfa_reg || TARGET_SEH)
8423 	    {
8424 	      if (sp_is_cfa_reg)
8425 		m->fs.cfa_offset += UNITS_PER_WORD;
8426 	      RTX_FRAME_RELATED_P (insn) = 1;
8427 	      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8428 			    gen_rtx_SET (stack_pointer_rtx,
8429 					 plus_constant (Pmode,
8430 							stack_pointer_rtx,
8431 							-UNITS_PER_WORD)));
8432 	    }
8433 	}
8434 
8435       if (r10_live)
8436 	{
8437 	  r10 = gen_rtx_REG (Pmode, R10_REG);
8438 	  insn = emit_insn (gen_push (r10));
8439 	  allocate -= UNITS_PER_WORD;
8440 	  if (sp_is_cfa_reg || TARGET_SEH)
8441 	    {
8442 	      if (sp_is_cfa_reg)
8443 		m->fs.cfa_offset += UNITS_PER_WORD;
8444 	      RTX_FRAME_RELATED_P (insn) = 1;
8445 	      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8446 			    gen_rtx_SET (stack_pointer_rtx,
8447 					 plus_constant (Pmode,
8448 							stack_pointer_rtx,
8449 							-UNITS_PER_WORD)));
8450 	    }
8451 	}
8452 
8453       emit_move_insn (eax, GEN_INT (allocate));
8454       emit_insn (gen_allocate_stack_worker_probe (Pmode, eax, eax));
8455 
8456       /* Use the fact that AX still contains ALLOCATE.  */
8457       insn = emit_insn (gen_pro_epilogue_adjust_stack_sub
8458 			(Pmode, stack_pointer_rtx, stack_pointer_rtx, eax));
8459 
8460       if (sp_is_cfa_reg || TARGET_SEH)
8461 	{
8462 	  if (sp_is_cfa_reg)
8463 	    m->fs.cfa_offset += allocate;
8464 	  RTX_FRAME_RELATED_P (insn) = 1;
8465 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8466 			gen_rtx_SET (stack_pointer_rtx,
8467 				     plus_constant (Pmode, stack_pointer_rtx,
8468 						    -allocate)));
8469 	}
8470       m->fs.sp_offset += allocate;
8471 
8472       /* Use stack_pointer_rtx for relative addressing so that code works for
8473 	 realigned stack.  But this means that we need a blockage to prevent
8474 	 stores based on the frame pointer from being scheduled before.  */
8475       if (r10_live && eax_live)
8476         {
8477 	  t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
8478 	  emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
8479 			  gen_frame_mem (word_mode, t));
8480 	  t = plus_constant (Pmode, t, UNITS_PER_WORD);
8481 	  emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
8482 			  gen_frame_mem (word_mode, t));
8483 	  emit_insn (gen_memory_blockage ());
8484 	}
8485       else if (eax_live || r10_live)
8486 	{
8487 	  t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
8488 	  emit_move_insn (gen_rtx_REG (word_mode,
8489 				       (eax_live ? AX_REG : R10_REG)),
8490 			  gen_frame_mem (word_mode, t));
8491 	  emit_insn (gen_memory_blockage ());
8492 	}
8493     }
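  /* The probed allocation above corresponds roughly to the following
     sequence (a sketch, assuming 64-bit with both %rax and %r10 live and
     ___chkstk_ms or an equivalent probe worker as the helper):

	push	%rax			; preserve caller's %rax
	push	%r10			; preserve the static chain
	mov	$allocate, %rax
	call	___chkstk_ms		; probe the pages being allocated
	sub	%rax, %rsp
	mov	(%rsp,%rax), %r10	; reload the saved %r10
	mov	8(%rsp,%rax), %rax	; reload the saved %rax

     The reloads address the save slots relative to %rsp so the code also
     works when the stack has been realigned.  */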
8494   gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
8495 
8496   /* If we haven't already set up the frame pointer, do so now.  */
8497   if (frame_pointer_needed && !m->fs.fp_valid)
8498     {
8499       insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
8500 			    GEN_INT (frame.stack_pointer_offset
8501 				     - frame.hard_frame_pointer_offset));
8502       insn = emit_insn (insn);
8503       RTX_FRAME_RELATED_P (insn) = 1;
8504       add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
8505 
8506       if (m->fs.cfa_reg == stack_pointer_rtx)
8507 	m->fs.cfa_reg = hard_frame_pointer_rtx;
8508       m->fs.fp_offset = frame.hard_frame_pointer_offset;
8509       m->fs.fp_valid = true;
8510     }
8511 
8512   if (!int_registers_saved)
8513     ix86_emit_save_regs_using_mov (frame.reg_save_offset);
8514   if (!sse_registers_saved)
8515     ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8516   else if (save_stub_call_needed)
8517     ix86_emit_outlined_ms2sysv_save (frame);
8518 
8519   /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
8520      in PROLOGUE.  */
8521   if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
8522     {
8523       rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
8524       insn = emit_insn (gen_set_got (pic));
8525       RTX_FRAME_RELATED_P (insn) = 1;
8526       add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
8527       emit_insn (gen_prologue_use (pic));
8528       /* Delete an already emitted SET_GOT if it exists and was allocated
8529 	 to REAL_PIC_OFFSET_TABLE_REGNUM.  */
8530       ix86_elim_entry_set_got (pic);
8531     }
8532 
8533   if (crtl->drap_reg && !crtl->stack_realign_needed)
8534     {
8535       /* vDRAP is set up, but after reload it turns out stack realignment
8536          isn't necessary; here we emit prologue code to set up DRAP
8537          without the stack realignment adjustment.  */
8538       t = choose_baseaddr (0, NULL);
8539       emit_insn (gen_rtx_SET (crtl->drap_reg, t));
8540     }
8541 
8542   /* Prevent instructions from being scheduled into register save push
8543      sequence when access to the redzone area is done through frame pointer.
8544      The offset between the frame pointer and the stack pointer is calculated
8545      relative to the value of the stack pointer at the end of the function
8546      prologue, and moving instructions that access redzone area via frame
8547      pointer inside push sequence violates this assumption.  */
8548   if (frame_pointer_needed && frame.red_zone_size)
8549     emit_insn (gen_memory_blockage ());
8550 
8551   /* SEH requires that the prologue end within 256 bytes of the start of
8552      the function.  Prevent instruction schedules that would extend that.
8553      Further, prevent alloca modifications to the stack pointer from being
8554      combined with prologue modifications.  */
8555   if (TARGET_SEH)
8556     emit_insn (gen_prologue_use (stack_pointer_rtx));
8557 }
8558 
8559 /* Emit code to restore REG using a POP insn.  */
8560 
8561 static void
8562 ix86_emit_restore_reg_using_pop (rtx reg)
8563 {
8564   struct machine_function *m = cfun->machine;
8565   rtx_insn *insn = emit_insn (gen_pop (reg));
8566 
8567   ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
8568   m->fs.sp_offset -= UNITS_PER_WORD;
8569 
8570   if (m->fs.cfa_reg == crtl->drap_reg
8571       && REGNO (reg) == REGNO (crtl->drap_reg))
8572     {
8573       /* Previously we'd represented the CFA as an expression
8574 	 like *(%ebp - 8).  We've just popped that value from
8575 	 the stack, which means we need to reset the CFA to
8576 	 the drap register.  This will remain until we restore
8577 	 the stack pointer.  */
8578       add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8579       RTX_FRAME_RELATED_P (insn) = 1;
8580 
8581       /* This means that the DRAP register is valid for addressing too.  */
8582       m->fs.drap_valid = true;
8583       return;
8584     }
8585 
8586   if (m->fs.cfa_reg == stack_pointer_rtx)
8587     {
8588       rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
8589       x = gen_rtx_SET (stack_pointer_rtx, x);
8590       add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
8591       RTX_FRAME_RELATED_P (insn) = 1;
8592 
8593       m->fs.cfa_offset -= UNITS_PER_WORD;
8594     }
8595 
8596   /* When the frame pointer is the CFA, and we pop it, we are
8597      swapping back to the stack pointer as the CFA.  This happens
8598      for stack frames that don't allocate other data, so we assume
8599      the stack pointer is now pointing at the return address, i.e.
8600      the function entry state, which makes the offset one word.  */
8601   if (reg == hard_frame_pointer_rtx)
8602     {
8603       m->fs.fp_valid = false;
8604       if (m->fs.cfa_reg == hard_frame_pointer_rtx)
8605 	{
8606 	  m->fs.cfa_reg = stack_pointer_rtx;
8607 	  m->fs.cfa_offset -= UNITS_PER_WORD;
8608 
8609 	  add_reg_note (insn, REG_CFA_DEF_CFA,
8610 			gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8611 				      GEN_INT (m->fs.cfa_offset)));
8612 	  RTX_FRAME_RELATED_P (insn) = 1;
8613 	}
8614     }
8615 }
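/* Note (descriptive only): while the CFA is still the stack pointer, each
   pop above both shrinks the tracked frame (m->fs.sp_offset) and moves
   the stack pointer one word closer to the CFA, which is why an explicit
   REG_CFA_ADJUST_CFA note is attached rather than relying on the default
   frame-related handling of the pop.  */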
8616 
8617 /* Emit code to restore saved registers using POP insns.  */
8618 
8619 static void
8620 ix86_emit_restore_regs_using_pop (void)
8621 {
8622   unsigned int regno;
8623 
8624   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8625     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
8626       ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
8627 }
8628 
8629 /* Emit code and notes for the LEAVE instruction.  If insn is non-null,
8630    omits the emit and only attaches the notes.  */
8631 
8632 static void
8633 ix86_emit_leave (rtx_insn *insn)
8634 {
8635   struct machine_function *m = cfun->machine;
8636 
8637   if (!insn)
8638     insn = emit_insn (gen_leave (word_mode));
8639 
8640   ix86_add_queued_cfa_restore_notes (insn);
8641 
8642   gcc_assert (m->fs.fp_valid);
8643   m->fs.sp_valid = true;
8644   m->fs.sp_realigned = false;
8645   m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
8646   m->fs.fp_valid = false;
8647 
8648   if (m->fs.cfa_reg == hard_frame_pointer_rtx)
8649     {
8650       m->fs.cfa_reg = stack_pointer_rtx;
8651       m->fs.cfa_offset = m->fs.sp_offset;
8652 
8653       add_reg_note (insn, REG_CFA_DEF_CFA,
8654 		    plus_constant (Pmode, stack_pointer_rtx,
8655 				   m->fs.sp_offset));
8656       RTX_FRAME_RELATED_P (insn) = 1;
8657     }
8658   ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
8659 			     m->fs.fp_offset);
8660 }
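/* Recall that "leave" is equivalent to "mov %ebp, %esp; pop %ebp" (or the
   %rbp/%rsp forms in 64-bit mode), which is why the state tracking above
   re-validates the stack pointer at fp_offset - UNITS_PER_WORD (the saved
   frame pointer slot has just been popped) and invalidates the frame
   pointer.  */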
8661 
8662 /* Emit code to restore saved registers using MOV insns.
8663    First register is restored from CFA - CFA_OFFSET.  */
8664 static void
8665 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
8666 				  bool maybe_eh_return)
8667 {
8668   struct machine_function *m = cfun->machine;
8669   unsigned int regno;
8670 
8671   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8672     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
8673       {
8674 	rtx reg = gen_rtx_REG (word_mode, regno);
8675 	rtx mem;
8676 	rtx_insn *insn;
8677 
8678 	mem = choose_baseaddr (cfa_offset, NULL);
8679 	mem = gen_frame_mem (word_mode, mem);
8680 	insn = emit_move_insn (reg, mem);
8681 
8682         if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8683 	  {
8684 	    /* Previously we'd represented the CFA as an expression
8685 	       like *(%ebp - 8).  We've just popped that value from
8686 	       the stack, which means we need to reset the CFA to
8687 	       the drap register.  This will remain until we restore
8688 	       the stack pointer.  */
8689 	    add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8690 	    RTX_FRAME_RELATED_P (insn) = 1;
8691 
8692 	    /* This means that the DRAP register is valid for addressing.  */
8693 	    m->fs.drap_valid = true;
8694 	  }
8695 	else
8696 	  ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
8697 
8698 	cfa_offset -= UNITS_PER_WORD;
8699       }
8700 }
8701 
8702 /* Emit code to restore saved registers using MOV insns.
8703    First register is restored from CFA - CFA_OFFSET.  */
8704 static void
8705 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
8706 				      bool maybe_eh_return)
8707 {
8708   unsigned int regno;
8709 
8710   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8711     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
8712       {
8713 	rtx reg = gen_rtx_REG (V4SFmode, regno);
8714 	rtx mem;
8715 	unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
8716 
8717 	mem = choose_baseaddr (cfa_offset, &align);
8718 	mem = gen_rtx_MEM (V4SFmode, mem);
8719 
8720 	/* The location alignment depends upon the base register.  */
8721 	align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
8722 	gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
8723 	set_mem_align (mem, align);
8724 	emit_insn (gen_rtx_SET (reg, mem));
8725 
8726 	ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
8727 
8728 	cfa_offset -= GET_MODE_SIZE (V4SFmode);
8729       }
8730 }
8731 
8732 static void
8733 ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
8734 				  bool use_call, int style)
8735 {
8736   struct machine_function *m = cfun->machine;
8737   const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
8738 			  + m->call_ms2sysv_extra_regs;
8739   rtvec v;
8740   unsigned int elems_needed, align, i, vi = 0;
8741   rtx_insn *insn;
8742   rtx sym, tmp;
8743   rtx rsi = gen_rtx_REG (word_mode, SI_REG);
8744   rtx r10 = NULL_RTX;
8745   const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
8746   HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
8747   HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
8748   rtx rsi_frame_load = NULL_RTX;
8749   HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
8750   enum xlogue_stub stub;
8751 
8752   gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
8753 
8754   /* If using a realigned stack, we should never start with padding.  */
8755   gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
8756 
8757   /* Setup RSI as the stub's base pointer.  */
8758   align = GET_MODE_ALIGNMENT (V4SFmode);
8759   tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
8760   gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
8761 
8762   emit_insn (gen_rtx_SET (rsi, tmp));
8763 
8764   /* Get a symbol for the stub.  */
8765   if (frame_pointer_needed)
8766     stub = use_call ? XLOGUE_STUB_RESTORE_HFP
8767 		    : XLOGUE_STUB_RESTORE_HFP_TAIL;
8768   else
8769     stub = use_call ? XLOGUE_STUB_RESTORE
8770 		    : XLOGUE_STUB_RESTORE_TAIL;
8771   sym = xlogue.get_stub_rtx (stub);
8772 
8773   elems_needed = ncregs;
8774   if (use_call)
8775     elems_needed += 1;
8776   else
8777     elems_needed += frame_pointer_needed ? 5 : 3;
8778   v = rtvec_alloc (elems_needed);
8779 
8780   /* We call the epilogue stub when we need to pop incoming args or we are
8781      doing a sibling call as the tail.  Otherwise, we will emit a jmp to the
8782      epilogue stub and it is the tail-call.  */
8783   if (use_call)
8784       RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
8785   else
8786     {
8787       RTVEC_ELT (v, vi++) = ret_rtx;
8788       RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
8789       if (frame_pointer_needed)
8790 	{
8791 	  rtx rbp = gen_rtx_REG (DImode, BP_REG);
8792 	  gcc_assert (m->fs.fp_valid);
8793 	  gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
8794 
8795 	  tmp = gen_rtx_PLUS (DImode, rbp, GEN_INT (8));
8796 	  RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
8797 	  RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
8798 	  tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
8799 	  RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
8800 	}
8801       else
8802 	{
8803 	  /* If no hard frame pointer, we set R10 to the SP restore value.  */
8804 	  gcc_assert (!m->fs.fp_valid);
8805 	  gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
8806 	  gcc_assert (m->fs.sp_valid);
8807 
8808 	  r10 = gen_rtx_REG (DImode, R10_REG);
8809 	  tmp = gen_rtx_PLUS (Pmode, rsi, GEN_INT (stub_ptr_offset));
8810 	  emit_insn (gen_rtx_SET (r10, tmp));
8811 
8812 	  RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
8813 	}
8814     }
8815 
8816   /* Generate frame load insns and restore notes.  */
8817   for (i = 0; i < ncregs; ++i)
8818     {
8819       const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
8820       machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
8821       rtx reg, frame_load;
8822 
8823       reg = gen_rtx_REG (mode, r.regno);
8824       frame_load = gen_frame_load (reg, rsi, r.offset);
8825 
8826       /* Save RSI frame load insn & note to add last.  */
8827       if (r.regno == SI_REG)
8828 	{
8829 	  gcc_assert (!rsi_frame_load);
8830 	  rsi_frame_load = frame_load;
8831 	  rsi_restore_offset = r.offset;
8832 	}
8833       else
8834 	{
8835 	  RTVEC_ELT (v, vi++) = frame_load;
8836 	  ix86_add_cfa_restore_note (NULL, reg, r.offset);
8837 	}
8838     }
8839 
8840   /* Add RSI frame load & restore note at the end.  */
8841   gcc_assert (rsi_frame_load);
8842   gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
8843   RTVEC_ELT (v, vi++) = rsi_frame_load;
8844   ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
8845 			     rsi_restore_offset);
8846 
8847   /* Finally, for tail-call w/o a hard frame pointer, set SP to R10.  */
8848   if (!use_call && !frame_pointer_needed)
8849     {
8850       gcc_assert (m->fs.sp_valid);
8851       gcc_assert (!m->fs.sp_realigned);
8852 
8853       /* At this point, R10 should point to frame.stack_realign_offset.  */
8854       if (m->fs.cfa_reg == stack_pointer_rtx)
8855 	m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
8856       m->fs.sp_offset = frame.stack_realign_offset;
8857     }
8858 
8859   gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
8860   tmp = gen_rtx_PARALLEL (VOIDmode, v);
8861   if (use_call)
8862       insn = emit_insn (tmp);
8863   else
8864     {
8865       insn = emit_jump_insn (tmp);
8866       JUMP_LABEL (insn) = ret_rtx;
8867 
8868       if (frame_pointer_needed)
8869 	ix86_emit_leave (insn);
8870       else
8871 	{
8872 	  /* Need CFA adjust note.  */
8873 	  tmp = gen_rtx_SET (stack_pointer_rtx, r10);
8874 	  add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
8875 	}
8876     }
8877 
8878   RTX_FRAME_RELATED_P (insn) = true;
8879   ix86_add_queued_cfa_restore_notes (insn);
8880 
8881   /* If we're not doing a tail-call, we need to adjust the stack.  */
8882   if (use_call && m->fs.sp_valid)
8883     {
8884       HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
8885       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8886 				GEN_INT (dealloc), style,
8887 				m->fs.cfa_reg == stack_pointer_rtx);
8888     }
8889 }
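/* Descriptive note on the mechanism above: %rsi is pointed at the save
   area and the out-of-line stub reloads, from fixed offsets off %rsi, the
   registers that are call-saved in the MS ABI but clobbered by the
   System V body, either returning to the caller (call variant) or acting
   as the function's own return (tail variant); the stubs themselves are
   provided outside this file, in libgcc.  */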
8890 
8891 /* Restore function stack, frame, and registers.  */
8892 
8893 void
8894 ix86_expand_epilogue (int style)
8895 {
8896   struct machine_function *m = cfun->machine;
8897   struct machine_frame_state frame_state_save = m->fs;
8898   bool restore_regs_via_mov;
8899   bool using_drap;
8900   bool restore_stub_is_tail = false;
8901 
8902   if (ix86_function_naked (current_function_decl))
8903     {
8904       /* The program should not reach this point.  */
8905       emit_insn (gen_ud2 ());
8906       return;
8907     }
8908 
8909   ix86_finalize_stack_frame_flags ();
8910   const struct ix86_frame &frame = cfun->machine->frame;
8911 
8912   m->fs.sp_realigned = stack_realign_fp;
8913   m->fs.sp_valid = stack_realign_fp
8914 		   || !frame_pointer_needed
8915 		   || crtl->sp_is_unchanging;
8916   gcc_assert (!m->fs.sp_valid
8917 	      || m->fs.sp_offset == frame.stack_pointer_offset);
8918 
8919   /* The FP must be valid if the frame pointer is present.  */
8920   gcc_assert (frame_pointer_needed == m->fs.fp_valid);
8921   gcc_assert (!m->fs.fp_valid
8922 	      || m->fs.fp_offset == frame.hard_frame_pointer_offset);
8923 
8924   /* We must have *some* valid pointer to the stack frame.  */
8925   gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
8926 
8927   /* The DRAP is never valid at this point.  */
8928   gcc_assert (!m->fs.drap_valid);
8929 
8930   /* See the comment about red zone and frame
8931      pointer usage in ix86_expand_prologue.  */
8932   if (frame_pointer_needed && frame.red_zone_size)
8933     emit_insn (gen_memory_blockage ());
8934 
8935   using_drap = crtl->drap_reg && crtl->stack_realign_needed;
8936   gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
8937 
8938   /* Determine the CFA offset of the end of the red-zone.  */
8939   m->fs.red_zone_offset = 0;
8940   if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
8941     {
8942       /* The red-zone begins below return address and error code in
8943 	 exception handler.  */
8944       m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
8945 
8946       /* When the register save area is in the aligned portion of
8947          the stack, determine the maximum runtime displacement that
8948 	 matches up with the aligned frame.  */
8949       if (stack_realign_drap)
8950 	m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
8951 				  + UNITS_PER_WORD);
8952     }
8953 
8954   HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;
8955 
8956   /* Special care must be taken for the normal return case of a function
8957      using eh_return: the eax and edx registers are marked as saved, but
8958      not restored along this path.  Adjust the save location to match.  */
8959   if (crtl->calls_eh_return && style != 2)
8960     reg_save_offset -= 2 * UNITS_PER_WORD;
8961 
8962   /* EH_RETURN requires the use of moves to function properly.  */
8963   if (crtl->calls_eh_return)
8964     restore_regs_via_mov = true;
8965   /* SEH requires the use of pops to identify the epilogue.  */
8966   else if (TARGET_SEH)
8967     restore_regs_via_mov = false;
8968   /* If we're only restoring one register and sp cannot be used, then
8969      use a move instruction to restore the register, since it's
8970      less work than reloading sp and popping the register.  */
8971   else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
8972     restore_regs_via_mov = true;
8973   else if (TARGET_EPILOGUE_USING_MOVE
8974 	   && cfun->machine->use_fast_prologue_epilogue
8975 	   && (frame.nregs > 1
8976 	       || m->fs.sp_offset != reg_save_offset))
8977     restore_regs_via_mov = true;
8978   else if (frame_pointer_needed
8979 	   && !frame.nregs
8980 	   && m->fs.sp_offset != reg_save_offset)
8981     restore_regs_via_mov = true;
8982   else if (frame_pointer_needed
8983 	   && TARGET_USE_LEAVE
8984 	   && cfun->machine->use_fast_prologue_epilogue
8985 	   && frame.nregs == 1)
8986     restore_regs_via_mov = true;
8987   else
8988     restore_regs_via_mov = false;
8989 
8990   if (restore_regs_via_mov || frame.nsseregs)
8991     {
8992       /* Ensure that the entire register save area is addressable via
8993 	 the stack pointer, if we will restore SSE regs via sp.  */
8994       if (TARGET_64BIT
8995 	  && m->fs.sp_offset > 0x7fffffff
8996 	  && sp_valid_at (frame.stack_realign_offset + 1)
8997 	  && (frame.nsseregs + frame.nregs) != 0)
8998 	{
8999 	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9000 				     GEN_INT (m->fs.sp_offset
9001 					      - frame.sse_reg_save_offset),
9002 				     style,
9003 				     m->fs.cfa_reg == stack_pointer_rtx);
9004 	}
9005     }
9006 
9007   /* If there are any SSE registers to restore, then we have to do it
9008      via moves, since there's obviously no pop for SSE regs.  */
9009   if (frame.nsseregs)
9010     ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
9011 					  style == 2);
9012 
9013   if (m->call_ms2sysv)
9014     {
9015       int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
9016 
9017       /* We cannot use a tail-call for the stub if:
9018 	 1. We have to pop incoming args,
9019 	 2. We have additional int regs to restore, or
9020 	 3. A sibling call will be the tail-call, or
9021 	 4. We are emitting an eh_return_internal epilogue.
9022 
9023 	 TODO: Item 4 has not yet been tested!
9024 
9025 	 If any of the above are true, we will call the stub rather than
9026 	 jump to it.  */
9027       restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
9028       ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
9029     }
9030 
9031   /* If using an out-of-line stub that is a tail-call, then...  */
9032   if (m->call_ms2sysv && restore_stub_is_tail)
9033     {
9034       /* TODO: paranoid tests.  (Remove eventually.)  */
9035       gcc_assert (m->fs.sp_valid);
9036       gcc_assert (!m->fs.sp_realigned);
9037       gcc_assert (!m->fs.fp_valid);
9038       gcc_assert (!m->fs.realigned);
9039       gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
9040       gcc_assert (!crtl->drap_reg);
9041       gcc_assert (!frame.nregs);
9042     }
9043   else if (restore_regs_via_mov)
9044     {
9045       rtx t;
9046 
9047       if (frame.nregs)
9048 	ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);
9049 
9050       /* eh_return epilogues need %ecx added to the stack pointer.  */
9051       if (style == 2)
9052 	{
9053 	  rtx sa = EH_RETURN_STACKADJ_RTX;
9054 	  rtx_insn *insn;
9055 
9056 	  /* %ecx can't be used for both DRAP register and eh_return.  */
9057 	  if (crtl->drap_reg)
9058 	    gcc_assert (REGNO (crtl->drap_reg) != CX_REG);
9059 
9060 	  /* regparm nested functions don't work with eh_return.  */
9061 	  gcc_assert (!ix86_static_chain_on_stack);
9062 
9063 	  if (frame_pointer_needed)
9064 	    {
9065 	      t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
9066 	      t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
9067 	      emit_insn (gen_rtx_SET (sa, t));
9068 
9069 	      /* NB: eh_return epilogues must restore the frame pointer
9070 		 in word_mode since the upper 32 bits of RBP register
9071 		 can have any values.  */
9072 	      t = gen_frame_mem (word_mode, hard_frame_pointer_rtx);
9073 	      rtx frame_reg = gen_rtx_REG (word_mode,
9074 					   HARD_FRAME_POINTER_REGNUM);
9075 	      insn = emit_move_insn (frame_reg, t);
9076 
9077 	      /* Note that we use SA as a temporary CFA, as the return
9078 		 address is at the proper place relative to it.  We
9079 		 pretend this happens at the FP restore insn because
9080 		 prior to this insn the FP would be stored at the wrong
9081 		 offset relative to SA, and after this insn we have no
9082 		 other reasonable register to use for the CFA.  We don't
9083 		 bother resetting the CFA to the SP for the duration of
9084 		 the return insn, unless the control flow instrumentation
9085 		 is done.  In this case the SP is used later and we have
9086 		 to reset CFA to SP.  */
9087 	      add_reg_note (insn, REG_CFA_DEF_CFA,
9088 			    plus_constant (Pmode, sa, UNITS_PER_WORD));
9089 	      ix86_add_queued_cfa_restore_notes (insn);
9090 	      add_reg_note (insn, REG_CFA_RESTORE, frame_reg);
9091 	      RTX_FRAME_RELATED_P (insn) = 1;
9092 
9093 	      m->fs.cfa_reg = sa;
9094 	      m->fs.cfa_offset = UNITS_PER_WORD;
9095 	      m->fs.fp_valid = false;
9096 
9097 	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
9098 					 const0_rtx, style,
9099 					 flag_cf_protection);
9100 	    }
9101 	  else
9102 	    {
9103 	      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
9104 	      t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
9105 	      insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
9106 	      ix86_add_queued_cfa_restore_notes (insn);
9107 
9108 	      gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
9109 	      if (m->fs.cfa_offset != UNITS_PER_WORD)
9110 		{
9111 		  m->fs.cfa_offset = UNITS_PER_WORD;
9112 		  add_reg_note (insn, REG_CFA_DEF_CFA,
9113 				plus_constant (Pmode, stack_pointer_rtx,
9114 					       UNITS_PER_WORD));
9115 		  RTX_FRAME_RELATED_P (insn) = 1;
9116 		}
9117 	    }
9118 	  m->fs.sp_offset = UNITS_PER_WORD;
9119 	  m->fs.sp_valid = true;
9120 	  m->fs.sp_realigned = false;
9121 	}
9122     }
9123   else
9124     {
9125       /* SEH requires that the function end with (1) a stack adjustment
9126 	 if necessary, (2) a sequence of pops, and (3) a return or
9127 	 jump instruction.  Prevent insns from the function body from
9128 	 being scheduled into this sequence.  */
9129       if (TARGET_SEH)
9130 	{
9131 	  /* Prevent a catch region from being adjacent to the standard
9132 	     epilogue sequence.  Unfortunately neither crtl->uses_eh_lsda
9133 	     nor several other flags that would be interesting to test are
9134 	     set up yet.  */
9135 	  if (flag_non_call_exceptions)
9136 	    emit_insn (gen_nops (const1_rtx));
9137 	  else
9138 	    emit_insn (gen_blockage ());
9139 	}
9140 
9141       /* First step is to deallocate the stack frame so that we can
9142 	 pop the registers.  If the stack pointer was realigned, it needs
9143 	 to be restored now.  Also do it on SEH target for very large
9144 	 frame as the emitted instructions aren't allowed by the ABI
9145 	 in epilogues.  */
9146       if (!m->fs.sp_valid || m->fs.sp_realigned
9147  	  || (TARGET_SEH
9148 	      && (m->fs.sp_offset - reg_save_offset
9149 		  >= SEH_MAX_FRAME_SIZE)))
9150 	{
9151 	  pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
9152 				     GEN_INT (m->fs.fp_offset
9153 					      - reg_save_offset),
9154 				     style, false);
9155 	}
9156       else if (m->fs.sp_offset != reg_save_offset)
9157 	{
9158 	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9159 				     GEN_INT (m->fs.sp_offset
9160 					      - reg_save_offset),
9161 				     style,
9162 				     m->fs.cfa_reg == stack_pointer_rtx);
9163 	}
9164 
9165       ix86_emit_restore_regs_using_pop ();
9166     }
9167 
9168   /* If we used a frame pointer and haven't already got rid of it,
9169      then do so now.  */
9170   if (m->fs.fp_valid)
9171     {
9172       /* If the stack pointer is valid and pointing at the frame
9173 	 pointer store address, then we only need a pop.  */
9174       if (sp_valid_at (frame.hfp_save_offset)
9175 	  && m->fs.sp_offset == frame.hfp_save_offset)
9176 	ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
9177       /* Leave results in shorter dependency chains on CPUs that are
9178 	 able to grok it fast.  */
9179       else if (TARGET_USE_LEAVE
9180 	       || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
9181 	       || !cfun->machine->use_fast_prologue_epilogue)
9182 	ix86_emit_leave (NULL);
9183       else
9184         {
9185 	  pro_epilogue_adjust_stack (stack_pointer_rtx,
9186 				     hard_frame_pointer_rtx,
9187 				     const0_rtx, style, !using_drap);
9188 	  ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
9189         }
9190     }
9191 
9192   if (using_drap)
9193     {
9194       int param_ptr_offset = UNITS_PER_WORD;
9195       rtx_insn *insn;
9196 
9197       gcc_assert (stack_realign_drap);
9198 
9199       if (ix86_static_chain_on_stack)
9200 	param_ptr_offset += UNITS_PER_WORD;
9201       if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
9202 	param_ptr_offset += UNITS_PER_WORD;
9203 
9204       insn = emit_insn (gen_rtx_SET
9205 			(stack_pointer_rtx,
9206 			 gen_rtx_PLUS (Pmode,
9207 				       crtl->drap_reg,
9208 				       GEN_INT (-param_ptr_offset))));
9209       m->fs.cfa_reg = stack_pointer_rtx;
9210       m->fs.cfa_offset = param_ptr_offset;
9211       m->fs.sp_offset = param_ptr_offset;
9212       m->fs.realigned = false;
9213 
9214       add_reg_note (insn, REG_CFA_DEF_CFA,
9215 		    gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9216 				  GEN_INT (param_ptr_offset)));
9217       RTX_FRAME_RELATED_P (insn) = 1;
9218 
9219       if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
9220 	ix86_emit_restore_reg_using_pop (crtl->drap_reg);
9221     }
9222 
9223   /* At this point the stack pointer must be valid, and we must have
9224      restored all of the registers.  We may not have deallocated the
9225      entire stack frame.  We've delayed this until now because it may
9226      be possible to merge the local stack deallocation with the
9227      deallocation forced by ix86_static_chain_on_stack.   */
9228   gcc_assert (m->fs.sp_valid);
9229   gcc_assert (!m->fs.sp_realigned);
9230   gcc_assert (!m->fs.fp_valid);
9231   gcc_assert (!m->fs.realigned);
9232   if (m->fs.sp_offset != UNITS_PER_WORD)
9233     {
9234       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9235 				 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
9236 				 style, true);
9237     }
9238   else
9239     ix86_add_queued_cfa_restore_notes (get_last_insn ());
9240 
9241   /* Sibcall epilogues don't want a return instruction.  */
9242   if (style == 0)
9243     {
9244       m->fs = frame_state_save;
9245       return;
9246     }
9247 
9248   if (cfun->machine->func_type != TYPE_NORMAL)
9249     emit_jump_insn (gen_interrupt_return ());
9250   else if (crtl->args.pops_args && crtl->args.size)
9251     {
9252       rtx popc = GEN_INT (crtl->args.pops_args);
9253 
9254       /* i386 can only pop 64K bytes.  If asked to pop more, pop return
9255 	 address, do explicit add, and jump indirectly to the caller.  */
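      /* (The 64K limit comes from the encoding: "ret $imm16" takes only a
	 16-bit immediate, so a larger pop count falls back to popping the
	 return address into %ecx, adjusting %esp explicitly and doing an
	 indirect "jmp *%ecx".)  */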
9256 
9257       if (crtl->args.pops_args >= 65536)
9258 	{
9259 	  rtx ecx = gen_rtx_REG (SImode, CX_REG);
9260 	  rtx_insn *insn;
9261 
9262 	  /* There is no "pascal" calling convention in any 64bit ABI.  */
9263 	  gcc_assert (!TARGET_64BIT);
9264 
9265 	  insn = emit_insn (gen_pop (ecx));
9266 	  m->fs.cfa_offset -= UNITS_PER_WORD;
9267 	  m->fs.sp_offset -= UNITS_PER_WORD;
9268 
9269 	  rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
9270 	  x = gen_rtx_SET (stack_pointer_rtx, x);
9271 	  add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9272 	  add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
9273 	  RTX_FRAME_RELATED_P (insn) = 1;
9274 
9275 	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9276 				     popc, -1, true);
9277 	  emit_jump_insn (gen_simple_return_indirect_internal (ecx));
9278 	}
9279       else
9280 	emit_jump_insn (gen_simple_return_pop_internal (popc));
9281     }
9282   else if (!m->call_ms2sysv || !restore_stub_is_tail)
9283     {
9284       /* In case of return from EH a simple return cannot be used
9285 	 as a return address will be compared with a shadow stack
9286 	 return address.  Use indirect jump instead.  */
9287       if (style == 2 && flag_cf_protection)
9288 	{
9289 	  /* Register used in indirect jump must be in word_mode.  But
9290 	     Pmode may not be the same as word_mode for x32.  */
9291 	  rtx ecx = gen_rtx_REG (word_mode, CX_REG);
9292 	  rtx_insn *insn;
9293 
9294 	  insn = emit_insn (gen_pop (ecx));
9295 	  m->fs.cfa_offset -= UNITS_PER_WORD;
9296 	  m->fs.sp_offset -= UNITS_PER_WORD;
9297 
9298 	  rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
9299 	  x = gen_rtx_SET (stack_pointer_rtx, x);
9300 	  add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9301 	  add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
9302 	  RTX_FRAME_RELATED_P (insn) = 1;
9303 
9304 	  emit_jump_insn (gen_simple_return_indirect_internal (ecx));
9305 	}
9306       else
9307 	emit_jump_insn (gen_simple_return_internal ());
9308     }
9309 
9310   /* Restore the state back to the state from the prologue,
9311      so that it's correct for the next epilogue.  */
9312   m->fs = frame_state_save;
9313 }
9314 
9315 /* Reset from the function's potential modifications.  */
9316 
9317 static void
9318 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
9319 {
9320   if (pic_offset_table_rtx
9321       && !ix86_use_pseudo_pic_reg ())
9322     SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
9323 
9324   if (TARGET_MACHO)
9325     {
9326       rtx_insn *insn = get_last_insn ();
9327       rtx_insn *deleted_debug_label = NULL;
9328 
9329       /* Mach-O doesn't support labels at the end of objects, so if
9330          it looks like we might want one, take special action.
9331         First, collect any sequence of deleted debug labels.  */
9332       while (insn
9333 	     && NOTE_P (insn)
9334 	     && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
9335 	{
9336 	  /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
9337 	     notes only, instead set their CODE_LABEL_NUMBER to -1,
9338 	     otherwise there would be code generation differences
9339 	     in between -g and -g0.  */
9340 	  if (NOTE_P (insn) && NOTE_KIND (insn)
9341 	      == NOTE_INSN_DELETED_DEBUG_LABEL)
9342 	    deleted_debug_label = insn;
9343 	  insn = PREV_INSN (insn);
9344 	}
9345 
9346       /* If we have:
9347 	 label:
9348 	    barrier
9349 	  then this needs to be detected, so skip past the barrier.  */
9350 
9351       if (insn && BARRIER_P (insn))
9352 	insn = PREV_INSN (insn);
9353 
9354       /* Up to now we've only seen notes or barriers.  */
9355       if (insn)
9356 	{
9357 	  if (LABEL_P (insn)
9358 	      || (NOTE_P (insn)
9359 		  && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
9360 	    /* Trailing label.  */
9361 	    fputs ("\tnop\n", file);
9362 	  else if (cfun && ! cfun->is_thunk)
9363 	    {
9364 	      /* See if we have a completely empty function body, skipping
9365 	         the special case of the picbase thunk emitted as asm.  */
9366 	      while (insn && ! INSN_P (insn))
9367 		insn = PREV_INSN (insn);
9368 	      /* If we don't find any insns, we've got an empty function body;
9369 		 I.e. completely empty - without a return or branch.  This is
9370 		 i.e. completely empty, without a return or branch.  This is
9371 		 because it contains an inline __builtin_unreachable().  GCC
9372 		 declares that reaching __builtin_unreachable() means UB so
9373 		 we're not obliged to do anything special; however, we want
9374 		 non-zero-sized function bodies.  To meet this, and help the
9375 		 user out, let's trap the case.  */
9376 	      if (insn == NULL)
9377 		fputs ("\tud2\n", file);
9378 	    }
9379 	}
9380       else if (deleted_debug_label)
9381 	for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
9382 	  if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
9383 	    CODE_LABEL_NUMBER (insn) = -1;
9384     }
9385 }
9386 
9387 /* Return a scratch register to use in the split stack prologue.  The
9388    split stack prologue is used for -fsplit-stack.  It is the first
9389    instructions in the function, even before the regular prologue.
9390    The scratch register can be any caller-saved register which is not
9391    used for parameters or for the static chain.  */
9392 
9393 static unsigned int
9394 split_stack_prologue_scratch_regno (void)
9395 {
9396   if (TARGET_64BIT)
9397     return R11_REG;
9398   else
9399     {
9400       bool is_fastcall, is_thiscall;
9401       int regparm;
9402 
9403       is_fastcall = (lookup_attribute ("fastcall",
9404 				       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
9405 		     != NULL);
9406       is_thiscall = (lookup_attribute ("thiscall",
9407 				       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
9408 		     != NULL);
9409       regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
9410 
9411       if (is_fastcall)
9412 	{
9413 	  if (DECL_STATIC_CHAIN (cfun->decl))
9414 	    {
9415 	      sorry ("%<-fsplit-stack%> does not support fastcall with "
9416 		     "nested function");
9417 	      return INVALID_REGNUM;
9418 	    }
9419 	  return AX_REG;
9420 	}
9421       else if (is_thiscall)
9422         {
9423 	  if (!DECL_STATIC_CHAIN (cfun->decl))
9424 	    return DX_REG;
9425 	  return AX_REG;
9426 	}
9427       else if (regparm < 3)
9428 	{
9429 	  if (!DECL_STATIC_CHAIN (cfun->decl))
9430 	    return CX_REG;
9431 	  else
9432 	    {
9433 	      if (regparm >= 2)
9434 		{
9435 		  sorry ("%<-fsplit-stack%> does not support 2 register "
9436 			 "parameters for a nested function");
9437 		  return INVALID_REGNUM;
9438 		}
9439 	      return DX_REG;
9440 	    }
9441 	}
9442       else
9443 	{
9444 	  /* FIXME: We could make this work by pushing a register
9445 	     around the addition and comparison.  */
9446 	  sorry ("%<-fsplit-stack%> does not support 3 register parameters");
9447 	  return INVALID_REGNUM;
9448 	}
9449     }
9450 }
9451 
9452 /* A SYMBOL_REF for the function which allocates new stackspace for
9453    -fsplit-stack.  */
9454 
9455 static GTY(()) rtx split_stack_fn;
9456 
9457 /* A SYMBOL_REF for the more stack function when using the large
9458    model.  */
9459 
9460 static GTY(()) rtx split_stack_fn_large;
9461 
9462 /* Return location of the stack guard value in the TLS block.  */
9463 
9464 rtx
9465 ix86_split_stack_guard (void)
9466 {
9467   int offset;
9468   addr_space_t as = DEFAULT_TLS_SEG_REG;
9469   rtx r;
9470 
9471   gcc_assert (flag_split_stack);
9472 
9473 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
9474   offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
9475 #else
9476   gcc_unreachable ();
9477 #endif
9478 
9479   r = GEN_INT (offset);
9480   r = gen_const_mem (Pmode, r);
9481   set_mem_addr_space (r, as);
9482 
9483   return r;
9484 }
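/* Descriptive note: on GNU/Linux targets the offset above is read through
   the TLS segment register (typically %fs in 64-bit mode, %gs in 32-bit
   mode), so the guard comparison in the split-stack prologue needs only a
   single memory operand; the exact offset is dictated by the C library's
   thread control block layout.  */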
9485 
9486 /* Handle -fsplit-stack.  These are the first instructions in the
9487    function, even before the regular prologue.  */
9488 
9489 void
9490 ix86_expand_split_stack_prologue (void)
9491 {
9492   HOST_WIDE_INT allocate;
9493   unsigned HOST_WIDE_INT args_size;
9494   rtx_code_label *label;
9495   rtx limit, current, allocate_rtx, call_fusage;
9496   rtx_insn *call_insn;
9497   rtx scratch_reg = NULL_RTX;
9498   rtx_code_label *varargs_label = NULL;
9499   rtx fn;
9500 
9501   gcc_assert (flag_split_stack && reload_completed);
9502 
9503   ix86_finalize_stack_frame_flags ();
9504   struct ix86_frame &frame = cfun->machine->frame;
9505   allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
9506 
9507   /* This is the label we will branch to if we have enough stack
9508      space.  We expect the basic block reordering pass to reverse this
9509      branch if optimizing, so that we branch in the unlikely case.  */
9510   label = gen_label_rtx ();
9511 
9512   /* We need to compare the stack pointer minus the frame size with
9513      the stack boundary in the TCB.  The stack boundary always gives
9514      us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
9515      can compare directly.  Otherwise we need to do an addition.  */
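  /* The check emitted below is roughly (a sketch for a small 64-bit
     frame, with OFFSET standing for the TLS guard slot):

	cmp	%fs:OFFSET, %rsp
	jae	.Lenough_stack		; likely: enough stack available
	...  set up arguments and call __morestack  ...
     .Lenough_stack:

     Larger frames compare a scratch register holding %rsp - frame_size
     against the guard instead of %rsp itself.  */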
9516 
9517   limit = ix86_split_stack_guard ();
9518 
9519   if (allocate < SPLIT_STACK_AVAILABLE)
9520     current = stack_pointer_rtx;
9521   else
9522     {
9523       unsigned int scratch_regno;
9524       rtx offset;
9525 
9526       /* We need a scratch register to hold the stack pointer minus
9527 	 the required frame size.  Since this is the very start of the
9528 	 function, the scratch register can be any caller-saved
9529 	 register which is not used for parameters.  */
9530       offset = GEN_INT (- allocate);
9531       scratch_regno = split_stack_prologue_scratch_regno ();
9532       if (scratch_regno == INVALID_REGNUM)
9533 	return;
9534       scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
9535       if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
9536 	{
9537 	  /* We don't use gen_add in this case because it will
9538 	     want to split to lea, but when not optimizing the insn
9539 	     will not be split after this point.  */
9540 	  emit_insn (gen_rtx_SET (scratch_reg,
9541 				  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9542 						offset)));
9543 	}
9544       else
9545 	{
9546 	  emit_move_insn (scratch_reg, offset);
9547 	  emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx));
9548 	}
9549       current = scratch_reg;
9550     }
9551 
9552   ix86_expand_branch (GEU, current, limit, label);
9553   rtx_insn *jump_insn = get_last_insn ();
9554   JUMP_LABEL (jump_insn) = label;
9555 
9556   /* Mark the jump as very likely to be taken.  */
9557   add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());
9558 
9559   if (split_stack_fn == NULL_RTX)
9560     {
9561       split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
9562       SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
9563     }
9564   fn = split_stack_fn;
9565 
9566   /* Get more stack space.  We pass in the desired stack space and the
9567      size of the arguments to copy to the new stack.  In 32-bit mode
9568      we push the parameters; __morestack will return on a new stack
9569      anyhow.  In 64-bit mode we pass the parameters in r10 and
9570      r11.  */
9571   allocate_rtx = GEN_INT (allocate);
9572   args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
9573   call_fusage = NULL_RTX;
9574   rtx pop = NULL_RTX;
9575   if (TARGET_64BIT)
9576     {
9577       rtx reg10, reg11;
9578 
9579       reg10 = gen_rtx_REG (Pmode, R10_REG);
9580       reg11 = gen_rtx_REG (Pmode, R11_REG);
9581 
9582       /* If this function uses a static chain, it will be in %r10.
9583 	 Preserve it across the call to __morestack.  */
9584       if (DECL_STATIC_CHAIN (cfun->decl))
9585 	{
9586 	  rtx rax;
9587 
9588 	  rax = gen_rtx_REG (word_mode, AX_REG);
9589 	  emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
9590 	  use_reg (&call_fusage, rax);
9591 	}
9592 
9593       if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
9594           && !TARGET_PECOFF)
9595 	{
9596 	  HOST_WIDE_INT argval;
9597 
9598 	  gcc_assert (Pmode == DImode);
9599 	  /* When using the large model we need to load the address
9600 	     into a register, and we've run out of registers.  So we
9601 	     switch to a different calling convention, and we call a
9602 	     different function: __morestack_large.  We pass the
9603 	     argument size in the upper 32 bits of r10 and pass the
9604 	     frame size in the lower 32 bits.  */
9605 	  gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
9606 	  gcc_assert ((args_size & 0xffffffff) == args_size);
9607 
9608 	  if (split_stack_fn_large == NULL_RTX)
9609 	    {
9610 	      split_stack_fn_large
9611 		= gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
9612 	      SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
9613 	    }
9614 	  if (ix86_cmodel == CM_LARGE_PIC)
9615 	    {
9616 	      rtx_code_label *label;
9617 	      rtx x;
9618 
9619 	      label = gen_label_rtx ();
9620 	      emit_label (label);
9621 	      LABEL_PRESERVE_P (label) = 1;
9622 	      emit_insn (gen_set_rip_rex64 (reg10, label));
9623 	      emit_insn (gen_set_got_offset_rex64 (reg11, label));
9624 	      emit_insn (gen_add2_insn (reg10, reg11));
9625 	      x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
9626 				  UNSPEC_GOT);
9627 	      x = gen_rtx_CONST (Pmode, x);
9628 	      emit_move_insn (reg11, x);
9629 	      x = gen_rtx_PLUS (Pmode, reg10, reg11);
9630 	      x = gen_const_mem (Pmode, x);
9631 	      emit_move_insn (reg11, x);
9632 	    }
9633 	  else
9634 	    emit_move_insn (reg11, split_stack_fn_large);
9635 
9636 	  fn = reg11;
9637 
9638 	  argval = ((args_size << 16) << 16) + allocate;
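	  /* E.g. args_size == 0x20 and allocate == 0x1000 pack into
	     argval == 0x0000002000001000: __morestack_large_model reads
	     the argument size from the high 32 bits and the frame size
	     from the low 32 bits.  */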
9639 	  emit_move_insn (reg10, GEN_INT (argval));
9640 	}
9641       else
9642 	{
9643 	  emit_move_insn (reg10, allocate_rtx);
9644 	  emit_move_insn (reg11, GEN_INT (args_size));
9645 	  use_reg (&call_fusage, reg11);
9646 	}
9647 
9648       use_reg (&call_fusage, reg10);
9649     }
9650   else
9651     {
9652       rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
9653       add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
9654       insn = emit_insn (gen_push (allocate_rtx));
9655       add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
9656       pop = GEN_INT (2 * UNITS_PER_WORD);
9657     }
9658   call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
9659 				GEN_INT (UNITS_PER_WORD), constm1_rtx,
9660 				pop, false);
9661   add_function_usage_to (call_insn, call_fusage);
9662   if (!TARGET_64BIT)
9663     add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
9664   /* Indicate that this function can't jump to non-local gotos.  */
9665   make_reg_eh_region_note_nothrow_nononlocal (call_insn);
9666 
9667   /* In order to make call/return prediction work right, we now need
9668      to execute a return instruction.  See
9669      libgcc/config/i386/morestack.S for the details on how this works.
9670 
9671      For flow purposes gcc must not see this as a return
9672      instruction--we need control flow to continue at the subsequent
9673      label.  Therefore, we use an unspec.  */
9674   gcc_assert (crtl->args.pops_args < 65536);
9675   rtx_insn *ret_insn
9676     = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
9677 
9678   if ((flag_cf_protection & CF_BRANCH))
9679     {
9680       /* Insert ENDBR since __morestack will jump back here via indirect
9681 	 call.  */
9682       rtx cet_eb = gen_nop_endbr ();
9683       emit_insn_after (cet_eb, ret_insn);
9684     }
9685 
9686   /* If we are in 64-bit mode and this function uses a static chain,
9687      we saved %r10 in %rax before calling __morestack.  */
9688   if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
9689     emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
9690 		    gen_rtx_REG (word_mode, AX_REG));
9691 
9692   /* If this function calls va_start, we need to store a pointer to
9693      the arguments on the old stack, because they may not have been
9694      all copied to the new stack.  At this point the old stack can be
9695      found at the frame pointer value used by __morestack, because
9696      __morestack has set that up before calling back to us.  Here we
9697      store that pointer in a scratch register, and in
9698      ix86_expand_prologue we store the scratch register in a stack
9699      slot.  */
9700   if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
9701     {
9702       unsigned int scratch_regno;
9703       rtx frame_reg;
9704       int words;
9705 
9706       scratch_regno = split_stack_prologue_scratch_regno ();
9707       scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
9708       frame_reg = gen_rtx_REG (Pmode, BP_REG);
9709 
9710       /* 64-bit:
9711 	 fp -> old fp value
9712 	       return address within this function
9713 	       return address of caller of this function
9714 	       stack arguments
9715 	 So we add three words to get to the stack arguments.
9716 
9717 	 32-bit:
9718 	 fp -> old fp value
9719 	       return address within this function
9720                first argument to __morestack
9721                second argument to __morestack
9722                return address of caller of this function
9723                stack arguments
9724          So we add five words to get to the stack arguments.
9725       */
9726       words = TARGET_64BIT ? 3 : 5;
9727       emit_insn (gen_rtx_SET (scratch_reg,
9728 			      gen_rtx_PLUS (Pmode, frame_reg,
9729 					    GEN_INT (words * UNITS_PER_WORD))));
9730 
9731       varargs_label = gen_label_rtx ();
9732       emit_jump_insn (gen_jump (varargs_label));
9733       JUMP_LABEL (get_last_insn ()) = varargs_label;
9734 
9735       emit_barrier ();
9736     }
9737 
9738   emit_label (label);
9739   LABEL_NUSES (label) = 1;
9740 
9741   /* If this function calls va_start, we now have to set the scratch
9742      register for the case where we do not call __morestack.  In this
9743      case we need to set it based on the stack pointer.  */
9744   if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
9745     {
9746       emit_insn (gen_rtx_SET (scratch_reg,
9747 			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9748 					    GEN_INT (UNITS_PER_WORD))));
9749 
9750       emit_label (varargs_label);
9751       LABEL_NUSES (varargs_label) = 1;
9752     }
9753 }
9754 
9755 /* We may have to tell the dataflow pass that the split stack prologue
9756    is initializing a scratch register.  */
9757 
9758 static void
9759 ix86_live_on_entry (bitmap regs)
9760 {
9761   if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
9762     {
9763       gcc_assert (flag_split_stack);
9764       bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
9765     }
9766 }
9767 
9768 /* Extract the parts of an RTL expression that is a valid memory address
9769    for an instruction.  Return 0 if the structure of the address is
9770    grossly off.  Return -1 if the address contains ASHIFT, so it is not
9771    strictly valid, but still used for computing length of lea instruction.  */
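/* For example, the address 12(%esi,%ebx,4), canonically

       (plus (plus (mult (reg %ebx) (const_int 4)) (reg %esi))
	     (const_int 12))

   decomposes into base = %esi, index = %ebx, scale = 4, disp = 12 and
   seg = ADDR_SPACE_GENERIC.  */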
9772 
9773 int
9774 ix86_decompose_address (rtx addr, struct ix86_address *out)
9775 {
9776   rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
9777   rtx base_reg, index_reg;
9778   HOST_WIDE_INT scale = 1;
9779   rtx scale_rtx = NULL_RTX;
9780   rtx tmp;
9781   int retval = 1;
9782   addr_space_t seg = ADDR_SPACE_GENERIC;
9783 
9784   /* Allow zero-extended SImode addresses;
9785      they will be emitted with the addr32 prefix.  */
9786   if (TARGET_64BIT && GET_MODE (addr) == DImode)
9787     {
9788       if (GET_CODE (addr) == ZERO_EXTEND
9789 	  && GET_MODE (XEXP (addr, 0)) == SImode)
9790 	{
9791 	  addr = XEXP (addr, 0);
9792 	  if (CONST_INT_P (addr))
9793 	    return 0;
9794 	}
9795       else if (GET_CODE (addr) == AND
9796 	       && const_32bit_mask (XEXP (addr, 1), DImode))
9797 	{
9798 	  addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
9799 	  if (addr == NULL_RTX)
9800 	    return 0;
9801 
9802 	  if (CONST_INT_P (addr))
9803 	    return 0;
9804 	}
9805     }
9806 
9807   /* Allow SImode subregs of DImode addresses;
9808      they will be emitted with the addr32 prefix.  */
9809   if (TARGET_64BIT && GET_MODE (addr) == SImode)
9810     {
9811       if (SUBREG_P (addr)
9812 	  && GET_MODE (SUBREG_REG (addr)) == DImode)
9813 	{
9814 	  addr = SUBREG_REG (addr);
9815 	  if (CONST_INT_P (addr))
9816 	    return 0;
9817 	}
9818     }
9819 
9820   if (REG_P (addr))
9821     base = addr;
9822   else if (SUBREG_P (addr))
9823     {
9824       if (REG_P (SUBREG_REG (addr)))
9825 	base = addr;
9826       else
9827 	return 0;
9828     }
9829   else if (GET_CODE (addr) == PLUS)
9830     {
9831       rtx addends[4], op;
9832       int n = 0, i;
9833 
9834       op = addr;
9835       do
9836 	{
9837 	  if (n >= 4)
9838 	    return 0;
9839 	  addends[n++] = XEXP (op, 1);
9840 	  op = XEXP (op, 0);
9841 	}
9842       while (GET_CODE (op) == PLUS);
9843       if (n >= 4)
9844 	return 0;
9845       addends[n] = op;
9846 
9847       for (i = n; i >= 0; --i)
9848 	{
9849 	  op = addends[i];
9850 	  switch (GET_CODE (op))
9851 	    {
9852 	    case MULT:
9853 	      if (index)
9854 		return 0;
9855 	      index = XEXP (op, 0);
9856 	      scale_rtx = XEXP (op, 1);
9857 	      break;
9858 
9859 	    case ASHIFT:
9860 	      if (index)
9861 		return 0;
9862 	      index = XEXP (op, 0);
9863 	      tmp = XEXP (op, 1);
9864 	      if (!CONST_INT_P (tmp))
9865 		return 0;
9866 	      scale = INTVAL (tmp);
9867 	      if ((unsigned HOST_WIDE_INT) scale > 3)
9868 		return 0;
9869 	      scale = 1 << scale;
9870 	      break;
9871 
9872 	    case ZERO_EXTEND:
9873 	      op = XEXP (op, 0);
9874 	      if (GET_CODE (op) != UNSPEC)
9875 		return 0;
9876 	      /* FALLTHRU */
9877 
9878 	    case UNSPEC:
9879 	      if (XINT (op, 1) == UNSPEC_TP
9880 	          && TARGET_TLS_DIRECT_SEG_REFS
9881 	          && seg == ADDR_SPACE_GENERIC)
9882 		seg = DEFAULT_TLS_SEG_REG;
9883 	      else
9884 		return 0;
9885 	      break;
9886 
9887 	    case SUBREG:
9888 	      if (!REG_P (SUBREG_REG (op)))
9889 		return 0;
9890 	      /* FALLTHRU */
9891 
9892 	    case REG:
9893 	      if (!base)
9894 		base = op;
9895 	      else if (!index)
9896 		index = op;
9897 	      else
9898 		return 0;
9899 	      break;
9900 
9901 	    case CONST:
9902 	    case CONST_INT:
9903 	    case SYMBOL_REF:
9904 	    case LABEL_REF:
9905 	      if (disp)
9906 		return 0;
9907 	      disp = op;
9908 	      break;
9909 
9910 	    default:
9911 	      return 0;
9912 	    }
9913 	}
9914     }
9915   else if (GET_CODE (addr) == MULT)
9916     {
9917       index = XEXP (addr, 0);		/* index*scale */
9918       scale_rtx = XEXP (addr, 1);
9919     }
9920   else if (GET_CODE (addr) == ASHIFT)
9921     {
9922       /* We're called for lea too, which implements ashift on occasion.  */
9923       index = XEXP (addr, 0);
9924       tmp = XEXP (addr, 1);
9925       if (!CONST_INT_P (tmp))
9926 	return 0;
9927       scale = INTVAL (tmp);
9928       if ((unsigned HOST_WIDE_INT) scale > 3)
9929 	return 0;
9930       scale = 1 << scale;
9931       retval = -1;
9932     }
9933   else
9934     disp = addr;			/* displacement */
9935 
9936   if (index)
9937     {
9938       if (REG_P (index))
9939 	;
9940       else if (SUBREG_P (index)
9941 	       && REG_P (SUBREG_REG (index)))
9942 	;
9943       else
9944 	return 0;
9945     }
9946 
9947   /* Extract the integral value of scale.  */
9948   if (scale_rtx)
9949     {
9950       if (!CONST_INT_P (scale_rtx))
9951 	return 0;
9952       scale = INTVAL (scale_rtx);
9953     }
9954 
9955   base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
9956   index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
9957 
9958   /* Avoid useless 0 displacement.  */
9959   if (disp == const0_rtx && (base || index))
9960     disp = NULL_RTX;
9961 
9962   /* Allow arg pointer and stack pointer as index if there is no scaling.  */
9963   if (base_reg && index_reg && scale == 1
9964       && (REGNO (index_reg) == ARG_POINTER_REGNUM
9965 	  || REGNO (index_reg) == FRAME_POINTER_REGNUM
9966 	  || REGNO (index_reg) == SP_REG))
9967     {
9968       std::swap (base, index);
9969       std::swap (base_reg, index_reg);
9970     }
9971 
9972   /* Special case: %ebp cannot be encoded as a base without a displacement.
9973      Similarly %r13.  */
9974   if (!disp && base_reg
9975       && (REGNO (base_reg) == ARG_POINTER_REGNUM
9976 	  || REGNO (base_reg) == FRAME_POINTER_REGNUM
9977 	  || REGNO (base_reg) == BP_REG
9978 	  || REGNO (base_reg) == R13_REG))
9979     disp = const0_rtx;
9980 
9981   /* Special case: on K6, [%esi] makes the instruction vector decoded.
9982      Avoid this by transforming to [%esi+0].
9983      Reload calls address legitimization without cfun defined, so we need
9984      to test cfun for being non-NULL. */
9985   if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
9986       && base_reg && !index_reg && !disp
9987       && REGNO (base_reg) == SI_REG)
9988     disp = const0_rtx;
9989 
9990   /* Special case: encode reg+reg instead of reg*2.  */
9991   if (!base && index && scale == 2)
9992     base = index, base_reg = index_reg, scale = 1;
9993 
9994   /* Special case: scaling cannot be encoded without base or displacement.  */
9995   if (!base && !disp && index && scale != 1)
9996     disp = const0_rtx;
9997 
9998   out->base = base;
9999   out->index = index;
10000   out->disp = disp;
10001   out->scale = scale;
10002   out->seg = seg;
10003 
10004   return retval;
10005 }
10006 
10007 /* Return cost of the memory address x.
10008    For i386, it is better to use a complex address than let gcc copy
10009    the address into a reg and make a new pseudo.  But not if the address
10010    requires two regs - that would mean more pseudos with longer
10011    lifetimes.  */
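/* E.g. an address built from a single pseudo register as base typically
   costs 2, one using both a base and an index pseudo costs 3; on K6 some
   scaled or indexed forms get an extra penalty of 10, see below.  */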
10012 static int
10013 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
10014 {
10015   struct ix86_address parts;
10016   int cost = 1;
10017   int ok = ix86_decompose_address (x, &parts);
10018 
10019   gcc_assert (ok);
10020 
10021   if (parts.base && SUBREG_P (parts.base))
10022     parts.base = SUBREG_REG (parts.base);
10023   if (parts.index && SUBREG_P (parts.index))
10024     parts.index = SUBREG_REG (parts.index);
10025 
10026   /* Attempt to minimize number of registers in the address by increasing
10027      address cost for each used register.  We don't increase address cost
10028      for "pic_offset_table_rtx".  When a memopt with "pic_offset_table_rtx"
10029      is not invariant itself it most likely means that base or index is not
10030      invariant.  Therefore only "pic_offset_table_rtx" could be hoisted out,
10031      which is not profitable for x86.  */
10032   if (parts.base
10033       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
10034       && (current_pass->type == GIMPLE_PASS
10035 	  || !pic_offset_table_rtx
10036 	  || !REG_P (parts.base)
10037 	  || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
10038     cost++;
10039 
10040   if (parts.index
10041       && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
10042       && (current_pass->type == GIMPLE_PASS
10043 	  || !pic_offset_table_rtx
10044 	  || !REG_P (parts.index)
10045 	  || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
10046     cost++;
10047 
10048   /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
10049      since its predecode logic can't detect the length of instructions
10050      and it degenerates to vector decoded.  Increase cost of such
10051      addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
10052      to split such addresses or even refuse such addresses at all.
10053 
10054      Following addressing modes are affected:
10055       [base+scale*index]
10056       [scale*index+disp]
10057       [base+index]
10058 
10059      The first and last cases may be avoidable by explicitly coding the zero
10060      in the memory address, but I don't have an AMD-K6 machine handy to check
10061      this theory.  */
10062 
10063   if (TARGET_K6
10064       && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
10065 	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
10066 	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
10067     cost += 10;
10068 
10069   return cost;
10070 }
10071 
10072 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
10073    this is used to form addresses to local data when -fPIC is in
10074    use.  */
10075 
10076 static bool
10077 darwin_local_data_pic (rtx disp)
10078 {
10079   return (GET_CODE (disp) == UNSPEC
10080 	  && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
10081 }
10082 
10083 /* True if operand X should be loaded from GOT.  */
10084 
10085 bool
10086 ix86_force_load_from_GOT_p (rtx x)
10087 {
10088   return ((TARGET_64BIT || HAVE_AS_IX86_GOT32X)
10089 	  && !TARGET_PECOFF && !TARGET_MACHO
10090 	  && !flag_pic
10091 	  && ix86_cmodel != CM_LARGE
10092 	  && GET_CODE (x) == SYMBOL_REF
10093 	  && SYMBOL_REF_FUNCTION_P (x)
10094 	  && (!flag_plt
10095 	      || (SYMBOL_REF_DECL (x)
10096 		  && lookup_attribute ("noplt",
10097 				       DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))
10098 	  && !SYMBOL_REF_LOCAL_P (x));
10099 }
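/* For example, with -fno-plt (or a "noplt" attribute) a call to an
   external function foo is expected to come out as an indirect call
   through its GOT slot, roughly "call *foo@GOTPCREL(%rip)" on x86-64,
   instead of going through the PLT.  */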
10100 
10101 /* Determine if a given RTX is a valid constant.  We already know this
10102    satisfies CONSTANT_P.  */
10103 
10104 static bool
10105 ix86_legitimate_constant_p (machine_mode mode, rtx x)
10106 {
10107   switch (GET_CODE (x))
10108     {
10109     case CONST:
10110       x = XEXP (x, 0);
10111 
10112       if (GET_CODE (x) == PLUS)
10113 	{
10114 	  if (!CONST_INT_P (XEXP (x, 1)))
10115 	    return false;
10116 	  x = XEXP (x, 0);
10117 	}
10118 
10119       if (TARGET_MACHO && darwin_local_data_pic (x))
10120 	return true;
10121 
10122       /* Only some unspecs are valid as "constants".  */
10123       if (GET_CODE (x) == UNSPEC)
10124 	switch (XINT (x, 1))
10125 	  {
10126 	  case UNSPEC_GOT:
10127 	  case UNSPEC_GOTOFF:
10128 	  case UNSPEC_PLTOFF:
10129 	    return TARGET_64BIT;
10130 	  case UNSPEC_TPOFF:
10131 	  case UNSPEC_NTPOFF:
10132 	    x = XVECEXP (x, 0, 0);
10133 	    return (GET_CODE (x) == SYMBOL_REF
10134 		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10135 	  case UNSPEC_DTPOFF:
10136 	    x = XVECEXP (x, 0, 0);
10137 	    return (GET_CODE (x) == SYMBOL_REF
10138 		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
10139 	  default:
10140 	    return false;
10141 	  }
10142 
10143       /* We must have drilled down to a symbol.  */
10144       if (GET_CODE (x) == LABEL_REF)
10145 	return true;
10146       if (GET_CODE (x) != SYMBOL_REF)
10147 	return false;
10148       /* FALLTHRU */
10149 
10150     case SYMBOL_REF:
10151       /* TLS symbols are never valid.  */
10152       if (SYMBOL_REF_TLS_MODEL (x))
10153 	return false;
10154 
10155       /* DLLIMPORT symbols are never valid.  */
10156       if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10157 	  && SYMBOL_REF_DLLIMPORT_P (x))
10158 	return false;
10159 
10160 #if TARGET_MACHO
10161       /* mdynamic-no-pic */
10162       if (MACHO_DYNAMIC_NO_PIC_P)
10163 	return machopic_symbol_defined_p (x);
10164 #endif
10165 
10166       /* External function address should be loaded
10167 	 via the GOT slot to avoid PLT.  */
10168       if (ix86_force_load_from_GOT_p (x))
10169 	return false;
10170 
10171       break;
10172 
10173     CASE_CONST_SCALAR_INT:
10174       switch (mode)
10175 	{
10176 	case E_TImode:
10177 	  if (TARGET_64BIT)
10178 	    return true;
10179 	  /* FALLTHRU */
10180 	case E_OImode:
10181 	case E_XImode:
10182 	  if (!standard_sse_constant_p (x, mode))
10183 	    return false;
10184 	default:
10185 	  break;
10186 	}
10187       break;
10188 
10189     case CONST_VECTOR:
10190       if (!standard_sse_constant_p (x, mode))
10191 	return false;
10192 
10193     default:
10194       break;
10195     }
10196 
10197   /* Otherwise we handle everything else in the move patterns.  */
10198   return true;
10199 }
10200 
10201 /* Determine if it's legal to put X into the constant pool.  This
10202    is not possible for the address of thread-local symbols, which
10203    is checked above.  */
10204 
10205 static bool
10206 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
10207 {
10208   /* We can put any immediate constant in memory.  */
10209   switch (GET_CODE (x))
10210     {
10211     CASE_CONST_ANY:
10212       return false;
10213 
10214     default:
10215       break;
10216     }
10217 
10218   return !ix86_legitimate_constant_p (mode, x);
10219 }
10220 
10221 /* Return true if the symbol is marked as dllimport or as a stub
10222    variable, otherwise false.  */
10223 
10224 static bool
10225 is_imported_p (rtx x)
10226 {
10227   if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
10228       || GET_CODE (x) != SYMBOL_REF)
10229     return false;
10230 
10231   return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
10232 }
10233 
10234 
10235 /* Nonzero if the constant value X is a legitimate general operand
10236    when generating PIC code.  It is given that flag_pic is on and
10237    that X satisfies CONSTANT_P.  */
10238 
10239 bool
10240 legitimate_pic_operand_p (rtx x)
10241 {
10242   rtx inner;
10243 
10244   switch (GET_CODE (x))
10245     {
10246     case CONST:
10247       inner = XEXP (x, 0);
10248       if (GET_CODE (inner) == PLUS
10249 	  && CONST_INT_P (XEXP (inner, 1)))
10250 	inner = XEXP (inner, 0);
10251 
10252       /* Only some unspecs are valid as "constants".  */
10253       if (GET_CODE (inner) == UNSPEC)
10254 	switch (XINT (inner, 1))
10255 	  {
10256 	  case UNSPEC_GOT:
10257 	  case UNSPEC_GOTOFF:
10258 	  case UNSPEC_PLTOFF:
10259 	    return TARGET_64BIT;
10260 	  case UNSPEC_TPOFF:
10261 	    x = XVECEXP (inner, 0, 0);
10262 	    return (GET_CODE (x) == SYMBOL_REF
10263 		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10264 	  case UNSPEC_MACHOPIC_OFFSET:
10265 	    return legitimate_pic_address_disp_p (x);
10266 	  default:
10267 	    return false;
10268 	  }
10269       /* FALLTHRU */
10270 
10271     case SYMBOL_REF:
10272     case LABEL_REF:
10273       return legitimate_pic_address_disp_p (x);
10274 
10275     default:
10276       return true;
10277     }
10278 }
10279 
10280 /* Determine if a given CONST RTX is a valid memory displacement
10281    in PIC mode.  */
10282 
10283 bool
10284 legitimate_pic_address_disp_p (rtx disp)
10285 {
10286   bool saw_plus;
10287 
10288   /* In 64bit mode we can allow direct addresses of symbols and labels
10289      when they are not dynamic symbols.  */
10290   if (TARGET_64BIT)
10291     {
10292       rtx op0 = disp, op1;
10293 
10294       switch (GET_CODE (disp))
10295 	{
10296 	case LABEL_REF:
10297 	  return true;
10298 
10299 	case CONST:
10300 	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
10301 	    break;
10302 	  op0 = XEXP (XEXP (disp, 0), 0);
10303 	  op1 = XEXP (XEXP (disp, 0), 1);
10304 	  if (!CONST_INT_P (op1))
10305 	    break;
10306 	  if (GET_CODE (op0) == UNSPEC
10307 	      && (XINT (op0, 1) == UNSPEC_DTPOFF
10308 		  || XINT (op0, 1) == UNSPEC_NTPOFF)
10309 	      && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
10310 	    return true;
10311 	  if (INTVAL (op1) >= 16*1024*1024
10312 	      || INTVAL (op1) < -16*1024*1024)
10313 	    break;
10314 	  if (GET_CODE (op0) == LABEL_REF)
10315 	    return true;
10316 	  if (GET_CODE (op0) == CONST
10317 	      && GET_CODE (XEXP (op0, 0)) == UNSPEC
10318 	      && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
10319 	    return true;
10320 	  if (GET_CODE (op0) == UNSPEC
10321 	      && XINT (op0, 1) == UNSPEC_PCREL)
10322 	    return true;
10323 	  if (GET_CODE (op0) != SYMBOL_REF)
10324 	    break;
10325 	  /* FALLTHRU */
10326 
10327 	case SYMBOL_REF:
10328 	  /* TLS references should always be enclosed in UNSPEC.
10329 	     A dllimported symbol always needs to be resolved.  */
10330 	  if (SYMBOL_REF_TLS_MODEL (op0)
10331 	      || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
10332 	    return false;
10333 
10334 	  if (TARGET_PECOFF)
10335 	    {
10336 	      if (is_imported_p (op0))
10337 		return true;
10338 
10339 	      if (SYMBOL_REF_FAR_ADDR_P (op0) || !SYMBOL_REF_LOCAL_P (op0))
10340 		break;
10341 
10342 	      /* Non-external-weak function symbols need to be resolved only
10343 		 for the large model.  Non-external symbols don't need to be
10344 		 resolved for large and medium models.  For the small model,
10345 		 we don't need to resolve anything here.  */
10346 	      if ((ix86_cmodel != CM_LARGE_PIC
10347 		   && SYMBOL_REF_FUNCTION_P (op0)
10348 		   && !(SYMBOL_REF_EXTERNAL_P (op0) && SYMBOL_REF_WEAK (op0)))
10349 		  || !SYMBOL_REF_EXTERNAL_P (op0)
10350 		  || ix86_cmodel == CM_SMALL_PIC)
10351 		return true;
10352 	    }
10353 	  else if (!SYMBOL_REF_FAR_ADDR_P (op0)
10354 		   && (SYMBOL_REF_LOCAL_P (op0)
10355 		       || (HAVE_LD_PIE_COPYRELOC
10356 			   && flag_pie
10357 			   && !SYMBOL_REF_WEAK (op0)
10358 			   && !SYMBOL_REF_FUNCTION_P (op0)))
10359 		   && ix86_cmodel != CM_LARGE_PIC)
10360 	    return true;
10361 	  break;
10362 
10363 	default:
10364 	  break;
10365 	}
10366     }
10367   if (GET_CODE (disp) != CONST)
10368     return false;
10369   disp = XEXP (disp, 0);
10370 
10371   if (TARGET_64BIT)
10372     {
10373       /* It is unsafe to allow PLUS expressions.  This limits the allowed
10374          distance of GOT tables.  We should not need these anyway.  */
10375       if (GET_CODE (disp) != UNSPEC
10376 	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
10377 	      && XINT (disp, 1) != UNSPEC_GOTOFF
10378 	      && XINT (disp, 1) != UNSPEC_PCREL
10379 	      && XINT (disp, 1) != UNSPEC_PLTOFF))
10380 	return false;
10381 
10382       if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
10383 	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
10384 	return false;
10385       return true;
10386     }
10387 
10388   saw_plus = false;
10389   if (GET_CODE (disp) == PLUS)
10390     {
10391       if (!CONST_INT_P (XEXP (disp, 1)))
10392 	return false;
10393       disp = XEXP (disp, 0);
10394       saw_plus = true;
10395     }
10396 
10397   if (TARGET_MACHO && darwin_local_data_pic (disp))
10398     return true;
10399 
10400   if (GET_CODE (disp) != UNSPEC)
10401     return false;
10402 
10403   switch (XINT (disp, 1))
10404     {
10405     case UNSPEC_GOT:
10406       if (saw_plus)
10407 	return false;
10408       /* We need to check for both symbols and labels because VxWorks loads
10409 	 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
10410 	 details.  */
10411       return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10412 	      || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
10413     case UNSPEC_GOTOFF:
10414       /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
10415 	 While the ABI also specifies a 32bit relocation, we don't produce it
10416 	 in the small PIC model at all.  */
10417       if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10418 	   || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
10419 	  && !TARGET_64BIT)
10420         return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
10421       return false;
10422     case UNSPEC_GOTTPOFF:
10423     case UNSPEC_GOTNTPOFF:
10424     case UNSPEC_INDNTPOFF:
10425       if (saw_plus)
10426 	return false;
10427       disp = XVECEXP (disp, 0, 0);
10428       return (GET_CODE (disp) == SYMBOL_REF
10429 	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
10430     case UNSPEC_NTPOFF:
10431       disp = XVECEXP (disp, 0, 0);
10432       return (GET_CODE (disp) == SYMBOL_REF
10433 	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
10434     case UNSPEC_DTPOFF:
10435       disp = XVECEXP (disp, 0, 0);
10436       return (GET_CODE (disp) == SYMBOL_REF
10437 	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
10438     }
10439 
10440   return false;
10441 }
10442 
10443 /* Determine if OP is a suitable RTX for an address register.
10444    Return the naked register if a register or a register subreg is
10445    found, otherwise return NULL_RTX.  */
10446 
10447 static rtx
10448 ix86_validate_address_register (rtx op)
10449 {
10450   machine_mode mode = GET_MODE (op);
10451 
10452   /* Only SImode or DImode registers can form the address.  */
10453   if (mode != SImode && mode != DImode)
10454     return NULL_RTX;
10455 
10456   if (REG_P (op))
10457     return op;
10458   else if (SUBREG_P (op))
10459     {
10460       rtx reg = SUBREG_REG (op);
10461 
10462       if (!REG_P (reg))
10463 	return NULL_RTX;
10464 
10465       mode = GET_MODE (reg);
10466 
10467       /* Don't allow SUBREGs that span more than a word.  It can
10468 	 lead to spill failures when the register is one word out
10469 	 of a two word structure.  */
10470       if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
10471 	return NULL_RTX;
10472 
10473       /* Allow only SUBREGs of non-eliminable hard registers.  */
10474       if (register_no_elim_operand (reg, mode))
10475 	return reg;
10476     }
10477 
10478   /* Op is not a register.  */
10479   return NULL_RTX;
10480 }
10481 
10482 /* Recognizes RTL expressions that are valid memory addresses for an
10483    instruction.  The MODE argument is the machine mode for the MEM
10484    expression that wants to use this address.
10485 
10486    It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
10487    convert common non-canonical forms to canonical form so that they will
10488    be recognized.  */
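/* E.g. 4(%eax,%ebx,2) passes the checks below, while an address mixing
   SImode and DImode registers, or one using a scale other than 1, 2, 4
   or 8, is rejected.  */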
10489 
10490 static bool
10491 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
10492 {
10493   struct ix86_address parts;
10494   rtx base, index, disp;
10495   HOST_WIDE_INT scale;
10496   addr_space_t seg;
10497 
10498   if (ix86_decompose_address (addr, &parts) <= 0)
10499     /* Decomposition failed.  */
10500     return false;
10501 
10502   base = parts.base;
10503   index = parts.index;
10504   disp = parts.disp;
10505   scale = parts.scale;
10506   seg = parts.seg;
10507 
10508   /* Validate base register.  */
10509   if (base)
10510     {
10511       rtx reg = ix86_validate_address_register (base);
10512 
10513       if (reg == NULL_RTX)
10514 	return false;
10515 
10516       if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
10517 	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
10518 	/* Base is not valid.  */
10519 	return false;
10520     }
10521 
10522   /* Validate index register.  */
10523   if (index)
10524     {
10525       rtx reg = ix86_validate_address_register (index);
10526 
10527       if (reg == NULL_RTX)
10528 	return false;
10529 
10530       if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
10531 	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
10532 	/* Index is not valid.  */
10533 	return false;
10534     }
10535 
10536   /* Index and base should have the same mode.  */
10537   if (base && index
10538       && GET_MODE (base) != GET_MODE (index))
10539     return false;
10540 
10541   /* Address override works only on the (%reg) part of %fs:(%reg).  */
10542   if (seg != ADDR_SPACE_GENERIC
10543       && ((base && GET_MODE (base) != word_mode)
10544 	  || (index && GET_MODE (index) != word_mode)))
10545     return false;
10546 
10547   /* Validate scale factor.  */
10548   if (scale != 1)
10549     {
10550       if (!index)
10551 	/* Scale without index.  */
10552 	return false;
10553 
10554       if (scale != 2 && scale != 4 && scale != 8)
10555 	/* Scale is not a valid multiplier.  */
10556 	return false;
10557     }
10558 
10559   /* Validate displacement.  */
10560   if (disp)
10561     {
10562       if (GET_CODE (disp) == CONST
10563 	  && GET_CODE (XEXP (disp, 0)) == UNSPEC
10564 	  && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
10565 	switch (XINT (XEXP (disp, 0), 1))
10566 	  {
10567 	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
10568 	     when used.  While the ABI also specifies 32bit relocations, we
10569 	     don't produce them at all and use IP-relative addressing instead.
10570 	     Allow GOT in 32bit mode for both PIC and non-PIC if the symbol
10571 	     should be loaded via the GOT.  */
10572 	  case UNSPEC_GOT:
10573 	    if (!TARGET_64BIT
10574 		&& ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
10575 	      goto is_legitimate_pic;
10576 	    /* FALLTHRU */
10577 	  case UNSPEC_GOTOFF:
10578 	    gcc_assert (flag_pic);
10579 	    if (!TARGET_64BIT)
10580 	      goto is_legitimate_pic;
10581 
10582 	    /* 64bit address unspec.  */
10583 	    return false;
10584 
10585 	  case UNSPEC_GOTPCREL:
10586 	    if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
10587 	      goto is_legitimate_pic;
10588 	    /* FALLTHRU */
10589 	  case UNSPEC_PCREL:
10590 	    gcc_assert (flag_pic);
10591 	    goto is_legitimate_pic;
10592 
10593 	  case UNSPEC_GOTTPOFF:
10594 	  case UNSPEC_GOTNTPOFF:
10595 	  case UNSPEC_INDNTPOFF:
10596 	  case UNSPEC_NTPOFF:
10597 	  case UNSPEC_DTPOFF:
10598 	    break;
10599 
10600 	  default:
10601 	    /* Invalid address unspec.  */
10602 	    return false;
10603 	  }
10604 
10605       else if (SYMBOLIC_CONST (disp)
10606 	       && (flag_pic
10607 		   || (TARGET_MACHO
10608 #if TARGET_MACHO
10609 		       && MACHOPIC_INDIRECT
10610 		       && !machopic_operand_p (disp)
10611 #endif
10612 	       )))
10613 	{
10614 
10615 	is_legitimate_pic:
10616 	  if (TARGET_64BIT && (index || base))
10617 	    {
10618 	      /* foo@dtpoff(%rX) is ok.  */
10619 	      if (GET_CODE (disp) != CONST
10620 		  || GET_CODE (XEXP (disp, 0)) != PLUS
10621 		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
10622 		  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
10623 		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
10624 		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
10625 		/* Non-constant pic memory reference.  */
10626 		return false;
10627 	    }
10628 	  else if ((!TARGET_MACHO || flag_pic)
10629 		    && ! legitimate_pic_address_disp_p (disp))
10630 	    /* Displacement is an invalid pic construct.  */
10631 	    return false;
10632 #if TARGET_MACHO
10633 	  else if (MACHO_DYNAMIC_NO_PIC_P
10634 		   && !ix86_legitimate_constant_p (Pmode, disp))
10635 	    /* Displacement must be referenced via non_lazy_pointer.  */
10636 	    return false;
10637 #endif
10638 
10639           /* This code used to verify that a symbolic pic displacement
10640 	     includes the pic_offset_table_rtx register.
10641 
10642 	     While this is a good idea, unfortunately these constructs may
10643 	     be created by the "adds using lea" optimization for incorrect
10644 	     code like:
10645 
10646 	     int a;
10647 	     int foo(int i)
10648 	       {
10649 	         return *(&a+i);
10650 	       }
10651 
10652 	     This code is nonsensical, but results in addressing the
10653 	     GOT table with a pic_offset_table_rtx base.  We can't
10654 	     just refuse it easily, since it gets matched by the
10655 	     "addsi3" pattern, which later gets split to lea in the
10656 	     case where the output register differs from the input.  While
10657 	     this could be handled by a separate addsi pattern for this case
10658 	     that never results in lea, disabling this test seems to be the
10659 	     easier and correct fix for the crash.  */
10660 	}
10661       else if (GET_CODE (disp) != LABEL_REF
10662 	       && !CONST_INT_P (disp)
10663 	       && (GET_CODE (disp) != CONST
10664 		   || !ix86_legitimate_constant_p (Pmode, disp))
10665 	       && (GET_CODE (disp) != SYMBOL_REF
10666 		   || !ix86_legitimate_constant_p (Pmode, disp)))
10667 	/* Displacement is not constant.  */
10668 	return false;
10669       else if (TARGET_64BIT
10670 	       && !x86_64_immediate_operand (disp, VOIDmode))
10671 	/* Displacement is out of range.  */
10672 	return false;
10673       /* In x32 mode, constant addresses are sign extended to 64bit, so
10674 	 we have to prevent addresses from 0x80000000 to 0xffffffff.  */
10675       else if (TARGET_X32 && !(index || base)
10676 	       && CONST_INT_P (disp)
10677 	       && val_signbit_known_set_p (SImode, INTVAL (disp)))
10678 	return false;
10679     }
10680 
10681   /* Everything looks valid.  */
10682   return true;
10683 }
10684 
10685 /* Determine if a given RTX is a valid constant address.  */
10686 
10687 bool
10688 constant_address_p (rtx x)
10689 {
10690   return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
10691 }
10692 
10693 /* Return a unique alias set for the GOT.  */
10694 
10695 alias_set_type
10696 ix86_GOT_alias_set (void)
10697 {
10698   static alias_set_type set = -1;
10699   if (set == -1)
10700     set = new_alias_set ();
10701   return set;
10702 }
10703 
10704 /* Return a legitimate reference for ORIG (an address) using the
10705    register REG.  If REG is 0, a new pseudo is generated.
10706 
10707    There are two types of references that must be handled:
10708 
10709    1. Global data references must load the address from the GOT, via
10710       the PIC reg.  An insn is emitted to do this load, and the reg is
10711       returned.
10712 
10713    2. Static data references, constant pool addresses, and code labels
10714       compute the address as an offset from the GOT, whose base is in
10715       the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
10716       differentiate them from global data objects.  The returned
10717       address is the PIC reg + an unspec constant.
10718 
10719    TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
10720    reg also appears in the address.  */
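/* As a rough 32-bit example, a reference to a global variable x becomes

       (mem (plus pic_offset_table_rtx
		  (const (unspec [(symbol_ref "x")] UNSPEC_GOT))))

   i.e. a load through the GOT slot (movl x@GOT(%ebx), ...), while a
   local/static object is addressed as x@GOTOFF(%ebx) without the extra
   load.  */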
10721 
10722 rtx
10723 legitimize_pic_address (rtx orig, rtx reg)
10724 {
10725   rtx addr = orig;
10726   rtx new_rtx = orig;
10727 
10728 #if TARGET_MACHO
10729   if (TARGET_MACHO && !TARGET_64BIT)
10730     {
10731       if (reg == 0)
10732 	reg = gen_reg_rtx (Pmode);
10733       /* Use the generic Mach-O PIC machinery.  */
10734       return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
10735     }
10736 #endif
10737 
10738   if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10739     {
10740       rtx tmp = legitimize_pe_coff_symbol (addr, true);
10741       if (tmp)
10742         return tmp;
10743     }
10744 
10745   if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
10746     new_rtx = addr;
10747   else if ((!TARGET_64BIT
10748 	    || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
10749 	   && !TARGET_PECOFF
10750 	   && gotoff_operand (addr, Pmode))
10751     {
10752       /* This symbol may be referenced via a displacement
10753 	 from the PIC base address (@GOTOFF).  */
10754       if (GET_CODE (addr) == CONST)
10755 	addr = XEXP (addr, 0);
10756 
10757       if (GET_CODE (addr) == PLUS)
10758 	  {
10759             new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
10760 				      UNSPEC_GOTOFF);
10761 	    new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
10762 	  }
10763 	else
10764           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
10765 
10766       new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10767 
10768       if (TARGET_64BIT)
10769 	new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
10770 
10771       if (reg != 0)
10772 	{
10773  	  gcc_assert (REG_P (reg));
10774 	  new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
10775 					 new_rtx, reg, 1, OPTAB_DIRECT);
10776  	}
10777       else
10778 	new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10779     }
10780   else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
10781 	   /* We can't use @GOTOFF for text labels
10782 	      on VxWorks, see gotoff_operand.  */
10783 	   || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
10784     {
10785       rtx tmp = legitimize_pe_coff_symbol (addr, true);
10786       if (tmp)
10787         return tmp;
10788 
10789       /* For x64 PE-COFF there is no GOT table,
10790 	 so we use the address directly.  */
10791       if (TARGET_64BIT && TARGET_PECOFF)
10792 	{
10793 	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
10794 	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10795 	}
10796       else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
10797 	{
10798 	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
10799 				    UNSPEC_GOTPCREL);
10800 	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10801 	  new_rtx = gen_const_mem (Pmode, new_rtx);
10802 	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10803 	}
10804       else
10805 	{
10806 	  /* This symbol must be referenced via a load
10807 	     from the Global Offset Table (@GOT).  */
10808 	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
10809 	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10810 	  if (TARGET_64BIT)
10811 	    new_rtx = force_reg (Pmode, new_rtx);
10812 	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10813 	  new_rtx = gen_const_mem (Pmode, new_rtx);
10814 	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10815 	}
10816 
10817       new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
10818     }
10819   else
10820     {
10821       if (CONST_INT_P (addr)
10822 	  && !x86_64_immediate_operand (addr, VOIDmode))
10823 	new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
10824       else if (GET_CODE (addr) == CONST)
10825 	{
10826 	  addr = XEXP (addr, 0);
10827 
10828 	  /* We must match stuff we generate before.  Assume the only
10829 	     unspecs that can get here are ours.  Not that we could do
10830 	     anything with them anyway....  */
10831 	  if (GET_CODE (addr) == UNSPEC
10832 	      || (GET_CODE (addr) == PLUS
10833 		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
10834 	    return orig;
10835 	  gcc_assert (GET_CODE (addr) == PLUS);
10836 	}
10837 
10838       if (GET_CODE (addr) == PLUS)
10839 	{
10840 	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
10841 
10842 	  /* Check first to see if this is a constant
10843 	     offset from a @GOTOFF symbol reference.  */
10844 	  if (!TARGET_PECOFF
10845 	      && gotoff_operand (op0, Pmode)
10846 	      && CONST_INT_P (op1))
10847 	    {
10848 	      if (!TARGET_64BIT)
10849 		{
10850 		  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
10851 					    UNSPEC_GOTOFF);
10852 		  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
10853 		  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10854 
10855 		  if (reg != 0)
10856 		    {
10857 		      gcc_assert (REG_P (reg));
10858 		      new_rtx = expand_simple_binop (Pmode, PLUS,
10859 						     pic_offset_table_rtx,
10860 						     new_rtx, reg, 1,
10861 						     OPTAB_DIRECT);
10862 		    }
10863 		  else
10864 		    new_rtx
10865 		      = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10866 		}
10867 	      else
10868 		{
10869 		  if (INTVAL (op1) < -16*1024*1024
10870 		      || INTVAL (op1) >= 16*1024*1024)
10871 		    {
10872 		      if (!x86_64_immediate_operand (op1, Pmode))
10873 			op1 = force_reg (Pmode, op1);
10874 
10875 		      new_rtx
10876 			= gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
10877 		    }
10878 		}
10879 	    }
10880 	  else
10881 	    {
10882 	      rtx base = legitimize_pic_address (op0, reg);
10883 	      machine_mode mode = GET_MODE (base);
10884 	      new_rtx
10885 	        = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
10886 
10887 	      if (CONST_INT_P (new_rtx))
10888 		{
10889 		  if (INTVAL (new_rtx) < -16*1024*1024
10890 		      || INTVAL (new_rtx) >= 16*1024*1024)
10891 		    {
10892 		      if (!x86_64_immediate_operand (new_rtx, mode))
10893 			new_rtx = force_reg (mode, new_rtx);
10894 
10895 		      new_rtx
10896 		        = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
10897 		    }
10898 		  else
10899 		    new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
10900 		}
10901 	      else
10902 		{
10903 		  /* For %rip addressing, we have to use
10904 		     just disp32, not base nor index.  */
10905 		  if (TARGET_64BIT
10906 		      && (GET_CODE (base) == SYMBOL_REF
10907 			  || GET_CODE (base) == LABEL_REF))
10908 		    base = force_reg (mode, base);
10909 		  if (GET_CODE (new_rtx) == PLUS
10910 		      && CONSTANT_P (XEXP (new_rtx, 1)))
10911 		    {
10912 		      base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
10913 		      new_rtx = XEXP (new_rtx, 1);
10914 		    }
10915 		  new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
10916 		}
10917 	    }
10918 	}
10919     }
10920   return new_rtx;
10921 }
10922 
10923 /* Load the thread pointer.  If TO_REG is true, force it into a register.  */
10924 
10925 static rtx
10926 get_thread_pointer (machine_mode tp_mode, bool to_reg)
10927 {
10928   rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
10929 
10930   if (GET_MODE (tp) != tp_mode)
10931     {
10932       gcc_assert (GET_MODE (tp) == SImode);
10933       gcc_assert (tp_mode == DImode);
10934 
10935       tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
10936     }
10937 
10938   if (to_reg)
10939     tp = copy_to_mode_reg (tp_mode, tp);
10940 
10941   return tp;
10942 }
10943 
10944 /* Construct the SYMBOL_REF for the tls_get_addr function.  */
10945 
10946 static GTY(()) rtx ix86_tls_symbol;
10947 
10948 static rtx
10949 ix86_tls_get_addr (void)
10950 {
10951   if (!ix86_tls_symbol)
10952     {
10953       const char *sym
10954 	= ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
10955 	   ? "___tls_get_addr" : "__tls_get_addr");
10956 
10957       ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
10958     }
10959 
10960   if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
10961     {
10962       rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
10963 				   UNSPEC_PLTOFF);
10964       return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
10965 			   gen_rtx_CONST (Pmode, unspec));
10966     }
10967 
10968   return ix86_tls_symbol;
10969 }
10970 
10971 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */
10972 
10973 static GTY(()) rtx ix86_tls_module_base_symbol;
10974 
10975 rtx
10976 ix86_tls_module_base (void)
10977 {
10978   if (!ix86_tls_module_base_symbol)
10979     {
10980       ix86_tls_module_base_symbol
10981 	= gen_rtx_SYMBOL_REF (ptr_mode, "_TLS_MODULE_BASE_");
10982 
10983       SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
10984 	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
10985     }
10986 
10987   return ix86_tls_module_base_symbol;
10988 }
10989 
10990 /* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
10991    false if we expect this to be used for a memory address and true if
10992    we expect to load the address into a register.  */
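/* Very roughly, on x86-64 with GNU TLS the models handled below
   correspond to code sequences such as:

     global dynamic:  lea x@tlsgd(%rip),%rdi; call __tls_get_addr
     local dynamic:   lea x@tlsld(%rip),%rdi; call __tls_get_addr;
		      lea x@dtpoff(%rax),...
     initial exec:    mov x@gottpoff(%rip),%reg; add %fs:0,%reg
     local exec:      mov %fs:0,%reg; lea x@tpoff(%reg),...

   The exact sequences depend on TARGET_GNU2_TLS, PIC and the ABI.  */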
10993 
10994 rtx
10995 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
10996 {
10997   rtx dest, base, off;
10998   rtx pic = NULL_RTX, tp = NULL_RTX;
10999   machine_mode tp_mode = Pmode;
11000   int type;
11001 
11002   /* Fall back to global dynamic model if tool chain cannot support local
11003      dynamic.  */
11004   if (TARGET_SUN_TLS && !TARGET_64BIT
11005       && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
11006       && model == TLS_MODEL_LOCAL_DYNAMIC)
11007     model = TLS_MODEL_GLOBAL_DYNAMIC;
11008 
11009   switch (model)
11010     {
11011     case TLS_MODEL_GLOBAL_DYNAMIC:
11012       if (!TARGET_64BIT)
11013 	{
11014 	  if (flag_pic && !TARGET_PECOFF)
11015 	    pic = pic_offset_table_rtx;
11016 	  else
11017 	    {
11018 	      pic = gen_reg_rtx (Pmode);
11019 	      emit_insn (gen_set_got (pic));
11020 	    }
11021 	}
11022 
11023       if (TARGET_GNU2_TLS)
11024 	{
11025 	  dest = gen_reg_rtx (ptr_mode);
11026 	  if (TARGET_64BIT)
11027 	    emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, dest, x));
11028 	  else
11029 	    emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
11030 
11031 	  tp = get_thread_pointer (ptr_mode, true);
11032 	  dest = gen_rtx_PLUS (ptr_mode, tp, dest);
11033 	  if (GET_MODE (dest) != Pmode)
11034 	     dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
11035 	  dest = force_reg (Pmode, dest);
11036 
11037 	  if (GET_MODE (x) != Pmode)
11038 	    x = gen_rtx_ZERO_EXTEND (Pmode, x);
11039 
11040 	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
11041 	}
11042       else
11043 	{
11044 	  rtx caddr = ix86_tls_get_addr ();
11045 
11046 	  dest = gen_reg_rtx (Pmode);
11047 	  if (TARGET_64BIT)
11048 	    {
11049 	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
11050 	      rtx_insn *insns;
11051 
11052 	      start_sequence ();
11053 	      emit_call_insn
11054 		(gen_tls_global_dynamic_64 (Pmode, rax, x, caddr));
11055 	      insns = get_insns ();
11056 	      end_sequence ();
11057 
11058 	      if (GET_MODE (x) != Pmode)
11059 		x = gen_rtx_ZERO_EXTEND (Pmode, x);
11060 
11061 	      RTL_CONST_CALL_P (insns) = 1;
11062 	      emit_libcall_block (insns, dest, rax, x);
11063 	    }
11064 	  else
11065 	    emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
11066 	}
11067       break;
11068 
11069     case TLS_MODEL_LOCAL_DYNAMIC:
11070       if (!TARGET_64BIT)
11071 	{
11072 	  if (flag_pic)
11073 	    pic = pic_offset_table_rtx;
11074 	  else
11075 	    {
11076 	      pic = gen_reg_rtx (Pmode);
11077 	      emit_insn (gen_set_got (pic));
11078 	    }
11079 	}
11080 
11081       if (TARGET_GNU2_TLS)
11082 	{
11083 	  rtx tmp = ix86_tls_module_base ();
11084 
11085 	  base = gen_reg_rtx (ptr_mode);
11086 	  if (TARGET_64BIT)
11087 	    emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, base, tmp));
11088 	  else
11089 	    emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
11090 
11091 	  tp = get_thread_pointer (ptr_mode, true);
11092 	  if (GET_MODE (base) != Pmode)
11093 	    base = gen_rtx_ZERO_EXTEND (Pmode, base);
11094 	  base = force_reg (Pmode, base);
11095 	}
11096       else
11097 	{
11098 	  rtx caddr = ix86_tls_get_addr ();
11099 
11100 	  base = gen_reg_rtx (Pmode);
11101 	  if (TARGET_64BIT)
11102 	    {
11103 	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
11104 	      rtx_insn *insns;
11105 	      rtx eqv;
11106 
11107 	      start_sequence ();
11108 	      emit_call_insn
11109 		(gen_tls_local_dynamic_base_64 (Pmode, rax, caddr));
11110 	      insns = get_insns ();
11111 	      end_sequence ();
11112 
11113 	      /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
11114 		 share the LD_BASE result with other LD model accesses.  */
11115 	      eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11116 				    UNSPEC_TLS_LD_BASE);
11117 
11118 	      RTL_CONST_CALL_P (insns) = 1;
11119 	      emit_libcall_block (insns, base, rax, eqv);
11120 	    }
11121 	  else
11122 	    emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
11123 	}
11124 
11125       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
11126       off = gen_rtx_CONST (Pmode, off);
11127 
11128       dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
11129 
11130       if (TARGET_GNU2_TLS)
11131 	{
11132 	  if (GET_MODE (tp) != Pmode)
11133 	    {
11134 	      dest = lowpart_subreg (ptr_mode, dest, Pmode);
11135 	      dest = gen_rtx_PLUS (ptr_mode, tp, dest);
11136 	      dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
11137 	    }
11138 	  else
11139 	    dest = gen_rtx_PLUS (Pmode, tp, dest);
11140 	  dest = force_reg (Pmode, dest);
11141 
11142 	  if (GET_MODE (x) != Pmode)
11143 	    x = gen_rtx_ZERO_EXTEND (Pmode, x);
11144 
11145 	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
11146 	}
11147       break;
11148 
11149     case TLS_MODEL_INITIAL_EXEC:
11150       if (TARGET_64BIT)
11151 	{
11152 	  if (TARGET_SUN_TLS && !TARGET_X32)
11153 	    {
11154 	      /* The Sun linker took the AMD64 TLS spec literally
11155 		 and can only handle %rax as destination of the
11156 		 initial executable code sequence.  */
11157 
11158 	      dest = gen_reg_rtx (DImode);
11159 	      emit_insn (gen_tls_initial_exec_64_sun (dest, x));
11160 	      return dest;
11161 	    }
11162 
11163 	  /* Generate DImode references to avoid %fs:(%reg32)
11164 	     problems and linker IE->LE relaxation bug.  */
11165 	  tp_mode = DImode;
11166 	  pic = NULL;
11167 	  type = UNSPEC_GOTNTPOFF;
11168 	}
11169       else if (flag_pic)
11170 	{
11171 	  pic = pic_offset_table_rtx;
11172 	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
11173 	}
11174       else if (!TARGET_ANY_GNU_TLS)
11175 	{
11176 	  pic = gen_reg_rtx (Pmode);
11177 	  emit_insn (gen_set_got (pic));
11178 	  type = UNSPEC_GOTTPOFF;
11179 	}
11180       else
11181 	{
11182 	  pic = NULL;
11183 	  type = UNSPEC_INDNTPOFF;
11184 	}
11185 
11186       off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
11187       off = gen_rtx_CONST (tp_mode, off);
11188       if (pic)
11189 	off = gen_rtx_PLUS (tp_mode, pic, off);
11190       off = gen_const_mem (tp_mode, off);
11191       set_mem_alias_set (off, ix86_GOT_alias_set ());
11192 
11193       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11194 	{
11195 	  base = get_thread_pointer (tp_mode,
11196 				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11197 	  off = force_reg (tp_mode, off);
11198 	  dest = gen_rtx_PLUS (tp_mode, base, off);
11199 	  if (tp_mode != Pmode)
11200 	    dest = convert_to_mode (Pmode, dest, 1);
11201 	}
11202       else
11203 	{
11204 	  base = get_thread_pointer (Pmode, true);
11205 	  dest = gen_reg_rtx (Pmode);
11206 	  emit_insn (gen_sub3_insn (dest, base, off));
11207 	}
11208       break;
11209 
11210     case TLS_MODEL_LOCAL_EXEC:
11211       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
11212 			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11213 			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
11214       off = gen_rtx_CONST (Pmode, off);
11215 
11216       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11217 	{
11218 	  base = get_thread_pointer (Pmode,
11219 				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11220 	  return gen_rtx_PLUS (Pmode, base, off);
11221 	}
11222       else
11223 	{
11224 	  base = get_thread_pointer (Pmode, true);
11225 	  dest = gen_reg_rtx (Pmode);
11226 	  emit_insn (gen_sub3_insn (dest, base, off));
11227 	}
11228       break;
11229 
11230     default:
11231       gcc_unreachable ();
11232     }
11233 
11234   return dest;
11235 }
11236 
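/* For illustration (not from the original sources): on 64-bit ELF
   targets the exec models handled above typically end up as sequences
   along the lines of

	movq	foo@gottpoff(%rip), %rax	# initial exec
	movq	%fs:(%rax), %rdx

	movq	%fs:0, %rax			# local exec
	movq	foo@tpoff(%rax), %rdx

   while 32-bit code uses %gs and the @ntpoff/@indntpoff style
   relocations printed by output_pic_addr_const below.  */
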
11237 /* Return true if OP refers to a TLS address.  */
11238 bool
11239 ix86_tls_address_pattern_p (rtx op)
11240 {
11241   subrtx_var_iterator::array_type array;
11242   FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
11243     {
11244       rtx op = *iter;
11245       if (MEM_P (op))
11246 	{
11247 	  rtx *x = &XEXP (op, 0);
11248 	  while (GET_CODE (*x) == PLUS)
11249 	    {
11250 	      int i;
11251 	      for (i = 0; i < 2; i++)
11252 		{
11253 		  rtx u = XEXP (*x, i);
11254 		  if (GET_CODE (u) == ZERO_EXTEND)
11255 		    u = XEXP (u, 0);
11256 		  if (GET_CODE (u) == UNSPEC
11257 		      && XINT (u, 1) == UNSPEC_TP)
11258 		    return true;
11259 		}
11260 	      x = &XEXP (*x, 0);
11261 	    }
11262 
11263 	  iter.skip_subrtxes ();
11264 	}
11265     }
11266 
11267   return false;
11268 }
11269 
11270 /* Rewrite *LOC so that it refers to a default TLS address space.  */
11271 void
11272 ix86_rewrite_tls_address_1 (rtx *loc)
11273 {
11274   subrtx_ptr_iterator::array_type array;
11275   FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
11276     {
11277       rtx *loc = *iter;
11278       if (MEM_P (*loc))
11279 	{
11280 	  rtx addr = XEXP (*loc, 0);
11281 	  rtx *x = &addr;
11282 	  while (GET_CODE (*x) == PLUS)
11283 	    {
11284 	      int i;
11285 	      for (i = 0; i < 2; i++)
11286 		{
11287 		  rtx u = XEXP (*x, i);
11288 		  if (GET_CODE (u) == ZERO_EXTEND)
11289 		    u = XEXP (u, 0);
11290 		  if (GET_CODE (u) == UNSPEC
11291 		      && XINT (u, 1) == UNSPEC_TP)
11292 		    {
11293 		      addr_space_t as = DEFAULT_TLS_SEG_REG;
11294 
11295 		      *x = XEXP (*x, 1 - i);
11296 
11297 		      *loc = replace_equiv_address_nv (*loc, addr, true);
11298 		      set_mem_addr_space (*loc, as);
11299 		      return;
11300 		    }
11301 		}
11302 	      x = &XEXP (*x, 0);
11303 	    }
11304 
11305 	  iter.skip_subrtxes ();
11306 	}
11307     }
11308 }
11309 
11310 /* Rewrite an instruction pattern involving a TLS address
11311    so that it refers to a default TLS address space.  */
11312 rtx
11313 ix86_rewrite_tls_address (rtx pattern)
11314 {
11315   pattern = copy_insn (pattern);
11316   ix86_rewrite_tls_address_1 (&pattern);
11317   return pattern;
11318 }
11319 
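/* As a rough example of the rewrite above, a memory address such as

     (plus (unspec [(const_int 0)] UNSPEC_TP) (symbol_ref "foo"))

   has its UNSPEC_TP term stripped and the containing MEM switched to
   the %fs/%gs TLS address space, so a direct segment-prefixed
   reference can be emitted.  */
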
11320 /* Create or return the unique __imp_DECL dllimport symbol corresponding
11321    to symbol DECL if BEIMPORT is true.  Otherwise create or return the
11322    unique refptr-DECL symbol corresponding to symbol DECL.  */
11323 
11324 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
11325 {
11326   static inline hashval_t hash (tree_map *m) { return m->hash; }
11327   static inline bool
11328   equal (tree_map *a, tree_map *b)
11329   {
11330     return a->base.from == b->base.from;
11331   }
11332 
11333   static int
11334   keep_cache_entry (tree_map *&m)
11335   {
11336     return ggc_marked_p (m->base.from);
11337   }
11338 };
11339 
11340 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
11341 
11342 static tree
11343 get_dllimport_decl (tree decl, bool beimport)
11344 {
11345   struct tree_map *h, in;
11346   const char *name;
11347   const char *prefix;
11348   size_t namelen, prefixlen;
11349   char *imp_name;
11350   tree to;
11351   rtx rtl;
11352 
11353   if (!dllimport_map)
11354     dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
11355 
11356   in.hash = htab_hash_pointer (decl);
11357   in.base.from = decl;
11358   tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
11359   h = *loc;
11360   if (h)
11361     return h->to;
11362 
11363   *loc = h = ggc_alloc<tree_map> ();
11364   h->hash = in.hash;
11365   h->base.from = decl;
11366   h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
11367 			   VAR_DECL, NULL, ptr_type_node);
11368   DECL_ARTIFICIAL (to) = 1;
11369   DECL_IGNORED_P (to) = 1;
11370   DECL_EXTERNAL (to) = 1;
11371   TREE_READONLY (to) = 1;
11372 
11373   name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
11374   name = targetm.strip_name_encoding (name);
11375   if (beimport)
11376     prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
11377       ? "*__imp_" : "*__imp__";
11378   else
11379     prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
11380   namelen = strlen (name);
11381   prefixlen = strlen (prefix);
11382   imp_name = (char *) alloca (namelen + prefixlen + 1);
11383   memcpy (imp_name, prefix, prefixlen);
11384   memcpy (imp_name + prefixlen, name, namelen + 1);
11385 
11386   name = ggc_alloc_string (imp_name, namelen + prefixlen);
11387   rtl = gen_rtx_SYMBOL_REF (Pmode, name);
11388   SET_SYMBOL_REF_DECL (rtl, to);
11389   SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
11390   if (!beimport)
11391     {
11392       SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
11393 #ifdef SUB_TARGET_RECORD_STUB
11394       SUB_TARGET_RECORD_STUB (name);
11395 #endif
11396     }
11397 
11398   rtl = gen_const_mem (Pmode, rtl);
11399   set_mem_alias_set (rtl, ix86_GOT_alias_set ());
11400 
11401   SET_DECL_RTL (to, rtl);
11402   SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
11403 
11404   return to;
11405 }
11406 
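/* For example, a dllimport reference to "foo" is routed through the
   pointer cell "__imp_foo" (or "__imp__foo" on targets that prefix
   user labels with an underscore), while the !BEIMPORT path creates
   the ".refptr.foo" style stubs used below for external data under
   the medium/large PIC code models.  */
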
11407 /* Expand SYMBOL into its corresponding far-address symbol.
11408    WANT_REG is true if we require the result be a register.  */
11409 
11410 static rtx
11411 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
11412 {
11413   tree imp_decl;
11414   rtx x;
11415 
11416   gcc_assert (SYMBOL_REF_DECL (symbol));
11417   imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
11418 
11419   x = DECL_RTL (imp_decl);
11420   if (want_reg)
11421     x = force_reg (Pmode, x);
11422   return x;
11423 }
11424 
11425 /* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
11426    true if we require the result be a register.  */
11427 
11428 static rtx
11429 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
11430 {
11431   tree imp_decl;
11432   rtx x;
11433 
11434   gcc_assert (SYMBOL_REF_DECL (symbol));
11435   imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
11436 
11437   x = DECL_RTL (imp_decl);
11438   if (want_reg)
11439     x = force_reg (Pmode, x);
11440   return x;
11441 }
11442 
11443 /* Expand SYMBOL into its corresponding dllimport or refptr symbol.  WANT_REG
11444    is true if we require the result be a register.  */
11445 
11446 rtx
11447 legitimize_pe_coff_symbol (rtx addr, bool inreg)
11448 {
11449   if (!TARGET_PECOFF)
11450     return NULL_RTX;
11451 
11452   if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11453     {
11454       if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
11455 	return legitimize_dllimport_symbol (addr, inreg);
11456       if (GET_CODE (addr) == CONST
11457 	  && GET_CODE (XEXP (addr, 0)) == PLUS
11458 	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11459 	  && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
11460 	{
11461 	  rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
11462 	  return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11463 	}
11464     }
11465 
11466   if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
11467     return NULL_RTX;
11468   if (GET_CODE (addr) == SYMBOL_REF
11469       && !is_imported_p (addr)
11470       && SYMBOL_REF_EXTERNAL_P (addr)
11471       && SYMBOL_REF_DECL (addr))
11472     return legitimize_pe_coff_extern_decl (addr, inreg);
11473 
11474   if (GET_CODE (addr) == CONST
11475       && GET_CODE (XEXP (addr, 0)) == PLUS
11476       && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11477       && !is_imported_p (XEXP (XEXP (addr, 0), 0))
11478       && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
11479       && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
11480     {
11481       rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
11482       return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11483     }
11484   return NULL_RTX;
11485 }
11486 
11487 /* Try machine-dependent ways of modifying an illegitimate address
11488    to be legitimate.  If we find one, return the new, valid address.
11489    This macro is used in only one place: `memory_address' in explow.c.
11490 
11491    OLDX is the address as it was before break_out_memory_refs was called.
11492    In some cases it is useful to look at this to decide what needs to be done.
11493 
11494    It is always safe for this macro to do nothing.  It exists to recognize
11495    opportunities to optimize the output.
11496 
11497    For the 80386, we handle X+REG by loading X into a register R and
11498    using R+REG.  R will go in a general reg and indexing will be used.
11499    However, if REG is a broken-out memory address or multiplication,
11500    nothing needs to be done because REG can certainly go in a general reg.
11501 
11502    When -fpic is used, special handling is needed for symbolic references.
11503    See comments by legitimize_pic_address in i386.c for details.  */
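
/* For instance, the ASHIFT canonicalization below turns

     (plus (ashift (reg) (const_int 2)) (reg))

   into

     (plus (mult (reg) (const_int 4)) (reg))

   which matches the scaled-index forms that ix86_legitimate_address_p
   accepts.  */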
11504 
11505 static rtx
11506 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
11507 {
11508   bool changed = false;
11509   unsigned log;
11510 
11511   log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
11512   if (log)
11513     return legitimize_tls_address (x, (enum tls_model) log, false);
11514   if (GET_CODE (x) == CONST
11515       && GET_CODE (XEXP (x, 0)) == PLUS
11516       && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11517       && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
11518     {
11519       rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
11520 				      (enum tls_model) log, false);
11521       return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11522     }
11523 
11524   if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11525     {
11526       rtx tmp = legitimize_pe_coff_symbol (x, true);
11527       if (tmp)
11528         return tmp;
11529     }
11530 
11531   if (flag_pic && SYMBOLIC_CONST (x))
11532     return legitimize_pic_address (x, 0);
11533 
11534 #if TARGET_MACHO
11535   if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
11536     return machopic_indirect_data_reference (x, 0);
11537 #endif
11538 
11539   /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
11540   if (GET_CODE (x) == ASHIFT
11541       && CONST_INT_P (XEXP (x, 1))
11542       && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
11543     {
11544       changed = true;
11545       log = INTVAL (XEXP (x, 1));
11546       x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
11547 			GEN_INT (1 << log));
11548     }
11549 
11550   if (GET_CODE (x) == PLUS)
11551     {
11552       /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
11553 
11554       if (GET_CODE (XEXP (x, 0)) == ASHIFT
11555 	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11556 	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
11557 	{
11558 	  changed = true;
11559 	  log = INTVAL (XEXP (XEXP (x, 0), 1));
11560 	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
11561 				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
11562 				      GEN_INT (1 << log));
11563 	}
11564 
11565       if (GET_CODE (XEXP (x, 1)) == ASHIFT
11566 	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11567 	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
11568 	{
11569 	  changed = true;
11570 	  log = INTVAL (XEXP (XEXP (x, 1), 1));
11571 	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
11572 				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
11573 				      GEN_INT (1 << log));
11574 	}
11575 
11576       /* Put multiply first if it isn't already.  */
11577       if (GET_CODE (XEXP (x, 1)) == MULT)
11578 	{
11579 	  std::swap (XEXP (x, 0), XEXP (x, 1));
11580 	  changed = true;
11581 	}
11582 
11583       /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
11584 	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
11585 	 created by virtual register instantiation, register elimination, and
11586 	 similar optimizations.  */
11587       if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
11588 	{
11589 	  changed = true;
11590 	  x = gen_rtx_PLUS (Pmode,
11591 			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
11592 					  XEXP (XEXP (x, 1), 0)),
11593 			    XEXP (XEXP (x, 1), 1));
11594 	}
11595 
11596       /* Canonicalize
11597 	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
11598 	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
11599       else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
11600 	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11601 	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
11602 	       && CONSTANT_P (XEXP (x, 1)))
11603 	{
11604 	  rtx constant;
11605 	  rtx other = NULL_RTX;
11606 
11607 	  if (CONST_INT_P (XEXP (x, 1)))
11608 	    {
11609 	      constant = XEXP (x, 1);
11610 	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
11611 	    }
11612 	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
11613 	    {
11614 	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
11615 	      other = XEXP (x, 1);
11616 	    }
11617 	  else
11618 	    constant = 0;
11619 
11620 	  if (constant)
11621 	    {
11622 	      changed = true;
11623 	      x = gen_rtx_PLUS (Pmode,
11624 				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
11625 					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
11626 				plus_constant (Pmode, other,
11627 					       INTVAL (constant)));
11628 	    }
11629 	}
11630 
11631       if (changed && ix86_legitimate_address_p (mode, x, false))
11632 	return x;
11633 
11634       if (GET_CODE (XEXP (x, 0)) == MULT)
11635 	{
11636 	  changed = true;
11637 	  XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
11638 	}
11639 
11640       if (GET_CODE (XEXP (x, 1)) == MULT)
11641 	{
11642 	  changed = true;
11643 	  XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
11644 	}
11645 
11646       if (changed
11647 	  && REG_P (XEXP (x, 1))
11648 	  && REG_P (XEXP (x, 0)))
11649 	return x;
11650 
11651       if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
11652 	{
11653 	  changed = true;
11654 	  x = legitimize_pic_address (x, 0);
11655 	}
11656 
11657       if (changed && ix86_legitimate_address_p (mode, x, false))
11658 	return x;
11659 
11660       if (REG_P (XEXP (x, 0)))
11661 	{
11662 	  rtx temp = gen_reg_rtx (Pmode);
11663 	  rtx val  = force_operand (XEXP (x, 1), temp);
11664 	  if (val != temp)
11665 	    {
11666 	      val = convert_to_mode (Pmode, val, 1);
11667 	      emit_move_insn (temp, val);
11668 	    }
11669 
11670 	  XEXP (x, 1) = temp;
11671 	  return x;
11672 	}
11673 
11674       else if (REG_P (XEXP (x, 1)))
11675 	{
11676 	  rtx temp = gen_reg_rtx (Pmode);
11677 	  rtx val  = force_operand (XEXP (x, 0), temp);
11678 	  if (val != temp)
11679 	    {
11680 	      val = convert_to_mode (Pmode, val, 1);
11681 	      emit_move_insn (temp, val);
11682 	    }
11683 
11684 	  XEXP (x, 0) = temp;
11685 	  return x;
11686 	}
11687     }
11688 
11689   return x;
11690 }
11691 
11692 /* Print an integer constant expression in assembler syntax.  Addition
11693    and subtraction are the only arithmetic that may appear in these
11694    expressions.  FILE is the stdio stream to write to, X is the rtx, and
11695    CODE is the operand print code from the output string.  */
11696 
11697 static void
11698 output_pic_addr_const (FILE *file, rtx x, int code)
11699 {
11700   char buf[256];
11701 
11702   switch (GET_CODE (x))
11703     {
11704     case PC:
11705       gcc_assert (flag_pic);
11706       putc ('.', file);
11707       break;
11708 
11709     case SYMBOL_REF:
11710       if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS)
11711 	output_addr_const (file, x);
11712       else
11713 	{
11714 	  const char *name = XSTR (x, 0);
11715 
11716 	  /* Mark the decl as referenced so that cgraph will
11717 	     output the function.  */
11718 	  if (SYMBOL_REF_DECL (x))
11719 	    mark_decl_referenced (SYMBOL_REF_DECL (x));
11720 
11721 #if TARGET_MACHO
11722 	  if (MACHOPIC_INDIRECT
11723 	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
11724 	    name = machopic_indirection_name (x, /*stub_p=*/true);
11725 #endif
11726 	  assemble_name (file, name);
11727 	}
11728       if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
11729 	  && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
11730 	fputs ("@PLT", file);
11731       break;
11732 
11733     case LABEL_REF:
11734       x = XEXP (x, 0);
11735       /* FALLTHRU */
11736     case CODE_LABEL:
11737       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
11738       assemble_name (asm_out_file, buf);
11739       break;
11740 
11741     case CONST_INT:
11742       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11743       break;
11744 
11745     case CONST:
11746       /* This used to output parentheses around the expression,
11747 	 but that does not work on the 386 (either ATT or BSD assembler).  */
11748       output_pic_addr_const (file, XEXP (x, 0), code);
11749       break;
11750 
11751     case CONST_DOUBLE:
11752       /* We can't handle floating point constants;
11753 	 TARGET_PRINT_OPERAND must handle them.  */
11754       output_operand_lossage ("floating constant misused");
11755       break;
11756 
11757     case PLUS:
11758       /* Some assemblers need integer constants to appear first.  */
11759       if (CONST_INT_P (XEXP (x, 0)))
11760 	{
11761 	  output_pic_addr_const (file, XEXP (x, 0), code);
11762 	  putc ('+', file);
11763 	  output_pic_addr_const (file, XEXP (x, 1), code);
11764 	}
11765       else
11766 	{
11767 	  gcc_assert (CONST_INT_P (XEXP (x, 1)));
11768 	  output_pic_addr_const (file, XEXP (x, 1), code);
11769 	  putc ('+', file);
11770 	  output_pic_addr_const (file, XEXP (x, 0), code);
11771 	}
11772       break;
11773 
11774     case MINUS:
11775       if (!TARGET_MACHO)
11776 	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
11777       output_pic_addr_const (file, XEXP (x, 0), code);
11778       putc ('-', file);
11779       output_pic_addr_const (file, XEXP (x, 1), code);
11780       if (!TARGET_MACHO)
11781 	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
11782       break;
11783 
11784     case UNSPEC:
11785       gcc_assert (XVECLEN (x, 0) == 1);
11786       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
11787       switch (XINT (x, 1))
11788 	{
11789 	case UNSPEC_GOT:
11790 	  fputs ("@GOT", file);
11791 	  break;
11792 	case UNSPEC_GOTOFF:
11793 	  fputs ("@GOTOFF", file);
11794 	  break;
11795 	case UNSPEC_PLTOFF:
11796 	  fputs ("@PLTOFF", file);
11797 	  break;
11798 	case UNSPEC_PCREL:
11799 	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11800 		 "(%rip)" : "[rip]", file);
11801 	  break;
11802 	case UNSPEC_GOTPCREL:
11803 	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11804 		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
11805 	  break;
11806 	case UNSPEC_GOTTPOFF:
11807 	  /* FIXME: This might be @TPOFF in Sun ld too.  */
11808 	  fputs ("@gottpoff", file);
11809 	  break;
11810 	case UNSPEC_TPOFF:
11811 	  fputs ("@tpoff", file);
11812 	  break;
11813 	case UNSPEC_NTPOFF:
11814 	  if (TARGET_64BIT)
11815 	    fputs ("@tpoff", file);
11816 	  else
11817 	    fputs ("@ntpoff", file);
11818 	  break;
11819 	case UNSPEC_DTPOFF:
11820 	  fputs ("@dtpoff", file);
11821 	  break;
11822 	case UNSPEC_GOTNTPOFF:
11823 	  if (TARGET_64BIT)
11824 	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11825 		   "@gottpoff(%rip)": "@gottpoff[rip]", file);
11826 	  else
11827 	    fputs ("@gotntpoff", file);
11828 	  break;
11829 	case UNSPEC_INDNTPOFF:
11830 	  fputs ("@indntpoff", file);
11831 	  break;
11832 #if TARGET_MACHO
11833 	case UNSPEC_MACHOPIC_OFFSET:
11834 	  putc ('-', file);
11835 	  machopic_output_function_base_name (file);
11836 	  break;
11837 #endif
11838 	default:
11839 	  output_operand_lossage ("invalid UNSPEC as operand");
11840 	  break;
11841 	}
11842        break;
11843 
11844     default:
11845       output_operand_lossage ("invalid expression as operand");
11846     }
11847 }
11848 
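/* E.g. a GOTOFF operand comes out as "foo@GOTOFF", a 64-bit GOT load
   as "foo@GOTPCREL(%rip)" ("foo@GOTPCREL[rip]" in the Intel dialect),
   and TLS operands get the @tpoff/@ntpoff/@dtpoff style suffixes
   handled in the UNSPEC case above.  */
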
11849 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11850    We need to emit DTP-relative relocations.  */
11851 
11852 static void ATTRIBUTE_UNUSED
11853 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
11854 {
11855   fputs (ASM_LONG, file);
11856   output_addr_const (file, x);
11857   fputs ("@dtpoff", file);
11858   switch (size)
11859     {
11860     case 4:
11861       break;
11862     case 8:
11863       fputs (", 0", file);
11864       break;
11865     default:
11866       gcc_unreachable ();
11867    }
11868 }
11869 
11870 /* Return true if X is a representation of the PIC register.  This copes
11871    with calls from ix86_find_base_term, where the register might have
11872    been replaced by a cselib value.  */
11873 
11874 static bool
11875 ix86_pic_register_p (rtx x)
11876 {
11877   if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
11878     return (pic_offset_table_rtx
11879 	    && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
11880   else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT)
11881     return true;
11882   else if (!REG_P (x))
11883     return false;
11884   else if (pic_offset_table_rtx)
11885     {
11886       if (REGNO (x) == REGNO (pic_offset_table_rtx))
11887 	return true;
11888       if (HARD_REGISTER_P (x)
11889 	  && !HARD_REGISTER_P (pic_offset_table_rtx)
11890 	  && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
11891 	return true;
11892       return false;
11893     }
11894   else
11895     return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
11896 }
11897 
11898 /* Helper function for ix86_delegitimize_address.
11899    Attempt to delegitimize TLS local-exec accesses.  */
11900 
11901 static rtx
11902 ix86_delegitimize_tls_address (rtx orig_x)
11903 {
11904   rtx x = orig_x, unspec;
11905   struct ix86_address addr;
11906 
11907   if (!TARGET_TLS_DIRECT_SEG_REFS)
11908     return orig_x;
11909   if (MEM_P (x))
11910     x = XEXP (x, 0);
11911   if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
11912     return orig_x;
11913   if (ix86_decompose_address (x, &addr) == 0
11914       || addr.seg != DEFAULT_TLS_SEG_REG
11915       || addr.disp == NULL_RTX
11916       || GET_CODE (addr.disp) != CONST)
11917     return orig_x;
11918   unspec = XEXP (addr.disp, 0);
11919   if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
11920     unspec = XEXP (unspec, 0);
11921   if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
11922     return orig_x;
11923   x = XVECEXP (unspec, 0, 0);
11924   gcc_assert (GET_CODE (x) == SYMBOL_REF);
11925   if (unspec != XEXP (addr.disp, 0))
11926     x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
11927   if (addr.index)
11928     {
11929       rtx idx = addr.index;
11930       if (addr.scale != 1)
11931 	idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
11932       x = gen_rtx_PLUS (Pmode, idx, x);
11933     }
11934   if (addr.base)
11935     x = gen_rtx_PLUS (Pmode, addr.base, x);
11936   if (MEM_P (orig_x))
11937     x = replace_equiv_address_nv (orig_x, x);
11938   return x;
11939 }
11940 
11941 /* In the name of slightly smaller debug output, and to cater to
11942    general assembler lossage, recognize PIC+GOTOFF and turn it back
11943    into a direct symbol reference.
11944 
11945    On Darwin, this is necessary to avoid a crash, because Darwin
11946    has a different PIC label for each routine but the DWARF debugging
11947    information is not associated with any particular routine, so it's
11948    necessary to remove references to the PIC label from RTL stored by
11949    the DWARF output code.
11950 
11951    This helper is used in the normal ix86_delegitimize_address
11952    entrypoint (e.g. used in the target delegitimization hook) and
11953    in ix86_find_base_term.  As compile time memory optimization, we
11954    avoid allocating rtxes that will not change anything on the outcome
11955    of the callers (find_base_value and find_base_term).  */
11956 
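/* A typical -m32 PIC case: an address of the form

     (plus (reg:SI ebx)
	   (const:SI (unspec:SI [(symbol_ref:SI "foo")] UNSPEC_GOTOFF)))

   is delegitimized back to plain (symbol_ref:SI "foo"), so debug info
   and base-term analysis see the symbol rather than the GOT
   arithmetic.  */
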
11957 static inline rtx
11958 ix86_delegitimize_address_1 (rtx x, bool base_term_p)
11959 {
11960   rtx orig_x = delegitimize_mem_from_attrs (x);
11961   /* addend is NULL or some rtx if x is something+GOTOFF where
11962      something doesn't include the PIC register.  */
11963   rtx addend = NULL_RTX;
11964   /* reg_addend is NULL or a multiple of some register.  */
11965   rtx reg_addend = NULL_RTX;
11966   /* const_addend is NULL or a const_int.  */
11967   rtx const_addend = NULL_RTX;
11968   /* This is the result, or NULL.  */
11969   rtx result = NULL_RTX;
11970 
11971   x = orig_x;
11972 
11973   if (MEM_P (x))
11974     x = XEXP (x, 0);
11975 
11976   if (TARGET_64BIT)
11977     {
11978       if (GET_CODE (x) == CONST
11979           && GET_CODE (XEXP (x, 0)) == PLUS
11980           && GET_MODE (XEXP (x, 0)) == Pmode
11981           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11982           && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
11983           && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
11984         {
11985 	  /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
11986 	     base.  A CONST can't be arg_pointer_rtx based.  */
11987 	  if (base_term_p && MEM_P (orig_x))
11988 	    return orig_x;
11989 	  rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
11990 	  x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
11991 	  if (MEM_P (orig_x))
11992 	    x = replace_equiv_address_nv (orig_x, x);
11993 	  return x;
11994 	}
11995 
11996       if (GET_CODE (x) == CONST
11997 	  && GET_CODE (XEXP (x, 0)) == UNSPEC
11998 	  && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
11999 	      || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
12000 	  && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
12001 	{
12002 	  x = XVECEXP (XEXP (x, 0), 0, 0);
12003 	  if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
12004 	    {
12005 	      x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
12006 	      if (x == NULL_RTX)
12007 		return orig_x;
12008 	    }
12009 	  return x;
12010 	}
12011 
12012       if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
12013 	return ix86_delegitimize_tls_address (orig_x);
12014 
12015       /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
12016 	 and -mcmodel=medium -fpic.  */
12017     }
12018 
12019   if (GET_CODE (x) != PLUS
12020       || GET_CODE (XEXP (x, 1)) != CONST)
12021     return ix86_delegitimize_tls_address (orig_x);
12022 
12023   if (ix86_pic_register_p (XEXP (x, 0)))
12024     /* %ebx + GOT/GOTOFF */
12025     ;
12026   else if (GET_CODE (XEXP (x, 0)) == PLUS)
12027     {
12028       /* %ebx + %reg * scale + GOT/GOTOFF */
12029       reg_addend = XEXP (x, 0);
12030       if (ix86_pic_register_p (XEXP (reg_addend, 0)))
12031 	reg_addend = XEXP (reg_addend, 1);
12032       else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
12033 	reg_addend = XEXP (reg_addend, 0);
12034       else
12035 	{
12036 	  reg_addend = NULL_RTX;
12037 	  addend = XEXP (x, 0);
12038 	}
12039     }
12040   else
12041     addend = XEXP (x, 0);
12042 
12043   x = XEXP (XEXP (x, 1), 0);
12044   if (GET_CODE (x) == PLUS
12045       && CONST_INT_P (XEXP (x, 1)))
12046     {
12047       const_addend = XEXP (x, 1);
12048       x = XEXP (x, 0);
12049     }
12050 
12051   if (GET_CODE (x) == UNSPEC
12052       && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
12053 	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
12054 	  || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
12055 	      && !MEM_P (orig_x) && !addend)))
12056     result = XVECEXP (x, 0, 0);
12057 
12058   if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
12059       && !MEM_P (orig_x))
12060     result = XVECEXP (x, 0, 0);
12061 
12062   if (! result)
12063     return ix86_delegitimize_tls_address (orig_x);
12064 
12065   /* For (PLUS something CONST_INT) both find_base_{value,term} just
12066      recurse on the first operand.  */
12067   if (const_addend && !base_term_p)
12068     result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
12069   if (reg_addend)
12070     result = gen_rtx_PLUS (Pmode, reg_addend, result);
12071   if (addend)
12072     {
12073       /* If the rest of original X doesn't involve the PIC register, add
12074 	 addend and subtract pic_offset_table_rtx.  This can happen e.g.
12075 	 for code like:
12076 	 leal (%ebx, %ecx, 4), %ecx
12077 	 ...
12078 	 movl foo@GOTOFF(%ecx), %edx
12079 	 in which case we return (%ecx - %ebx) + foo
12080 	 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
12081 	 and reload has completed.  Don't do the latter for debug,
12082 	 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly.  */
12083       if (pic_offset_table_rtx
12084 	  && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
12085         result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
12086 						     pic_offset_table_rtx),
12087 			       result);
12088       else if (base_term_p
12089 	       && pic_offset_table_rtx
12090 	       && !TARGET_MACHO
12091 	       && !TARGET_VXWORKS_RTP)
12092 	{
12093 	  rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
12094 	  tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
12095 	  result = gen_rtx_PLUS (Pmode, tmp, result);
12096 	}
12097       else
12098 	return orig_x;
12099     }
12100   if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
12101     {
12102       result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
12103       if (result == NULL_RTX)
12104 	return orig_x;
12105     }
12106   return result;
12107 }
12108 
12109 /* The normal instantiation of the above template.  */
12110 
12111 static rtx
12112 ix86_delegitimize_address (rtx x)
12113 {
12114   return ix86_delegitimize_address_1 (x, false);
12115 }
12116 
12117 /* If X is a machine specific address (i.e. a symbol or label being
12118    referenced as a displacement from the GOT implemented using an
12119    UNSPEC), then return the base term.  Otherwise return X.  */
12120 
12121 rtx
12122 ix86_find_base_term (rtx x)
12123 {
12124   rtx term;
12125 
12126   if (TARGET_64BIT)
12127     {
12128       if (GET_CODE (x) != CONST)
12129 	return x;
12130       term = XEXP (x, 0);
12131       if (GET_CODE (term) == PLUS
12132 	  && CONST_INT_P (XEXP (term, 1)))
12133 	term = XEXP (term, 0);
12134       if (GET_CODE (term) != UNSPEC
12135 	  || (XINT (term, 1) != UNSPEC_GOTPCREL
12136 	      && XINT (term, 1) != UNSPEC_PCREL))
12137 	return x;
12138 
12139       return XVECEXP (term, 0, 0);
12140     }
12141 
12142   return ix86_delegitimize_address_1 (x, true);
12143 }
12144 
12145 /* Return true if X shouldn't be emitted into the debug info.
12146    Disallow UNSPECs other than @gotoff - we can't easily emit the
12147    _GLOBAL_OFFSET_TABLE_ symbol into the .debug_info section, so rather
12148    than delegitimizing such references we assemble them as @gotoff.
12149    Disallow a _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
12150    assembles that as a _GLOBAL_OFFSET_TABLE_-. expression.  */
12151 
12152 static bool
12153 ix86_const_not_ok_for_debug_p (rtx x)
12154 {
12155   if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
12156     return true;
12157 
12158   if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
12159     return true;
12160 
12161   return false;
12162 }
12163 
12164 static void
12165 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
12166 		    bool fp, FILE *file)
12167 {
12168   const char *suffix;
12169 
12170   if (mode == CCFPmode)
12171     {
12172       code = ix86_fp_compare_code_to_integer (code);
12173       mode = CCmode;
12174     }
12175   if (reverse)
12176     code = reverse_condition (code);
12177 
12178   switch (code)
12179     {
12180     case EQ:
12181       gcc_assert (mode != CCGZmode);
12182       switch (mode)
12183 	{
12184 	case E_CCAmode:
12185 	  suffix = "a";
12186 	  break;
12187 	case E_CCCmode:
12188 	  suffix = "c";
12189 	  break;
12190 	case E_CCOmode:
12191 	  suffix = "o";
12192 	  break;
12193 	case E_CCPmode:
12194 	  suffix = "p";
12195 	  break;
12196 	case E_CCSmode:
12197 	  suffix = "s";
12198 	  break;
12199 	default:
12200 	  suffix = "e";
12201 	  break;
12202 	}
12203       break;
12204     case NE:
12205       gcc_assert (mode != CCGZmode);
12206       switch (mode)
12207 	{
12208 	case E_CCAmode:
12209 	  suffix = "na";
12210 	  break;
12211 	case E_CCCmode:
12212 	  suffix = "nc";
12213 	  break;
12214 	case E_CCOmode:
12215 	  suffix = "no";
12216 	  break;
12217 	case E_CCPmode:
12218 	  suffix = "np";
12219 	  break;
12220 	case E_CCSmode:
12221 	  suffix = "ns";
12222 	  break;
12223 	default:
12224 	  suffix = "ne";
12225 	  break;
12226 	}
12227       break;
12228     case GT:
12229       gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
12230       suffix = "g";
12231       break;
12232     case GTU:
12233       /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
12234 	 Those same assemblers have the same but opposite lossage on cmov.  */
12235       if (mode == CCmode)
12236 	suffix = fp ? "nbe" : "a";
12237       else
12238 	gcc_unreachable ();
12239       break;
12240     case LT:
12241       switch (mode)
12242 	{
12243 	case E_CCNOmode:
12244 	case E_CCGOCmode:
12245 	  suffix = "s";
12246 	  break;
12247 
12248 	case E_CCmode:
12249 	case E_CCGCmode:
12250 	case E_CCGZmode:
12251 	  suffix = "l";
12252 	  break;
12253 
12254 	default:
12255 	  gcc_unreachable ();
12256 	}
12257       break;
12258     case LTU:
12259       if (mode == CCmode || mode == CCGZmode)
12260 	suffix = "b";
12261       else if (mode == CCCmode)
12262 	suffix = fp ? "b" : "c";
12263       else
12264 	gcc_unreachable ();
12265       break;
12266     case GE:
12267       switch (mode)
12268 	{
12269 	case E_CCNOmode:
12270 	case E_CCGOCmode:
12271 	  suffix = "ns";
12272 	  break;
12273 
12274 	case E_CCmode:
12275 	case E_CCGCmode:
12276 	case E_CCGZmode:
12277 	  suffix = "ge";
12278 	  break;
12279 
12280 	default:
12281 	  gcc_unreachable ();
12282 	}
12283       break;
12284     case GEU:
12285       if (mode == CCmode || mode == CCGZmode)
12286 	suffix = "nb";
12287       else if (mode == CCCmode)
12288 	suffix = fp ? "nb" : "nc";
12289       else
12290 	gcc_unreachable ();
12291       break;
12292     case LE:
12293       gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
12294       suffix = "le";
12295       break;
12296     case LEU:
12297       if (mode == CCmode)
12298 	suffix = "be";
12299       else
12300 	gcc_unreachable ();
12301       break;
12302     case UNORDERED:
12303       suffix = fp ? "u" : "p";
12304       break;
12305     case ORDERED:
12306       suffix = fp ? "nu" : "np";
12307       break;
12308     default:
12309       gcc_unreachable ();
12310     }
12311   fputs (suffix, file);
12312 }
12313 
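/* Sample mappings: (GT, CCGCmode) prints "g", (LTU, CCmode) prints
   "b" and (EQ, CCCmode) prints "c", matching the suffixes the callers
   paste onto set/cmov/jcc mnemonics.  */
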
12314 /* Print the name of register X to FILE based on its machine mode and number.
12315    If CODE is 'w', pretend the mode is HImode.
12316    If CODE is 'b', pretend the mode is QImode.
12317    If CODE is 'k', pretend the mode is SImode.
12318    If CODE is 'q', pretend the mode is DImode.
12319    If CODE is 'x', pretend the mode is V4SFmode.
12320    If CODE is 't', pretend the mode is V8SFmode.
12321    If CODE is 'g', pretend the mode is V16SFmode.
12322    If CODE is 'h', pretend the reg is the 'high' byte register.
12323    If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
12324    If CODE is 'd', duplicate the operand for AVX instruction.
12325    If CODE is 'V', print naked full integer register name without %.
12326  */
12327 
12328 void
12329 print_reg (rtx x, int code, FILE *file)
12330 {
12331   const char *reg;
12332   int msize;
12333   unsigned int regno;
12334   bool duplicated;
12335 
12336   if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
12337     putc ('%', file);
12338 
12339   if (x == pc_rtx)
12340     {
12341       gcc_assert (TARGET_64BIT);
12342       fputs ("rip", file);
12343       return;
12344     }
12345 
12346   if (code == 'y' && STACK_TOP_P (x))
12347     {
12348       fputs ("st(0)", file);
12349       return;
12350     }
12351 
12352   if (code == 'w')
12353     msize = 2;
12354   else if (code == 'b')
12355     msize = 1;
12356   else if (code == 'k')
12357     msize = 4;
12358   else if (code == 'q')
12359     msize = 8;
12360   else if (code == 'h')
12361     msize = 0;
12362   else if (code == 'x')
12363     msize = 16;
12364   else if (code == 't')
12365     msize = 32;
12366   else if (code == 'g')
12367     msize = 64;
12368   else
12369     msize = GET_MODE_SIZE (GET_MODE (x));
12370 
12371   regno = REGNO (x);
12372 
12373   if (regno == ARG_POINTER_REGNUM
12374       || regno == FRAME_POINTER_REGNUM
12375       || regno == FPSR_REG)
12376     {
12377       output_operand_lossage
12378 	("invalid use of register '%s'", reg_names[regno]);
12379       return;
12380     }
12381   else if (regno == FLAGS_REG)
12382     {
12383       output_operand_lossage ("invalid use of asm flag output");
12384       return;
12385     }
12386 
12387   if (code == 'V')
12388     {
12389       if (GENERAL_REGNO_P (regno))
12390 	msize = GET_MODE_SIZE (word_mode);
12391       else
12392 	error ("%<V%> modifier on non-integer register");
12393     }
12394 
12395   duplicated = code == 'd' && TARGET_AVX;
12396 
12397   switch (msize)
12398     {
12399     case 16:
12400     case 12:
12401     case 8:
12402       if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
12403 	warning (0, "unsupported size for integer register");
12404       /* FALLTHRU */
12405     case 4:
12406       if (LEGACY_INT_REGNO_P (regno))
12407 	putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
12408       /* FALLTHRU */
12409     case 2:
12410     normal:
12411       reg = hi_reg_name[regno];
12412       break;
12413     case 1:
12414       if (regno >= ARRAY_SIZE (qi_reg_name))
12415 	goto normal;
12416       if (!ANY_QI_REGNO_P (regno))
12417 	error ("unsupported size for integer register");
12418       reg = qi_reg_name[regno];
12419       break;
12420     case 0:
12421       if (regno >= ARRAY_SIZE (qi_high_reg_name))
12422 	goto normal;
12423       reg = qi_high_reg_name[regno];
12424       break;
12425     case 32:
12426     case 64:
12427       if (SSE_REGNO_P (regno))
12428 	{
12429 	  gcc_assert (!duplicated);
12430 	  putc (msize == 32 ? 'y' : 'z', file);
12431 	  reg = hi_reg_name[regno] + 1;
12432 	  break;
12433 	}
12434       goto normal;
12435     default:
12436       gcc_unreachable ();
12437     }
12438 
12439   fputs (reg, file);
12440 
12441   /* Irritatingly, AMD extended registers use a
12442      different naming convention: "r%d[bwd]".  */
12443   if (REX_INT_REGNO_P (regno))
12444     {
12445       gcc_assert (TARGET_64BIT);
12446       switch (msize)
12447 	{
12448 	  case 0:
12449 	    error ("extended registers have no high halves");
12450 	    break;
12451 	  case 1:
12452 	    putc ('b', file);
12453 	    break;
12454 	  case 2:
12455 	    putc ('w', file);
12456 	    break;
12457 	  case 4:
12458 	    putc ('d', file);
12459 	    break;
12460 	  case 8:
12461 	    /* no suffix */
12462 	    break;
12463 	  default:
12464 	    error ("unsupported operand size for extended register");
12465 	    break;
12466 	}
12467       return;
12468     }
12469 
12470   if (duplicated)
12471     {
12472       if (ASSEMBLER_DIALECT == ASM_ATT)
12473 	fprintf (file, ", %%%s", reg);
12474       else
12475 	fprintf (file, ", %s", reg);
12476     }
12477 }
12478 
12479 /* Meaning of CODE:
12480    L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12481    C -- print opcode suffix for set/cmov insn.
12482    c -- like C, but print reversed condition
12483    F,f -- likewise, but for floating-point.
12484    O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
12485 	otherwise nothing
12486    R -- print embedded rounding and sae.
12487    r -- print only sae.
12488    z -- print the opcode suffix for the size of the current operand.
12489    Z -- likewise, with special suffixes for x87 instructions.
12490    * -- print a star (in certain assembler syntax)
12491    A -- print an absolute memory reference.
12492    E -- print address with DImode register names if TARGET_64BIT.
12493    w -- print the operand as if it's a "word" (HImode) even if it isn't.
12494    s -- print a shift double count, followed by the assemblers argument
12495 	delimiter.
12496    b -- print the QImode name of the register for the indicated operand.
12497 	%b0 would print %al if operands[0] is reg 0.
12498    w --  likewise, print the HImode name of the register.
12499    k --  likewise, print the SImode name of the register.
12500    q --  likewise, print the DImode name of the register.
12501    x --  likewise, print the V4SFmode name of the register.
12502    t --  likewise, print the V8SFmode name of the register.
12503    g --  likewise, print the V16SFmode name of the register.
12504    h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
12505    y -- print "st(0)" instead of "st" as a register.
12506    d -- print duplicated register operand for AVX instruction.
12507    D -- print condition for SSE cmp instruction.
12508    P -- if PIC, print an @PLT suffix.
12509    p -- print raw symbol name.
12510    X -- don't print any sort of PIC '@' suffix for a symbol.
12511    & -- print some in-use local-dynamic symbol name.
12512    H -- print a memory address offset by 8; used for sse high-parts
12513    Y -- print condition for XOP pcom* instruction.
12514    V -- print naked full integer register name without %.
12515    + -- print a branch hint as 'cs' or 'ds' prefix
12516    ; -- print a semicolon (after prefixes due to bug in older gas).
12517    ~ -- print "i" if TARGET_AVX2, "f" otherwise.
12518    ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
12519    M -- print addr32 prefix for TARGET_X32 with VSIB address.
12520    ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
12521  */
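
/* For example, with operands[0] living in %rax, an insn template can
   use "%b0" to print "%al", "%w0" for "%ax", "%k0" for "%eax" and
   "%q0" for "%rax"; "%h0" would print the matching high byte register
   such as "%ah".  */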
12522 
12523 void
12524 ix86_print_operand (FILE *file, rtx x, int code)
12525 {
12526   if (code)
12527     {
12528       switch (code)
12529 	{
12530 	case 'A':
12531 	  switch (ASSEMBLER_DIALECT)
12532 	    {
12533 	    case ASM_ATT:
12534 	      putc ('*', file);
12535 	      break;
12536 
12537 	    case ASM_INTEL:
12538 	      /* Intel syntax. For absolute addresses, registers should not
12539 		 be surrounded by brackets.  */
12540 	      if (!REG_P (x))
12541 		{
12542 		  putc ('[', file);
12543 		  ix86_print_operand (file, x, 0);
12544 		  putc (']', file);
12545 		  return;
12546 		}
12547 	      break;
12548 
12549 	    default:
12550 	      gcc_unreachable ();
12551 	    }
12552 
12553 	  ix86_print_operand (file, x, 0);
12554 	  return;
12555 
12556 	case 'E':
12557 	  /* Wrap address in an UNSPEC to declare special handling.  */
12558 	  if (TARGET_64BIT)
12559 	    x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
12560 
12561 	  output_address (VOIDmode, x);
12562 	  return;
12563 
12564 	case 'L':
12565 	  if (ASSEMBLER_DIALECT == ASM_ATT)
12566 	    putc ('l', file);
12567 	  return;
12568 
12569 	case 'W':
12570 	  if (ASSEMBLER_DIALECT == ASM_ATT)
12571 	    putc ('w', file);
12572 	  return;
12573 
12574 	case 'B':
12575 	  if (ASSEMBLER_DIALECT == ASM_ATT)
12576 	    putc ('b', file);
12577 	  return;
12578 
12579 	case 'Q':
12580 	  if (ASSEMBLER_DIALECT == ASM_ATT)
12581 	    putc ('l', file);
12582 	  return;
12583 
12584 	case 'S':
12585 	  if (ASSEMBLER_DIALECT == ASM_ATT)
12586 	    putc ('s', file);
12587 	  return;
12588 
12589 	case 'T':
12590 	  if (ASSEMBLER_DIALECT == ASM_ATT)
12591 	    putc ('t', file);
12592 	  return;
12593 
12594 	case 'O':
12595 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12596 	  if (ASSEMBLER_DIALECT != ASM_ATT)
12597 	    return;
12598 
12599 	  switch (GET_MODE_SIZE (GET_MODE (x)))
12600 	    {
12601 	    case 2:
12602 	      putc ('w', file);
12603 	      break;
12604 
12605 	    case 4:
12606 	      putc ('l', file);
12607 	      break;
12608 
12609 	    case 8:
12610 	      putc ('q', file);
12611 	      break;
12612 
12613 	    default:
12614 	      output_operand_lossage ("invalid operand size for operand "
12615 				      "code 'O'");
12616 	      return;
12617 	    }
12618 
12619 	  putc ('.', file);
12620 #endif
12621 	  return;
12622 
12623 	case 'z':
12624 	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12625 	    {
12626 	      /* Opcodes don't get size suffixes if using Intel opcodes.  */
12627 	      if (ASSEMBLER_DIALECT == ASM_INTEL)
12628 		return;
12629 
12630 	      switch (GET_MODE_SIZE (GET_MODE (x)))
12631 		{
12632 		case 1:
12633 		  putc ('b', file);
12634 		  return;
12635 
12636 		case 2:
12637 		  putc ('w', file);
12638 		  return;
12639 
12640 		case 4:
12641 		  putc ('l', file);
12642 		  return;
12643 
12644 		case 8:
12645 		  putc ('q', file);
12646 		  return;
12647 
12648 		default:
12649 		  output_operand_lossage ("invalid operand size for operand "
12650 					  "code 'z'");
12651 		  return;
12652 		}
12653 	    }
12654 
12655 	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12656 	    warning (0, "non-integer operand used with operand code %<z%>");
12657 	  /* FALLTHRU */
12658 
12659 	case 'Z':
12660 	  /* 387 opcodes don't get size suffixes if using Intel opcodes.  */
12661 	  if (ASSEMBLER_DIALECT == ASM_INTEL)
12662 	    return;
12663 
12664 	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12665 	    {
12666 	      switch (GET_MODE_SIZE (GET_MODE (x)))
12667 		{
12668 		case 2:
12669 #ifdef HAVE_AS_IX86_FILDS
12670 		  putc ('s', file);
12671 #endif
12672 		  return;
12673 
12674 		case 4:
12675 		  putc ('l', file);
12676 		  return;
12677 
12678 		case 8:
12679 #ifdef HAVE_AS_IX86_FILDQ
12680 		  putc ('q', file);
12681 #else
12682 		  fputs ("ll", file);
12683 #endif
12684 		  return;
12685 
12686 		default:
12687 		  break;
12688 		}
12689 	    }
12690 	  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12691 	    {
12692 	      /* 387 opcodes don't get size suffixes
12693 		 if the operands are registers.  */
12694 	      if (STACK_REG_P (x))
12695 		return;
12696 
12697 	      switch (GET_MODE_SIZE (GET_MODE (x)))
12698 		{
12699 		case 4:
12700 		  putc ('s', file);
12701 		  return;
12702 
12703 		case 8:
12704 		  putc ('l', file);
12705 		  return;
12706 
12707 		case 12:
12708 		case 16:
12709 		  putc ('t', file);
12710 		  return;
12711 
12712 		default:
12713 		  break;
12714 		}
12715 	    }
12716 	  else
12717 	    {
12718 	      output_operand_lossage ("invalid operand type used with "
12719 				      "operand code 'Z'");
12720 	      return;
12721 	    }
12722 
12723 	  output_operand_lossage ("invalid operand size for operand code 'Z'");
12724 	  return;
12725 
12726 	case 'd':
12727 	case 'b':
12728 	case 'w':
12729 	case 'k':
12730 	case 'q':
12731 	case 'h':
12732 	case 't':
12733 	case 'g':
12734 	case 'y':
12735 	case 'x':
12736 	case 'X':
12737 	case 'P':
12738 	case 'p':
12739 	case 'V':
12740 	  break;
12741 
12742 	case 's':
12743 	  if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
12744 	    {
12745 	      ix86_print_operand (file, x, 0);
12746 	      fputs (", ", file);
12747 	    }
12748 	  return;
12749 
12750 	case 'Y':
12751 	  switch (GET_CODE (x))
12752 	    {
12753 	    case NE:
12754 	      fputs ("neq", file);
12755 	      break;
12756 	    case EQ:
12757 	      fputs ("eq", file);
12758 	      break;
12759 	    case GE:
12760 	    case GEU:
12761 	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
12762 	      break;
12763 	    case GT:
12764 	    case GTU:
12765 	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
12766 	      break;
12767 	    case LE:
12768 	    case LEU:
12769 	      fputs ("le", file);
12770 	      break;
12771 	    case LT:
12772 	    case LTU:
12773 	      fputs ("lt", file);
12774 	      break;
12775 	    case UNORDERED:
12776 	      fputs ("unord", file);
12777 	      break;
12778 	    case ORDERED:
12779 	      fputs ("ord", file);
12780 	      break;
12781 	    case UNEQ:
12782 	      fputs ("ueq", file);
12783 	      break;
12784 	    case UNGE:
12785 	      fputs ("nlt", file);
12786 	      break;
12787 	    case UNGT:
12788 	      fputs ("nle", file);
12789 	      break;
12790 	    case UNLE:
12791 	      fputs ("ule", file);
12792 	      break;
12793 	    case UNLT:
12794 	      fputs ("ult", file);
12795 	      break;
12796 	    case LTGT:
12797 	      fputs ("une", file);
12798 	      break;
12799 	    default:
12800 	      output_operand_lossage ("operand is not a condition code, "
12801 				      "invalid operand code 'Y'");
12802 	      return;
12803 	    }
12804 	  return;
12805 
12806 	case 'D':
12807 	  /* Little bit of braindamage here.  The SSE compare instructions
12808 	     use completely different names for the comparisons than the
12809 	     fp conditional moves do.  */
12810 	  switch (GET_CODE (x))
12811 	    {
12812 	    case UNEQ:
12813 	      if (TARGET_AVX)
12814 		{
12815 		  fputs ("eq_us", file);
12816 		  break;
12817 		}
12818 	     /* FALLTHRU */
12819 	    case EQ:
12820 	      fputs ("eq", file);
12821 	      break;
12822 	    case UNLT:
12823 	      if (TARGET_AVX)
12824 		{
12825 		  fputs ("nge", file);
12826 		  break;
12827 		}
12828 	     /* FALLTHRU */
12829 	    case LT:
12830 	      fputs ("lt", file);
12831 	      break;
12832 	    case UNLE:
12833 	      if (TARGET_AVX)
12834 		{
12835 		  fputs ("ngt", file);
12836 		  break;
12837 		}
12838 	     /* FALLTHRU */
12839 	    case LE:
12840 	      fputs ("le", file);
12841 	      break;
12842 	    case UNORDERED:
12843 	      fputs ("unord", file);
12844 	      break;
12845 	    case LTGT:
12846 	      if (TARGET_AVX)
12847 		{
12848 		  fputs ("neq_oq", file);
12849 		  break;
12850 		}
12851 	     /* FALLTHRU */
12852 	    case NE:
12853 	      fputs ("neq", file);
12854 	      break;
12855 	    case GE:
12856 	      if (TARGET_AVX)
12857 		{
12858 		  fputs ("ge", file);
12859 		  break;
12860 		}
12861 	     /* FALLTHRU */
12862 	    case UNGE:
12863 	      fputs ("nlt", file);
12864 	      break;
12865 	    case GT:
12866 	      if (TARGET_AVX)
12867 		{
12868 		  fputs ("gt", file);
12869 		  break;
12870 		}
12871 	     /* FALLTHRU */
12872 	    case UNGT:
12873 	      fputs ("nle", file);
12874 	      break;
12875 	    case ORDERED:
12876 	      fputs ("ord", file);
12877 	      break;
12878 	    default:
12879 	      output_operand_lossage ("operand is not a condition code, "
12880 				      "invalid operand code 'D'");
12881 	      return;
12882 	    }
12883 	  return;
12884 
12885 	case 'F':
12886 	case 'f':
12887 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12888 	  if (ASSEMBLER_DIALECT == ASM_ATT)
12889 	    putc ('.', file);
12890 	  gcc_fallthrough ();
12891 #endif
12892 
12893 	case 'C':
12894 	case 'c':
12895 	  if (!COMPARISON_P (x))
12896 	    {
12897 	      output_operand_lossage ("operand is not a condition code, "
12898 				      "invalid operand code '%c'", code);
12899 	      return;
12900 	    }
12901 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
12902 			      code == 'c' || code == 'f',
12903 			      code == 'F' || code == 'f',
12904 			      file);
12905 	  return;
12906 
12907 	case 'H':
12908 	  if (!offsettable_memref_p (x))
12909 	    {
12910 	      output_operand_lossage ("operand is not an offsettable memory "
12911 				      "reference, invalid operand code 'H'");
12912 	      return;
12913 	    }
12914 	  /* It doesn't actually matter what mode we use here, as we're
12915 	     only going to use this for printing.  */
12916 	  x = adjust_address_nv (x, DImode, 8);
12917 	  /* Output 'qword ptr' for intel assembler dialect.  */
12918 	  if (ASSEMBLER_DIALECT == ASM_INTEL)
12919 	    code = 'q';
12920 	  break;
12921 
12922 	case 'K':
12923 	  if (!CONST_INT_P (x))
12924 	    {
12925 	      output_operand_lossage ("operand is not an integer, invalid "
12926 				      "operand code 'K'");
12927 	      return;
12928 	    }
12929 
12930 	  if (INTVAL (x) & IX86_HLE_ACQUIRE)
12931 #ifdef HAVE_AS_IX86_HLE
12932 	    fputs ("xacquire ", file);
12933 #else
12934 	    fputs ("\n" ASM_BYTE "0xf2\n\t", file);
12935 #endif
12936 	  else if (INTVAL (x) & IX86_HLE_RELEASE)
12937 #ifdef HAVE_AS_IX86_HLE
12938 	    fputs ("xrelease ", file);
12939 #else
12940 	    fputs ("\n" ASM_BYTE "0xf3\n\t", file);
12941 #endif
12942 	  /* We do not want to print value of the operand.  */
12943 	  return;
12944 
12945 	case 'N':
12946 	  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
12947 	    fputs ("{z}", file);
12948 	  return;
12949 
12950 	case 'r':
12951 	  if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
12952 	    {
12953 	      output_operand_lossage ("operand is not a specific integer, "
12954 				      "invalid operand code 'r'");
12955 	      return;
12956 	    }
12957 
12958 	  if (ASSEMBLER_DIALECT == ASM_INTEL)
12959 	    fputs (", ", file);
12960 
12961 	  fputs ("{sae}", file);
12962 
12963 	  if (ASSEMBLER_DIALECT == ASM_ATT)
12964 	    fputs (", ", file);
12965 
12966 	  return;
12967 
12968 	case 'R':
12969 	  if (!CONST_INT_P (x))
12970 	    {
12971 	      output_operand_lossage ("operand is not an integer, invalid "
12972 				      "operand code 'R'");
12973 	      return;
12974 	    }
12975 
12976 	  if (ASSEMBLER_DIALECT == ASM_INTEL)
12977 	    fputs (", ", file);
12978 
12979 	  switch (INTVAL (x))
12980 	    {
12981 	    case ROUND_NEAREST_INT | ROUND_SAE:
12982 	      fputs ("{rn-sae}", file);
12983 	      break;
12984 	    case ROUND_NEG_INF | ROUND_SAE:
12985 	      fputs ("{rd-sae}", file);
12986 	      break;
12987 	    case ROUND_POS_INF | ROUND_SAE:
12988 	      fputs ("{ru-sae}", file);
12989 	      break;
12990 	    case ROUND_ZERO | ROUND_SAE:
12991 	      fputs ("{rz-sae}", file);
12992 	      break;
12993 	    default:
12994 	      output_operand_lossage ("operand is not a specific integer, "
12995 				      "invalid operand code 'R'");
12996 	    }
12997 
12998 	  if (ASSEMBLER_DIALECT == ASM_ATT)
12999 	    fputs (", ", file);
13000 
13001 	  return;
13002 
13003 	case '*':
13004 	  if (ASSEMBLER_DIALECT == ASM_ATT)
13005 	    putc ('*', file);
13006 	  return;
13007 
13008 	case '&':
13009 	  {
13010 	    const char *name = get_some_local_dynamic_name ();
13011 	    if (name == NULL)
13012 	      output_operand_lossage ("'%%&' used without any "
13013 				      "local dynamic TLS references");
13014 	    else
13015 	      assemble_name (file, name);
13016 	    return;
13017 	  }
13018 
13019 	case '+':
13020 	  {
13021 	    rtx x;
13022 
13023 	    if (!optimize
13024 	        || optimize_function_for_size_p (cfun)
13025 		|| !TARGET_BRANCH_PREDICTION_HINTS)
13026 	      return;
13027 
13028 	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13029 	    if (x)
13030 	      {
13031 		int pred_val = profile_probability::from_reg_br_prob_note
13032 				 (XINT (x, 0)).to_reg_br_prob_base ();
13033 
13034 		if (pred_val < REG_BR_PROB_BASE * 45 / 100
13035 		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
13036 		  {
13037 		    bool taken = pred_val > REG_BR_PROB_BASE / 2;
13038 		    bool cputaken
13039 		      = final_forward_branch_p (current_output_insn) == 0;
13040 
13041 		    /* Emit hints only in the case default branch prediction
13042 		       heuristics would fail.  */
13043 		    if (taken != cputaken)
13044 		      {
13045 			/* We use 3e (DS) prefix for taken branches and
13046 			   2e (CS) prefix for not taken branches.  */
13047 			if (taken)
13048 			  fputs ("ds ; ", file);
13049 			else
13050 			  fputs ("cs ; ", file);
13051 		      }
13052 		  }
13053 	      }
13054 	    return;
13055 	  }
13056 
13057 	case ';':
13058 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13059 	  putc (';', file);
13060 #endif
13061 	  return;
13062 
13063 	case '~':
13064 	  putc (TARGET_AVX2 ? 'i' : 'f', file);
13065 	  return;
13066 
13067 	case 'M':
13068 	  if (TARGET_X32)
13069 	    {
13070 	      /* NB: 32-bit indices in VSIB address are sign-extended
13071 		 to 64 bits. In x32, if 32-bit address 0xf7fa3010 is
13072 		 sign-extended to 0xfffffffff7fa3010, which is an invalid
13073 		 address.  Add the addr32 prefix if there is no base
13074 		 register nor symbol.  */
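	      /* The "addr32 " string below makes the assembler emit the
		 0x67 address-size prefix, so the VSIB address is computed
		 with 32-bit wrap-around semantics (explanatory note).  */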
13075 	      bool ok;
13076 	      struct ix86_address parts;
13077 	      ok = ix86_decompose_address (x, &parts);
13078 	      gcc_assert (ok && parts.index == NULL_RTX);
13079 	      if (parts.base == NULL_RTX
13080 		  && (parts.disp == NULL_RTX
13081 		      || !symbolic_operand (parts.disp,
13082 					    GET_MODE (parts.disp))))
13083 		fputs ("addr32 ", file);
13084 	    }
13085 	  return;
13086 
13087 	case '^':
13088 	  if (TARGET_64BIT && Pmode != word_mode)
13089 	    fputs ("addr32 ", file);
13090 	  return;
13091 
13092 	case '!':
13093 	  if (ix86_notrack_prefixed_insn_p (current_output_insn))
13094 	    fputs ("notrack ", file);
13095 	  return;
13096 
13097 	default:
13098 	  output_operand_lossage ("invalid operand code '%c'", code);
13099 	}
13100     }
13101 
13102   if (REG_P (x))
13103     print_reg (x, code, file);
13104 
13105   else if (MEM_P (x))
13106     {
13107       rtx addr = XEXP (x, 0);
13108 
13109       /* No `byte ptr' prefix for call instructions ... */
13110       if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
13111 	{
13112 	  machine_mode mode = GET_MODE (x);
13113 	  const char *size;
13114 
13115 	  /* Check for explicit size override codes.  */
13116 	  if (code == 'b')
13117 	    size = "BYTE";
13118 	  else if (code == 'w')
13119 	    size = "WORD";
13120 	  else if (code == 'k')
13121 	    size = "DWORD";
13122 	  else if (code == 'q')
13123 	    size = "QWORD";
13124 	  else if (code == 'x')
13125 	    size = "XMMWORD";
13126 	  else if (code == 't')
13127 	    size = "YMMWORD";
13128 	  else if (code == 'g')
13129 	    size = "ZMMWORD";
13130 	  else if (mode == BLKmode)
13131 	    /* ... or BLKmode operands, when not overridden.  */
13132 	    size = NULL;
13133 	  else
13134 	    switch (GET_MODE_SIZE (mode))
13135 	      {
13136 	      case 1: size = "BYTE"; break;
13137 	      case 2: size = "WORD"; break;
13138 	      case 4: size = "DWORD"; break;
13139 	      case 8: size = "QWORD"; break;
13140 	      case 12: size = "TBYTE"; break;
13141 	      case 16:
13142 		if (mode == XFmode)
13143 		  size = "TBYTE";
13144 		else
13145 		  size = "XMMWORD";
13146 		break;
13147 	      case 32: size = "YMMWORD"; break;
13148 	      case 64: size = "ZMMWORD"; break;
13149 	      default:
13150 		gcc_unreachable ();
13151 	      }
13152 	  if (size)
13153 	    {
13154 	      fputs (size, file);
13155 	      fputs (" PTR ", file);
13156 	    }
13157 	}
13158 
13159       if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
13160 	output_operand_lossage ("invalid constraints for operand");
13161       else
13162 	ix86_print_operand_address_as
13163 	  (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
13164     }
13165 
13166   else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
13167     {
13168       long l;
13169 
13170       REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
13171 
13172       if (ASSEMBLER_DIALECT == ASM_ATT)
13173 	putc ('$', file);
13174       /* Sign extend 32bit SFmode immediate to 8 bytes.  */
13175       if (code == 'q')
13176 	fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
13177 		 (unsigned long long) (int) l);
13178       else
13179 	fprintf (file, "0x%08x", (unsigned int) l);
13180     }
13181 
13182   else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
13183     {
13184       long l[2];
13185 
13186       REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
13187 
13188       if (ASSEMBLER_DIALECT == ASM_ATT)
13189 	putc ('$', file);
13190       fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
13191     }
13192 
13193   /* These float cases don't actually occur as immediate operands.  */
13194   else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
13195     {
13196       char dstr[30];
13197 
13198       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13199       fputs (dstr, file);
13200     }
13201 
13202   else
13203     {
13204       /* We have patterns that allow zero sets of memory, for instance.
13205 	 In 64-bit mode, we should probably support all 8-byte vectors,
13206 	 since we can in fact encode that into an immediate.  */
13207       if (GET_CODE (x) == CONST_VECTOR)
13208 	{
13209 	  if (x != CONST0_RTX (GET_MODE (x)))
13210 	    output_operand_lossage ("invalid vector immediate");
13211 	  x = const0_rtx;
13212 	}
13213 
13214       if (code != 'P' && code != 'p')
13215 	{
13216 	  if (CONST_INT_P (x))
13217 	    {
13218 	      if (ASSEMBLER_DIALECT == ASM_ATT)
13219 		putc ('$', file);
13220 	    }
13221 	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
13222 		   || GET_CODE (x) == LABEL_REF)
13223 	    {
13224 	      if (ASSEMBLER_DIALECT == ASM_ATT)
13225 		putc ('$', file);
13226 	      else
13227 		fputs ("OFFSET FLAT:", file);
13228 	    }
13229 	}
13230       if (CONST_INT_P (x))
13231 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13232       else if (flag_pic || MACHOPIC_INDIRECT)
13233 	output_pic_addr_const (file, x, code);
13234       else
13235 	output_addr_const (file, x);
13236     }
13237 }
13238 
13239 static bool
13240 ix86_print_operand_punct_valid_p (unsigned char code)
13241 {
13242   return (code == '*' || code == '+' || code == '&' || code == ';'
13243 	  || code == '~' || code == '^' || code == '!');
13244 }
13245 
13246 /* Print a memory operand whose address is ADDR.  */
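/* As a rough illustration of the two dialects handled below, an address with
   base %rax, index %rbx, scale 4 and displacement 16 is printed as
   "16(%rax,%rbx,4)" for ASM_ATT and as "[rax+16+rbx*4]" for ASM_INTEL.  */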
13247 
13248 static void
13249 ix86_print_operand_address_as (FILE *file, rtx addr,
13250 			       addr_space_t as, bool no_rip)
13251 {
13252   struct ix86_address parts;
13253   rtx base, index, disp;
13254   int scale;
13255   int ok;
13256   bool vsib = false;
13257   int code = 0;
13258 
13259   if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
13260     {
13261       ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
13262       gcc_assert (parts.index == NULL_RTX);
13263       parts.index = XVECEXP (addr, 0, 1);
13264       parts.scale = INTVAL (XVECEXP (addr, 0, 2));
13265       addr = XVECEXP (addr, 0, 0);
13266       vsib = true;
13267     }
13268   else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
13269     {
13270       gcc_assert (TARGET_64BIT);
13271       ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
13272       code = 'q';
13273     }
13274   else
13275     ok = ix86_decompose_address (addr, &parts);
13276 
13277   gcc_assert (ok);
13278 
13279   base = parts.base;
13280   index = parts.index;
13281   disp = parts.disp;
13282   scale = parts.scale;
13283 
13284   if (ADDR_SPACE_GENERIC_P (as))
13285     as = parts.seg;
13286   else
13287     gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
13288 
13289   if (!ADDR_SPACE_GENERIC_P (as))
13290     {
13291       if (ASSEMBLER_DIALECT == ASM_ATT)
13292 	putc ('%', file);
13293 
13294       switch (as)
13295 	{
13296 	case ADDR_SPACE_SEG_FS:
13297 	  fputs ("fs:", file);
13298 	  break;
13299 	case ADDR_SPACE_SEG_GS:
13300 	  fputs ("gs:", file);
13301 	  break;
13302 	default:
13303 	  gcc_unreachable ();
13304 	}
13305     }
13306 
13307   /* Use one byte shorter RIP relative addressing for 64bit mode.  */
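  /* Explanatory note: printing the base as the PC gives e.g. "foo(%rip)"
     (or "[rip+foo]" in Intel syntax), which avoids the SIB byte needed for
     a 32-bit absolute address in 64-bit mode -- hence "one byte shorter".  */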
13308   if (TARGET_64BIT && !base && !index && !no_rip)
13309     {
13310       rtx symbol = disp;
13311 
13312       if (GET_CODE (disp) == CONST
13313 	  && GET_CODE (XEXP (disp, 0)) == PLUS
13314 	  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13315 	symbol = XEXP (XEXP (disp, 0), 0);
13316 
13317       if (GET_CODE (symbol) == LABEL_REF
13318 	  || (GET_CODE (symbol) == SYMBOL_REF
13319 	      && SYMBOL_REF_TLS_MODEL (symbol) == 0))
13320 	base = pc_rtx;
13321     }
13322 
13323   if (!base && !index)
13324     {
13325       /* Displacement only requires special attention.  */
13326       if (CONST_INT_P (disp))
13327 	{
13328 	  if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
13329 	    fputs ("ds:", file);
13330 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
13331 	}
13332       /* Load the external function address via the GOT slot to avoid PLT.  */
13333       else if (GET_CODE (disp) == CONST
13334 	       && GET_CODE (XEXP (disp, 0)) == UNSPEC
13335 	       && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
13336 		   || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
13337 	       && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
13338 	output_pic_addr_const (file, disp, 0);
13339       else if (flag_pic)
13340 	output_pic_addr_const (file, disp, 0);
13341       else
13342 	output_addr_const (file, disp);
13343     }
13344   else
13345     {
13346       /* Print SImode register names to force addr32 prefix.  */
13347       if (SImode_address_operand (addr, VOIDmode))
13348 	{
13349 	  if (flag_checking)
13350 	    {
13351 	      gcc_assert (TARGET_64BIT);
13352 	      switch (GET_CODE (addr))
13353 		{
13354 		case SUBREG:
13355 		  gcc_assert (GET_MODE (addr) == SImode);
13356 		  gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
13357 		  break;
13358 		case ZERO_EXTEND:
13359 		case AND:
13360 		  gcc_assert (GET_MODE (addr) == DImode);
13361 		  break;
13362 		default:
13363 		  gcc_unreachable ();
13364 		}
13365 	    }
13366 	  gcc_assert (!code);
13367 	  code = 'k';
13368 	}
13369       else if (code == 0
13370 	       && TARGET_X32
13371 	       && disp
13372 	       && CONST_INT_P (disp)
13373 	       && INTVAL (disp) < -16*1024*1024)
13374 	{
13375 	  /* X32 runs in 64-bit mode, where displacement, DISP, in
13376 	     address DISP(%r64), is encoded as 32-bit immediate sign-
13377 	     extended from 32-bit to 64-bit.  For -0x40000300(%r64),
13378 	     address is %r64 + 0xffffffffbffffd00.  When %r64 <
13379 	     0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
13380 	     which is invalid for x32.  The correct address is %r64
13381 	     - 0x40000300 == 0xf7ffdd64.  To properly encode
13382 	     -0x40000300(%r64) for x32, we zero-extend negative
13383 	     displacement by forcing addr32 prefix which truncates
13384 	     0xfffffffff7ffdd64 to 0xf7ffdd64.  In theory, we should
13385 	     zero-extend all negative displacements, including -1(%rsp).
13386 	     However, for small negative displacements, sign-extension
13387 	     won't cause overflow.  We only zero-extend negative
13388 	     displacements if they are < -16*1024*1024, which is also used
13389 	     to check legitimate address displacements for PIC.  */
13390 	  code = 'k';
13391 	}
13392 
13393       /* Since the upper 32 bits of RSP are always zero for x32,
13394 	 we can encode %esp as %rsp to avoid 0x67 prefix if
13395 	 there is no index register.  */
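      /* Illustrative: "(%esp)" would need the 0x67 address-size prefix in
	 64-bit code, while "(%rsp)" encodes the same x32 address without it.  */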
13396       if (TARGET_X32 && Pmode == SImode
13397 	  && !index && base && REG_P (base) && REGNO (base) == SP_REG)
13398 	code = 'q';
13399 
13400       if (ASSEMBLER_DIALECT == ASM_ATT)
13401 	{
13402 	  if (disp)
13403 	    {
13404 	      if (flag_pic)
13405 		output_pic_addr_const (file, disp, 0);
13406 	      else if (GET_CODE (disp) == LABEL_REF)
13407 		output_asm_label (disp);
13408 	      else
13409 		output_addr_const (file, disp);
13410 	    }
13411 
13412 	  putc ('(', file);
13413 	  if (base)
13414 	    print_reg (base, code, file);
13415 	  if (index)
13416 	    {
13417 	      putc (',', file);
13418 	      print_reg (index, vsib ? 0 : code, file);
13419 	      if (scale != 1 || vsib)
13420 		fprintf (file, ",%d", scale);
13421 	    }
13422 	  putc (')', file);
13423 	}
13424       else
13425 	{
13426 	  rtx offset = NULL_RTX;
13427 
13428 	  if (disp)
13429 	    {
13430 	      /* Pull out the offset of a symbol; print any symbol itself.  */
13431 	      if (GET_CODE (disp) == CONST
13432 		  && GET_CODE (XEXP (disp, 0)) == PLUS
13433 		  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13434 		{
13435 		  offset = XEXP (XEXP (disp, 0), 1);
13436 		  disp = gen_rtx_CONST (VOIDmode,
13437 					XEXP (XEXP (disp, 0), 0));
13438 		}
13439 
13440 	      if (flag_pic)
13441 		output_pic_addr_const (file, disp, 0);
13442 	      else if (GET_CODE (disp) == LABEL_REF)
13443 		output_asm_label (disp);
13444 	      else if (CONST_INT_P (disp))
13445 		offset = disp;
13446 	      else
13447 		output_addr_const (file, disp);
13448 	    }
13449 
13450 	  putc ('[', file);
13451 	  if (base)
13452 	    {
13453 	      print_reg (base, code, file);
13454 	      if (offset)
13455 		{
13456 		  if (INTVAL (offset) >= 0)
13457 		    putc ('+', file);
13458 		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13459 		}
13460 	    }
13461 	  else if (offset)
13462 	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13463 	  else
13464 	    putc ('0', file);
13465 
13466 	  if (index)
13467 	    {
13468 	      putc ('+', file);
13469 	      print_reg (index, vsib ? 0 : code, file);
13470 	      if (scale != 1 || vsib)
13471 		fprintf (file, "*%d", scale);
13472 	    }
13473 	  putc (']', file);
13474 	}
13475     }
13476 }
13477 
13478 static void
13479 ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
13480 {
13481   if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
13482     output_operand_lossage ("invalid constraints for operand");
13483   else
13484     ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
13485 }
13486 
13487 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */
13488 
13489 static bool
13490 i386_asm_output_addr_const_extra (FILE *file, rtx x)
13491 {
13492   rtx op;
13493 
13494   if (GET_CODE (x) != UNSPEC)
13495     return false;
13496 
13497   op = XVECEXP (x, 0, 0);
13498   switch (XINT (x, 1))
13499     {
13500     case UNSPEC_GOTOFF:
13501       output_addr_const (file, op);
13502       fputs ("@gotoff", file);
13503       break;
13504     case UNSPEC_GOTTPOFF:
13505       output_addr_const (file, op);
13506       /* FIXME: This might be @TPOFF in Sun ld.  */
13507       fputs ("@gottpoff", file);
13508       break;
13509     case UNSPEC_TPOFF:
13510       output_addr_const (file, op);
13511       fputs ("@tpoff", file);
13512       break;
13513     case UNSPEC_NTPOFF:
13514       output_addr_const (file, op);
13515       if (TARGET_64BIT)
13516 	fputs ("@tpoff", file);
13517       else
13518 	fputs ("@ntpoff", file);
13519       break;
13520     case UNSPEC_DTPOFF:
13521       output_addr_const (file, op);
13522       fputs ("@dtpoff", file);
13523       break;
13524     case UNSPEC_GOTNTPOFF:
13525       output_addr_const (file, op);
13526       if (TARGET_64BIT)
13527 	fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13528 	       "@gottpoff(%rip)" : "@gottpoff[rip]", file);
13529       else
13530 	fputs ("@gotntpoff", file);
13531       break;
13532     case UNSPEC_INDNTPOFF:
13533       output_addr_const (file, op);
13534       fputs ("@indntpoff", file);
13535       break;
13536 #if TARGET_MACHO
13537     case UNSPEC_MACHOPIC_OFFSET:
13538       output_addr_const (file, op);
13539       putc ('-', file);
13540       machopic_output_function_base_name (file);
13541       break;
13542 #endif
13543 
13544     default:
13545       return false;
13546     }
13547 
13548   return true;
13549 }
13550 
13551 
13552 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
13553    MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
13554    is the expression of the binary operation.  The output may either be
13555    emitted here, or returned to the caller, like all output_* functions.
13556 
13557    There is no guarantee that the operands are the same mode, as they
13558    might be within FLOAT or FLOAT_EXTEND expressions.  */
13559 
13560 #ifndef SYSV386_COMPAT
13561 /* Set to 1 for compatibility with brain-damaged assemblers.  No-one
13562    wants to fix the assemblers because that causes incompatibility
13563    with gcc.  No-one wants to fix gcc because that causes
13564    incompatibility with assemblers...  You can use the option of
13565    -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
13566 #define SYSV386_COMPAT 1
13567 #endif
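/* A sketch of the template built below, assuming AVX and SFmode SSE operands:
   buf is assembled as "%v" + "add" + "ss" + "\t{%2, %1, %0|%0, %1, %2}", i.e.
   the AT&T/Intel dual template "%vaddss\t{%2, %1, %0|%0, %1, %2}".  */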
13568 
13569 const char *
13570 output_387_binary_op (rtx_insn *insn, rtx *operands)
13571 {
13572   static char buf[40];
13573   const char *p;
13574   bool is_sse
13575     = (SSE_REG_P (operands[0])
13576        || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));
13577 
13578   if (is_sse)
13579     p = "%v";
13580   else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13581 	   || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13582     p = "fi";
13583   else
13584     p = "f";
13585 
13586   strcpy (buf, p);
13587 
13588   switch (GET_CODE (operands[3]))
13589     {
13590     case PLUS:
13591       p = "add"; break;
13592     case MINUS:
13593       p = "sub"; break;
13594     case MULT:
13595       p = "mul"; break;
13596     case DIV:
13597       p = "div"; break;
13598     default:
13599       gcc_unreachable ();
13600     }
13601 
13602   strcat (buf, p);
13603 
13604   if (is_sse)
13605    {
13606      p = (GET_MODE (operands[0]) == SFmode) ? "ss" : "sd";
13607      strcat (buf, p);
13608 
13609      if (TARGET_AVX)
13610        p = "\t{%2, %1, %0|%0, %1, %2}";
13611      else
13612        p = "\t{%2, %0|%0, %2}";
13613 
13614      strcat (buf, p);
13615      return buf;
13616    }
13617 
13618   /* Even if we do not want to check the inputs, this documents input
13619      constraints, which helps in understanding the following code.  */
13620   if (flag_checking)
13621     {
13622       if (STACK_REG_P (operands[0])
13623 	  && ((REG_P (operands[1])
13624 	       && REGNO (operands[0]) == REGNO (operands[1])
13625 	       && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
13626 	      || (REG_P (operands[2])
13627 		  && REGNO (operands[0]) == REGNO (operands[2])
13628 		  && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
13629 	  && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
13630 	; /* ok */
13631       else
13632 	gcc_unreachable ();
13633     }
13634 
13635   switch (GET_CODE (operands[3]))
13636     {
13637     case MULT:
13638     case PLUS:
13639       if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
13640 	std::swap (operands[1], operands[2]);
13641 
13642       /* We know operands[0] == operands[1].  */
13643 
13644       if (MEM_P (operands[2]))
13645 	{
13646 	  p = "%Z2\t%2";
13647 	  break;
13648 	}
13649 
13650       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13651 	{
13652 	  if (STACK_TOP_P (operands[0]))
13653 	    /* How is it that we are storing to a dead operand[2]?
13654 	       Well, presumably operands[1] is dead too.  We can't
13655 	       store the result to st(0) as st(0) gets popped on this
13656 	       instruction.  Instead store to operands[2] (which I
13657 	       think has to be st(1)).  st(1) will be popped later.
13658 	       gcc <= 2.8.1 didn't have this check and generated
13659 	       assembly code that the Unixware assembler rejected.  */
13660 	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
13661 	  else
13662 	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
13663 	  break;
13664 	}
13665 
13666       if (STACK_TOP_P (operands[0]))
13667 	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
13668       else
13669 	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
13670       break;
13671 
13672     case MINUS:
13673     case DIV:
13674       if (MEM_P (operands[1]))
13675 	{
13676 	  p = "r%Z1\t%1";
13677 	  break;
13678 	}
13679 
13680       if (MEM_P (operands[2]))
13681 	{
13682 	  p = "%Z2\t%2";
13683 	  break;
13684 	}
13685 
13686       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13687 	{
13688 #if SYSV386_COMPAT
13689 	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
13690 	     derived assemblers, confusingly reverse the direction of
13691 	     the operation for fsub{r} and fdiv{r} when the
13692 	     destination register is not st(0).  The Intel assembler
13693 	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
13694 	     figure out what the hardware really does.  */
13695 	  if (STACK_TOP_P (operands[0]))
13696 	    p = "{p\t%0, %2|rp\t%2, %0}";
13697 	  else
13698 	    p = "{rp\t%2, %0|p\t%0, %2}";
13699 #else
13700 	  if (STACK_TOP_P (operands[0]))
13701 	    /* As above for fmul/fadd, we can't store to st(0).  */
13702 	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
13703 	  else
13704 	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
13705 #endif
13706 	  break;
13707 	}
13708 
13709       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
13710 	{
13711 #if SYSV386_COMPAT
13712 	  if (STACK_TOP_P (operands[0]))
13713 	    p = "{rp\t%0, %1|p\t%1, %0}";
13714 	  else
13715 	    p = "{p\t%1, %0|rp\t%0, %1}";
13716 #else
13717 	  if (STACK_TOP_P (operands[0]))
13718 	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
13719 	  else
13720 	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
13721 #endif
13722 	  break;
13723 	}
13724 
13725       if (STACK_TOP_P (operands[0]))
13726 	{
13727 	  if (STACK_TOP_P (operands[1]))
13728 	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
13729 	  else
13730 	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
13731 	  break;
13732 	}
13733       else if (STACK_TOP_P (operands[1]))
13734 	{
13735 #if SYSV386_COMPAT
13736 	  p = "{\t%1, %0|r\t%0, %1}";
13737 #else
13738 	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
13739 #endif
13740 	}
13741       else
13742 	{
13743 #if SYSV386_COMPAT
13744 	  p = "{r\t%2, %0|\t%0, %2}";
13745 #else
13746 	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
13747 #endif
13748 	}
13749       break;
13750 
13751     default:
13752       gcc_unreachable ();
13753     }
13754 
13755   strcat (buf, p);
13756   return buf;
13757 }
13758 
13759 /* Return needed mode for entity in optimize_mode_switching pass.  */
13760 
13761 static int
13762 ix86_dirflag_mode_needed (rtx_insn *insn)
13763 {
13764   if (CALL_P (insn))
13765     {
13766       if (cfun->machine->func_type == TYPE_NORMAL)
13767 	return X86_DIRFLAG_ANY;
13768       else
13769 	/* No need to emit CLD in interrupt handler for TARGET_CLD.  */
13770 	return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
13771     }
13772 
13773   if (recog_memoized (insn) < 0)
13774     return X86_DIRFLAG_ANY;
13775 
13776   if (get_attr_type (insn) == TYPE_STR)
13777     {
13778       /* Emit cld instruction if stringops are used in the function.  */
13779       if (cfun->machine->func_type == TYPE_NORMAL)
13780 	return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
13781       else
13782 	return X86_DIRFLAG_RESET;
13783     }
13784 
13785   return X86_DIRFLAG_ANY;
13786 }
13787 
13788 /* Check if a 256bit or 512bit AVX register is referenced inside of EXP.  */
13789 
13790 static bool
13791 ix86_check_avx_upper_register (const_rtx exp)
13792 {
13793   return SSE_REG_P (exp) && GET_MODE_BITSIZE (GET_MODE (exp)) > 128;
13794 }
13795 
13796 /* Return needed mode for entity in optimize_mode_switching pass.  */
13797 
13798 static int
13799 ix86_avx_u128_mode_needed (rtx_insn *insn)
13800 {
13801   if (CALL_P (insn))
13802     {
13803       rtx link;
13804 
13805       /* Needed mode is set to AVX_U128_CLEAN if there are
13806 	 no 256bit or 512bit modes used in function arguments. */
13807       for (link = CALL_INSN_FUNCTION_USAGE (insn);
13808 	   link;
13809 	   link = XEXP (link, 1))
13810 	{
13811 	  if (GET_CODE (XEXP (link, 0)) == USE)
13812 	    {
13813 	      rtx arg = XEXP (XEXP (link, 0), 0);
13814 
13815 	      if (ix86_check_avx_upper_register (arg))
13816 		return AVX_U128_DIRTY;
13817 	    }
13818 	}
13819 
13820       /* If the function is known to preserve some SSE registers,
13821 	 RA and previous passes can legitimately rely on that for
13822 	 modes wider than 256 bits.  It's only safe to issue a
13823 	 vzeroupper if all SSE registers are clobbered.  */
13824       const function_abi &abi = insn_callee_abi (insn);
13825       if (!hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
13826 				  abi.mode_clobbers (V4DImode)))
13827 	return AVX_U128_ANY;
13828 
13829       return AVX_U128_CLEAN;
13830     }
13831 
13832   /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
13833      Hardware changes state only when a 256bit register is written to,
13834      but we need to prevent the compiler from moving the optimal insertion
13835      point above an eventual read from a 256bit or 512bit register.  */
13836   subrtx_iterator::array_type array;
13837   FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
13838     if (ix86_check_avx_upper_register (*iter))
13839       return AVX_U128_DIRTY;
13840 
13841   return AVX_U128_ANY;
13842 }
13843 
13844 /* Return mode that i387 must be switched into
13845    prior to the execution of insn.  */
13846 
13847 static int
13848 ix86_i387_mode_needed (int entity, rtx_insn *insn)
13849 {
13850   enum attr_i387_cw mode;
13851 
13852   /* The mode UNINITIALIZED is used to store control word after a
13853      function call or ASM pattern.  The mode ANY specifies that the function
13854      has no requirements on the control word and makes no changes in the
13855      bits we are interested in.  */
13856 
13857   if (CALL_P (insn)
13858       || (NONJUMP_INSN_P (insn)
13859 	  && (asm_noperands (PATTERN (insn)) >= 0
13860 	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
13861     return I387_CW_UNINITIALIZED;
13862 
13863   if (recog_memoized (insn) < 0)
13864     return I387_CW_ANY;
13865 
13866   mode = get_attr_i387_cw (insn);
13867 
13868   switch (entity)
13869     {
13870     case I387_ROUNDEVEN:
13871       if (mode == I387_CW_ROUNDEVEN)
13872 	return mode;
13873       break;
13874 
13875     case I387_TRUNC:
13876       if (mode == I387_CW_TRUNC)
13877 	return mode;
13878       break;
13879 
13880     case I387_FLOOR:
13881       if (mode == I387_CW_FLOOR)
13882 	return mode;
13883       break;
13884 
13885     case I387_CEIL:
13886       if (mode == I387_CW_CEIL)
13887 	return mode;
13888       break;
13889 
13890     default:
13891       gcc_unreachable ();
13892     }
13893 
13894   return I387_CW_ANY;
13895 }
13896 
13897 /* Return mode that entity must be switched into
13898    prior to the execution of insn.  */
13899 
13900 static int
13901 ix86_mode_needed (int entity, rtx_insn *insn)
13902 {
13903   switch (entity)
13904     {
13905     case X86_DIRFLAG:
13906       return ix86_dirflag_mode_needed (insn);
13907     case AVX_U128:
13908       return ix86_avx_u128_mode_needed (insn);
13909     case I387_ROUNDEVEN:
13910     case I387_TRUNC:
13911     case I387_FLOOR:
13912     case I387_CEIL:
13913       return ix86_i387_mode_needed (entity, insn);
13914     default:
13915       gcc_unreachable ();
13916     }
13917   return 0;
13918 }
13919 
13920 /* Check if a 256bit or 512bit AVX register is referenced in stores.   */
13921 
13922 static void
13923 ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
13924  {
13925    if (ix86_check_avx_upper_register (dest))
13926     {
13927       bool *used = (bool *) data;
13928       *used = true;
13929     }
13930  }
13931 
13932 /* Calculate mode of upper 128bit AVX registers after the insn.  */
13933 
13934 static int
13935 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
13936 {
13937   rtx pat = PATTERN (insn);
13938 
13939   if (vzeroupper_pattern (pat, VOIDmode)
13940       || vzeroall_pattern (pat, VOIDmode))
13941     return AVX_U128_CLEAN;
13942 
13943   /* We know that state is clean after CALL insn if there are no
13944      256bit or 512bit registers used for the function return value.  */
13945   if (CALL_P (insn))
13946     {
13947       bool avx_upper_reg_found = false;
13948       note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);
13949 
13950       return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
13951     }
13952 
13953   /* Otherwise, return current mode.  Remember that if insn
13954      references AVX 256bit or 512bit registers, the mode was already
13955      changed to DIRTY from MODE_NEEDED.  */
13956   return mode;
13957 }
13958 
13959 /* Return the mode that an insn results in.  */
13960 
13961 static int
13962 ix86_mode_after (int entity, int mode, rtx_insn *insn)
13963 {
13964   switch (entity)
13965     {
13966     case X86_DIRFLAG:
13967       return mode;
13968     case AVX_U128:
13969       return ix86_avx_u128_mode_after (mode, insn);
13970     case I387_ROUNDEVEN:
13971     case I387_TRUNC:
13972     case I387_FLOOR:
13973     case I387_CEIL:
13974       return mode;
13975     default:
13976       gcc_unreachable ();
13977     }
13978 }
13979 
13980 static int
13981 ix86_dirflag_mode_entry (void)
13982 {
13983   /* For TARGET_CLD or in the interrupt handler we can't assume
13984      direction flag state at function entry.  */
13985   if (TARGET_CLD
13986       || cfun->machine->func_type != TYPE_NORMAL)
13987     return X86_DIRFLAG_ANY;
13988 
13989   return X86_DIRFLAG_RESET;
13990 }
13991 
13992 static int
13993 ix86_avx_u128_mode_entry (void)
13994 {
13995   tree arg;
13996 
13997   /* Entry mode is set to AVX_U128_DIRTY if there are
13998      256bit or 512bit modes used in function arguments.  */
13999   for (arg = DECL_ARGUMENTS (current_function_decl); arg;
14000        arg = TREE_CHAIN (arg))
14001     {
14002       rtx incoming = DECL_INCOMING_RTL (arg);
14003 
14004       if (incoming && ix86_check_avx_upper_register (incoming))
14005 	return AVX_U128_DIRTY;
14006     }
14007 
14008   return AVX_U128_CLEAN;
14009 }
14010 
14011 /* Return a mode that ENTITY is assumed to be
14012    switched to at function entry.  */
14013 
14014 static int
14015 ix86_mode_entry (int entity)
14016 {
14017   switch (entity)
14018     {
14019     case X86_DIRFLAG:
14020       return ix86_dirflag_mode_entry ();
14021     case AVX_U128:
14022       return ix86_avx_u128_mode_entry ();
14023     case I387_ROUNDEVEN:
14024     case I387_TRUNC:
14025     case I387_FLOOR:
14026     case I387_CEIL:
14027       return I387_CW_ANY;
14028     default:
14029       gcc_unreachable ();
14030     }
14031 }
14032 
14033 static int
14034 ix86_avx_u128_mode_exit (void)
14035 {
14036   rtx reg = crtl->return_rtx;
14037 
14038   /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
14039      or 512 bit modes used in the function return register. */
14040   if (reg && ix86_check_avx_upper_register (reg))
14041     return AVX_U128_DIRTY;
14042 
14043   /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
14044      modes used in function arguments, otherwise return AVX_U128_CLEAN.
14045    */
14046   return ix86_avx_u128_mode_entry ();
14047 }
14048 
14049 /* Return a mode that ENTITY is assumed to be
14050    switched to at function exit.  */
14051 
14052 static int
14053 ix86_mode_exit (int entity)
14054 {
14055   switch (entity)
14056     {
14057     case X86_DIRFLAG:
14058       return X86_DIRFLAG_ANY;
14059     case AVX_U128:
14060       return ix86_avx_u128_mode_exit ();
14061     case I387_ROUNDEVEN:
14062     case I387_TRUNC:
14063     case I387_FLOOR:
14064     case I387_CEIL:
14065       return I387_CW_ANY;
14066     default:
14067       gcc_unreachable ();
14068     }
14069 }
14070 
14071 static int
14072 ix86_mode_priority (int, int n)
14073 {
14074   return n;
14075 }
14076 
14077 /* Output code to initialize a control word copy used by the trunc?f?i
14078    and rounding patterns.  MODE selects the rounding mode; a modified
14079    copy of the saved control word is stored in the stack slot for MODE.  */
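/* For reference, the rounding-control field is bits 11:10 of the x87 control
   word: 00 = round to nearest even, 01 = round down, 10 = round up and
   11 = truncate toward zero, which is what the 0x0c00 / 0x0400 / 0x0800
   masks below select.  */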
14080 
14081 static void
14082 emit_i387_cw_initialization (int mode)
14083 {
14084   rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14085   rtx new_mode;
14086 
14087   enum ix86_stack_slot slot;
14088 
14089   rtx reg = gen_reg_rtx (HImode);
14090 
14091   emit_insn (gen_x86_fnstcw_1 (stored_mode));
14092   emit_move_insn (reg, copy_rtx (stored_mode));
14093 
14094   switch (mode)
14095     {
14096     case I387_CW_ROUNDEVEN:
14097       /* round to nearest */
14098       emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14099       slot = SLOT_CW_ROUNDEVEN;
14100       break;
14101 
14102     case I387_CW_TRUNC:
14103       /* round toward zero (truncate) */
14104       emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14105       slot = SLOT_CW_TRUNC;
14106       break;
14107 
14108     case I387_CW_FLOOR:
14109       /* round down toward -oo */
14110       emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14111       emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14112       slot = SLOT_CW_FLOOR;
14113       break;
14114 
14115     case I387_CW_CEIL:
14116       /* round up toward +oo */
14117       emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14118       emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14119       slot = SLOT_CW_CEIL;
14120       break;
14121 
14122     default:
14123       gcc_unreachable ();
14124     }
14125 
14126   gcc_assert (slot < MAX_386_STACK_LOCALS);
14127 
14128   new_mode = assign_386_stack_local (HImode, slot);
14129   emit_move_insn (new_mode, reg);
14130 }
14131 
14132 /* Generate one or more insns to set ENTITY to MODE.  */
14133 
14134 static void
14135 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
14136 		    HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
14137 {
14138   switch (entity)
14139     {
14140     case X86_DIRFLAG:
14141       if (mode == X86_DIRFLAG_RESET)
14142 	emit_insn (gen_cld ());
14143       break;
14144     case AVX_U128:
14145       if (mode == AVX_U128_CLEAN)
14146 	emit_insn (gen_avx_vzeroupper ());
14147       break;
14148     case I387_ROUNDEVEN:
14149     case I387_TRUNC:
14150     case I387_FLOOR:
14151     case I387_CEIL:
14152       if (mode != I387_CW_ANY
14153 	  && mode != I387_CW_UNINITIALIZED)
14154 	emit_i387_cw_initialization (mode);
14155       break;
14156     default:
14157       gcc_unreachable ();
14158     }
14159 }
14160 
14161 /* Output code for INSN to convert a float to a signed int.  OPERANDS
14162    are the insn operands.  The output may be [HSD]Imode and the input
14163    operand may be [SDX]Fmode.  */
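/* Roughly, the emitted sequence is (illustrative):
     fldcw  new-cw     ; switch rounding mode when the insn requires it
     fistp  dest       ; or plain fist when the stack top must survive
     fldcw  saved-cw   ; restore the original control word
   The fisttp form always truncates, so it is returned directly without any
   control-word switching.  */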
14164 
14165 const char *
14166 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
14167 {
14168   bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
14169   bool dimode_p = GET_MODE (operands[0]) == DImode;
14170   int round_mode = get_attr_i387_cw (insn);
14171 
14172   static char buf[40];
14173   const char *p;
14174 
14175   /* Jump through a hoop or two for DImode, since the hardware has no
14176      non-popping instruction.  We used to do this a different way, but
14177      that was somewhat fragile and broke with post-reload splitters.  */
14178   if ((dimode_p || fisttp) && !stack_top_dies)
14179     output_asm_insn ("fld\t%y1", operands);
14180 
14181   gcc_assert (STACK_TOP_P (operands[1]));
14182   gcc_assert (MEM_P (operands[0]));
14183   gcc_assert (GET_MODE (operands[1]) != TFmode);
14184 
14185   if (fisttp)
14186     return "fisttp%Z0\t%0";
14187 
14188   strcpy (buf, "fist");
14189 
14190   if (round_mode != I387_CW_ANY)
14191     output_asm_insn ("fldcw\t%3", operands);
14192 
14193   p = "p%Z0\t%0";
14194   strcat (buf, p + !(stack_top_dies || dimode_p));
14195 
14196   output_asm_insn (buf, operands);
14197 
14198   if (round_mode != I387_CW_ANY)
14199     output_asm_insn ("fldcw\t%2", operands);
14200 
14201   return "";
14202 }
14203 
14204 /* Output code for x87 ffreep insn.  The OPNO argument, which may only
14205    have the values zero or one, indicates the ffreep insn's operand
14206    from the OPERANDS array.  */
14207 
14208 static const char *
14209 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14210 {
14211   if (TARGET_USE_FFREEP)
14212 #ifdef HAVE_AS_IX86_FFREEP
14213     return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14214 #else
14215     {
14216       static char retval[32];
14217       int regno = REGNO (operands[opno]);
14218 
14219       gcc_assert (STACK_REGNO_P (regno));
14220 
14221       regno -= FIRST_STACK_REG;
14222 
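      /* ASM_SHORT emits a 16-bit word; on little-endian x86 the word
	 0xc<regno>df is laid out as the bytes 0xdf 0xc<regno>, i.e. the
	 encoding of "ffreep %st(regno)" (DF C0+i).  */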
14223       snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
14224       return retval;
14225     }
14226 #endif
14227 
14228   return opno ? "fstp\t%y1" : "fstp\t%y0";
14229 }
14230 
14231 
14232 /* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
14233    should be used.  UNORDERED_P is true when fucom should be used.  */
14234 
14235 const char *
14236 output_fp_compare (rtx_insn *insn, rtx *operands,
14237 		   bool eflags_p, bool unordered_p)
14238 {
14239   rtx *xops = eflags_p ? &operands[0] : &operands[1];
14240   bool stack_top_dies;
14241 
14242   static char buf[40];
14243   const char *p;
14244 
14245   gcc_assert (STACK_TOP_P (xops[0]));
14246 
14247   stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
14248 
14249   if (eflags_p)
14250     {
14251       p = unordered_p ? "fucomi" : "fcomi";
14252       strcpy (buf, p);
14253 
14254       p = "p\t{%y1, %0|%0, %y1}";
14255       strcat (buf, p + !stack_top_dies);
14256 
14257       return buf;
14258     }
14259 
14260   if (STACK_REG_P (xops[1])
14261       && stack_top_dies
14262       && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
14263     {
14264       gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);
14265 
14266       /* If the top of the 387 stack dies, and the other operand
14267 	 is also a stack register that dies, then this must be a
14268 	 `fcompp' float compare.  */
14269       p = unordered_p ? "fucompp" : "fcompp";
14270       strcpy (buf, p);
14271     }
14272   else if (const0_operand (xops[1], VOIDmode))
14273     {
14274       gcc_assert (!unordered_p);
14275       strcpy (buf, "ftst");
14276     }
14277   else
14278     {
14279       if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
14280 	{
14281 	  gcc_assert (!unordered_p);
14282 	  p = "ficom";
14283 	}
14284       else
14285 	p = unordered_p ? "fucom" : "fcom";
14286 
14287       strcpy (buf, p);
14288 
14289       p = "p%Z2\t%y2";
14290       strcat (buf, p + !stack_top_dies);
14291     }
14292 
14293   output_asm_insn (buf, operands);
14294   return "fnstsw\t%0";
14295 }
14296 
14297 void
14298 ix86_output_addr_vec_elt (FILE *file, int value)
14299 {
14300   const char *directive = ASM_LONG;
14301 
14302 #ifdef ASM_QUAD
14303   if (TARGET_LP64)
14304     directive = ASM_QUAD;
14305 #else
14306   gcc_assert (!TARGET_64BIT);
14307 #endif
14308 
14309   fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
14310 }
14311 
14312 void
14313 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
14314 {
14315   const char *directive = ASM_LONG;
14316 
14317 #ifdef ASM_QUAD
14318   if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
14319     directive = ASM_QUAD;
14320 #else
14321   gcc_assert (!TARGET_64BIT);
14322 #endif
14323   /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
14324   if (TARGET_64BIT || TARGET_VXWORKS_RTP)
14325     fprintf (file, "%s%s%d-%s%d\n",
14326 	     directive, LPREFIX, value, LPREFIX, rel);
14327 #if TARGET_MACHO
14328   else if (TARGET_MACHO)
14329     {
14330       fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
14331       machopic_output_function_base_name (file);
14332       putc ('\n', file);
14333     }
14334 #endif
14335   else if (HAVE_AS_GOTOFF_IN_DATA)
14336     fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
14337   else
14338     asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
14339 		 GOT_SYMBOL_NAME, LPREFIX, value);
14340 }
14341 
14342 #define LEA_MAX_STALL (3)
14343 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
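/* Distances below are measured in half-cycles (see increase_distance), so a
   search window of LEA_MAX_STALL << 1 half-cycles corresponds to
   LEA_MAX_STALL full cycles around the insn being examined.  */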
14344 
14345 /* Increase given DISTANCE in half-cycles according to
14346    dependencies between PREV and NEXT instructions.
14347    Add 1 half-cycle if there is no dependency and
14348    go to the next cycle if there is some dependency.  */
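/* For example (illustrative): with DISTANCE == 3 half-cycles and a true
   register dependency between PREV and NEXT, the result is 3 + 1 + 2 == 6,
   i.e. the count is rounded up to a full-cycle boundary and then advanced
   by one more cycle.  */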
14349 
14350 static unsigned int
14351 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
14352 {
14353   df_ref def, use;
14354 
14355   if (!prev || !next)
14356     return distance + (distance & 1) + 2;
14357 
14358   if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
14359     return distance + 1;
14360 
14361   FOR_EACH_INSN_USE (use, next)
14362     FOR_EACH_INSN_DEF (def, prev)
14363       if (!DF_REF_IS_ARTIFICIAL (def)
14364 	  && DF_REF_REGNO (use) == DF_REF_REGNO (def))
14365 	return distance + (distance & 1) + 2;
14366 
14367   return distance + 1;
14368 }
14369 
14370 /* Function checks if instruction INSN defines register number
14371    REGNO1 or REGNO2.  */
14372 
14373 bool
14374 insn_defines_reg (unsigned int regno1, unsigned int regno2,
14375 		  rtx_insn *insn)
14376 {
14377   df_ref def;
14378 
14379   FOR_EACH_INSN_DEF (def, insn)
14380     if (DF_REF_REG_DEF_P (def)
14381 	&& !DF_REF_IS_ARTIFICIAL (def)
14382 	&& (regno1 == DF_REF_REGNO (def)
14383 	    || regno2 == DF_REF_REGNO (def)))
14384       return true;
14385 
14386   return false;
14387 }
14388 
14389 /* Function checks if instruction INSN uses register number
14390    REGNO as a part of address expression.  */
14391 
14392 static bool
14393 insn_uses_reg_mem (unsigned int regno, rtx insn)
14394 {
14395   df_ref use;
14396 
14397   FOR_EACH_INSN_USE (use, insn)
14398     if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
14399       return true;
14400 
14401   return false;
14402 }
14403 
14404 /* Search backward for non-agu definition of register number REGNO1
14405    or register number REGNO2 in basic block starting from instruction
14406    START up to head of basic block or instruction INSN.
14407 
14408    Function puts true value into *FOUND var if definition was found
14409    and false otherwise.
14410 
14411    Distance in half-cycles between START and found instruction or head
14412    of BB is added to DISTANCE and returned.  */
14413 
14414 static int
14415 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
14416 			       rtx_insn *insn, int distance,
14417 			       rtx_insn *start, bool *found)
14418 {
14419   basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
14420   rtx_insn *prev = start;
14421   rtx_insn *next = NULL;
14422 
14423   *found = false;
14424 
14425   while (prev
14426 	 && prev != insn
14427 	 && distance < LEA_SEARCH_THRESHOLD)
14428     {
14429       if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
14430 	{
14431 	  distance = increase_distance (prev, next, distance);
14432 	  if (insn_defines_reg (regno1, regno2, prev))
14433 	    {
14434 	      if (recog_memoized (prev) < 0
14435 		  || get_attr_type (prev) != TYPE_LEA)
14436 		{
14437 		  *found = true;
14438 		  return distance;
14439 		}
14440 	    }
14441 
14442 	  next = prev;
14443 	}
14444       if (prev == BB_HEAD (bb))
14445 	break;
14446 
14447       prev = PREV_INSN (prev);
14448     }
14449 
14450   return distance;
14451 }
14452 
14453 /* Search backward for non-agu definition of register number REGNO1
14454    or register number REGNO2 in INSN's basic block until
14455    1. Pass LEA_SEARCH_THRESHOLD instructions, or
14456    2. Reach neighbor BBs boundary, or
14457    3. Reach agu definition.
14458    Returns the distance between the non-agu definition point and INSN.
14459    If no definition point, returns -1.  */
14460 
14461 static int
14462 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
14463 			 rtx_insn *insn)
14464 {
14465   basic_block bb = BLOCK_FOR_INSN (insn);
14466   int distance = 0;
14467   bool found = false;
14468 
14469   if (insn != BB_HEAD (bb))
14470     distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
14471 					      distance, PREV_INSN (insn),
14472 					      &found);
14473 
14474   if (!found && distance < LEA_SEARCH_THRESHOLD)
14475     {
14476       edge e;
14477       edge_iterator ei;
14478       bool simple_loop = false;
14479 
14480       FOR_EACH_EDGE (e, ei, bb->preds)
14481 	if (e->src == bb)
14482 	  {
14483 	    simple_loop = true;
14484 	    break;
14485 	  }
14486 
14487       if (simple_loop)
14488 	distance = distance_non_agu_define_in_bb (regno1, regno2,
14489 						  insn, distance,
14490 						  BB_END (bb), &found);
14491       else
14492 	{
14493 	  int shortest_dist = -1;
14494 	  bool found_in_bb = false;
14495 
14496 	  FOR_EACH_EDGE (e, ei, bb->preds)
14497 	    {
14498 	      int bb_dist
14499 		= distance_non_agu_define_in_bb (regno1, regno2,
14500 						 insn, distance,
14501 						 BB_END (e->src),
14502 						 &found_in_bb);
14503 	      if (found_in_bb)
14504 		{
14505 		  if (shortest_dist < 0)
14506 		    shortest_dist = bb_dist;
14507 		  else if (bb_dist > 0)
14508 		    shortest_dist = MIN (bb_dist, shortest_dist);
14509 
14510 		  found = true;
14511 		}
14512 	    }
14513 
14514 	  distance = shortest_dist;
14515 	}
14516     }
14517 
14518   /* get_attr_type may modify recog data.  We want to make sure
14519      that recog data is valid for instruction INSN, on which
14520      distance_non_agu_define is called.  INSN is unchanged here.  */
14521   extract_insn_cached (insn);
14522 
14523   if (!found)
14524     return -1;
14525 
14526   return distance >> 1;
14527 }
14528 
14529 /* Return the distance in half-cycles between INSN and the next
14530    insn that uses register number REGNO in a memory address, added
14531    to DISTANCE.  Return -1 if REGNO is set.
14532 
14533    Put true value into *FOUND if register usage was found and
14534    false otherwise.
14535    Put true value into *REDEFINED if register redefinition was
14536    found and false otherwise.  */
14537 
14538 static int
14539 distance_agu_use_in_bb (unsigned int regno,
14540 			rtx_insn *insn, int distance, rtx_insn *start,
14541 			bool *found, bool *redefined)
14542 {
14543   basic_block bb = NULL;
14544   rtx_insn *next = start;
14545   rtx_insn *prev = NULL;
14546 
14547   *found = false;
14548   *redefined = false;
14549 
14550   if (start != NULL_RTX)
14551     {
14552       bb = BLOCK_FOR_INSN (start);
14553       if (start != BB_HEAD (bb))
14554 	/* If insn and start belong to the same bb, set prev to insn,
14555 	   so the call to increase_distance will increase the distance
14556 	   between insns by 1.  */
14557 	prev = insn;
14558     }
14559 
14560   while (next
14561 	 && next != insn
14562 	 && distance < LEA_SEARCH_THRESHOLD)
14563     {
14564       if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
14565 	{
14566 	  distance = increase_distance(prev, next, distance);
14567 	  if (insn_uses_reg_mem (regno, next))
14568 	    {
14569 	      /* Return DISTANCE if OP0 is used in memory
14570 		 address in NEXT.  */
14571 	      *found = true;
14572 	      return distance;
14573 	    }
14574 
14575 	  if (insn_defines_reg (regno, INVALID_REGNUM, next))
14576 	    {
14577 	      /* Return -1 if OP0 is set in NEXT.  */
14578 	      *redefined = true;
14579 	      return -1;
14580 	    }
14581 
14582 	  prev = next;
14583 	}
14584 
14585       if (next == BB_END (bb))
14586 	break;
14587 
14588       next = NEXT_INSN (next);
14589     }
14590 
14591   return distance;
14592 }
14593 
14594 /* Return the distance between INSN and the next insn that uses
14595    register number REGNO0 in memory address.  Return -1 if no such
14596    a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */
14597 
14598 static int
14599 distance_agu_use (unsigned int regno0, rtx_insn *insn)
14600 {
14601   basic_block bb = BLOCK_FOR_INSN (insn);
14602   int distance = 0;
14603   bool found = false;
14604   bool redefined = false;
14605 
14606   if (insn != BB_END (bb))
14607     distance = distance_agu_use_in_bb (regno0, insn, distance,
14608 				       NEXT_INSN (insn),
14609 				       &found, &redefined);
14610 
14611   if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
14612     {
14613       edge e;
14614       edge_iterator ei;
14615       bool simple_loop = false;
14616 
14617       FOR_EACH_EDGE (e, ei, bb->succs)
14618         if (e->dest == bb)
14619 	  {
14620 	    simple_loop = true;
14621 	    break;
14622 	  }
14623 
14624       if (simple_loop)
14625 	distance = distance_agu_use_in_bb (regno0, insn,
14626 					   distance, BB_HEAD (bb),
14627 					   &found, &redefined);
14628       else
14629 	{
14630 	  int shortest_dist = -1;
14631 	  bool found_in_bb = false;
14632 	  bool redefined_in_bb = false;
14633 
14634 	  FOR_EACH_EDGE (e, ei, bb->succs)
14635 	    {
14636 	      int bb_dist
14637 		= distance_agu_use_in_bb (regno0, insn,
14638 					  distance, BB_HEAD (e->dest),
14639 					  &found_in_bb, &redefined_in_bb);
14640 	      if (found_in_bb)
14641 		{
14642 		  if (shortest_dist < 0)
14643 		    shortest_dist = bb_dist;
14644 		  else if (bb_dist > 0)
14645 		    shortest_dist = MIN (bb_dist, shortest_dist);
14646 
14647 		  found = true;
14648 		}
14649 	    }
14650 
14651 	  distance = shortest_dist;
14652 	}
14653     }
14654 
14655   if (!found || redefined)
14656     return -1;
14657 
14658   return distance >> 1;
14659 }
14660 
14661 /* Define this macro to tune LEA priority vs ADD; it takes effect when
14662    there is a choice between LEA and ADD:
14663    Negative value: ADD is preferred over LEA
14664    Zero: Neutral
14665    Positive value: LEA is preferred over ADD.  */
14666 #define IX86_LEA_PRIORITY 0
14667 
14668 /* Return true if using the lea INSN has a performance advantage over
14669    the equivalent sequence of instructions.  That instruction sequence
14670    has SPLIT_COST cycles higher latency than the lea.  */
14671 
14672 static bool
14673 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
14674 		      unsigned int regno2, int split_cost, bool has_scale)
14675 {
14676   int dist_define, dist_use;
14677 
14678   /* For Atom processors newer than Bonnell, if using a 2-source or
14679      3-source LEA for non-destructive destination purposes, or because
14680      of the need to use SCALE, the use of LEA is justified.  */
14681   if (!TARGET_BONNELL)
14682     {
14683       if (has_scale)
14684 	return true;
14685       if (split_cost < 1)
14686 	return false;
14687       if (regno0 == regno1 || regno0 == regno2)
14688 	return false;
14689       return true;
14690     }
14691 
14692   rtx_insn *rinsn = recog_data.insn;
14693 
14694   dist_define = distance_non_agu_define (regno1, regno2, insn);
14695   dist_use = distance_agu_use (regno0, insn);
14696 
14697   /* distance_non_agu_define can call extract_insn_cached.  If this function
14698      is called from define_split conditions, that can break insn splitting,
14699      because split_insns works by clearing recog_data.insn and then modifying
14700      recog_data.operand array and match the various split conditions.  */
14701   if (recog_data.insn != rinsn)
14702     recog_data.insn = NULL;
14703 
14704   if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
14705     {
14706       /* If there is no non AGU operand definition, no AGU
14707 	 operand usage and split cost is 0 then both lea
14708 	 and non lea variants have the same priority.  Currently
14709 	 we prefer lea for 64 bit code and non lea on 32 bit
14710 	 code.  */
14711       if (dist_use < 0 && split_cost == 0)
14712 	return TARGET_64BIT || IX86_LEA_PRIORITY;
14713       else
14714 	return true;
14715     }
14716 
14717   /* With a longer definition distance, lea is preferable.
14718      Here we change it to take into account splitting cost and
14719      lea priority.  */
14720   dist_define += split_cost + IX86_LEA_PRIORITY;
14721 
14722   /* If there is no use in a memory address then we just check
14723      that split cost exceeds AGU stall.  */
14724   if (dist_use < 0)
14725     return dist_define > LEA_MAX_STALL;
14726 
14727   /* If this insn has both backward non-agu dependence and forward
14728      agu dependence, the one with the shorter distance takes effect.  */
14729   return dist_define >= dist_use;
14730 }
14731 
14732 /* Return true if it is legal to clobber flags by INSN and
14733    false otherwise.  */
14734 
14735 static bool
14736 ix86_ok_to_clobber_flags (rtx_insn *insn)
14737 {
14738   basic_block bb = BLOCK_FOR_INSN (insn);
14739   df_ref use;
14740   bitmap live;
14741 
14742   while (insn)
14743     {
14744       if (NONDEBUG_INSN_P (insn))
14745 	{
14746 	  FOR_EACH_INSN_USE (use, insn)
14747 	    if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
14748 	      return false;
14749 
14750 	  if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
14751 	    return true;
14752 	}
14753 
14754       if (insn == BB_END (bb))
14755 	break;
14756 
14757       insn = NEXT_INSN (insn);
14758     }
14759 
14760   live = df_get_live_out(bb);
14761   return !REGNO_REG_SET_P (live, FLAGS_REG);
14762 }
14763 
14764 /* Return true if we need to split op0 = op1 + op2 into a sequence of
14765    move and add to avoid AGU stalls.  */
14766 
14767 bool
14768 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
14769 {
14770   unsigned int regno0, regno1, regno2;
14771 
14772   /* Check if we need to optimize.  */
14773   if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
14774     return false;
14775 
14776   /* Check it is correct to split here.  */
14777   if (!ix86_ok_to_clobber_flags(insn))
14778     return false;
14779 
14780   regno0 = true_regnum (operands[0]);
14781   regno1 = true_regnum (operands[1]);
14782   regno2 = true_regnum (operands[2]);
14783 
14784   /* We need to split only adds with a non-destructive
14785      destination operand.  */
14786   if (regno0 == regno1 || regno0 == regno2)
14787     return false;
14788   else
14789     return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
14790 }
14791 
14792 /* Return true if we should emit lea instruction instead of mov
14793    instruction.  */
14794 
14795 bool
14796 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
14797 {
14798   unsigned int regno0, regno1;
14799 
14800   /* Check if we need to optimize.  */
14801   if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
14802     return false;
14803 
14804   /* Use lea for reg to reg moves only.  */
14805   if (!REG_P (operands[0]) || !REG_P (operands[1]))
14806     return false;
14807 
14808   regno0 = true_regnum (operands[0]);
14809   regno1 = true_regnum (operands[1]);
14810 
14811   return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
14812 }
14813 
14814 /* Return true if we need to split lea into a sequence of
14815    instructions to avoid AGU stalls. */
14816 
14817 bool
14818 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
14819 {
14820   unsigned int regno0, regno1, regno2;
14821   int split_cost;
14822   struct ix86_address parts;
14823   int ok;
14824 
14825   /* The "at least two components" test below might not catch simple
14826      move or zero extension insns if parts.base is non-NULL and parts.disp
14827      is const0_rtx as the only components in the address, e.g. if the
14828      register is %rbp or %r13.  As this test is much cheaper and moves or
14829      zero extensions are the common case, do this check first.  */
14830   if (REG_P (operands[1])
14831       || (SImode_address_operand (operands[1], VOIDmode)
14832 	  && REG_P (XEXP (operands[1], 0))))
14833     return false;
14834 
14835   /* Check if it is OK to split here.  */
14836   if (!ix86_ok_to_clobber_flags (insn))
14837     return false;
14838 
14839   ok = ix86_decompose_address (operands[1], &parts);
14840   gcc_assert (ok);
14841 
14842   /* There should be at least two components in the address.  */
14843   if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
14844       + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
14845     return false;
14846 
14847   /* We should not split into add if a non-legitimate PIC
14848      operand is used as the displacement. */
14849   if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
14850     return false;
14851 
14852   regno0 = true_regnum (operands[0]);
14853   regno1 = INVALID_REGNUM;
14854   regno2 = INVALID_REGNUM;
14855 
14856   if (parts.base)
14857     regno1 = true_regnum (parts.base);
14858   if (parts.index)
14859     regno2 = true_regnum (parts.index);
14860 
14861   /* Use add for a = a + b and a = b + a since it is faster and shorter
14862      than lea for most processors.  For processors like BONNELL, if
14863      the destination register of LEA holds an actual address which will
14864      be used soon, LEA is better and otherwise ADD is better.  */
14865   if (!TARGET_BONNELL
14866       && parts.scale == 1
14867       && (!parts.disp || parts.disp == const0_rtx)
14868       && (regno0 == regno1 || regno0 == regno2))
14869     return true;
14870 
14871   /* Check if we need to optimize.  */
14872   if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
14873     return false;
14874 
14875   split_cost = 0;
14876 
14877   /* Compute how many cycles we will add to execution time
14878      if split lea into a sequence of instructions.  */
14879   if (parts.base || parts.index)
14880     {
14881       /* Have to use a mov instruction if the non-destructive
14882 	 destination form is used.  */
14883       if (regno1 != regno0 && regno2 != regno0)
14884 	split_cost += 1;
14885 
14886       /* Have to add index to base if both exist.  */
14887       if (parts.base && parts.index)
14888 	split_cost += 1;
14889 
14890       /* Have to use shift and adds if scale is 2 or greater.  */
14891       if (parts.scale > 1)
14892 	{
14893 	  if (regno0 != regno1)
14894 	    split_cost += 1;
14895 	  else if (regno2 == regno0)
14896 	    split_cost += 4;
14897 	  else
14898 	    split_cost += parts.scale;
14899 	}
14900 
14901       /* Have to use an add instruction with an immediate if
14902 	 disp is non-zero.  */
14903       if (parts.disp && parts.disp != const0_rtx)
14904 	split_cost += 1;
14905 
14906       /* Subtract the price of lea.  */
14907       split_cost -= 1;
14908     }
14909 
14910   return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
14911 				parts.scale > 1);
14912 }
14913 
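/* Illustrative sketch (not generated verbatim by the code above): an lea
   such as

	lea	0x4(%rbx,%rcx,2), %rax

   may end up split into a sequence along the lines of

	mov	%rcx, %rax		# copy needed, destination is fresh
	shl	$1, %rax		# scale > 1
	add	%rbx, %rax		# combine base and index
	add	$0x4, %rax		# non-zero displacement

   whose extra latency is what split_cost approximates above before it is
   weighed against the AGU stall distances in ix86_lea_outperforms.  */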
14914 /* Return true if it is ok to optimize an ADD operation to LEA
14915    operation to avoid flag register consumption.  For most processors,
14916    ADD is faster than LEA.  For processors like BONNELL, if the
14917    destination register of LEA holds an actual address which will be
14918    used soon, LEA is better and otherwise ADD is better.  */
14919 
14920 bool
14921 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
14922 {
14923   unsigned int regno0 = true_regnum (operands[0]);
14924   unsigned int regno1 = true_regnum (operands[1]);
14925   unsigned int regno2 = true_regnum (operands[2]);
14926 
14927   /* If a = b + c, (a!=b && a!=c), must use lea form. */
14928   if (regno0 != regno1 && regno0 != regno2)
14929     return true;
14930 
14931   if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
14932     return false;
14933 
14934   return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
14935 }
14936 
14937 /* Return true if destination reg of SET_BODY is shift count of
14938    USE_BODY.  */
14939 
14940 static bool
14941 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
14942 {
14943   rtx set_dest;
14944   rtx shift_rtx;
14945   int i;
14946 
14947   /* Retrieve destination of SET_BODY.  */
14948   switch (GET_CODE (set_body))
14949     {
14950     case SET:
14951       set_dest = SET_DEST (set_body);
14952       if (!set_dest || !REG_P (set_dest))
14953 	return false;
14954       break;
14955     case PARALLEL:
14956       for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
14957 	if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
14958 					  use_body))
14959 	  return true;
14960       /* FALLTHROUGH */
14961     default:
14962       return false;
14963     }
14964 
14965   /* Retrieve shift count of USE_BODY.  */
14966   switch (GET_CODE (use_body))
14967     {
14968     case SET:
14969       shift_rtx = XEXP (use_body, 1);
14970       break;
14971     case PARALLEL:
14972       for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
14973 	if (ix86_dep_by_shift_count_body (set_body,
14974 					  XVECEXP (use_body, 0, i)))
14975 	  return true;
14976       /* FALLTHROUGH */
14977     default:
14978       return false;
14979     }
14980 
14981   if (shift_rtx
14982       && (GET_CODE (shift_rtx) == ASHIFT
14983 	  || GET_CODE (shift_rtx) == LSHIFTRT
14984 	  || GET_CODE (shift_rtx) == ASHIFTRT
14985 	  || GET_CODE (shift_rtx) == ROTATE
14986 	  || GET_CODE (shift_rtx) == ROTATERT))
14987     {
14988       rtx shift_count = XEXP (shift_rtx, 1);
14989 
14990       /* Return true if shift count is dest of SET_BODY.  */
14991       if (REG_P (shift_count))
14992 	{
14993 	  /* Add check since it can be invoked before register
14994 	     allocation in pre-reload schedule.  */
14995 	  if (reload_completed
14996 	      && true_regnum (set_dest) == true_regnum (shift_count))
14997 	    return true;
14998 	  else if (REGNO(set_dest) == REGNO(shift_count))
14999 	    return true;
15000 	}
15001     }
15002 
15003   return false;
15004 }
15005 
15006 /* Return true if destination reg of SET_INSN is shift count of
15007    USE_INSN.  */
15008 
15009 bool
15010 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
15011 {
15012   return ix86_dep_by_shift_count_body (PATTERN (set_insn),
15013 				       PATTERN (use_insn));
15014 }
15015 
15016 /* Return TRUE or FALSE depending on whether the unary operator meets the
15017    appropriate constraints.  */
15018 
15019 bool
15020 ix86_unary_operator_ok (enum rtx_code,
15021 			machine_mode,
15022 			rtx operands[2])
15023 {
15024   /* If one of operands is memory, source and destination must match.  */
15025   if ((MEM_P (operands[0])
15026        || MEM_P (operands[1]))
15027       && ! rtx_equal_p (operands[0], operands[1]))
15028     return false;
15029   return true;
15030 }
15031 
15032 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
15033    are ok, keeping in mind the possible movddup alternative.  */
15034 
15035 bool
15036 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
15037 {
15038   if (MEM_P (operands[0]))
15039     return rtx_equal_p (operands[0], operands[1 + high]);
15040   if (MEM_P (operands[1]) && MEM_P (operands[2]))
15041     return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
15042   return true;
15043 }
15044 
15045 /* A subroutine of ix86_build_signbit_mask.  If VECT is true,
15046    then replicate the value for all elements of the vector
15047    register.  */
15048 
15049 rtx
15050 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
15051 {
15052   int i, n_elt;
15053   rtvec v;
15054   machine_mode scalar_mode;
15055 
15056   switch (mode)
15057     {
15058     case E_V64QImode:
15059     case E_V32QImode:
15060     case E_V16QImode:
15061     case E_V32HImode:
15062     case E_V16HImode:
15063     case E_V8HImode:
15064     case E_V16SImode:
15065     case E_V8SImode:
15066     case E_V4SImode:
15067     case E_V8DImode:
15068     case E_V4DImode:
15069     case E_V2DImode:
15070       gcc_assert (vect);
15071       /* FALLTHRU */
15072     case E_V16SFmode:
15073     case E_V8SFmode:
15074     case E_V4SFmode:
15075     case E_V8DFmode:
15076     case E_V4DFmode:
15077     case E_V2DFmode:
15078       n_elt = GET_MODE_NUNITS (mode);
15079       v = rtvec_alloc (n_elt);
15080       scalar_mode = GET_MODE_INNER (mode);
15081 
15082       RTVEC_ELT (v, 0) = value;
15083 
15084       for (i = 1; i < n_elt; ++i)
15085 	RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
15086 
15087       return gen_rtx_CONST_VECTOR (mode, v);
15088 
15089     default:
15090       gcc_unreachable ();
15091     }
15092 }
15093 
15094 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
15095    and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
15096    for an SSE register.  If VECT is true, then replicate the mask for
15097    all elements of the vector register.  If INVERT is true, then create
15098    a mask excluding the sign bit.  */
15099 
15100 rtx
15101 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
15102 {
15103   machine_mode vec_mode, imode;
15104   wide_int w;
15105   rtx mask, v;
15106 
15107   switch (mode)
15108     {
15109     case E_V16SImode:
15110     case E_V16SFmode:
15111     case E_V8SImode:
15112     case E_V4SImode:
15113     case E_V8SFmode:
15114     case E_V4SFmode:
15115       vec_mode = mode;
15116       imode = SImode;
15117       break;
15118 
15119     case E_V8DImode:
15120     case E_V4DImode:
15121     case E_V2DImode:
15122     case E_V8DFmode:
15123     case E_V4DFmode:
15124     case E_V2DFmode:
15125       vec_mode = mode;
15126       imode = DImode;
15127       break;
15128 
15129     case E_TImode:
15130     case E_TFmode:
15131       vec_mode = VOIDmode;
15132       imode = TImode;
15133       break;
15134 
15135     default:
15136       gcc_unreachable ();
15137     }
15138 
15139   machine_mode inner_mode = GET_MODE_INNER (mode);
15140   w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
15141 			   GET_MODE_BITSIZE (inner_mode));
15142   if (invert)
15143     w = wi::bit_not (w);
15144 
15145   /* Force this value into the low part of a fp vector constant.  */
15146   mask = immed_wide_int_const (w, imode);
15147   mask = gen_lowpart (inner_mode, mask);
15148 
15149   if (vec_mode == VOIDmode)
15150     return force_reg (inner_mode, mask);
15151 
15152   v = ix86_build_const_vector (vec_mode, vect, mask);
15153   return force_reg (vec_mode, v);
15154 }
15155 
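/* Worked example (an assumption-level sketch, not lifted from a caller):
   for V4SFmode the inner mode is SFmode, so the wide_int has only bit 31
   set, giving the scalar 0x80000000, i.e. -0.0.  With VECT the resulting
   constant is { -0.0, -0.0, -0.0, -0.0 }, the kind of mask an XORPS-based
   negation wants; with INVERT it becomes 0x7fffffff per element, the kind
   of mask an ANDPS-based fabs wants.  */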
15156 /* Return TRUE or FALSE depending on whether the first SET in INSN
15157    has source and destination with matching CC modes, and that the
15158    CC mode is at least as constrained as REQ_MODE.  */
15159 
15160 bool
15161 ix86_match_ccmode (rtx insn, machine_mode req_mode)
15162 {
15163   rtx set;
15164   machine_mode set_mode;
15165 
15166   set = PATTERN (insn);
15167   if (GET_CODE (set) == PARALLEL)
15168     set = XVECEXP (set, 0, 0);
15169   gcc_assert (GET_CODE (set) == SET);
15170   gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
15171 
15172   set_mode = GET_MODE (SET_DEST (set));
15173   switch (set_mode)
15174     {
15175     case E_CCNOmode:
15176       if (req_mode != CCNOmode
15177 	  && (req_mode != CCmode
15178 	      || XEXP (SET_SRC (set), 1) != const0_rtx))
15179 	return false;
15180       break;
15181     case E_CCmode:
15182       if (req_mode == CCGCmode)
15183 	return false;
15184       /* FALLTHRU */
15185     case E_CCGCmode:
15186       if (req_mode == CCGOCmode || req_mode == CCNOmode)
15187 	return false;
15188       /* FALLTHRU */
15189     case E_CCGOCmode:
15190       if (req_mode == CCZmode)
15191 	return false;
15192       /* FALLTHRU */
15193     case E_CCZmode:
15194       break;
15195 
15196     case E_CCGZmode:
15197 
15198     case E_CCAmode:
15199     case E_CCCmode:
15200     case E_CCOmode:
15201     case E_CCPmode:
15202     case E_CCSmode:
15203       if (set_mode != req_mode)
15204 	return false;
15205       break;
15206 
15207     default:
15208       gcc_unreachable ();
15209     }
15210 
15211   return GET_MODE (SET_SRC (set)) == set_mode;
15212 }
15213 
15214 machine_mode
15215 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
15216 {
15217   machine_mode mode = GET_MODE (op0);
15218 
15219   if (SCALAR_FLOAT_MODE_P (mode))
15220     {
15221       gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15222       return CCFPmode;
15223     }
15224 
15225   switch (code)
15226     {
15227       /* Only zero flag is needed.  */
15228     case EQ:			/* ZF=0 */
15229     case NE:			/* ZF!=0 */
15230       return CCZmode;
15231       /* Codes needing carry flag.  */
15232     case GEU:			/* CF=0 */
15233     case LTU:			/* CF=1 */
15234       /* Detect overflow checks.  They need just the carry flag.  */
15235       if (GET_CODE (op0) == PLUS
15236 	  && (rtx_equal_p (op1, XEXP (op0, 0))
15237 	      || rtx_equal_p (op1, XEXP (op0, 1))))
15238 	return CCCmode;
15239       else
15240 	return CCmode;
15241     case GTU:			/* CF=0 & ZF=0 */
15242     case LEU:			/* CF=1 | ZF=1 */
15243       return CCmode;
15244       /* Codes possibly doable only with sign flag when
15245          comparing against zero.  */
15246     case GE:			/* SF=OF   or   SF=0 */
15247     case LT:			/* SF<>OF  or   SF=1 */
15248       if (op1 == const0_rtx)
15249 	return CCGOCmode;
15250       else
15251 	/* For other cases Carry flag is not required.  */
15252 	return CCGCmode;
15253       /* Codes doable only with sign flag when comparing
15254          against zero, but we miss a jump instruction for it,
15255          so we need to use relational tests against overflow,
15256          which thus needs to be zero.  */
15257     case GT:			/* ZF=0 & SF=OF */
15258     case LE:			/* ZF=1 | SF<>OF */
15259       if (op1 == const0_rtx)
15260 	return CCNOmode;
15261       else
15262 	return CCGCmode;
15263       /* The strcmp pattern does (use flags), and combine may ask us
15264 	 for the proper mode.  */
15265     case USE:
15266       return CCmode;
15267     default:
15268       gcc_unreachable ();
15269     }
15270 }
15271 
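/* Example (illustrative only, not taken from a specific caller): for an
   unsigned "a < b" the code is LTU, so CCmode is chosen unless op0 has the
   overflow-check form a + b, in which case CCCmode lets the branch test
   just the carry flag.  For a signed "x > 0" the code is GT against
   const0_rtx, so CCNOmode is chosen, which allows the compare to be done
   with a flag-setting instruction such as test that leaves OF clear.  */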
15272 /* Return the fixed registers used for condition codes.  */
15273 
15274 static bool
15275 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
15276 {
15277   *p1 = FLAGS_REG;
15278   *p2 = INVALID_REGNUM;
15279   return true;
15280 }
15281 
15282 /* If two condition code modes are compatible, return a condition code
15283    mode which is compatible with both.  Otherwise, return
15284    VOIDmode.  */
15285 
15286 static machine_mode
15287 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
15288 {
15289   if (m1 == m2)
15290     return m1;
15291 
15292   if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
15293     return VOIDmode;
15294 
15295   if ((m1 == CCGCmode && m2 == CCGOCmode)
15296       || (m1 == CCGOCmode && m2 == CCGCmode))
15297     return CCGCmode;
15298 
15299   if ((m1 == CCNOmode && m2 == CCGOCmode)
15300       || (m1 == CCGOCmode && m2 == CCNOmode))
15301     return CCNOmode;
15302 
15303   if (m1 == CCZmode
15304       && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
15305     return m2;
15306   else if (m2 == CCZmode
15307 	   && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
15308     return m1;
15309 
15310   switch (m1)
15311     {
15312     default:
15313       gcc_unreachable ();
15314 
15315     case E_CCmode:
15316     case E_CCGCmode:
15317     case E_CCGOCmode:
15318     case E_CCNOmode:
15319     case E_CCAmode:
15320     case E_CCCmode:
15321     case E_CCOmode:
15322     case E_CCPmode:
15323     case E_CCSmode:
15324     case E_CCZmode:
15325       switch (m2)
15326 	{
15327 	default:
15328 	  return VOIDmode;
15329 
15330 	case E_CCmode:
15331 	case E_CCGCmode:
15332 	case E_CCGOCmode:
15333 	case E_CCNOmode:
15334 	case E_CCAmode:
15335 	case E_CCCmode:
15336 	case E_CCOmode:
15337 	case E_CCPmode:
15338 	case E_CCSmode:
15339 	case E_CCZmode:
15340 	  return CCmode;
15341 	}
15342 
15343     case E_CCFPmode:
15344       /* These are only compatible with themselves, which we already
15345 	 checked above.  */
15346       return VOIDmode;
15347     }
15348 }
15349 
15350 /* Return the strategy to use for floating-point.  We assume that fcomi is always
15351    preferable where available, since that is also true when looking at size
15352    (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */
15353 
15354 enum ix86_fpcmp_strategy
15355 ix86_fp_comparison_strategy (enum rtx_code)
15356 {
15357   /* Do fcomi/sahf based test when profitable.  */
15358 
15359   if (TARGET_CMOVE)
15360     return IX86_FPCMP_COMI;
15361 
15362   if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
15363     return IX86_FPCMP_SAHF;
15364 
15365   return IX86_FPCMP_ARITH;
15366 }
15367 
15368 /* Convert comparison codes we use to represent FP comparison to integer
15369    code that will result in proper branch.  Return UNKNOWN if no such code
15370    is available.  */
15371 
15372 enum rtx_code
15373 ix86_fp_compare_code_to_integer (enum rtx_code code)
15374 {
15375   switch (code)
15376     {
15377     case GT:
15378       return GTU;
15379     case GE:
15380       return GEU;
15381     case ORDERED:
15382     case UNORDERED:
15383       return code;
15384     case UNEQ:
15385       return EQ;
15386     case UNLT:
15387       return LTU;
15388     case UNLE:
15389       return LEU;
15390     case LTGT:
15391       return NE;
15392     default:
15393       return UNKNOWN;
15394     }
15395 }
15396 
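/* Illustrative use (an assumption, not lifted from a caller here): after a
   comisd/fcomi style compare the FP relation lands in ZF, CF and PF, so a
   floating-point GT is branched on with the unsigned-style "ja" (GTU) and
   UNEQ with "je" (EQ); UNKNOWN tells the caller that no single integer
   condition code covers the requested FP relation.  */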
15397 /* Zero extend possibly SImode EXP to Pmode register.  */
15398 rtx
15399 ix86_zero_extend_to_Pmode (rtx exp)
15400 {
15401   return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
15402 }
15403 
15404 /* Return true if the function being called was marked with attribute
15405    "noplt", or when using -fno-plt and we are compiling for non-PIC.  We need
15406    to handle the non-PIC case in the backend because there is no easy
15407    interface for the front-end to force non-PLT calls to use the GOT.
15408    This is currently used only with 64-bit or 32-bit GOT32X ELF targets
15409    to call the function marked "noplt" indirectly.  */
15410 
15411 static bool
15412 ix86_nopic_noplt_attribute_p (rtx call_op)
15413 {
15414   if (flag_pic || ix86_cmodel == CM_LARGE
15415       || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
15416       || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
15417       || SYMBOL_REF_LOCAL_P (call_op))
15418     return false;
15419 
15420   tree symbol_decl = SYMBOL_REF_DECL (call_op);
15421 
15422   if (!flag_plt
15423       || (symbol_decl != NULL_TREE
15424           && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
15425     return true;
15426 
15427   return false;
15428 }
15429 
15430 /* Helper to output the jmp/call.  */
15431 static void
15432 ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
15433 {
15434   if (thunk_name != NULL)
15435     {
15436       fprintf (asm_out_file, "\tjmp\t");
15437       assemble_name (asm_out_file, thunk_name);
15438       putc ('\n', asm_out_file);
15439     }
15440   else
15441     output_indirect_thunk (regno);
15442 }
15443 
15444 /* Output indirect branch via a call and return thunk.  CALL_OP is a
15445    register which contains the branch target.  XASM is the assembly
15446    template for CALL_OP.  Branch is a tail call if SIBCALL_P is true.
15447    A normal call is converted to:
15448 
15449 	call __x86_indirect_thunk_reg
15450 
15451    and a tail call is converted to:
15452 
15453 	jmp __x86_indirect_thunk_reg
15454  */
15455 
15456 static void
15457 ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
15458 {
15459   char thunk_name_buf[32];
15460   char *thunk_name;
15461   enum indirect_thunk_prefix need_prefix
15462     = indirect_thunk_need_prefix (current_output_insn);
15463   int regno = REGNO (call_op);
15464 
15465   if (cfun->machine->indirect_branch_type
15466       != indirect_branch_thunk_inline)
15467     {
15468       if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
15469 	{
15470 	  int i = regno;
15471 	  if (i >= FIRST_REX_INT_REG)
15472 	    i -= (FIRST_REX_INT_REG - LAST_INT_REG - 1);
15473 	  indirect_thunks_used |= 1 << i;
15474 	}
15475       indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
15476       thunk_name = thunk_name_buf;
15477     }
15478   else
15479     thunk_name = NULL;
15480 
15481   if (sibcall_p)
15482      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15483   else
15484     {
15485       if (thunk_name != NULL)
15486 	{
15487 	  fprintf (asm_out_file, "\tcall\t");
15488 	  assemble_name (asm_out_file, thunk_name);
15489 	  putc ('\n', asm_out_file);
15490 	  return;
15491 	}
15492 
15493       char indirectlabel1[32];
15494       char indirectlabel2[32];
15495 
15496       ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
15497 				   INDIRECT_LABEL,
15498 				   indirectlabelno++);
15499       ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
15500 				   INDIRECT_LABEL,
15501 				   indirectlabelno++);
15502 
15503       /* Jump.  */
15504       fputs ("\tjmp\t", asm_out_file);
15505       assemble_name_raw (asm_out_file, indirectlabel2);
15506       fputc ('\n', asm_out_file);
15507 
15508       ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
15509 
15510      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15511 
15512       ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
15513 
15514       /* Call.  */
15515       fputs ("\tcall\t", asm_out_file);
15516       assemble_name_raw (asm_out_file, indirectlabel1);
15517       fputc ('\n', asm_out_file);
15518     }
15519 }
15520 
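/* For the inline-thunk, non-sibcall case above the emitted code is shaped
   roughly like this (sketch only; the real labels are internal LINDnn
   labels):

	jmp	.LIND2
   .LIND1:
	# inline thunk that transfers control through the register
   .LIND2:
	call	.LIND1

   i.e. the call still pushes the correct return address while the actual
   indirect transfer happens inside the thunk body.  */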
15521 /* Output indirect branch via a call and return thunk.  CALL_OP is
15522    the branch target.  XASM is the assembly template for CALL_OP.
15523    Branch is a tail call if SIBCALL_P is true.  A normal call is
15524    converted to:
15525 
15526 	jmp L2
15527    L1:
15528 	push CALL_OP
15529 	jmp __x86_indirect_thunk
15530    L2:
15531 	call L1
15532 
15533    and a tail call is converted to:
15534 
15535 	push CALL_OP
15536 	jmp __x86_indirect_thunk
15537  */
15538 
15539 static void
15540 ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
15541 				      bool sibcall_p)
15542 {
15543   char thunk_name_buf[32];
15544   char *thunk_name;
15545   char push_buf[64];
15546   enum indirect_thunk_prefix need_prefix
15547     = indirect_thunk_need_prefix (current_output_insn);
15548   int regno = -1;
15549 
15550   if (cfun->machine->indirect_branch_type
15551       != indirect_branch_thunk_inline)
15552     {
15553       if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
15554 	indirect_thunk_needed = true;
15555       indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
15556       thunk_name = thunk_name_buf;
15557     }
15558   else
15559     thunk_name = NULL;
15560 
15561   snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
15562 	    TARGET_64BIT ? 'q' : 'l', xasm);
15563 
15564   if (sibcall_p)
15565     {
15566       output_asm_insn (push_buf, &call_op);
15567       ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15568     }
15569   else
15570     {
15571       char indirectlabel1[32];
15572       char indirectlabel2[32];
15573 
15574       ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
15575 				   INDIRECT_LABEL,
15576 				   indirectlabelno++);
15577       ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
15578 				   INDIRECT_LABEL,
15579 				   indirectlabelno++);
15580 
15581       /* Jump.  */
15582       fputs ("\tjmp\t", asm_out_file);
15583       assemble_name_raw (asm_out_file, indirectlabel2);
15584       fputc ('\n', asm_out_file);
15585 
15586       ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
15587 
15588       /* An external function may be called via GOT, instead of PLT.  */
15589       if (MEM_P (call_op))
15590 	{
15591 	  struct ix86_address parts;
15592 	  rtx addr = XEXP (call_op, 0);
15593 	  if (ix86_decompose_address (addr, &parts)
15594 	      && parts.base == stack_pointer_rtx)
15595 	    {
15596 	      /* Since call will adjust stack by -UNITS_PER_WORD,
15597 		 we must convert "disp(stack, index, scale)" to
15598 		 "disp+UNITS_PER_WORD(stack, index, scale)".  */
15599 	      if (parts.index)
15600 		{
15601 		  addr = gen_rtx_MULT (Pmode, parts.index,
15602 				       GEN_INT (parts.scale));
15603 		  addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
15604 				       addr);
15605 		}
15606 	      else
15607 		addr = stack_pointer_rtx;
15608 
15609 	      rtx disp;
15610 	      if (parts.disp != NULL_RTX)
15611 		disp = plus_constant (Pmode, parts.disp,
15612 				      UNITS_PER_WORD);
15613 	      else
15614 		disp = GEN_INT (UNITS_PER_WORD);
15615 
15616 	      addr = gen_rtx_PLUS (Pmode, addr, disp);
15617 	      call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
15618 	    }
15619 	}
15620 
15621       output_asm_insn (push_buf, &call_op);
15622 
15623       ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15624 
15625       ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
15626 
15627       /* Call.  */
15628       fputs ("\tcall\t", asm_out_file);
15629       assemble_name_raw (asm_out_file, indirectlabel1);
15630       fputc ('\n', asm_out_file);
15631     }
15632 }
15633 
15634 /* Output indirect branch via a call and return thunk.  CALL_OP is
15635    the branch target.  XASM is the assembly template for CALL_OP.
15636    Branch is a tail call if SIBCALL_P is true.   */
15637 
15638 static void
15639 ix86_output_indirect_branch (rtx call_op, const char *xasm,
15640 			     bool sibcall_p)
15641 {
15642   if (REG_P (call_op))
15643     ix86_output_indirect_branch_via_reg (call_op, sibcall_p);
15644   else
15645     ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p);
15646 }
15647 
15648 /* Output indirect jump.  CALL_OP is the jump target.  */
15649 
15650 const char *
15651 ix86_output_indirect_jmp (rtx call_op)
15652 {
15653   if (cfun->machine->indirect_branch_type != indirect_branch_keep)
15654     {
15655       /* We can't have a red zone, since the "call" in the indirect thunk
15656          pushes the return address onto the stack, destroying the red zone.  */
15657       if (ix86_red_zone_size != 0)
15658 	gcc_unreachable ();
15659 
15660       ix86_output_indirect_branch (call_op, "%0", true);
15661       return "";
15662     }
15663   else
15664     return "%!jmp\t%A0";
15665 }
15666 
15667 /* Output return instrumentation for current function if needed.  */
15668 
15669 static void
15670 output_return_instrumentation (void)
15671 {
15672   if (ix86_instrument_return != instrument_return_none
15673       && flag_fentry
15674       && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
15675     {
15676       if (ix86_flag_record_return)
15677 	fprintf (asm_out_file, "1:\n");
15678       switch (ix86_instrument_return)
15679 	{
15680 	case instrument_return_call:
15681 	  fprintf (asm_out_file, "\tcall\t__return__\n");
15682 	  break;
15683 	case instrument_return_nop5:
15684 	  /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1)  */
15685 	  fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
15686 	  break;
15687 	case instrument_return_none:
15688 	  break;
15689 	}
15690 
15691       if (ix86_flag_record_return)
15692 	{
15693 	  fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
15694 	  fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
15695 	  fprintf (asm_out_file, "\t.previous\n");
15696 	}
15697     }
15698 }
15699 
15700 /* Output function return.  CALL_OP is the jump target.  Add a REP
15701    prefix to RET if LONG_P is true and function return is kept.  */
15702 
15703 const char *
15704 ix86_output_function_return (bool long_p)
15705 {
15706   output_return_instrumentation ();
15707 
15708   if (cfun->machine->function_return_type != indirect_branch_keep)
15709     {
15710       char thunk_name[32];
15711       enum indirect_thunk_prefix need_prefix
15712 	= indirect_thunk_need_prefix (current_output_insn);
15713 
15714       if (cfun->machine->function_return_type
15715 	  != indirect_branch_thunk_inline)
15716 	{
15717 	  bool need_thunk = (cfun->machine->function_return_type
15718 			     == indirect_branch_thunk);
15719 	  indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
15720 			       true);
15721 	  indirect_return_needed |= need_thunk;
15722 	  fprintf (asm_out_file, "\tjmp\t");
15723 	  assemble_name (asm_out_file, thunk_name);
15724 	  putc ('\n', asm_out_file);
15725 	}
15726       else
15727 	output_indirect_thunk (INVALID_REGNUM);
15728 
15729       return "";
15730     }
15731 
15732   if (!long_p)
15733     return "%!ret";
15734 
15735   return "rep%; ret";
15736 }
15737 
15738 /* Output indirect function return.  RET_OP is the function return
15739    target.  */
15740 
15741 const char *
15742 ix86_output_indirect_function_return (rtx ret_op)
15743 {
15744   if (cfun->machine->function_return_type != indirect_branch_keep)
15745     {
15746       char thunk_name[32];
15747       enum indirect_thunk_prefix need_prefix
15748 	= indirect_thunk_need_prefix (current_output_insn);
15749       unsigned int regno = REGNO (ret_op);
15750       gcc_assert (regno == CX_REG);
15751 
15752       if (cfun->machine->function_return_type
15753 	  != indirect_branch_thunk_inline)
15754 	{
15755 	  bool need_thunk = (cfun->machine->function_return_type
15756 			     == indirect_branch_thunk);
15757 	  indirect_thunk_name (thunk_name, regno, need_prefix, true);
15758 
15759 	  if (need_thunk)
15760 	    {
15761 	      indirect_return_via_cx = true;
15762 	      indirect_thunks_used |= 1 << CX_REG;
15763 	    }
15764 	  fprintf (asm_out_file, "\tjmp\t");
15765 	  assemble_name (asm_out_file, thunk_name);
15766 	  putc ('\n', asm_out_file);
15767 	}
15768       else
15769 	output_indirect_thunk (regno);
15770 
15771       return "";
15772     }
15773   else
15774     return "%!jmp\t%A0";
15775 }
15776 
15777 /* Output the assembly for a call instruction.  */
15778 
15779 const char *
15780 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
15781 {
15782   bool direct_p = constant_call_address_operand (call_op, VOIDmode);
15783   bool output_indirect_p
15784     = (!TARGET_SEH
15785        && cfun->machine->indirect_branch_type != indirect_branch_keep);
15786   bool seh_nop_p = false;
15787   const char *xasm;
15788 
15789   if (SIBLING_CALL_P (insn))
15790     {
15791       output_return_instrumentation ();
15792       if (direct_p)
15793 	{
15794 	  if (ix86_nopic_noplt_attribute_p (call_op))
15795 	    {
15796 	      direct_p = false;
15797 	      if (TARGET_64BIT)
15798 		{
15799 		  if (output_indirect_p)
15800 		    xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15801 		  else
15802 		    xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15803 		}
15804 	      else
15805 		{
15806 		  if (output_indirect_p)
15807 		    xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
15808 		  else
15809 		    xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
15810 		}
15811 	    }
15812 	  else
15813 	    xasm = "%!jmp\t%P0";
15814 	}
15815       /* SEH epilogue detection requires the indirect branch case
15816 	 to include REX.W.  */
15817       else if (TARGET_SEH)
15818 	xasm = "%!rex.W jmp\t%A0";
15819       else
15820 	{
15821 	  if (output_indirect_p)
15822 	    xasm = "%0";
15823 	  else
15824 	    xasm = "%!jmp\t%A0";
15825 	}
15826 
15827       if (output_indirect_p && !direct_p)
15828 	ix86_output_indirect_branch (call_op, xasm, true);
15829       else
15830 	output_asm_insn (xasm, &call_op);
15831       return "";
15832     }
15833 
15834   /* SEH unwinding can require an extra nop to be emitted in several
15835      circumstances.  Determine if we have one of those.  */
15836   if (TARGET_SEH)
15837     {
15838       rtx_insn *i;
15839 
15840       for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
15841 	{
15842 	  /* Prevent a catch region from being adjacent to a jump that would
15843 	     be interpreted as an epilogue sequence by the unwinder.  */
15844 	  if (JUMP_P(i) && CROSSING_JUMP_P (i))
15845 	    {
15846 	      seh_nop_p = true;
15847 	      break;
15848 	    }
15849 
15850 	  /* If we get to another real insn, we don't need the nop.  */
15851 	  if (INSN_P (i))
15852 	    break;
15853 
15854 	  /* If we get to the epilogue note, prevent a catch region from
15855 	     being adjacent to the standard epilogue sequence.  Note that,
15856 	     if non-call exceptions are enabled, we already did it during
15857 	     epilogue expansion, or else, if the insn can throw internally,
15858 	     we already did it during the reorg pass.  */
15859 	  if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
15860 	      && !flag_non_call_exceptions
15861 	      && !can_throw_internal (insn))
15862 	    {
15863 	      seh_nop_p = true;
15864 	      break;
15865 	    }
15866 	}
15867 
15868       /* If we didn't find a real insn following the call, prevent the
15869 	 unwinder from looking into the next function.  */
15870       if (i == NULL)
15871 	seh_nop_p = true;
15872     }
15873 
15874   if (direct_p)
15875     {
15876       if (ix86_nopic_noplt_attribute_p (call_op))
15877 	{
15878 	  direct_p = false;
15879 	  if (TARGET_64BIT)
15880 	    {
15881 	      if (output_indirect_p)
15882 		xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15883 	      else
15884 		xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15885 	    }
15886 	  else
15887 	    {
15888 	      if (output_indirect_p)
15889 		xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
15890 	      else
15891 		xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
15892 	    }
15893 	}
15894       else
15895 	xasm = "%!call\t%P0";
15896     }
15897   else
15898     {
15899       if (output_indirect_p)
15900 	xasm = "%0";
15901       else
15902 	xasm = "%!call\t%A0";
15903     }
15904 
15905   if (output_indirect_p && !direct_p)
15906     ix86_output_indirect_branch (call_op, xasm, false);
15907   else
15908     output_asm_insn (xasm, &call_op);
15909 
15910   if (seh_nop_p)
15911     return "nop";
15912 
15913   return "";
15914 }
15915 
15916 /* Return a MEM corresponding to a stack slot with mode MODE.
15917    Allocate a new slot if necessary.
15918 
15919    The RTL for a function can have several slots available: N is
15920    which slot to use.  */
15921 
15922 rtx
15923 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
15924 {
15925   struct stack_local_entry *s;
15926 
15927   gcc_assert (n < MAX_386_STACK_LOCALS);
15928 
15929   for (s = ix86_stack_locals; s; s = s->next)
15930     if (s->mode == mode && s->n == n)
15931       return validize_mem (copy_rtx (s->rtl));
15932 
15933   s = ggc_alloc<stack_local_entry> ();
15934   s->n = n;
15935   s->mode = mode;
15936   s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15937 
15938   s->next = ix86_stack_locals;
15939   ix86_stack_locals = s;
15940   return validize_mem (copy_rtx (s->rtl));
15941 }
15942 
15943 static void
15944 ix86_instantiate_decls (void)
15945 {
15946   struct stack_local_entry *s;
15947 
15948   for (s = ix86_stack_locals; s; s = s->next)
15949     if (s->rtl != NULL_RTX)
15950       instantiate_decl_rtl (s->rtl);
15951 }
15952 
15953 /* Check whether x86 address PARTS is a pc-relative address.  */
15954 
15955 bool
15956 ix86_rip_relative_addr_p (struct ix86_address *parts)
15957 {
15958   rtx base, index, disp;
15959 
15960   base = parts->base;
15961   index = parts->index;
15962   disp = parts->disp;
15963 
15964   if (disp && !base && !index)
15965     {
15966       if (TARGET_64BIT)
15967 	{
15968 	  rtx symbol = disp;
15969 
15970 	  if (GET_CODE (disp) == CONST)
15971 	    symbol = XEXP (disp, 0);
15972 	  if (GET_CODE (symbol) == PLUS
15973 	      && CONST_INT_P (XEXP (symbol, 1)))
15974 	    symbol = XEXP (symbol, 0);
15975 
15976 	  if (GET_CODE (symbol) == LABEL_REF
15977 	      || (GET_CODE (symbol) == SYMBOL_REF
15978 		  && SYMBOL_REF_TLS_MODEL (symbol) == 0)
15979 	      || (GET_CODE (symbol) == UNSPEC
15980 		  && (XINT (symbol, 1) == UNSPEC_GOTPCREL
15981 		      || XINT (symbol, 1) == UNSPEC_PCREL
15982 		      || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
15983 	    return true;
15984 	}
15985     }
15986   return false;
15987 }
15988 
15989 /* Calculate the length of the memory address in the instruction encoding.
15990    Includes addr32 prefix, does not include the one-byte modrm, opcode,
15991    or other prefixes.  We never generate addr32 prefix for LEA insn.  */
15992 
15993 int
15994 memory_address_length (rtx addr, bool lea)
15995 {
15996   struct ix86_address parts;
15997   rtx base, index, disp;
15998   int len;
15999   int ok;
16000 
16001   if (GET_CODE (addr) == PRE_DEC
16002       || GET_CODE (addr) == POST_INC
16003       || GET_CODE (addr) == PRE_MODIFY
16004       || GET_CODE (addr) == POST_MODIFY)
16005     return 0;
16006 
16007   ok = ix86_decompose_address (addr, &parts);
16008   gcc_assert (ok);
16009 
16010   len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
16011 
16012   /* If this is not an LEA instruction, add the length of the addr32 prefix.  */
16013   if (TARGET_64BIT && !lea
16014       && (SImode_address_operand (addr, VOIDmode)
16015 	  || (parts.base && GET_MODE (parts.base) == SImode)
16016 	  || (parts.index && GET_MODE (parts.index) == SImode)))
16017     len++;
16018 
16019   base = parts.base;
16020   index = parts.index;
16021   disp = parts.disp;
16022 
16023   if (base && SUBREG_P (base))
16024     base = SUBREG_REG (base);
16025   if (index && SUBREG_P (index))
16026     index = SUBREG_REG (index);
16027 
16028   gcc_assert (base == NULL_RTX || REG_P (base));
16029   gcc_assert (index == NULL_RTX || REG_P (index));
16030 
16031   /* Rule of thumb:
16032        - esp as the base always wants an index,
16033        - ebp as the base always wants a displacement,
16034        - r12 as the base always wants an index,
16035        - r13 as the base always wants a displacement.  */
16036 
16037   /* Register Indirect.  */
16038   if (base && !index && !disp)
16039     {
16040       /* esp (for its index) and ebp (for its displacement) need
16041 	 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
16042 	 code.  */
16043       if (base == arg_pointer_rtx
16044 	  || base == frame_pointer_rtx
16045 	  || REGNO (base) == SP_REG
16046 	  || REGNO (base) == BP_REG
16047 	  || REGNO (base) == R12_REG
16048 	  || REGNO (base) == R13_REG)
16049 	len++;
16050     }
16051 
16052   /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
16053      is not disp32, but disp32(%rip), so for disp32
16054      SIB byte is needed, unless print_operand_address
16055      optimizes it into disp32(%rip) or (%rip) is implied
16056      by UNSPEC.  */
16057   else if (disp && !base && !index)
16058     {
16059       len += 4;
16060       if (!ix86_rip_relative_addr_p (&parts))
16061 	len++;
16062     }
16063   else
16064     {
16065       /* Find the length of the displacement constant.  */
16066       if (disp)
16067 	{
16068 	  if (base && satisfies_constraint_K (disp))
16069 	    len += 1;
16070 	  else
16071 	    len += 4;
16072 	}
16073       /* ebp always wants a displacement.  Similarly r13.  */
16074       else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
16075 	len++;
16076 
16077       /* An index requires the two-byte modrm form....  */
16078       if (index
16079 	  /* ...like esp (or r12), which always wants an index.  */
16080 	  || base == arg_pointer_rtx
16081 	  || base == frame_pointer_rtx
16082 	  || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
16083 	len++;
16084     }
16085 
16086   return len;
16087 }
16088 
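/* Worked examples (assumed operand forms, for illustration): "16(%rbp)"
   is base plus a disp8, so the value is 1; "(%rsp)" has no displacement
   but %rsp as the base forces the SIB form, so it is also 1; "foo(%rip)"
   is a pure disp32 kept RIP-relative, so it is 4; a symbolic disp32 with
   no base or index in 64-bit code that is not RIP-relative additionally
   needs the SIB byte, so it is 5.  */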
16089 /* Compute default value for "length_immediate" attribute.  When SHORTFORM
16090    is set, expect that the insn has an 8-bit immediate alternative.  */
16091 int
16092 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
16093 {
16094   int len = 0;
16095   int i;
16096   extract_insn_cached (insn);
16097   for (i = recog_data.n_operands - 1; i >= 0; --i)
16098     if (CONSTANT_P (recog_data.operand[i]))
16099       {
16100         enum attr_mode mode = get_attr_mode (insn);
16101 
16102 	gcc_assert (!len);
16103 	if (shortform && CONST_INT_P (recog_data.operand[i]))
16104 	  {
16105 	    HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
16106 	    switch (mode)
16107 	      {
16108 	      case MODE_QI:
16109 		len = 1;
16110 		continue;
16111 	      case MODE_HI:
16112 		ival = trunc_int_for_mode (ival, HImode);
16113 		break;
16114 	      case MODE_SI:
16115 		ival = trunc_int_for_mode (ival, SImode);
16116 		break;
16117 	      default:
16118 		break;
16119 	      }
16120 	    if (IN_RANGE (ival, -128, 127))
16121 	      {
16122 		len = 1;
16123 		continue;
16124 	      }
16125 	  }
16126 	switch (mode)
16127 	  {
16128 	  case MODE_QI:
16129 	    len = 1;
16130 	    break;
16131 	  case MODE_HI:
16132 	    len = 2;
16133 	    break;
16134 	  case MODE_SI:
16135 	    len = 4;
16136 	    break;
16137 	  /* Immediates for DImode instructions are encoded
16138 	     as 32-bit sign-extended values.  */
16139 	  case MODE_DI:
16140 	    len = 4;
16141 	    break;
16142 	  default:
16143 	    fatal_insn ("unknown insn mode", insn);
16144 	}
16145       }
16146   return len;
16147 }
16148 
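/* Examples (illustrative): with SHORTFORM, "add $-1, %eax" fits the
   sign-extended 8-bit form, so the immediate length is 1; "add $300,
   %eax" does not, so it is 4; and a DImode "add $300, %rax" is still 4,
   because 64-bit ALU immediates are encoded as 32-bit sign-extended
   values.  */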
16149 /* Compute default value for "length_address" attribute.  */
16150 int
16151 ix86_attr_length_address_default (rtx_insn *insn)
16152 {
16153   int i;
16154 
16155   if (get_attr_type (insn) == TYPE_LEA)
16156     {
16157       rtx set = PATTERN (insn), addr;
16158 
16159       if (GET_CODE (set) == PARALLEL)
16160 	set = XVECEXP (set, 0, 0);
16161 
16162       gcc_assert (GET_CODE (set) == SET);
16163 
16164       addr = SET_SRC (set);
16165 
16166       return memory_address_length (addr, true);
16167     }
16168 
16169   extract_insn_cached (insn);
16170   for (i = recog_data.n_operands - 1; i >= 0; --i)
16171     {
16172       rtx op = recog_data.operand[i];
16173       if (MEM_P (op))
16174 	{
16175 	  constrain_operands_cached (insn, reload_completed);
16176 	  if (which_alternative != -1)
16177 	    {
16178 	      const char *constraints = recog_data.constraints[i];
16179 	      int alt = which_alternative;
16180 
16181 	      while (*constraints == '=' || *constraints == '+')
16182 		constraints++;
16183 	      while (alt-- > 0)
16184 	        while (*constraints++ != ',')
16185 		  ;
16186 	      /* Skip ignored operands.  */
16187 	      if (*constraints == 'X')
16188 		continue;
16189 	    }
16190 
16191 	  int len = memory_address_length (XEXP (op, 0), false);
16192 
16193 	  /* Account for segment prefix for non-default addr spaces.  */
16194 	  if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
16195 	    len++;
16196 
16197 	  return len;
16198 	}
16199     }
16200   return 0;
16201 }
16202 
16203 /* Compute default value for "length_vex" attribute. It includes
16204    2 or 3 byte VEX prefix and 1 opcode byte.  */
16205 
16206 int
16207 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
16208 			      bool has_vex_w)
16209 {
16210   int i;
16211 
16212   /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX.W bit
16213      requires the 3-byte VEX prefix.  */
16214   if (!has_0f_opcode || has_vex_w)
16215     return 3 + 1;
16216 
16217   /* We can always use the 2-byte VEX prefix in 32-bit code.  */
16218   if (!TARGET_64BIT)
16219     return 2 + 1;
16220 
16221   extract_insn_cached (insn);
16222 
16223   for (i = recog_data.n_operands - 1; i >= 0; --i)
16224     if (REG_P (recog_data.operand[i]))
16225       {
16226 	/* REX.W bit uses 3 byte VEX prefix.  */
16227 	if (GET_MODE (recog_data.operand[i]) == DImode
16228 	    && GENERAL_REG_P (recog_data.operand[i]))
16229 	  return 3 + 1;
16230       }
16231     else
16232       {
16233 	/* REX.X or REX.B bits use 3 byte VEX prefix.  */
16234 	if (MEM_P (recog_data.operand[i])
16235 	    && x86_extended_reg_mentioned_p (recog_data.operand[i]))
16236 	  return 3 + 1;
16237       }
16238 
16239   return 2 + 1;
16240 }
16241 
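/* Examples (illustrative): a plain vaddps %xmm1, %xmm2, %xmm0 uses the 0f
   opcode map with no VEX.W and no extended registers, so 2 + 1 is
   returned; the same operation with a memory operand addressed through
   %r13 needs REX.B and therefore the 3-byte prefix, 3 + 1; and any insn
   with VEX.W set gets 3 + 1 regardless of its operands.  */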
16242 
16243 static bool
16244 ix86_class_likely_spilled_p (reg_class_t);
16245 
16246 /* Return true if the lhs of INSN is a HW function argument register, and set
16247    is_spilled to true if it is a likely-spilled HW register.  */
16248 static bool
16249 insn_is_function_arg (rtx insn, bool* is_spilled)
16250 {
16251   rtx dst;
16252 
16253   if (!NONDEBUG_INSN_P (insn))
16254     return false;
16255   /* Call instructions are not movable, ignore them.  */
16256   if (CALL_P (insn))
16257     return false;
16258   insn = PATTERN (insn);
16259   if (GET_CODE (insn) == PARALLEL)
16260     insn = XVECEXP (insn, 0, 0);
16261   if (GET_CODE (insn) != SET)
16262     return false;
16263   dst = SET_DEST (insn);
16264   if (REG_P (dst) && HARD_REGISTER_P (dst)
16265       && ix86_function_arg_regno_p (REGNO (dst)))
16266     {
16267       /* Is it likely spilled HW register?  */
16268       if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
16269 	  && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
16270 	*is_spilled = true;
16271       return true;
16272     }
16273   return false;
16274 }
16275 
16276 /* Add output dependencies for a chain of adjacent function arguments, but
16277    only if there is a move to a likely-spilled HW register.  Return the first
16278    argument if at least one dependence was added, or NULL otherwise.  */
16279 static rtx_insn *
16280 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
16281 {
16282   rtx_insn *insn;
16283   rtx_insn *last = call;
16284   rtx_insn *first_arg = NULL;
16285   bool is_spilled = false;
16286 
16287   head = PREV_INSN (head);
16288 
16289   /* Find the argument-passing instruction nearest to the call.  */
16290   while (true)
16291     {
16292       last = PREV_INSN (last);
16293       if (last == head)
16294 	return NULL;
16295       if (!NONDEBUG_INSN_P (last))
16296 	continue;
16297       if (insn_is_function_arg (last, &is_spilled))
16298 	break;
16299       return NULL;
16300     }
16301 
16302   first_arg = last;
16303   while (true)
16304     {
16305       insn = PREV_INSN (last);
16306       if (!INSN_P (insn))
16307 	break;
16308       if (insn == head)
16309 	break;
16310       if (!NONDEBUG_INSN_P (insn))
16311 	{
16312 	  last = insn;
16313 	  continue;
16314 	}
16315       if (insn_is_function_arg (insn, &is_spilled))
16316 	{
16317 	  /* Add an output dependence between two function arguments if the chain
16318 	     of output arguments contains likely-spilled HW registers.  */
16319 	  if (is_spilled)
16320 	    add_dependence (first_arg, insn, REG_DEP_OUTPUT);
16321 	  first_arg = last = insn;
16322 	}
16323       else
16324 	break;
16325     }
16326   if (!is_spilled)
16327     return NULL;
16328   return first_arg;
16329 }
16330 
16331 /* Add output or anti dependency from insn to first_arg to restrict its code
16332    motion.  */
16333 static void
16334 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
16335 {
16336   rtx set;
16337   rtx tmp;
16338 
16339   set = single_set (insn);
16340   if (!set)
16341     return;
16342   tmp = SET_DEST (set);
16343   if (REG_P (tmp))
16344     {
16345       /* Add output dependency to the first function argument.  */
16346       add_dependence (first_arg, insn, REG_DEP_OUTPUT);
16347       return;
16348     }
16349   /* Add anti dependency.  */
16350   add_dependence (first_arg, insn, REG_DEP_ANTI);
16351 }
16352 
16353 /* Avoid cross-block motion of a function argument by adding a dependency
16354    from the first non-jump instruction in bb.  */
16355 static void
16356 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
16357 {
16358   rtx_insn *insn = BB_END (bb);
16359 
16360   while (insn)
16361     {
16362       if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
16363 	{
16364 	  rtx set = single_set (insn);
16365 	  if (set)
16366 	    {
16367 	      avoid_func_arg_motion (arg, insn);
16368 	      return;
16369 	    }
16370 	}
16371       if (insn == BB_HEAD (bb))
16372 	return;
16373       insn = PREV_INSN (insn);
16374     }
16375 }
16376 
16377 /* Hook for pre-reload schedule - avoid motion of function arguments
16378    passed in likely spilled HW registers.  */
16379 static void
16380 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
16381 {
16382   rtx_insn *insn;
16383   rtx_insn *first_arg = NULL;
16384   if (reload_completed)
16385     return;
16386   while (head != tail && DEBUG_INSN_P (head))
16387     head = NEXT_INSN (head);
16388   for (insn = tail; insn != head; insn = PREV_INSN (insn))
16389     if (INSN_P (insn) && CALL_P (insn))
16390       {
16391 	first_arg = add_parameter_dependencies (insn, head);
16392 	if (first_arg)
16393 	  {
16394 	    /* Add a dependee for the first argument to predecessors, but only
16395 	       if the region contains more than one block.  */
16396 	    basic_block bb =  BLOCK_FOR_INSN (insn);
16397 	    int rgn = CONTAINING_RGN (bb->index);
16398 	    int nr_blks = RGN_NR_BLOCKS (rgn);
16399 	    /* Skip trivial regions and region head blocks that can have
16400 	       predecessors outside of region.  */
16401 	    if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
16402 	      {
16403 		edge e;
16404 		edge_iterator ei;
16405 
16406 		/* Regions are SCCs with the exception of selective
16407 		   scheduling with pipelining of outer blocks enabled.
16408 		   So also check that immediate predecessors of a non-head
16409 		   block are in the same region.  */
16410 		FOR_EACH_EDGE (e, ei, bb->preds)
16411 		  {
16412 		    /* Avoid creating loop-carried dependencies by using the
16413 		       topological ordering in the region.  */
16414 		    if (rgn == CONTAINING_RGN (e->src->index)
16415 			&& BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
16416 		      add_dependee_for_func_arg (first_arg, e->src);
16417 		  }
16418 	      }
16419 	    insn = first_arg;
16420 	    if (insn == head)
16421 	      break;
16422 	  }
16423       }
16424     else if (first_arg)
16425       avoid_func_arg_motion (first_arg, insn);
16426 }
16427 
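/* Example (illustrative, 64-bit SysV argument registers assumed): for

	mov	$1, %edi
	mov	$2, %esi
	call	foo

   %edi and %esi belong to likely-spilled single-register classes, so the
   hooks above chain the two argument moves together and add output/anti
   dependencies against surrounding insns, which keeps the pre-reload
   scheduler from dragging unrelated code in between the argument set-up
   and the call.  */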
16428 /* Hook for pre-reload schedule - set priority of moves from likely spilled
16429    HW registers to the maximum, to schedule them as soon as possible.  These are
16430    moves from function argument registers at the top of the function entry
16431    and moves from function return value registers after a call.  */
16432 static int
16433 ix86_adjust_priority (rtx_insn *insn, int priority)
16434 {
16435   rtx set;
16436 
16437   if (reload_completed)
16438     return priority;
16439 
16440   if (!NONDEBUG_INSN_P (insn))
16441     return priority;
16442 
16443   set = single_set (insn);
16444   if (set)
16445     {
16446       rtx tmp = SET_SRC (set);
16447       if (REG_P (tmp)
16448           && HARD_REGISTER_P (tmp)
16449           && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
16450           && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
16451 	return current_sched_info->sched_max_insns_priority;
16452     }
16453 
16454   return priority;
16455 }
16456 
16457 /* Prepare for scheduling pass.  */
16458 static void
16459 ix86_sched_init_global (FILE *, int, int)
16460 {
16461   /* Install scheduling hooks for current CPU.  Some of these hooks are used
16462      in time-critical parts of the scheduler, so we only set them up when
16463      they are actually used.  */
16464   switch (ix86_tune)
16465     {
16466     case PROCESSOR_CORE2:
16467     case PROCESSOR_NEHALEM:
16468     case PROCESSOR_SANDYBRIDGE:
16469     case PROCESSOR_HASWELL:
16470     case PROCESSOR_GENERIC:
16471       /* Do not perform multipass scheduling for pre-reload schedule
16472          to save compile time.  */
16473       if (reload_completed)
16474 	{
16475 	  ix86_core2i7_init_hooks ();
16476 	  break;
16477 	}
16478       /* Fall through.  */
16479     default:
16480       targetm.sched.dfa_post_advance_cycle = NULL;
16481       targetm.sched.first_cycle_multipass_init = NULL;
16482       targetm.sched.first_cycle_multipass_begin = NULL;
16483       targetm.sched.first_cycle_multipass_issue = NULL;
16484       targetm.sched.first_cycle_multipass_backtrack = NULL;
16485       targetm.sched.first_cycle_multipass_end = NULL;
16486       targetm.sched.first_cycle_multipass_fini = NULL;
16487       break;
16488     }
16489 }
16490 
16491 
16492 /* Implement TARGET_STATIC_RTX_ALIGNMENT.  */
16493 
16494 static HOST_WIDE_INT
16495 ix86_static_rtx_alignment (machine_mode mode)
16496 {
16497   if (mode == DFmode)
16498     return 64;
16499   if (ALIGN_MODE_128 (mode))
16500     return MAX (128, GET_MODE_ALIGNMENT (mode));
16501   return GET_MODE_ALIGNMENT (mode);
16502 }
16503 
16504 /* Implement TARGET_CONSTANT_ALIGNMENT.  */
16505 
16506 static HOST_WIDE_INT
16507 ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
16508 {
16509   if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
16510       || TREE_CODE (exp) == INTEGER_CST)
16511     {
16512       machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
16513       HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
16514       return MAX (mode_align, align);
16515     }
16516   else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16517 	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16518     return BITS_PER_WORD;
16519 
16520   return align;
16521 }
16522 
16523 /* Implement TARGET_EMPTY_RECORD_P.  */
16524 
16525 static bool
16526 ix86_is_empty_record (const_tree type)
16527 {
16528   if (!TARGET_64BIT)
16529     return false;
16530   return default_is_empty_record (type);
16531 }
16532 
16533 /* Implement TARGET_WARN_PARAMETER_PASSING_ABI.  */
16534 
16535 static void
16536 ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
16537 {
16538   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
16539 
16540   if (!cum->warn_empty)
16541     return;
16542 
16543   if (!TYPE_EMPTY_P (type))
16544     return;
16545 
16546   /* Don't warn if the function isn't visible outside of the TU.  */
16547   if (cum->decl && !TREE_PUBLIC (cum->decl))
16548     return;
16549 
16550   const_tree ctx = get_ultimate_context (cum->decl);
16551   if (ctx != NULL_TREE
16552       && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
16553     return;
16554 
16555   /* If the actual size of the type is zero, then there is no change
16556      in how objects of this size are passed.  */
16557   if (int_size_in_bytes (type) == 0)
16558     return;
16559 
16560   warning (OPT_Wabi, "empty class %qT parameter passing ABI "
16561 	   "changes in %<-fabi-version=12%> (GCC 8)", type);
16562 
16563   /* Only warn once.  */
16564   cum->warn_empty = false;
16565 }
16566 
16567 /* This hook returns name of multilib ABI.  */
16568 
16569 static const char *
16570 ix86_get_multilib_abi_name (void)
16571 {
16572   if (!(TARGET_64BIT_P (ix86_isa_flags)))
16573     return "i386";
16574   else if (TARGET_X32_P (ix86_isa_flags))
16575     return "x32";
16576   else
16577     return "x86_64";
16578 }
16579 
16580 /* Compute the alignment for a variable for Intel MCU psABI.  TYPE is
16581    the data type, and ALIGN is the alignment that the object would
16582    ordinarily have.  */
16583 
16584 static int
16585 iamcu_alignment (tree type, int align)
16586 {
16587   machine_mode mode;
16588 
16589   if (align < 32 || TYPE_USER_ALIGN (type))
16590     return align;
16591 
16592   /* The Intel MCU psABI specifies that scalar types larger than 4 bytes
16593      are aligned to 4 bytes.  */
16594   mode = TYPE_MODE (strip_array_types (type));
16595   switch (GET_MODE_CLASS (mode))
16596     {
16597     case MODE_INT:
16598     case MODE_COMPLEX_INT:
16599     case MODE_COMPLEX_FLOAT:
16600     case MODE_FLOAT:
16601     case MODE_DECIMAL_FLOAT:
16602       return 32;
16603     default:
16604       return align;
16605     }
16606 }
16607 
16608 /* Compute the alignment for a static variable.
16609    TYPE is the data type, and ALIGN is the alignment that
16610    the object would ordinarily have.  The value of this function is used
16611    instead of that alignment to align the object.  */
16612 
16613 int
16614 ix86_data_alignment (tree type, unsigned int align, bool opt)
16615 {
16616   /* GCC 4.8 and earlier used to incorrectly assume this alignment even
16617      for symbols from other compilation units or symbols that don't need
16618      to bind locally.  In order to preserve some ABI compatibility with
16619      those compilers, ensure we don't decrease alignment from what we
16620      used to assume.  */
16621 
16622   unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
16623 
16624   /* A data structure equal to or greater than the size of a cache line
16625      (64 bytes on the Pentium 4 and other recent Intel processors, including
16626      processors based on the Intel Core microarchitecture) should be aligned
16627      so that its base address is a multiple of the cache line size.  */
16628 
16629   unsigned int max_align
16630     = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
16631 
16632   if (max_align < BITS_PER_WORD)
16633     max_align = BITS_PER_WORD;
16634 
16635   switch (ix86_align_data_type)
16636     {
16637     case ix86_align_data_type_abi: opt = false; break;
16638     case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
16639     case ix86_align_data_type_cacheline: break;
16640     }
16641 
16642   if (TARGET_IAMCU)
16643     align = iamcu_alignment (type, align);
16644 
16645   if (opt
16646       && AGGREGATE_TYPE_P (type)
16647       && TYPE_SIZE (type)
16648       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
16649     {
16650       if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
16651 	  && align < max_align_compat)
16652 	align = max_align_compat;
16653       if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
16654 	  && align < max_align)
16655 	align = max_align;
16656     }
16657 
16658   /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
16659      to a 16-byte boundary.  */
16660   if (TARGET_64BIT)
16661     {
16662       if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
16663 	  && TYPE_SIZE (type)
16664 	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16665 	  && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
16666 	  && align < 128)
16667 	return 128;
16668     }
16669 
16670   if (!opt)
16671     return align;
16672 
16673   if (TREE_CODE (type) == ARRAY_TYPE)
16674     {
16675       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16676 	return 64;
16677       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16678 	return 128;
16679     }
16680   else if (TREE_CODE (type) == COMPLEX_TYPE)
16681     {
16682 
16683       if (TYPE_MODE (type) == DCmode && align < 64)
16684 	return 64;
16685       if ((TYPE_MODE (type) == XCmode
16686 	   || TYPE_MODE (type) == TCmode) && align < 128)
16687 	return 128;
16688     }
16689   else if ((TREE_CODE (type) == RECORD_TYPE
16690 	    || TREE_CODE (type) == UNION_TYPE
16691 	    || TREE_CODE (type) == QUAL_UNION_TYPE)
16692 	   && TYPE_FIELDS (type))
16693     {
16694       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16695 	return 64;
16696       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16697 	return 128;
16698     }
16699   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16700 	   || TREE_CODE (type) == INTEGER_TYPE)
16701     {
16702       if (TYPE_MODE (type) == DFmode && align < 64)
16703 	return 64;
16704       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16705 	return 128;
16706     }
16707 
16708   return align;
16709 }
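
/* Illustration (a non-authoritative sketch of the rules above): on the
   OPT path, a 40-byte aggregate such as "static char buf[40]" is raised
   to 256-bit (32-byte) alignment by the GCC 4.8 compatibility rule; on
   x86-64, any array of 16 bytes or more gets at least 128-bit (16-byte)
   alignment per the psABI; and DFmode data gets at least 64-bit
   alignment.  */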
16710 
16711 /* Compute the alignment for a local variable or a stack slot.  EXP is
16712    the data type or decl itself, MODE is the widest mode available and
16713    ALIGN is the alignment that the object would ordinarily have.  The
16714    value of this macro is used instead of that alignment to align the
16715    object.  */
16716 
16717 unsigned int
16718 ix86_local_alignment (tree exp, machine_mode mode,
16719 		      unsigned int align)
16720 {
16721   tree type, decl;
16722 
16723   if (exp && DECL_P (exp))
16724     {
16725       type = TREE_TYPE (exp);
16726       decl = exp;
16727     }
16728   else
16729     {
16730       type = exp;
16731       decl = NULL;
16732     }
16733 
16734   /* Don't do dynamic stack realignment for long long objects with
16735      -mpreferred-stack-boundary=2.  */
16736   if (!TARGET_64BIT
16737       && align == 64
16738       && ix86_preferred_stack_boundary < 64
16739       && (mode == DImode || (type && TYPE_MODE (type) == DImode))
16740       && (!type || !TYPE_USER_ALIGN (type))
16741       && (!decl || !DECL_USER_ALIGN (decl)))
16742     align = 32;
16743 
16744   /* If TYPE is NULL, we are allocating a stack slot for a caller-save
16745      register in MODE.  For XFmode we return the larger of the XFmode
16746      and DFmode alignments.  */
16747   if (!type)
16748     {
16749       if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
16750 	align = GET_MODE_ALIGNMENT (DFmode);
16751       return align;
16752     }
16753 
16754   /* Don't increase alignment for Intel MCU psABI.  */
16755   if (TARGET_IAMCU)
16756     return align;
16757 
16758   /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
16759      to a 16-byte boundary.  The exact wording is:
16760 
16761      An array uses the same alignment as its elements, except that a local or
16762      global array variable of length at least 16 bytes or
16763      a C99 variable-length array variable always has alignment of at least 16 bytes.
16764 
16765      This was added to allow use of aligned SSE instructions on arrays.  The
16766      rule is meant for static storage (where the compiler cannot do the
16767      analysis by itself).  We follow it for automatic variables only when
16768      convenient: we fully control everything in the function being compiled,
16769      and functions from other units cannot rely on the alignment.
16770 
16771      Exclude the va_list type.  It is the common case of a local array where
16772      we cannot benefit from the alignment.
16773 
16774      TODO: Probably one should optimize for size only when var is not escaping.  */
16775   if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
16776       && TARGET_SSE)
16777     {
16778       if (AGGREGATE_TYPE_P (type)
16779 	  && (va_list_type_node == NULL_TREE
16780 	      || (TYPE_MAIN_VARIANT (type)
16781 		  != TYPE_MAIN_VARIANT (va_list_type_node)))
16782 	  && TYPE_SIZE (type)
16783 	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16784 	  && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
16785 	  && align < 128)
16786 	return 128;
16787     }
16788   if (TREE_CODE (type) == ARRAY_TYPE)
16789     {
16790       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16791 	return 64;
16792       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16793 	return 128;
16794     }
16795   else if (TREE_CODE (type) == COMPLEX_TYPE)
16796     {
16797       if (TYPE_MODE (type) == DCmode && align < 64)
16798 	return 64;
16799       if ((TYPE_MODE (type) == XCmode
16800 	   || TYPE_MODE (type) == TCmode) && align < 128)
16801 	return 128;
16802     }
16803   else if ((TREE_CODE (type) == RECORD_TYPE
16804 	    || TREE_CODE (type) == UNION_TYPE
16805 	    || TREE_CODE (type) == QUAL_UNION_TYPE)
16806 	   && TYPE_FIELDS (type))
16807     {
16808       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16809 	return 64;
16810       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16811 	return 128;
16812     }
16813   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16814 	   || TREE_CODE (type) == INTEGER_TYPE)
16815     {
16816 
16817       if (TYPE_MODE (type) == DFmode && align < 64)
16818 	return 64;
16819       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16820 	return 128;
16821     }
16822   return align;
16823 }
16824 
16825 /* Compute the minimum required alignment for dynamic stack realignment
16826    purposes for a local variable, parameter or a stack slot.  EXP is
16827    the data type or decl itself, MODE is its mode and ALIGN is the
16828    alignment that the object would ordinarily have.  */
16829 
16830 unsigned int
16831 ix86_minimum_alignment (tree exp, machine_mode mode,
16832 			unsigned int align)
16833 {
16834   tree type, decl;
16835 
16836   if (exp && DECL_P (exp))
16837     {
16838       type = TREE_TYPE (exp);
16839       decl = exp;
16840     }
16841   else
16842     {
16843       type = exp;
16844       decl = NULL;
16845     }
16846 
16847   if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
16848     return align;
16849 
16850   /* Don't do dynamic stack realignment for long long objects with
16851      -mpreferred-stack-boundary=2.  */
16852   if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
16853       && (!type || !TYPE_USER_ALIGN (type))
16854       && (!decl || !DECL_USER_ALIGN (decl)))
16855     {
16856       gcc_checking_assert (!TARGET_STV);
16857       return 32;
16858     }
16859 
16860   return align;
16861 }
16862 
16863 /* Find a location for the static chain incoming to a nested function.
16864    This is a register, unless all free registers are used by arguments.  */
16865 
16866 static rtx
16867 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
16868 {
16869   unsigned regno;
16870 
16871   if (TARGET_64BIT)
16872     {
16873       /* We always use R10 in 64-bit mode.  */
16874       regno = R10_REG;
16875     }
16876   else
16877     {
16878       const_tree fntype, fndecl;
16879       unsigned int ccvt;
16880 
16881       /* By default in 32-bit mode we use ECX to pass the static chain.  */
16882       regno = CX_REG;
16883 
16884       if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
16885 	{
16886           fntype = TREE_TYPE (fndecl_or_type);
16887 	  fndecl = fndecl_or_type;
16888 	}
16889       else
16890 	{
16891 	  fntype = fndecl_or_type;
16892 	  fndecl = NULL;
16893 	}
16894 
16895       ccvt = ix86_get_callcvt (fntype);
16896       if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
16897 	{
16898 	  /* Fastcall functions use ecx/edx for arguments, which leaves
16899 	     us with EAX for the static chain.
16900 	     Thiscall functions use ecx for arguments, which also
16901 	     leaves us with EAX for the static chain.  */
16902 	  regno = AX_REG;
16903 	}
16904       else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
16905 	{
16906 	  /* Thiscall functions use ecx for arguments, which leaves
16907 	     us with EAX and EDX for the static chain.
16908 	     For ABI compatibility we use EAX.  */
16909 	  regno = AX_REG;
16910 	}
16911       else if (ix86_function_regparm (fntype, fndecl) == 3)
16912 	{
16913 	  /* For regparm 3, we have no free call-clobbered registers in
16914 	     which to store the static chain.  In order to implement this,
16915 	     we have the trampoline push the static chain to the stack.
16916 	     However, we can't push a value below the return address when
16917 	     we call the nested function directly, so we have to use an
16918 	     alternate entry point.  For this we use ESI, and have the
16919 	     alternate entry point push ESI, so that things appear the
16920 	     same once we're executing the nested function.  */
16921 	  if (incoming_p)
16922 	    {
16923 	      if (fndecl == current_function_decl
16924 		  && !ix86_static_chain_on_stack)
16925 		{
16926 		  gcc_assert (!reload_completed);
16927 		  ix86_static_chain_on_stack = true;
16928 		}
16929 	      return gen_frame_mem (SImode,
16930 				    plus_constant (Pmode,
16931 						   arg_pointer_rtx, -8));
16932 	    }
16933 	  regno = SI_REG;
16934 	}
16935     }
16936 
16937   return gen_rtx_REG (Pmode, regno);
16938 }
16939 
16940 /* Emit RTL insns to initialize the variable parts of a trampoline.
16941    FNDECL is the decl of the target address; M_TRAMP is a MEM for
16942    the trampoline, and CHAIN_VALUE is an RTX for the static chain
16943    to be passed to the target function.  */
16944 
16945 static void
16946 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
16947 {
16948   rtx mem, fnaddr;
16949   int opcode;
16950   int offset = 0;
16951   bool need_endbr = (flag_cf_protection & CF_BRANCH);
16952 
16953   fnaddr = XEXP (DECL_RTL (fndecl), 0);
16954 
16955   if (TARGET_64BIT)
16956     {
16957       int size;
16958 
16959       if (need_endbr)
16960 	{
16961 	  /* Insert ENDBR64.  */
16962 	  mem = adjust_address (m_tramp, SImode, offset);
16963 	  emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
16964 	  offset += 4;
16965 	}
16966 
16967       /* Load the function address into r11.  Try to load the address
16968 	 using the shorter movl instead of movabs.  We may want to support
16969 	 movq for kernel mode, but the kernel does not use trampolines at
16970 	 the moment.  FNADDR is a 32-bit address and may not be in
16971 	 DImode when ptr_mode == SImode.  Always use movl in this
16972 	 case.  */
16973       if (ptr_mode == SImode
16974 	  || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
16975 	{
16976 	  fnaddr = copy_addr_to_reg (fnaddr);
16977 
16978 	  mem = adjust_address (m_tramp, HImode, offset);
16979 	  emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
16980 
16981 	  mem = adjust_address (m_tramp, SImode, offset + 2);
16982 	  emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
16983 	  offset += 6;
16984 	}
16985       else
16986 	{
16987 	  mem = adjust_address (m_tramp, HImode, offset);
16988 	  emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
16989 
16990 	  mem = adjust_address (m_tramp, DImode, offset + 2);
16991 	  emit_move_insn (mem, fnaddr);
16992 	  offset += 10;
16993 	}
16994 
16995       /* Load static chain using movabs to r10.  Use the shorter movl
16996          instead of movabs when ptr_mode == SImode.  */
16997       if (ptr_mode == SImode)
16998 	{
16999 	  opcode = 0xba41;
17000 	  size = 6;
17001 	}
17002       else
17003 	{
17004 	  opcode = 0xba49;
17005 	  size = 10;
17006 	}
17007 
17008       mem = adjust_address (m_tramp, HImode, offset);
17009       emit_move_insn (mem, gen_int_mode (opcode, HImode));
17010 
17011       mem = adjust_address (m_tramp, ptr_mode, offset + 2);
17012       emit_move_insn (mem, chain_value);
17013       offset += size;
17014 
17015       /* Jump to r11; the last (unused) byte is a nop, only there to
17016 	 pad the write out to a single 32-bit store.  */
17017       mem = adjust_address (m_tramp, SImode, offset);
17018       emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
17019       offset += 4;
17020     }
17021   else
17022     {
17023       rtx disp, chain;
17024 
17025       /* Depending on the static chain location, either load a register
17026 	 with a constant, or push the constant to the stack.  All of the
17027 	 instructions are the same size.  */
17028       chain = ix86_static_chain (fndecl, true);
17029       if (REG_P (chain))
17030 	{
17031 	  switch (REGNO (chain))
17032 	    {
17033 	    case AX_REG:
17034 	      opcode = 0xb8; break;
17035 	    case CX_REG:
17036 	      opcode = 0xb9; break;
17037 	    default:
17038 	      gcc_unreachable ();
17039 	    }
17040 	}
17041       else
17042 	opcode = 0x68;
17043 
17044       if (need_endbr)
17045 	{
17046 	  /* Insert ENDBR32.  */
17047 	  mem = adjust_address (m_tramp, SImode, offset);
17048 	  emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
17049 	  offset += 4;
17050 	}
17051 
17052       mem = adjust_address (m_tramp, QImode, offset);
17053       emit_move_insn (mem, gen_int_mode (opcode, QImode));
17054 
17055       mem = adjust_address (m_tramp, SImode, offset + 1);
17056       emit_move_insn (mem, chain_value);
17057       offset += 5;
17058 
17059       mem = adjust_address (m_tramp, QImode, offset);
17060       emit_move_insn (mem, gen_int_mode (0xe9, QImode));
17061 
17062       mem = adjust_address (m_tramp, SImode, offset + 1);
17063 
17064       /* Compute offset from the end of the jmp to the target function.
17065 	 In the case in which the trampoline stores the static chain on
17066 	 the stack, we need to skip the first insn which pushes the
17067 	 (call-saved) register static chain; this push is 1 byte.  */
17068       offset += 5;
17069       int skip = MEM_P (chain) ? 1 : 0;
17070       /* Skip ENDBR32 at the entry of the target function.  */
17071       if (need_endbr
17072 	  && !cgraph_node::get (fndecl)->only_called_directly_p ())
17073 	skip += 4;
17074       disp = expand_binop (SImode, sub_optab, fnaddr,
17075 			   plus_constant (Pmode, XEXP (m_tramp, 0),
17076 					  offset - skip),
17077 			   NULL_RTX, 1, OPTAB_DIRECT);
17078       emit_move_insn (mem, disp);
17079     }
17080 
17081   gcc_assert (offset <= TRAMPOLINE_SIZE);
17082 
17083 #ifdef HAVE_ENABLE_EXECUTE_STACK
17084 #ifdef CHECK_EXECUTE_STACK_ENABLED
17085   if (CHECK_EXECUTE_STACK_ENABLED)
17086 #endif
17087   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
17088 		     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
17089 #endif
17090 }
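
/* Illustration (non-authoritative): for the common 64-bit case without
   ENDBR and with a full 64-bit target address, the stores above emit the
   little-endian byte sequence

       49 bb <imm64>    movabs $fnaddr, %r11    (opcode 0xbb49, 10 bytes)
       49 ba <imm64>    movabs $chain,  %r10    (opcode 0xba49, 10 bytes)
       49 ff e3         jmp    *%r11
       90               nop  (pads the final write to a full 32-bit store)

   while the 32-bit path materializes the static chain with a mov or push
   and finishes with a pc-relative jmp (opcode 0xe9).  */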
17091 
17092 static bool
17093 ix86_allocate_stack_slots_for_args (void)
17094 {
17095   /* Naked functions should not allocate stack slots for arguments.  */
17096   return !ix86_function_naked (current_function_decl);
17097 }
17098 
17099 static bool
17100 ix86_warn_func_return (tree decl)
17101 {
17102   /* Naked functions are implemented entirely in assembly, including the
17103      return sequence, so suppress warnings about this.  */
17104   return !ix86_function_naked (decl);
17105 }
17106 
17107 /* Return the shift count of a vector-by-scalar shift builtin's second
17108    argument ARG1, or NULL_TREE if it cannot be determined.  */
17109 static tree
17110 ix86_vector_shift_count (tree arg1)
17111 {
17112   if (tree_fits_uhwi_p (arg1))
17113     return arg1;
17114   else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
17115     {
17116 	      /* The count argument is odd: it is passed as one of various
17117 	 128-bit (or 64-bit) vectors, and its low 64 bits are the count.  */
17118       unsigned char buf[16];
17119       int len = native_encode_expr (arg1, buf, 16);
17120       if (len == 0)
17121 	return NULL_TREE;
17122       tree t = native_interpret_expr (uint64_type_node, buf, len);
17123       if (t && tree_fits_uhwi_p (t))
17124 	return t;
17125     }
17126   return NULL_TREE;
17127 }
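
/* Illustration (non-authoritative): this matches the Intel intrinsic
   convention where, e.g., _mm_sll_epi32 (a, count) takes COUNT as an
   __m128i whose low 64 bits hold the shift amount, so a VECTOR_CST
   count of { 3, 0 } folds to the scalar 3 here.  */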
17128 
17129 static tree
17130 ix86_fold_builtin (tree fndecl, int n_args,
17131 		   tree *args, bool ignore ATTRIBUTE_UNUSED)
17132 {
17133   if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
17134     {
17135       enum ix86_builtins fn_code
17136 	= (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
17137       enum rtx_code rcode;
17138       bool is_vshift;
17139       unsigned HOST_WIDE_INT mask;
17140 
17141       switch (fn_code)
17142 	{
17143 	case IX86_BUILTIN_CPU_IS:
17144 	case IX86_BUILTIN_CPU_SUPPORTS:
17145 	  gcc_assert (n_args == 1);
17146 	  return fold_builtin_cpu (fndecl, args);
17147 
17148 	case IX86_BUILTIN_NANQ:
17149 	case IX86_BUILTIN_NANSQ:
17150 	  {
17151 	    tree type = TREE_TYPE (TREE_TYPE (fndecl));
17152 	    const char *str = c_getstr (*args);
17153 	    int quiet = fn_code == IX86_BUILTIN_NANQ;
17154 	    REAL_VALUE_TYPE real;
17155 
17156 	    if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
17157 	      return build_real (type, real);
17158 	    return NULL_TREE;
17159 	  }
17160 
17161 	case IX86_BUILTIN_INFQ:
17162 	case IX86_BUILTIN_HUGE_VALQ:
17163 	  {
17164 	    tree type = TREE_TYPE (TREE_TYPE (fndecl));
17165 	    REAL_VALUE_TYPE inf;
17166 	    real_inf (&inf);
17167 	    return build_real (type, inf);
17168 	  }
17169 
17170 	case IX86_BUILTIN_TZCNT16:
17171 	case IX86_BUILTIN_CTZS:
17172 	case IX86_BUILTIN_TZCNT32:
17173 	case IX86_BUILTIN_TZCNT64:
17174 	  gcc_assert (n_args == 1);
17175 	  if (TREE_CODE (args[0]) == INTEGER_CST)
17176 	    {
17177 	      tree type = TREE_TYPE (TREE_TYPE (fndecl));
17178 	      tree arg = args[0];
17179 	      if (fn_code == IX86_BUILTIN_TZCNT16
17180 		  || fn_code == IX86_BUILTIN_CTZS)
17181 		arg = fold_convert (short_unsigned_type_node, arg);
17182 	      if (integer_zerop (arg))
17183 		return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
17184 	      else
17185 		return fold_const_call (CFN_CTZ, type, arg);
17186 	    }
17187 	  break;
17188 
17189 	case IX86_BUILTIN_LZCNT16:
17190 	case IX86_BUILTIN_CLZS:
17191 	case IX86_BUILTIN_LZCNT32:
17192 	case IX86_BUILTIN_LZCNT64:
17193 	  gcc_assert (n_args == 1);
17194 	  if (TREE_CODE (args[0]) == INTEGER_CST)
17195 	    {
17196 	      tree type = TREE_TYPE (TREE_TYPE (fndecl));
17197 	      tree arg = args[0];
17198 	      if (fn_code == IX86_BUILTIN_LZCNT16
17199 		  || fn_code == IX86_BUILTIN_CLZS)
17200 		arg = fold_convert (short_unsigned_type_node, arg);
17201 	      if (integer_zerop (arg))
17202 		return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
17203 	      else
17204 		return fold_const_call (CFN_CLZ, type, arg);
17205 	    }
17206 	  break;
17207 
17208 	case IX86_BUILTIN_BEXTR32:
17209 	case IX86_BUILTIN_BEXTR64:
17210 	case IX86_BUILTIN_BEXTRI32:
17211 	case IX86_BUILTIN_BEXTRI64:
17212 	  gcc_assert (n_args == 2);
17213 	  if (tree_fits_uhwi_p (args[1]))
17214 	    {
17215 	      unsigned HOST_WIDE_INT res = 0;
17216 	      unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
17217 	      unsigned int start = tree_to_uhwi (args[1]);
17218 	      unsigned int len = (start & 0xff00) >> 8;
17219 	      start &= 0xff;
17220 	      if (start >= prec || len == 0)
17221 		res = 0;
17222 	      else if (!tree_fits_uhwi_p (args[0]))
17223 		break;
17224 	      else
17225 		res = tree_to_uhwi (args[0]) >> start;
17226 	      if (len > prec)
17227 		len = prec;
17228 	      if (len < HOST_BITS_PER_WIDE_INT)
17229 		res &= (HOST_WIDE_INT_1U << len) - 1;
17230 	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
17231 	    }
17232 	  break;
17233 
17234 	case IX86_BUILTIN_BZHI32:
17235 	case IX86_BUILTIN_BZHI64:
17236 	  gcc_assert (n_args == 2);
17237 	  if (tree_fits_uhwi_p (args[1]))
17238 	    {
17239 	      unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
17240 	      if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
17241 		return args[0];
17242 	      if (idx == 0)
17243 		return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), 0);
17244 	      if (!tree_fits_uhwi_p (args[0]))
17245 		break;
17246 	      unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
17247 	      res &= ~(HOST_WIDE_INT_M1U << idx);
17248 	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
17249 	    }
17250 	  break;
17251 
17252 	case IX86_BUILTIN_PDEP32:
17253 	case IX86_BUILTIN_PDEP64:
17254 	  gcc_assert (n_args == 2);
17255 	  if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
17256 	    {
17257 	      unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
17258 	      unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
17259 	      unsigned HOST_WIDE_INT res = 0;
17260 	      unsigned HOST_WIDE_INT m, k = 1;
17261 	      for (m = 1; m; m <<= 1)
17262 		if ((mask & m) != 0)
17263 		  {
17264 		    if ((src & k) != 0)
17265 		      res |= m;
17266 		    k <<= 1;
17267 		  }
17268 	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
17269 	    }
17270 	  break;
17271 
17272 	case IX86_BUILTIN_PEXT32:
17273 	case IX86_BUILTIN_PEXT64:
17274 	  gcc_assert (n_args == 2);
17275 	  if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
17276 	    {
17277 	      unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
17278 	      unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
17279 	      unsigned HOST_WIDE_INT res = 0;
17280 	      unsigned HOST_WIDE_INT m, k = 1;
17281 	      for (m = 1; m; m <<= 1)
17282 		if ((mask & m) != 0)
17283 		  {
17284 		    if ((src & m) != 0)
17285 		      res |= k;
17286 		    k <<= 1;
17287 		  }
17288 	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
17289 	    }
17290 	  break;
17291 
17292 	case IX86_BUILTIN_MOVMSKPS:
17293 	case IX86_BUILTIN_PMOVMSKB:
17294 	case IX86_BUILTIN_MOVMSKPD:
17295 	case IX86_BUILTIN_PMOVMSKB128:
17296 	case IX86_BUILTIN_MOVMSKPD256:
17297 	case IX86_BUILTIN_MOVMSKPS256:
17298 	case IX86_BUILTIN_PMOVMSKB256:
17299 	  gcc_assert (n_args == 1);
17300 	  if (TREE_CODE (args[0]) == VECTOR_CST)
17301 	    {
17302 	      HOST_WIDE_INT res = 0;
17303 	      for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
17304 		{
17305 		  tree e = VECTOR_CST_ELT (args[0], i);
17306 		  if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
17307 		    {
17308 		      if (wi::neg_p (wi::to_wide (e)))
17309 			res |= HOST_WIDE_INT_1 << i;
17310 		    }
17311 		  else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
17312 		    {
17313 		      if (TREE_REAL_CST (e).sign)
17314 			res |= HOST_WIDE_INT_1 << i;
17315 		    }
17316 		  else
17317 		    return NULL_TREE;
17318 		}
17319 	      return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
17320 	    }
17321 	  break;
17322 
17323 	case IX86_BUILTIN_PSLLD:
17324 	case IX86_BUILTIN_PSLLD128:
17325 	case IX86_BUILTIN_PSLLD128_MASK:
17326 	case IX86_BUILTIN_PSLLD256:
17327 	case IX86_BUILTIN_PSLLD256_MASK:
17328 	case IX86_BUILTIN_PSLLD512:
17329 	case IX86_BUILTIN_PSLLDI:
17330 	case IX86_BUILTIN_PSLLDI128:
17331 	case IX86_BUILTIN_PSLLDI128_MASK:
17332 	case IX86_BUILTIN_PSLLDI256:
17333 	case IX86_BUILTIN_PSLLDI256_MASK:
17334 	case IX86_BUILTIN_PSLLDI512:
17335 	case IX86_BUILTIN_PSLLQ:
17336 	case IX86_BUILTIN_PSLLQ128:
17337 	case IX86_BUILTIN_PSLLQ128_MASK:
17338 	case IX86_BUILTIN_PSLLQ256:
17339 	case IX86_BUILTIN_PSLLQ256_MASK:
17340 	case IX86_BUILTIN_PSLLQ512:
17341 	case IX86_BUILTIN_PSLLQI:
17342 	case IX86_BUILTIN_PSLLQI128:
17343 	case IX86_BUILTIN_PSLLQI128_MASK:
17344 	case IX86_BUILTIN_PSLLQI256:
17345 	case IX86_BUILTIN_PSLLQI256_MASK:
17346 	case IX86_BUILTIN_PSLLQI512:
17347 	case IX86_BUILTIN_PSLLW:
17348 	case IX86_BUILTIN_PSLLW128:
17349 	case IX86_BUILTIN_PSLLW128_MASK:
17350 	case IX86_BUILTIN_PSLLW256:
17351 	case IX86_BUILTIN_PSLLW256_MASK:
17352 	case IX86_BUILTIN_PSLLW512_MASK:
17353 	case IX86_BUILTIN_PSLLWI:
17354 	case IX86_BUILTIN_PSLLWI128:
17355 	case IX86_BUILTIN_PSLLWI128_MASK:
17356 	case IX86_BUILTIN_PSLLWI256:
17357 	case IX86_BUILTIN_PSLLWI256_MASK:
17358 	case IX86_BUILTIN_PSLLWI512_MASK:
17359 	  rcode = ASHIFT;
17360 	  is_vshift = false;
17361 	  goto do_shift;
17362 	case IX86_BUILTIN_PSRAD:
17363 	case IX86_BUILTIN_PSRAD128:
17364 	case IX86_BUILTIN_PSRAD128_MASK:
17365 	case IX86_BUILTIN_PSRAD256:
17366 	case IX86_BUILTIN_PSRAD256_MASK:
17367 	case IX86_BUILTIN_PSRAD512:
17368 	case IX86_BUILTIN_PSRADI:
17369 	case IX86_BUILTIN_PSRADI128:
17370 	case IX86_BUILTIN_PSRADI128_MASK:
17371 	case IX86_BUILTIN_PSRADI256:
17372 	case IX86_BUILTIN_PSRADI256_MASK:
17373 	case IX86_BUILTIN_PSRADI512:
17374 	case IX86_BUILTIN_PSRAQ128_MASK:
17375 	case IX86_BUILTIN_PSRAQ256_MASK:
17376 	case IX86_BUILTIN_PSRAQ512:
17377 	case IX86_BUILTIN_PSRAQI128_MASK:
17378 	case IX86_BUILTIN_PSRAQI256_MASK:
17379 	case IX86_BUILTIN_PSRAQI512:
17380 	case IX86_BUILTIN_PSRAW:
17381 	case IX86_BUILTIN_PSRAW128:
17382 	case IX86_BUILTIN_PSRAW128_MASK:
17383 	case IX86_BUILTIN_PSRAW256:
17384 	case IX86_BUILTIN_PSRAW256_MASK:
17385 	case IX86_BUILTIN_PSRAW512:
17386 	case IX86_BUILTIN_PSRAWI:
17387 	case IX86_BUILTIN_PSRAWI128:
17388 	case IX86_BUILTIN_PSRAWI128_MASK:
17389 	case IX86_BUILTIN_PSRAWI256:
17390 	case IX86_BUILTIN_PSRAWI256_MASK:
17391 	case IX86_BUILTIN_PSRAWI512:
17392 	  rcode = ASHIFTRT;
17393 	  is_vshift = false;
17394 	  goto do_shift;
17395 	case IX86_BUILTIN_PSRLD:
17396 	case IX86_BUILTIN_PSRLD128:
17397 	case IX86_BUILTIN_PSRLD128_MASK:
17398 	case IX86_BUILTIN_PSRLD256:
17399 	case IX86_BUILTIN_PSRLD256_MASK:
17400 	case IX86_BUILTIN_PSRLD512:
17401 	case IX86_BUILTIN_PSRLDI:
17402 	case IX86_BUILTIN_PSRLDI128:
17403 	case IX86_BUILTIN_PSRLDI128_MASK:
17404 	case IX86_BUILTIN_PSRLDI256:
17405 	case IX86_BUILTIN_PSRLDI256_MASK:
17406 	case IX86_BUILTIN_PSRLDI512:
17407 	case IX86_BUILTIN_PSRLQ:
17408 	case IX86_BUILTIN_PSRLQ128:
17409 	case IX86_BUILTIN_PSRLQ128_MASK:
17410 	case IX86_BUILTIN_PSRLQ256:
17411 	case IX86_BUILTIN_PSRLQ256_MASK:
17412 	case IX86_BUILTIN_PSRLQ512:
17413 	case IX86_BUILTIN_PSRLQI:
17414 	case IX86_BUILTIN_PSRLQI128:
17415 	case IX86_BUILTIN_PSRLQI128_MASK:
17416 	case IX86_BUILTIN_PSRLQI256:
17417 	case IX86_BUILTIN_PSRLQI256_MASK:
17418 	case IX86_BUILTIN_PSRLQI512:
17419 	case IX86_BUILTIN_PSRLW:
17420 	case IX86_BUILTIN_PSRLW128:
17421 	case IX86_BUILTIN_PSRLW128_MASK:
17422 	case IX86_BUILTIN_PSRLW256:
17423 	case IX86_BUILTIN_PSRLW256_MASK:
17424 	case IX86_BUILTIN_PSRLW512:
17425 	case IX86_BUILTIN_PSRLWI:
17426 	case IX86_BUILTIN_PSRLWI128:
17427 	case IX86_BUILTIN_PSRLWI128_MASK:
17428 	case IX86_BUILTIN_PSRLWI256:
17429 	case IX86_BUILTIN_PSRLWI256_MASK:
17430 	case IX86_BUILTIN_PSRLWI512:
17431 	  rcode = LSHIFTRT;
17432 	  is_vshift = false;
17433 	  goto do_shift;
17434 	case IX86_BUILTIN_PSLLVV16HI:
17435 	case IX86_BUILTIN_PSLLVV16SI:
17436 	case IX86_BUILTIN_PSLLVV2DI:
17437 	case IX86_BUILTIN_PSLLVV2DI_MASK:
17438 	case IX86_BUILTIN_PSLLVV32HI:
17439 	case IX86_BUILTIN_PSLLVV4DI:
17440 	case IX86_BUILTIN_PSLLVV4DI_MASK:
17441 	case IX86_BUILTIN_PSLLVV4SI:
17442 	case IX86_BUILTIN_PSLLVV4SI_MASK:
17443 	case IX86_BUILTIN_PSLLVV8DI:
17444 	case IX86_BUILTIN_PSLLVV8HI:
17445 	case IX86_BUILTIN_PSLLVV8SI:
17446 	case IX86_BUILTIN_PSLLVV8SI_MASK:
17447 	  rcode = ASHIFT;
17448 	  is_vshift = true;
17449 	  goto do_shift;
17450 	case IX86_BUILTIN_PSRAVQ128:
17451 	case IX86_BUILTIN_PSRAVQ256:
17452 	case IX86_BUILTIN_PSRAVV16HI:
17453 	case IX86_BUILTIN_PSRAVV16SI:
17454 	case IX86_BUILTIN_PSRAVV32HI:
17455 	case IX86_BUILTIN_PSRAVV4SI:
17456 	case IX86_BUILTIN_PSRAVV4SI_MASK:
17457 	case IX86_BUILTIN_PSRAVV8DI:
17458 	case IX86_BUILTIN_PSRAVV8HI:
17459 	case IX86_BUILTIN_PSRAVV8SI:
17460 	case IX86_BUILTIN_PSRAVV8SI_MASK:
17461 	  rcode = ASHIFTRT;
17462 	  is_vshift = true;
17463 	  goto do_shift;
17464 	case IX86_BUILTIN_PSRLVV16HI:
17465 	case IX86_BUILTIN_PSRLVV16SI:
17466 	case IX86_BUILTIN_PSRLVV2DI:
17467 	case IX86_BUILTIN_PSRLVV2DI_MASK:
17468 	case IX86_BUILTIN_PSRLVV32HI:
17469 	case IX86_BUILTIN_PSRLVV4DI:
17470 	case IX86_BUILTIN_PSRLVV4DI_MASK:
17471 	case IX86_BUILTIN_PSRLVV4SI:
17472 	case IX86_BUILTIN_PSRLVV4SI_MASK:
17473 	case IX86_BUILTIN_PSRLVV8DI:
17474 	case IX86_BUILTIN_PSRLVV8HI:
17475 	case IX86_BUILTIN_PSRLVV8SI:
17476 	case IX86_BUILTIN_PSRLVV8SI_MASK:
17477 	  rcode = LSHIFTRT;
17478 	  is_vshift = true;
17479 	  goto do_shift;
17480 
17481 	do_shift:
17482 	  gcc_assert (n_args >= 2);
17483 	  if (TREE_CODE (args[0]) != VECTOR_CST)
17484 	    break;
17485 	  mask = HOST_WIDE_INT_M1U;
17486 	  if (n_args > 2)
17487 	    {
17488 	      /* This is a masked shift.  */
17489 	      if (!tree_fits_uhwi_p (args[n_args - 1])
17490 		  || TREE_SIDE_EFFECTS (args[n_args - 2]))
17491 		break;
17492 	      mask = tree_to_uhwi (args[n_args - 1]);
17493 	      unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
17494 	      mask |= HOST_WIDE_INT_M1U << elems;
17495 	      if (mask != HOST_WIDE_INT_M1U
17496 		  && TREE_CODE (args[n_args - 2]) != VECTOR_CST)
17497 		break;
17498 	      if (mask == (HOST_WIDE_INT_M1U << elems))
17499 		return args[n_args - 2];
17500 	    }
17501 	  if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
17502 	    break;
17503 	  if (tree tem = (is_vshift ? integer_one_node
17504 			  : ix86_vector_shift_count (args[1])))
17505 	    {
17506 	      unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
17507 	      unsigned HOST_WIDE_INT prec
17508 		= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
17509 	      if (count == 0 && mask == HOST_WIDE_INT_M1U)
17510 		return args[0];
17511 	      if (count >= prec)
17512 		{
17513 		  if (rcode == ASHIFTRT)
17514 		    count = prec - 1;
17515 		  else if (mask == HOST_WIDE_INT_M1U)
17516 		    return build_zero_cst (TREE_TYPE (args[0]));
17517 		}
17518 	      tree countt = NULL_TREE;
17519 	      if (!is_vshift)
17520 		{
17521 		  if (count >= prec)
17522 		    countt = integer_zero_node;
17523 		  else
17524 		    countt = build_int_cst (integer_type_node, count);
17525 		}
17526 	      tree_vector_builder builder;
17527 	      if (mask != HOST_WIDE_INT_M1U || is_vshift)
17528 		builder.new_vector (TREE_TYPE (args[0]),
17529 				    TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])),
17530 				    1);
17531 	      else
17532 		builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
17533 					     false);
17534 	      unsigned int cnt = builder.encoded_nelts ();
17535 	      for (unsigned int i = 0; i < cnt; ++i)
17536 		{
17537 		  tree elt = VECTOR_CST_ELT (args[0], i);
17538 		  if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
17539 		    return NULL_TREE;
17540 		  tree type = TREE_TYPE (elt);
17541 		  if (rcode == LSHIFTRT)
17542 		    elt = fold_convert (unsigned_type_for (type), elt);
17543 		  if (is_vshift)
17544 		    {
17545 		      countt = VECTOR_CST_ELT (args[1], i);
17546 		      if (TREE_CODE (countt) != INTEGER_CST
17547 			  || TREE_OVERFLOW (countt))
17548 			return NULL_TREE;
17549 		      if (wi::neg_p (wi::to_wide (countt))
17550 			  || wi::to_widest (countt) >= prec)
17551 			{
17552 			  if (rcode == ASHIFTRT)
17553 			    countt = build_int_cst (TREE_TYPE (countt),
17554 						    prec - 1);
17555 			  else
17556 			    {
17557 			      elt = build_zero_cst (TREE_TYPE (elt));
17558 			      countt = build_zero_cst (TREE_TYPE (countt));
17559 			    }
17560 			}
17561 		    }
17562 		  else if (count >= prec)
17563 		    elt = build_zero_cst (TREE_TYPE (elt));
17564 		  elt = const_binop (rcode == ASHIFT
17565 				     ? LSHIFT_EXPR : RSHIFT_EXPR,
17566 				     TREE_TYPE (elt), elt, countt);
17567 		  if (!elt || TREE_CODE (elt) != INTEGER_CST)
17568 		    return NULL_TREE;
17569 		  if (rcode == LSHIFTRT)
17570 		    elt = fold_convert (type, elt);
17571 		  if ((mask & (HOST_WIDE_INT_1U << i)) == 0)
17572 		    {
17573 		      elt = VECTOR_CST_ELT (args[n_args - 2], i);
17574 		      if (TREE_CODE (elt) != INTEGER_CST
17575 			  || TREE_OVERFLOW (elt))
17576 			return NULL_TREE;
17577 		    }
17578 		  builder.quick_push (elt);
17579 		}
17580 	      return builder.build ();
17581 	    }
17582 	  break;
17583 
17584 	default:
17585 	  break;
17586 	}
17587     }
17588 
17589 #ifdef SUBTARGET_FOLD_BUILTIN
17590   return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
17591 #endif
17592 
17593   return NULL_TREE;
17594 }
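
/* Illustration (non-authoritative worked examples for the folding above,
   using the <immintrin.h> intrinsic names):
     BEXTR: the control word packs the start bit in bits [7:0] and the
            length in bits [15:8], so __builtin_ia32_bextr_u32 (0xabcd,
            0x0804) folds to 0xbc (start 4, length 8).
     PDEP:  _pdep_u32 (0xb, 0xf0) folds to 0xb0 - the low source bits are
            deposited into the positions selected by the mask.
     PEXT:  _pext_u32 (0xab, 0xf0) folds to 0xa - the masked bits are
            gathered and packed at the bottom.  */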
17595 
17596 /* Fold a MD builtin (use ix86_fold_builtin for folding into
17597    constant) in GIMPLE.  */
17598 
17599 bool
17600 ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
17601 {
17602   gimple *stmt = gsi_stmt (*gsi);
17603   tree fndecl = gimple_call_fndecl (stmt);
17604   gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
17605   int n_args = gimple_call_num_args (stmt);
17606   enum ix86_builtins fn_code
17607     = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
17608   tree decl = NULL_TREE;
17609   tree arg0, arg1, arg2;
17610   enum rtx_code rcode;
17611   unsigned HOST_WIDE_INT count;
17612   bool is_vshift;
17613 
17614   switch (fn_code)
17615     {
17616     case IX86_BUILTIN_TZCNT32:
17617       decl = builtin_decl_implicit (BUILT_IN_CTZ);
17618       goto fold_tzcnt_lzcnt;
17619 
17620     case IX86_BUILTIN_TZCNT64:
17621       decl = builtin_decl_implicit (BUILT_IN_CTZLL);
17622       goto fold_tzcnt_lzcnt;
17623 
17624     case IX86_BUILTIN_LZCNT32:
17625       decl = builtin_decl_implicit (BUILT_IN_CLZ);
17626       goto fold_tzcnt_lzcnt;
17627 
17628     case IX86_BUILTIN_LZCNT64:
17629       decl = builtin_decl_implicit (BUILT_IN_CLZLL);
17630       goto fold_tzcnt_lzcnt;
17631 
17632     fold_tzcnt_lzcnt:
17633       gcc_assert (n_args == 1);
17634       arg0 = gimple_call_arg (stmt, 0);
17635       if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
17636 	{
17637 	  int prec = TYPE_PRECISION (TREE_TYPE (arg0));
17638 	  /* If arg0 is provably non-zero, optimize into the generic
17639 	     __builtin_c[tl]z{,ll} function, which the middle-end handles
17640 	     better.  */
17641 	  if (!expr_not_equal_to (arg0, wi::zero (prec)))
17642 	    return false;
17643 
17644 	  location_t loc = gimple_location (stmt);
17645 	  gimple *g = gimple_build_call (decl, 1, arg0);
17646 	  gimple_set_location (g, loc);
17647 	  tree lhs = make_ssa_name (integer_type_node);
17648 	  gimple_call_set_lhs (g, lhs);
17649 	  gsi_insert_before (gsi, g, GSI_SAME_STMT);
17650 	  g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
17651 	  gimple_set_location (g, loc);
17652 	  gsi_replace (gsi, g, false);
17653 	  return true;
17654 	}
17655       break;
17656 
17657     case IX86_BUILTIN_BZHI32:
17658     case IX86_BUILTIN_BZHI64:
17659       gcc_assert (n_args == 2);
17660       arg1 = gimple_call_arg (stmt, 1);
17661       if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
17662 	{
17663 	  unsigned int idx = tree_to_uhwi (arg1) & 0xff;
17664 	  arg0 = gimple_call_arg (stmt, 0);
17665 	  if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
17666 	    break;
17667 	  location_t loc = gimple_location (stmt);
17668 	  gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
17669 	  gimple_set_location (g, loc);
17670 	  gsi_replace (gsi, g, false);
17671 	  return true;
17672 	}
17673       break;
17674 
17675     case IX86_BUILTIN_PDEP32:
17676     case IX86_BUILTIN_PDEP64:
17677     case IX86_BUILTIN_PEXT32:
17678     case IX86_BUILTIN_PEXT64:
17679       gcc_assert (n_args == 2);
17680       arg1 = gimple_call_arg (stmt, 1);
17681       if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
17682 	{
17683 	  location_t loc = gimple_location (stmt);
17684 	  arg0 = gimple_call_arg (stmt, 0);
17685 	  gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
17686 	  gimple_set_location (g, loc);
17687 	  gsi_replace (gsi, g, false);
17688 	  return true;
17689 	}
17690       break;
17691 
17692     case IX86_BUILTIN_PSLLD:
17693     case IX86_BUILTIN_PSLLD128:
17694     case IX86_BUILTIN_PSLLD128_MASK:
17695     case IX86_BUILTIN_PSLLD256:
17696     case IX86_BUILTIN_PSLLD256_MASK:
17697     case IX86_BUILTIN_PSLLD512:
17698     case IX86_BUILTIN_PSLLDI:
17699     case IX86_BUILTIN_PSLLDI128:
17700     case IX86_BUILTIN_PSLLDI128_MASK:
17701     case IX86_BUILTIN_PSLLDI256:
17702     case IX86_BUILTIN_PSLLDI256_MASK:
17703     case IX86_BUILTIN_PSLLDI512:
17704     case IX86_BUILTIN_PSLLQ:
17705     case IX86_BUILTIN_PSLLQ128:
17706     case IX86_BUILTIN_PSLLQ128_MASK:
17707     case IX86_BUILTIN_PSLLQ256:
17708     case IX86_BUILTIN_PSLLQ256_MASK:
17709     case IX86_BUILTIN_PSLLQ512:
17710     case IX86_BUILTIN_PSLLQI:
17711     case IX86_BUILTIN_PSLLQI128:
17712     case IX86_BUILTIN_PSLLQI128_MASK:
17713     case IX86_BUILTIN_PSLLQI256:
17714     case IX86_BUILTIN_PSLLQI256_MASK:
17715     case IX86_BUILTIN_PSLLQI512:
17716     case IX86_BUILTIN_PSLLW:
17717     case IX86_BUILTIN_PSLLW128:
17718     case IX86_BUILTIN_PSLLW128_MASK:
17719     case IX86_BUILTIN_PSLLW256:
17720     case IX86_BUILTIN_PSLLW256_MASK:
17721     case IX86_BUILTIN_PSLLW512_MASK:
17722     case IX86_BUILTIN_PSLLWI:
17723     case IX86_BUILTIN_PSLLWI128:
17724     case IX86_BUILTIN_PSLLWI128_MASK:
17725     case IX86_BUILTIN_PSLLWI256:
17726     case IX86_BUILTIN_PSLLWI256_MASK:
17727     case IX86_BUILTIN_PSLLWI512_MASK:
17728       rcode = ASHIFT;
17729       is_vshift = false;
17730       goto do_shift;
17731     case IX86_BUILTIN_PSRAD:
17732     case IX86_BUILTIN_PSRAD128:
17733     case IX86_BUILTIN_PSRAD128_MASK:
17734     case IX86_BUILTIN_PSRAD256:
17735     case IX86_BUILTIN_PSRAD256_MASK:
17736     case IX86_BUILTIN_PSRAD512:
17737     case IX86_BUILTIN_PSRADI:
17738     case IX86_BUILTIN_PSRADI128:
17739     case IX86_BUILTIN_PSRADI128_MASK:
17740     case IX86_BUILTIN_PSRADI256:
17741     case IX86_BUILTIN_PSRADI256_MASK:
17742     case IX86_BUILTIN_PSRADI512:
17743     case IX86_BUILTIN_PSRAQ128_MASK:
17744     case IX86_BUILTIN_PSRAQ256_MASK:
17745     case IX86_BUILTIN_PSRAQ512:
17746     case IX86_BUILTIN_PSRAQI128_MASK:
17747     case IX86_BUILTIN_PSRAQI256_MASK:
17748     case IX86_BUILTIN_PSRAQI512:
17749     case IX86_BUILTIN_PSRAW:
17750     case IX86_BUILTIN_PSRAW128:
17751     case IX86_BUILTIN_PSRAW128_MASK:
17752     case IX86_BUILTIN_PSRAW256:
17753     case IX86_BUILTIN_PSRAW256_MASK:
17754     case IX86_BUILTIN_PSRAW512:
17755     case IX86_BUILTIN_PSRAWI:
17756     case IX86_BUILTIN_PSRAWI128:
17757     case IX86_BUILTIN_PSRAWI128_MASK:
17758     case IX86_BUILTIN_PSRAWI256:
17759     case IX86_BUILTIN_PSRAWI256_MASK:
17760     case IX86_BUILTIN_PSRAWI512:
17761       rcode = ASHIFTRT;
17762       is_vshift = false;
17763       goto do_shift;
17764     case IX86_BUILTIN_PSRLD:
17765     case IX86_BUILTIN_PSRLD128:
17766     case IX86_BUILTIN_PSRLD128_MASK:
17767     case IX86_BUILTIN_PSRLD256:
17768     case IX86_BUILTIN_PSRLD256_MASK:
17769     case IX86_BUILTIN_PSRLD512:
17770     case IX86_BUILTIN_PSRLDI:
17771     case IX86_BUILTIN_PSRLDI128:
17772     case IX86_BUILTIN_PSRLDI128_MASK:
17773     case IX86_BUILTIN_PSRLDI256:
17774     case IX86_BUILTIN_PSRLDI256_MASK:
17775     case IX86_BUILTIN_PSRLDI512:
17776     case IX86_BUILTIN_PSRLQ:
17777     case IX86_BUILTIN_PSRLQ128:
17778     case IX86_BUILTIN_PSRLQ128_MASK:
17779     case IX86_BUILTIN_PSRLQ256:
17780     case IX86_BUILTIN_PSRLQ256_MASK:
17781     case IX86_BUILTIN_PSRLQ512:
17782     case IX86_BUILTIN_PSRLQI:
17783     case IX86_BUILTIN_PSRLQI128:
17784     case IX86_BUILTIN_PSRLQI128_MASK:
17785     case IX86_BUILTIN_PSRLQI256:
17786     case IX86_BUILTIN_PSRLQI256_MASK:
17787     case IX86_BUILTIN_PSRLQI512:
17788     case IX86_BUILTIN_PSRLW:
17789     case IX86_BUILTIN_PSRLW128:
17790     case IX86_BUILTIN_PSRLW128_MASK:
17791     case IX86_BUILTIN_PSRLW256:
17792     case IX86_BUILTIN_PSRLW256_MASK:
17793     case IX86_BUILTIN_PSRLW512:
17794     case IX86_BUILTIN_PSRLWI:
17795     case IX86_BUILTIN_PSRLWI128:
17796     case IX86_BUILTIN_PSRLWI128_MASK:
17797     case IX86_BUILTIN_PSRLWI256:
17798     case IX86_BUILTIN_PSRLWI256_MASK:
17799     case IX86_BUILTIN_PSRLWI512:
17800       rcode = LSHIFTRT;
17801       is_vshift = false;
17802       goto do_shift;
17803     case IX86_BUILTIN_PSLLVV16HI:
17804     case IX86_BUILTIN_PSLLVV16SI:
17805     case IX86_BUILTIN_PSLLVV2DI:
17806     case IX86_BUILTIN_PSLLVV2DI_MASK:
17807     case IX86_BUILTIN_PSLLVV32HI:
17808     case IX86_BUILTIN_PSLLVV4DI:
17809     case IX86_BUILTIN_PSLLVV4DI_MASK:
17810     case IX86_BUILTIN_PSLLVV4SI:
17811     case IX86_BUILTIN_PSLLVV4SI_MASK:
17812     case IX86_BUILTIN_PSLLVV8DI:
17813     case IX86_BUILTIN_PSLLVV8HI:
17814     case IX86_BUILTIN_PSLLVV8SI:
17815     case IX86_BUILTIN_PSLLVV8SI_MASK:
17816       rcode = ASHIFT;
17817       is_vshift = true;
17818       goto do_shift;
17819     case IX86_BUILTIN_PSRAVQ128:
17820     case IX86_BUILTIN_PSRAVQ256:
17821     case IX86_BUILTIN_PSRAVV16HI:
17822     case IX86_BUILTIN_PSRAVV16SI:
17823     case IX86_BUILTIN_PSRAVV32HI:
17824     case IX86_BUILTIN_PSRAVV4SI:
17825     case IX86_BUILTIN_PSRAVV4SI_MASK:
17826     case IX86_BUILTIN_PSRAVV8DI:
17827     case IX86_BUILTIN_PSRAVV8HI:
17828     case IX86_BUILTIN_PSRAVV8SI:
17829     case IX86_BUILTIN_PSRAVV8SI_MASK:
17830       rcode = ASHIFTRT;
17831       is_vshift = true;
17832       goto do_shift;
17833     case IX86_BUILTIN_PSRLVV16HI:
17834     case IX86_BUILTIN_PSRLVV16SI:
17835     case IX86_BUILTIN_PSRLVV2DI:
17836     case IX86_BUILTIN_PSRLVV2DI_MASK:
17837     case IX86_BUILTIN_PSRLVV32HI:
17838     case IX86_BUILTIN_PSRLVV4DI:
17839     case IX86_BUILTIN_PSRLVV4DI_MASK:
17840     case IX86_BUILTIN_PSRLVV4SI:
17841     case IX86_BUILTIN_PSRLVV4SI_MASK:
17842     case IX86_BUILTIN_PSRLVV8DI:
17843     case IX86_BUILTIN_PSRLVV8HI:
17844     case IX86_BUILTIN_PSRLVV8SI:
17845     case IX86_BUILTIN_PSRLVV8SI_MASK:
17846       rcode = LSHIFTRT;
17847       is_vshift = true;
17848       goto do_shift;
17849 
17850     do_shift:
17851       gcc_assert (n_args >= 2);
17852       if (!gimple_call_lhs (stmt))
17853 	break;
17854       arg0 = gimple_call_arg (stmt, 0);
17855       arg1 = gimple_call_arg (stmt, 1);
17856       if (n_args > 2)
17857 	{
17858 	  /* This is a masked shift.  Only optimize if the mask is all ones.  */
17859 	  tree argl = gimple_call_arg (stmt, n_args - 1);
17860 	  if (!tree_fits_uhwi_p (argl))
17861 	    break;
17862 	  unsigned HOST_WIDE_INT mask = tree_to_uhwi (argl);
17863 	  unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
17864 	  if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
17865 	    break;
17866 	}
17867       if (is_vshift)
17868 	{
17869 	  if (TREE_CODE (arg1) != VECTOR_CST)
17870 	    break;
17871 	  count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)));
17872 	  if (integer_zerop (arg1))
17873 	    count = 0;
17874 	  else if (rcode == ASHIFTRT)
17875 	    break;
17876 	  else
17877 	    for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i)
17878 	      {
17879 		tree elt = VECTOR_CST_ELT (arg1, i);
17880 		if (!wi::neg_p (wi::to_wide (elt))
17881 		    && wi::to_widest (elt) < count)
17882 		  return false;
17883 	      }
17884 	}
17885       else
17886 	{
17887 	  arg1 = ix86_vector_shift_count (arg1);
17888 	  if (!arg1)
17889 	    break;
17890 	  count = tree_to_uhwi (arg1);
17891 	}
17892       if (count == 0)
17893 	{
17894 	  /* Just return the first argument for shift by 0.  */
17895 	  location_t loc = gimple_location (stmt);
17896 	  gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
17897 	  gimple_set_location (g, loc);
17898 	  gsi_replace (gsi, g, false);
17899 	  return true;
17900 	}
17901       if (rcode != ASHIFTRT
17902 	  && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
17903 	{
17904 	  /* For shift counts equal to or greater than the precision, the
17905 	     result is zero, except for arithmetic right shift.  */
17906 	  location_t loc = gimple_location (stmt);
17907 	  gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
17908 					   build_zero_cst (TREE_TYPE (arg0)));
17909 	  gimple_set_location (g, loc);
17910 	  gsi_replace (gsi, g, false);
17911 	  return true;
17912 	}
17913       break;
17914 
17915     case IX86_BUILTIN_SHUFPD:
17916       arg2 = gimple_call_arg (stmt, 2);
17917       if (TREE_CODE (arg2) == INTEGER_CST && gimple_call_lhs (stmt))
17918 	{
17919 	  location_t loc = gimple_location (stmt);
17920 	  unsigned HOST_WIDE_INT imask = TREE_INT_CST_LOW (arg2);
17921 	  arg0 = gimple_call_arg (stmt, 0);
17922 	  arg1 = gimple_call_arg (stmt, 1);
17923 	  tree itype = long_long_integer_type_node;
17924 	  tree vtype = build_vector_type (itype, 2); /* V2DI */
17925 	  tree_vector_builder elts (vtype, 2, 1);
17926 	  /* Ignore bits other than the lowest 2.  */
17927 	  elts.quick_push (build_int_cst (itype, imask & 1));
17928 	  imask >>= 1;
17929 	  elts.quick_push (build_int_cst (itype, 2 + (imask & 1)));
17930 	  tree omask = elts.build ();
17931 	  gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
17932 					   VEC_PERM_EXPR,
17933 					   arg0, arg1, omask);
17934 	  gimple_set_location (g, loc);
17935 	  gsi_replace (gsi, g, false);
17936 	  return true;
17937 	}
17938       // Do not error yet, the constant could be propagated later?
17939       break;
17940 
17941     default:
17942       break;
17943     }
17944 
17945   return false;
17946 }
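
/* Illustration (non-authoritative GIMPLE sketch of the tzcnt/lzcnt case
   above): when x_2 is known to be non-zero,

       n_5 = __builtin_ia32_tzcnt_u32 (x_2);

   is rewritten as roughly

       _6 = __builtin_ctz (x_2);
       n_5 = (unsigned int) _6;

   since tzcnt and ctz agree for non-zero inputs and the middle end
   optimizes the generic builtin better.  */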
17947 
17948 /* Handler for an SVML-style interface to
17949    a library with vectorized intrinsics.  */
17950 
17951 tree
17952 ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
17953 {
17954   char name[20];
17955   tree fntype, new_fndecl, args;
17956   unsigned arity;
17957   const char *bname;
17958   machine_mode el_mode, in_mode;
17959   int n, in_n;
17960 
17961   /* The SVML is suitable for unsafe math only.  */
17962   if (!flag_unsafe_math_optimizations)
17963     return NULL_TREE;
17964 
17965   el_mode = TYPE_MODE (TREE_TYPE (type_out));
17966   n = TYPE_VECTOR_SUBPARTS (type_out);
17967   in_mode = TYPE_MODE (TREE_TYPE (type_in));
17968   in_n = TYPE_VECTOR_SUBPARTS (type_in);
17969   if (el_mode != in_mode
17970       || n != in_n)
17971     return NULL_TREE;
17972 
17973   switch (fn)
17974     {
17975     CASE_CFN_EXP:
17976     CASE_CFN_LOG:
17977     CASE_CFN_LOG10:
17978     CASE_CFN_POW:
17979     CASE_CFN_TANH:
17980     CASE_CFN_TAN:
17981     CASE_CFN_ATAN:
17982     CASE_CFN_ATAN2:
17983     CASE_CFN_ATANH:
17984     CASE_CFN_CBRT:
17985     CASE_CFN_SINH:
17986     CASE_CFN_SIN:
17987     CASE_CFN_ASINH:
17988     CASE_CFN_ASIN:
17989     CASE_CFN_COSH:
17990     CASE_CFN_COS:
17991     CASE_CFN_ACOSH:
17992     CASE_CFN_ACOS:
17993       if ((el_mode != DFmode || n != 2)
17994 	  && (el_mode != SFmode || n != 4))
17995 	return NULL_TREE;
17996       break;
17997 
17998     default:
17999       return NULL_TREE;
18000     }
18001 
18002   tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
18003   bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
18004 
18005   if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
18006     strcpy (name, "vmlsLn4");
18007   else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
18008     strcpy (name, "vmldLn2");
18009   else if (n == 4)
18010     {
18011       sprintf (name, "vmls%s", bname+10);
18012       name[strlen (name)-1] = '4';
18013     }
18014   else
18015     sprintf (name, "vmld%s2", bname+10);
18016 
18017   /* Convert to uppercase. */
18018   name[4] &= ~0x20;
18019 
18020   arity = 0;
18021   for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
18022     arity++;
18023 
18024   if (arity == 1)
18025     fntype = build_function_type_list (type_out, type_in, NULL);
18026   else
18027     fntype = build_function_type_list (type_out, type_in, type_in, NULL);
18028 
18029   /* Build a function declaration for the vectorized function.  */
18030   new_fndecl = build_decl (BUILTINS_LOCATION,
18031 			   FUNCTION_DECL, get_identifier (name), fntype);
18032   TREE_PUBLIC (new_fndecl) = 1;
18033   DECL_EXTERNAL (new_fndecl) = 1;
18034   DECL_IS_NOVOPS (new_fndecl) = 1;
18035   TREE_READONLY (new_fndecl) = 1;
18036 
18037   return new_fndecl;
18038 }
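
/* Illustration (non-authoritative): the mangling above produces
   SVML-style names such as
       sin,  V2DF -> "vmldSin2"      sinf, V4SF -> "vmlsSin4"
       log,  V2DF -> "vmldLn2"       logf, V4SF -> "vmlsLn4"
   i.e. "vml" + 's'/'d' + the capitalized function name + vector width.  */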
18039 
18040 /* Handler for an ACML-style interface to
18041    a library with vectorized intrinsics.  */
18042 
18043 tree
18044 ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
18045 {
18046   char name[20] = "__vr.._";
18047   tree fntype, new_fndecl, args;
18048   unsigned arity;
18049   const char *bname;
18050   machine_mode el_mode, in_mode;
18051   int n, in_n;
18052 
18053   /* The ACML is 64-bit only and suitable for unsafe math only, as it
18054      does not correctly support parts of IEEE (such as denormals) with
18055      the required precision.  */
18056   if (!TARGET_64BIT
18057       || !flag_unsafe_math_optimizations)
18058     return NULL_TREE;
18059 
18060   el_mode = TYPE_MODE (TREE_TYPE (type_out));
18061   n = TYPE_VECTOR_SUBPARTS (type_out);
18062   in_mode = TYPE_MODE (TREE_TYPE (type_in));
18063   in_n = TYPE_VECTOR_SUBPARTS (type_in);
18064   if (el_mode != in_mode
18065       || n != in_n)
18066     return NULL_TREE;
18067 
18068   switch (fn)
18069     {
18070     CASE_CFN_SIN:
18071     CASE_CFN_COS:
18072     CASE_CFN_EXP:
18073     CASE_CFN_LOG:
18074     CASE_CFN_LOG2:
18075     CASE_CFN_LOG10:
18076       if (el_mode == DFmode && n == 2)
18077 	{
18078 	  name[4] = 'd';
18079 	  name[5] = '2';
18080 	}
18081       else if (el_mode == SFmode && n == 4)
18082 	{
18083 	  name[4] = 's';
18084 	  name[5] = '4';
18085 	}
18086       else
18087 	return NULL_TREE;
18088       break;
18089 
18090     default:
18091       return NULL_TREE;
18092     }
18093 
18094   tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
18095   bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
18096   sprintf (name + 7, "%s", bname+10);
18097 
18098   arity = 0;
18099   for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
18100     arity++;
18101 
18102   if (arity == 1)
18103     fntype = build_function_type_list (type_out, type_in, NULL);
18104   else
18105     fntype = build_function_type_list (type_out, type_in, type_in, NULL);
18106 
18107   /* Build a function declaration for the vectorized function.  */
18108   new_fndecl = build_decl (BUILTINS_LOCATION,
18109 			   FUNCTION_DECL, get_identifier (name), fntype);
18110   TREE_PUBLIC (new_fndecl) = 1;
18111   DECL_EXTERNAL (new_fndecl) = 1;
18112   DECL_IS_NOVOPS (new_fndecl) = 1;
18113   TREE_READONLY (new_fndecl) = 1;
18114 
18115   return new_fndecl;
18116 }
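
/* Illustration (non-authoritative): the "__vr.._" template above expands
   to ACML-style names such as
       sin,  V2DF -> "__vrd2_sin"      sinf,  V4SF -> "__vrs4_sinf"
       log2, V2DF -> "__vrd2_log2"     log2f, V4SF -> "__vrs4_log2f"  */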
18117 
18118 /* Returns a decl of a function that implements scatter store with
18119    register type VECTYPE and index type INDEX_TYPE and SCALE.
18120    Return NULL_TREE if it is not available.  */
18121 
18122 static tree
18123 ix86_vectorize_builtin_scatter (const_tree vectype,
18124 				const_tree index_type, int scale)
18125 {
18126   bool si;
18127   enum ix86_builtins code;
18128 
18129   if (!TARGET_AVX512F)
18130     return NULL_TREE;
18131 
18132   if ((TREE_CODE (index_type) != INTEGER_TYPE
18133        && !POINTER_TYPE_P (index_type))
18134       || (TYPE_MODE (index_type) != SImode
18135 	  && TYPE_MODE (index_type) != DImode))
18136     return NULL_TREE;
18137 
18138   if (TYPE_PRECISION (index_type) > POINTER_SIZE)
18139     return NULL_TREE;
18140 
18141   /* v*scatter* insn sign extends index to pointer mode.  */
18142   if (TYPE_PRECISION (index_type) < POINTER_SIZE
18143       && TYPE_UNSIGNED (index_type))
18144     return NULL_TREE;
18145 
18146   /* Scale can be 1, 2, 4 or 8.  */
18147   if (scale <= 0
18148       || scale > 8
18149       || (scale & (scale - 1)) != 0)
18150     return NULL_TREE;
18151 
18152   si = TYPE_MODE (index_type) == SImode;
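  /* Pick the builtin for the data/index mode pair.  For example, a
     V8DFmode store with an SImode index maps to
     IX86_BUILTIN_SCATTERALTSIV8DF, and with a DImode index to
     IX86_BUILTIN_SCATTERDIV8DF (see the switch below).  */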
18153   switch (TYPE_MODE (vectype))
18154     {
18155     case E_V8DFmode:
18156       code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
18157       break;
18158     case E_V8DImode:
18159       code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
18160       break;
18161     case E_V16SFmode:
18162       code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
18163       break;
18164     case E_V16SImode:
18165       code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
18166       break;
18167     case E_V4DFmode:
18168       if (TARGET_AVX512VL)
18169 	code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
18170       else
18171 	return NULL_TREE;
18172       break;
18173     case E_V4DImode:
18174       if (TARGET_AVX512VL)
18175 	code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
18176       else
18177 	return NULL_TREE;
18178       break;
18179     case E_V8SFmode:
18180       if (TARGET_AVX512VL)
18181 	code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
18182       else
18183 	return NULL_TREE;
18184       break;
18185     case E_V8SImode:
18186       if (TARGET_AVX512VL)
18187 	code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
18188       else
18189 	return NULL_TREE;
18190       break;
18191     case E_V2DFmode:
18192       if (TARGET_AVX512VL)
18193 	code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
18194       else
18195 	return NULL_TREE;
18196       break;
18197     case E_V2DImode:
18198       if (TARGET_AVX512VL)
18199 	code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
18200       else
18201 	return NULL_TREE;
18202       break;
18203     case E_V4SFmode:
18204       if (TARGET_AVX512VL)
18205 	code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
18206       else
18207 	return NULL_TREE;
18208       break;
18209     case E_V4SImode:
18210       if (TARGET_AVX512VL)
18211 	code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
18212       else
18213 	return NULL_TREE;
18214       break;
18215     default:
18216       return NULL_TREE;
18217     }
18218 
18219   return get_ix86_builtin (code);
18220 }
18221 
18222 /* Return true if it is safe to use the rsqrt optabs to optimize
18223    1.0/sqrt.  */
18224 
18225 static bool
18226 use_rsqrt_p ()
18227 {
18228   return (TARGET_SSE && TARGET_SSE_MATH
18229 	  && flag_finite_math_only
18230 	  && !flag_trapping_math
18231 	  && flag_unsafe_math_optimizations);
18232 }
18233 
18234 /* Helper for avx_vpermilps256_operand et al.  This is also used by
18235    the expansion functions to turn the parallel back into a mask.
18236    The return value is 0 for no match and the imm8+1 for a match.  */
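/* As a worked example of the encoding below: a V4SFmode PARALLEL
   selecting elements (3,2,1,0) gives imm8 0x1b (3 | 2<<2 | 1<<4 | 0<<6),
   so the function returns 0x1c.  */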
18237 
18238 int
18239 avx_vpermilp_parallel (rtx par, machine_mode mode)
18240 {
18241   unsigned i, nelt = GET_MODE_NUNITS (mode);
18242   unsigned mask = 0;
18243   unsigned char ipar[16] = {};  /* Silence -Wuninitialized warning.  */
18244 
18245   if (XVECLEN (par, 0) != (int) nelt)
18246     return 0;
18247 
18248   /* Validate that all of the elements are constants, and not totally
18249      out of range.  Copy the data into an integral array to make the
18250      subsequent checks easier.  */
18251   for (i = 0; i < nelt; ++i)
18252     {
18253       rtx er = XVECEXP (par, 0, i);
18254       unsigned HOST_WIDE_INT ei;
18255 
18256       if (!CONST_INT_P (er))
18257 	return 0;
18258       ei = INTVAL (er);
18259       if (ei >= nelt)
18260 	return 0;
18261       ipar[i] = ei;
18262     }
18263 
18264   switch (mode)
18265     {
18266     case E_V8DFmode:
18267       /* In the 512-bit DFmode case, we can only move elements within
18268          a 128-bit lane.  First fill the second part of the mask,
18269 	 then fallthru.  */
18270       for (i = 4; i < 6; ++i)
18271 	{
18272 	  if (ipar[i] < 4 || ipar[i] >= 6)
18273 	    return 0;
18274 	  mask |= (ipar[i] - 4) << i;
18275 	}
18276       for (i = 6; i < 8; ++i)
18277 	{
18278 	  if (ipar[i] < 6)
18279 	    return 0;
18280 	  mask |= (ipar[i] - 6) << i;
18281 	}
18282       /* FALLTHRU */
18283 
18284     case E_V4DFmode:
18285       /* In the 256-bit DFmode case, we can only move elements within
18286          a 128-bit lane.  */
18287       for (i = 0; i < 2; ++i)
18288 	{
18289 	  if (ipar[i] >= 2)
18290 	    return 0;
18291 	  mask |= ipar[i] << i;
18292 	}
18293       for (i = 2; i < 4; ++i)
18294 	{
18295 	  if (ipar[i] < 2)
18296 	    return 0;
18297 	  mask |= (ipar[i] - 2) << i;
18298 	}
18299       break;
18300 
18301     case E_V16SFmode:
18302       /* In the 512-bit SFmode case, the permutation in the upper 256 bits
18303 	 must mirror the permutation in the lower 256 bits.  */
18304       for (i = 0; i < 8; ++i)
18305 	if (ipar[i] + 8 != ipar[i + 8])
18306 	  return 0;
18307       /* FALLTHRU */
18308 
18309     case E_V8SFmode:
18310       /* In the 256-bit SFmode case, we have full freedom of
18311          movement within the low 128-bit lane, but the high 128-bit
18312          lane must mirror the exact same pattern.  */
18313       for (i = 0; i < 4; ++i)
18314 	if (ipar[i] + 4 != ipar[i + 4])
18315 	  return 0;
18316       nelt = 4;
18317       /* FALLTHRU */
18318 
18319     case E_V2DFmode:
18320     case E_V4SFmode:
18321       /* In the 128-bit case, we have full freedom in the placement of
18322 	 the elements from the source operand.  */
18323       for (i = 0; i < nelt; ++i)
18324 	mask |= ipar[i] << (i * (nelt / 2));
18325       break;
18326 
18327     default:
18328       gcc_unreachable ();
18329     }
18330 
18331   /* Make sure success has a non-zero value by adding one.  */
18332   return mask + 1;
18333 }
18334 
18335 /* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
18336    the expansion functions to turn the parallel back into a mask.
18337    The return value is 0 for no match and the imm8+1 for a match.  */
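/* As a worked example of the reconstruction below: a V8SFmode PARALLEL
   selecting elements (4,5,6,7,0,1,2,3) - i.e. the two 128-bit halves
   swapped - gives imm8 0x01, so the function returns 0x02.  */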
18338 
18339 int
18340 avx_vperm2f128_parallel (rtx par, machine_mode mode)
18341 {
18342   unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
18343   unsigned mask = 0;
18344   unsigned char ipar[8] = {};  /* Silence -Wuninitialized warning.  */
18345 
18346   if (XVECLEN (par, 0) != (int) nelt)
18347     return 0;
18348 
18349   /* Validate that all of the elements are constants, and not totally
18350      out of range.  Copy the data into an integral array to make the
18351      subsequent checks easier.  */
18352   for (i = 0; i < nelt; ++i)
18353     {
18354       rtx er = XVECEXP (par, 0, i);
18355       unsigned HOST_WIDE_INT ei;
18356 
18357       if (!CONST_INT_P (er))
18358 	return 0;
18359       ei = INTVAL (er);
18360       if (ei >= 2 * nelt)
18361 	return 0;
18362       ipar[i] = ei;
18363     }
18364 
18365   /* Validate that each half of the permute selects consecutive elements.  */
18366   for (i = 0; i < nelt2 - 1; ++i)
18367     if (ipar[i] + 1 != ipar[i + 1])
18368       return 0;
18369   for (i = nelt2; i < nelt - 1; ++i)
18370     if (ipar[i] + 1 != ipar[i + 1])
18371       return 0;
18372 
18373   /* Reconstruct the mask.  */
18374   for (i = 0; i < 2; ++i)
18375     {
18376       unsigned e = ipar[i * nelt2];
18377       if (e % nelt2)
18378 	return 0;
18379       e /= nelt2;
18380       mask |= e << (i * 4);
18381     }
18382 
18383   /* Make sure success has a non-zero value by adding one.  */
18384   return mask + 1;
18385 }
18386 
18387 /* Return a register priority for hard reg REGNO.  */
18388 static int
18389 ix86_register_priority (int hard_regno)
18390 {
18391   /* ebp and r13 as the base always want a displacement, and r12 as
18392      the base always wants an index.  So discourage their usage in
18393      an address.  */
18394   if (hard_regno == R12_REG || hard_regno == R13_REG)
18395     return 0;
18396   if (hard_regno == BP_REG)
18397     return 1;
18398   /* New x86-64 int registers result in bigger code size.  Discourage
18399      them.  */
18400   if (IN_RANGE (hard_regno, FIRST_REX_INT_REG, LAST_REX_INT_REG))
18401     return 2;
18402   /* New x86-64 SSE registers result in bigger code size.  Discourage
18403      them.  */
18404   if (IN_RANGE (hard_regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG))
18405     return 2;
18406   if (IN_RANGE (hard_regno, FIRST_EXT_REX_SSE_REG, LAST_EXT_REX_SSE_REG))
18407     return 1;
18408   /* Usage of AX register results in smaller code.  Prefer it.  */
18409   if (hard_regno == AX_REG)
18410     return 4;
18411   return 3;
18412 }
18413 
18414 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
18415 
18416    Put float CONST_DOUBLE in the constant pool instead of fp regs.
18417    QImode must go into class Q_REGS.
18418    Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
18419    movdf to do mem-to-mem moves through integer regs.  */
18420 
18421 static reg_class_t
18422 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
18423 {
18424   machine_mode mode = GET_MODE (x);
18425 
18426   /* We're only allowed to return a subclass of CLASS.  Many of the
18427      following checks fail for NO_REGS, so eliminate that early.  */
18428   if (regclass == NO_REGS)
18429     return NO_REGS;
18430 
18431   /* All classes can load zeros.  */
18432   if (x == CONST0_RTX (mode))
18433     return regclass;
18434 
18435   /* Force constants into memory if we are loading a (nonzero) constant into
18436      an MMX, SSE or MASK register.  This is because there are no MMX/SSE/MASK
18437      instructions to load from a constant.  */
18438   if (CONSTANT_P (x)
18439       && (MAYBE_MMX_CLASS_P (regclass)
18440 	  || MAYBE_SSE_CLASS_P (regclass)
18441 	  || MAYBE_MASK_CLASS_P (regclass)))
18442     return NO_REGS;
18443 
18444   /* Floating-point constants need more complex checks.  */
18445   if (CONST_DOUBLE_P (x))
18446     {
18447       /* General regs can load everything.  */
18448       if (INTEGER_CLASS_P (regclass))
18449         return regclass;
18450 
18451       /* Floats can load 0 and 1 plus some others.  Note that we eliminated
18452 	 zero above.  We only want to wind up preferring 80387 registers if
18453 	 we plan on doing computation with them.  */
18454       if (IS_STACK_MODE (mode)
18455 	  && standard_80387_constant_p (x) > 0)
18456 	{
18457 	  /* Limit class to FP regs.  */
18458 	  if (FLOAT_CLASS_P (regclass))
18459 	    return FLOAT_REGS;
18460 	}
18461 
18462       return NO_REGS;
18463     }
18464 
18465   /* Prefer SSE regs only, if we can use them for math.  */
18466   if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
18467     return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
18468 
18469   /* Generally when we see PLUS here, it's the function invariant
18470      (plus soft-fp const_int).  Which can only be computed into general
18471      regs.  */
18472   if (GET_CODE (x) == PLUS)
18473     return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
18474 
18475   /* QImode constants are easy to load, but non-constant QImode data
18476      must go into Q_REGS.  */
18477   if (GET_MODE (x) == QImode && !CONSTANT_P (x))
18478     {
18479       if (Q_CLASS_P (regclass))
18480 	return regclass;
18481       else if (reg_class_subset_p (Q_REGS, regclass))
18482 	return Q_REGS;
18483       else
18484 	return NO_REGS;
18485     }
18486 
18487   return regclass;
18488 }
18489 
18490 /* Discourage putting floating-point values in SSE registers unless
18491    SSE math is being used, and likewise for the 387 registers.  */
18492 static reg_class_t
18493 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
18494 {
18495   /* Restrict the output reload class to the register bank that we are doing
18496      math on.  If we would like not to return a subset of CLASS, reject this
18497      alternative: if reload cannot do this, it will still use its choice.  */
18498   machine_mode mode = GET_MODE (x);
18499   if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
18500     return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
18501 
18502   if (IS_STACK_MODE (mode))
18503     return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
18504 
18505   return regclass;
18506 }
18507 
18508 static reg_class_t
18509 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
18510 		       machine_mode mode, secondary_reload_info *sri)
18511 {
18512   /* Double-word spills from general registers to non-offsettable memory
18513      references (zero-extended addresses) require special handling.  */
18514   if (TARGET_64BIT
18515       && MEM_P (x)
18516       && GET_MODE_SIZE (mode) > UNITS_PER_WORD
18517       && INTEGER_CLASS_P (rclass)
18518       && !offsettable_memref_p (x))
18519     {
18520       sri->icode = (in_p
18521 		    ? CODE_FOR_reload_noff_load
18522 		    : CODE_FOR_reload_noff_store);
18523       /* Add the cost of moving address to a temporary.  */
18524       sri->extra_cost = 1;
18525 
18526       return NO_REGS;
18527     }
18528 
18529   /* QImode spills from non-QI registers require an
18530      intermediate register on 32-bit targets.  */
18531   if (mode == QImode
18532       && ((!TARGET_64BIT && !in_p
18533 	   && INTEGER_CLASS_P (rclass)
18534 	   && MAYBE_NON_Q_CLASS_P (rclass))
18535 	  || (!TARGET_AVX512DQ
18536 	      && MAYBE_MASK_CLASS_P (rclass))))
18537     {
18538       int regno = true_regnum (x);
18539 
18540       /* Return Q_REGS if the operand is in memory.  */
18541       if (regno == -1)
18542 	return Q_REGS;
18543 
18544       return NO_REGS;
18545     }
18546 
18547   /* This condition handles corner case where an expression involving
18548      pointers gets vectorized.  We're trying to use the address of a
18549      stack slot as a vector initializer.
18550 
18551      (set (reg:V2DI 74 [ vect_cst_.2 ])
18552           (vec_duplicate:V2DI (reg/f:DI 20 frame)))
18553 
18554      Eventually frame gets turned into sp+offset like this:
18555 
18556      (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18557           (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
18558 	                               (const_int 392 [0x188]))))
18559 
18560      That later gets turned into:
18561 
18562      (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18563           (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
18564 	    (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
18565 
18566      We'll have the following reload recorded:
18567 
18568      Reload 0: reload_in (DI) =
18569            (plus:DI (reg/f:DI 7 sp)
18570             (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
18571      reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18572      SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
18573      reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
18574      reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18575      reload_reg_rtx: (reg:V2DI 22 xmm1)
18576 
18577      Which isn't going to work since SSE instructions can't handle scalar
18578      additions.  Returning GENERAL_REGS forces the addition into integer
18579      register and reload can handle subsequent reloads without problems.  */
18580 
18581   if (in_p && GET_CODE (x) == PLUS
18582       && SSE_CLASS_P (rclass)
18583       && SCALAR_INT_MODE_P (mode))
18584     return GENERAL_REGS;
18585 
18586   return NO_REGS;
18587 }
18588 
18589 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.  */
18590 
18591 static bool
18592 ix86_class_likely_spilled_p (reg_class_t rclass)
18593 {
18594   switch (rclass)
18595     {
18596       case AREG:
18597       case DREG:
18598       case CREG:
18599       case BREG:
18600       case AD_REGS:
18601       case SIREG:
18602       case DIREG:
18603       case SSE_FIRST_REG:
18604       case FP_TOP_REG:
18605       case FP_SECOND_REG:
18606 	return true;
18607 
18608       default:
18609 	break;
18610     }
18611 
18612   return false;
18613 }
18614 
18615 /* If we are copying between registers from different register sets
18616    (e.g. FP and integer), we may need a memory location.
18617 
18618    The function can't work reliably when one of the CLASSES is a class
18619    containing registers from multiple sets.  We avoid this by never combining
18620    different sets in a single alternative in the machine description.
18621    Ensure that this constraint holds to avoid unexpected surprises.
18622 
18623    When STRICT is false, we are being called from REGISTER_MOVE_COST,
18624    so do not enforce these sanity checks.
18625 
18626    To optimize register_move_cost performance, define inline variant.  */
18627 
18628 static inline bool
18629 inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
18630 				reg_class_t class2, int strict)
18631 {
18632   if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
18633     return false;
18634 
18635   if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
18636       || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
18637       || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
18638       || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
18639       || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
18640       || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
18641       || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
18642       || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
18643     {
18644       gcc_assert (!strict || lra_in_progress);
18645       return true;
18646     }
18647 
18648   if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
18649     return true;
18650 
18651   /* ??? This is a lie.  We do have moves between mmx/general, and for
18652      mmx/sse2.  But by saying we need secondary memory we discourage the
18653      register allocator from using the mmx registers unless needed.  */
18654   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18655     return true;
18656 
18657   /* Between mask and general, we have moves no larger than word size.  */
18658   if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
18659     {
18660       if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
18661 	  || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
18662 	return true;
18663     }
18664 
18665   if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18666     {
18667       /* SSE1 doesn't have any direct moves from other classes.  */
18668       if (!TARGET_SSE2)
18669 	return true;
18670 
18671       /* Between SSE and general, we have moves no larger than word size.  */
18672       if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
18673 	  || GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode)
18674 	  || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
18675 	return true;
18676 
18677       /* If the target says that inter-unit moves are more expensive
18678 	 than moving through memory, then don't generate them.  */
18679       if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
18680 	  || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
18681 	return true;
18682     }
18683 
18684   return false;
18685 }
18686 
18687 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.  */
18688 
18689 static bool
18690 ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
18691 			      reg_class_t class2)
18692 {
18693   return inline_secondary_memory_needed (mode, class1, class2, true);
18694 }
18695 
18696 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
18697 
18698    get_secondary_mem widens integral modes to BITS_PER_WORD.
18699    There is no need to emit a full 64-bit move on 64-bit targets
18700    for integral modes that can be moved using a 32-bit move.
18701 
18702 static machine_mode
18703 ix86_secondary_memory_needed_mode (machine_mode mode)
18704 {
18705   if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
18706     return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
18707   return mode;
18708 }
18709 
18710 /* Implement the TARGET_CLASS_MAX_NREGS hook.
18711 
18712    On the 80386, this is the size of MODE in words,
18713    except in the FP regs, where a single reg is always enough.  */
18714 
18715 static unsigned char
18716 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
18717 {
18718   if (MAYBE_INTEGER_CLASS_P (rclass))
18719     {
18720       if (mode == XFmode)
18721 	return (TARGET_64BIT ? 2 : 3);
18722       else if (mode == XCmode)
18723 	return (TARGET_64BIT ? 4 : 6);
18724       else
18725 	return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
18726     }
18727   else
18728     {
18729       if (COMPLEX_MODE_P (mode))
18730 	return 2;
18731       else
18732 	return 1;
18733     }
18734 }
18735 
18736 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
18737 
18738 static bool
18739 ix86_can_change_mode_class (machine_mode from, machine_mode to,
18740 			    reg_class_t regclass)
18741 {
18742   if (from == to)
18743     return true;
18744 
18745   /* x87 registers can't do subreg at all, as all values are reformatted
18746      to extended precision.  */
18747   if (MAYBE_FLOAT_CLASS_P (regclass))
18748     return false;
18749 
18750   if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
18751     {
18752       /* Vector registers do not support QI or HImode loads.  If we don't
18753 	 disallow a change to these modes, reload will assume it's ok to
18754 	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
18755 	 the vec_dupv4hi pattern.  */
18756       if (GET_MODE_SIZE (from) < 4)
18757 	return false;
18758     }
18759 
18760   return true;
18761 }
18762 
18763 /* Return index of MODE in the sse load/store tables.  */
18764 
18765 static inline int
18766 sse_store_index (machine_mode mode)
18767 {
18768       switch (GET_MODE_SIZE (mode))
18769 	{
18770 	  case 4:
18771 	    return 0;
18772 	  case 8:
18773 	    return 1;
18774 	  case 16:
18775 	    return 2;
18776 	  case 32:
18777 	    return 3;
18778 	  case 64:
18779 	    return 4;
18780 	  default:
18781 	    return -1;
18782 	}
18783 }
18784 
18785 /* Return the cost of moving data of mode M between a
18786    register and memory.  A value of 2 is the default; this cost is
18787    relative to those in `REGISTER_MOVE_COST'.
18788 
18789    This function is used extensively by register_move_cost that is used to
18790    build tables at startup.  Make it inline in this case.
18791    When IN is 2, return maximum of in and out move cost.
18792 
18793    If moving between registers and memory is more expensive than
18794    between two registers, you should define this macro to express the
18795    relative cost.
18796 
18797    Also model the increased cost of moving QImode registers in
18798    non-Q_REGS classes.
18799  */
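/* For example, for DFmode in FLOAT_REGS with IN == 1 this returns
   ix86_cost->hard_register.fp_load[1]; with IN == 0 it returns
   fp_store[1] (a reading of the code below).  */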
18800 static inline int
18801 inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
18802 {
18803   int cost;
18804   if (FLOAT_CLASS_P (regclass))
18805     {
18806       int index;
18807       switch (mode)
18808 	{
18809 	  case E_SFmode:
18810 	    index = 0;
18811 	    break;
18812 	  case E_DFmode:
18813 	    index = 1;
18814 	    break;
18815 	  case E_XFmode:
18816 	    index = 2;
18817 	    break;
18818 	  default:
18819 	    return 100;
18820 	}
18821       if (in == 2)
18822         return MAX (ix86_cost->hard_register.fp_load [index],
18823 		    ix86_cost->hard_register.fp_store [index]);
18824       return in ? ix86_cost->hard_register.fp_load [index]
18825 		: ix86_cost->hard_register.fp_store [index];
18826     }
18827   if (SSE_CLASS_P (regclass))
18828     {
18829       int index = sse_store_index (mode);
18830       if (index == -1)
18831 	return 100;
18832       if (in == 2)
18833         return MAX (ix86_cost->hard_register.sse_load [index],
18834 		    ix86_cost->hard_register.sse_store [index]);
18835       return in ? ix86_cost->hard_register.sse_load [index]
18836 		: ix86_cost->hard_register.sse_store [index];
18837     }
18838   if (MMX_CLASS_P (regclass))
18839     {
18840       int index;
18841       switch (GET_MODE_SIZE (mode))
18842 	{
18843 	  case 4:
18844 	    index = 0;
18845 	    break;
18846 	  case 8:
18847 	    index = 1;
18848 	    break;
18849 	  default:
18850 	    return 100;
18851 	}
18852       if (in == 2)
18853         return MAX (ix86_cost->hard_register.mmx_load [index],
18854 		    ix86_cost->hard_register.mmx_store [index]);
18855       return in ? ix86_cost->hard_register.mmx_load [index]
18856 		: ix86_cost->hard_register.mmx_store [index];
18857     }
18858   switch (GET_MODE_SIZE (mode))
18859     {
18860       case 1:
18861 	if (Q_CLASS_P (regclass) || TARGET_64BIT)
18862 	  {
18863 	    if (!in)
18864 	      return ix86_cost->hard_register.int_store[0];
18865 	    if (TARGET_PARTIAL_REG_DEPENDENCY
18866 	        && optimize_function_for_speed_p (cfun))
18867 	      cost = ix86_cost->hard_register.movzbl_load;
18868 	    else
18869 	      cost = ix86_cost->hard_register.int_load[0];
18870 	    if (in == 2)
18871 	      return MAX (cost, ix86_cost->hard_register.int_store[0]);
18872 	    return cost;
18873 	  }
18874 	else
18875 	  {
18876 	   if (in == 2)
18877 	     return MAX (ix86_cost->hard_register.movzbl_load,
18878 			 ix86_cost->hard_register.int_store[0] + 4);
18879 	   if (in)
18880 	     return ix86_cost->hard_register.movzbl_load;
18881 	   else
18882 	     return ix86_cost->hard_register.int_store[0] + 4;
18883 	  }
18884 	break;
18885       case 2:
18886 	if (in == 2)
18887 	  return MAX (ix86_cost->hard_register.int_load[1],
18888 		      ix86_cost->hard_register.int_store[1]);
18889 	return in ? ix86_cost->hard_register.int_load[1]
18890 		  : ix86_cost->hard_register.int_store[1];
18891       default:
18892 	if (in == 2)
18893 	  cost = MAX (ix86_cost->hard_register.int_load[2],
18894 		      ix86_cost->hard_register.int_store[2]);
18895 	else if (in)
18896 	  cost = ix86_cost->hard_register.int_load[2];
18897 	else
18898 	  cost = ix86_cost->hard_register.int_store[2];
18899 	/* Multiply with the number of GPR moves needed.  */
18900 	return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
18901     }
18902 }
18903 
18904 static int
18905 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
18906 {
18907   return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
18908 }
18909 
18910 
18911 /* Return the cost of moving data from a register in class CLASS1 to
18912    one in class CLASS2.
18913 
18914    It is not required that the cost always equal 2 when FROM is the same as TO;
18915    on some machines it is expensive to move between registers if they are not
18916    general registers.  */
18917 
18918 static int
18919 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
18920 			 reg_class_t class2_i)
18921 {
18922   enum reg_class class1 = (enum reg_class) class1_i;
18923   enum reg_class class2 = (enum reg_class) class2_i;
18924 
18925   /* In case we require secondary memory, compute cost of the store followed
18926      by load.  In order to avoid bad register allocation choices, we need
18927      this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
18928 
18929   if (inline_secondary_memory_needed (mode, class1, class2, false))
18930     {
18931       int cost = 1;
18932 
18933       cost += inline_memory_move_cost (mode, class1, 2);
18934       cost += inline_memory_move_cost (mode, class2, 2);
18935 
18936       /* In the case of copying from a general purpose register we may emit
18937          multiple stores followed by a single load, causing a memory size
18938          mismatch stall.  Count this as an arbitrarily high cost of 20.  */
18939       if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
18940 	  && TARGET_MEMORY_MISMATCH_STALL
18941 	  && targetm.class_max_nregs (class1, mode)
18942 	     > targetm.class_max_nregs (class2, mode))
18943 	cost += 20;
18944 
18945       /* In the case of FP/MMX moves, the registers actually overlap, and we
18946 	 have to switch modes in order to treat them differently.  */
18947       if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
18948           || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
18949 	cost += 20;
18950 
18951       return cost;
18952     }
18953 
18954   /* Moves between MMX and non-MMX units require secondary memory.  */
18955   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18956     gcc_unreachable ();
18957 
18958   if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18959     return (SSE_CLASS_P (class1)
18960 	    ? ix86_cost->hard_register.sse_to_integer
18961 	    : ix86_cost->hard_register.integer_to_sse);
18962 
18963   if (MAYBE_FLOAT_CLASS_P (class1))
18964     return ix86_cost->hard_register.fp_move;
18965   if (MAYBE_SSE_CLASS_P (class1))
18966     {
18967       if (GET_MODE_BITSIZE (mode) <= 128)
18968 	return ix86_cost->hard_register.xmm_move;
18969       if (GET_MODE_BITSIZE (mode) <= 256)
18970 	return ix86_cost->hard_register.ymm_move;
18971       return ix86_cost->hard_register.zmm_move;
18972     }
18973   if (MAYBE_MMX_CLASS_P (class1))
18974     return ix86_cost->hard_register.mmx_move;
18975   return 2;
18976 }
18977 
18978 /* Implement TARGET_HARD_REGNO_NREGS.  This is ordinarily the length in
18979    words of a value of mode MODE but can be less for certain modes in
18980    special long registers.
18981 
18982    Actually there are no two-word move instructions for consecutive
18983    registers.  And only registers 0-3 may have mov byte instructions
18984    applied to them.  */
18985 
18986 static unsigned int
18987 ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
18988 {
18989   if (GENERAL_REGNO_P (regno))
18990     {
18991       if (mode == XFmode)
18992 	return TARGET_64BIT ? 2 : 3;
18993       if (mode == XCmode)
18994 	return TARGET_64BIT ? 4 : 6;
18995       return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
18996     }
18997   if (COMPLEX_MODE_P (mode))
18998     return 2;
18999   /* Register pair for mask registers.  */
19000   if (mode == P2QImode || mode == P2HImode)
19001     return 2;
19002   if (mode == V64SFmode || mode == V64SImode)
19003     return 4;
19004   return 1;
19005 }
19006 
19007 /* Implement REGMODE_NATURAL_SIZE(MODE).  */
19008 unsigned int
19009 ix86_regmode_natural_size (machine_mode mode)
19010 {
19011   if (mode == P2HImode || mode == P2QImode)
19012     return GET_MODE_SIZE (mode) / 2;
19013   return UNITS_PER_WORD;
19014 }
19015 
19016 /* Implement TARGET_HARD_REGNO_MODE_OK.  */
19017 
19018 static bool
19019 ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
19020 {
19021   /* Flags and only flags can hold CCmode values.  */
19022   if (CC_REGNO_P (regno))
19023     return GET_MODE_CLASS (mode) == MODE_CC;
19024   if (GET_MODE_CLASS (mode) == MODE_CC
19025       || GET_MODE_CLASS (mode) == MODE_RANDOM)
19026     return false;
19027   if (STACK_REGNO_P (regno))
19028     return VALID_FP_MODE_P (mode);
19029   if (MASK_REGNO_P (regno))
19030     {
19031       /* Register pair only starts at even register number.  */
19032       if ((mode == P2QImode || mode == P2HImode))
19033 	return MASK_PAIR_REGNO_P(regno);
19034 
19035       return (VALID_MASK_REG_MODE (mode)
19036 	      || (TARGET_AVX512BW
19037 		  && VALID_MASK_AVX512BW_MODE (mode)));
19038     }
19039 
19040   if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
19041     return false;
19042 
19043   if (SSE_REGNO_P (regno))
19044     {
19045       /* We implement the move patterns for all vector modes into and
19046 	 out of SSE registers, even when no operation instructions
19047 	 are available.  */
19048 
19049       /* For AVX-512 we allow, regardless of regno:
19050 	  - XI mode
19051 	  - any 512-bit wide vector mode
19052 	  - any scalar mode.  */
19053       if (TARGET_AVX512F
19054 	  && (mode == XImode
19055 	      || VALID_AVX512F_REG_MODE (mode)
19056 	      || VALID_AVX512F_SCALAR_MODE (mode)))
19057 	return true;
19058 
19059       /* For AVX-5124FMAPS or AVX-5124VNNIW
19060 	 allow V64SF and V64SI modes for special regnos.  */
19061       if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
19062 	  && (mode == V64SFmode || mode == V64SImode)
19063 	  && MOD4_SSE_REGNO_P (regno))
19064 	return true;
19065 
19066       /* TODO check for QI/HI scalars.  */
19067       /* AVX512VL allows SSE regs 16+ for 128/256-bit modes.  */
19068       if (TARGET_AVX512VL
19069 	  && (mode == OImode
19070 	      || mode == TImode
19071 	      || VALID_AVX256_REG_MODE (mode)
19072 	      || VALID_AVX512VL_128_REG_MODE (mode)))
19073 	return true;
19074 
19075       /* xmm16-xmm31 are only available for AVX-512.  */
19076       if (EXT_REX_SSE_REGNO_P (regno))
19077 	return false;
19078 
19079       /* OImode and AVX modes are available only when AVX is enabled.  */
19080       return ((TARGET_AVX
19081 	       && VALID_AVX256_REG_OR_OI_MODE (mode))
19082 	      || VALID_SSE_REG_MODE (mode)
19083 	      || VALID_SSE2_REG_MODE (mode)
19084 	      || VALID_MMX_REG_MODE (mode)
19085 	      || VALID_MMX_REG_MODE_3DNOW (mode));
19086     }
19087   if (MMX_REGNO_P (regno))
19088     {
19089       /* We implement the move patterns for 3DNOW modes even in MMX mode,
19090 	 so if the register is available at all, then we can move data of
19091 	 the given mode into or out of it.  */
19092       return (VALID_MMX_REG_MODE (mode)
19093 	      || VALID_MMX_REG_MODE_3DNOW (mode));
19094     }
19095 
19096   if (mode == QImode)
19097     {
19098       /* Take care with QImode values - they can live in non-QI regs,
19099 	 but then they cause partial register stalls.  */
19100       if (ANY_QI_REGNO_P (regno))
19101 	return true;
19102       if (!TARGET_PARTIAL_REG_STALL)
19103 	return true;
19104       /* LRA checks if the hard register is OK for the given mode.
19105 	 QImode values can live in non-QI regs, so we allow all
19106 	 registers here.  */
19107       if (lra_in_progress)
19108        return true;
19109       return !can_create_pseudo_p ();
19110     }
19111   /* We handle both integer and floats in the general purpose registers.  */
19112   else if (VALID_INT_MODE_P (mode))
19113     return true;
19114   else if (VALID_FP_MODE_P (mode))
19115     return true;
19116   else if (VALID_DFP_MODE_P (mode))
19117     return true;
19118   /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
19119      on to use that value in smaller contexts, this can easily force a
19120      pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
19121      supporting DImode, allow it.  */
19122   else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
19123     return true;
19124 
19125   return false;
19126 }
19127 
19128 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  The only ABI that
19129    saves SSE registers across calls is Win64 (thus no need to check the
19130    current ABI here), and with AVX enabled Win64 only guarantees that
19131    the low 16 bytes are saved.  */
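/* For example, a 32-byte V8SFmode value living in an xmm register is
   treated as partially clobbered by a call, while a 16-byte V4SFmode
   value is not (see the size check below).  */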
19132 
19133 static bool
19134 ix86_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
19135 				     machine_mode mode)
19136 {
19137   return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
19138 }
19139 
19140 /* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
19141    tieable integer mode.  */
19142 
19143 static bool
19144 ix86_tieable_integer_mode_p (machine_mode mode)
19145 {
19146   switch (mode)
19147     {
19148     case E_HImode:
19149     case E_SImode:
19150       return true;
19151 
19152     case E_QImode:
19153       return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
19154 
19155     case E_DImode:
19156       return TARGET_64BIT;
19157 
19158     default:
19159       return false;
19160     }
19161 }
19162 
19163 /* Implement TARGET_MODES_TIEABLE_P.
19164 
19165    Return true if MODE1 is accessible in a register that can hold MODE2
19166    without copying.  That is, all register classes that can hold MODE2
19167    can also hold MODE1.  */
19168 
19169 static bool
19170 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
19171 {
19172   if (mode1 == mode2)
19173     return true;
19174 
19175   if (ix86_tieable_integer_mode_p (mode1)
19176       && ix86_tieable_integer_mode_p (mode2))
19177     return true;
19178 
19179   /* MODE2 being XFmode implies fp stack or general regs, which means we
19180      can tie any smaller floating point modes to it.  Note that we do not
19181      tie this with TFmode.  */
19182   if (mode2 == XFmode)
19183     return mode1 == SFmode || mode1 == DFmode;
19184 
19185   /* MODE2 being DFmode implies fp stack, general or sse regs, which means
19186      that we can tie it with SFmode.  */
19187   if (mode2 == DFmode)
19188     return mode1 == SFmode;
19189 
19190   /* If MODE2 is only appropriate for an SSE register, then tie with
19191      any other mode acceptable to SSE registers.  */
19192   if (GET_MODE_SIZE (mode2) == 64
19193       && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
19194     return (GET_MODE_SIZE (mode1) == 64
19195 	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
19196   if (GET_MODE_SIZE (mode2) == 32
19197       && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
19198     return (GET_MODE_SIZE (mode1) == 32
19199 	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
19200   if (GET_MODE_SIZE (mode2) == 16
19201       && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
19202     return (GET_MODE_SIZE (mode1) == 16
19203 	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
19204 
19205   /* If MODE2 is appropriate for an MMX register, then tie
19206      with any other mode acceptable to MMX registers.  */
19207   if (GET_MODE_SIZE (mode2) == 8
19208       && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
19209     return (GET_MODE_SIZE (mode1) == 8
19210 	    && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
19211 
19212   return false;
19213 }
19214 
19215 /* Return the cost of moving between two registers of mode MODE.  */
19216 
19217 static int
19218 ix86_set_reg_reg_cost (machine_mode mode)
19219 {
19220   unsigned int units = UNITS_PER_WORD;
19221 
19222   switch (GET_MODE_CLASS (mode))
19223     {
19224     default:
19225       break;
19226 
19227     case MODE_CC:
19228       units = GET_MODE_SIZE (CCmode);
19229       break;
19230 
19231     case MODE_FLOAT:
19232       if ((TARGET_SSE && mode == TFmode)
19233 	  || (TARGET_80387 && mode == XFmode)
19234 	  || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
19235 	  || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
19236 	units = GET_MODE_SIZE (mode);
19237       break;
19238 
19239     case MODE_COMPLEX_FLOAT:
19240       if ((TARGET_SSE && mode == TCmode)
19241 	  || (TARGET_80387 && mode == XCmode)
19242 	  || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
19243 	  || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
19244 	units = GET_MODE_SIZE (mode);
19245       break;
19246 
19247     case MODE_VECTOR_INT:
19248     case MODE_VECTOR_FLOAT:
19249       if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
19250 	  || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
19251 	  || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
19252 	  || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
19253 	  || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
19254 	      && VALID_MMX_REG_MODE (mode)))
19255 	units = GET_MODE_SIZE (mode);
19256     }
19257 
19258   /* Return the cost of moving between two registers of mode MODE,
19259      assuming that the move will be in pieces of at most UNITS bytes.  */
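  /* E.g. on a 32-bit target a DImode copy is done in two word-sized
     pieces, so it costs COSTS_N_INSNS (2).  */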
19260   return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
19261 }
19262 
19263 /* Return cost of vector operation in MODE given that scalar version has
19264    COST.  */
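/* For example, with TARGET_AVX256_SPLIT_REGS a 256-bit vector operation
   is costed as two 128-bit halves, i.e. 2 * COST (see the bitsize
   scaling below).  */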
19265 
19266 static int
19267 ix86_vec_cost (machine_mode mode, int cost)
19268 {
19269   if (!VECTOR_MODE_P (mode))
19270     return cost;
19271 
19272   if (GET_MODE_BITSIZE (mode) == 128
19273       && TARGET_SSE_SPLIT_REGS)
19274     return cost * 2;
19275   if (GET_MODE_BITSIZE (mode) > 128
19276       && TARGET_AVX256_SPLIT_REGS)
19277     return cost * GET_MODE_BITSIZE (mode) / 128;
19278   return cost;
19279 }
19280 
19281 /* Return cost of multiplication in MODE.  */
19282 
19283 static int
19284 ix86_multiplication_cost (const struct processor_costs *cost,
19285 			  enum machine_mode mode)
19286 {
19287   machine_mode inner_mode = mode;
19288   if (VECTOR_MODE_P (mode))
19289     inner_mode = GET_MODE_INNER (mode);
19290 
19291   if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19292     return inner_mode == DFmode ? cost->mulsd : cost->mulss;
19293   else if (X87_FLOAT_MODE_P (mode))
19294     return cost->fmul;
19295   else if (FLOAT_MODE_P (mode))
19296     return  ix86_vec_cost (mode,
19297 			   inner_mode == DFmode ? cost->mulsd : cost->mulss);
19298   else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19299     {
19300       /* vpmullq is used in this case. No emulation is needed.  */
19301       if (TARGET_AVX512DQ)
19302 	return ix86_vec_cost (mode, cost->mulss);
19303 
19304       /* V*QImode is emulated with 7-13 insns.  */
19305       if (mode == V16QImode || mode == V32QImode)
19306 	{
19307 	  int extra = 11;
19308 	  if (TARGET_XOP && mode == V16QImode)
19309 	    extra = 5;
19310 	  else if (TARGET_SSSE3)
19311 	    extra = 6;
19312 	  return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * extra);
19313 	}
19314       /* V*DImode is emulated with 5-8 insns.  */
19315       else if (mode == V2DImode || mode == V4DImode)
19316 	{
19317 	  if (TARGET_XOP && mode == V2DImode)
19318 	    return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 3);
19319 	  else
19320 	    return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5);
19321 	}
19322       /* Without sse4.1, we don't have PMULLD; it's emulated with 7
19323 	 insns, including two PMULUDQ.  */
19324       else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
19325 	return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
19326       else
19327 	return ix86_vec_cost (mode, cost->mulss);
19328     }
19329   else
19330     return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
19331 }
19332 
19333 /* Return cost of division in MODE.  */
19334 
19335 static int
19336 ix86_division_cost (const struct processor_costs *cost,
19337 			  enum machine_mode mode)
19338 {
19339   machine_mode inner_mode = mode;
19340   if (VECTOR_MODE_P (mode))
19341     inner_mode = GET_MODE_INNER (mode);
19342 
19343   if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19344     return inner_mode == DFmode ? cost->divsd : cost->divss;
19345   else if (X87_FLOAT_MODE_P (mode))
19346     return cost->fdiv;
19347   else if (FLOAT_MODE_P (mode))
19348     return ix86_vec_cost (mode,
19349 			  inner_mode == DFmode ? cost->divsd : cost->divss);
19350   else
19351     return cost->divide[MODE_INDEX (mode)];
19352 }
19353 
19354 #define COSTS_N_BYTES(N) ((N) * 2)
19355 
19356 /* Return the cost of a shift or rotate in MODE.
19357    If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
19358    AND_IN_OP1 specifies whether op1 is the result of an AND, and
19359    SHIFT_AND_TRUNCATE whether op1 is the result of a subreg.
19360 
19361    SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored.  */
19362 
19363 static int
19364 ix86_shift_rotate_cost (const struct processor_costs *cost,
19365 			enum machine_mode mode, bool constant_op1,
19366 			HOST_WIDE_INT op1_val,
19367 			bool speed,
19368 			bool and_in_op1,
19369 			bool shift_and_truncate,
19370 			bool *skip_op0, bool *skip_op1)
19371 {
19372   if (skip_op0)
19373     *skip_op0 = *skip_op1 = false;
19374   if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19375     {
19376       /* V*QImode is emulated with 1-11 insns.  */
19377       if (mode == V16QImode || mode == V32QImode)
19378 	{
19379 	  int count = 11;
19380 	  if (TARGET_XOP && mode == V16QImode)
19381 	    {
19382 	      /* For XOP we use vpshab, which requires a broadcast of the
19383 		 value to the variable shift insn.  For constants this
19384 		 means a V16Q const in mem; even when we can perform the
19385 		 shift with one insn set the cost to prefer paddb.  */
19386 	      if (constant_op1)
19387 		{
19388 		  if (skip_op1)
19389 		    *skip_op1 = true;
19390 		  return ix86_vec_cost (mode,
19391 					cost->sse_op
19392 					+ (speed
19393 					   ? 2
19394 					   : COSTS_N_BYTES
19395 					       (GET_MODE_UNIT_SIZE (mode))));
19396 		}
19397 	      count = 3;
19398 	    }
19399 	  else if (TARGET_SSSE3)
19400 	    count = 7;
19401 	  return ix86_vec_cost (mode, cost->sse_op * count);
19402 	}
19403       else
19404 	return ix86_vec_cost (mode, cost->sse_op);
19405     }
19406   if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
19407     {
19408       if (constant_op1)
19409 	{
19410 	  if (op1_val > 32)
19411 	    return cost->shift_const + COSTS_N_INSNS (2);
19412 	  else
19413 	    return cost->shift_const * 2;
19414 	}
19415       else
19416 	{
19417 	  if (and_in_op1)
19418 	    return cost->shift_var * 2;
19419 	  else
19420 	    return cost->shift_var * 6 + COSTS_N_INSNS (2);
19421 	}
19422     }
19423   else
19424     {
19425       if (constant_op1)
19426 	return cost->shift_const;
19427       else if (shift_and_truncate)
19428 	{
19429 	  if (skip_op0)
19430 	    *skip_op0 = *skip_op1 = true;
19431 	  /* Return the cost after shift-and-truncation.  */
19432 	  return cost->shift_var;
19433 	}
19434       else
19435 	return cost->shift_var;
19436     }
19437   return cost->shift_const;
19438 }
19439 
19440 /* Compute a (partial) cost for rtx X.  Return true if the complete
19441    cost has been computed, and false if subexpressions should be
19442    scanned.  In either case, *TOTAL contains the cost result.  */
19443 
19444 static bool
19445 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
19446 		int *total, bool speed)
19447 {
19448   rtx mask;
19449   enum rtx_code code = GET_CODE (x);
19450   enum rtx_code outer_code = (enum rtx_code) outer_code_i;
19451   const struct processor_costs *cost
19452     = speed ? ix86_tune_cost : &ix86_size_cost;
19453   int src_cost;
19454 
19455   switch (code)
19456     {
19457     case SET:
19458       if (register_operand (SET_DEST (x), VOIDmode)
19459 	  && register_operand (SET_SRC (x), VOIDmode))
19460 	{
19461 	  *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
19462 	  return true;
19463 	}
19464 
19465       if (register_operand (SET_SRC (x), VOIDmode))
19466 	/* Avoid potentially incorrect high cost from rtx_costs
19467 	   for non-tieable SUBREGs.  */
19468 	src_cost = 0;
19469       else
19470 	{
19471 	  src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
19472 
19473 	  if (CONSTANT_P (SET_SRC (x)))
19474 	    /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
19475 	       a small value, possibly zero for cheap constants.  */
19476 	    src_cost += COSTS_N_INSNS (1);
19477 	}
19478 
19479       *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
19480       return true;
19481 
19482     case CONST_INT:
19483     case CONST:
19484     case LABEL_REF:
19485     case SYMBOL_REF:
19486       if (x86_64_immediate_operand (x, VOIDmode))
19487 	*total = 0;
19488       else
19489 	*total = 1;
19490       return true;
19491 
19492     case CONST_DOUBLE:
19493       if (IS_STACK_MODE (mode))
19494 	switch (standard_80387_constant_p (x))
19495 	  {
19496 	  case -1:
19497 	  case 0:
19498 	    break;
19499 	  case 1: /* 0.0 */
19500 	    *total = 1;
19501 	    return true;
19502 	  default: /* Other constants */
19503 	    *total = 2;
19504 	    return true;
19505 	  }
19506       /* FALLTHRU */
19507 
19508     case CONST_VECTOR:
19509       switch (standard_sse_constant_p (x, mode))
19510 	{
19511 	case 0:
19512 	  break;
19513 	case 1:  /* 0: xor eliminates false dependency */
19514 	  *total = 0;
19515 	  return true;
19516 	default: /* -1: cmp contains false dependency */
19517 	  *total = 1;
19518 	  return true;
19519 	}
19520       /* FALLTHRU */
19521 
19522     case CONST_WIDE_INT:
19523       /* Fall back to (MEM (SYMBOL_REF)), since that's where
19524 	 it'll probably end up.  Add a penalty for size.  */
19525       *total = (COSTS_N_INSNS (1)
19526 		+ (!TARGET_64BIT && flag_pic)
19527 		+ (GET_MODE_SIZE (mode) <= 4
19528 		   ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
19529       return true;
19530 
19531     case ZERO_EXTEND:
19532       /* Zero extension is often completely free on x86_64, so make
19533 	 it as cheap as possible.  */
19534       if (TARGET_64BIT && mode == DImode
19535 	  && GET_MODE (XEXP (x, 0)) == SImode)
19536 	*total = 1;
19537       else if (TARGET_ZERO_EXTEND_WITH_AND)
19538 	*total = cost->add;
19539       else
19540 	*total = cost->movzx;
19541       return false;
19542 
19543     case SIGN_EXTEND:
19544       *total = cost->movsx;
19545       return false;
19546 
19547     case ASHIFT:
19548       if (SCALAR_INT_MODE_P (mode)
19549 	  && GET_MODE_SIZE (mode) < UNITS_PER_WORD
19550 	  && CONST_INT_P (XEXP (x, 1)))
19551 	{
19552 	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19553 	  if (value == 1)
19554 	    {
19555 	      *total = cost->add;
19556 	      return false;
19557 	    }
19558 	  if ((value == 2 || value == 3)
19559 	      && cost->lea <= cost->shift_const)
19560 	    {
19561 	      *total = cost->lea;
19562 	      return false;
19563 	    }
19564 	}
19565       /* FALLTHRU */
19566 
19567     case ROTATE:
19568     case ASHIFTRT:
19569     case LSHIFTRT:
19570     case ROTATERT:
19571       bool skip_op0, skip_op1;
19572       *total = ix86_shift_rotate_cost (cost, mode, CONSTANT_P (XEXP (x, 1)),
19573 				       CONST_INT_P (XEXP (x, 1))
19574 					 ? INTVAL (XEXP (x, 1)) : -1,
19575 				       speed,
19576 				       GET_CODE (XEXP (x, 1)) == AND,
19577 				       SUBREG_P (XEXP (x, 1))
19578 				       && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND,
19579 				       &skip_op0, &skip_op1);
19580       if (skip_op0 || skip_op1)
19581 	{
19582 	  if (!skip_op0)
19583 	    *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
19584 	  if (!skip_op1)
19585 	    *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
19586 	  return true;
19587 	}
19588       return false;
19589 
19590     case FMA:
19591       {
19592 	rtx sub;
19593 
19594         gcc_assert (FLOAT_MODE_P (mode));
19595         gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
19596 
19597         *total = ix86_vec_cost (mode,
19598 				GET_MODE_INNER (mode) == SFmode
19599 				? cost->fmass : cost->fmasd);
19600 	*total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
19601 
19602         /* Negate in op0 or op2 is free: FMS, FNMA, FNMS.  */
19603 	sub = XEXP (x, 0);
19604 	if (GET_CODE (sub) == NEG)
19605 	  sub = XEXP (sub, 0);
19606 	*total += rtx_cost (sub, mode, FMA, 0, speed);
19607 
19608 	sub = XEXP (x, 2);
19609 	if (GET_CODE (sub) == NEG)
19610 	  sub = XEXP (sub, 0);
19611 	*total += rtx_cost (sub, mode, FMA, 2, speed);
19612 	return true;
19613       }
19614 
19615     case MULT:
19616       if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
19617 	{
19618 	  rtx op0 = XEXP (x, 0);
19619 	  rtx op1 = XEXP (x, 1);
19620 	  int nbits;
19621 	  if (CONST_INT_P (XEXP (x, 1)))
19622 	    {
19623 	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19624 	      for (nbits = 0; value != 0; value &= value - 1)
19625 	        nbits++;
19626 	    }
19627 	  else
19628 	    /* This is arbitrary.  */
19629 	    nbits = 7;
19630 
19631 	  /* Compute costs correctly for widening multiplication.  */
19632 	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
19633 	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
19634 	         == GET_MODE_SIZE (mode))
19635 	    {
19636 	      int is_mulwiden = 0;
19637 	      machine_mode inner_mode = GET_MODE (op0);
19638 
19639 	      if (GET_CODE (op0) == GET_CODE (op1))
19640 		is_mulwiden = 1, op1 = XEXP (op1, 0);
19641 	      else if (CONST_INT_P (op1))
19642 		{
19643 		  if (GET_CODE (op0) == SIGN_EXTEND)
19644 		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
19645 			          == INTVAL (op1);
19646 		  else
19647 		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
19648 	        }
19649 
19650 	      if (is_mulwiden)
19651 	        op0 = XEXP (op0, 0), mode = GET_MODE (op0);
19652 	    }
19653 
19654   	  *total = (cost->mult_init[MODE_INDEX (mode)]
19655 		    + nbits * cost->mult_bit
19656 	            + rtx_cost (op0, mode, outer_code, opno, speed)
19657 		    + rtx_cost (op1, mode, outer_code, opno, speed));
19658 
19659           return true;
19660 	}
19661       *total = ix86_multiplication_cost (cost, mode);
19662       return false;
19663 
19664     case DIV:
19665     case UDIV:
19666     case MOD:
19667     case UMOD:
19668       *total = ix86_division_cost (cost, mode);
19669       return false;
19670 
19671     case PLUS:
19672       if (GET_MODE_CLASS (mode) == MODE_INT
19673 	  && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
19674 	{
19675 	  if (GET_CODE (XEXP (x, 0)) == PLUS
19676 	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
19677 	      && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
19678 	      && CONSTANT_P (XEXP (x, 1)))
19679 	    {
19680 	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
19681 	      if (val == 2 || val == 4 || val == 8)
19682 		{
19683 		  *total = cost->lea;
19684 		  *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
19685 				      outer_code, opno, speed);
19686 		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
19687 				      outer_code, opno, speed);
19688 		  *total += rtx_cost (XEXP (x, 1), mode,
19689 				      outer_code, opno, speed);
19690 		  return true;
19691 		}
19692 	    }
19693 	  else if (GET_CODE (XEXP (x, 0)) == MULT
19694 		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
19695 	    {
19696 	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
19697 	      if (val == 2 || val == 4 || val == 8)
19698 		{
19699 		  *total = cost->lea;
19700 		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
19701 				      outer_code, opno, speed);
19702 		  *total += rtx_cost (XEXP (x, 1), mode,
19703 				      outer_code, opno, speed);
19704 		  return true;
19705 		}
19706 	    }
19707 	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
19708 	    {
19709 	      /* Add with carry, ignore the cost of adding a carry flag.  */
19710 	      if (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 0), mode))
19711 		*total = cost->add;
19712 	      else
19713 		{
19714 		  *total = cost->lea;
19715 		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
19716 				      outer_code, opno, speed);
19717 		}
19718 
19719 	      *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
19720 				  outer_code, opno, speed);
19721 	      *total += rtx_cost (XEXP (x, 1), mode,
19722 				  outer_code, opno, speed);
19723 	      return true;
19724 	    }
19725 	}
19726       /* FALLTHRU */
19727 
19728     case MINUS:
19729       /* Subtract with borrow, ignore the cost of subtracting a carry flag.  */
19730       if (GET_MODE_CLASS (mode) == MODE_INT
19731 	  && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
19732 	  && GET_CODE (XEXP (x, 0)) == MINUS
19733 	  && ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode))
19734 	{
19735 	  *total = cost->add;
19736 	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
19737 			      outer_code, opno, speed);
19738 	  *total += rtx_cost (XEXP (x, 1), mode,
19739 			      outer_code, opno, speed);
19740 	  return true;
19741 	}
19742 
19743       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19744 	{
19745 	  *total = cost->addss;
19746 	  return false;
19747 	}
19748       else if (X87_FLOAT_MODE_P (mode))
19749 	{
19750 	  *total = cost->fadd;
19751 	  return false;
19752 	}
19753       else if (FLOAT_MODE_P (mode))
19754 	{
19755 	  *total = ix86_vec_cost (mode, cost->addss);
19756 	  return false;
19757 	}
19758       /* FALLTHRU */
19759 
19760     case AND:
19761     case IOR:
19762     case XOR:
19763       if (GET_MODE_CLASS (mode) == MODE_INT
19764 	  && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
19765 	{
19766 	  *total = (cost->add * 2
19767 		    + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
19768 		       << (GET_MODE (XEXP (x, 0)) != DImode))
19769 		    + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
19770 	               << (GET_MODE (XEXP (x, 1)) != DImode)));
19771 	  return true;
19772 	}
19773       /* FALLTHRU */
19774 
19775     case NEG:
19776       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19777 	{
19778 	  *total = cost->sse_op;
19779 	  return false;
19780 	}
19781       else if (X87_FLOAT_MODE_P (mode))
19782 	{
19783 	  *total = cost->fchs;
19784 	  return false;
19785 	}
19786       else if (FLOAT_MODE_P (mode))
19787 	{
19788 	  *total = ix86_vec_cost (mode, cost->sse_op);
19789 	  return false;
19790 	}
19791       /* FALLTHRU */
19792 
19793     case NOT:
19794       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19795 	*total = ix86_vec_cost (mode, cost->sse_op);
19796       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
19797 	*total = cost->add * 2;
19798       else
19799 	*total = cost->add;
19800       return false;
19801 
19802     case COMPARE:
19803       if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
19804 	  && XEXP (XEXP (x, 0), 1) == const1_rtx
19805 	  && CONST_INT_P (XEXP (XEXP (x, 0), 2))
19806 	  && XEXP (x, 1) == const0_rtx)
19807 	{
19808 	  /* This kind of construct is implemented using test[bwl].
19809 	     Treat it as if we had an AND.  */
19810 	  mode = GET_MODE (XEXP (XEXP (x, 0), 0));
19811 	  *total = (cost->add
19812 		    + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code,
19813 				opno, speed)
19814 		    + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
19815 	  return true;
19816 	}
19817 
19818       if (GET_CODE (XEXP (x, 0)) == PLUS
19819 	  && rtx_equal_p (XEXP (XEXP (x, 0), 0), XEXP (x, 1)))
19820 	{
19821 	  /* This is an overflow detection, count it as a normal compare.  */
19822 	  *total = rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)),
19823 			     COMPARE, 0, speed);
19824 	  return true;
19825 	}
19826 
19827       /* The embedded comparison operand is completely free.  */
19828       if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
19829 	  && XEXP (x, 1) == const0_rtx)
19830 	*total = 0;
19831 
19832       return false;
19833 
19834     case FLOAT_EXTEND:
19835       if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
19836 	*total = 0;
19837       else
19838         *total = ix86_vec_cost (mode, cost->addss);
19839       return false;
19840 
19841     case FLOAT_TRUNCATE:
19842       if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
19843 	*total = cost->fadd;
19844       else
19845         *total = ix86_vec_cost (mode, cost->addss);
19846       return false;
19847 
19848     case ABS:
19849       /* SSE requires a memory load for the constant operand.  It may make
19850 	 sense to account for this.  Of course the constant operand may or
19851 	 may not be reused.  */
19852       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19853 	*total = cost->sse_op;
19854       else if (X87_FLOAT_MODE_P (mode))
19855 	*total = cost->fabs;
19856       else if (FLOAT_MODE_P (mode))
19857 	*total = ix86_vec_cost (mode, cost->sse_op);
19858       return false;
19859 
19860     case SQRT:
19861       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19862 	*total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
19863       else if (X87_FLOAT_MODE_P (mode))
19864 	*total = cost->fsqrt;
19865       else if (FLOAT_MODE_P (mode))
19866 	*total = ix86_vec_cost (mode,
19867 				mode == SFmode ? cost->sqrtss : cost->sqrtsd);
19868       return false;
19869 
19870     case UNSPEC:
19871       if (XINT (x, 1) == UNSPEC_TP)
19872 	*total = 0;
19873       return false;
19874 
19875     case VEC_SELECT:
19876     case VEC_CONCAT:
19877     case VEC_DUPLICATE:
19878       /* ??? Assume all of these vector manipulation patterns are
19879 	 recognizable, in which case they all have pretty much the
19880 	 same cost.  */
19881      *total = cost->sse_op;
19882      return true;
19883     case VEC_MERGE:
19884       mask = XEXP (x, 2);
19885       /* This is a masked instruction; assume the same cost
19886 	 as the nonmasked variant.  */
19887       if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
19888 	*total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
19889       else
19890 	*total = cost->sse_op;
19891       return true;
19892 
19893     default:
19894       return false;
19895     }
19896 }
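/* Editor's note (illustrative, not part of the GCC sources): in the integer
   MULT costing above, a constant multiplier is charged per set bit, so with
   a hypothetical cost table where mult_init[2] == 3 and mult_bit == 1 an
   SImode multiply by 9 (binary 1001, two set bits) is costed as
   3 + 2 * 1 = 5 plus the operand costs.  The PLUS case likewise charges a
   single LEA for address-shaped operands such as
   (plus (plus (mult reg 4) reg) const), i.e. what the backend would emit as
   "leal off(%base,%index,4), %dest".  */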
19897 
19898 #if TARGET_MACHO
19899 
19900 static int current_machopic_label_num;
19901 
19902 /* Given a symbol name and its associated stub, write out the
19903    definition of the stub.  */
19904 
19905 void
19906 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19907 {
19908   unsigned int length;
19909   char *binder_name, *symbol_name, lazy_ptr_name[32];
19910   int label = ++current_machopic_label_num;
19911 
19912   /* For 64-bit we shouldn't get here.  */
19913   gcc_assert (!TARGET_64BIT);
19914 
19915   /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
19916   symb = targetm.strip_name_encoding (symb);
19917 
19918   length = strlen (stub);
19919   binder_name = XALLOCAVEC (char, length + 32);
19920   GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
19921 
19922   length = strlen (symb);
19923   symbol_name = XALLOCAVEC (char, length + 32);
19924   GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19925 
19926   sprintf (lazy_ptr_name, "L%d$lz", label);
19927 
19928   if (MACHOPIC_ATT_STUB)
19929     switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
19930   else if (MACHOPIC_PURE)
19931     switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
19932   else
19933     switch_to_section (darwin_sections[machopic_symbol_stub_section]);
19934 
19935   fprintf (file, "%s:\n", stub);
19936   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19937 
19938   if (MACHOPIC_ATT_STUB)
19939     {
19940       fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
19941     }
19942   else if (MACHOPIC_PURE)
19943     {
19944       /* PIC stub.  */
19945       /* 25-byte PIC stub using "CALL get_pc_thunk".  */
19946       rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
19947       output_set_got (tmp, NULL_RTX);	/* "CALL ___<cpu>.get_pc_thunk.cx".  */
19948       fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
19949 	       label, lazy_ptr_name, label);
19950       fprintf (file, "\tjmp\t*%%ecx\n");
19951     }
19952   else
19953     fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
19954 
19955   /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
19956      it needs no stub-binding-helper.  */
19957   if (MACHOPIC_ATT_STUB)
19958     return;
19959 
19960   fprintf (file, "%s:\n", binder_name);
19961 
19962   if (MACHOPIC_PURE)
19963     {
19964       fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
19965       fprintf (file, "\tpushl\t%%ecx\n");
19966     }
19967   else
19968     fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
19969 
19970   fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
19971 
19972   /* N.B. Keep the correspondence of these
19973      'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
19974      old-pic/new-pic/non-pic stubs; altering this will break
19975      compatibility with existing dylibs.  */
19976   if (MACHOPIC_PURE)
19977     {
19978       /* 25-byte PIC stub using "CALL get_pc_thunk".  */
19979       switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
19980     }
19981   else
19982     /* 16-byte -mdynamic-no-pic stub.  */
19983     switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
19984 
19985   fprintf (file, "%s:\n", lazy_ptr_name);
19986   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19987   fprintf (file, ASM_LONG "%s\n", binder_name);
19988 }
19989 #endif /* TARGET_MACHO */
19990 
19991 /* Order the registers for register allocator.  */
19992 
19993 void
19994 x86_order_regs_for_local_alloc (void)
19995 {
19996    int pos = 0;
19997    int i;
19998 
19999    /* First allocate the local general purpose registers.  */
20000    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
20001      if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (i))
20002 	reg_alloc_order [pos++] = i;
20003 
20004    /* Global general purpose registers.  */
20005    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
20006      if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (i))
20007 	reg_alloc_order [pos++] = i;
20008 
20009    /* x87 registers come first in case we are doing FP math
20010       using them.  */
20011    if (!TARGET_SSE_MATH)
20012      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
20013        reg_alloc_order [pos++] = i;
20014 
20015    /* SSE registers.  */
20016    for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
20017      reg_alloc_order [pos++] = i;
20018    for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
20019      reg_alloc_order [pos++] = i;
20020 
20021    /* Extended REX SSE registers.  */
20022    for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
20023      reg_alloc_order [pos++] = i;
20024 
20025    /* Mask register.  */
20026    for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
20027      reg_alloc_order [pos++] = i;
20028 
20029    /* x87 registers.  */
20030    if (TARGET_SSE_MATH)
20031      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
20032        reg_alloc_order [pos++] = i;
20033 
20034    for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
20035      reg_alloc_order [pos++] = i;
20036 
20037    /* Initialize the rest of the array as we do not allocate some registers
20038       at all.  */
20039    while (pos < FIRST_PSEUDO_REGISTER)
20040      reg_alloc_order [pos++] = 0;
20041 }
20042 
20043 static bool
20044 ix86_ms_bitfield_layout_p (const_tree record_type)
20045 {
20046   return ((TARGET_MS_BITFIELD_LAYOUT
20047 	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
20048           || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
20049 }
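/* Editor's note (illustrative, not part of the GCC sources): the hook above
   lets the bit-field layout be chosen per structure, e.g.

     struct __attribute__ ((ms_struct))  A { char c : 4; int i : 4; };
     struct __attribute__ ((gcc_struct)) B { char c : 4; int i : 4; };

   A follows the MS layout even without -mms-bitfields, while B keeps the
   native GCC layout even when -mms-bitfields is in effect.  */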
20050 
20051 /* Returns an expression indicating where the this parameter is
20052    located on entry to the FUNCTION.  */
20053 
20054 static rtx
20055 x86_this_parameter (tree function)
20056 {
20057   tree type = TREE_TYPE (function);
20058   bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
20059   int nregs;
20060 
20061   if (TARGET_64BIT)
20062     {
20063       const int *parm_regs;
20064 
20065       if (ix86_function_type_abi (type) == MS_ABI)
20066         parm_regs = x86_64_ms_abi_int_parameter_registers;
20067       else
20068         parm_regs = x86_64_int_parameter_registers;
20069       return gen_rtx_REG (Pmode, parm_regs[aggr]);
20070     }
20071 
20072   nregs = ix86_function_regparm (type, function);
20073 
20074   if (nregs > 0 && !stdarg_p (type))
20075     {
20076       int regno;
20077       unsigned int ccvt = ix86_get_callcvt (type);
20078 
20079       if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
20080 	regno = aggr ? DX_REG : CX_REG;
20081       else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
20082         {
20083 	  regno = CX_REG;
20084 	  if (aggr)
20085 	    return gen_rtx_MEM (SImode,
20086 				plus_constant (Pmode, stack_pointer_rtx, 4));
20087 	}
20088       else
20089         {
20090 	  regno = AX_REG;
20091 	  if (aggr)
20092 	    {
20093 	      regno = DX_REG;
20094 	      if (nregs == 1)
20095 		return gen_rtx_MEM (SImode,
20096 				    plus_constant (Pmode,
20097 						   stack_pointer_rtx, 4));
20098 	    }
20099 	}
20100       return gen_rtx_REG (SImode, regno);
20101     }
20102 
20103   return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
20104 					     aggr ? 8 : 4));
20105 }
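/* Editor's summary (not part of the GCC sources) of the cases above: for
   64-bit targets `this' lives in the first integer argument register (%rdi,
   or %rcx for the MS ABI), moving to the second register when the return
   value is an aggregate returned in memory.  For 32-bit targets it is %ecx
   for thiscall, %ecx or %edx for fastcall, %eax or %edx with regparm, and
   otherwise it is on the stack at 4(%esp), or 8(%esp) when an aggregate
   return pointer comes first.  */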
20106 
20107 /* Determine whether x86_output_mi_thunk can succeed.  */
20108 
20109 static bool
20110 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
20111 			 const_tree function)
20112 {
20113   /* 64-bit can handle anything.  */
20114   if (TARGET_64BIT)
20115     return true;
20116 
20117   /* For 32-bit, everything's fine if we have one free register.  */
20118   if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
20119     return true;
20120 
20121   /* Need a free register for vcall_offset.  */
20122   if (vcall_offset)
20123     return false;
20124 
20125   /* Need a free register for GOT references.  */
20126   if (flag_pic && !targetm.binds_local_p (function))
20127     return false;
20128 
20129   /* Otherwise ok.  */
20130   return true;
20131 }
20132 
20133 /* Output the assembler code for a thunk function.  THUNK_DECL is the
20134    declaration for the thunk function itself, FUNCTION is the decl for
20135    the target function.  DELTA is an immediate constant offset to be
20136    added to THIS.  If VCALL_OFFSET is nonzero, the word at
20137    *(*this + vcall_offset) should be added to THIS.  */
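/* Editor's sketch (not part of the GCC sources) of what the thunk emitted
   below might look like on x86-64, assuming DELTA == 16, no VCALL_OFFSET
   and a locally bound target:

       endbr64                  # only with -fcf-protection=branch
       addq    $16, %rdi        # adjust `this' by DELTA
       jmp     target_function  # sibling call to the real method  */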
20138 
20139 static void
20140 x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
20141 		     HOST_WIDE_INT vcall_offset, tree function)
20142 {
20143   const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
20144   rtx this_param = x86_this_parameter (function);
20145   rtx this_reg, tmp, fnaddr;
20146   unsigned int tmp_regno;
20147   rtx_insn *insn;
20148 
20149   if (TARGET_64BIT)
20150     tmp_regno = R10_REG;
20151   else
20152     {
20153       unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
20154       if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
20155 	tmp_regno = AX_REG;
20156       else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
20157 	tmp_regno = DX_REG;
20158       else
20159 	tmp_regno = CX_REG;
20160     }
20161 
20162   emit_note (NOTE_INSN_PROLOGUE_END);
20163 
20164   /* If CET is enabled, insert an ENDBR instruction.  */
20165   if ((flag_cf_protection & CF_BRANCH))
20166     emit_insn (gen_nop_endbr ());
20167 
20168   /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
20169      pull it in now and let DELTA benefit.  */
20170   if (REG_P (this_param))
20171     this_reg = this_param;
20172   else if (vcall_offset)
20173     {
20174       /* Put the this parameter into %eax.  */
20175       this_reg = gen_rtx_REG (Pmode, AX_REG);
20176       emit_move_insn (this_reg, this_param);
20177     }
20178   else
20179     this_reg = NULL_RTX;
20180 
20181   /* Adjust the this parameter by a fixed constant.  */
20182   if (delta)
20183     {
20184       rtx delta_rtx = GEN_INT (delta);
20185       rtx delta_dst = this_reg ? this_reg : this_param;
20186 
20187       if (TARGET_64BIT)
20188 	{
20189 	  if (!x86_64_general_operand (delta_rtx, Pmode))
20190 	    {
20191 	      tmp = gen_rtx_REG (Pmode, tmp_regno);
20192 	      emit_move_insn (tmp, delta_rtx);
20193 	      delta_rtx = tmp;
20194 	    }
20195 	}
20196 
20197       ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
20198     }
20199 
20200   /* Adjust the this parameter by a value stored in the vtable.  */
20201   if (vcall_offset)
20202     {
20203       rtx vcall_addr, vcall_mem, this_mem;
20204 
20205       tmp = gen_rtx_REG (Pmode, tmp_regno);
20206 
20207       this_mem = gen_rtx_MEM (ptr_mode, this_reg);
20208       if (Pmode != ptr_mode)
20209 	this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
20210       emit_move_insn (tmp, this_mem);
20211 
20212       /* Adjust the this parameter.  */
20213       vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
20214       if (TARGET_64BIT
20215 	  && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
20216 	{
20217 	  rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
20218 	  emit_move_insn (tmp2, GEN_INT (vcall_offset));
20219 	  vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
20220 	}
20221 
20222       vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
20223       if (Pmode != ptr_mode)
20224 	emit_insn (gen_addsi_1_zext (this_reg,
20225 				     gen_rtx_REG (ptr_mode,
20226 						  REGNO (this_reg)),
20227 				     vcall_mem));
20228       else
20229 	ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
20230     }
20231 
20232   /* If necessary, drop THIS back to its stack slot.  */
20233   if (this_reg && this_reg != this_param)
20234     emit_move_insn (this_param, this_reg);
20235 
20236   fnaddr = XEXP (DECL_RTL (function), 0);
20237   if (TARGET_64BIT)
20238     {
20239       if (!flag_pic || targetm.binds_local_p (function)
20240 	  || TARGET_PECOFF)
20241 	;
20242       else
20243 	{
20244 	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
20245 	  tmp = gen_rtx_CONST (Pmode, tmp);
20246 	  fnaddr = gen_const_mem (Pmode, tmp);
20247 	}
20248     }
20249   else
20250     {
20251       if (!flag_pic || targetm.binds_local_p (function))
20252 	;
20253 #if TARGET_MACHO
20254       else if (TARGET_MACHO)
20255 	{
20256 	  fnaddr = machopic_indirect_call_target (DECL_RTL (function));
20257 	  fnaddr = XEXP (fnaddr, 0);
20258 	}
20259 #endif /* TARGET_MACHO */
20260       else
20261 	{
20262 	  tmp = gen_rtx_REG (Pmode, CX_REG);
20263 	  output_set_got (tmp, NULL_RTX);
20264 
20265 	  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
20266 	  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
20267 	  fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
20268 	  fnaddr = gen_const_mem (Pmode, fnaddr);
20269 	}
20270     }
20271 
20272   /* Our sibling call patterns do not allow memories, because we have no
20273      predicate that can distinguish between frame and non-frame memory.
20274      For our purposes here, we can get away with (ab)using a jump pattern,
20275      because we're going to do no optimization.  */
20276   if (MEM_P (fnaddr))
20277     {
20278       if (sibcall_insn_operand (fnaddr, word_mode))
20279 	{
20280 	  fnaddr = XEXP (DECL_RTL (function), 0);
20281 	  tmp = gen_rtx_MEM (QImode, fnaddr);
20282 	  tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
20283 	  tmp = emit_call_insn (tmp);
20284 	  SIBLING_CALL_P (tmp) = 1;
20285 	}
20286       else
20287 	emit_jump_insn (gen_indirect_jump (fnaddr));
20288     }
20289   else
20290     {
20291       if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
20292 	{
20293 	  // CM_LARGE_PIC always uses pseudo PIC register which is
20294 	  // uninitialized.  Since FUNCTION is local and calling it
20295 	  // doesn't go through PLT, we use scratch register %r11 as
20296 	  // PIC register and initialize it here.
20297 	  pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
20298 	  ix86_init_large_pic_reg (tmp_regno);
20299 	  fnaddr = legitimize_pic_address (fnaddr,
20300 					   gen_rtx_REG (Pmode, tmp_regno));
20301 	}
20302 
20303       if (!sibcall_insn_operand (fnaddr, word_mode))
20304 	{
20305 	  tmp = gen_rtx_REG (word_mode, tmp_regno);
20306 	  if (GET_MODE (fnaddr) != word_mode)
20307 	    fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
20308 	  emit_move_insn (tmp, fnaddr);
20309 	  fnaddr = tmp;
20310 	}
20311 
20312       tmp = gen_rtx_MEM (QImode, fnaddr);
20313       tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
20314       tmp = emit_call_insn (tmp);
20315       SIBLING_CALL_P (tmp) = 1;
20316     }
20317   emit_barrier ();
20318 
20319   /* Emit just enough of rest_of_compilation to get the insns emitted.  */
20320   insn = get_insns ();
20321   shorten_branches (insn);
20322   assemble_start_function (thunk_fndecl, fnname);
20323   final_start_function (insn, file, 1);
20324   final (insn, file, 1);
20325   final_end_function ();
20326   assemble_end_function (thunk_fndecl, fnname);
20327 }
20328 
20329 static void
20330 x86_file_start (void)
20331 {
20332   default_file_start ();
20333   if (TARGET_16BIT)
20334     fputs ("\t.code16gcc\n", asm_out_file);
20335 #if TARGET_MACHO
20336   darwin_file_start ();
20337 #endif
20338   if (X86_FILE_START_VERSION_DIRECTIVE)
20339     fputs ("\t.version\t\"01.01\"\n", asm_out_file);
20340   if (X86_FILE_START_FLTUSED)
20341     fputs ("\t.global\t__fltused\n", asm_out_file);
20342   if (ix86_asm_dialect == ASM_INTEL)
20343     fputs ("\t.intel_syntax noprefix\n", asm_out_file);
20344 }
20345 
20346 int
20347 x86_field_alignment (tree type, int computed)
20348 {
20349   machine_mode mode;
20350 
20351   if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
20352     return computed;
20353   if (TARGET_IAMCU)
20354     return iamcu_alignment (type, computed);
20355   mode = TYPE_MODE (strip_array_types (type));
20356   if (mode == DFmode || mode == DCmode
20357       || GET_MODE_CLASS (mode) == MODE_INT
20358       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
20359     return MIN (32, computed);
20360   return computed;
20361 }
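/* Editor's note (illustrative, not part of the GCC sources): the
   MIN (32, computed) cap above is what gives the traditional ia32 ABI its
   4-byte alignment for double structure members, e.g. for

     struct s { char c; double d; };

   offsetof (struct s, d) is 4 on ia32, but 8 with -malign-double or on
   64-bit targets, both of which return early above.  */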
20362 
20363 /* Print call to TARGET to FILE.  */
20364 
20365 static void
20366 x86_print_call_or_nop (FILE *file, const char *target)
20367 {
20368   if (flag_nop_mcount || !strcmp (target, "nop"))
20369     /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
20370     fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
20371   else
20372     fprintf (file, "1:\tcall\t%s\n", target);
20373 }
20374 
20375 static bool
20376 current_fentry_name (const char **name)
20377 {
20378   tree attr = lookup_attribute ("fentry_name",
20379 				DECL_ATTRIBUTES (current_function_decl));
20380   if (!attr)
20381     return false;
20382   *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
20383   return true;
20384 }
20385 
20386 static bool
20387 current_fentry_section (const char **name)
20388 {
20389   tree attr = lookup_attribute ("fentry_section",
20390 				DECL_ATTRIBUTES (current_function_decl));
20391   if (!attr)
20392     return false;
20393   *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
20394   return true;
20395 }
20396 
20397 /* Output assembler code to FILE to increment profiler label # LABELNO
20398    for profiling a function entry.  */
20399 void
20400 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
20401 {
20402   if (cfun->machine->endbr_queued_at_entrance)
20403     fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32");
20404 
20405   const char *mcount_name = MCOUNT_NAME;
20406 
20407   if (current_fentry_name (&mcount_name))
20408     ;
20409   else if (fentry_name)
20410     mcount_name = fentry_name;
20411   else if (flag_fentry)
20412     mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;
20413 
20414   if (TARGET_64BIT)
20415     {
20416 #ifndef NO_PROFILE_COUNTERS
20417       fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
20418 #endif
20419 
20420       if (!TARGET_PECOFF && flag_pic)
20421 	fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
20422       else
20423 	x86_print_call_or_nop (file, mcount_name);
20424     }
20425   else if (flag_pic)
20426     {
20427 #ifndef NO_PROFILE_COUNTERS
20428       fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
20429 	       LPREFIX, labelno);
20430 #endif
20431       fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
20432     }
20433   else
20434     {
20435 #ifndef NO_PROFILE_COUNTERS
20436       fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
20437 	       LPREFIX, labelno);
20438 #endif
20439       x86_print_call_or_nop (file, mcount_name);
20440     }
20441 
20442   if (flag_record_mcount
20443 	|| lookup_attribute ("fentry_section",
20444                                 DECL_ATTRIBUTES (current_function_decl)))
20445     {
20446       const char *sname = "__mcount_loc";
20447 
20448       if (current_fentry_section (&sname))
20449 	;
20450       else if (fentry_section)
20451 	sname = fentry_section;
20452 
20453       fprintf (file, "\t.section %s, \"a\",@progbits\n", sname);
20454       fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
20455       fprintf (file, "\t.previous\n");
20456     }
20457 }
20458 
20459 /* We don't have exact information about the insn sizes, but we may assume
20460    quite safely that we are informed about all 1 byte insns and memory
20461    address sizes.  This is enough to eliminate unnecessary padding in
20462    99% of cases.  */
20463 
20464 int
20465 ix86_min_insn_size (rtx_insn *insn)
20466 {
20467   int l = 0, len;
20468 
20469   if (!INSN_P (insn) || !active_insn_p (insn))
20470     return 0;
20471 
20472   /* Discard alignments we've emitted and jump instructions.  */
20473   if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
20474       && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
20475     return 0;
20476 
20477   /* Important case - calls are always 5 bytes.
20478      It is common to have many calls in a row.  */
20479   if (CALL_P (insn)
20480       && symbolic_reference_mentioned_p (PATTERN (insn))
20481       && !SIBLING_CALL_P (insn))
20482     return 5;
20483   len = get_attr_length (insn);
20484   if (len <= 1)
20485     return 1;
20486 
20487   /* For normal instructions we rely on get_attr_length being exact,
20488      with a few exceptions.  */
20489   if (!JUMP_P (insn))
20490     {
20491       enum attr_type type = get_attr_type (insn);
20492 
20493       switch (type)
20494 	{
20495 	case TYPE_MULTI:
20496 	  if (GET_CODE (PATTERN (insn)) == ASM_INPUT
20497 	      || asm_noperands (PATTERN (insn)) >= 0)
20498 	    return 0;
20499 	  break;
20500 	case TYPE_OTHER:
20501 	case TYPE_FCMP:
20502 	  break;
20503 	default:
20504 	  /* Otherwise trust get_attr_length.  */
20505 	  return len;
20506 	}
20507 
20508       l = get_attr_length_address (insn);
20509       if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
20510 	l = 4;
20511     }
20512   if (l)
20513     return 1+l;
20514   else
20515     return 2;
20516 }
20517 
20518 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
20519 
20520 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
20521    window.  */
20522 
20523 static void
20524 ix86_avoid_jump_mispredicts (void)
20525 {
20526   rtx_insn *insn, *start = get_insns ();
20527   int nbytes = 0, njumps = 0;
20528   bool isjump = false;
20529 
20530   /* Look for all minimal intervals of instructions containing 4 jumps.
20531      The intervals are bounded by START and INSN.  NBYTES is the total
20532      size of instructions in the interval including INSN and not including
20533      START.  When the NBYTES is smaller than 16 bytes, it is possible
20534      that the end of START and INSN ends up in the same 16byte page.
20535 
20536      The smallest offset in the page INSN can start is the case where START
20537      ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
20538      We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
20539 
20540      Don't consider asm goto as jump, while it can contain a jump, it doesn't
20541      have to, control transfer to label(s) can be performed through other
20542      means, and also we estimate minimum length of all asm stmts as 0.  */
20543   for (insn = start; insn; insn = NEXT_INSN (insn))
20544     {
20545       int min_size;
20546 
20547       if (LABEL_P (insn))
20548 	{
20549 	  align_flags alignment = label_to_alignment (insn);
20550 	  int align = alignment.levels[0].log;
20551 	  int max_skip = alignment.levels[0].maxskip;
20552 
20553 	  if (max_skip > 15)
20554 	    max_skip = 15;
20555 	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
20556 	     already in the current 16 byte page, because otherwise
20557 	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
20558 	     bytes to reach 16 byte boundary.  */
20559 	  if (align <= 0
20560 	      || (align <= 3 && max_skip != (1 << align) - 1))
20561 	    max_skip = 0;
20562 	  if (dump_file)
20563 	    fprintf (dump_file, "Label %i with max_skip %i\n",
20564 		     INSN_UID (insn), max_skip);
20565 	  if (max_skip)
20566 	    {
20567 	      while (nbytes + max_skip >= 16)
20568 		{
20569 		  start = NEXT_INSN (start);
20570 		  if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
20571 		      || CALL_P (start))
20572 		    njumps--, isjump = true;
20573 		  else
20574 		    isjump = false;
20575 		  nbytes -= ix86_min_insn_size (start);
20576 		}
20577 	    }
20578 	  continue;
20579 	}
20580 
20581       min_size = ix86_min_insn_size (insn);
20582       nbytes += min_size;
20583       if (dump_file)
20584 	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
20585 		 INSN_UID (insn), min_size);
20586       if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
20587 	  || CALL_P (insn))
20588 	njumps++;
20589       else
20590 	continue;
20591 
20592       while (njumps > 3)
20593 	{
20594 	  start = NEXT_INSN (start);
20595 	  if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
20596 	      || CALL_P (start))
20597 	    njumps--, isjump = true;
20598 	  else
20599 	    isjump = false;
20600 	  nbytes -= ix86_min_insn_size (start);
20601 	}
20602       gcc_assert (njumps >= 0);
20603       if (dump_file)
20604         fprintf (dump_file, "Interval %i to %i has %i bytes\n",
20605 		 INSN_UID (start), INSN_UID (insn), nbytes);
20606 
20607       if (njumps == 3 && isjump && nbytes < 16)
20608 	{
20609 	  int padsize = 15 - nbytes + ix86_min_insn_size (insn);
20610 
20611 	  if (dump_file)
20612 	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
20613 		     INSN_UID (insn), padsize);
20614           emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
20615 	}
20616     }
20617 }
20618 #endif
20619 
20620 /* AMD Athlon works faster
20621    when RET is not the destination of a conditional jump or directly preceded
20622    by another jump instruction.  We avoid the penalty by inserting a NOP just
20623    before the RET instructions in such cases.  */
20624 static void
20625 ix86_pad_returns (void)
20626 {
20627   edge e;
20628   edge_iterator ei;
20629 
20630   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20631     {
20632       basic_block bb = e->src;
20633       rtx_insn *ret = BB_END (bb);
20634       rtx_insn *prev;
20635       bool replace = false;
20636 
20637       if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
20638 	  || optimize_bb_for_size_p (bb))
20639 	continue;
20640       for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
20641 	if (active_insn_p (prev) || LABEL_P (prev))
20642 	  break;
20643       if (prev && LABEL_P (prev))
20644 	{
20645 	  edge e;
20646 	  edge_iterator ei;
20647 
20648 	  FOR_EACH_EDGE (e, ei, bb->preds)
20649 	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
20650 		&& !(e->flags & EDGE_FALLTHRU))
20651 	      {
20652 		replace = true;
20653 		break;
20654 	      }
20655 	}
20656       if (!replace)
20657 	{
20658 	  prev = prev_active_insn (ret);
20659 	  if (prev
20660 	      && ((JUMP_P (prev) && any_condjump_p (prev))
20661 		  || CALL_P (prev)))
20662 	    replace = true;
20663 	  /* Empty functions get branch mispredict even when
20664 	     the jump destination is not visible to us.  */
20665 	  if (!prev && !optimize_function_for_size_p (cfun))
20666 	    replace = true;
20667 	}
20668       if (replace)
20669 	{
20670 	  emit_jump_insn_before (gen_simple_return_internal_long (), ret);
20671 	  delete_insn (ret);
20672 	}
20673     }
20674 }
20675 
20676 /* Count the minimum number of instructions in BB.  Return 4 if the
20677    number of instructions >= 4.  */
20678 
20679 static int
20680 ix86_count_insn_bb (basic_block bb)
20681 {
20682   rtx_insn *insn;
20683   int insn_count = 0;
20684 
20685   /* Count number of instructions in this block.  Return 4 if the number
20686      of instructions >= 4.  */
20687   FOR_BB_INSNS (bb, insn)
20688     {
20689       /* This can only happen in exit blocks.  */
20690       if (JUMP_P (insn)
20691 	  && ANY_RETURN_P (PATTERN (insn)))
20692 	break;
20693 
20694       if (NONDEBUG_INSN_P (insn)
20695 	  && GET_CODE (PATTERN (insn)) != USE
20696 	  && GET_CODE (PATTERN (insn)) != CLOBBER)
20697 	{
20698 	  insn_count++;
20699 	  if (insn_count >= 4)
20700 	    return insn_count;
20701 	}
20702     }
20703 
20704   return insn_count;
20705 }
20706 
20707 
20708 /* Count the minimum number of instructions in code path in BB.
20709    Return 4 if the number of instructions >= 4.  */
20710 
20711 static int
20712 ix86_count_insn (basic_block bb)
20713 {
20714   edge e;
20715   edge_iterator ei;
20716   int min_prev_count;
20717 
20718   /* Only bother counting instructions along paths with no
20719      more than 2 basic blocks between entry and exit.  Given
20720      that BB has an edge to exit, determine if a predecessor
20721      of BB has an edge from entry.  If so, compute the number
20722      of instructions in the predecessor block.  If there
20723      happen to be multiple such blocks, compute the minimum.  */
20724   min_prev_count = 4;
20725   FOR_EACH_EDGE (e, ei, bb->preds)
20726     {
20727       edge prev_e;
20728       edge_iterator prev_ei;
20729 
20730       if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
20731 	{
20732 	  min_prev_count = 0;
20733 	  break;
20734 	}
20735       FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
20736 	{
20737 	  if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
20738 	    {
20739 	      int count = ix86_count_insn_bb (e->src);
20740 	      if (count < min_prev_count)
20741 		min_prev_count = count;
20742 	      break;
20743 	    }
20744 	}
20745     }
20746 
20747   if (min_prev_count < 4)
20748     min_prev_count += ix86_count_insn_bb (bb);
20749 
20750   return min_prev_count;
20751 }
20752 
20753 /* Pad short function to 4 instructions.   */
20754 
20755 static void
20756 ix86_pad_short_function (void)
20757 {
20758   edge e;
20759   edge_iterator ei;
20760 
20761   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20762     {
20763       rtx_insn *ret = BB_END (e->src);
20764       if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
20765 	{
20766 	  int insn_count = ix86_count_insn (e->src);
20767 
20768 	  /* Pad short function.  */
20769 	  if (insn_count < 4)
20770 	    {
20771 	      rtx_insn *insn = ret;
20772 
20773 	      /* Find epilogue.  */
20774 	      while (insn
20775 		     && (!NOTE_P (insn)
20776 			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
20777 		insn = PREV_INSN (insn);
20778 
20779 	      if (!insn)
20780 		insn = ret;
20781 
20782 	      /* Two NOPs count as one instruction.  */
20783 	      insn_count = 2 * (4 - insn_count);
20784 	      emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
20785 	    }
20786 	}
20787     }
20788 }
20789 
20790 /* Fix up a Windows system unwinder issue.  If an EH region falls through into
20791    the epilogue, the Windows system unwinder will apply epilogue logic and
20792    produce incorrect offsets.  This can be avoided by adding a nop between
20793    the last insn that can throw and the first insn of the epilogue.  */
20794 
20795 static void
20796 ix86_seh_fixup_eh_fallthru (void)
20797 {
20798   edge e;
20799   edge_iterator ei;
20800 
20801   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20802     {
20803       rtx_insn *insn, *next;
20804 
20805       /* Find the beginning of the epilogue.  */
20806       for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
20807 	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
20808 	  break;
20809       if (insn == NULL)
20810 	continue;
20811 
20812       /* We only care about preceding insns that can throw.  */
20813       insn = prev_active_insn (insn);
20814       if (insn == NULL || !can_throw_internal (insn))
20815 	continue;
20816 
20817       /* Do not separate calls from their debug information.  */
20818       for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
20819 	if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
20820 	  insn = next;
20821 	else
20822 	  break;
20823 
20824       emit_insn_after (gen_nops (const1_rtx), insn);
20825     }
20826 }
20827 
20828 /* Implement machine specific optimizations.  We implement padding of returns
20829    for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
20830 static void
20831 ix86_reorg (void)
20832 {
20833   /* We are freeing block_for_insn in the toplev to keep compatibility
20834      with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
20835   compute_bb_for_insn ();
20836 
20837   if (TARGET_SEH && current_function_has_exception_handlers ())
20838     ix86_seh_fixup_eh_fallthru ();
20839 
20840   if (optimize && optimize_function_for_speed_p (cfun))
20841     {
20842       if (TARGET_PAD_SHORT_FUNCTION)
20843 	ix86_pad_short_function ();
20844       else if (TARGET_PAD_RETURNS)
20845 	ix86_pad_returns ();
20846 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
20847       if (TARGET_FOUR_JUMP_LIMIT)
20848 	ix86_avoid_jump_mispredicts ();
20849 #endif
20850     }
20851 }
20852 
20853 /* Return nonzero when a QImode register that must be represented via a REX
20854    prefix is used.  */
20855 bool
20856 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
20857 {
20858   int i;
20859   extract_insn_cached (insn);
20860   for (i = 0; i < recog_data.n_operands; i++)
20861     if (GENERAL_REG_P (recog_data.operand[i])
20862 	&& !QI_REGNO_P (REGNO (recog_data.operand[i])))
20863        return true;
20864   return false;
20865 }
20866 
20867 /* Return true when INSN mentions a register that must be encoded using a REX
20868    prefix.  */
20869 bool
20870 x86_extended_reg_mentioned_p (rtx insn)
20871 {
20872   subrtx_iterator::array_type array;
20873   FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
20874     {
20875       const_rtx x = *iter;
20876       if (REG_P (x)
20877 	  && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
20878 	return true;
20879     }
20880   return false;
20881 }
20882 
20883 /* If profitable, negate (without causing overflow) integer constant
20884    of mode MODE at location LOC.  Return true in this case.  */
20885 bool
20886 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
20887 {
20888   HOST_WIDE_INT val;
20889 
20890   if (!CONST_INT_P (*loc))
20891     return false;
20892 
20893   switch (mode)
20894     {
20895     case E_DImode:
20896       /* DImode x86_64 constants must fit in 32 bits.  */
20897       gcc_assert (x86_64_immediate_operand (*loc, mode));
20898 
20899       mode = SImode;
20900       break;
20901 
20902     case E_SImode:
20903     case E_HImode:
20904     case E_QImode:
20905       break;
20906 
20907     default:
20908       gcc_unreachable ();
20909     }
20910 
20911   /* Avoid overflows.  */
20912   if (mode_signbit_p (mode, *loc))
20913     return false;
20914 
20915   val = INTVAL (*loc);
20916 
20917   /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
20918      Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
20919   if ((val < 0 && val != -128)
20920       || val == 128)
20921     {
20922       *loc = GEN_INT (-val);
20923       return true;
20924     }
20925 
20926   return false;
20927 }
20928 
20929 /* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
20930    optabs would emit if we didn't have TFmode patterns.  */
20931 
20932 void
20933 x86_emit_floatuns (rtx operands[2])
20934 {
20935   rtx_code_label *neglab, *donelab;
20936   rtx i0, i1, f0, in, out;
20937   machine_mode mode, inmode;
20938 
20939   inmode = GET_MODE (operands[1]);
20940   gcc_assert (inmode == SImode || inmode == DImode);
20941 
20942   out = operands[0];
20943   in = force_reg (inmode, operands[1]);
20944   mode = GET_MODE (out);
20945   neglab = gen_label_rtx ();
20946   donelab = gen_label_rtx ();
20947   f0 = gen_reg_rtx (mode);
20948 
20949   emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
20950 
20951   expand_float (out, in, 0);
20952 
20953   emit_jump_insn (gen_jump (donelab));
20954   emit_barrier ();
20955 
20956   emit_label (neglab);
20957 
20958   i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
20959 			    1, OPTAB_DIRECT);
20960   i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
20961 			    1, OPTAB_DIRECT);
20962   i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
20963 
20964   expand_float (f0, i0, 0);
20965 
20966   emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
20967 
20968   emit_label (donelab);
20969 }
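/* Editor's sketch (not part of the GCC sources) of the expansion above in C
   terms, for a DImode input converted to double:

     if ((long long) in >= 0)
       out = (double) (long long) in;                  // plain signed convert
     else
       {
	 unsigned long long i0 = (in >> 1) | (in & 1); // halve, keep low bit
	 out = (double) (long long) i0;
	 out += out;                                    // scale back up
       }
*/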
20970 
20971 /* Target hook for scalar_mode_supported_p.  */
20972 static bool
20973 ix86_scalar_mode_supported_p (scalar_mode mode)
20974 {
20975   if (DECIMAL_FLOAT_MODE_P (mode))
20976     return default_decimal_float_supported_p ();
20977   else if (mode == TFmode)
20978     return true;
20979   else
20980     return default_scalar_mode_supported_p (mode);
20981 }
20982 
20983 /* Implements target hook vector_mode_supported_p.  */
20984 static bool
20985 ix86_vector_mode_supported_p (machine_mode mode)
20986 {
20987   if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
20988     return true;
20989   if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
20990     return true;
20991   if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
20992     return true;
20993   if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
20994     return true;
20995   if ((TARGET_MMX || TARGET_MMX_WITH_SSE) && VALID_MMX_REG_MODE (mode))
20996     return true;
20997   if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
20998     return true;
20999   return false;
21000 }
21001 
21002 /* Target hook for c_mode_for_suffix.  */
21003 static machine_mode
21004 ix86_c_mode_for_suffix (char suffix)
21005 {
21006   if (suffix == 'q')
21007     return TFmode;
21008   if (suffix == 'w')
21009     return XFmode;
21010 
21011   return VOIDmode;
21012 }
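/* Editor's note (illustrative, not part of the GCC sources): these suffixes
   back the extended floating-point literal extensions, e.g.

     __float128 q = 1.1q;   // constant gets TFmode via the 'q' suffix
     __float80  w = 1.1w;   // constant gets XFmode via the 'w' suffix
*/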
21013 
21014 /* Worker function for TARGET_MD_ASM_ADJUST.
21015 
21016    We implement asm flag outputs, and maintain source compatibility
21017    with the old cc0-based compiler.  */
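/* Editor's example (not part of the GCC sources) of the asm flag outputs
   handled by this hook:

     int carry;
     asm ("addl %2, %0" : "+r" (dst), "=@ccc" (carry) : "r" (src));

   The "=@ccc" constraint is rewritten below so that the real asm output is
   FLAGS_REG and the user variable is set from an EQ test of the CCCmode
   flags against zero.  */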
21018 
21019 static rtx_insn *
21020 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
21021 		    vec<const char *> &constraints,
21022 		    vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
21023 {
21024   bool saw_asm_flag = false;
21025 
21026   start_sequence ();
21027   for (unsigned i = 0, n = outputs.length (); i < n; ++i)
21028     {
21029       const char *con = constraints[i];
21030       if (strncmp (con, "=@cc", 4) != 0)
21031 	continue;
21032       con += 4;
21033       if (strchr (con, ',') != NULL)
21034 	{
21035 	  error ("alternatives not allowed in %<asm%> flag output");
21036 	  continue;
21037 	}
21038 
21039       bool invert = false;
21040       if (con[0] == 'n')
21041 	invert = true, con++;
21042 
21043       machine_mode mode = CCmode;
21044       rtx_code code = UNKNOWN;
21045 
21046       switch (con[0])
21047 	{
21048 	case 'a':
21049 	  if (con[1] == 0)
21050 	    mode = CCAmode, code = EQ;
21051 	  else if (con[1] == 'e' && con[2] == 0)
21052 	    mode = CCCmode, code = NE;
21053 	  break;
21054 	case 'b':
21055 	  if (con[1] == 0)
21056 	    mode = CCCmode, code = EQ;
21057 	  else if (con[1] == 'e' && con[2] == 0)
21058 	    mode = CCAmode, code = NE;
21059 	  break;
21060 	case 'c':
21061 	  if (con[1] == 0)
21062 	    mode = CCCmode, code = EQ;
21063 	  break;
21064 	case 'e':
21065 	  if (con[1] == 0)
21066 	    mode = CCZmode, code = EQ;
21067 	  break;
21068 	case 'g':
21069 	  if (con[1] == 0)
21070 	    mode = CCGCmode, code = GT;
21071 	  else if (con[1] == 'e' && con[2] == 0)
21072 	    mode = CCGCmode, code = GE;
21073 	  break;
21074 	case 'l':
21075 	  if (con[1] == 0)
21076 	    mode = CCGCmode, code = LT;
21077 	  else if (con[1] == 'e' && con[2] == 0)
21078 	    mode = CCGCmode, code = LE;
21079 	  break;
21080 	case 'o':
21081 	  if (con[1] == 0)
21082 	    mode = CCOmode, code = EQ;
21083 	  break;
21084 	case 'p':
21085 	  if (con[1] == 0)
21086 	    mode = CCPmode, code = EQ;
21087 	  break;
21088 	case 's':
21089 	  if (con[1] == 0)
21090 	    mode = CCSmode, code = EQ;
21091 	  break;
21092 	case 'z':
21093 	  if (con[1] == 0)
21094 	    mode = CCZmode, code = EQ;
21095 	  break;
21096 	}
21097       if (code == UNKNOWN)
21098 	{
21099 	  error ("unknown %<asm%> flag output %qs", constraints[i]);
21100 	  continue;
21101 	}
21102       if (invert)
21103 	code = reverse_condition (code);
21104 
21105       rtx dest = outputs[i];
21106       if (!saw_asm_flag)
21107 	{
21108 	  /* This is the first asm flag output.  Here we put the flags
21109 	     register in as the real output and adjust the condition to
21110 	     allow it.  */
21111 	  constraints[i] = "=Bf";
21112 	  outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
21113 	  saw_asm_flag = true;
21114 	}
21115       else
21116 	{
21117 	  /* We don't need the flags register as output twice.  */
21118 	  constraints[i] = "=X";
21119 	  outputs[i] = gen_rtx_SCRATCH (SImode);
21120 	}
21121 
21122       rtx x = gen_rtx_REG (mode, FLAGS_REG);
21123       x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
21124 
21125       machine_mode dest_mode = GET_MODE (dest);
21126       if (!SCALAR_INT_MODE_P (dest_mode))
21127 	{
21128 	  error ("invalid type for %<asm%> flag output");
21129 	  continue;
21130 	}
21131 
21132       if (dest_mode == QImode)
21133 	emit_insn (gen_rtx_SET (dest, x));
21134       else
21135 	{
21136 	  rtx reg = gen_reg_rtx (QImode);
21137 	  emit_insn (gen_rtx_SET (reg, x));
21138 
21139 	  reg = convert_to_mode (dest_mode, reg, 1);
21140 	  emit_move_insn (dest, reg);
21141 	}
21142     }
21143 
21144   rtx_insn *seq = get_insns ();
21145   end_sequence ();
21146 
21147   if (saw_asm_flag)
21148     return seq;
21149   else
21150     {
21151       /* If we had no asm flag outputs, clobber the flags.  */
21152       clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
21153       SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
21154       return NULL;
21155     }
21156 }
21157 
21158 /* Implements target vector targetm.asm.encode_section_info.  */
21159 
21160 static void ATTRIBUTE_UNUSED
21161 ix86_encode_section_info (tree decl, rtx rtl, int first)
21162 {
21163   default_encode_section_info (decl, rtl, first);
21164 
21165   if (ix86_in_large_data_p (decl))
21166     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
21167 }
21168 
21169 /* Worker function for REVERSE_CONDITION.  */
21170 
21171 enum rtx_code
21172 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
21173 {
21174   return (mode == CCFPmode
21175 	  ? reverse_condition_maybe_unordered (code)
21176 	  : reverse_condition (code));
21177 }
21178 
21179 /* Output code to perform an x87 FP register move, from OPERANDS[1]
21180    to OPERANDS[0].  */
21181 
21182 const char *
21183 output_387_reg_move (rtx_insn *insn, rtx *operands)
21184 {
21185   if (REG_P (operands[0]))
21186     {
21187       if (REG_P (operands[1])
21188 	  && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
21189 	{
21190 	  if (REGNO (operands[0]) == FIRST_STACK_REG)
21191 	    return output_387_ffreep (operands, 0);
21192 	  return "fstp\t%y0";
21193 	}
21194       if (STACK_TOP_P (operands[0]))
21195 	return "fld%Z1\t%y1";
21196       return "fst\t%y0";
21197     }
21198   else if (MEM_P (operands[0]))
21199     {
21200       gcc_assert (REG_P (operands[1]));
21201       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
21202 	return "fstp%Z0\t%y0";
21203       else
21204 	{
21205 	  /* There is no non-popping store to memory for XFmode.
21206 	     So if we need one, follow the store with a load.  */
21207 	  if (GET_MODE (operands[0]) == XFmode)
21208 	    return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
21209 	  else
21210 	    return "fst%Z0\t%y0";
21211 	}
21212     }
21213   else
21214     gcc_unreachable();
21215 }
21216 #ifdef TARGET_SOLARIS
21217 /* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */
21218 
21219 static void
21220 i386_solaris_elf_named_section (const char *name, unsigned int flags,
21221 				tree decl)
21222 {
21223   /* With Binutils 2.15, the "@unwind" marker must be specified on
21224      every occurrence of the ".eh_frame" section, not just the first
21225      one.  */
21226   if (TARGET_64BIT
21227       && strcmp (name, ".eh_frame") == 0)
21228     {
21229       fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
21230 	       flags & SECTION_WRITE ? "aw" : "a");
21231       return;
21232     }
21233 
21234 #ifndef USE_GAS
21235   if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
21236     {
21237       solaris_elf_asm_comdat_section (name, flags, decl);
21238       return;
21239     }
21240 
21241   /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
21242      SPARC assembler.  One cannot mix single-letter flags and #exclude, so
21243      only emit the latter here.  */
21244   if (flags & SECTION_EXCLUDE)
21245     {
21246       fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
21247       return;
21248     }
21249 #endif
21250 
21251   default_elf_asm_named_section (name, flags, decl);
21252 }
21253 #endif /* TARGET_SOLARIS */
21254 
21255 /* Return the mangling of TYPE if it is an extended fundamental type.  */
21256 
21257 static const char *
21258 ix86_mangle_type (const_tree type)
21259 {
21260   type = TYPE_MAIN_VARIANT (type);
21261 
21262   if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
21263       && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
21264     return NULL;
21265 
21266   switch (TYPE_MODE (type))
21267     {
21268     case E_TFmode:
21269       /* __float128 is "g".  */
21270       return "g";
21271     case E_XFmode:
21272       /* "long double" or __float80 is "e".  */
21273       return "e";
21274     default:
21275       return NULL;
21276     }
21277 }
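/* Editor's note (illustrative, not part of the GCC sources): with the
   manglings above,

     void f (__float128);    // mangled as _Z1fg
     void g (long double);   // mangled as _Z1ge on x86

   which follows the Itanium C++ ABI, where "g" denotes __float128 and "e"
   the 80-bit extended type.  */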
21278 
21279 static GTY(()) tree ix86_tls_stack_chk_guard_decl;
21280 
21281 static tree
21282 ix86_stack_protect_guard (void)
21283 {
21284   if (TARGET_SSP_TLS_GUARD)
21285     {
21286       tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
21287       int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
21288       tree type = build_qualified_type (type_node, qual);
21289       tree t;
21290 
21291       if (global_options_set.x_ix86_stack_protector_guard_symbol_str)
21292 	{
21293 	  t = ix86_tls_stack_chk_guard_decl;
21294 
21295 	  if (t == NULL)
21296 	    {
21297 	      rtx x;
21298 
21299 	      t = build_decl
21300 		(UNKNOWN_LOCATION, VAR_DECL,
21301 		 get_identifier (ix86_stack_protector_guard_symbol_str),
21302 		 type);
21303 	      TREE_STATIC (t) = 1;
21304 	      TREE_PUBLIC (t) = 1;
21305 	      DECL_EXTERNAL (t) = 1;
21306 	      TREE_USED (t) = 1;
21307 	      TREE_THIS_VOLATILE (t) = 1;
21308 	      DECL_ARTIFICIAL (t) = 1;
21309 	      DECL_IGNORED_P (t) = 1;
21310 
21311 	      /* Do not share RTL as the declaration is visible outside of
21312 		 current function.  */
21313 	      x = DECL_RTL (t);
21314 	      RTX_FLAG (x, used) = 1;
21315 
21316 	      ix86_tls_stack_chk_guard_decl = t;
21317 	    }
21318 	}
21319       else
21320 	{
21321 	  tree asptrtype = build_pointer_type (type);
21322 
21323 	  t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
21324 	  t = build2 (MEM_REF, asptrtype, t,
21325 		      build_int_cst (asptrtype, 0));
21326 	  TREE_THIS_VOLATILE (t) = 1;
21327 	}
21328 
21329       return t;
21330     }
21331 
21332   return default_stack_protect_guard ();
21333 }
21334 
21335 /* For 32-bit code we can save PIC register setup by using
21336    __stack_chk_fail_local hidden function instead of calling
21337    __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
21338    register, so it is better to call __stack_chk_fail directly.  */
21339 
21340 static tree ATTRIBUTE_UNUSED
21341 ix86_stack_protect_fail (void)
21342 {
21343   return TARGET_64BIT
21344 	 ? default_external_stack_protect_fail ()
21345 	 : default_hidden_stack_protect_fail ();
21346 }
21347 
21348 /* Select a format to encode pointers in exception handling data.  CODE
21349    is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
21350    true if the symbol may be affected by dynamic relocations.
21351 
21352    ??? All x86 object file formats are capable of representing this.
21353    After all, the relocation needed is the same as for the call insn.
21354    Whether or not a particular assembler allows us to enter such, I
21355    guess we'll have to see.  */
21356 
21357 int
21358 asm_preferred_eh_data_format (int code, int global)
21359 {
21360   /* PE-COFF is effectively always -fPIC because of the .reloc section.  */
21361   if (flag_pic || TARGET_PECOFF)
21362     {
21363       int type = DW_EH_PE_sdata8;
21364       if (!TARGET_64BIT
21365 	  || ix86_cmodel == CM_SMALL_PIC
21366 	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
21367 	type = DW_EH_PE_sdata4;
21368       return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
21369     }
21370 
21371   if (ix86_cmodel == CM_SMALL
21372       || (ix86_cmodel == CM_MEDIUM && code))
21373     return DW_EH_PE_udata4;
21374 
21375   return DW_EH_PE_absptr;
21376 }
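
/* For example, small-model -fPIC x86-64 code gets
   DW_EH_PE_pcrel | DW_EH_PE_sdata4 for code references, with DW_EH_PE_indirect
   added for symbols that may be affected by dynamic relocations; non-PIC
   small-model code gets plain DW_EH_PE_udata4.  */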
21377 
21378 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
21379 static int
21380 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
21381                                  tree vectype, int)
21382 {
21383   bool fp = false;
21384   machine_mode mode = TImode;
21385   int index;
21386   if (vectype != NULL)
21387     {
21388       fp = FLOAT_TYPE_P (vectype);
21389       mode = TYPE_MODE (vectype);
21390     }
21391 
21392   switch (type_of_cost)
21393     {
21394       case scalar_stmt:
21395         return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
21396 
21397       case scalar_load:
21398 	/* Load/store costs are relative to a register move, which costs 2.
21399 	   Recompute them in COSTS_N_INSNS units so everything has the same base.  */
21400         return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
21401 			      : ix86_cost->int_load [2]) / 2;
21402 
21403       case scalar_store:
21404         return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
21405 			      : ix86_cost->int_store [2]) / 2;
21406 
21407       case vector_stmt:
21408         return ix86_vec_cost (mode,
21409 			      fp ? ix86_cost->addss : ix86_cost->sse_op);
21410 
21411       case vector_load:
21412 	index = sse_store_index (mode);
21413 	/* See PR82713 - we may end up being called on non-vector type.  */
21414 	if (index < 0)
21415 	  index = 2;
21416         return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;
21417 
21418       case vector_store:
21419 	index = sse_store_index (mode);
21420 	/* See PR82713 - we may end up being called on non-vector type.  */
21421 	if (index < 0)
21422 	  index = 2;
21423         return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;
21424 
21425       case vec_to_scalar:
21426       case scalar_to_vec:
21427         return ix86_vec_cost (mode, ix86_cost->sse_op);
21428 
21429       /* We should have separate costs for unaligned loads and gather/scatter.
21430 	 Do that incrementally.  */
21431       case unaligned_load:
21432 	index = sse_store_index (mode);
21433 	/* See PR82713 - we may end up being called on non-vector type.  */
21434 	if (index < 0)
21435 	  index = 2;
21436         return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;
21437 
21438       case unaligned_store:
21439 	index = sse_store_index (mode);
21440 	/* See PR82713 - we may end up being called on non-vector type.  */
21441 	if (index < 0)
21442 	  index = 2;
21443         return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;
21444 
21445       case vector_gather_load:
21446         return ix86_vec_cost (mode,
21447 			      COSTS_N_INSNS
21448 				 (ix86_cost->gather_static
21449 				  + ix86_cost->gather_per_elt
21450 				    * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
21451 
21452       case vector_scatter_store:
21453         return ix86_vec_cost (mode,
21454 			      COSTS_N_INSNS
21455 				 (ix86_cost->scatter_static
21456 				  + ix86_cost->scatter_per_elt
21457 				    * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
21458 
21459       case cond_branch_taken:
21460         return ix86_cost->cond_taken_branch_cost;
21461 
21462       case cond_branch_not_taken:
21463         return ix86_cost->cond_not_taken_branch_cost;
21464 
21465       case vec_perm:
21466       case vec_promote_demote:
21467         return ix86_vec_cost (mode, ix86_cost->sse_op);
21468 
21469       case vec_construct:
21470 	{
21471 	  /* N element inserts into SSE vectors.  */
21472 	  int cost = TYPE_VECTOR_SUBPARTS (vectype) * ix86_cost->sse_op;
21473 	  /* One vinserti128 for combining two SSE vectors for AVX256.  */
21474 	  if (GET_MODE_BITSIZE (mode) == 256)
21475 	    cost += ix86_vec_cost (mode, ix86_cost->addss);
21476 	  /* One vinserti64x4 and two vinserti128 for combining SSE
21477 	     and AVX256 vectors to AVX512.  */
21478 	  else if (GET_MODE_BITSIZE (mode) == 512)
21479 	    cost += 3 * ix86_vec_cost (mode, ix86_cost->addss);
21480 	  return cost;
21481 	}
21482 
21483       default:
21484         gcc_unreachable ();
21485     }
21486 }
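
/* Worked example (the table value is illustrative only): if
   ix86_cost->sse_load[0] were 10, a floating-point scalar_load would cost
   COSTS_N_INSNS (10) / 2 == 20, i.e. five times the COSTS_N_INSNS (1) == 4
   charged for a plain scalar_stmt.  The division by two rebases the cost
   tables, which are expressed relative to a register move of cost 2, onto
   the COSTS_N_INSNS scale used elsewhere.  */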
21487 
21488 
21489 /* This function returns the calling-ABI-specific va_list type node.
21490    It returns the FNDECL-specific va_list type.  */
21491 
21492 static tree
21493 ix86_fn_abi_va_list (tree fndecl)
21494 {
21495   if (!TARGET_64BIT)
21496     return va_list_type_node;
21497   gcc_assert (fndecl != NULL_TREE);
21498 
21499   if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
21500     return ms_va_list_type_node;
21501   else
21502     return sysv_va_list_type_node;
21503 }
21504 
21505 /* Returns the canonical va_list type specified by TYPE. If there
21506    is no valid TYPE provided, it returns NULL_TREE.  */
21507 
21508 static tree
21509 ix86_canonical_va_list_type (tree type)
21510 {
21511   if (TARGET_64BIT)
21512     {
21513       if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
21514 	return ms_va_list_type_node;
21515 
21516       if ((TREE_CODE (type) == ARRAY_TYPE
21517 	   && integer_zerop (array_type_nelts (type)))
21518 	  || POINTER_TYPE_P (type))
21519 	{
21520 	  tree elem_type = TREE_TYPE (type);
21521 	  if (TREE_CODE (elem_type) == RECORD_TYPE
21522 	      && lookup_attribute ("sysv_abi va_list",
21523 				   TYPE_ATTRIBUTES (elem_type)))
21524 	    return sysv_va_list_type_node;
21525 	}
21526 
21527       return NULL_TREE;
21528     }
21529 
21530   return std_canonical_va_list_type (type);
21531 }
21532 
21533 /* Iterate through the target-specific builtin types for va_list.
21534    IDX denotes the iterator, *PTREE is set to the result type of
21535    the va_list builtin, and *PNAME to its internal type.
21536    Returns zero if there is no element for this index, otherwise
21537    IDX should be increased upon the next call.
21538    Note, do not iterate a base builtin's name like __builtin_va_list.
21539    Used from c_common_nodes_and_builtins.  */
21540 
21541 static int
21542 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
21543 {
21544   if (TARGET_64BIT)
21545     {
21546       switch (idx)
21547 	{
21548 	default:
21549 	  break;
21550 
21551 	case 0:
21552 	  *ptree = ms_va_list_type_node;
21553 	  *pname = "__builtin_ms_va_list";
21554 	  return 1;
21555 
21556 	case 1:
21557 	  *ptree = sysv_va_list_type_node;
21558 	  *pname = "__builtin_sysv_va_list";
21559 	  return 1;
21560 	}
21561     }
21562 
21563   return 0;
21564 }
21565 
21566 #undef TARGET_SCHED_DISPATCH
21567 #define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
21568 #undef TARGET_SCHED_DISPATCH_DO
21569 #define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
21570 #undef TARGET_SCHED_REASSOCIATION_WIDTH
21571 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
21572 #undef TARGET_SCHED_REORDER
21573 #define TARGET_SCHED_REORDER ix86_atom_sched_reorder
21574 #undef TARGET_SCHED_ADJUST_PRIORITY
21575 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
21576 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
21577 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
21578   ix86_dependencies_evaluation_hook
21579 
21580 
21581 /* Implementation of reassociation_width target hook used by
21582    reassoc phase to identify parallelism level in reassociated
21583    tree.  Statements tree_code is passed in OPC.  Arguments type
21584    is passed in MODE.  */
21585 
21586 static int
21587 ix86_reassociation_width (unsigned int op, machine_mode mode)
21588 {
21589   int width = 1;
21590   /* Vector part.  */
21591   if (VECTOR_MODE_P (mode))
21592     {
21593       int div = 1;
21594       if (INTEGRAL_MODE_P (mode))
21595 	width = ix86_cost->reassoc_vec_int;
21596       else if (FLOAT_MODE_P (mode))
21597 	width = ix86_cost->reassoc_vec_fp;
21598 
21599       if (width == 1)
21600 	return 1;
21601 
21602       /* Integer vector instructions execute in FP unit
21603 	 and can execute 3 additions and one multiplication per cycle.  */
21604       if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2
21605 	   || ix86_tune == PROCESSOR_ZNVER3)
21606    	  && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
21607 	return 1;
21608 
21609       /* Account for targets that split wide vectors into multiple parts.  */
21610       if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128)
21611 	div = GET_MODE_BITSIZE (mode) / 128;
21612       else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
21613 	div = GET_MODE_BITSIZE (mode) / 64;
21614       width = (width + div - 1) / div;
21615     }
21616   /* Scalar part.  */
21617   else if (INTEGRAL_MODE_P (mode))
21618     width = ix86_cost->reassoc_int;
21619   else if (FLOAT_MODE_P (mode))
21620     width = ix86_cost->reassoc_fp;
21621 
21622   /* Avoid using too many registers in 32bit mode.  */
21623   if (!TARGET_64BIT && width > 2)
21624     width = 2;
21625   return width;
21626 }
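
/* Worked example with an illustrative table value: for a V8SFmode operation
   on a TARGET_AVX256_SPLIT_REGS target whose reassoc_vec_fp is 4, div becomes
   256 / 128 = 2 and the returned width is (4 + 2 - 1) / 2 = 2, reflecting
   that each 256-bit operation is internally split into two 128-bit halves.  */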
21627 
21628 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
21629    place emms and femms instructions.  */
21630 
21631 static machine_mode
21632 ix86_preferred_simd_mode (scalar_mode mode)
21633 {
21634   if (!TARGET_SSE)
21635     return word_mode;
21636 
21637   switch (mode)
21638     {
21639     case E_QImode:
21640       if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
21641 	return V64QImode;
21642       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21643 	return V32QImode;
21644       else
21645 	return V16QImode;
21646 
21647     case E_HImode:
21648       if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
21649 	return V32HImode;
21650       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21651 	return V16HImode;
21652       else
21653 	return V8HImode;
21654 
21655     case E_SImode:
21656       if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
21657 	return V16SImode;
21658       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21659 	return V8SImode;
21660       else
21661 	return V4SImode;
21662 
21663     case E_DImode:
21664       if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
21665 	return V8DImode;
21666       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21667 	return V4DImode;
21668       else
21669 	return V2DImode;
21670 
21671     case E_SFmode:
21672       if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
21673 	return V16SFmode;
21674       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21675 	return V8SFmode;
21676       else
21677 	return V4SFmode;
21678 
21679     case E_DFmode:
21680       if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
21681 	return V8DFmode;
21682       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21683 	return V4DFmode;
21684       else if (TARGET_SSE2)
21685 	return V2DFmode;
21686       /* FALLTHRU */
21687 
21688     default:
21689       return word_mode;
21690     }
21691 }
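
/* In short: each scalar mode is widened to the largest enabled vector of that
   element type; e.g. SFmode becomes V16SFmode with AVX-512F, V8SFmode with
   plain AVX and V4SFmode otherwise, unless a preferred-width tuning
   (TARGET_PREFER_AVX128/TARGET_PREFER_AVX256) caps the choice.  */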
21692 
21693 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
21694    vectors.  If AVX512F is enabled then try vectorizing with 512bit,
21695    256bit and 128bit vectors.  */
21696 
21697 static unsigned int
21698 ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
21699 {
21700   if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
21701     {
21702       modes->safe_push (V64QImode);
21703       modes->safe_push (V32QImode);
21704       modes->safe_push (V16QImode);
21705     }
21706   else if (TARGET_AVX512F && all)
21707     {
21708       modes->safe_push (V32QImode);
21709       modes->safe_push (V16QImode);
21710       modes->safe_push (V64QImode);
21711     }
21712   else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21713     {
21714       modes->safe_push (V32QImode);
21715       modes->safe_push (V16QImode);
21716     }
21717   else if (TARGET_AVX && all)
21718     {
21719       modes->safe_push (V16QImode);
21720       modes->safe_push (V32QImode);
21721     }
21722   else if (TARGET_MMX_WITH_SSE)
21723     modes->safe_push (V16QImode);
21724 
21725   if (TARGET_MMX_WITH_SSE)
21726     modes->safe_push (V8QImode);
21727 
21728   return 0;
21729 }
21730 
21731 /* Implementation of targetm.vectorize.get_mask_mode.  */
21732 
21733 static opt_machine_mode
21734 ix86_get_mask_mode (machine_mode data_mode)
21735 {
21736   unsigned vector_size = GET_MODE_SIZE (data_mode);
21737   unsigned nunits = GET_MODE_NUNITS (data_mode);
21738   unsigned elem_size = vector_size / nunits;
21739 
21740   /* Scalar mask case.  */
21741   if ((TARGET_AVX512F && vector_size == 64)
21742       || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
21743     {
21744       if (elem_size == 4
21745 	  || elem_size == 8
21746 	  || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2)))
21747 	return smallest_int_mode_for_size (nunits);
21748     }
21749 
21750   scalar_int_mode elem_mode
21751     = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT);
21752 
21753   gcc_assert (elem_size * nunits == vector_size);
21754 
21755   return mode_for_vector (elem_mode, nunits);
21756 }
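
/* Example: V16SFmode data (64 bytes, 4-byte elements) with AVX-512F takes the
   scalar-mask path and returns HImode, i.e. a 16-bit k-register mask.
   V8SFmode data without AVX-512VL misses that path and falls through to a
   V8SImode vector mask instead.  */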
21757 
21758 
21759 
21760 /* Return class of registers which could be used for pseudo of MODE
21761    and of class RCLASS for spilling instead of memory.  Return NO_REGS
21762    if it is not possible or non-profitable.  */
21763 
21764 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.  */
21765 
21766 static reg_class_t
21767 ix86_spill_class (reg_class_t rclass, machine_mode mode)
21768 {
21769   if (0 && TARGET_GENERAL_REGS_SSE_SPILL
21770       && TARGET_SSE2
21771       && TARGET_INTER_UNIT_MOVES_TO_VEC
21772       && TARGET_INTER_UNIT_MOVES_FROM_VEC
21773       && (mode == SImode || (TARGET_64BIT && mode == DImode))
21774       && INTEGER_CLASS_P (rclass))
21775     return ALL_SSE_REGS;
21776   return NO_REGS;
21777 }
21778 
21779 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST.  Like the default implementation,
21780    but returns a lower bound.  */
21781 
21782 static unsigned int
21783 ix86_max_noce_ifcvt_seq_cost (edge e)
21784 {
21785   bool predictable_p = predictable_edge_p (e);
21786   if (predictable_p)
21787     {
21788       if (global_options_set.x_param_max_rtl_if_conversion_predictable_cost)
21789 	return param_max_rtl_if_conversion_predictable_cost;
21790     }
21791   else
21792     {
21793       if (global_options_set.x_param_max_rtl_if_conversion_unpredictable_cost)
21794 	return param_max_rtl_if_conversion_unpredictable_cost;
21795     }
21796 
21797   return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2);
21798 }
21799 
21800 /* Return true if SEQ is a good candidate as a replacement for the
21801    if-convertible sequence described in IF_INFO.  */
21802 
21803 static bool
21804 ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
21805 {
21806   if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
21807     {
21808       int cmov_cnt = 0;
21809       /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
21810 	 Maybe we should allow even more conditional moves as long as they
21811 	 are used far enough not to stall the CPU, or also consider
21812 	 IF_INFO->TEST_BB succ edge probabilities.  */
21813       for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
21814 	{
21815 	  rtx set = single_set (insn);
21816 	  if (!set)
21817 	    continue;
21818 	  if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
21819 	    continue;
21820 	  rtx src = SET_SRC (set);
21821 	  machine_mode mode = GET_MODE (src);
21822 	  if (GET_MODE_CLASS (mode) != MODE_INT
21823 	      && GET_MODE_CLASS (mode) != MODE_FLOAT)
21824 	    continue;
21825 	  if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
21826 	      || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
21827 	    continue;
21828 	  /* insn is CMOV or FCMOV.  */
21829 	  if (++cmov_cnt > 1)
21830 	    return false;
21831 	}
21832     }
21833   return default_noce_conversion_profitable_p (seq, if_info);
21834 }
21835 
21836 /* Implement targetm.vectorize.init_cost.  */
21837 
21838 static void *
21839 ix86_init_cost (class loop *)
21840 {
21841   unsigned *cost = XNEWVEC (unsigned, 3);
21842   cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
21843   return cost;
21844 }
21845 
21846 /* Implement targetm.vectorize.add_stmt_cost.  */
21847 
21848 static unsigned
21849 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
21850 		    class _stmt_vec_info *stmt_info, int misalign,
21851 		    enum vect_cost_model_location where)
21852 {
21853   unsigned *cost = (unsigned *) data;
21854   unsigned retval = 0;
21855   bool scalar_p
21856     = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
21857 
21858   tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
21859   int stmt_cost = -1;
21860 
21861   bool fp = false;
21862   machine_mode mode = scalar_p ? SImode : TImode;
21863 
21864   if (vectype != NULL)
21865     {
21866       fp = FLOAT_TYPE_P (vectype);
21867       mode = TYPE_MODE (vectype);
21868       if (scalar_p)
21869 	mode = TYPE_MODE (TREE_TYPE (vectype));
21870     }
21871 
21872   if ((kind == vector_stmt || kind == scalar_stmt)
21873       && stmt_info
21874       && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
21875     {
21876       tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
21877       /*machine_mode inner_mode = mode;
21878       if (VECTOR_MODE_P (mode))
21879 	inner_mode = GET_MODE_INNER (mode);*/
21880 
21881       switch (subcode)
21882 	{
21883 	case PLUS_EXPR:
21884 	case POINTER_PLUS_EXPR:
21885 	case MINUS_EXPR:
21886 	  if (kind == scalar_stmt)
21887 	    {
21888 	      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21889 		stmt_cost = ix86_cost->addss;
21890 	      else if (X87_FLOAT_MODE_P (mode))
21891 		stmt_cost = ix86_cost->fadd;
21892 	      else
21893 	        stmt_cost = ix86_cost->add;
21894 	    }
21895 	  else
21896 	    stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
21897 				       : ix86_cost->sse_op);
21898 	  break;
21899 
21900 	case MULT_EXPR:
21901 	case WIDEN_MULT_EXPR:
21902 	case MULT_HIGHPART_EXPR:
21903 	  stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
21904 	  break;
21905 	case NEGATE_EXPR:
21906 	  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21907 	    stmt_cost = ix86_cost->sse_op;
21908 	  else if (X87_FLOAT_MODE_P (mode))
21909 	    stmt_cost = ix86_cost->fchs;
21910 	  else if (VECTOR_MODE_P (mode))
21911 	    stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
21912 	  else
21913 	    stmt_cost = ix86_cost->add;
21914 	  break;
21915 	case TRUNC_DIV_EXPR:
21916 	case CEIL_DIV_EXPR:
21917 	case FLOOR_DIV_EXPR:
21918 	case ROUND_DIV_EXPR:
21919 	case TRUNC_MOD_EXPR:
21920 	case CEIL_MOD_EXPR:
21921 	case FLOOR_MOD_EXPR:
21922 	case RDIV_EXPR:
21923 	case ROUND_MOD_EXPR:
21924 	case EXACT_DIV_EXPR:
21925 	  stmt_cost = ix86_division_cost (ix86_cost, mode);
21926 	  break;
21927 
21928 	case RSHIFT_EXPR:
21929 	case LSHIFT_EXPR:
21930 	case LROTATE_EXPR:
21931 	case RROTATE_EXPR:
21932 	  {
21933 	    tree op2 = gimple_assign_rhs2 (stmt_info->stmt);
21934 	    stmt_cost = ix86_shift_rotate_cost
21935 			   (ix86_cost, mode,
21936 		            TREE_CODE (op2) == INTEGER_CST,
21937 			    cst_and_fits_in_hwi (op2) ? int_cst_value (op2) : -1,
21938 		            true, false, false, NULL, NULL);
21939 	  }
21940 	  break;
21941 	case NOP_EXPR:
21942 	  /* Only sign-conversions are free.  */
21943 	  if (tree_nop_conversion_p
21944 	        (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
21945 		 TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
21946 	    stmt_cost = 0;
21947 	  break;
21948 
21949 	case BIT_IOR_EXPR:
21950 	case ABS_EXPR:
21951 	case ABSU_EXPR:
21952 	case MIN_EXPR:
21953 	case MAX_EXPR:
21954 	case BIT_XOR_EXPR:
21955 	case BIT_AND_EXPR:
21956 	case BIT_NOT_EXPR:
21957 	  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21958 	    stmt_cost = ix86_cost->sse_op;
21959 	  else if (VECTOR_MODE_P (mode))
21960 	    stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
21961 	  else
21962 	    stmt_cost = ix86_cost->add;
21963 	  break;
21964 	default:
21965 	  break;
21966 	}
21967     }
21968 
21969   combined_fn cfn;
21970   if ((kind == vector_stmt || kind == scalar_stmt)
21971       && stmt_info
21972       && stmt_info->stmt
21973       && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
21974     switch (cfn)
21975       {
21976       case CFN_FMA:
21977 	stmt_cost = ix86_vec_cost (mode,
21978 				   mode == SFmode ? ix86_cost->fmass
21979 				   : ix86_cost->fmasd);
21980 	break;
21981       default:
21982 	break;
21983       }
21984 
21985   /* If we do elementwise loads into a vector then we are bound by
21986      latency and execution resources for the many scalar loads
21987      (AGU and load ports).  Try to account for this by scaling the
21988      construction cost by the number of elements involved.  */
21989   if ((kind == vec_construct || kind == vec_to_scalar)
21990       && stmt_info
21991       && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
21992 	  || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
21993       && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
21994       && TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info))) != INTEGER_CST)
21995     {
21996       stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
21997       stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
21998     }
21999   if (stmt_cost == -1)
22000     stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
22001 
22002   /* Penalize DFmode vector operations for Bonnell.  */
22003   if (TARGET_BONNELL && kind == vector_stmt
22004       && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
22005     stmt_cost *= 5;  /* FIXME: The value here is arbitrary.  */
22006 
22007   /* Statements in an inner loop relative to the loop being
22008      vectorized are weighted more heavily.  The value here is
22009      arbitrary and could potentially be improved with analysis.  */
22010   if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
22011     count *= 50;  /* FIXME.  */
22012 
22013   retval = (unsigned) (count * stmt_cost);
22014 
22015   /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
22016      for Silvermont, as it has an out-of-order integer pipeline that can execute
22017      2 scalar instructions per tick, but an in-order SIMD pipeline.  */
22018   if ((TARGET_SILVERMONT || TARGET_GOLDMONT || TARGET_GOLDMONT_PLUS
22019        || TARGET_TREMONT || TARGET_INTEL) && stmt_info && stmt_info->stmt)
22020     {
22021       tree lhs_op = gimple_get_lhs (stmt_info->stmt);
22022       if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
22023 	retval = (retval * 17) / 10;
22024     }
22025 
22026   cost[where] += retval;
22027 
22028   return retval;
22029 }
22030 
22031 /* Implement targetm.vectorize.finish_cost.  */
22032 
22033 static void
22034 ix86_finish_cost (void *data, unsigned *prologue_cost,
22035 		  unsigned *body_cost, unsigned *epilogue_cost)
22036 {
22037   unsigned *cost = (unsigned *) data;
22038   *prologue_cost = cost[vect_prologue];
22039   *body_cost     = cost[vect_body];
22040   *epilogue_cost = cost[vect_epilogue];
22041 }
22042 
22043 /* Implement targetm.vectorize.destroy_cost_data.  */
22044 
22045 static void
22046 ix86_destroy_cost_data (void *data)
22047 {
22048   free (data);
22049 }
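
/* The four hooks above form the vectorizer costing lifecycle: init_cost
   allocates the three prologue/body/epilogue accumulators, add_stmt_cost is
   invoked for each statement being costed and bumps one of them, finish_cost
   hands the accumulated totals back to the vectorizer, and destroy_cost_data
   frees the allocation.  */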
22050 
22051 /* Validate target specific memory model bits in VAL. */
22052 
22053 static unsigned HOST_WIDE_INT
22054 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
22055 {
22056   enum memmodel model = memmodel_from_int (val);
22057   bool strong;
22058 
22059   if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
22060 				      |MEMMODEL_MASK)
22061       || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
22062     {
22063       warning (OPT_Winvalid_memory_model,
22064 	       "unknown architecture specific memory model");
22065       return MEMMODEL_SEQ_CST;
22066     }
22067   strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
22068   if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
22069     {
22070       warning (OPT_Winvalid_memory_model,
22071 	      "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
22072 	       "memory model");
22073       return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
22074     }
22075   if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
22076     {
22077       warning (OPT_Winvalid_memory_model,
22078 	      "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
22079 	       "memory model");
22080       return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
22081     }
22082   return val;
22083 }
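
/* The HLE bits are combined with a C11 memory order by the user; a typical
   (illustrative) elided-lock acquisition looks like

     while (__atomic_exchange_n (&lockvar, 1,
				 __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
       ;

   and this hook rejects mismatches such as HLE_ACQUIRE paired with a
   weaker-than-acquire memory model.  */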
22084 
22085 /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
22086    CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
22087    CLONEI->simdlen.  Return 0 if SIMD clones shouldn't be emitted,
22088    or the number of vecsize_mangle variants that should be emitted.  */
22089 
22090 static int
22091 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
22092 					     struct cgraph_simd_clone *clonei,
22093 					     tree base_type, int num)
22094 {
22095   int ret = 1;
22096 
22097   if (clonei->simdlen
22098       && (clonei->simdlen < 2
22099 	  || clonei->simdlen > 1024
22100 	  || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
22101     {
22102       warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
22103 		  "unsupported simdlen %d", clonei->simdlen);
22104       return 0;
22105     }
22106 
22107   tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
22108   if (TREE_CODE (ret_type) != VOID_TYPE)
22109     switch (TYPE_MODE (ret_type))
22110       {
22111       case E_QImode:
22112       case E_HImode:
22113       case E_SImode:
22114       case E_DImode:
22115       case E_SFmode:
22116       case E_DFmode:
22117       /* case E_SCmode: */
22118       /* case E_DCmode: */
22119 	if (!AGGREGATE_TYPE_P (ret_type))
22120 	  break;
22121 	/* FALLTHRU */
22122       default:
22123 	warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
22124 		    "unsupported return type %qT for simd", ret_type);
22125 	return 0;
22126       }
22127 
22128   tree t;
22129   int i;
22130   tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
22131   bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);
22132 
22133   for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
22134        t && t != void_list_node; t = TREE_CHAIN (t), i++)
22135     {
22136       tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
22137       switch (TYPE_MODE (arg_type))
22138 	{
22139 	case E_QImode:
22140 	case E_HImode:
22141 	case E_SImode:
22142 	case E_DImode:
22143 	case E_SFmode:
22144 	case E_DFmode:
22145 	/* case E_SCmode: */
22146 	/* case E_DCmode: */
22147 	  if (!AGGREGATE_TYPE_P (arg_type))
22148 	    break;
22149 	  /* FALLTHRU */
22150 	default:
22151 	  if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
22152 	    break;
22153 	  warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
22154 		      "unsupported argument type %qT for simd", arg_type);
22155 	  return 0;
22156 	}
22157     }
22158 
22159   if (!TREE_PUBLIC (node->decl))
22160     {
22161       /* If the function isn't exported, we can pick up just one ISA
22162 	 for the clones.  */
22163       if (TARGET_AVX512F)
22164 	clonei->vecsize_mangle = 'e';
22165       else if (TARGET_AVX2)
22166 	clonei->vecsize_mangle = 'd';
22167       else if (TARGET_AVX)
22168 	clonei->vecsize_mangle = 'c';
22169       else
22170 	clonei->vecsize_mangle = 'b';
22171       ret = 1;
22172     }
22173   else
22174     {
22175       clonei->vecsize_mangle = "bcde"[num];
22176       ret = 4;
22177     }
22178   clonei->mask_mode = VOIDmode;
22179   switch (clonei->vecsize_mangle)
22180     {
22181     case 'b':
22182       clonei->vecsize_int = 128;
22183       clonei->vecsize_float = 128;
22184       break;
22185     case 'c':
22186       clonei->vecsize_int = 128;
22187       clonei->vecsize_float = 256;
22188       break;
22189     case 'd':
22190       clonei->vecsize_int = 256;
22191       clonei->vecsize_float = 256;
22192       break;
22193     case 'e':
22194       clonei->vecsize_int = 512;
22195       clonei->vecsize_float = 512;
22196       if (TYPE_MODE (base_type) == QImode)
22197 	clonei->mask_mode = DImode;
22198       else
22199 	clonei->mask_mode = SImode;
22200       break;
22201     }
22202   if (clonei->simdlen == 0)
22203     {
22204       if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
22205 	clonei->simdlen = clonei->vecsize_int;
22206       else
22207 	clonei->simdlen = clonei->vecsize_float;
22208       clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
22209     }
22210   else if (clonei->simdlen > 16)
22211     {
22212       /* For compatibility with ICC, use the same upper bounds
22213 	 for simdlen.  In particular, for CTYPE below, use the return type,
22214 	 unless the function returns void, in which case use the characteristic
22215 	 type.  If it is possible for the given SIMDLEN to pass a CTYPE value
22216 	 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
22217 	 for 64-bit code), accept that SIMDLEN, otherwise warn and don't
22218 	 emit corresponding clone.  */
22219       tree ctype = ret_type;
22220       if (TREE_CODE (ret_type) == VOID_TYPE)
22221 	ctype = base_type;
22222       int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
22223       if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
22224 	cnt /= clonei->vecsize_int;
22225       else
22226 	cnt /= clonei->vecsize_float;
22227       if (cnt > (TARGET_64BIT ? 16 : 8))
22228 	{
22229 	  warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
22230 		      "unsupported simdlen %d", clonei->simdlen);
22231 	  return 0;
22232 	}
22233       }
22234   return ret;
22235 }
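
/* Rough illustration: an exported "#pragma omp declare simd" function whose
   characteristic type is float gets ret == 4, one clone per mangle letter
   'b'/'c'/'d'/'e' (SSE2, AVX, AVX2, AVX-512), with default simdlen values of
   128/32 = 4, 256/32 = 8, 256/32 = 8 and 512/32 = 16 lanes respectively.  */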
22236 
22237 /* If SIMD clone NODE can't be used in a vectorized loop
22238    in the current function, return -1, otherwise return the badness of using it
22239    (0 if it is most desirable from vecsize_mangle point of view, 1
22240    slightly less desirable, etc.).  */
22241 
22242 static int
22243 ix86_simd_clone_usable (struct cgraph_node *node)
22244 {
22245   switch (node->simdclone->vecsize_mangle)
22246     {
22247     case 'b':
22248       if (!TARGET_SSE2)
22249 	return -1;
22250       if (!TARGET_AVX)
22251 	return 0;
22252       return TARGET_AVX2 ? 2 : 1;
22253     case 'c':
22254       if (!TARGET_AVX)
22255 	return -1;
22256       return TARGET_AVX2 ? 1 : 0;
22257     case 'd':
22258       if (!TARGET_AVX2)
22259 	return -1;
22260       return 0;
22261     case 'e':
22262       if (!TARGET_AVX512F)
22263 	return -1;
22264       return 0;
22265     default:
22266       gcc_unreachable ();
22267     }
22268 }
22269 
22270 /* This function adjusts the unroll factor based on
22271    the hardware capabilities.  For example, bdver3 has
22272    a loop buffer which makes unrolling of smaller
22273    loops less important.  This function decides the
22274    unroll factor using the number of memory references
22275    (the value 32 is used) as a heuristic.  */
22276 
22277 static unsigned
22278 ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop)
22279 {
22280   basic_block *bbs;
22281   rtx_insn *insn;
22282   unsigned i;
22283   unsigned mem_count = 0;
22284 
22285   if (!TARGET_ADJUST_UNROLL)
22286      return nunroll;
22287 
22288   /* Count the number of memory references within the loop body.
22289      This value determines the unrolling factor for bdver3 and bdver4
22290      architectures. */
22291   subrtx_iterator::array_type array;
22292   bbs = get_loop_body (loop);
22293   for (i = 0; i < loop->num_nodes; i++)
22294     FOR_BB_INSNS (bbs[i], insn)
22295       if (NONDEBUG_INSN_P (insn))
22296 	FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
22297 	  if (const_rtx x = *iter)
22298 	    if (MEM_P (x))
22299 	      {
22300 		machine_mode mode = GET_MODE (x);
22301 		unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
22302 		if (n_words > 4)
22303 		  mem_count += 2;
22304 		else
22305 		  mem_count += 1;
22306 	      }
22307   free (bbs);
22308 
22309   if (mem_count && mem_count <= 32)
22310     return MIN (nunroll, 32 / mem_count);
22311 
22312   return nunroll;
22313 }
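
/* Worked example: on a tuning with TARGET_ADJUST_UNROLL (bdver3/bdver4), a
   loop body containing eight single-word memory references yields
   mem_count == 8, so the requested unroll factor is capped at
   MIN (nunroll, 32 / 8) == 4; a body with more than 32 references leaves
   nunroll untouched.  */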
22314 
22315 
22316 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P.  */
22317 
22318 static bool
22319 ix86_float_exceptions_rounding_supported_p (void)
22320 {
22321   /* For x87 floating point with standard excess precision handling,
22322      there is no adddf3 pattern (since x87 floating point only has
22323      XFmode operations) so the default hook implementation gets this
22324      wrong.  */
22325   return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH);
22326 }
22327 
22328 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.  */
22329 
22330 static void
22331 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
22332 {
22333   if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH))
22334     return;
22335   tree exceptions_var = create_tmp_var_raw (integer_type_node);
22336   if (TARGET_80387)
22337     {
22338       tree fenv_index_type = build_index_type (size_int (6));
22339       tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
22340       tree fenv_var = create_tmp_var_raw (fenv_type);
22341       TREE_ADDRESSABLE (fenv_var) = 1;
22342       tree fenv_ptr = build_pointer_type (fenv_type);
22343       tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
22344       fenv_addr = fold_convert (ptr_type_node, fenv_addr);
22345       tree fnstenv = get_ix86_builtin (IX86_BUILTIN_FNSTENV);
22346       tree fldenv = get_ix86_builtin (IX86_BUILTIN_FLDENV);
22347       tree fnstsw = get_ix86_builtin (IX86_BUILTIN_FNSTSW);
22348       tree fnclex = get_ix86_builtin (IX86_BUILTIN_FNCLEX);
22349       tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
22350       tree hold_fnclex = build_call_expr (fnclex, 0);
22351       fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
22352 			 NULL_TREE, NULL_TREE);
22353       *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
22354 		      hold_fnclex);
22355       *clear = build_call_expr (fnclex, 0);
22356       tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
22357       tree fnstsw_call = build_call_expr (fnstsw, 0);
22358       tree sw_mod = build4 (TARGET_EXPR, short_unsigned_type_node, sw_var,
22359 			    fnstsw_call, NULL_TREE, NULL_TREE);
22360       tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
22361       tree update_mod = build4 (TARGET_EXPR, integer_type_node,
22362 				exceptions_var, exceptions_x87,
22363 				NULL_TREE, NULL_TREE);
22364       *update = build2 (COMPOUND_EXPR, integer_type_node,
22365 			sw_mod, update_mod);
22366       tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
22367       *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
22368     }
22369   if (TARGET_SSE && TARGET_SSE_MATH)
22370     {
22371       tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
22372       tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
22373       tree stmxcsr = get_ix86_builtin (IX86_BUILTIN_STMXCSR);
22374       tree ldmxcsr = get_ix86_builtin (IX86_BUILTIN_LDMXCSR);
22375       tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
22376       tree hold_assign_orig = build4 (TARGET_EXPR, unsigned_type_node,
22377 				      mxcsr_orig_var, stmxcsr_hold_call,
22378 				      NULL_TREE, NULL_TREE);
22379       tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
22380 				  mxcsr_orig_var,
22381 				  build_int_cst (unsigned_type_node, 0x1f80));
22382       hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
22383 			     build_int_cst (unsigned_type_node, 0xffffffc0));
22384       tree hold_assign_mod = build4 (TARGET_EXPR, unsigned_type_node,
22385 				     mxcsr_mod_var, hold_mod_val,
22386 				     NULL_TREE, NULL_TREE);
22387       tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
22388       tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
22389 			      hold_assign_orig, hold_assign_mod);
22390       hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
22391 			 ldmxcsr_hold_call);
22392       if (*hold)
22393 	*hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
22394       else
22395 	*hold = hold_all;
22396       tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
22397       if (*clear)
22398 	*clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
22399 			 ldmxcsr_clear_call);
22400       else
22401 	*clear = ldmxcsr_clear_call;
22402       tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
22403       tree exceptions_sse = fold_convert (integer_type_node,
22404 					  stxmcsr_update_call);
22405       if (*update)
22406 	{
22407 	  tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
22408 					exceptions_var, exceptions_sse);
22409 	  tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
22410 					   exceptions_var, exceptions_mod);
22411 	  *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
22412 			    exceptions_assign);
22413 	}
22414       else
22415 	*update = build4 (TARGET_EXPR, integer_type_node, exceptions_var,
22416 			  exceptions_sse, NULL_TREE, NULL_TREE);
22417       tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
22418       *update = build2 (COMPOUND_EXPR, void_type_node, *update,
22419 			ldmxcsr_update_call);
22420     }
22421   tree atomic_feraiseexcept
22422     = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
22423   tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
22424 						    1, exceptions_var);
22425   *update = build2 (COMPOUND_EXPR, void_type_node, *update,
22426 		    atomic_feraiseexcept_call);
22427 }
22428 
22429 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
22430 /* For i386, a common symbol is local only for non-PIE binaries.  For
22431    x86-64, a common symbol is local only for non-PIE binaries or when the
22432    linker supports copy relocations in PIE binaries.  */
22433 
22434 static bool
22435 ix86_binds_local_p (const_tree exp)
22436 {
22437   return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
22438 				  (!flag_pic
22439 				   || (TARGET_64BIT
22440 				       && HAVE_LD_PIE_COPYRELOC != 0)));
22441 }
22442 #endif
22443 
22444 /* If MEM is in the form of [base+offset], extract the two parts
22445    of address and set to BASE and OFFSET, otherwise return false.  */
22446 
22447 static bool
22448 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
22449 {
22450   rtx addr;
22451 
22452   gcc_assert (MEM_P (mem));
22453 
22454   addr = XEXP (mem, 0);
22455 
22456   if (GET_CODE (addr) == CONST)
22457     addr = XEXP (addr, 0);
22458 
22459   if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
22460     {
22461       *base = addr;
22462       *offset = const0_rtx;
22463       return true;
22464     }
22465 
22466   if (GET_CODE (addr) == PLUS
22467       && (REG_P (XEXP (addr, 0))
22468 	  || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
22469       && CONST_INT_P (XEXP (addr, 1)))
22470     {
22471       *base = XEXP (addr, 0);
22472       *offset = XEXP (addr, 1);
22473       return true;
22474     }
22475 
22476   return false;
22477 }
22478 
22479 /* Given OPERANDS of consecutive load/store, check if we can merge
22480    them into move multiple.  LOAD is true if they are load instructions.
22481    MODE is the mode of memory operands.  */
22482 
22483 bool
22484 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
22485 				    machine_mode mode)
22486 {
22487   HOST_WIDE_INT offval_1, offval_2, msize;
22488   rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
22489 
22490   if (load)
22491     {
22492       mem_1 = operands[1];
22493       mem_2 = operands[3];
22494       reg_1 = operands[0];
22495       reg_2 = operands[2];
22496     }
22497   else
22498     {
22499       mem_1 = operands[0];
22500       mem_2 = operands[2];
22501       reg_1 = operands[1];
22502       reg_2 = operands[3];
22503     }
22504 
22505   gcc_assert (REG_P (reg_1) && REG_P (reg_2));
22506 
22507   if (REGNO (reg_1) != REGNO (reg_2))
22508     return false;
22509 
22510   /* Check if the addresses are in the form of [base+offset].  */
22511   if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
22512     return false;
22513   if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
22514     return false;
22515 
22516   /* Check if the bases are the same.  */
22517   if (!rtx_equal_p (base_1, base_2))
22518     return false;
22519 
22520   offval_1 = INTVAL (offset_1);
22521   offval_2 = INTVAL (offset_2);
22522   msize = GET_MODE_SIZE (mode);
22523   /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address.  */
22524   if (offval_1 + msize != offval_2)
22525     return false;
22526 
22527   return true;
22528 }
22529 
22530 /* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */
22531 
22532 static bool
22533 ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
22534 			optimization_type opt_type)
22535 {
22536   switch (op)
22537     {
22538     case asin_optab:
22539     case acos_optab:
22540     case log1p_optab:
22541     case exp_optab:
22542     case exp10_optab:
22543     case exp2_optab:
22544     case expm1_optab:
22545     case ldexp_optab:
22546     case scalb_optab:
22547     case round_optab:
22548       return opt_type == OPTIMIZE_FOR_SPEED;
22549 
22550     case rint_optab:
22551       if (SSE_FLOAT_MODE_P (mode1)
22552 	  && TARGET_SSE_MATH
22553 	  && !flag_trapping_math
22554 	  && !TARGET_SSE4_1)
22555 	return opt_type == OPTIMIZE_FOR_SPEED;
22556       return true;
22557 
22558     case floor_optab:
22559     case ceil_optab:
22560     case btrunc_optab:
22561       if (SSE_FLOAT_MODE_P (mode1)
22562 	  && TARGET_SSE_MATH
22563 	  && !flag_trapping_math
22564 	  && TARGET_SSE4_1)
22565 	return true;
22566       return opt_type == OPTIMIZE_FOR_SPEED;
22567 
22568     case rsqrt_optab:
22569       return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
22570 
22571     default:
22572       return true;
22573     }
22574 }
22575 
22576 /* Address space support.
22577 
22578    This is not "far pointers" in the 16-bit sense, but an easy way
22579    to use %fs and %gs segment prefixes.  Therefore:
22580 
22581     (a) All address spaces have the same modes,
22582     (b) All address spaces have the same address forms,
22583     (c) While %fs and %gs are technically subsets of the generic
22584         address space, they are probably not subsets of each other.
22585     (d) Since we have no access to the segment base register values
22586         without resorting to a system call, we cannot convert a
22587         non-default address space to a default address space.
22588         Therefore we do not claim %fs or %gs are subsets of generic.
22589 
22590    Therefore we can (mostly) use the default hooks.  */
22591 
22592 /* All use of segmentation is assumed to make address 0 valid.  */
22593 
22594 static bool
22595 ix86_addr_space_zero_address_valid (addr_space_t as)
22596 {
22597   return as != ADDR_SPACE_GENERIC;
22598 }
22599 
22600 static void
22601 ix86_init_libfuncs (void)
22602 {
22603   if (TARGET_64BIT)
22604     {
22605       set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
22606       set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
22607     }
22608   else
22609     {
22610       set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
22611       set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
22612     }
22613 
22614 #if TARGET_MACHO
22615   darwin_rename_builtins ();
22616 #endif
22617 }
22618 
22619 /* Set the value of FLT_EVAL_METHOD in float.h.  When using only the
22620    FPU, assume that the fpcw is set to extended precision; when using
22621    only SSE, rounding is correct; when using both SSE and the FPU,
22622    the rounding precision is indeterminate, since either may be chosen
22623    apparently at random.  */
22624 
22625 static enum flt_eval_method
22626 ix86_get_excess_precision (enum excess_precision_type type)
22627 {
22628   switch (type)
22629     {
22630       case EXCESS_PRECISION_TYPE_FAST:
22631 	/* The fastest type to promote to will always be the native type,
22632 	   whether that occurs with implicit excess precision or
22633 	   otherwise.  */
22634 	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
22635       case EXCESS_PRECISION_TYPE_STANDARD:
22636       case EXCESS_PRECISION_TYPE_IMPLICIT:
22637 	/* Otherwise, the excess precision we want when we are
22638 	   in a standards compliant mode, and the implicit precision we
22639 	   provide would be identical were it not for the unpredictable
22640 	   cases.  */
22641 	if (!TARGET_80387)
22642 	  return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
22643 	else if (!TARGET_MIX_SSE_I387)
22644 	  {
22645 	    if (!(TARGET_SSE && TARGET_SSE_MATH))
22646 	      return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
22647 	    else if (TARGET_SSE2)
22648 	      return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
22649 	  }
22650 
22651 	/* If we are in standards compliant mode, but we know we will
22652 	   calculate in unpredictable precision, return
22653 	   FLT_EVAL_METHOD_FLOAT.  There is no reason to introduce explicit
22654 	   excess precision if the target can't guarantee it will honor
22655 	   it.  */
22656 	return (type == EXCESS_PRECISION_TYPE_STANDARD
22657 		? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
22658 		: FLT_EVAL_METHOD_UNPREDICTABLE);
22659       default:
22660 	gcc_unreachable ();
22661     }
22662 
22663   return FLT_EVAL_METHOD_UNPREDICTABLE;
22664 }
22665 
22666 /* Implement PUSH_ROUNDING.  On 386, we have a pushw instruction that
22667    decrements by exactly 2 no matter what the position was; there is no pushb.
22668 
22669    But as the CIE data alignment factor on this arch is -4 for 32-bit targets
22670    and -8 for 64-bit targets, we need to make sure all stack pointer adjustments
22671    are multiples of 4 for 32-bit targets and 8 for 64-bit targets.  */
22672 
22673 poly_int64
22674 ix86_push_rounding (poly_int64 bytes)
22675 {
22676   return ROUND_UP (bytes, UNITS_PER_WORD);
22677 }
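
/* For instance, pushing a HImode value in 64-bit code is still accounted as
   an 8-byte stack adjustment (ROUND_UP (2, 8)), keeping every push a
   multiple of the -8 CIE data alignment factor described above.  */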
22678 
22679 /* Target-specific selftests.  */
22680 
22681 #if CHECKING_P
22682 
22683 namespace selftest {
22684 
22685 /* Verify that hard regs are dumped as expected (in compact mode).  */
22686 
22687 static void
22688 ix86_test_dumping_hard_regs ()
22689 {
22690   ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
22691   ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
22692 }
22693 
22694 /* Test dumping an insn with repeated references to the same SCRATCH,
22695    to verify the rtx_reuse code.  */
22696 
22697 static void
22698 ix86_test_dumping_memory_blockage ()
22699 {
22700   set_new_first_and_last_insn (NULL, NULL);
22701 
22702   rtx pat = gen_memory_blockage ();
22703   rtx_reuse_manager r;
22704   r.preprocess (pat);
22705 
22706   /* Verify that the repeated references to the SCRATCH show use of
22707      reuse IDs.  The first should be prefixed with a reuse ID,
22708      and the second should be dumped as a "reuse_rtx" of that ID.
22709      The expected string assumes Pmode == DImode.  */
22710   if (Pmode == DImode)
22711     ASSERT_RTL_DUMP_EQ_WITH_REUSE
22712       ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0  A8])\n"
22713        "        (unspec:BLK [\n"
22714        "                (mem/v:BLK (reuse_rtx 0) [0  A8])\n"
22715        "            ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
22716 }
22717 
22718 /* Verify loading an RTL dump; specifically a dump of copying
22719    a param on x86_64 from a hard reg into the frame.
22720    This test is target-specific since the dump contains target-specific
22721    hard reg names.  */
22722 
22723 static void
22724 ix86_test_loading_dump_fragment_1 ()
22725 {
22726   rtl_dump_test t (SELFTEST_LOCATION,
22727 		   locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));
22728 
22729   rtx_insn *insn = get_insn_by_uid (1);
22730 
22731   /* The block structure and indentation here is purely for
22732      readability; it mirrors the structure of the rtx.  */
22733   tree mem_expr;
22734   {
22735     rtx pat = PATTERN (insn);
22736     ASSERT_EQ (SET, GET_CODE (pat));
22737     {
22738       rtx dest = SET_DEST (pat);
22739       ASSERT_EQ (MEM, GET_CODE (dest));
22740       /* Verify the "/c" was parsed.  */
22741       ASSERT_TRUE (RTX_FLAG (dest, call));
22742       ASSERT_EQ (SImode, GET_MODE (dest));
22743       {
22744 	rtx addr = XEXP (dest, 0);
22745 	ASSERT_EQ (PLUS, GET_CODE (addr));
22746 	ASSERT_EQ (DImode, GET_MODE (addr));
22747 	{
22748 	  rtx lhs = XEXP (addr, 0);
22749 	  /* Verify that the "frame" REG was consolidated.  */
22750 	  ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
22751 	}
22752 	{
22753 	  rtx rhs = XEXP (addr, 1);
22754 	  ASSERT_EQ (CONST_INT, GET_CODE (rhs));
22755 	  ASSERT_EQ (-4, INTVAL (rhs));
22756 	}
22757       }
22758       /* Verify the "[1 i+0 S4 A32]" was parsed.  */
22759       ASSERT_EQ (1, MEM_ALIAS_SET (dest));
22760       /* "i" should have been handled by synthesizing a global int
22761 	 variable named "i".  */
22762       mem_expr = MEM_EXPR (dest);
22763       ASSERT_NE (mem_expr, NULL);
22764       ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
22765       ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
22766       ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
22767       ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
22768       /* "+0".  */
22769       ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
22770       ASSERT_EQ (0, MEM_OFFSET (dest));
22771       /* "S4".  */
22772       ASSERT_EQ (4, MEM_SIZE (dest));
22773       /* "A32.  */
22774       ASSERT_EQ (32, MEM_ALIGN (dest));
22775     }
22776     {
22777       rtx src = SET_SRC (pat);
22778       ASSERT_EQ (REG, GET_CODE (src));
22779       ASSERT_EQ (SImode, GET_MODE (src));
22780       ASSERT_EQ (5, REGNO (src));
22781       tree reg_expr = REG_EXPR (src);
22782       /* "i" here should point to the same var as for the MEM_EXPR.  */
22783       ASSERT_EQ (reg_expr, mem_expr);
22784     }
22785   }
22786 }
22787 
22788 /* Verify that the RTL loader copes with a call_insn dump.
22789    This test is target-specific since the dump contains a target-specific
22790    hard reg name.  */
22791 
22792 static void
22793 ix86_test_loading_call_insn ()
22794 {
22795   /* The test dump includes register "xmm0", which requires TARGET_SSE
22796      to exist.  */
22797   if (!TARGET_SSE)
22798     return;
22799 
22800   rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));
22801 
22802   rtx_insn *insn = get_insns ();
22803   ASSERT_EQ (CALL_INSN, GET_CODE (insn));
22804 
22805   /* "/j".  */
22806   ASSERT_TRUE (RTX_FLAG (insn, jump));
22807 
22808   rtx pat = PATTERN (insn);
22809   ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));
22810 
22811   /* Verify REG_NOTES.  */
22812   {
22813     /* "(expr_list:REG_CALL_DECL".   */
22814     ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
22815     rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
22816     ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));
22817 
22818     /* "(expr_list:REG_EH_REGION (const_int 0 [0])".  */
22819     rtx_expr_list *note1 = note0->next ();
22820     ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));
22821 
22822     ASSERT_EQ (NULL, note1->next ());
22823   }
22824 
22825   /* Verify CALL_INSN_FUNCTION_USAGE.  */
22826   {
22827     /* "(expr_list:DF (use (reg:DF 21 xmm0))".  */
22828     rtx_expr_list *usage
22829       = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
22830     ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
22831     ASSERT_EQ (DFmode, GET_MODE (usage));
22832     ASSERT_EQ (USE, GET_CODE (usage->element ()));
22833     ASSERT_EQ (NULL, usage->next ());
22834   }
22835 }
22836 
22837 /* Verify that the RTL loader copes with a dump from print_rtx_function.
22838    This test is target-specific since the dump contains target-specific
22839    hard reg names.  */
22840 
22841 static void
22842 ix86_test_loading_full_dump ()
22843 {
22844   rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));
22845 
22846   ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
22847 
22848   rtx_insn *insn_1 = get_insn_by_uid (1);
22849   ASSERT_EQ (NOTE, GET_CODE (insn_1));
22850 
22851   rtx_insn *insn_7 = get_insn_by_uid (7);
22852   ASSERT_EQ (INSN, GET_CODE (insn_7));
22853   ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));
22854 
22855   rtx_insn *insn_15 = get_insn_by_uid (15);
22856   ASSERT_EQ (INSN, GET_CODE (insn_15));
22857   ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
22858 
22859   /* Verify crtl->return_rtx.  */
22860   ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
22861   ASSERT_EQ (0, REGNO (crtl->return_rtx));
22862   ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
22863 }
22864 
22865 /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
22866    In particular, verify that it correctly loads the 2nd operand.
22867    This test is target-specific since these are machine-specific
22868    operands (and enums).  */
22869 
22870 static void
22871 ix86_test_loading_unspec ()
22872 {
22873   rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));
22874 
22875   ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
22876 
22877   ASSERT_TRUE (cfun);
22878 
22879   /* Test of an UNSPEC.  */
22880   rtx_insn *insn = get_insns ();
22881   ASSERT_EQ (INSN, GET_CODE (insn));
22882   rtx set = single_set (insn);
22883   ASSERT_NE (NULL, set);
22884   rtx dst = SET_DEST (set);
22885   ASSERT_EQ (MEM, GET_CODE (dst));
22886   rtx src = SET_SRC (set);
22887   ASSERT_EQ (UNSPEC, GET_CODE (src));
22888   ASSERT_EQ (BLKmode, GET_MODE (src));
22889   ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));
22890 
22891   rtx v0 = XVECEXP (src, 0, 0);
22892 
22893   /* Verify that the two uses of the first SCRATCH have pointer
22894      equality.  */
22895   rtx scratch_a = XEXP (dst, 0);
22896   ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));
22897 
22898   rtx scratch_b = XEXP (v0, 0);
22899   ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));
22900 
22901   ASSERT_EQ (scratch_a, scratch_b);
22902 
22903   /* Verify that the two mems are thus treated as equal.  */
22904   ASSERT_TRUE (rtx_equal_p (dst, v0));
22905 
22906   /* Verify that the insn is recognized.  */
22907   ASSERT_NE (-1, recog_memoized (insn));
22908 
22909   /* Test of an UNSPEC_VOLATILE, which has its own enum values.  */
22910   insn = NEXT_INSN (insn);
22911   ASSERT_EQ (INSN, GET_CODE (insn));
22912 
22913   set = single_set (insn);
22914   ASSERT_NE (NULL, set);
22915 
22916   src = SET_SRC (set);
22917   ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
22918   ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
22919 }
22920 
22921 /* Run all target-specific selftests.  */
22922 
22923 static void
22924 ix86_run_selftests (void)
22925 {
22926   ix86_test_dumping_hard_regs ();
22927   ix86_test_dumping_memory_blockage ();
22928 
22929   /* Various tests of loading RTL dumps, here because they contain
22930      ix86-isms (e.g. names of hard regs).  */
22931   ix86_test_loading_dump_fragment_1 ();
22932   ix86_test_loading_call_insn ();
22933   ix86_test_loading_full_dump ();
22934   ix86_test_loading_unspec ();
22935 }
22936 
22937 } // namespace selftest
22938 
22939 #endif /* CHECKING_P */
22940 
22941 /* Initialize the GCC target structure.  */
22942 #undef TARGET_RETURN_IN_MEMORY
22943 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
22944 
22945 #undef TARGET_LEGITIMIZE_ADDRESS
22946 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
22947 
22948 #undef TARGET_ATTRIBUTE_TABLE
22949 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
22950 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
22951 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
22952 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22953 #  undef TARGET_MERGE_DECL_ATTRIBUTES
22954 #  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
22955 #endif
22956 
22957 #undef TARGET_COMP_TYPE_ATTRIBUTES
22958 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
22959 
22960 #undef TARGET_INIT_BUILTINS
22961 #define TARGET_INIT_BUILTINS ix86_init_builtins
22962 #undef TARGET_BUILTIN_DECL
22963 #define TARGET_BUILTIN_DECL ix86_builtin_decl
22964 #undef TARGET_EXPAND_BUILTIN
22965 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
22966 
22967 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
22968 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
22969   ix86_builtin_vectorized_function
22970 
22971 #undef TARGET_VECTORIZE_BUILTIN_GATHER
22972 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
22973 
22974 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
22975 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
22976 
22977 #undef TARGET_BUILTIN_RECIPROCAL
22978 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
22979 
22980 #undef TARGET_ASM_FUNCTION_EPILOGUE
22981 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
22982 
22983 #undef TARGET_ENCODE_SECTION_INFO
22984 #ifndef SUBTARGET_ENCODE_SECTION_INFO
22985 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
22986 #else
22987 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
22988 #endif
22989 
22990 #undef TARGET_ASM_OPEN_PAREN
22991 #define TARGET_ASM_OPEN_PAREN ""
22992 #undef TARGET_ASM_CLOSE_PAREN
22993 #define TARGET_ASM_CLOSE_PAREN ""
22994 
22995 #undef TARGET_ASM_BYTE_OP
22996 #define TARGET_ASM_BYTE_OP ASM_BYTE
22997 
22998 #undef TARGET_ASM_ALIGNED_HI_OP
22999 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
23000 #undef TARGET_ASM_ALIGNED_SI_OP
23001 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
23002 #ifdef ASM_QUAD
23003 #undef TARGET_ASM_ALIGNED_DI_OP
23004 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
23005 #endif
23006 
23007 #undef TARGET_PROFILE_BEFORE_PROLOGUE
23008 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
23009 
23010 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
23011 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
23012 
23013 #undef TARGET_ASM_UNALIGNED_HI_OP
23014 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
23015 #undef TARGET_ASM_UNALIGNED_SI_OP
23016 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
23017 #undef TARGET_ASM_UNALIGNED_DI_OP
23018 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
23019 
23020 #undef TARGET_PRINT_OPERAND
23021 #define TARGET_PRINT_OPERAND ix86_print_operand
23022 #undef TARGET_PRINT_OPERAND_ADDRESS
23023 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
23024 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
23025 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
23026 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
23027 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
23028 
23029 #undef TARGET_SCHED_INIT_GLOBAL
23030 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
23031 #undef TARGET_SCHED_ADJUST_COST
23032 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
23033 #undef TARGET_SCHED_ISSUE_RATE
23034 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
23035 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
23036 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
23037   ia32_multipass_dfa_lookahead
23038 #undef TARGET_SCHED_MACRO_FUSION_P
23039 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
23040 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
23041 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
23042 
23043 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
23044 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
23045 
23046 #undef TARGET_MEMMODEL_CHECK
23047 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
23048 
23049 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
23050 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
23051 
23052 #ifdef HAVE_AS_TLS
23053 #undef TARGET_HAVE_TLS
23054 #define TARGET_HAVE_TLS true
23055 #endif
23056 #undef TARGET_CANNOT_FORCE_CONST_MEM
23057 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
23058 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
23059 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
23060 
23061 #undef TARGET_DELEGITIMIZE_ADDRESS
23062 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
23063 
23064 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
23065 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
23066 
23067 #undef TARGET_MS_BITFIELD_LAYOUT_P
23068 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
23069 
23070 #if TARGET_MACHO
23071 #undef TARGET_BINDS_LOCAL_P
23072 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
23073 #else
23074 #undef TARGET_BINDS_LOCAL_P
23075 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
23076 #endif
23077 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23078 #undef TARGET_BINDS_LOCAL_P
23079 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
23080 #endif
23081 
23082 #undef TARGET_ASM_OUTPUT_MI_THUNK
23083 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
23084 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
23085 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
23086 
23087 #undef TARGET_ASM_FILE_START
23088 #define TARGET_ASM_FILE_START x86_file_start
23089 
23090 #undef TARGET_OPTION_OVERRIDE
23091 #define TARGET_OPTION_OVERRIDE ix86_option_override
23092 
23093 #undef TARGET_REGISTER_MOVE_COST
23094 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
23095 #undef TARGET_MEMORY_MOVE_COST
23096 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
23097 #undef TARGET_RTX_COSTS
23098 #define TARGET_RTX_COSTS ix86_rtx_costs
23099 #undef TARGET_ADDRESS_COST
23100 #define TARGET_ADDRESS_COST ix86_address_cost
23101 
23102 #undef TARGET_FLAGS_REGNUM
23103 #define TARGET_FLAGS_REGNUM FLAGS_REG
23104 #undef TARGET_FIXED_CONDITION_CODE_REGS
23105 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
23106 #undef TARGET_CC_MODES_COMPATIBLE
23107 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
23108 
23109 #undef TARGET_MACHINE_DEPENDENT_REORG
23110 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
23111 
23112 #undef TARGET_BUILD_BUILTIN_VA_LIST
23113 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
23114 
23115 #undef TARGET_FOLD_BUILTIN
23116 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
23117 
23118 #undef TARGET_GIMPLE_FOLD_BUILTIN
23119 #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
23120 
23121 #undef TARGET_COMPARE_VERSION_PRIORITY
23122 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
23123 
23124 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
23125 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
23126   ix86_generate_version_dispatcher_body
23127 
23128 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
23129 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
23130   ix86_get_function_versions_dispatcher
23131 
23132 #undef TARGET_ENUM_VA_LIST_P
23133 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
23134 
23135 #undef TARGET_FN_ABI_VA_LIST
23136 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
23137 
23138 #undef TARGET_CANONICAL_VA_LIST_TYPE
23139 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
23140 
23141 #undef TARGET_EXPAND_BUILTIN_VA_START
23142 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
23143 
23144 #undef TARGET_MD_ASM_ADJUST
23145 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
23146 
23147 #undef TARGET_C_EXCESS_PRECISION
23148 #define TARGET_C_EXCESS_PRECISION ix86_get_excess_precision
23149 #undef TARGET_PROMOTE_PROTOTYPES
23150 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
23151 #undef TARGET_SETUP_INCOMING_VARARGS
23152 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
23153 #undef TARGET_MUST_PASS_IN_STACK
23154 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
23155 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
23156 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
23157 #undef TARGET_FUNCTION_ARG_ADVANCE
23158 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
23159 #undef TARGET_FUNCTION_ARG
23160 #define TARGET_FUNCTION_ARG ix86_function_arg
23161 #undef TARGET_INIT_PIC_REG
23162 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
23163 #undef TARGET_USE_PSEUDO_PIC_REG
23164 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
23165 #undef TARGET_FUNCTION_ARG_BOUNDARY
23166 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
23167 #undef TARGET_PASS_BY_REFERENCE
23168 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
23169 #undef TARGET_INTERNAL_ARG_POINTER
23170 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
23171 #undef TARGET_UPDATE_STACK_BOUNDARY
23172 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
23173 #undef TARGET_GET_DRAP_RTX
23174 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
23175 #undef TARGET_STRICT_ARGUMENT_NAMING
23176 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
23177 #undef TARGET_STATIC_CHAIN
23178 #define TARGET_STATIC_CHAIN ix86_static_chain
23179 #undef TARGET_TRAMPOLINE_INIT
23180 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
23181 #undef TARGET_RETURN_POPS_ARGS
23182 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
23183 
23184 #undef TARGET_WARN_FUNC_RETURN
23185 #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
23186 
23187 #undef TARGET_LEGITIMATE_COMBINED_INSN
23188 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
23189 
23190 #undef TARGET_ASAN_SHADOW_OFFSET
23191 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
23192 
23193 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
23194 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
23195 
23196 #undef TARGET_SCALAR_MODE_SUPPORTED_P
23197 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
23198 
23199 #undef TARGET_VECTOR_MODE_SUPPORTED_P
23200 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
23201 
23202 #undef TARGET_C_MODE_FOR_SUFFIX
23203 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
23204 
23205 #ifdef HAVE_AS_TLS
23206 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
23207 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
23208 #endif
23209 
23210 #ifdef SUBTARGET_INSERT_ATTRIBUTES
23211 #undef TARGET_INSERT_ATTRIBUTES
23212 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
23213 #endif
23214 
23215 #undef TARGET_MANGLE_TYPE
23216 #define TARGET_MANGLE_TYPE ix86_mangle_type
23217 
23218 #undef TARGET_STACK_PROTECT_GUARD
23219 #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
23220 
23221 #if !TARGET_MACHO
23222 #undef TARGET_STACK_PROTECT_FAIL
23223 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
23224 #endif
23225 
23226 #undef TARGET_FUNCTION_VALUE
23227 #define TARGET_FUNCTION_VALUE ix86_function_value
23228 
23229 #undef TARGET_FUNCTION_VALUE_REGNO_P
23230 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
23231 
23232 #undef TARGET_PROMOTE_FUNCTION_MODE
23233 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
23234 
23235 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
23236 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
23237 
23238 #undef TARGET_MEMBER_TYPE_FORCES_BLK
23239 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
23240 
23241 #undef TARGET_INSTANTIATE_DECLS
23242 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
23243 
23244 #undef TARGET_SECONDARY_RELOAD
23245 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
23246 #undef TARGET_SECONDARY_MEMORY_NEEDED
23247 #define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
23248 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
23249 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
23250 
23251 #undef TARGET_CLASS_MAX_NREGS
23252 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
23253 
23254 #undef TARGET_PREFERRED_RELOAD_CLASS
23255 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
23256 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
23257 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
23258 #undef TARGET_CLASS_LIKELY_SPILLED_P
23259 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
23260 
23261 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
23262 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
23263   ix86_builtin_vectorization_cost
23264 #undef TARGET_VECTORIZE_VEC_PERM_CONST
23265 #define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
23266 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
23267 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
23268   ix86_preferred_simd_mode
23269 #undef TARGET_VECTORIZE_SPLIT_REDUCTION
23270 #define TARGET_VECTORIZE_SPLIT_REDUCTION \
23271   ix86_split_reduction
23272 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
23273 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
23274   ix86_autovectorize_vector_modes
23275 #undef TARGET_VECTORIZE_GET_MASK_MODE
23276 #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
23277 #undef TARGET_VECTORIZE_INIT_COST
23278 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
23279 #undef TARGET_VECTORIZE_ADD_STMT_COST
23280 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
23281 #undef TARGET_VECTORIZE_FINISH_COST
23282 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
23283 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
23284 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
23285 
23286 #undef TARGET_SET_CURRENT_FUNCTION
23287 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
23288 
23289 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
23290 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
23291 
23292 #undef TARGET_OPTION_SAVE
23293 #define TARGET_OPTION_SAVE ix86_function_specific_save
23294 
23295 #undef TARGET_OPTION_RESTORE
23296 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
23297 
23298 #undef TARGET_OPTION_POST_STREAM_IN
23299 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
23300 
23301 #undef TARGET_OPTION_PRINT
23302 #define TARGET_OPTION_PRINT ix86_function_specific_print
23303 
23304 #undef TARGET_OPTION_FUNCTION_VERSIONS
23305 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
23306 
23307 #undef TARGET_CAN_INLINE_P
23308 #define TARGET_CAN_INLINE_P ix86_can_inline_p
23309 
23310 #undef TARGET_LEGITIMATE_ADDRESS_P
23311 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
23312 
23313 #undef TARGET_REGISTER_PRIORITY
23314 #define TARGET_REGISTER_PRIORITY ix86_register_priority
23315 
23316 #undef TARGET_REGISTER_USAGE_LEVELING_P
23317 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
23318 
23319 #undef TARGET_LEGITIMATE_CONSTANT_P
23320 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
23321 
23322 #undef TARGET_COMPUTE_FRAME_LAYOUT
23323 #define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
23324 
23325 #undef TARGET_FRAME_POINTER_REQUIRED
23326 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
23327 
23328 #undef TARGET_CAN_ELIMINATE
23329 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
23330 
23331 #undef TARGET_EXTRA_LIVE_ON_ENTRY
23332 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
23333 
23334 #undef TARGET_ASM_CODE_END
23335 #define TARGET_ASM_CODE_END ix86_code_end
23336 
23337 #undef TARGET_CONDITIONAL_REGISTER_USAGE
23338 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
23339 
23340 #undef TARGET_CANONICALIZE_COMPARISON
23341 #define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
23342 
23343 #undef TARGET_LOOP_UNROLL_ADJUST
23344 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
23345 
23346 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.  */
23347 #undef TARGET_SPILL_CLASS
23348 #define TARGET_SPILL_CLASS ix86_spill_class
23349 
23350 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
23351 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
23352   ix86_simd_clone_compute_vecsize_and_simdlen
23353 
23354 #undef TARGET_SIMD_CLONE_ADJUST
23355 #define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust
23356 
23357 #undef TARGET_SIMD_CLONE_USABLE
23358 #define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable
23359 
23360 #undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
23361 #define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa
23362 
23363 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
23364 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
23365   ix86_float_exceptions_rounding_supported_p
23366 
23367 #undef TARGET_MODE_EMIT
23368 #define TARGET_MODE_EMIT ix86_emit_mode_set
23369 
23370 #undef TARGET_MODE_NEEDED
23371 #define TARGET_MODE_NEEDED ix86_mode_needed
23372 
23373 #undef TARGET_MODE_AFTER
23374 #define TARGET_MODE_AFTER ix86_mode_after
23375 
23376 #undef TARGET_MODE_ENTRY
23377 #define TARGET_MODE_ENTRY ix86_mode_entry
23378 
23379 #undef TARGET_MODE_EXIT
23380 #define TARGET_MODE_EXIT ix86_mode_exit
23381 
23382 #undef TARGET_MODE_PRIORITY
23383 #define TARGET_MODE_PRIORITY ix86_mode_priority
23384 
23385 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
23386 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
23387 
23388 #undef TARGET_OFFLOAD_OPTIONS
23389 #define TARGET_OFFLOAD_OPTIONS \
23390   ix86_offload_options
23391 
23392 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
23393 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
23394 
23395 #undef TARGET_OPTAB_SUPPORTED_P
23396 #define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
23397 
23398 #undef TARGET_HARD_REGNO_SCRATCH_OK
23399 #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
23400 
23401 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
23402 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
23403 
23404 #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
23405 #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
23406 
23407 #undef TARGET_INIT_LIBFUNCS
23408 #define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
23409 
23410 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
23411 #define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
23412 
23413 #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
23414 #define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
23415 
23416 #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
23417 #define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
23418 
23419 #undef TARGET_HARD_REGNO_NREGS
23420 #define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
23421 #undef TARGET_HARD_REGNO_MODE_OK
23422 #define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
23423 
23424 #undef TARGET_MODES_TIEABLE_P
23425 #define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
23426 
23427 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
23428 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
23429   ix86_hard_regno_call_part_clobbered
23430 
23431 #undef TARGET_CAN_CHANGE_MODE_CLASS
23432 #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
23433 
23434 #undef TARGET_STATIC_RTX_ALIGNMENT
23435 #define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
23436 #undef TARGET_CONSTANT_ALIGNMENT
23437 #define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment
23438 
23439 #undef TARGET_EMPTY_RECORD_P
23440 #define TARGET_EMPTY_RECORD_P ix86_is_empty_record
23441 
23442 #undef TARGET_WARN_PARAMETER_PASSING_ABI
23443 #define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi
23444 
23445 #undef TARGET_GET_MULTILIB_ABI_NAME
23446 #define TARGET_GET_MULTILIB_ABI_NAME \
23447   ix86_get_multilib_abi_name
23448 
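/* Implement TARGET_LIBC_HAS_FAST_FUNCTION (defined below).  Return true if
   the C library in use (glibc) is known to provide a fast implementation of
   the builtin with function code FCODE; currently only mempcpy qualifies.  */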
23449 static bool ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
23450 {
23451 #ifdef OPTION_GLIBC
23452   if (OPTION_GLIBC)
23453     return (built_in_function) fcode == BUILT_IN_MEMPCPY;
23454   else
23455     return false;
23456 #else
23457   return false;
23458 #endif
23459 }
23460 
23461 #undef TARGET_LIBC_HAS_FAST_FUNCTION
23462 #define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
23463 
23464 #if CHECKING_P
23465 #undef TARGET_RUN_TARGET_SELFTESTS
23466 #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
23467 #endif /* #if CHECKING_P */
23468 
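/* The target structure for this backend, built from the TARGET_* hook
   macros defined above.  */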
23469 struct gcc_target targetm = TARGET_INITIALIZER;
23470 
23471 #include "gt-i386.h"
23472