1 /* Subroutines used for code generation on IA-32.
2    Copyright (C) 1988-2018 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10 
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #define IN_TARGET_CODE 1
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "memmodel.h"
29 #include "gimple.h"
30 #include "cfghooks.h"
31 #include "cfgloop.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "expmed.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic.h"
42 #include "cfgbuild.h"
43 #include "alias.h"
44 #include "fold-const.h"
45 #include "attribs.h"
46 #include "calls.h"
47 #include "stor-layout.h"
48 #include "varasm.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "flags.h"
52 #include "except.h"
53 #include "explow.h"
54 #include "expr.h"
55 #include "cfgrtl.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "gimplify.h"
60 #include "dwarf2.h"
61 #include "tm-constrs.h"
62 #include "params.h"
63 #include "cselib.h"
64 #include "sched-int.h"
65 #include "opts.h"
66 #include "tree-pass.h"
67 #include "context.h"
68 #include "pass_manager.h"
69 #include "target-globals.h"
70 #include "gimple-iterator.h"
71 #include "tree-vectorizer.h"
72 #include "shrink-wrap.h"
73 #include "builtins.h"
74 #include "rtl-iter.h"
75 #include "tree-iterator.h"
76 #include "tree-chkp.h"
77 #include "rtl-chkp.h"
78 #include "dbgcnt.h"
79 #include "case-cfn-macros.h"
80 #include "regrename.h"
81 #include "dojump.h"
82 #include "fold-const-call.h"
83 #include "tree-vrp.h"
84 #include "tree-ssanames.h"
85 #include "selftest.h"
86 #include "selftest-rtl.h"
87 #include "print-rtl.h"
88 #include "intl.h"
89 #include "ifcvt.h"
90 #include "symbol-summary.h"
91 #include "ipa-prop.h"
92 #include "ipa-fnsummary.h"
93 #include "wide-int-bitmask.h"
94 
95 /* This file should be included last.  */
96 #include "target-def.h"
97 
98 #include "x86-tune-costs.h"
99 
100 static rtx legitimize_dllimport_symbol (rtx, bool);
101 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
102 static rtx legitimize_pe_coff_symbol (rtx, bool);
103 static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
104 static bool ix86_save_reg (unsigned int, bool, bool);
105 static bool ix86_function_naked (const_tree);
106 static bool ix86_notrack_prefixed_insn_p (rtx);
107 static void ix86_emit_restore_reg_using_pop (rtx);
108 
109 
110 #ifndef CHECK_STACK_LIMIT
111 #define CHECK_STACK_LIMIT (-1)
112 #endif
113 
114 /* Return the index of the given mode in the multiply and division cost tables.  */
115 #define MODE_INDEX(mode)					\
116   ((mode) == QImode ? 0						\
117    : (mode) == HImode ? 1					\
118    : (mode) == SImode ? 2					\
119    : (mode) == DImode ? 3					\
120    : 4)
121 
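/* Editorial usage sketch (not part of the original source): MODE_INDEX is
   meant for indexing the per-mode entries of cost arrays such as the
   multiply and divide tables in struct processor_costs, e.g.

     ix86_cost->mult_init[MODE_INDEX (SImode)]   --> entry 2
     ix86_cost->divide[MODE_INDEX (DImode)]      --> entry 3

   The field names above follow struct processor_costs but are shown here
   for illustration only.  */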
122 
123 /* Set by -mtune.  */
124 const struct processor_costs *ix86_tune_cost = NULL;
125 
126 /* Set by -mtune or -Os.  */
127 const struct processor_costs *ix86_cost = NULL;
128 
129 /* Processor feature/optimization bitmasks.  */
130 #define m_386 (HOST_WIDE_INT_1U<<PROCESSOR_I386)
131 #define m_486 (HOST_WIDE_INT_1U<<PROCESSOR_I486)
132 #define m_PENT (HOST_WIDE_INT_1U<<PROCESSOR_PENTIUM)
133 #define m_LAKEMONT (HOST_WIDE_INT_1U<<PROCESSOR_LAKEMONT)
134 #define m_PPRO (HOST_WIDE_INT_1U<<PROCESSOR_PENTIUMPRO)
135 #define m_PENT4 (HOST_WIDE_INT_1U<<PROCESSOR_PENTIUM4)
136 #define m_NOCONA (HOST_WIDE_INT_1U<<PROCESSOR_NOCONA)
137 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
138 #define m_CORE2 (HOST_WIDE_INT_1U<<PROCESSOR_CORE2)
139 #define m_NEHALEM (HOST_WIDE_INT_1U<<PROCESSOR_NEHALEM)
140 #define m_SANDYBRIDGE (HOST_WIDE_INT_1U<<PROCESSOR_SANDYBRIDGE)
141 #define m_HASWELL (HOST_WIDE_INT_1U<<PROCESSOR_HASWELL)
142 #define m_CORE_ALL (m_CORE2 | m_NEHALEM  | m_SANDYBRIDGE | m_HASWELL)
143 #define m_BONNELL (HOST_WIDE_INT_1U<<PROCESSOR_BONNELL)
144 #define m_SILVERMONT (HOST_WIDE_INT_1U<<PROCESSOR_SILVERMONT)
145 #define m_KNL (HOST_WIDE_INT_1U<<PROCESSOR_KNL)
146 #define m_KNM (HOST_WIDE_INT_1U<<PROCESSOR_KNM)
147 #define m_SKYLAKE (HOST_WIDE_INT_1U<<PROCESSOR_SKYLAKE)
148 #define m_SKYLAKE_AVX512 (HOST_WIDE_INT_1U<<PROCESSOR_SKYLAKE_AVX512)
149 #define m_CANNONLAKE (HOST_WIDE_INT_1U<<PROCESSOR_CANNONLAKE)
150 #define m_ICELAKE_CLIENT (HOST_WIDE_INT_1U<<PROCESSOR_ICELAKE_CLIENT)
151 #define m_ICELAKE_SERVER (HOST_WIDE_INT_1U<<PROCESSOR_ICELAKE_SERVER)
152 #define m_INTEL (HOST_WIDE_INT_1U<<PROCESSOR_INTEL)
153 
154 #define m_GEODE (HOST_WIDE_INT_1U<<PROCESSOR_GEODE)
155 #define m_K6 (HOST_WIDE_INT_1U<<PROCESSOR_K6)
156 #define m_K6_GEODE (m_K6 | m_GEODE)
157 #define m_K8 (HOST_WIDE_INT_1U<<PROCESSOR_K8)
158 #define m_ATHLON (HOST_WIDE_INT_1U<<PROCESSOR_ATHLON)
159 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
160 #define m_AMDFAM10 (HOST_WIDE_INT_1U<<PROCESSOR_AMDFAM10)
161 #define m_BDVER1 (HOST_WIDE_INT_1U<<PROCESSOR_BDVER1)
162 #define m_BDVER2 (HOST_WIDE_INT_1U<<PROCESSOR_BDVER2)
163 #define m_BDVER3 (HOST_WIDE_INT_1U<<PROCESSOR_BDVER3)
164 #define m_BDVER4 (HOST_WIDE_INT_1U<<PROCESSOR_BDVER4)
165 #define m_ZNVER1 (HOST_WIDE_INT_1U<<PROCESSOR_ZNVER1)
166 #define m_BTVER1 (HOST_WIDE_INT_1U<<PROCESSOR_BTVER1)
167 #define m_BTVER2 (HOST_WIDE_INT_1U<<PROCESSOR_BTVER2)
168 #define m_BDVER	(m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
169 #define m_BTVER (m_BTVER1 | m_BTVER2)
170 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER \
171 			| m_ZNVER1)
172 
173 #define m_GENERIC (HOST_WIDE_INT_1U<<PROCESSOR_GENERIC)
174 
175 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
176 #undef DEF_TUNE
177 #define DEF_TUNE(tune, name, selector) name,
178 #include "x86-tune.def"
179 #undef DEF_TUNE
180 };
181 
182 /* Feature tests against the various tunings.  */
183 unsigned char ix86_tune_features[X86_TUNE_LAST];
184 
185 /* Feature tests against the various tunings used to create ix86_tune_features
186    based on the processor mask.  */
187 static unsigned HOST_WIDE_INT initial_ix86_tune_features[X86_TUNE_LAST] = {
188 #undef DEF_TUNE
189 #define DEF_TUNE(tune, name, selector) selector,
190 #include "x86-tune.def"
191 #undef DEF_TUNE
192 };
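/* Editorial sketch (not part of the original source): each DEF_TUNE entry
   in x86-tune.def is expanded once into each table above.  A hypothetical
   entry

     DEF_TUNE (X86_TUNE_FOO, "foo", m_CORE_ALL | m_GENERIC)

   would contribute "foo" to ix86_tune_feature_names[] and the mask
   (m_CORE_ALL | m_GENERIC) to initial_ix86_tune_features[], so that
   ix86_tune_features[X86_TUNE_FOO] is later set exactly when ix86_tune
   selects one of the Core processors or the generic tuning.  */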
193 
194 /* Feature tests against the various architecture variations.  */
195 unsigned char ix86_arch_features[X86_ARCH_LAST];
196 
197 /* Feature tests against the various architecture variations, used to create
198    ix86_arch_features based on the processor mask.  */
199 static unsigned HOST_WIDE_INT initial_ix86_arch_features[X86_ARCH_LAST] = {
200   /* X86_ARCH_CMOV: Conditional move was added for pentiumpro.  */
201   ~(m_386 | m_486 | m_PENT | m_LAKEMONT | m_K6),
202 
203   /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */
204   ~m_386,
205 
206   /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
207   ~(m_386 | m_486),
208 
209   /* X86_ARCH_XADD: Exchange and add was added for 80486.  */
210   ~m_386,
211 
212   /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
213   ~m_386,
214 };
215 
216 /* In case the average insn count for a single function invocation is
217    lower than this constant, emit fast (but longer) prologue and
218    epilogue code.  */
219 #define FAST_PROLOGUE_INSN_COUNT 20
220 
221 /* Names for 8-bit (low), 8-bit (high), and 16-bit registers, respectively.  */
222 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
223 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
224 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
225 
226 /* Array of the smallest class containing reg number REGNO, indexed by
227    REGNO.  Used by REGNO_REG_CLASS in i386.h.  */
228 
229 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
230 {
231   /* ax, dx, cx, bx */
232   AREG, DREG, CREG, BREG,
233   /* si, di, bp, sp */
234   SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
235   /* FP registers */
236   FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
237   FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
238   /* arg pointer */
239   NON_Q_REGS,
240   /* flags, fpsr, fpcr, frame */
241   NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
242   /* SSE registers */
243   SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
244   SSE_REGS, SSE_REGS,
245   /* MMX registers */
246   MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
247   MMX_REGS, MMX_REGS,
248   /* REX registers */
249   NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
250   NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
251   /* SSE REX registers */
252   SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
253   SSE_REGS, SSE_REGS,
254   /* AVX-512 SSE registers */
255   EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
256   EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
257   EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
258   EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
259   /* Mask registers.  */
260   MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
261   MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
262   /* MPX bound registers */
263   BND_REGS, BND_REGS, BND_REGS, BND_REGS,
264 };
265 
266 /* The "default" register map used in 32bit mode.  */
267 
268 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
269 {
270   0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
271   12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
272   -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
273   21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
274   29, 30, 31, 32, 33, 34, 35, 36,       /* MMX */
275   -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
276   -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
277   -1, -1, -1, -1, -1, -1, -1, -1,       /* AVX-512 registers 16-23*/
278   -1, -1, -1, -1, -1, -1, -1, -1,       /* AVX-512 registers 24-31*/
279   93, 94, 95, 96, 97, 98, 99, 100,      /* Mask registers */
280   101, 102, 103, 104,			/* bound registers */
281 };
282 
283 /* The "default" register map used in 64bit mode.  */
284 
285 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
286 {
287   0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
288   33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
289   -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
290   17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
291   41, 42, 43, 44, 45, 46, 47, 48,       /* MMX */
292   8,9,10,11,12,13,14,15,		/* extended integer registers */
293   25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
294   67, 68, 69, 70, 71, 72, 73, 74,       /* AVX-512 registers 16-23 */
295   75, 76, 77, 78, 79, 80, 81, 82,       /* AVX-512 registers 24-31 */
296   118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
297   126, 127, 128, 129,			/* bound registers */
298 };
299 
300 /* Define the register numbers to be used in Dwarf debugging information.
301    The SVR4 reference port C compiler uses the following register numbers
302    in its Dwarf output code:
303 	0 for %eax (gcc regno = 0)
304 	1 for %ecx (gcc regno = 2)
305 	2 for %edx (gcc regno = 1)
306 	3 for %ebx (gcc regno = 3)
307 	4 for %esp (gcc regno = 7)
308 	5 for %ebp (gcc regno = 6)
309 	6 for %esi (gcc regno = 4)
310 	7 for %edi (gcc regno = 5)
311    The following three DWARF register numbers are never generated by
312    the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
313    believed these numbers to have these meanings.
314 	8  for %eip    (no gcc equivalent)
315 	9  for %eflags (gcc regno = 17)
316 	10 for %trapno (no gcc equivalent)
317    It is not at all clear how we should number the FP stack registers
318    for the x86 architecture.  If the version of SDB on x86/svr4 were
319    a bit less brain dead with respect to floating-point then we would
320    have a precedent to follow with respect to DWARF register numbers
321    for x86 FP registers, but the SDB on x86/svr4 was so completely
322    broken with respect to FP registers that it is hardly worth thinking
323    of it as something to strive for compatibility with.
324    The version of x86/svr4 SDB I had does (partially)
325    seem to believe that DWARF register number 11 is associated with
326    the x86 register %st(0), but that's about all.  Higher DWARF
327    register numbers don't seem to be associated with anything in
328    particular, and even for DWARF regno 11, SDB only seemed to under-
329    stand that it should say that a variable lives in %st(0) (when
330    asked via an `=' command) if we said it was in DWARF regno 11,
331    but SDB still printed garbage when asked for the value of the
332    variable in question (via a `/' command).
333    (Also note that the labels SDB printed for various FP stack regs
334    when doing an `x' command were all wrong.)
335    Note that these problems generally don't affect the native SVR4
336    C compiler because it doesn't allow the use of -O with -g and
337    because when it is *not* optimizing, it allocates a memory
338    location for each floating-point variable, and the memory
339    location is what gets described in the DWARF AT_location
340    attribute for the variable in question.
341    Regardless of the severe mental illness of the x86/svr4 SDB, we
342    do something sensible here and we use the following DWARF
343    register numbers.  Note that these are all stack-top-relative
344    numbers.
345 	11 for %st(0) (gcc regno = 8)
346 	12 for %st(1) (gcc regno = 9)
347 	13 for %st(2) (gcc regno = 10)
348 	14 for %st(3) (gcc regno = 11)
349 	15 for %st(4) (gcc regno = 12)
350 	16 for %st(5) (gcc regno = 13)
351 	17 for %st(6) (gcc regno = 14)
352 	18 for %st(7) (gcc regno = 15)
353 */
354 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
355 {
356   0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
357   11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
358   -1, 9, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
359   21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
360   29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
361   -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
362   -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
363   -1, -1, -1, -1, -1, -1, -1, -1,       /* AVX-512 registers 16-23*/
364   -1, -1, -1, -1, -1, -1, -1, -1,       /* AVX-512 registers 24-31*/
365   93, 94, 95, 96, 97, 98, 99, 100,      /* Mask registers */
366   101, 102, 103, 104,			/* bound registers */
367 };
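/* Editorial worked example (not part of the original source): per the
   comment above, %edx is gcc regno 1 but SVR4 DWARF register 2, so
   svr4_dbx_register_map[1] == 2, and %st(0) (gcc regno 8) maps to DWARF
   register 11.  The 32-bit dbx_register_map above instead maps gcc
   regno 8 to 12.  */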
368 
369 /* Define parameter passing and return registers.  */
370 
371 static int const x86_64_int_parameter_registers[6] =
372 {
373   DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
374 };
375 
376 static int const x86_64_ms_abi_int_parameter_registers[4] =
377 {
378   CX_REG, DX_REG, R8_REG, R9_REG
379 };
380 
381 static int const x86_64_int_return_registers[4] =
382 {
383   AX_REG, DX_REG, DI_REG, SI_REG
384 };
385 
386 /* Additional registers that are clobbered by SYSV calls.  */
387 
388 #define NUM_X86_64_MS_CLOBBERED_REGS 12
389 static int const x86_64_ms_sysv_extra_clobbered_registers
390 		 [NUM_X86_64_MS_CLOBBERED_REGS] =
391 {
392   SI_REG, DI_REG,
393   XMM6_REG, XMM7_REG,
394   XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
395   XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
396 };
397 
398 enum xlogue_stub {
399   XLOGUE_STUB_SAVE,
400   XLOGUE_STUB_RESTORE,
401   XLOGUE_STUB_RESTORE_TAIL,
402   XLOGUE_STUB_SAVE_HFP,
403   XLOGUE_STUB_RESTORE_HFP,
404   XLOGUE_STUB_RESTORE_HFP_TAIL,
405 
406   XLOGUE_STUB_COUNT
407 };
408 
409 enum xlogue_stub_sets {
410   XLOGUE_SET_ALIGNED,
411   XLOGUE_SET_ALIGNED_PLUS_8,
412   XLOGUE_SET_HFP_ALIGNED_OR_REALIGN,
413   XLOGUE_SET_HFP_ALIGNED_PLUS_8,
414 
415   XLOGUE_SET_COUNT
416 };
417 
418 /* Register save/restore layout used by out-of-line stubs.  */
419 class xlogue_layout {
420 public:
421   struct reginfo
422   {
423     unsigned regno;
424     HOST_WIDE_INT offset;	/* Offset used by stub base pointer (rax or
425 				   rsi) to where each register is stored.  */
426   };
427 
428   unsigned get_nregs () const			{return m_nregs;}
429   HOST_WIDE_INT get_stack_align_off_in () const	{return m_stack_align_off_in;}
430 
431   const reginfo &get_reginfo (unsigned reg) const
432   {
433     gcc_assert (reg < m_nregs);
434     return m_regs[reg];
435   }
436 
437   static const char *get_stub_name (enum xlogue_stub stub,
438 				    unsigned n_extra_args);
439 
440   /* Returns an rtx for the stub's symbol based upon
441        1.) the specified stub (save, restore or restore_ret) and
442        2.) the value of cfun->machine->call_ms2sysv_extra_regs and
443        3.) whether or not stack alignment is being performed.  */
444   static rtx get_stub_rtx (enum xlogue_stub stub);
445 
446   /* Returns the amount of stack space (including padding) that the stub
447      needs to store registers based upon data in the machine_function.  */
448   HOST_WIDE_INT get_stack_space_used () const
449   {
450     const struct machine_function *m = cfun->machine;
451     unsigned last_reg = m->call_ms2sysv_extra_regs + MIN_REGS - 1;
452 
453     gcc_assert (m->call_ms2sysv_extra_regs <= MAX_EXTRA_REGS);
454     return m_regs[last_reg].offset + STUB_INDEX_OFFSET;
455   }
456 
457   /* Returns the offset for the base pointer used by the stub.  */
458   HOST_WIDE_INT get_stub_ptr_offset () const
459   {
460     return STUB_INDEX_OFFSET + m_stack_align_off_in;
461   }
462 
463   static const struct xlogue_layout &get_instance ();
464   static unsigned count_stub_managed_regs ();
465   static bool is_stub_managed_reg (unsigned regno, unsigned count);
466 
467   static const HOST_WIDE_INT STUB_INDEX_OFFSET = 0x70;
468   static const unsigned MIN_REGS = NUM_X86_64_MS_CLOBBERED_REGS;
469   static const unsigned MAX_REGS = 18;
470   static const unsigned MAX_EXTRA_REGS = MAX_REGS - MIN_REGS;
471   static const unsigned VARIANT_COUNT = MAX_EXTRA_REGS + 1;
472   static const unsigned STUB_NAME_MAX_LEN = 20;
473   static const char * const STUB_BASE_NAMES[XLOGUE_STUB_COUNT];
474   static const unsigned REG_ORDER[MAX_REGS];
475   static const unsigned REG_ORDER_REALIGN[MAX_REGS];
476 
477 private:
478   xlogue_layout ();
479   xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp);
480   xlogue_layout (const xlogue_layout &);
481 
482   /* True if hard frame pointer is used.  */
483   bool m_hfp;
484 
485   /* Max number of registers this layout manages.  */
486   unsigned m_nregs;
487 
488   /* Incoming offset from 16-byte alignment.  */
489   HOST_WIDE_INT m_stack_align_off_in;
490 
491   /* Register order and offsets.  */
492   struct reginfo m_regs[MAX_REGS];
493 
494   /* Lazy-inited cache of symbol names for stubs.  */
495   static char s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
496 			  [STUB_NAME_MAX_LEN];
497 
498   static const xlogue_layout s_instances[XLOGUE_SET_COUNT];
499 };
500 
501 const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = {
502   "savms64",
503   "resms64",
504   "resms64x",
505   "savms64f",
506   "resms64f",
507   "resms64fx"
508 };
509 
510 const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = {
511 /* The below offset values are where each register is stored for the layout
512    relative to the incoming stack pointer.  The value of each m_regs[].offset will
513    be relative to the incoming base pointer (rax or rsi) used by the stub.
514 
515     s_instances:   0		1		2		3
516     Offset:					realigned or	aligned + 8
517     Register	   aligned	aligned + 8	aligned w/HFP	w/HFP	*/
518     XMM15_REG,	/* 0x10		0x18		0x10		0x18	*/
519     XMM14_REG,	/* 0x20		0x28		0x20		0x28	*/
520     XMM13_REG,	/* 0x30		0x38		0x30		0x38	*/
521     XMM12_REG,	/* 0x40		0x48		0x40		0x48	*/
522     XMM11_REG,	/* 0x50		0x58		0x50		0x58	*/
523     XMM10_REG,	/* 0x60		0x68		0x60		0x68	*/
524     XMM9_REG,	/* 0x70		0x78		0x70		0x78	*/
525     XMM8_REG,	/* 0x80		0x88		0x80		0x88	*/
526     XMM7_REG,	/* 0x90		0x98		0x90		0x98	*/
527     XMM6_REG,	/* 0xa0		0xa8		0xa0		0xa8	*/
528     SI_REG,	/* 0xa8		0xb0		0xa8		0xb0	*/
529     DI_REG,	/* 0xb0		0xb8		0xb0		0xb8	*/
530     BX_REG,	/* 0xb8		0xc0		0xb8		0xc0	*/
531     BP_REG,	/* 0xc0		0xc8		N/A		N/A	*/
532     R12_REG,	/* 0xc8		0xd0		0xc0		0xc8	*/
533     R13_REG,	/* 0xd0		0xd8		0xc8		0xd0	*/
534     R14_REG,	/* 0xd8		0xe0		0xd0		0xd8	*/
535     R15_REG,	/* 0xe0		0xe8		0xd8		0xe0	*/
536 };
537 
538 /* Instantiate static const values.  */
539 const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET;
540 const unsigned xlogue_layout::MIN_REGS;
541 const unsigned xlogue_layout::MAX_REGS;
542 const unsigned xlogue_layout::MAX_EXTRA_REGS;
543 const unsigned xlogue_layout::VARIANT_COUNT;
544 const unsigned xlogue_layout::STUB_NAME_MAX_LEN;
545 
546 /* Initialize xlogue_layout::s_stub_names to zero.  */
547 char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
548 				[STUB_NAME_MAX_LEN];
549 
550 /* Instantiates all xlogue_layout instances.  */
551 const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = {
552   xlogue_layout (0, false),
553   xlogue_layout (8, false),
554   xlogue_layout (0, true),
555   xlogue_layout (8, true)
556 };
557 
558 /* Return an appropriate const instance of xlogue_layout based upon values
559    in cfun->machine and crtl.  */
560 const struct xlogue_layout &
561 xlogue_layout::get_instance ()
562 {
563   enum xlogue_stub_sets stub_set;
564   bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in;
565 
566   if (stack_realign_fp)
567     stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
568   else if (frame_pointer_needed)
569     stub_set = aligned_plus_8
570 	      ? XLOGUE_SET_HFP_ALIGNED_PLUS_8
571 	      : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
572   else
573     stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED;
574 
575   return s_instances[stub_set];
576 }
577 
578 /* Determine how many clobbered registers can be saved by the stub.
579    Returns the count of registers the stub will save and restore.  */
580 unsigned
581 xlogue_layout::count_stub_managed_regs ()
582 {
583   bool hfp = frame_pointer_needed || stack_realign_fp;
584   unsigned i, count;
585   unsigned regno;
586 
587   for (count = i = MIN_REGS; i < MAX_REGS; ++i)
588     {
589       regno = REG_ORDER[i];
590       if (regno == BP_REG && hfp)
591 	continue;
592       if (!ix86_save_reg (regno, false, false))
593 	break;
594       ++count;
595     }
596   return count;
597 }
598 
599 /* Determine if register REGNO is a stub managed register given the
600    total COUNT of stub managed registers.  */
601 bool
602 xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count)
603 {
604   bool hfp = frame_pointer_needed || stack_realign_fp;
605   unsigned i;
606 
607   for (i = 0; i < count; ++i)
608     {
609       gcc_assert (i < MAX_REGS);
610       if (REG_ORDER[i] == BP_REG && hfp)
611 	++count;
612       else if (REG_ORDER[i] == regno)
613 	return true;
614     }
615   return false;
616 }
617 
618 /* Constructor for xlogue_layout.  */
619 xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp)
620   : m_hfp (hfp) , m_nregs (hfp ? 17 : 18),
621     m_stack_align_off_in (stack_align_off_in)
622 {
623   HOST_WIDE_INT offset = stack_align_off_in;
624   unsigned i, j;
625 
626   for (i = j = 0; i < MAX_REGS; ++i)
627     {
628       unsigned regno = REG_ORDER[i];
629 
630       if (regno == BP_REG && hfp)
631 	continue;
632       if (SSE_REGNO_P (regno))
633 	{
634 	  offset += 16;
635 	  /* Verify that SSE regs are always aligned.  */
636 	  gcc_assert (!((stack_align_off_in + offset) & 15));
637 	}
638       else
639 	offset += 8;
640 
641       m_regs[j].regno    = regno;
642       m_regs[j++].offset = offset - STUB_INDEX_OFFSET;
643     }
644   gcc_assert (j == m_nregs);
645 }
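/* Editorial worked example (not part of the original source): for
   s_instances[0] (stack_align_off_in == 0, no hard frame pointer) the
   first register processed is XMM15_REG; OFFSET becomes 16, so
   m_regs[0].offset is 16 - STUB_INDEX_OFFSET == -0x60, which corresponds
   to the 0x10 column shown in the REG_ORDER table above.  */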
646 
647 const char *
648 xlogue_layout::get_stub_name (enum xlogue_stub stub,
649 			      unsigned n_extra_regs)
650 {
651   const int have_avx = TARGET_AVX;
652   char *name = s_stub_names[!!have_avx][stub][n_extra_regs];
653 
654   /* Lazy init */
655   if (!*name)
656     {
657       int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u",
658 			  (have_avx ? "avx" : "sse"),
659 			  STUB_BASE_NAMES[stub],
660 			  MIN_REGS + n_extra_regs);
661       gcc_checking_assert (res < (int)STUB_NAME_MAX_LEN);
662     }
663 
664   return name;
665 }
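/* Editorial sketch (not part of the original source): with the format
   string above, XLOGUE_STUB_SAVE with no extra registers yields
   "__sse_savms64_12" (or "__avx_savms64_12" when TARGET_AVX), and
   XLOGUE_STUB_RESTORE_TAIL with two extra registers yields
   "__sse_resms64x_14", since MIN_REGS is 12.  */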
666 
667 /* Return rtx of a symbol ref for the entry point (based upon
668    cfun->machine->call_ms2sysv_extra_regs) of the specified stub.  */
669 rtx
670 xlogue_layout::get_stub_rtx (enum xlogue_stub stub)
671 {
672   const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs;
673   gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS);
674   gcc_assert (stub < XLOGUE_STUB_COUNT);
675   gcc_assert (crtl->stack_realign_finalized);
676 
677   return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs));
678 }
679 
680 /* Define the structure for the machine field in struct function.  */
681 
682 struct GTY(()) stack_local_entry {
683   unsigned short mode;
684   unsigned short n;
685   rtx rtl;
686   struct stack_local_entry *next;
687 };
688 
689 /* Which cpu are we scheduling for.  */
690 enum attr_cpu ix86_schedule;
691 
692 /* Which cpu are we optimizing for.  */
693 enum processor_type ix86_tune;
694 
695 /* Which instruction set architecture to use.  */
696 enum processor_type ix86_arch;
697 
698 /* True if processor has SSE prefetch instruction.  */
699 unsigned char x86_prefetch_sse;
700 
701 /* -mstackrealign option */
702 static const char ix86_force_align_arg_pointer_string[]
703   = "force_align_arg_pointer";
704 
705 static rtx (*ix86_gen_leave) (void);
706 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
707 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
708 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
709 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
710 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
711 static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
712 static rtx (*ix86_gen_clzero) (rtx);
713 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
714 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
715 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
716 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
717 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
718 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
719 
720 /* Preferred alignment for stack boundary in bits.  */
721 unsigned int ix86_preferred_stack_boundary;
722 
723 /* Alignment for incoming stack boundary in bits specified at
724    command line.  */
725 static unsigned int ix86_user_incoming_stack_boundary;
726 
727 /* Default alignment for incoming stack boundary in bits.  */
728 static unsigned int ix86_default_incoming_stack_boundary;
729 
730 /* Alignment for incoming stack boundary in bits.  */
731 unsigned int ix86_incoming_stack_boundary;
732 
733 /* Calling abi specific va_list type nodes.  */
734 static GTY(()) tree sysv_va_list_type_node;
735 static GTY(()) tree ms_va_list_type_node;
736 
737 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
738 char internal_label_prefix[16];
739 int internal_label_prefix_len;
740 
741 /* Fence to use after loop using movnt.  */
742 tree x86_mfence;
743 
744 /* Register class used for passing a given 64-bit part of the argument.
745    These represent classes as documented by the PS ABI, with the exception
746    of the SSESF and SSEDF classes, which are basically the SSE class; gcc
747    just uses SF or DFmode moves instead of DImode to avoid reformatting penalties.
748 
749    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
750    whenever possible (the upper half does contain padding).  */
751 enum x86_64_reg_class
752   {
753     X86_64_NO_CLASS,
754     X86_64_INTEGER_CLASS,
755     X86_64_INTEGERSI_CLASS,
756     X86_64_SSE_CLASS,
757     X86_64_SSESF_CLASS,
758     X86_64_SSEDF_CLASS,
759     X86_64_SSEUP_CLASS,
760     X86_64_X87_CLASS,
761     X86_64_X87UP_CLASS,
762     X86_64_COMPLEX_X87_CLASS,
763     X86_64_MEMORY_CLASS
764   };
765 
766 #define MAX_CLASSES 8
767 
768 /* Table of constants used by fldpi, fldln2, etc....  */
769 static REAL_VALUE_TYPE ext_80387_constants_table [5];
770 static bool ext_80387_constants_init;
771 
772 
773 static struct machine_function * ix86_init_machine_status (void);
774 static rtx ix86_function_value (const_tree, const_tree, bool);
775 static bool ix86_function_value_regno_p (const unsigned int);
776 static unsigned int ix86_function_arg_boundary (machine_mode,
777 						const_tree);
778 static rtx ix86_static_chain (const_tree, bool);
779 static int ix86_function_regparm (const_tree, const_tree);
780 static void ix86_compute_frame_layout (void);
781 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
782 						 rtx, rtx, int);
783 static void ix86_add_new_builtins (HOST_WIDE_INT, HOST_WIDE_INT);
784 static tree ix86_canonical_va_list_type (tree);
785 static void predict_jump (int);
786 static unsigned int split_stack_prologue_scratch_regno (void);
787 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
788 
789 enum ix86_function_specific_strings
790 {
791   IX86_FUNCTION_SPECIFIC_ARCH,
792   IX86_FUNCTION_SPECIFIC_TUNE,
793   IX86_FUNCTION_SPECIFIC_MAX
794 };
795 
796 static char *ix86_target_string (HOST_WIDE_INT, HOST_WIDE_INT, int, int,
797 				 const char *, const char *, enum fpmath_unit,
798 				 bool);
799 static void ix86_function_specific_save (struct cl_target_option *,
800 					 struct gcc_options *opts);
801 static void ix86_function_specific_restore (struct gcc_options *opts,
802 					    struct cl_target_option *);
803 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
804 static void ix86_function_specific_print (FILE *, int,
805 					  struct cl_target_option *);
806 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
807 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
808 						 struct gcc_options *,
809 						 struct gcc_options *,
810 						 struct gcc_options *);
811 static bool ix86_can_inline_p (tree, tree);
812 static void ix86_set_current_function (tree);
813 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
814 
815 static enum calling_abi ix86_function_abi (const_tree);
816 
817 
818 #ifndef SUBTARGET32_DEFAULT_CPU
819 #define SUBTARGET32_DEFAULT_CPU "i386"
820 #endif
821 
822 /* Whether -mtune= or -march= were specified */
823 static int ix86_tune_defaulted;
824 static int ix86_arch_specified;
825 
826 /* Vectorization library interface and handlers.  */
827 static tree (*ix86_veclib_handler) (combined_fn, tree, tree);
828 
829 static tree ix86_veclibabi_svml (combined_fn, tree, tree);
830 static tree ix86_veclibabi_acml (combined_fn, tree, tree);
831 
832 /* Processor target table, indexed by processor number */
833 struct ptt
834 {
835   const char *const name;			/* processor name  */
836   const struct processor_costs *cost;		/* Processor costs */
837   const int align_loop;				/* Default alignments.  */
838   const int align_loop_max_skip;
839   const int align_jump;
840   const int align_jump_max_skip;
841   const int align_func;
842 };
843 
844 /* This table must be in sync with enum processor_type in i386.h.  */
845 static const struct ptt processor_target_table[PROCESSOR_max] =
846 {
847   {"generic", &generic_cost, 16, 10, 16, 10, 16},
848   {"i386", &i386_cost, 4, 3, 4, 3, 4},
849   {"i486", &i486_cost, 16, 15, 16, 15, 16},
850   {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
851   {"lakemont", &lakemont_cost, 16, 7, 16, 7, 16},
852   {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
853   {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
854   {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
855   {"core2", &core_cost, 16, 10, 16, 10, 16},
856   {"nehalem", &core_cost, 16, 10, 16, 10, 16},
857   {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
858   {"haswell", &core_cost, 16, 10, 16, 10, 16},
859   {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
860   {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
861   {"knl", &slm_cost, 16, 15, 16, 7, 16},
862   {"knm", &slm_cost, 16, 15, 16, 7, 16},
863   {"skylake", &skylake_cost, 16, 10, 16, 10, 16},
864   {"skylake-avx512", &skylake_cost, 16, 10, 16, 10, 16},
865   {"cannonlake", &skylake_cost, 16, 10, 16, 10, 16},
866   {"icelake-client", &skylake_cost, 16, 10, 16, 10, 16},
867   {"icelake-server", &skylake_cost, 16, 10, 16, 10, 16},
868   {"intel", &intel_cost, 16, 15, 16, 7, 16},
869   {"geode", &geode_cost, 0, 0, 0, 0, 0},
870   {"k6", &k6_cost, 32, 7, 32, 7, 32},
871   {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
872   {"k8", &k8_cost, 16, 7, 16, 7, 16},
873   {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
874   {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
875   {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
876   {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
877   {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
878   {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
879   {"btver2", &btver2_cost, 16, 10, 16, 7, 11},
880   {"znver1", &znver1_cost, 16, 15, 16, 15, 16}
881 };
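/* Editorial sketch (not part of the original source): e.g. the "k8" row
   above means -mtune=k8 uses &k8_cost together with 16-byte loop and jump
   alignment (max skip 7 bytes each) and 16-byte function alignment.  */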
882 
883 static unsigned int
884 rest_of_handle_insert_vzeroupper (void)
885 {
886   int i;
887 
888   /* vzeroupper instructions are inserted immediately after reload to
889      account for possible spills from 256bit or 512bit registers.  The pass
890      reuses the mode switching infrastructure by re-running the mode
891      insertion pass, so disable entities that have already been processed.  */
892   for (i = 0; i < MAX_386_ENTITIES; i++)
893     ix86_optimize_mode_switching[i] = 0;
894 
895   ix86_optimize_mode_switching[AVX_U128] = 1;
896 
897   /* Call optimize_mode_switching.  */
898   g->get_passes ()->execute_pass_mode_switching ();
899   return 0;
900 }
901 
902 /* Return 1 if INSN uses or defines a hard register.
903    Hard register uses in a memory address are ignored.
904    Clobbers and flags definitions are ignored.  */
905 
906 static bool
907 has_non_address_hard_reg (rtx_insn *insn)
908 {
909   df_ref ref;
910   FOR_EACH_INSN_DEF (ref, insn)
911     if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
912 	&& !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
913 	&& DF_REF_REGNO (ref) != FLAGS_REG)
914       return true;
915 
916   FOR_EACH_INSN_USE (ref, insn)
917     if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
918       return true;
919 
920   return false;
921 }
922 
923 /* Check if comparison INSN may be transformed
924    into a vector comparison.  Currently we transform
925    only zero checks, which look like:
926 
927    (set (reg:CCZ 17 flags)
928         (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
929                              (subreg:SI (reg:DI x) 0))
930 		     (const_int 0 [0])))  */
931 
932 static bool
933 convertible_comparison_p (rtx_insn *insn)
934 {
935   if (!TARGET_SSE4_1)
936     return false;
937 
938   rtx def_set = single_set (insn);
939 
940   gcc_assert (def_set);
941 
942   rtx src = SET_SRC (def_set);
943   rtx dst = SET_DEST (def_set);
944 
945   gcc_assert (GET_CODE (src) == COMPARE);
946 
947   if (GET_CODE (dst) != REG
948       || REGNO (dst) != FLAGS_REG
949       || GET_MODE (dst) != CCZmode)
950     return false;
951 
952   rtx op1 = XEXP (src, 0);
953   rtx op2 = XEXP (src, 1);
954 
955   if (op2 != CONST0_RTX (GET_MODE (op2)))
956     return false;
957 
958   if (GET_CODE (op1) != IOR)
959     return false;
960 
961   op2 = XEXP (op1, 1);
962   op1 = XEXP (op1, 0);
963 
964   if (!SUBREG_P (op1)
965       || !SUBREG_P (op2)
966       || GET_MODE (op1) != SImode
967       || GET_MODE (op2) != SImode
968       || ((SUBREG_BYTE (op1) != 0
969 	   || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
970 	  && (SUBREG_BYTE (op2) != 0
971 	      || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
972     return false;
973 
974   op1 = SUBREG_REG (op1);
975   op2 = SUBREG_REG (op2);
976 
977   if (op1 != op2
978       || !REG_P (op1)
979       || GET_MODE (op1) != DImode)
980     return false;
981 
982   return true;
983 }
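/* Editorial sketch (not part of the original source): on a 32-bit target
   the RTL accepted above typically comes from a 64-bit equality test
   against zero, roughly

     long long x;
     ...
     if (x == 0)
       ...

   which is lowered to an IOR of the two SImode halves of X compared with
   zero, matching the pattern quoted in the comment before
   convertible_comparison_p.  */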
984 
985 /* The DImode version of scalar_to_vector_candidate_p.  */
986 
987 static bool
988 dimode_scalar_to_vector_candidate_p (rtx_insn *insn)
989 {
990   rtx def_set = single_set (insn);
991 
992   if (!def_set)
993     return false;
994 
995   if (has_non_address_hard_reg (insn))
996     return false;
997 
998   rtx src = SET_SRC (def_set);
999   rtx dst = SET_DEST (def_set);
1000 
1001   if (GET_CODE (src) == COMPARE)
1002     return convertible_comparison_p (insn);
1003 
1004   /* We are interested in DImode promotion only.  */
1005   if ((GET_MODE (src) != DImode
1006        && !CONST_INT_P (src))
1007       || GET_MODE (dst) != DImode)
1008     return false;
1009 
1010   if (!REG_P (dst) && !MEM_P (dst))
1011     return false;
1012 
1013   switch (GET_CODE (src))
1014     {
1015     case ASHIFTRT:
1016       if (!TARGET_AVX512VL)
1017 	return false;
1018       /* FALLTHRU */
1019 
1020     case ASHIFT:
1021     case LSHIFTRT:
1022       if (!REG_P (XEXP (src, 1))
1023 	  && (!SUBREG_P (XEXP (src, 1))
1024 	      || SUBREG_BYTE (XEXP (src, 1)) != 0
1025 	      || !REG_P (SUBREG_REG (XEXP (src, 1))))
1026 	  && (!CONST_INT_P (XEXP (src, 1))
1027 	      || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, 63)))
1028 	return false;
1029 
1030       if (GET_MODE (XEXP (src, 1)) != QImode
1031 	  && !CONST_INT_P (XEXP (src, 1)))
1032 	return false;
1033       break;
1034 
1035     case PLUS:
1036     case MINUS:
1037     case IOR:
1038     case XOR:
1039     case AND:
1040       if (!REG_P (XEXP (src, 1))
1041 	  && !MEM_P (XEXP (src, 1))
1042 	  && !CONST_INT_P (XEXP (src, 1)))
1043 	return false;
1044 
1045       if (GET_MODE (XEXP (src, 1)) != DImode
1046 	  && !CONST_INT_P (XEXP (src, 1)))
1047 	return false;
1048       break;
1049 
1050     case NEG:
1051     case NOT:
1052       break;
1053 
1054     case REG:
1055       return true;
1056 
1057     case MEM:
1058     case CONST_INT:
1059       return REG_P (dst);
1060 
1061     default:
1062       return false;
1063     }
1064 
1065   if (!REG_P (XEXP (src, 0))
1066       && !MEM_P (XEXP (src, 0))
1067       && !CONST_INT_P (XEXP (src, 0))
1068       /* Check for andnot case.  */
1069       && (GET_CODE (src) != AND
1070 	  || GET_CODE (XEXP (src, 0)) != NOT
1071 	  || !REG_P (XEXP (XEXP (src, 0), 0))))
1072       return false;
1073 
1074   if (GET_MODE (XEXP (src, 0)) != DImode
1075       && !CONST_INT_P (XEXP (src, 0)))
1076     return false;
1077 
1078   return true;
1079 }
1080 
1081 /* The TImode version of scalar_to_vector_candidate_p.  */
1082 
1083 static bool
1084 timode_scalar_to_vector_candidate_p (rtx_insn *insn)
1085 {
1086   rtx def_set = single_set (insn);
1087 
1088   if (!def_set)
1089     return false;
1090 
1091   if (has_non_address_hard_reg (insn))
1092     return false;
1093 
1094   rtx src = SET_SRC (def_set);
1095   rtx dst = SET_DEST (def_set);
1096 
1097   /* Only TImode load and store are allowed.  */
1098   /* Only TImode loads and stores are allowed.  */
1099     return false;
1100 
1101   if (MEM_P (dst))
1102     {
1103       /* Check for a store.  The memory must be aligned, or an unaligned
1104 	 store must be optimal.  Only support a store from a register, a
1105 	 standard SSE constant or a CONST_WIDE_INT generated from a piecewise store.
1106 
1107 	 ??? Verify performance impact before enabling CONST_INT for
1108 	 __int128 store.  */
1109       if (misaligned_operand (dst, TImode)
1110 	  && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
1111 	return false;
1112 
1113       switch (GET_CODE (src))
1114 	{
1115 	default:
1116 	  return false;
1117 
1118 	case REG:
1119 	case CONST_WIDE_INT:
1120 	  return true;
1121 
1122 	case CONST_INT:
1123 	  return standard_sse_constant_p (src, TImode);
1124 	}
1125     }
1126   else if (MEM_P (src))
1127     {
1128       /* Check for a load.  The memory must be aligned, or an unaligned
1129 	 load must be optimal.  */
1130       return (REG_P (dst)
1131 	      && (!misaligned_operand (src, TImode)
1132 		  || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
1133     }
1134 
1135   return false;
1136 }
1137 
1138 /* Return 1 if INSN may be converted into a vector
1139    instruction.  */
1140 
1141 static bool
1142 scalar_to_vector_candidate_p (rtx_insn *insn)
1143 {
1144   if (TARGET_64BIT)
1145     return timode_scalar_to_vector_candidate_p (insn);
1146   else
1147     return dimode_scalar_to_vector_candidate_p (insn);
1148 }
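/* Editorial sketch (not part of the original source): a typical DImode
   candidate on an SSE2-capable 32-bit target is a 64-bit bitwise
   operation such as

     unsigned long long a, b;
     ...
     a |= b;

   whose DImode IOR insn passes dimode_scalar_to_vector_candidate_p and can
   later be rewritten by this pass to use V2DImode (SSE) registers instead
   of integer register pairs.  */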
1149 
1150 /* The DImode version of remove_non_convertible_regs.  */
1151 
1152 static void
1153 dimode_remove_non_convertible_regs (bitmap candidates)
1154 {
1155   bitmap_iterator bi;
1156   unsigned id;
1157   bitmap regs = BITMAP_ALLOC (NULL);
1158 
1159   EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
1160     {
1161       rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
1162       rtx reg = SET_DEST (def_set);
1163 
1164       if (!REG_P (reg)
1165 	  || bitmap_bit_p (regs, REGNO (reg))
1166 	  || HARD_REGISTER_P (reg))
1167 	continue;
1168 
1169       for (df_ref def = DF_REG_DEF_CHAIN (REGNO (reg));
1170 	   def;
1171 	   def = DF_REF_NEXT_REG (def))
1172 	{
1173 	  if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1174 	    {
1175 	      if (dump_file)
1176 		fprintf (dump_file,
1177 			 "r%d has non convertible definition in insn %d\n",
1178 			 REGNO (reg), DF_REF_INSN_UID (def));
1179 
1180 	      bitmap_set_bit (regs, REGNO (reg));
1181 	      break;
1182 	    }
1183 	}
1184     }
1185 
1186   EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
1187     {
1188       for (df_ref def = DF_REG_DEF_CHAIN (id);
1189 	   def;
1190 	   def = DF_REF_NEXT_REG (def))
1191 	if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1192 	  {
1193 	    if (dump_file)
1194 	      fprintf (dump_file, "Removing insn %d from candidates list\n",
1195 		       DF_REF_INSN_UID (def));
1196 
1197 	    bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
1198 	  }
1199     }
1200 
1201   BITMAP_FREE (regs);
1202 }
1203 
1204 /* For a register REGNO, scan instructions for its defs and uses.
1205    Put REGNO in REGS if a def or use isn't in CANDIDATES.  */
1206 
1207 static void
1208 timode_check_non_convertible_regs (bitmap candidates, bitmap regs,
1209 				   unsigned int regno)
1210 {
1211   for (df_ref def = DF_REG_DEF_CHAIN (regno);
1212        def;
1213        def = DF_REF_NEXT_REG (def))
1214     {
1215       if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1216 	{
1217 	  if (dump_file)
1218 	    fprintf (dump_file,
1219 		     "r%d has non convertible def in insn %d\n",
1220 		     regno, DF_REF_INSN_UID (def));
1221 
1222 	  bitmap_set_bit (regs, regno);
1223 	  break;
1224 	}
1225     }
1226 
1227   for (df_ref ref = DF_REG_USE_CHAIN (regno);
1228        ref;
1229        ref = DF_REF_NEXT_REG (ref))
1230     {
1231       /* Debug instructions are skipped.  */
1232       if (NONDEBUG_INSN_P (DF_REF_INSN (ref))
1233 	  && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
1234 	{
1235 	  if (dump_file)
1236 	    fprintf (dump_file,
1237 		     "r%d has non convertible use in insn %d\n",
1238 		     regno, DF_REF_INSN_UID (ref));
1239 
1240 	  bitmap_set_bit (regs, regno);
1241 	  break;
1242 	}
1243     }
1244 }
1245 
1246 /* The TImode version of remove_non_convertible_regs.  */
1247 
1248 static void
1249 timode_remove_non_convertible_regs (bitmap candidates)
1250 {
1251   bitmap_iterator bi;
1252   unsigned id;
1253   bitmap regs = BITMAP_ALLOC (NULL);
1254 
1255   EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
1256     {
1257       rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
1258       rtx dest = SET_DEST (def_set);
1259       rtx src = SET_SRC (def_set);
1260 
1261       if ((!REG_P (dest)
1262 	   || bitmap_bit_p (regs, REGNO (dest))
1263 	   || HARD_REGISTER_P (dest))
1264 	  && (!REG_P (src)
1265 	      || bitmap_bit_p (regs, REGNO (src))
1266 	      || HARD_REGISTER_P (src)))
1267 	continue;
1268 
1269       if (REG_P (dest))
1270 	timode_check_non_convertible_regs (candidates, regs,
1271 					   REGNO (dest));
1272 
1273       if (REG_P (src))
1274 	timode_check_non_convertible_regs (candidates, regs,
1275 					   REGNO (src));
1276     }
1277 
1278   EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
1279     {
1280       for (df_ref def = DF_REG_DEF_CHAIN (id);
1281 	   def;
1282 	   def = DF_REF_NEXT_REG (def))
1283 	if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1284 	  {
1285 	    if (dump_file)
1286 	      fprintf (dump_file, "Removing insn %d from candidates list\n",
1287 		       DF_REF_INSN_UID (def));
1288 
1289 	    bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
1290 	  }
1291 
1292       for (df_ref ref = DF_REG_USE_CHAIN (id);
1293 	   ref;
1294 	   ref = DF_REF_NEXT_REG (ref))
1295 	if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
1296 	  {
1297 	    if (dump_file)
1298 	      fprintf (dump_file, "Removing insn %d from candidates list\n",
1299 		       DF_REF_INSN_UID (ref));
1300 
1301 	    bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref));
1302 	  }
1303     }
1304 
1305   BITMAP_FREE (regs);
1306 }
1307 
1308 /* For a given bitmap of insn UIDs, scan all instructions and
1309    remove an insn from CANDIDATES if it has both convertible
1310    and non-convertible definitions.
1311 
1312    All insns in the bitmap are conversion candidates according to
1313    scalar_to_vector_candidate_p.  Currently this implies all insns
1314    are single_set.  */
1315 
1316 static void
1317 remove_non_convertible_regs (bitmap candidates)
1318 {
1319   if (TARGET_64BIT)
1320     timode_remove_non_convertible_regs (candidates);
1321   else
1322     dimode_remove_non_convertible_regs (candidates);
1323 }
1324 
1325 class scalar_chain
1326 {
1327  public:
1328   scalar_chain ();
1329   virtual ~scalar_chain ();
1330 
1331   static unsigned max_id;
1332 
1333   /* ID of a chain.  */
1334   unsigned int chain_id;
1335   /* A queue of instructions to be included into a chain.  */
1336   bitmap queue;
1337   /* Instructions included into a chain.  */
1338   bitmap insns;
1339   /* All registers defined by a chain.  */
1340   bitmap defs;
1341   /* Registers used in both vector and scalar modes.  */
1342   bitmap defs_conv;
1343 
1344   void build (bitmap candidates, unsigned insn_uid);
1345   virtual int compute_convert_gain () = 0;
1346   int convert ();
1347 
1348  protected:
1349   void add_to_queue (unsigned insn_uid);
1350   void emit_conversion_insns (rtx insns, rtx_insn *pos);
1351 
1352  private:
1353   void add_insn (bitmap candidates, unsigned insn_uid);
1354   void analyze_register_chain (bitmap candidates, df_ref ref);
1355   virtual void mark_dual_mode_def (df_ref def) = 0;
1356   virtual void convert_insn (rtx_insn *insn) = 0;
1357   virtual void convert_registers () = 0;
1358 };
1359 
1360 class dimode_scalar_chain : public scalar_chain
1361 {
1362  public:
1363   int compute_convert_gain ();
1364  private:
1365   void mark_dual_mode_def (df_ref def);
1366   rtx replace_with_subreg (rtx x, rtx reg, rtx subreg);
1367   void replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx subreg);
1368   void convert_insn (rtx_insn *insn);
1369   void convert_op (rtx *op, rtx_insn *insn);
1370   void convert_reg (unsigned regno);
1371   void make_vector_copies (unsigned regno);
1372   void convert_registers ();
1373   int vector_const_cost (rtx exp);
1374 };
1375 
1376 class timode_scalar_chain : public scalar_chain
1377 {
1378  public:
1379   /* Converting from TImode to V1TImode is always faster.  */
1380   int compute_convert_gain () { return 1; }
1381 
1382  private:
1383   void mark_dual_mode_def (df_ref def);
1384   void fix_debug_reg_uses (rtx reg);
1385   void convert_insn (rtx_insn *insn);
1386   /* We don't convert registers to a different size.  */
1387   void convert_registers () {}
1388 };
1389 
1390 unsigned scalar_chain::max_id = 0;
1391 
1392 /* Initialize new chain.  */
1393 
1394 scalar_chain::scalar_chain ()
1395 {
1396   chain_id = ++max_id;
1397 
1398    if (dump_file)
1399     fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);
1400 
1401   bitmap_obstack_initialize (NULL);
1402   insns = BITMAP_ALLOC (NULL);
1403   defs = BITMAP_ALLOC (NULL);
1404   defs_conv = BITMAP_ALLOC (NULL);
1405   queue = NULL;
1406 }
1407 
1408 /* Free chain's data.  */
1409 
1410 scalar_chain::~scalar_chain ()
1411 {
1412   BITMAP_FREE (insns);
1413   BITMAP_FREE (defs);
1414   BITMAP_FREE (defs_conv);
1415   bitmap_obstack_release (NULL);
1416 }
1417 
1418 /* Add an instruction into the chain's queue.  */
1419 
1420 void
1421 scalar_chain::add_to_queue (unsigned insn_uid)
1422 {
1423   if (bitmap_bit_p (insns, insn_uid)
1424       || bitmap_bit_p (queue, insn_uid))
1425     return;
1426 
1427   if (dump_file)
1428     fprintf (dump_file, "  Adding insn %d into chain's #%d queue\n",
1429 	     insn_uid, chain_id);
1430   bitmap_set_bit (queue, insn_uid);
1431 }
1432 
1433 /* For DImode conversion, mark register defined by DEF as requiring
1434    conversion.  */
1435 
1436 void
1437 dimode_scalar_chain::mark_dual_mode_def (df_ref def)
1438 {
1439   gcc_assert (DF_REF_REG_DEF_P (def));
1440 
1441   if (bitmap_bit_p (defs_conv, DF_REF_REGNO (def)))
1442     return;
1443 
1444   if (dump_file)
1445     fprintf (dump_file,
1446 	     "  Mark r%d def in insn %d as requiring both modes in chain #%d\n",
1447 	     DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
1448 
1449   bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
1450 }
1451 
1452 /* For TImode conversion, it is unused.  */
1453 
1454 void
1455 timode_scalar_chain::mark_dual_mode_def (df_ref)
1456 {
1457   gcc_unreachable ();
1458 }
1459 
1460 /* Check REF's chain to add new insns into a queue
1461    and find registers requiring conversion.  */
1462 
1463 void
1464 scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
1465 {
1466   df_link *chain;
1467 
1468   gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
1469 	      || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
1470   add_to_queue (DF_REF_INSN_UID (ref));
1471 
1472   for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
1473     {
1474       unsigned uid = DF_REF_INSN_UID (chain->ref);
1475 
1476       if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
1477 	continue;
1478 
1479       if (!DF_REF_REG_MEM_P (chain->ref))
1480 	{
1481 	  if (bitmap_bit_p (insns, uid))
1482 	    continue;
1483 
1484 	  if (bitmap_bit_p (candidates, uid))
1485 	    {
1486 	      add_to_queue (uid);
1487 	      continue;
1488 	    }
1489 	}
1490 
1491       if (DF_REF_REG_DEF_P (chain->ref))
1492 	{
1493 	  if (dump_file)
1494 	    fprintf (dump_file, "  r%d def in insn %d isn't convertible\n",
1495 		     DF_REF_REGNO (chain->ref), uid);
1496 	  mark_dual_mode_def (chain->ref);
1497 	}
1498       else
1499 	{
1500 	  if (dump_file)
1501 	    fprintf (dump_file, "  r%d use in insn %d isn't convertible\n",
1502 		     DF_REF_REGNO (chain->ref), uid);
1503 	  mark_dual_mode_def (ref);
1504 	}
1505     }
1506 }
1507 
1508 /* Add an instruction into a chain.  */
1509 
1510 void
1511 scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
1512 {
1513   if (bitmap_bit_p (insns, insn_uid))
1514     return;
1515 
1516   if (dump_file)
1517     fprintf (dump_file, "  Adding insn %d to chain #%d\n", insn_uid, chain_id);
1518 
1519   bitmap_set_bit (insns, insn_uid);
1520 
1521   rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
1522   rtx def_set = single_set (insn);
1523   if (def_set && REG_P (SET_DEST (def_set))
1524       && !HARD_REGISTER_P (SET_DEST (def_set)))
1525     bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));
1526 
1527   df_ref ref;
1528   df_ref def;
1529   for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
1530     if (!HARD_REGISTER_P (DF_REF_REG (ref)))
1531       for (def = DF_REG_DEF_CHAIN (DF_REF_REGNO (ref));
1532 	   def;
1533 	   def = DF_REF_NEXT_REG (def))
1534 	analyze_register_chain (candidates, def);
1535   for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
1536     if (!DF_REF_REG_MEM_P (ref))
1537       analyze_register_chain (candidates, ref);
1538 }
1539 
1540 /* Build new chain starting from insn INSN_UID recursively
1541    adding all dependent uses and definitions.  */
1542 
1543 void
1544 scalar_chain::build (bitmap candidates, unsigned insn_uid)
1545 {
1546   queue = BITMAP_ALLOC (NULL);
1547   bitmap_set_bit (queue, insn_uid);
1548 
1549   if (dump_file)
1550     fprintf (dump_file, "Building chain #%d...\n", chain_id);
1551 
1552   while (!bitmap_empty_p (queue))
1553     {
1554       insn_uid = bitmap_first_set_bit (queue);
1555       bitmap_clear_bit (queue, insn_uid);
1556       bitmap_clear_bit (candidates, insn_uid);
1557       add_insn (candidates, insn_uid);
1558     }
1559 
1560   if (dump_file)
1561     {
1562       fprintf (dump_file, "Collected chain #%d...\n", chain_id);
1563       fprintf (dump_file, "  insns: ");
1564       dump_bitmap (dump_file, insns);
1565       if (!bitmap_empty_p (defs_conv))
1566 	{
1567 	  bitmap_iterator bi;
1568 	  unsigned id;
1569 	  const char *comma = "";
1570 	  fprintf (dump_file, "  defs to convert: ");
1571 	  EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
1572 	    {
1573 	      fprintf (dump_file, "%sr%d", comma, id);
1574 	      comma = ", ";
1575 	    }
1576 	  fprintf (dump_file, "\n");
1577 	}
1578     }
1579 
1580   BITMAP_FREE (queue);
1581 }
1582 
1583 /* Return the cost of building a vector constant
1584    instead of using a scalar one.  */
1585 
1586 int
1587 dimode_scalar_chain::vector_const_cost (rtx exp)
1588 {
1589   gcc_assert (CONST_INT_P (exp));
1590 
1591   if (standard_sse_constant_p (exp, V2DImode))
1592     return COSTS_N_INSNS (1);
1593   return ix86_cost->sse_load[1];
1594 }
1595 
1596 /* Compute a gain for chain conversion.  */
1597 
1598 int
1599 dimode_scalar_chain::compute_convert_gain ()
1600 {
1601   bitmap_iterator bi;
1602   unsigned insn_uid;
1603   int gain = 0;
1604   int cost = 0;
1605 
1606   if (dump_file)
1607     fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
1608 
1609   EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
1610     {
1611       rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
1612       rtx def_set = single_set (insn);
1613       rtx src = SET_SRC (def_set);
1614       rtx dst = SET_DEST (def_set);
1615 
1616       if (REG_P (src) && REG_P (dst))
1617 	gain += COSTS_N_INSNS (2) - ix86_cost->xmm_move;
1618       else if (REG_P (src) && MEM_P (dst))
1619 	gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
1620       else if (MEM_P (src) && REG_P (dst))
1621 	gain += 2 * ix86_cost->int_load[2] - ix86_cost->sse_load[1];
1622       else if (GET_CODE (src) == ASHIFT
1623 	       || GET_CODE (src) == ASHIFTRT
1624 	       || GET_CODE (src) == LSHIFTRT)
1625 	{
1626     	  if (CONST_INT_P (XEXP (src, 0)))
1627 	    gain -= vector_const_cost (XEXP (src, 0));
1628 	  if (CONST_INT_P (XEXP (src, 1)))
1629 	    {
1630 	      gain += ix86_cost->shift_const;
1631 	      if (INTVAL (XEXP (src, 1)) >= 32)
1632 		gain -= COSTS_N_INSNS (1);
1633 	    }
1634 	  else
1635 	    /* Additional gain for omitting two CMOVs.  */
1636 	    gain += ix86_cost->shift_var + COSTS_N_INSNS (2);
1637 	}
1638       else if (GET_CODE (src) == PLUS
1639 	       || GET_CODE (src) == MINUS
1640 	       || GET_CODE (src) == IOR
1641 	       || GET_CODE (src) == XOR
1642 	       || GET_CODE (src) == AND)
1643 	{
1644 	  gain += ix86_cost->add;
1645 	  /* Additional gain for andnot for targets without BMI.  */
1646 	  if (GET_CODE (XEXP (src, 0)) == NOT
1647 	      && !TARGET_BMI)
1648 	    gain += 2 * ix86_cost->add;
1649 
1650 	  if (CONST_INT_P (XEXP (src, 0)))
1651 	    gain -= vector_const_cost (XEXP (src, 0));
1652 	  if (CONST_INT_P (XEXP (src, 1)))
1653 	    gain -= vector_const_cost (XEXP (src, 1));
1654 	}
1655       else if (GET_CODE (src) == NEG
1656 	       || GET_CODE (src) == NOT)
1657 	gain += ix86_cost->add - COSTS_N_INSNS (1);
1658       else if (GET_CODE (src) == COMPARE)
1659 	{
1660 	  /* Assume comparison cost is the same.  */
1661 	}
1662       else if (CONST_INT_P (src))
1663 	{
1664 	  if (REG_P (dst))
1665 	    gain += COSTS_N_INSNS (2);
1666 	  else if (MEM_P (dst))
1667 	    gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
1668 	  gain -= vector_const_cost (src);
1669 	}
1670       else
1671 	gcc_unreachable ();
1672     }
1673 
1674   if (dump_file)
1675     fprintf (dump_file, "  Instruction conversion gain: %d\n", gain);
1676 
1677   EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, insn_uid, bi)
1678     cost += DF_REG_DEF_COUNT (insn_uid) * ix86_cost->mmxsse_to_integer;
1679 
1680   if (dump_file)
1681     fprintf (dump_file, "  Registers conversion cost: %d\n", cost);
1682 
1683   gain -= cost;
1684 
1685   if (dump_file)
1686     fprintf (dump_file, "  Total gain: %d\n", gain);
1687 
1688   return gain;
1689 }
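
/* An illustrative sketch of the accounting above, not an exact model of
   any particular CPU: for a chain that is a single DImode reg-to-reg
   move, the loop credits COSTS_N_INSNS (2) for the pair of 32-bit moves
   that would otherwise be needed and debits ix86_cost->xmm_move for the
   vector move that replaces them.  Each definition of a register that
   must be transferred back to the integer side then costs an additional
   ix86_cost->mmxsse_to_integer, which is subtracted before the total
   gain is returned.  */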
1690 
1691 /* Replace REG in X with a V2DI subreg of NEW_REG.  */
1692 
1693 rtx
1694 dimode_scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg)
1695 {
1696   if (x == reg)
1697     return gen_rtx_SUBREG (V2DImode, new_reg, 0);
1698 
1699   const char *fmt = GET_RTX_FORMAT (GET_CODE (x));
1700   int i, j;
1701   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
1702     {
1703       if (fmt[i] == 'e')
1704 	XEXP (x, i) = replace_with_subreg (XEXP (x, i), reg, new_reg);
1705       else if (fmt[i] == 'E')
1706 	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
1707 	  XVECEXP (x, i, j) = replace_with_subreg (XVECEXP (x, i, j),
1708 						   reg, new_reg);
1709     }
1710 
1711   return x;
1712 }
1713 
1714 /* Replace REG in INSN with a V2DI subreg of NEW_REG.  */
1715 
1716 void
1717 dimode_scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn,
1718 						  rtx reg, rtx new_reg)
1719 {
1720   replace_with_subreg (single_set (insn), reg, new_reg);
1721 }
1722 
1723 /* Insert the generated conversion instruction sequence INSNS
1724    after instruction AFTER.  A new BB may be required if the
1725    instruction has an EH region attached.  */
1726 
1727 void
1728 scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
1729 {
1730   if (!control_flow_insn_p (after))
1731     {
1732       emit_insn_after (insns, after);
1733       return;
1734     }
1735 
1736   basic_block bb = BLOCK_FOR_INSN (after);
1737   edge e = find_fallthru_edge (bb->succs);
1738   gcc_assert (e);
1739 
1740   basic_block new_bb = split_edge (e);
1741   emit_insn_after (insns, BB_HEAD (new_bb));
1742 }
1743 
1744 /* Make vector copies for all definitions of register REGNO
1745    and replace its uses in the chain.  */
1746 
1747 void
1748 dimode_scalar_chain::make_vector_copies (unsigned regno)
1749 {
1750   rtx reg = regno_reg_rtx[regno];
1751   rtx vreg = gen_reg_rtx (DImode);
1752   bool count_reg = false;
1753   df_ref ref;
1754 
1755   for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
1756     if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
1757       {
1758 	df_ref use;
1759 
1760 	/* Detect the count register of a shift instruction.  */
1761 	for (use = DF_REG_USE_CHAIN (regno); use; use = DF_REF_NEXT_REG (use))
1762 	  if (bitmap_bit_p (insns, DF_REF_INSN_UID (use)))
1763 	    {
1764 	      rtx_insn *insn = DF_REF_INSN (use);
1765 	      rtx def_set = single_set (insn);
1766 
1767 	      gcc_assert (def_set);
1768 
1769 	      rtx src = SET_SRC (def_set);
1770 
1771 	      if ((GET_CODE (src) == ASHIFT
1772 		   || GET_CODE (src) == ASHIFTRT
1773 		   || GET_CODE (src) == LSHIFTRT)
1774 		  && !CONST_INT_P (XEXP (src, 1))
1775 		  && reg_or_subregno (XEXP (src, 1)) == regno)
1776 		count_reg = true;
1777 	    }
1778 
1779 	start_sequence ();
1780 	if (count_reg)
1781 	  {
1782 	    rtx qreg = gen_lowpart (QImode, reg);
1783 	    rtx tmp = gen_reg_rtx (SImode);
1784 
1785 	    if (TARGET_ZERO_EXTEND_WITH_AND
1786 		&& optimize_function_for_speed_p (cfun))
1787 	      {
1788 		emit_move_insn (tmp, const0_rtx);
1789 		emit_insn (gen_movstrictqi
1790 			   (gen_lowpart (QImode, tmp), qreg));
1791 	      }
1792 	    else
1793 	      emit_insn (gen_rtx_SET
1794 			 (tmp, gen_rtx_ZERO_EXTEND (SImode, qreg)));
1795 
1796 	    if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
1797 	      {
1798 		rtx slot = assign_386_stack_local (SImode, SLOT_STV_TEMP);
1799 		emit_move_insn (slot, tmp);
1800 		tmp = copy_rtx (slot);
1801 	      }
1802 
1803 	    emit_insn (gen_zero_extendsidi2 (vreg, tmp));
1804 	  }
1805 	else if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
1806 	  {
1807 	    rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP);
1808 	    emit_move_insn (adjust_address (tmp, SImode, 0),
1809 			    gen_rtx_SUBREG (SImode, reg, 0));
1810 	    emit_move_insn (adjust_address (tmp, SImode, 4),
1811 			    gen_rtx_SUBREG (SImode, reg, 4));
1812 	    emit_move_insn (vreg, tmp);
1813 	  }
1814 	else if (TARGET_SSE4_1)
1815 	  {
1816 	    emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
1817 					CONST0_RTX (V4SImode),
1818 					gen_rtx_SUBREG (SImode, reg, 0)));
1819 	    emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
1820 					  gen_rtx_SUBREG (V4SImode, vreg, 0),
1821 					  gen_rtx_SUBREG (SImode, reg, 4),
1822 					  GEN_INT (2)));
1823 	  }
1824 	else
1825 	  {
1826 	    rtx tmp = gen_reg_rtx (DImode);
1827 	    emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
1828 					CONST0_RTX (V4SImode),
1829 					gen_rtx_SUBREG (SImode, reg, 0)));
1830 	    emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
1831 					CONST0_RTX (V4SImode),
1832 					gen_rtx_SUBREG (SImode, reg, 4)));
1833 	    emit_insn (gen_vec_interleave_lowv4si
1834 		       (gen_rtx_SUBREG (V4SImode, vreg, 0),
1835 			gen_rtx_SUBREG (V4SImode, vreg, 0),
1836 			gen_rtx_SUBREG (V4SImode, tmp, 0)));
1837 	  }
1838 	rtx_insn *seq = get_insns ();
1839 	end_sequence ();
1840 	rtx_insn *insn = DF_REF_INSN (ref);
1841 	emit_conversion_insns (seq, insn);
1842 
1843 	if (dump_file)
1844 	  fprintf (dump_file,
1845 		   "  Copied r%d to a vector register r%d for insn %d\n",
1846 		   regno, REGNO (vreg), INSN_UID (insn));
1847       }
1848 
1849   for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
1850     if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
1851       {
1852 	rtx_insn *insn = DF_REF_INSN (ref);
1853 	if (count_reg)
1854 	  {
1855 	    rtx def_set = single_set (insn);
1856 	    gcc_assert (def_set);
1857 
1858 	    rtx src = SET_SRC (def_set);
1859 
1860 	    if ((GET_CODE (src) == ASHIFT
1861 		 || GET_CODE (src) == ASHIFTRT
1862 		 || GET_CODE (src) == LSHIFTRT)
1863 		&& !CONST_INT_P (XEXP (src, 1))
1864 		&& reg_or_subregno (XEXP (src, 1)) == regno)
1865 	      XEXP (src, 1) = vreg;
1866 	  }
1867 	else
1868 	  replace_with_subreg_in_insn (insn, reg, vreg);
1869 
1870 	if (dump_file)
1871 	  fprintf (dump_file, "  Replaced r%d with r%d in insn %d\n",
1872 		   regno, REGNO (vreg), INSN_UID (insn));
1873       }
1874 }
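
/* A sketch of the special case handled above, stated informally rather
   than copied from any documentation: when REGNO is used only as the
   variable count of a shift inside the chain, no full 64-bit vector copy
   is built.  Instead the low QImode part of the count is zero-extended
   to SImode and then to DImode in VREG, so the vector shift patterns can
   consume the count directly, and XEXP (src, 1) of each shift in the
   chain is rewritten to use VREG.  */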
1875 
1876 /* Convert all definitions of register REGNO
1877    and fix its uses.  Scalar copies may be created
1878    when the register is used in a non-convertible insn.  */
1879 
1880 void
1881 dimode_scalar_chain::convert_reg (unsigned regno)
1882 {
1883   bool scalar_copy = bitmap_bit_p (defs_conv, regno);
1884   rtx reg = regno_reg_rtx[regno];
1885   rtx scopy = NULL_RTX;
1886   df_ref ref;
1887   bitmap conv;
1888 
1889   conv = BITMAP_ALLOC (NULL);
1890   bitmap_copy (conv, insns);
1891 
1892   if (scalar_copy)
1893     scopy = gen_reg_rtx (DImode);
1894 
1895   for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
1896     {
1897       rtx_insn *insn = DF_REF_INSN (ref);
1898       rtx def_set = single_set (insn);
1899       rtx src = SET_SRC (def_set);
1900       rtx reg = DF_REF_REG (ref);
1901 
1902       if (!MEM_P (src))
1903 	{
1904 	  replace_with_subreg_in_insn (insn, reg, reg);
1905 	  bitmap_clear_bit (conv, INSN_UID (insn));
1906 	}
1907 
1908       if (scalar_copy)
1909 	{
1910 	  start_sequence ();
1911 	  if (!TARGET_INTER_UNIT_MOVES_FROM_VEC)
1912 	    {
1913 	      rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP);
1914 	      emit_move_insn (tmp, reg);
1915 	      emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
1916 			      adjust_address (tmp, SImode, 0));
1917 	      emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
1918 			      adjust_address (tmp, SImode, 4));
1919 	    }
1920 	  else if (TARGET_SSE4_1)
1921 	    {
1922 	      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
1923 	      emit_insn
1924 		(gen_rtx_SET
1925 		 (gen_rtx_SUBREG (SImode, scopy, 0),
1926 		  gen_rtx_VEC_SELECT (SImode,
1927 				      gen_rtx_SUBREG (V4SImode, reg, 0), tmp)));
1928 
1929 	      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx));
1930 	      emit_insn
1931 		(gen_rtx_SET
1932 		 (gen_rtx_SUBREG (SImode, scopy, 4),
1933 		  gen_rtx_VEC_SELECT (SImode,
1934 				      gen_rtx_SUBREG (V4SImode, reg, 0), tmp)));
1935 	    }
1936 	  else
1937 	    {
1938 	      rtx vcopy = gen_reg_rtx (V2DImode);
1939 	      emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, reg, 0));
1940 	      emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
1941 			      gen_rtx_SUBREG (SImode, vcopy, 0));
1942 	      emit_move_insn (vcopy,
1943 			      gen_rtx_LSHIFTRT (V2DImode, vcopy, GEN_INT (32)));
1944 	      emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
1945 			      gen_rtx_SUBREG (SImode, vcopy, 0));
1946 	    }
1947 	  rtx_insn *seq = get_insns ();
1948 	  end_sequence ();
1949 	  emit_conversion_insns (seq, insn);
1950 
1951 	  if (dump_file)
1952 	    fprintf (dump_file,
1953 		     "  Copied r%d to a scalar register r%d for insn %d\n",
1954 		     regno, REGNO (scopy), INSN_UID (insn));
1955 	}
1956     }
1957 
1958   for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
1959     if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
1960       {
1961 	if (bitmap_bit_p (conv, DF_REF_INSN_UID (ref)))
1962 	  {
1963 	    rtx_insn *insn = DF_REF_INSN (ref);
1964 
1965 	    rtx def_set = single_set (insn);
1966 	    gcc_assert (def_set);
1967 
1968 	    rtx src = SET_SRC (def_set);
1969 	    rtx dst = SET_DEST (def_set);
1970 
1971 	    if ((GET_CODE (src) == ASHIFT
1972 		 || GET_CODE (src) == ASHIFTRT
1973 		 || GET_CODE (src) == LSHIFTRT)
1974 		&& !CONST_INT_P (XEXP (src, 1))
1975 		&& reg_or_subregno (XEXP (src, 1)) == regno)
1976 	      {
1977 		rtx tmp2 = gen_reg_rtx (V2DImode);
1978 
1979 		start_sequence ();
1980 
1981 		if (TARGET_SSE4_1)
1982 		  emit_insn (gen_sse4_1_zero_extendv2qiv2di2
1983 			     (tmp2, gen_rtx_SUBREG (V16QImode, reg, 0)));
1984 		else
1985 		  {
1986 		    rtx vec_cst
1987 		      = gen_rtx_CONST_VECTOR (V2DImode,
1988 					      gen_rtvec (2, GEN_INT (0xff),
1989 							 const0_rtx));
1990 		    vec_cst
1991 		      = validize_mem (force_const_mem (V2DImode, vec_cst));
1992 
1993 		    emit_insn (gen_rtx_SET
1994 			       (tmp2,
1995 				gen_rtx_AND (V2DImode,
1996 					     gen_rtx_SUBREG (V2DImode, reg, 0),
1997 					     vec_cst)));
1998 		  }
1999 		rtx_insn *seq = get_insns ();
2000 		end_sequence ();
2001 
2002 		emit_insn_before (seq, insn);
2003 
2004 		XEXP (src, 1) = gen_rtx_SUBREG (DImode, tmp2, 0);
2005 	      }
2006 	    else if (!MEM_P (dst) || !REG_P (src))
2007 	      replace_with_subreg_in_insn (insn, reg, reg);
2008 
2009 	    bitmap_clear_bit (conv, INSN_UID (insn));
2010 	  }
2011       }
2012     /* Skip debug insns and uninitialized uses.  */
2013     else if (DF_REF_CHAIN (ref)
2014 	     && NONDEBUG_INSN_P (DF_REF_INSN (ref)))
2015       {
2016 	gcc_assert (scopy);
2017 	replace_rtx (DF_REF_INSN (ref), reg, scopy);
2018 	df_insn_rescan (DF_REF_INSN (ref));
2019       }
2020 
2021   BITMAP_FREE (conv);
2022 }
2023 
2024 /* Convert operand OP in INSN.  We need to handle
2025    memory operands and uninitialized registers here.
2026    All other register uses are converted during
2027    register conversion.  */
2028 
2029 void
2030 dimode_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
2031 {
2032   *op = copy_rtx_if_shared (*op);
2033 
2034   if (GET_CODE (*op) == NOT)
2035     {
2036       convert_op (&XEXP (*op, 0), insn);
2037       PUT_MODE (*op, V2DImode);
2038     }
2039   else if (MEM_P (*op))
2040     {
2041       rtx tmp = gen_reg_rtx (DImode);
2042 
2043       emit_insn_before (gen_move_insn (tmp, *op), insn);
2044       *op = gen_rtx_SUBREG (V2DImode, tmp, 0);
2045 
2046       if (dump_file)
2047 	fprintf (dump_file, "  Preloading operand for insn %d into r%d\n",
2048 		 INSN_UID (insn), REGNO (tmp));
2049     }
2050   else if (REG_P (*op))
2051     {
2052       /* The register use may not have been converted if this
2053 	 register has no definition.  Otherwise it should already
2054 	 have been converted in convert_reg.  */
2055       df_ref ref;
2056       FOR_EACH_INSN_USE (ref, insn)
2057 	if (DF_REF_REGNO (ref) == REGNO (*op))
2058 	  {
2059 	    gcc_assert (!DF_REF_CHAIN (ref));
2060 	    break;
2061 	  }
2062       *op = gen_rtx_SUBREG (V2DImode, *op, 0);
2063     }
2064   else if (CONST_INT_P (*op))
2065     {
2066       rtx vec_cst;
2067       rtx tmp = gen_rtx_SUBREG (V2DImode, gen_reg_rtx (DImode), 0);
2068 
2069       /* Prefer the all-ones vector in case of -1.  */
2070       if (constm1_operand (*op, GET_MODE (*op)))
2071 	vec_cst = CONSTM1_RTX (V2DImode);
2072       else
2073 	vec_cst = gen_rtx_CONST_VECTOR (V2DImode,
2074 					gen_rtvec (2, *op, const0_rtx));
2075 
2076       if (!standard_sse_constant_p (vec_cst, V2DImode))
2077 	{
2078 	  start_sequence ();
2079 	  vec_cst = validize_mem (force_const_mem (V2DImode, vec_cst));
2080 	  rtx_insn *seq = get_insns ();
2081 	  end_sequence ();
2082 	  emit_insn_before (seq, insn);
2083 	}
2084 
2085       emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn);
2086       *op = tmp;
2087     }
2088   else
2089     {
2090       gcc_assert (SUBREG_P (*op));
2091       gcc_assert (GET_MODE (*op) == V2DImode);
2092     }
2093 }
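
/* For illustration, summarizing the CONST_INT case above rather than
   adding a new rule: a scalar constant C becomes the V2DImode vector
   { C, 0 }, except that -1 becomes the all-ones vector, which
   standard_sse_constant_p recognizes as cheap to materialize; any other
   vector constant is forced into the constant pool and loaded from
   memory before INSN.  */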
2094 
2095 /* Convert INSN to vector mode.  */
2096 
2097 void
2098 dimode_scalar_chain::convert_insn (rtx_insn *insn)
2099 {
2100   rtx def_set = single_set (insn);
2101   rtx src = SET_SRC (def_set);
2102   rtx dst = SET_DEST (def_set);
2103   rtx subreg;
2104 
2105   if (MEM_P (dst) && !REG_P (src))
2106     {
2107       /* The converted vector instruction cannot store its result
2108 	 directly to memory, so a temporary register is required.  */
2109       rtx tmp = gen_reg_rtx (DImode);
2110       emit_conversion_insns (gen_move_insn (dst, tmp), insn);
2111       dst = gen_rtx_SUBREG (V2DImode, tmp, 0);
2112     }
2113 
2114   switch (GET_CODE (src))
2115     {
2116     case ASHIFT:
2117     case ASHIFTRT:
2118     case LSHIFTRT:
2119       convert_op (&XEXP (src, 0), insn);
2120       PUT_MODE (src, V2DImode);
2121       break;
2122 
2123     case PLUS:
2124     case MINUS:
2125     case IOR:
2126     case XOR:
2127     case AND:
2128       convert_op (&XEXP (src, 0), insn);
2129       convert_op (&XEXP (src, 1), insn);
2130       PUT_MODE (src, V2DImode);
2131       break;
2132 
2133     case NEG:
2134       src = XEXP (src, 0);
2135       convert_op (&src, insn);
2136       subreg = gen_reg_rtx (V2DImode);
2137       emit_insn_before (gen_move_insn (subreg, CONST0_RTX (V2DImode)), insn);
2138       src = gen_rtx_MINUS (V2DImode, subreg, src);
2139       break;
2140 
2141     case NOT:
2142       src = XEXP (src, 0);
2143       convert_op (&src, insn);
2144       subreg = gen_reg_rtx (V2DImode);
2145       emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (V2DImode)), insn);
2146       src = gen_rtx_XOR (V2DImode, src, subreg);
2147       break;
2148 
2149     case MEM:
2150       if (!REG_P (dst))
2151 	convert_op (&src, insn);
2152       break;
2153 
2154     case REG:
2155       if (!MEM_P (dst))
2156 	convert_op (&src, insn);
2157       break;
2158 
2159     case SUBREG:
2160       gcc_assert (GET_MODE (src) == V2DImode);
2161       break;
2162 
2163     case COMPARE:
2164       src = SUBREG_REG (XEXP (XEXP (src, 0), 0));
2165 
2166       gcc_assert ((REG_P (src) && GET_MODE (src) == DImode)
2167 		  || (SUBREG_P (src) && GET_MODE (src) == V2DImode));
2168 
2169       if (REG_P (src))
2170 	subreg = gen_rtx_SUBREG (V2DImode, src, 0);
2171       else
2172 	subreg = copy_rtx_if_shared (src);
2173       emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg),
2174 						    copy_rtx_if_shared (subreg),
2175 						    copy_rtx_if_shared (subreg)),
2176 			insn);
2177       dst = gen_rtx_REG (CCmode, FLAGS_REG);
2178       src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (src),
2179 					       copy_rtx_if_shared (src)),
2180 			    UNSPEC_PTEST);
2181       break;
2182 
2183     case CONST_INT:
2184       convert_op (&src, insn);
2185       break;
2186 
2187     default:
2188       gcc_unreachable ();
2189     }
2190 
2191   SET_SRC (def_set) = src;
2192   SET_DEST (def_set) = dst;
2193 
2194   /* Drop possible dead definitions.  */
2195   PATTERN (insn) = def_set;
2196 
2197   INSN_CODE (insn) = -1;
2198   recog_memoized (insn);
2199   df_insn_rescan (insn);
2200 }
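
/* A few informal examples of the rewrites performed above (illustrative
   only, as read from the cases in this function): a scalar NEG becomes a
   V2DImode MINUS from a zero vector, a scalar NOT becomes an XOR with
   the all-ones vector, and a supported COMPARE (a test of a 64-bit value
   against zero) is replaced by a PTEST-style UNSPEC setting FLAGS_REG,
   after vec_interleave_lowv2di duplicates the low quadword so the whole
   128-bit register reflects only the original 64 bits.  */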
2201 
2202 /* Fix uses of converted REG in debug insns.  */
2203 
2204 void
2205 timode_scalar_chain::fix_debug_reg_uses (rtx reg)
2206 {
2207   if (!flag_var_tracking)
2208     return;
2209 
2210   df_ref ref, next;
2211   for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next)
2212     {
2213       rtx_insn *insn = DF_REF_INSN (ref);
2214       /* Make sure the next ref is for a different instruction,
2215          so that we're not affected by the rescan.  */
2216       next = DF_REF_NEXT_REG (ref);
2217       while (next && DF_REF_INSN (next) == insn)
2218 	next = DF_REF_NEXT_REG (next);
2219 
2220       if (DEBUG_INSN_P (insn))
2221 	{
2222 	  /* It may be a debug insn with a TImode variable in
2223 	     a register.  */
2224 	  bool changed = false;
2225 	  for (; ref != next; ref = DF_REF_NEXT_REG (ref))
2226 	    {
2227 	      rtx *loc = DF_REF_LOC (ref);
2228 	      if (REG_P (*loc) && GET_MODE (*loc) == V1TImode)
2229 		{
2230 		  *loc = gen_rtx_SUBREG (TImode, *loc, 0);
2231 		  changed = true;
2232 		}
2233 	    }
2234 	  if (changed)
2235 	    df_insn_rescan (insn);
2236 	}
2237     }
2238 }
2239 
2240 /* Convert INSN from TImode to V1TImode.  */
2241 
2242 void
2243 timode_scalar_chain::convert_insn (rtx_insn *insn)
2244 {
2245   rtx def_set = single_set (insn);
2246   rtx src = SET_SRC (def_set);
2247   rtx dst = SET_DEST (def_set);
2248 
2249   switch (GET_CODE (dst))
2250     {
2251     case REG:
2252       {
2253 	rtx tmp = find_reg_equal_equiv_note (insn);
2254 	if (tmp)
2255 	  PUT_MODE (XEXP (tmp, 0), V1TImode);
2256 	PUT_MODE (dst, V1TImode);
2257 	fix_debug_reg_uses (dst);
2258       }
2259       break;
2260     case MEM:
2261       PUT_MODE (dst, V1TImode);
2262       break;
2263 
2264     default:
2265       gcc_unreachable ();
2266     }
2267 
2268   switch (GET_CODE (src))
2269     {
2270     case REG:
2271       PUT_MODE (src, V1TImode);
2272       /* Call fix_debug_reg_uses only if SRC is never defined.  */
2273       if (!DF_REG_DEF_CHAIN (REGNO (src)))
2274 	fix_debug_reg_uses (src);
2275       break;
2276 
2277     case MEM:
2278       PUT_MODE (src, V1TImode);
2279       break;
2280 
2281     case CONST_WIDE_INT:
2282       if (NONDEBUG_INSN_P (insn))
2283 	{
2284 	  /* Since there is no instruction to store a 128-bit constant,
2285 	     a temporary register is required.  */
2286 	  rtx tmp = gen_reg_rtx (V1TImode);
2287 	  start_sequence ();
2288 	  src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
2289 	  src = validize_mem (force_const_mem (V1TImode, src));
2290 	  rtx_insn *seq = get_insns ();
2291 	  end_sequence ();
2292 	  if (seq)
2293 	    emit_insn_before (seq, insn);
2294 	  emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
2295 	  dst = tmp;
2296 	}
2297       break;
2298 
2299     case CONST_INT:
2300       switch (standard_sse_constant_p (src, TImode))
2301 	{
2302 	case 1:
2303 	  src = CONST0_RTX (GET_MODE (dst));
2304 	  break;
2305 	case 2:
2306 	  src = CONSTM1_RTX (GET_MODE (dst));
2307 	  break;
2308 	default:
2309 	  gcc_unreachable ();
2310 	}
2311       if (NONDEBUG_INSN_P (insn))
2312 	{
2313 	  rtx tmp = gen_reg_rtx (V1TImode);
2314 	  /* Since there is no instruction to store a standard SSE
2315 	     constant, a temporary register is required.  */
2316 	  emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
2317 	  dst = tmp;
2318 	}
2319       break;
2320 
2321     default:
2322       gcc_unreachable ();
2323     }
2324 
2325   SET_SRC (def_set) = src;
2326   SET_DEST (def_set) = dst;
2327 
2328   /* Drop possible dead definitions.  */
2329   PATTERN (insn) = def_set;
2330 
2331   INSN_CODE (insn) = -1;
2332   recog_memoized (insn);
2333   df_insn_rescan (insn);
2334 }
2335 
2336 void
2337 dimode_scalar_chain::convert_registers ()
2338 {
2339   bitmap_iterator bi;
2340   unsigned id;
2341 
2342   EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi)
2343     convert_reg (id);
2344 
2345   EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi)
2346     make_vector_copies (id);
2347 }
2348 
2349 /* Convert the whole chain, creating the required register
2350    conversions and copies.  */
2351 
2352 int
2353 scalar_chain::convert ()
2354 {
2355   bitmap_iterator bi;
2356   unsigned id;
2357   int converted_insns = 0;
2358 
2359   if (!dbg_cnt (stv_conversion))
2360     return 0;
2361 
2362   if (dump_file)
2363     fprintf (dump_file, "Converting chain #%d...\n", chain_id);
2364 
2365   convert_registers ();
2366 
2367   EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
2368     {
2369       convert_insn (DF_INSN_UID_GET (id)->insn);
2370       converted_insns++;
2371     }
2372 
2373   return converted_insns;
2374 }
2375 
2376 /* Main STV pass function.  Find and convert scalar
2377    instructions into vector mode when profitable.  */
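/* An informal example of the transformation on 32-bit targets (a sketch,
   not a statement about the exact assembly produced): for

     long long f (long long a, long long b) { return a & b; }

   the DImode AND would normally be lowered to two 32-bit AND
   instructions; when the chain built below is judged profitable, the
   operation is instead performed as a single V2DImode AND in an SSE
   register, with the copies between integer and vector registers
   inserted by the chain conversion code above.  */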
2378 
2379 static unsigned int
2380 convert_scalars_to_vector ()
2381 {
2382   basic_block bb;
2383   bitmap candidates;
2384   int converted_insns = 0;
2385 
2386   bitmap_obstack_initialize (NULL);
2387   candidates = BITMAP_ALLOC (NULL);
2388 
2389   calculate_dominance_info (CDI_DOMINATORS);
2390   df_set_flags (DF_DEFER_INSN_RESCAN);
2391   df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
2392   df_md_add_problem ();
2393   df_analyze ();
2394 
2395   /* Find all instructions we want to convert into vector mode.  */
2396   if (dump_file)
2397     fprintf (dump_file, "Searching for mode conversion candidates...\n");
2398 
2399   FOR_EACH_BB_FN (bb, cfun)
2400     {
2401       rtx_insn *insn;
2402       FOR_BB_INSNS (bb, insn)
2403 	if (scalar_to_vector_candidate_p (insn))
2404 	  {
2405 	    if (dump_file)
2406 	      fprintf (dump_file, "  insn %d is marked as a candidate\n",
2407 		       INSN_UID (insn));
2408 
2409 	    bitmap_set_bit (candidates, INSN_UID (insn));
2410 	  }
2411     }
2412 
2413   remove_non_convertible_regs (candidates);
2414 
2415   if (bitmap_empty_p (candidates))
2416     if (dump_file)
2417       fprintf (dump_file, "There are no candidates for optimization.\n");
2418 
2419   while (!bitmap_empty_p (candidates))
2420     {
2421       unsigned uid = bitmap_first_set_bit (candidates);
2422       scalar_chain *chain;
2423 
2424       if (TARGET_64BIT)
2425 	chain = new timode_scalar_chain;
2426       else
2427 	chain = new dimode_scalar_chain;
2428 
2429       /* Find the instruction chain we want to convert to vector mode.
2430 	 Check all uses and definitions to estimate all required
2431 	 conversions.  */
2432       chain->build (candidates, uid);
2433 
2434       if (chain->compute_convert_gain () > 0)
2435 	converted_insns += chain->convert ();
2436       else
2437 	if (dump_file)
2438 	  fprintf (dump_file, "Chain #%d conversion is not profitable\n",
2439 		   chain->chain_id);
2440 
2441       delete chain;
2442     }
2443 
2444   if (dump_file)
2445     fprintf (dump_file, "Total insns converted: %d\n", converted_insns);
2446 
2447   BITMAP_FREE (candidates);
2448   bitmap_obstack_release (NULL);
2449   df_process_deferred_rescans ();
2450 
2451   /* Conversion may introduce 128-bit register spills/fills,
2452      which require an aligned stack.  */
2453   if (converted_insns)
2454     {
2455       if (crtl->stack_alignment_needed < 128)
2456 	crtl->stack_alignment_needed = 128;
2457       if (crtl->stack_alignment_estimated < 128)
2458 	crtl->stack_alignment_estimated = 128;
2459       /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments.  */
2460       if (TARGET_64BIT)
2461 	for (tree parm = DECL_ARGUMENTS (current_function_decl);
2462 	     parm; parm = DECL_CHAIN (parm))
2463 	  {
2464 	    if (TYPE_MODE (TREE_TYPE (parm)) != TImode)
2465 	      continue;
2466 	    if (DECL_RTL_SET_P (parm)
2467 		&& GET_MODE (DECL_RTL (parm)) == V1TImode)
2468 	      {
2469 		rtx r = DECL_RTL (parm);
2470 		if (REG_P (r))
2471 		  SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0));
2472 	      }
2473 	    if (DECL_INCOMING_RTL (parm)
2474 		&& GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode)
2475 	      {
2476 		rtx r = DECL_INCOMING_RTL (parm);
2477 		if (REG_P (r))
2478 		  DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0);
2479 	      }
2480 	  }
2481     }
2482 
2483   return 0;
2484 }
2485 
2486 namespace {
2487 
2488 const pass_data pass_data_insert_vzeroupper =
2489 {
2490   RTL_PASS, /* type */
2491   "vzeroupper", /* name */
2492   OPTGROUP_NONE, /* optinfo_flags */
2493   TV_MACH_DEP, /* tv_id */
2494   0, /* properties_required */
2495   0, /* properties_provided */
2496   0, /* properties_destroyed */
2497   0, /* todo_flags_start */
2498   TODO_df_finish, /* todo_flags_finish */
2499 };
2500 
2501 class pass_insert_vzeroupper : public rtl_opt_pass
2502 {
2503 public:
2504   pass_insert_vzeroupper(gcc::context *ctxt)
2505     : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2506   {}
2507 
2508   /* opt_pass methods: */
2509   virtual bool gate (function *)
2510     {
2511       return TARGET_AVX
2512 	     && TARGET_VZEROUPPER && flag_expensive_optimizations
2513 	     && !optimize_size;
2514     }
2515 
2516   virtual unsigned int execute (function *)
2517     {
2518       return rest_of_handle_insert_vzeroupper ();
2519     }
2520 
2521 }; // class pass_insert_vzeroupper
2522 
2523 const pass_data pass_data_stv =
2524 {
2525   RTL_PASS, /* type */
2526   "stv", /* name */
2527   OPTGROUP_NONE, /* optinfo_flags */
2528   TV_MACH_DEP, /* tv_id */
2529   0, /* properties_required */
2530   0, /* properties_provided */
2531   0, /* properties_destroyed */
2532   0, /* todo_flags_start */
2533   TODO_df_finish, /* todo_flags_finish */
2534 };
2535 
2536 class pass_stv : public rtl_opt_pass
2537 {
2538 public:
2539   pass_stv (gcc::context *ctxt)
2540     : rtl_opt_pass (pass_data_stv, ctxt),
2541       timode_p (false)
2542   {}
2543 
2544   /* opt_pass methods: */
2545   virtual bool gate (function *)
2546     {
2547       return (timode_p == !!TARGET_64BIT
2548 	      && TARGET_STV && TARGET_SSE2 && optimize > 1);
2549     }
2550 
2551   virtual unsigned int execute (function *)
2552     {
2553       return convert_scalars_to_vector ();
2554     }
2555 
2556   opt_pass *clone ()
2557     {
2558       return new pass_stv (m_ctxt);
2559     }
2560 
2561   void set_pass_param (unsigned int n, bool param)
2562     {
2563       gcc_assert (n == 0);
2564       timode_p = param;
2565     }
2566 
2567 private:
2568   bool timode_p;
2569 }; // class pass_stv
2570 
2571 } // anon namespace
2572 
2573 rtl_opt_pass *
2574 make_pass_insert_vzeroupper (gcc::context *ctxt)
2575 {
2576   return new pass_insert_vzeroupper (ctxt);
2577 }
2578 
2579 rtl_opt_pass *
2580 make_pass_stv (gcc::context *ctxt)
2581 {
2582   return new pass_stv (ctxt);
2583 }
2584 
2585 /* Inserting ENDBRANCH instructions.  */
2586 
2587 static unsigned int
2588 rest_of_insert_endbranch (void)
2589 {
2590   timevar_push (TV_MACH_DEP);
2591 
2592   rtx cet_eb;
2593   rtx_insn *insn;
2594   basic_block bb;
2595 
2596   /* Currently emit an ENDBR if the function is tracked, i.e. the
2597      'nocf_check' attribute is absent from its attributes.  Later an
2598      optimization will be introduced to analyze whether the address of a
2599      static function is taken; a static function whose address is not
2600      taken will get a nocf_check attribute, reducing the number of ENDBRs.  */
2601 
2602   if (!lookup_attribute ("nocf_check",
2603 			 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
2604       && !cgraph_node::get (cfun->decl)->only_called_directly_p ())
2605     {
2606       cet_eb = gen_nop_endbr ();
2607 
2608       bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
2609       insn = BB_HEAD (bb);
2610       emit_insn_before (cet_eb, insn);
2611     }
2612 
2613   bb = 0;
2614   FOR_EACH_BB_FN (bb, cfun)
2615     {
2616       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
2617 	   insn = NEXT_INSN (insn))
2618 	{
2619 	  if (CALL_P (insn))
2620 	    {
2621 	      if (find_reg_note (insn, REG_SETJMP, NULL) == NULL)
2622 		continue;
2623 	      /* Generate ENDBRANCH after a CALL that can return more than
2624 		 once (setjmp-like functions).  */
2625 
2626 	      cet_eb = gen_nop_endbr ();
2627 	      emit_insn_after_setloc (cet_eb, insn, INSN_LOCATION (insn));
2628 	      continue;
2629 	    }
2630 
2631 	  if (JUMP_P (insn) && flag_cet_switch)
2632 	    {
2633 	      rtx target = JUMP_LABEL (insn);
2634 	      if (target == NULL_RTX || ANY_RETURN_P (target))
2635 		continue;
2636 
2637 	      /* Check that the jump is through a switch table.  */
2638 	      rtx_insn *label = as_a<rtx_insn *> (target);
2639 	      rtx_insn *table = next_insn (label);
2640 	      if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
2641 		continue;
2642 
2643 	      /* For the indirect jump, find all the places it can jump to
2644 		 and insert an ENDBRANCH there.  This is done under a special
2645 		 flag that controls ENDBRANCH generation for switch statements.  */
2646 	      edge_iterator ei;
2647 	      edge e;
2648 	      basic_block dest_blk;
2649 
2650 	      FOR_EACH_EDGE (e, ei, bb->succs)
2651 		{
2652 		  rtx_insn *insn;
2653 
2654 		  dest_blk = e->dest;
2655 		  insn = BB_HEAD (dest_blk);
2656 		  gcc_assert (LABEL_P (insn));
2657 		  cet_eb = gen_nop_endbr ();
2658 		  emit_insn_after (cet_eb, insn);
2659 		}
2660 	      continue;
2661 	    }
2662 
2663 	  if ((LABEL_P (insn) && LABEL_PRESERVE_P (insn))
2664 	      || (NOTE_P (insn)
2665 		  && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
2666 	    /* TODO.  Check /s bit also.  */
2667 	    {
2668 	      cet_eb = gen_nop_endbr ();
2669 	      emit_insn_after (cet_eb, insn);
2670 	      continue;
2671 	    }
2672 	}
2673     }
2674 
2675   timevar_pop (TV_MACH_DEP);
2676   return 0;
2677 }
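
/* A summary of the placement rules implemented above, stated informally:
   an ENDBR is emitted at the start of the function unless it has the
   'nocf_check' attribute or is known to be called only directly, after
   any call carrying a REG_SETJMP note (setjmp-like functions), at every
   successor block of a switch-table jump when flag_cet_switch is set,
   and after preserved or deleted labels whose address may have been
   taken.  */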
2678 
2679 namespace {
2680 
2681 const pass_data pass_data_insert_endbranch =
2682 {
2683   RTL_PASS, /* type.  */
2684   "cet", /* name.  */
2685   OPTGROUP_NONE, /* optinfo_flags.  */
2686   TV_MACH_DEP, /* tv_id.  */
2687   0, /* properties_required.  */
2688   0, /* properties_provided.  */
2689   0, /* properties_destroyed.  */
2690   0, /* todo_flags_start.  */
2691   0, /* todo_flags_finish.  */
2692 };
2693 
2694 class pass_insert_endbranch : public rtl_opt_pass
2695 {
2696 public:
2697   pass_insert_endbranch (gcc::context *ctxt)
2698     : rtl_opt_pass (pass_data_insert_endbranch, ctxt)
2699   {}
2700 
2701   /* opt_pass methods: */
2702   virtual bool gate (function *)
2703     {
2704       return ((flag_cf_protection & CF_BRANCH));
2705     }
2706 
2707   virtual unsigned int execute (function *)
2708     {
2709       return rest_of_insert_endbranch ();
2710     }
2711 
2712 }; // class pass_insert_endbranch
2713 
2714 } // anon namespace
2715 
2716 rtl_opt_pass *
2717 make_pass_insert_endbranch (gcc::context *ctxt)
2718 {
2719   return new pass_insert_endbranch (ctxt);
2720 }
2721 
2722 /* Return true if a red-zone is in use.  We can't use the red-zone when
2723    there are local indirect jumps, like "indirect_jump" or "tablejump",
2724    which jump to another place in the function, since "call" in the
2725    indirect thunk pushes the return address onto the stack, destroying
2726    the red-zone.
2727 
2728    TODO: If we can reserve the first 2 WORDs of the red-zone, one for
2729    PUSH and another for CALL, we can allow local indirect jumps with an
2730    indirect thunk.  */
2731 
2732 bool
2733 ix86_using_red_zone (void)
2734 {
2735   return (TARGET_RED_ZONE
2736 	  && !TARGET_64BIT_MS_ABI
2737 	  && (!cfun->machine->has_local_indirect_jump
2738 	      || cfun->machine->indirect_branch_type == indirect_branch_keep));
2739 }
2740 
2741 /* Return a string that documents the current -m options.  The caller is
2742    responsible for freeing the string.  */
2743 
2744 static char *
2745 ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2,
2746 		    int flags, int flags2,
2747 		    const char *arch, const char *tune,
2748 		    enum fpmath_unit fpmath, bool add_nl_p)
2749 {
2750   struct ix86_target_opts
2751   {
2752     const char *option;		/* option string */
2753     HOST_WIDE_INT mask;		/* isa mask options */
2754   };
2755 
2756   /* This table is ordered so that options like -msse4.2 that imply other
2757      ISAs come first.  The target string will be displayed in the same order.  */
2758   static struct ix86_target_opts isa2_opts[] =
2759   {
2760     { "-mcx16",		OPTION_MASK_ISA_CX16 },
2761     { "-mmpx",		OPTION_MASK_ISA_MPX },
2762     { "-mvaes",		OPTION_MASK_ISA_VAES },
2763     { "-mrdpid",	OPTION_MASK_ISA_RDPID },
2764     { "-mpconfig",	OPTION_MASK_ISA_PCONFIG },
2765     { "-mwbnoinvd",     OPTION_MASK_ISA_WBNOINVD },
2766     { "-msgx",		OPTION_MASK_ISA_SGX },
2767     { "-mavx5124vnniw", OPTION_MASK_ISA_AVX5124VNNIW },
2768     { "-mavx5124fmaps", OPTION_MASK_ISA_AVX5124FMAPS },
2769     { "-mhle",		OPTION_MASK_ISA_HLE },
2770     { "-mmovbe",	OPTION_MASK_ISA_MOVBE },
2771     { "-mclzero",	OPTION_MASK_ISA_CLZERO },
2772     { "-mmwaitx",	OPTION_MASK_ISA_MWAITX },
2773     { "-mmovdir64b",	OPTION_MASK_ISA_MOVDIR64B }
2774   };
2775   static struct ix86_target_opts isa_opts[] =
2776   {
2777     { "-mavx512vpopcntdq", OPTION_MASK_ISA_AVX512VPOPCNTDQ },
2778     { "-mavx512bitalg", OPTION_MASK_ISA_AVX512BITALG },
2779     { "-mvpclmulqdq",	OPTION_MASK_ISA_VPCLMULQDQ },
2780     { "-mgfni",		OPTION_MASK_ISA_GFNI },
2781     { "-mavx512vnni",	OPTION_MASK_ISA_AVX512VNNI },
2782     { "-mavx512vbmi2",	OPTION_MASK_ISA_AVX512VBMI2 },
2783     { "-mavx512vbmi",	OPTION_MASK_ISA_AVX512VBMI },
2784     { "-mavx512ifma",	OPTION_MASK_ISA_AVX512IFMA },
2785     { "-mavx512vl",	OPTION_MASK_ISA_AVX512VL },
2786     { "-mavx512bw",	OPTION_MASK_ISA_AVX512BW },
2787     { "-mavx512dq",	OPTION_MASK_ISA_AVX512DQ },
2788     { "-mavx512er",	OPTION_MASK_ISA_AVX512ER },
2789     { "-mavx512pf",	OPTION_MASK_ISA_AVX512PF },
2790     { "-mavx512cd",	OPTION_MASK_ISA_AVX512CD },
2791     { "-mavx512f",	OPTION_MASK_ISA_AVX512F },
2792     { "-mavx2",		OPTION_MASK_ISA_AVX2 },
2793     { "-mfma",		OPTION_MASK_ISA_FMA },
2794     { "-mxop",		OPTION_MASK_ISA_XOP },
2795     { "-mfma4",		OPTION_MASK_ISA_FMA4 },
2796     { "-mf16c",		OPTION_MASK_ISA_F16C },
2797     { "-mavx",		OPTION_MASK_ISA_AVX },
2798 /*  { "-msse4"		OPTION_MASK_ISA_SSE4 }, */
2799     { "-msse4.2",	OPTION_MASK_ISA_SSE4_2 },
2800     { "-msse4.1",	OPTION_MASK_ISA_SSE4_1 },
2801     { "-msse4a",	OPTION_MASK_ISA_SSE4A },
2802     { "-mssse3",	OPTION_MASK_ISA_SSSE3 },
2803     { "-msse3",		OPTION_MASK_ISA_SSE3 },
2804     { "-maes",		OPTION_MASK_ISA_AES },
2805     { "-msha",		OPTION_MASK_ISA_SHA },
2806     { "-mpclmul",	OPTION_MASK_ISA_PCLMUL },
2807     { "-msse2",		OPTION_MASK_ISA_SSE2 },
2808     { "-msse",		OPTION_MASK_ISA_SSE },
2809     { "-m3dnowa",	OPTION_MASK_ISA_3DNOW_A },
2810     { "-m3dnow",	OPTION_MASK_ISA_3DNOW },
2811     { "-mmmx",		OPTION_MASK_ISA_MMX },
2812     { "-mrtm",		OPTION_MASK_ISA_RTM },
2813     { "-mprfchw",	OPTION_MASK_ISA_PRFCHW },
2814     { "-mrdseed",	OPTION_MASK_ISA_RDSEED },
2815     { "-madx",		OPTION_MASK_ISA_ADX },
2816     { "-mprefetchwt1",	OPTION_MASK_ISA_PREFETCHWT1 },
2817     { "-mclflushopt",	OPTION_MASK_ISA_CLFLUSHOPT },
2818     { "-mxsaves",	OPTION_MASK_ISA_XSAVES },
2819     { "-mxsavec",	OPTION_MASK_ISA_XSAVEC },
2820     { "-mxsaveopt",	OPTION_MASK_ISA_XSAVEOPT },
2821     { "-mxsave",	OPTION_MASK_ISA_XSAVE },
2822     { "-mabm",		OPTION_MASK_ISA_ABM },
2823     { "-mbmi",		OPTION_MASK_ISA_BMI },
2824     { "-mbmi2",		OPTION_MASK_ISA_BMI2 },
2825     { "-mlzcnt",	OPTION_MASK_ISA_LZCNT },
2826     { "-mtbm",		OPTION_MASK_ISA_TBM },
2827     { "-mpopcnt",	OPTION_MASK_ISA_POPCNT },
2828     { "-msahf",		OPTION_MASK_ISA_SAHF },
2829     { "-mcrc32",	OPTION_MASK_ISA_CRC32 },
2830     { "-mfsgsbase",	OPTION_MASK_ISA_FSGSBASE },
2831     { "-mrdrnd",	OPTION_MASK_ISA_RDRND },
2832     { "-mpku",		OPTION_MASK_ISA_PKU },
2833     { "-mlwp",		OPTION_MASK_ISA_LWP },
2834     { "-mfxsr",		OPTION_MASK_ISA_FXSR },
2835     { "-mclwb",		OPTION_MASK_ISA_CLWB },
2836     { "-mshstk",	OPTION_MASK_ISA_SHSTK },
2837     { "-mmovdiri",	OPTION_MASK_ISA_MOVDIRI }
2838   };
2839 
2840   /* Flag options.  */
2841   static struct ix86_target_opts flag_opts[] =
2842   {
2843     { "-m128bit-long-double",		MASK_128BIT_LONG_DOUBLE },
2844     { "-mlong-double-128",		MASK_LONG_DOUBLE_128 },
2845     { "-mlong-double-64",		MASK_LONG_DOUBLE_64 },
2846     { "-m80387",			MASK_80387 },
2847     { "-maccumulate-outgoing-args",	MASK_ACCUMULATE_OUTGOING_ARGS },
2848     { "-malign-double",			MASK_ALIGN_DOUBLE },
2849     { "-mcld",				MASK_CLD },
2850     { "-mfp-ret-in-387",		MASK_FLOAT_RETURNS },
2851     { "-mieee-fp",			MASK_IEEE_FP },
2852     { "-minline-all-stringops",		MASK_INLINE_ALL_STRINGOPS },
2853     { "-minline-stringops-dynamically",	MASK_INLINE_STRINGOPS_DYNAMICALLY },
2854     { "-mms-bitfields",			MASK_MS_BITFIELD_LAYOUT },
2855     { "-mno-align-stringops",		MASK_NO_ALIGN_STRINGOPS },
2856     { "-mno-fancy-math-387",		MASK_NO_FANCY_MATH_387 },
2857     { "-mno-push-args",			MASK_NO_PUSH_ARGS },
2858     { "-mno-red-zone",			MASK_NO_RED_ZONE },
2859     { "-momit-leaf-frame-pointer",	MASK_OMIT_LEAF_FRAME_POINTER },
2860     { "-mrecip",			MASK_RECIP },
2861     { "-mrtd",				MASK_RTD },
2862     { "-msseregparm",			MASK_SSEREGPARM },
2863     { "-mstack-arg-probe",		MASK_STACK_PROBE },
2864     { "-mtls-direct-seg-refs",		MASK_TLS_DIRECT_SEG_REFS },
2865     { "-mvect8-ret-in-mem",		MASK_VECT8_RETURNS },
2866     { "-m8bit-idiv",			MASK_USE_8BIT_IDIV },
2867     { "-mvzeroupper",			MASK_VZEROUPPER },
2868     { "-mstv",				MASK_STV },
2869     { "-mavx256-split-unaligned-load",	MASK_AVX256_SPLIT_UNALIGNED_LOAD },
2870     { "-mavx256-split-unaligned-store",	MASK_AVX256_SPLIT_UNALIGNED_STORE },
2871     { "-mcall-ms2sysv-xlogues",		MASK_CALL_MS2SYSV_XLOGUES }
2872   };
2873 
2874   /* Additional flag options.  */
2875   static struct ix86_target_opts flag2_opts[] =
2876   {
2877     { "-mgeneral-regs-only",		OPTION_MASK_GENERAL_REGS_ONLY }
2878   };
2879 
2880   const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (isa2_opts)
2881 		   + ARRAY_SIZE (flag_opts) + ARRAY_SIZE (flag2_opts) + 6][2];
2882 
2883   char isa_other[40];
2884   char isa2_other[40];
2885   char flags_other[40];
2886   char flags2_other[40];
2887   unsigned num = 0;
2888   unsigned i, j;
2889   char *ret;
2890   char *ptr;
2891   size_t len;
2892   size_t line_len;
2893   size_t sep_len;
2894   const char *abi;
2895 
2896   memset (opts, '\0', sizeof (opts));
2897 
2898   /* Add -march= option.  */
2899   if (arch)
2900     {
2901       opts[num][0] = "-march=";
2902       opts[num++][1] = arch;
2903     }
2904 
2905   /* Add -mtune= option.  */
2906   if (tune)
2907     {
2908       opts[num][0] = "-mtune=";
2909       opts[num++][1] = tune;
2910     }
2911 
2912   /* Add -m32/-m64/-mx32.  */
2913   if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2914     {
2915       if ((isa & OPTION_MASK_ABI_64) != 0)
2916 	abi = "-m64";
2917       else
2918 	abi = "-mx32";
2919       isa &= ~ (OPTION_MASK_ISA_64BIT
2920 		| OPTION_MASK_ABI_64
2921 		| OPTION_MASK_ABI_X32);
2922     }
2923   else
2924     abi = "-m32";
2925   opts[num++][0] = abi;
2926 
2927   /* Pick out the options in isa2 options.  */
2928   for (i = 0; i < ARRAY_SIZE (isa2_opts); i++)
2929     {
2930       if ((isa2 & isa2_opts[i].mask) != 0)
2931 	{
2932 	  opts[num++][0] = isa2_opts[i].option;
2933 	  isa2 &= ~ isa2_opts[i].mask;
2934 	}
2935     }
2936 
2937   if (isa2 && add_nl_p)
2938     {
2939       opts[num++][0] = isa2_other;
2940       sprintf (isa2_other, "(other isa2: %#" HOST_WIDE_INT_PRINT "x)", isa2);
2941     }
2942 
2943   /* Pick out the options in isa options.  */
2944   for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2945     {
2946       if ((isa & isa_opts[i].mask) != 0)
2947 	{
2948 	  opts[num++][0] = isa_opts[i].option;
2949 	  isa &= ~ isa_opts[i].mask;
2950 	}
2951     }
2952 
2953   if (isa && add_nl_p)
2954     {
2955       opts[num++][0] = isa_other;
2956       sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)", isa);
2957     }
2958 
2959   /* Add flag options.  */
2960   for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2961     {
2962       if ((flags & flag_opts[i].mask) != 0)
2963 	{
2964 	  opts[num++][0] = flag_opts[i].option;
2965 	  flags &= ~ flag_opts[i].mask;
2966 	}
2967     }
2968 
2969   if (flags && add_nl_p)
2970     {
2971       opts[num++][0] = flags_other;
2972       sprintf (flags_other, "(other flags: %#x)", flags);
2973     }
2974 
2975   /* Add additional flag options.  */
2976   for (i = 0; i < ARRAY_SIZE (flag2_opts); i++)
2977     {
2978       if ((flags2 & flag2_opts[i].mask) != 0)
2979 	{
2980 	  opts[num++][0] = flag2_opts[i].option;
2981 	  flags2 &= ~ flag2_opts[i].mask;
2982 	}
2983     }
2984 
2985   if (flags2 && add_nl_p)
2986     {
2987       opts[num++][0] = flags2_other;
2988       sprintf (flags2_other, "(other flags2: %#x)", flags2);
2989     }
2990 
2991   /* Add -fpmath= option.  */
2992   if (fpmath)
2993     {
2994       opts[num][0] = "-mfpmath=";
2995       switch ((int) fpmath)
2996 	{
2997 	case FPMATH_387:
2998 	  opts[num++][1] = "387";
2999 	  break;
3000 
3001 	case FPMATH_SSE:
3002 	  opts[num++][1] = "sse";
3003 	  break;
3004 
3005 	case FPMATH_387 | FPMATH_SSE:
3006 	  opts[num++][1] = "sse+387";
3007 	  break;
3008 
3009 	default:
3010 	  gcc_unreachable ();
3011 	}
3012     }
3013 
3014   /* Any options?  */
3015   if (num == 0)
3016     return NULL;
3017 
3018   gcc_assert (num < ARRAY_SIZE (opts));
3019 
3020   /* Size the string.  */
3021   len = 0;
3022   sep_len = (add_nl_p) ? 3 : 1;
3023   for (i = 0; i < num; i++)
3024     {
3025       len += sep_len;
3026       for (j = 0; j < 2; j++)
3027 	if (opts[i][j])
3028 	  len += strlen (opts[i][j]);
3029     }
3030 
3031   /* Build the string.  */
3032   ret = ptr = (char *) xmalloc (len);
3033   line_len = 0;
3034 
3035   for (i = 0; i < num; i++)
3036     {
3037       size_t len2[2];
3038 
3039       for (j = 0; j < 2; j++)
3040 	len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
3041 
3042       if (i != 0)
3043 	{
3044 	  *ptr++ = ' ';
3045 	  line_len++;
3046 
3047 	  if (add_nl_p && line_len + len2[0] + len2[1] > 70)
3048 	    {
3049 	      *ptr++ = '\\';
3050 	      *ptr++ = '\n';
3051 	      line_len = 0;
3052 	    }
3053 	}
3054 
3055       for (j = 0; j < 2; j++)
3056 	if (opts[i][j])
3057 	  {
3058 	    memcpy (ptr, opts[i][j], len2[j]);
3059 	    ptr += len2[j];
3060 	    line_len += len2[j];
3061 	  }
3062     }
3063 
3064   *ptr = '\0';
3065   gcc_assert (ret + len >= ptr);
3066 
3067   return ret;
3068 }
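
/* For illustration only; the exact contents depend on the flags passed
   in.  A call such as

     ix86_target_string (ix86_isa_flags, ix86_isa_flags2, target_flags,
			 ix86_target_flags, "haswell", "haswell",
			 FPMATH_SSE, false)

   would produce a string along the lines of
   "-march=haswell -mtune=haswell -m64 -mavx2 ... -mfpmath=sse", and any
   mask bits with no table entry are reported as "(other isa: ...)" only
   when ADD_NL_P is true.  */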
3069 
3070 /* Return true if profiling code should be emitted before the
3071    prologue, otherwise false.
3072    Note: for x86 with "hotfix" this case is rejected with sorry ().  */
3073 static bool
3074 ix86_profile_before_prologue (void)
3075 {
3076   return flag_fentry != 0;
3077 }
3078 
3079 /* Function that is callable from the debugger to print the current
3080    options.  */
3081 void ATTRIBUTE_UNUSED
3082 ix86_debug_options (void)
3083 {
3084   char *opts = ix86_target_string (ix86_isa_flags, ix86_isa_flags2,
3085 				   target_flags, ix86_target_flags,
3086 				   ix86_arch_string,ix86_tune_string,
3087 				   ix86_fpmath, true);
3088 
3089   if (opts)
3090     {
3091       fprintf (stderr, "%s\n\n", opts);
3092       free (opts);
3093     }
3094   else
3095     fputs ("<no options>\n\n", stderr);
3096 
3097   return;
3098 }
3099 
3100 /* Return true if T is one of the bytes we should avoid with
3101    -mmitigate-rop.  */
3102 
3103 static bool
3104 ix86_rop_should_change_byte_p (int t)
3105 {
3106   return t == 0xc2 || t == 0xc3 || t == 0xca || t == 0xcb;
3107 }
3108 
3109 static const char *stringop_alg_names[] = {
3110 #define DEF_ENUM
3111 #define DEF_ALG(alg, name) #name,
3112 #include "stringop.def"
3113 #undef DEF_ENUM
3114 #undef DEF_ALG
3115 };
3116 
3117 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
3118    The string is of the following form (or a comma-separated list of them):
3119 
3120      strategy_alg:max_size:[align|noalign]
3121 
3122    where the full size range for the strategy is either [0, max_size] or
3123    [min_size, max_size], in which min_size is the max_size + 1 of the
3124    preceding range.  The last size range must have max_size == -1.
3125 
3126    Examples:
3127 
3128     1.
3129        -mmemcpy-strategy=libcall:-1:noalign
3130 
3131       This is equivalent to (for known-size memcpy) -mstringop-strategy=libcall.
3132 
3133 
3134     2.
3135        -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
3136 
3137       This tells the compiler to use the following strategy for memset:
3138       1) when the expected size is between [1, 16], use the rep_8byte strategy;
3139       2) when the size is between [17, 2048], use vector_loop;
3140       3) when the size is > 2048, use libcall.  */
3141 
3142 struct stringop_size_range
3143 {
3144   int max;
3145   stringop_alg alg;
3146   bool noalign;
3147 };
3148 
3149 static void
3150 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
3151 {
3152   const struct stringop_algs *default_algs;
3153   stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
3154   char *curr_range_str, *next_range_str;
3155   const char *opt = is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=";
3156   int i = 0, n = 0;
3157 
3158   if (is_memset)
3159     default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
3160   else
3161     default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
3162 
3163   curr_range_str = strategy_str;
3164 
3165   do
3166     {
3167       int maxs;
3168       char alg_name[128];
3169       char align[16];
3170       next_range_str = strchr (curr_range_str, ',');
3171       if (next_range_str)
3172         *next_range_str++ = '\0';
3173 
3174       if (sscanf (curr_range_str, "%20[^:]:%d:%10s", alg_name, &maxs,
3175 		  align) != 3)
3176         {
3177 	  error ("wrong argument %qs to option %qs", curr_range_str, opt);
3178           return;
3179         }
3180 
3181       if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
3182         {
3183 	  error ("size ranges of option %qs should be increasing", opt);
3184           return;
3185         }
3186 
3187       for (i = 0; i < last_alg; i++)
3188 	if (!strcmp (alg_name, stringop_alg_names[i]))
3189 	  break;
3190 
3191       if (i == last_alg)
3192         {
3193 	  error ("wrong strategy name %qs specified for option %qs",
3194 		 alg_name, opt);
3195 
3196 	  auto_vec <const char *> candidates;
3197 	  for (i = 0; i < last_alg; i++)
3198 	    if ((stringop_alg) i != rep_prefix_8_byte || TARGET_64BIT)
3199 	      candidates.safe_push (stringop_alg_names[i]);
3200 
3201 	  char *s;
3202 	  const char *hint
3203 	    = candidates_list_and_hint (alg_name, s, candidates);
3204 	  if (hint)
3205 	    inform (input_location,
3206 		    "valid arguments to %qs are: %s; did you mean %qs?",
3207 		    opt, s, hint);
3208 	  else
3209 	    inform (input_location, "valid arguments to %qs are: %s",
3210 		    opt, s);
3211 	  XDELETEVEC (s);
3212           return;
3213         }
3214 
3215       if ((stringop_alg) i == rep_prefix_8_byte
3216 	  && !TARGET_64BIT)
3217 	{
3218 	  /* rep; movq isn't available in 32-bit code.  */
3219 	  error ("strategy name %qs specified for option %qs "
3220 		 "not supported for 32-bit code", alg_name, opt);
3221 	  return;
3222 	}
3223 
3224       input_ranges[n].max = maxs;
3225       input_ranges[n].alg = (stringop_alg) i;
3226       if (!strcmp (align, "align"))
3227         input_ranges[n].noalign = false;
3228       else if (!strcmp (align, "noalign"))
3229         input_ranges[n].noalign = true;
3230       else
3231         {
3232 	  error ("unknown alignment %qs specified for option %qs", align, opt);
3233           return;
3234         }
3235       n++;
3236       curr_range_str = next_range_str;
3237     }
3238   while (curr_range_str);
3239 
3240   if (input_ranges[n - 1].max != -1)
3241     {
3242       error ("the max value for the last size range should be -1"
3243              " for option %qs", opt);
3244       return;
3245     }
3246 
3247   if (n > MAX_STRINGOP_ALGS)
3248     {
3249       error ("too many size ranges specified in option %qs", opt);
3250       return;
3251     }
3252 
3253   /* Now override the default algs array.  */
3254   for (i = 0; i < n; i++)
3255     {
3256       *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3257       *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3258           = input_ranges[i].alg;
3259       *const_cast<int *>(&default_algs->size[i].noalign)
3260           = input_ranges[i].noalign;
3261     }
3262 }
3263 
3264 
3265 /* parse -mtune-ctrl= option. When DUMP is true,
3266    print the features that are explicitly set.  */
3267 
3268 static void
3269 parse_mtune_ctrl_str (bool dump)
3270 {
3271   if (!ix86_tune_ctrl_string)
3272     return;
3273 
3274   char *next_feature_string = NULL;
3275   char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3276   char *orig = curr_feature_string;
3277   int i;
3278   do
3279     {
3280       bool clear = false;
3281 
3282       next_feature_string = strchr (curr_feature_string, ',');
3283       if (next_feature_string)
3284         *next_feature_string++ = '\0';
3285       if (*curr_feature_string == '^')
3286         {
3287           curr_feature_string++;
3288           clear = true;
3289         }
3290       for (i = 0; i < X86_TUNE_LAST; i++)
3291         {
3292           if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3293             {
3294               ix86_tune_features[i] = !clear;
3295               if (dump)
3296                 fprintf (stderr, "Explicitly %s feature %s\n",
3297                          clear ? "clear" : "set", ix86_tune_feature_names[i]);
3298               break;
3299             }
3300         }
3301       if (i == X86_TUNE_LAST)
3302         error ("unknown parameter to option -mtune-ctrl: %s",
3303                clear ? curr_feature_string - 1 : curr_feature_string);
3304       curr_feature_string = next_feature_string;
3305     }
3306   while (curr_feature_string);
3307   free (orig);
3308 }
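
/* Usage sketch; the feature names come from ix86_tune_feature_names and
   are not repeated here.  Given the loop above, an option such as

     -mtune-ctrl=<feature1>,^<feature2>

   explicitly enables <feature1> and disables <feature2> (the '^' prefix
   clears a feature), and a name with no matching entry is reported with
   error ().  */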
3309 
3310 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3311    processor type.  */
3312 
3313 static void
3314 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3315 {
3316   unsigned HOST_WIDE_INT ix86_tune_mask = HOST_WIDE_INT_1U << ix86_tune;
3317   int i;
3318 
3319   for (i = 0; i < X86_TUNE_LAST; ++i)
3320     {
3321       if (ix86_tune_no_default)
3322         ix86_tune_features[i] = 0;
3323       else
3324 	ix86_tune_features[i]
3325 	  = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3326     }
3327 
3328   if (dump)
3329     {
3330       fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3331       for (i = 0; i < X86_TUNE_LAST; i++)
3332         fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3333                  ix86_tune_features[i] ? "on" : "off");
3334     }
3335 
3336   parse_mtune_ctrl_str (dump);
3337 }
3338 
3339 
3340 /* Set the default align_* values from the processor table.  */
3341 
3342 static void
3343 ix86_default_align (struct gcc_options *opts)
3344 {
3345   if (opts->x_align_loops == 0)
3346     {
3347       opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3348       align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3349     }
3350   if (opts->x_align_jumps == 0)
3351     {
3352       opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3353       align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3354     }
3355   if (opts->x_align_functions == 0)
3356     {
3357       opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3358     }
3359 }
3360 
3361 /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook.  */
3362 
3363 static void
3364 ix86_override_options_after_change (void)
3365 {
3366   ix86_default_align (&global_options);
3367 }
3368 
3369 /* Override various settings based on options.  If MAIN_ARGS_P, the
3370    options are from the command line, otherwise they are from
3371    attributes.  Return true if there's an error related to the
3372    -march= option.  */
3373 
3374 static bool
3375 ix86_option_override_internal (bool main_args_p,
3376 			       struct gcc_options *opts,
3377 			       struct gcc_options *opts_set)
3378 {
3379   int i;
3380   unsigned HOST_WIDE_INT ix86_arch_mask;
3381   const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3382 
3383   const wide_int_bitmask PTA_3DNOW (HOST_WIDE_INT_1U << 0);
3384   const wide_int_bitmask PTA_3DNOW_A (HOST_WIDE_INT_1U << 1);
3385   const wide_int_bitmask PTA_64BIT (HOST_WIDE_INT_1U << 2);
3386   const wide_int_bitmask PTA_ABM (HOST_WIDE_INT_1U << 3);
3387   const wide_int_bitmask PTA_AES (HOST_WIDE_INT_1U << 4);
3388   const wide_int_bitmask PTA_AVX (HOST_WIDE_INT_1U << 5);
3389   const wide_int_bitmask PTA_BMI (HOST_WIDE_INT_1U << 6);
3390   const wide_int_bitmask PTA_CX16 (HOST_WIDE_INT_1U << 7);
3391   const wide_int_bitmask PTA_F16C (HOST_WIDE_INT_1U << 8);
3392   const wide_int_bitmask PTA_FMA (HOST_WIDE_INT_1U << 9);
3393   const wide_int_bitmask PTA_FMA4 (HOST_WIDE_INT_1U << 10);
3394   const wide_int_bitmask PTA_FSGSBASE (HOST_WIDE_INT_1U << 11);
3395   const wide_int_bitmask PTA_LWP (HOST_WIDE_INT_1U << 12);
3396   const wide_int_bitmask PTA_LZCNT (HOST_WIDE_INT_1U << 13);
3397   const wide_int_bitmask PTA_MMX (HOST_WIDE_INT_1U << 14);
3398   const wide_int_bitmask PTA_MOVBE (HOST_WIDE_INT_1U << 15);
3399   const wide_int_bitmask PTA_NO_SAHF (HOST_WIDE_INT_1U << 16);
3400   const wide_int_bitmask PTA_PCLMUL (HOST_WIDE_INT_1U << 17);
3401   const wide_int_bitmask PTA_POPCNT (HOST_WIDE_INT_1U << 18);
3402   const wide_int_bitmask PTA_PREFETCH_SSE (HOST_WIDE_INT_1U << 19);
3403   const wide_int_bitmask PTA_RDRND (HOST_WIDE_INT_1U << 20);
3404   const wide_int_bitmask PTA_SSE (HOST_WIDE_INT_1U << 21);
3405   const wide_int_bitmask PTA_SSE2 (HOST_WIDE_INT_1U << 22);
3406   const wide_int_bitmask PTA_SSE3 (HOST_WIDE_INT_1U << 23);
3407   const wide_int_bitmask PTA_SSE4_1 (HOST_WIDE_INT_1U << 24);
3408   const wide_int_bitmask PTA_SSE4_2 (HOST_WIDE_INT_1U << 25);
3409   const wide_int_bitmask PTA_SSE4A (HOST_WIDE_INT_1U << 26);
3410   const wide_int_bitmask PTA_SSSE3 (HOST_WIDE_INT_1U << 27);
3411   const wide_int_bitmask PTA_TBM (HOST_WIDE_INT_1U << 28);
3412   const wide_int_bitmask PTA_XOP (HOST_WIDE_INT_1U << 29);
3413   const wide_int_bitmask PTA_AVX2 (HOST_WIDE_INT_1U << 30);
3414   const wide_int_bitmask PTA_BMI2 (HOST_WIDE_INT_1U << 31);
3415   const wide_int_bitmask PTA_RTM (HOST_WIDE_INT_1U << 32);
3416   const wide_int_bitmask PTA_HLE (HOST_WIDE_INT_1U << 33);
3417   const wide_int_bitmask PTA_PRFCHW (HOST_WIDE_INT_1U << 34);
3418   const wide_int_bitmask PTA_RDSEED (HOST_WIDE_INT_1U << 35);
3419   const wide_int_bitmask PTA_ADX (HOST_WIDE_INT_1U << 36);
3420   const wide_int_bitmask PTA_FXSR (HOST_WIDE_INT_1U << 37);
3421   const wide_int_bitmask PTA_XSAVE (HOST_WIDE_INT_1U << 38);
3422   const wide_int_bitmask PTA_XSAVEOPT (HOST_WIDE_INT_1U << 39);
3423   const wide_int_bitmask PTA_AVX512F (HOST_WIDE_INT_1U << 40);
3424   const wide_int_bitmask PTA_AVX512ER (HOST_WIDE_INT_1U << 41);
3425   const wide_int_bitmask PTA_AVX512PF (HOST_WIDE_INT_1U << 42);
3426   const wide_int_bitmask PTA_AVX512CD (HOST_WIDE_INT_1U << 43);
3427   const wide_int_bitmask PTA_MPX (HOST_WIDE_INT_1U << 44);
3428   const wide_int_bitmask PTA_SHA (HOST_WIDE_INT_1U << 45);
3429   const wide_int_bitmask PTA_PREFETCHWT1 (HOST_WIDE_INT_1U << 46);
3430   const wide_int_bitmask PTA_CLFLUSHOPT (HOST_WIDE_INT_1U << 47);
3431   const wide_int_bitmask PTA_XSAVEC (HOST_WIDE_INT_1U << 48);
3432   const wide_int_bitmask PTA_XSAVES (HOST_WIDE_INT_1U << 49);
3433   const wide_int_bitmask PTA_AVX512DQ (HOST_WIDE_INT_1U << 50);
3434   const wide_int_bitmask PTA_AVX512BW (HOST_WIDE_INT_1U << 51);
3435   const wide_int_bitmask PTA_AVX512VL (HOST_WIDE_INT_1U << 52);
3436   const wide_int_bitmask PTA_AVX512IFMA (HOST_WIDE_INT_1U << 53);
3437   const wide_int_bitmask PTA_AVX512VBMI (HOST_WIDE_INT_1U << 54);
3438   const wide_int_bitmask PTA_CLWB (HOST_WIDE_INT_1U << 55);
3439   const wide_int_bitmask PTA_MWAITX (HOST_WIDE_INT_1U << 56);
3440   const wide_int_bitmask PTA_CLZERO (HOST_WIDE_INT_1U << 57);
3441   const wide_int_bitmask PTA_NO_80387 (HOST_WIDE_INT_1U << 58);
3442   const wide_int_bitmask PTA_PKU (HOST_WIDE_INT_1U << 59);
3443   const wide_int_bitmask PTA_AVX5124VNNIW (HOST_WIDE_INT_1U << 60);
3444   const wide_int_bitmask PTA_AVX5124FMAPS (HOST_WIDE_INT_1U << 61);
3445   const wide_int_bitmask PTA_AVX512VPOPCNTDQ (HOST_WIDE_INT_1U << 62);
3446   const wide_int_bitmask PTA_SGX (HOST_WIDE_INT_1U << 63);
3447   const wide_int_bitmask PTA_AVX512VNNI (0, HOST_WIDE_INT_1U);
3448   const wide_int_bitmask PTA_GFNI (0, HOST_WIDE_INT_1U << 1);
3449   const wide_int_bitmask PTA_VAES (0, HOST_WIDE_INT_1U << 2);
3450   const wide_int_bitmask PTA_AVX512VBMI2 (0, HOST_WIDE_INT_1U << 3);
3451   const wide_int_bitmask PTA_VPCLMULQDQ (0, HOST_WIDE_INT_1U << 4);
3452   const wide_int_bitmask PTA_AVX512BITALG (0, HOST_WIDE_INT_1U << 5);
3453   const wide_int_bitmask PTA_RDPID (0, HOST_WIDE_INT_1U << 6);
3454   const wide_int_bitmask PTA_PCONFIG (0, HOST_WIDE_INT_1U << 7);
3455   const wide_int_bitmask PTA_WBNOINVD (0, HOST_WIDE_INT_1U << 8);
3456 
3457   const wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
3458     | PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR;
3459   const wide_int_bitmask PTA_NEHALEM = PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2
3460     | PTA_POPCNT;
3461   const wide_int_bitmask PTA_WESTMERE = PTA_NEHALEM | PTA_AES | PTA_PCLMUL;
3462   const wide_int_bitmask PTA_SANDYBRIDGE = PTA_WESTMERE | PTA_AVX | PTA_XSAVE
3463     | PTA_XSAVEOPT;
3464   const wide_int_bitmask PTA_IVYBRIDGE = PTA_SANDYBRIDGE | PTA_FSGSBASE
3465     | PTA_RDRND | PTA_F16C;
3466   const wide_int_bitmask PTA_HASWELL = PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI
3467     | PTA_BMI2 | PTA_LZCNT | PTA_FMA | PTA_MOVBE | PTA_HLE;
3468   const wide_int_bitmask PTA_BROADWELL = PTA_HASWELL | PTA_ADX | PTA_PRFCHW
3469     | PTA_RDSEED;
3470   const wide_int_bitmask PTA_SKYLAKE = PTA_BROADWELL | PTA_CLFLUSHOPT
3471     | PTA_XSAVEC | PTA_XSAVES | PTA_SGX;
3472   const wide_int_bitmask PTA_SKYLAKE_AVX512 = PTA_SKYLAKE | PTA_AVX512F
3473     | PTA_AVX512CD | PTA_AVX512VL | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU
3474     | PTA_CLWB;
3475   const wide_int_bitmask PTA_CANNONLAKE = PTA_SKYLAKE | PTA_AVX512F
3476     | PTA_AVX512CD | PTA_AVX512VL | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU
3477     | PTA_AVX512VBMI | PTA_AVX512IFMA | PTA_SHA;
3478   const wide_int_bitmask PTA_ICELAKE_CLIENT = PTA_CANNONLAKE | PTA_AVX512VNNI
3479     | PTA_GFNI | PTA_VAES | PTA_AVX512VBMI2 | PTA_VPCLMULQDQ | PTA_AVX512BITALG
3480     | PTA_RDPID | PTA_CLWB;
3481   const wide_int_bitmask PTA_ICELAKE_SERVER = PTA_ICELAKE_CLIENT | PTA_PCONFIG
3482     | PTA_WBNOINVD;
3483   const wide_int_bitmask PTA_KNL = PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER
3484     | PTA_AVX512F | PTA_AVX512CD;
3485   const wide_int_bitmask PTA_BONNELL = PTA_CORE2 | PTA_MOVBE;
3486   const wide_int_bitmask PTA_SILVERMONT = PTA_WESTMERE | PTA_MOVBE | PTA_RDRND;
3487   const wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW
3488     | PTA_AVX5124FMAPS | PTA_AVX512VPOPCNTDQ;
3489 
3490   static struct pta
3491     {
3492       const char *const name;		/* processor name or nickname.  */
3493       const enum processor_type processor;
3494       const enum attr_cpu schedule;
3495       const wide_int_bitmask flags;
3496     }
3497   const processor_alias_table[] =
3498     {
3499       {"i386", PROCESSOR_I386, CPU_NONE, 0},
3500       {"i486", PROCESSOR_I486, CPU_NONE, 0},
3501       {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3502       {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3503       {"lakemont", PROCESSOR_LAKEMONT, CPU_PENTIUM, PTA_NO_80387},
3504       {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3505       {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3506       {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3507       {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3508       {"samuel-2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3509       {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3510 	PTA_MMX | PTA_SSE | PTA_FXSR},
3511       {"nehemiah", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3512         PTA_MMX | PTA_SSE | PTA_FXSR},
3513       {"c7", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3514         PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3515       {"esther", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3516         PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3517       {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3518       {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3519       {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3520       {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3521 	PTA_MMX | PTA_SSE | PTA_FXSR},
3522       {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3523 	PTA_MMX | PTA_SSE | PTA_FXSR},
3524       {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3525 	PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3526       {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3527 	PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3528       {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3529 	PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3530       {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3531 	PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3532       {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3533 	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3534 	| PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3535       {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3536       {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3537       {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3538       {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3539       {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3540 	PTA_SANDYBRIDGE},
3541       {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3542 	PTA_SANDYBRIDGE},
3543       {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3544 	PTA_IVYBRIDGE},
3545       {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3546 	PTA_IVYBRIDGE},
3547       {"haswell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
3548       {"core-avx2", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
3549       {"broadwell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_BROADWELL},
3550       {"skylake", PROCESSOR_SKYLAKE, CPU_HASWELL, PTA_SKYLAKE},
3551       {"skylake-avx512", PROCESSOR_SKYLAKE_AVX512, CPU_HASWELL,
3552         PTA_SKYLAKE_AVX512},
3553       {"cannonlake", PROCESSOR_CANNONLAKE, CPU_HASWELL, PTA_CANNONLAKE},
3554       {"icelake-client", PROCESSOR_ICELAKE_CLIENT, CPU_HASWELL,
3555 	PTA_ICELAKE_CLIENT},
3556       {"icelake-server", PROCESSOR_ICELAKE_SERVER, CPU_HASWELL,
3557 	PTA_ICELAKE_SERVER},
3558       {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3559       {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3560       {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3561       {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3562       {"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL},
3563       {"knm", PROCESSOR_KNM, CPU_SLM, PTA_KNM},
3564       {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3565       {"geode", PROCESSOR_GEODE, CPU_GEODE,
3566 	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3567       {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3568       {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3569       {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3570       {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3571 	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3572       {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3573 	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3574       {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3575 	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR},
3576       {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3577 	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR},
3578       {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3579 	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR},
3580       {"x86-64", PROCESSOR_K8, CPU_K8,
3581 	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3582       {"eden-x2", PROCESSOR_K8, CPU_K8,
3583         PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3584       {"nano", PROCESSOR_K8, CPU_K8,
3585         PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3586         | PTA_SSSE3 | PTA_FXSR},
3587       {"nano-1000", PROCESSOR_K8, CPU_K8,
3588         PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3589         | PTA_SSSE3 | PTA_FXSR},
3590       {"nano-2000", PROCESSOR_K8, CPU_K8,
3591         PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3592         | PTA_SSSE3 | PTA_FXSR},
3593       {"nano-3000", PROCESSOR_K8, CPU_K8,
3594         PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3595         | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3596       {"nano-x2", PROCESSOR_K8, CPU_K8,
3597         PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3598         | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3599       {"eden-x4", PROCESSOR_K8, CPU_K8,
3600         PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3601         | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3602       {"nano-x4", PROCESSOR_K8, CPU_K8,
3603         PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3604         | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
3605       {"k8", PROCESSOR_K8, CPU_K8,
3606 	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3607 	| PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3608       {"k8-sse3", PROCESSOR_K8, CPU_K8,
3609 	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3610 	| PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR},
3611       {"opteron", PROCESSOR_K8, CPU_K8,
3612 	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3613 	| PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3614       {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3615 	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3616 	| PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR},
3617       {"athlon64", PROCESSOR_K8, CPU_K8,
3618 	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3619 	| PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3620       {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3621 	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3622 	| PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR},
3623       {"athlon-fx", PROCESSOR_K8, CPU_K8,
3624 	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3625 	| PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3626       {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3627 	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3628 	| PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3629       {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3630 	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3631 	| PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3632       {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3633 	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3634 	| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3635 	| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3636 	| PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3637       {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3638 	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3639 	| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3640 	| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3641 	| PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3642 	| PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3643       {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3644 	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3645 	| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3646 	| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3647 	| PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3648 	| PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3649 	| PTA_XSAVEOPT | PTA_FSGSBASE},
3650       {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3651 	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3652 	| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3653 	| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3654 	| PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3655 	| PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3656 	| PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3657 	| PTA_MOVBE | PTA_MWAITX},
3658       {"znver1", PROCESSOR_ZNVER1, CPU_ZNVER1,
3659 	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3660 	| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3661 	| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3662 	| PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW
3663 	| PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE
3664 	| PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED
3665 	| PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES
3666 	| PTA_SHA | PTA_LZCNT | PTA_POPCNT},
3667       {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3668 	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3669 	| PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
3670 	| PTA_FXSR | PTA_XSAVE},
3671       {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3672 	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3673 	| PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3674 	| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3675 	| PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3676 	| PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3677 
3678       {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3679 	PTA_64BIT
3680 	| PTA_HLE /* flags are only used for -march switch.  */ },
3681     };
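
  /* For example, -march=nocona matches the "nocona" entry above: the loop
     further down then sets ix86_arch to PROCESSOR_NOCONA, defaults ix86_tune
     to the same processor, and turns on the ISA bits listed in that entry
     (MMX, SSE, SSE2, SSE3, CX16, FXSR) unless the user already set or
     cleared them explicitly on the command line.  */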
3682 
3683   /* -mrecip options.  */
3684   static struct
3685     {
3686       const char *string;           /* option name */
3687       unsigned int mask;            /* mask bits to set */
3688     }
3689   const recip_options[] =
3690     {
3691       { "all",       RECIP_MASK_ALL },
3692       { "none",      RECIP_MASK_NONE },
3693       { "div",       RECIP_MASK_DIV },
3694       { "sqrt",      RECIP_MASK_SQRT },
3695       { "vec-div",   RECIP_MASK_VEC_DIV },
3696       { "vec-sqrt",  RECIP_MASK_VEC_SQRT },
3697     };
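
  /* The strings above are the suboption names accepted by the -mrecip=
     switch; e.g. -mrecip=vec-sqrt maps to RECIP_MASK_VEC_SQRT, while the
     "all" and "none" entries set or clear every reciprocal-approximation
     bit at once.  */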
3698 
3699   int const pta_size = ARRAY_SIZE (processor_alias_table);
3700 
3701   /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3702      TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false.  */
3703   if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3704     opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3705 #ifdef TARGET_BI_ARCH
3706   else
3707     {
3708 #if TARGET_BI_ARCH == 1
3709       /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3710 	 is on and OPTION_MASK_ABI_X32 is off.  We turn off
3711 	 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3712 	 -mx32.  */
3713       if (TARGET_X32_P (opts->x_ix86_isa_flags))
3714 	opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3715 #else
3716       /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3717 	 on and OPTION_MASK_ABI_64 is off.  We turn off
3718 	 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3719 	 -m64 or OPTION_MASK_CODE16 is turned on by -m16.  */
3720       if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3721 	  || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3722 	opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3723 #endif
3724       if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3725 	  && TARGET_IAMCU_P (opts->x_target_flags))
3726 	sorry ("Intel MCU psABI isn%'t supported in %s mode",
3727 	       TARGET_X32_P (opts->x_ix86_isa_flags) ? "x32" : "64-bit");
3728     }
3729 #endif
3730 
3731   if (TARGET_X32_P (opts->x_ix86_isa_flags))
3732     {
3733       /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3734 	 OPTION_MASK_ABI_64 for TARGET_X32.  */
3735       opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3736       opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3737     }
3738   else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3739     opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3740 				| OPTION_MASK_ABI_X32
3741 				| OPTION_MASK_ABI_64);
3742   else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3743     {
3744       /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3745 	 OPTION_MASK_ABI_X32 for TARGET_LP64.  */
3746       opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3747       opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3748     }
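
  /* At this point the ABI selection is consistent: TARGET_X32 keeps
     OPTION_MASK_ISA_64BIT set with OPTION_MASK_ABI_64 cleared, -m16 clears
     OPTION_MASK_ISA_64BIT and both ABI bits, and LP64 (plain -m64) keeps
     OPTION_MASK_ISA_64BIT set with OPTION_MASK_ABI_X32 cleared.  */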
3749 
3750 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3751   SUBTARGET_OVERRIDE_OPTIONS;
3752 #endif
3753 
3754 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3755   SUBSUBTARGET_OVERRIDE_OPTIONS;
3756 #endif
3757 
3758   /* -fPIC is the default for 64-bit Darwin (Mach-O) targets.  */
3759   if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3760     opts->x_flag_pic = 2;
3761 
3762   /* Need to check -mtune=generic first.  */
3763   if (opts->x_ix86_tune_string)
3764     {
3765       /* As special support for cross compilers, we treat -mtune=native
3766 	 as -mtune=generic.  With native compilers we won't see
3767 	 -mtune=native, as the driver has already replaced it.  */
3768       if (!strcmp (opts->x_ix86_tune_string, "native"))
3769 	{
3770 	  opts->x_ix86_tune_string = "generic";
3771 	}
3772       else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3773         warning (OPT_Wdeprecated,
3774 		 main_args_p
3775 		 ? G_("%<-mtune=x86-64%> is deprecated; use %<-mtune=k8%> "
3776 		      "or %<-mtune=generic%> instead as appropriate")
3777 		 : G_("%<target(\"tune=x86-64\")%> is deprecated; use "
3778 		      "%<target(\"tune=k8\")%> or %<target(\"tune=generic\")%>"
3779 		      " instead as appropriate"));
3780     }
3781   else
3782     {
3783       if (opts->x_ix86_arch_string)
3784 	opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3785       if (!opts->x_ix86_tune_string)
3786 	{
3787 	  opts->x_ix86_tune_string
3788 	    = processor_target_table[TARGET_CPU_DEFAULT].name;
3789 	  ix86_tune_defaulted = 1;
3790 	}
3791 
3792       /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3793 	 or defaulted.  We need to use a sensible tune option.  */
3794       if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3795 	{
3796 	  opts->x_ix86_tune_string = "generic";
3797 	}
3798     }
3799 
3800   if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3801       && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3802     {
3803       /* rep; movq isn't available in 32-bit code.  */
3804       error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3805       opts->x_ix86_stringop_alg = no_stringop;
3806     }
3807 
3808   if (!opts->x_ix86_arch_string)
3809     opts->x_ix86_arch_string
3810       = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3811 	? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3812   else
3813     ix86_arch_specified = 1;
3814 
3815   if (opts_set->x_ix86_pmode)
3816     {
3817       if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3818 	   && opts->x_ix86_pmode == PMODE_SI)
3819 	  || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3820 	       && opts->x_ix86_pmode == PMODE_DI))
3821 	error ("address mode %qs not supported in the %s bit mode",
3822 	       TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3823 	       TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3824     }
3825   else
3826     opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3827 			 ? PMODE_DI : PMODE_SI;
3828 
3829   if (!opts_set->x_ix86_abi)
3830     opts->x_ix86_abi = DEFAULT_ABI;
3831 
3832   if (opts->x_ix86_abi == MS_ABI && TARGET_X32_P (opts->x_ix86_isa_flags))
3833     error ("-mabi=ms not supported with X32 ABI");
3834   gcc_assert (opts->x_ix86_abi == SYSV_ABI || opts->x_ix86_abi == MS_ABI);
3835 
3836   /* For targets using the MS ABI, enable ms-extensions unless it is
3837      explicitly turned off.  For non-MS ABIs we turn this option
3838      off.  */
3839   if (!opts_set->x_flag_ms_extensions)
3840     opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3841 
3842   if (opts_set->x_ix86_cmodel)
3843     {
3844       switch (opts->x_ix86_cmodel)
3845 	{
3846 	case CM_SMALL:
3847 	case CM_SMALL_PIC:
3848 	  if (opts->x_flag_pic)
3849 	    opts->x_ix86_cmodel = CM_SMALL_PIC;
3850 	  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3851 	    error ("code model %qs not supported in the %s bit mode",
3852 		   "small", "32");
3853 	  break;
3854 
3855 	case CM_MEDIUM:
3856 	case CM_MEDIUM_PIC:
3857 	  if (opts->x_flag_pic)
3858 	    opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3859 	  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3860 	    error ("code model %qs not supported in the %s bit mode",
3861 		   "medium", "32");
3862 	  else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3863 	    error ("code model %qs not supported in x32 mode",
3864 		   "medium");
3865 	  break;
3866 
3867 	case CM_LARGE:
3868 	case CM_LARGE_PIC:
3869 	  if (opts->x_flag_pic)
3870 	    opts->x_ix86_cmodel = CM_LARGE_PIC;
3871 	  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3872 	    error ("code model %qs not supported in the %s bit mode",
3873 		   "large", "32");
3874 	  else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3875 	    error ("code model %qs not supported in x32 mode",
3876 		   "large");
3877 	  break;
3878 
3879 	case CM_32:
3880 	  if (opts->x_flag_pic)
3881 	    error ("code model %s does not support PIC mode", "32");
3882 	  if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3883 	    error ("code model %qs not supported in the %s bit mode",
3884 		   "32", "64");
3885 	  break;
3886 
3887 	case CM_KERNEL:
3888 	  if (opts->x_flag_pic)
3889 	    {
3890 	      error ("code model %s does not support PIC mode", "kernel");
3891 	      opts->x_ix86_cmodel = CM_32;
3892 	    }
3893 	  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3894 	    error ("code model %qs not supported in the %s bit mode",
3895 		   "kernel", "32");
3896 	  break;
3897 
3898 	default:
3899 	  gcc_unreachable ();
3900 	}
3901     }
3902   else
3903     {
3904       /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3905 	 use of rip-relative addressing.  This eliminates fixups that
3906 	 would otherwise be needed if this object is to be placed in a
3907 	 DLL, and is essentially just as efficient as direct addressing.  */
3908       if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3909 	  && (TARGET_RDOS || TARGET_PECOFF))
3910 	opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3911       else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3912 	opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3913       else
3914 	opts->x_ix86_cmodel = CM_32;
3915     }
3916   if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3917     {
3918       error ("-masm=intel not supported in this configuration");
3919       opts->x_ix86_asm_dialect = ASM_ATT;
3920     }
3921   if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3922       != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3923     sorry ("%i-bit mode not compiled in",
3924 	   (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3925 
3926   for (i = 0; i < pta_size; i++)
3927     if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3928       {
3929 	if (!strcmp (opts->x_ix86_arch_string, "generic"))
3930 	  {
3931 	    error (main_args_p
3932 		   ? G_("%<generic%> CPU can be used only for %<-mtune=%> "
3933 			"switch")
3934 		   : G_("%<generic%> CPU can be used only for "
3935 			"%<target(\"tune=\")%> attribute"));
3936 	    return false;
3937 	  }
3938 	else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3939 	  {
3940 	    error (main_args_p
3941 		   ? G_("%<intel%> CPU can be used only for %<-mtune=%> "
3942 			"switch")
3943 		   : G_("%<intel%> CPU can be used only for "
3944 			"%<target(\"tune=\")%> attribute"));
3945 	    return false;
3946 	  }
3947 
3948 	if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3949 	    && !((processor_alias_table[i].flags & PTA_64BIT) != 0))
3950 	  {
3951 	    error ("CPU you selected does not support x86-64 "
3952 		   "instruction set");
3953 	    return false;
3954 	  }
3955 
3956 	ix86_schedule = processor_alias_table[i].schedule;
3957 	ix86_arch = processor_alias_table[i].processor;
3958 	/* Default cpu tuning to the architecture.  */
3959 	ix86_tune = ix86_arch;
3960 
3961 	if (((processor_alias_table[i].flags & PTA_MMX) != 0)
3962 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3963 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3964 	if (((processor_alias_table[i].flags & PTA_3DNOW) != 0)
3965 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3966 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3967 	if (((processor_alias_table[i].flags & PTA_3DNOW_A) != 0)
3968 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3969 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3970 	if (((processor_alias_table[i].flags & PTA_SSE) != 0)
3971 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3972 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3973 	if (((processor_alias_table[i].flags & PTA_SSE2) != 0)
3974 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3975 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3976 	if (((processor_alias_table[i].flags & PTA_SSE3) != 0)
3977 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3978 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3979 	if (((processor_alias_table[i].flags & PTA_SSSE3) != 0)
3980 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3981 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3982 	if (((processor_alias_table[i].flags & PTA_SSE4_1) != 0)
3983 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3984 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3985 	if (((processor_alias_table[i].flags & PTA_SSE4_2) != 0)
3986 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3987 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3988 	if (((processor_alias_table[i].flags & PTA_AVX) != 0)
3989 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3990 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3991 	if (((processor_alias_table[i].flags & PTA_AVX2) != 0)
3992 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3993 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3994 	if (((processor_alias_table[i].flags & PTA_FMA) != 0)
3995 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3996 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3997 	if (((processor_alias_table[i].flags & PTA_SSE4A) != 0)
3998 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3999 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
4000 	if (((processor_alias_table[i].flags & PTA_FMA4) != 0)
4001 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
4002 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
4003 	if (((processor_alias_table[i].flags & PTA_XOP) != 0)
4004 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
4005 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
4006 	if (((processor_alias_table[i].flags & PTA_LWP) != 0)
4007 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
4008 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
4009 	if (((processor_alias_table[i].flags & PTA_ABM) != 0)
4010 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
4011 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
4012 	if (((processor_alias_table[i].flags & PTA_BMI) != 0)
4013 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
4014 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
4015 	if (((processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)) != 0)
4016 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
4017 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
4018 	if (((processor_alias_table[i].flags & PTA_TBM) != 0)
4019 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
4020 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
4021 	if (((processor_alias_table[i].flags & PTA_BMI2) != 0)
4022 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
4023 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
4024 	if (((processor_alias_table[i].flags & PTA_CX16) != 0)
4025 	    && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_CX16))
4026 	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_CX16;
4027 	if (((processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)) != 0)
4028 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
4029 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
4030 	if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
4031 	    && ((processor_alias_table[i].flags & PTA_NO_SAHF) != 0))
4032 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
4033 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
4034 	if (((processor_alias_table[i].flags & PTA_MOVBE) != 0)
4035 	    && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_MOVBE))
4036 	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_MOVBE;
4037 	if (((processor_alias_table[i].flags & PTA_AES) != 0)
4038 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
4039 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
4040 	if (((processor_alias_table[i].flags & PTA_SHA) != 0)
4041 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
4042 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
4043 	if (((processor_alias_table[i].flags & PTA_PCLMUL) != 0)
4044 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
4045 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
4046 	if (((processor_alias_table[i].flags & PTA_FSGSBASE) != 0)
4047 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
4048 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
4049 	if (((processor_alias_table[i].flags & PTA_RDRND) != 0)
4050 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
4051 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
4052 	if (((processor_alias_table[i].flags & PTA_F16C) != 0)
4053 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
4054 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
4055 	if (((processor_alias_table[i].flags & PTA_RTM) != 0)
4056 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
4057 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
4058 	if (((processor_alias_table[i].flags & PTA_HLE) != 0)
4059 	    && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_HLE))
4060 	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_HLE;
4061 	if (((processor_alias_table[i].flags & PTA_PRFCHW) != 0)
4062 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
4063 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
4064 	if (((processor_alias_table[i].flags & PTA_RDSEED) != 0)
4065 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
4066 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
4067 	if (((processor_alias_table[i].flags & PTA_ADX) != 0)
4068 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
4069 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
4070 	if (((processor_alias_table[i].flags & PTA_FXSR) != 0)
4071 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
4072 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
4073 	if (((processor_alias_table[i].flags & PTA_XSAVE) != 0)
4074 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
4075 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
4076 	if (((processor_alias_table[i].flags & PTA_XSAVEOPT) != 0)
4077 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
4078 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
4079 	if (((processor_alias_table[i].flags & PTA_AVX512F) != 0)
4080 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
4081 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
4082 	if (((processor_alias_table[i].flags & PTA_AVX512ER) != 0)
4083 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
4084 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
4085 	if (((processor_alias_table[i].flags & PTA_AVX512PF) != 0)
4086 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
4087 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
4088 	if (((processor_alias_table[i].flags & PTA_AVX512CD) != 0)
4089 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
4090 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
4091 	if (((processor_alias_table[i].flags & PTA_PREFETCHWT1) != 0)
4092 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
4093 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
4094 	if (((processor_alias_table[i].flags & PTA_CLWB) != 0)
4095 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
4096 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
4097 	if (((processor_alias_table[i].flags & PTA_CLFLUSHOPT) != 0)
4098 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
4099 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
4100 	if (((processor_alias_table[i].flags & PTA_CLZERO) != 0)
4101 	    && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_CLZERO))
4102 	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_CLZERO;
4103 	if (((processor_alias_table[i].flags & PTA_XSAVEC) != 0)
4104 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
4105 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
4106 	if (((processor_alias_table[i].flags & PTA_XSAVES) != 0)
4107 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
4108 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
4109 	if (((processor_alias_table[i].flags & PTA_AVX512DQ) != 0)
4110 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
4111 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
4112 	if (((processor_alias_table[i].flags & PTA_AVX512BW) != 0)
4113 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
4114 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
4115 	if (((processor_alias_table[i].flags & PTA_AVX512VL) != 0)
4116 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
4117 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
4118 	if (((processor_alias_table[i].flags & PTA_MPX) != 0)
4119             && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_MPX))
4120           opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_MPX;
4121 	if (((processor_alias_table[i].flags & PTA_AVX512VBMI) != 0)
4122 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
4123 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
4124 	if (((processor_alias_table[i].flags & PTA_AVX512IFMA) != 0)
4125 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
4126 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
4127 	if (((processor_alias_table[i].flags & PTA_AVX512VNNI) != 0)
4128 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VNNI))
4129 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VNNI;
4130 	if (((processor_alias_table[i].flags & PTA_GFNI) != 0)
4131 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_GFNI))
4132 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_GFNI;
4133 	if (((processor_alias_table[i].flags & PTA_AVX512VBMI2) != 0)
4134 	    && !(opts->x_ix86_isa_flags_explicit
4135 	    & OPTION_MASK_ISA_AVX512VBMI2))
4136 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI2;
4137 	if (((processor_alias_table[i].flags & PTA_VPCLMULQDQ) != 0)
4138 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_VPCLMULQDQ))
4139 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_VPCLMULQDQ;
4140 	if (((processor_alias_table[i].flags & PTA_AVX512BITALG) != 0)
4141 	    && !(opts->x_ix86_isa_flags_explicit
4142 	    & OPTION_MASK_ISA_AVX512BITALG))
4143 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BITALG;
4144 
4145 	if (((processor_alias_table[i].flags & PTA_AVX5124VNNIW) != 0)
4146 	    && !(opts->x_ix86_isa_flags2_explicit
4147 		 & OPTION_MASK_ISA_AVX5124VNNIW))
4148 	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX5124VNNIW;
4149 	if (((processor_alias_table[i].flags & PTA_AVX5124FMAPS) != 0)
4150 	    && !(opts->x_ix86_isa_flags2_explicit
4151 		 & OPTION_MASK_ISA_AVX5124FMAPS))
4152 	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX5124FMAPS;
4153 	if (((processor_alias_table[i].flags & PTA_AVX512VPOPCNTDQ) != 0)
4154 	    && !(opts->x_ix86_isa_flags_explicit
4155 		 & OPTION_MASK_ISA_AVX512VPOPCNTDQ))
4156 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VPOPCNTDQ;
4157 	if (((processor_alias_table[i].flags & PTA_SGX) != 0)
4158 	    && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_SGX))
4159 	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_SGX;
4160 	if (((processor_alias_table[i].flags & PTA_VAES) != 0)
4161 	    && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_VAES))
4162 	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_VAES;
4163 	if (((processor_alias_table[i].flags & PTA_RDPID) != 0)
4164 	    && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_RDPID))
4165 	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_RDPID;
4166 	if (((processor_alias_table[i].flags & PTA_PCONFIG) != 0)
4167 	    && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_PCONFIG))
4168 	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_PCONFIG;
4169 	if (((processor_alias_table[i].flags & PTA_WBNOINVD) != 0)
4170 	    && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_WBNOINVD))
4171 	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_WBNOINVD;
4172 
4173 	if ((processor_alias_table[i].flags
4174 	   & (PTA_PREFETCH_SSE | PTA_SSE)) != 0)
4175 	  x86_prefetch_sse = true;
4176 	if (((processor_alias_table[i].flags & PTA_MWAITX) != 0)
4177 	    && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_MWAITX))
4178 	  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_MWAITX;
4179 	if (((processor_alias_table[i].flags & PTA_PKU) != 0)
4180 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PKU))
4181 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PKU;
4182 
4183 	/* Don't enable x87 instructions if only
4184 	   general registers are allowed.  */
4185 	if (!(opts_set->x_ix86_target_flags & OPTION_MASK_GENERAL_REGS_ONLY)
4186 	    && !(opts_set->x_target_flags & MASK_80387))
4187 	  {
4188 	    if (((processor_alias_table[i].flags & PTA_NO_80387) != 0))
4189 	      opts->x_target_flags &= ~MASK_80387;
4190 	    else
4191 	      opts->x_target_flags |= MASK_80387;
4192 	  }
4193 	break;
4194       }
4195 
4196   if (TARGET_X32 && (opts->x_ix86_isa_flags2 & OPTION_MASK_ISA_MPX))
4197     error ("Intel MPX does not support x32");
4201 
4202   if (i == pta_size)
4203     {
4204       error (main_args_p
4205 	     ? G_("bad value (%qs) for %<-march=%> switch")
4206 	     : G_("bad value (%qs) for %<target(\"arch=\")%> attribute"),
4207 	     opts->x_ix86_arch_string);
4208 
4209       auto_vec <const char *> candidates;
4210       for (i = 0; i < pta_size; i++)
4211 	if (strcmp (processor_alias_table[i].name, "generic")
4212 	    && strcmp (processor_alias_table[i].name, "intel")
4213 	    && (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4214 		|| ((processor_alias_table[i].flags & PTA_64BIT) != 0)))
4215 	  candidates.safe_push (processor_alias_table[i].name);
4216 
4217 #ifdef HAVE_LOCAL_CPU_DETECT
4218       /* Also add "native" as a possible value.  */
4219       candidates.safe_push ("native");
4220 #endif
4221 
4222       char *s;
4223       const char *hint
4224 	= candidates_list_and_hint (opts->x_ix86_arch_string, s, candidates);
4225       if (hint)
4226 	inform (input_location,
4227 		main_args_p
4228 		? G_("valid arguments to %<-march=%> switch are: "
4229 		     "%s; did you mean %qs?")
4230 		: G_("valid arguments to %<target(\"arch=\")%> attribute are: "
4231 		     "%s; did you mean %qs?"), s, hint);
4232       else
4233 	inform (input_location,
4234 		main_args_p
4235 		? G_("valid arguments to %<-march=%> switch are: %s")
4236 		: G_("valid arguments to %<target(\"arch=\")%> attribute "
4237 		     "are: %s"), s);
4238       XDELETEVEC (s);
4239     }
4240 
4241   ix86_arch_mask = HOST_WIDE_INT_1U << ix86_arch;
4242   for (i = 0; i < X86_ARCH_LAST; ++i)
4243     ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4244 
4245   for (i = 0; i < pta_size; i++)
4246     if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
4247       {
4248 	ix86_schedule = processor_alias_table[i].schedule;
4249 	ix86_tune = processor_alias_table[i].processor;
4250 	if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4251 	  {
4252 	    if (!((processor_alias_table[i].flags & PTA_64BIT) != 0))
4253 	      {
4254 		if (ix86_tune_defaulted)
4255 		  {
4256 		    opts->x_ix86_tune_string = "x86-64";
4257 		    for (i = 0; i < pta_size; i++)
4258 		      if (! strcmp (opts->x_ix86_tune_string,
4259 				    processor_alias_table[i].name))
4260 			break;
4261 		    ix86_schedule = processor_alias_table[i].schedule;
4262 		    ix86_tune = processor_alias_table[i].processor;
4263 		  }
4264 		else
4265 		  error ("CPU you selected does not support x86-64 "
4266 			 "instruction set");
4267 	      }
4268 	  }
4269 	/* Intel CPUs have always interpreted SSE prefetch instructions as
4270 	   NOPs; so, we can enable SSE prefetch instructions even when
4271 	   -mtune (rather than -march) points us to a processor that has them.
4272 	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
4273 	   higher processors.  */
4274 	if (TARGET_CMOV
4275 	    && ((processor_alias_table[i].flags
4276 	      & (PTA_PREFETCH_SSE | PTA_SSE)) != 0))
4277 	  x86_prefetch_sse = true;
4278 	break;
4279       }
4280 
4281   if (ix86_tune_specified && i == pta_size)
4282     {
4283       error (main_args_p
4284 	     ? G_("bad value (%qs) for %<-mtune=%> switch")
4285 	     : G_("bad value (%qs) for %<target(\"tune=\")%> attribute"),
4286 	     opts->x_ix86_tune_string);
4287 
4288       auto_vec <const char *> candidates;
4289       for (i = 0; i < pta_size; i++)
4290 	if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4291 	    || ((processor_alias_table[i].flags & PTA_64BIT) != 0))
4292 	  candidates.safe_push (processor_alias_table[i].name);
4293 
4294 #ifdef HAVE_LOCAL_CPU_DETECT
4295       /* Also add "native" as a possible value.  */
4296       candidates.safe_push ("native");
4297 #endif
4298 
4299       char *s;
4300       const char *hint
4301 	= candidates_list_and_hint (opts->x_ix86_tune_string, s, candidates);
4302       if (hint)
4303 	inform (input_location,
4304 		main_args_p
4305 		? G_("valid arguments to %<-mtune=%> switch are: "
4306 		     "%s; did you mean %qs?")
4307 		: G_("valid arguments to %<target(\"tune=\")%> attribute are: "
4308 		     "%s; did you mean %qs?"), s, hint);
4309       else
4310 	inform (input_location,
4311 		main_args_p
4312 		? G_("valid arguments to %<-mtune=%> switch are: %s")
4313 		: G_("valid arguments to %<target(\"tune=\")%> attribute "
4314 		     "are: %s"), s);
4315       XDELETEVEC (s);
4316     }
4317 
4318   set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
4319 
4320 #ifndef USE_IX86_FRAME_POINTER
4321 #define USE_IX86_FRAME_POINTER 0
4322 #endif
4323 
4324 #ifndef USE_X86_64_FRAME_POINTER
4325 #define USE_X86_64_FRAME_POINTER 0
4326 #endif
4327 
4328   /* Set the default values for switches whose default depends on TARGET_64BIT
4329      in case they weren't overwritten by command line options.  */
4330   if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4331     {
4332       if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
4333 	opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
4334       if (opts->x_flag_asynchronous_unwind_tables
4335 	  && !opts_set->x_flag_unwind_tables
4336 	  && TARGET_64BIT_MS_ABI)
4337 	opts->x_flag_unwind_tables = 1;
4338       if (opts->x_flag_asynchronous_unwind_tables == 2)
4339 	opts->x_flag_unwind_tables
4340 	  = opts->x_flag_asynchronous_unwind_tables = 1;
4341       if (opts->x_flag_pcc_struct_return == 2)
4342 	opts->x_flag_pcc_struct_return = 0;
4343     }
4344   else
4345     {
4346       if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
4347 	opts->x_flag_omit_frame_pointer
4348 	  = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
4349       if (opts->x_flag_asynchronous_unwind_tables == 2)
4350 	opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
4351       if (opts->x_flag_pcc_struct_return == 2)
4352 	{
4353 	  /* Intel MCU psABI specifies that -freg-struct-return should
4354 	     be on.  Instead of setting DEFAULT_PCC_STRUCT_RETURN to 1,
4355 	     we check -miamcu so that -freg-struct-return is always
4356 	     turned on if -miamcu is used.  */
4357 	  if (TARGET_IAMCU_P (opts->x_target_flags))
4358 	    opts->x_flag_pcc_struct_return = 0;
4359 	  else
4360 	    opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
4361 	}
4362     }
4363 
4364   ix86_tune_cost = processor_target_table[ix86_tune].cost;
4365   /* TODO: ix86_cost should be chosen at instruction or function granularity,
4366      so that for cold code we use size_cost even in !optimize_size compilation.  */
4367   if (opts->x_optimize_size)
4368     ix86_cost = &ix86_size_cost;
4369   else
4370     ix86_cost = ix86_tune_cost;
4371 
4372   /* Arrange to set up i386_stack_locals for all functions.  */
4373   init_machine_status = ix86_init_machine_status;
4374 
4375   /* Validate -mregparm= value.  */
4376   if (opts_set->x_ix86_regparm)
4377     {
4378       if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4379 	warning (0, "-mregparm is ignored in 64-bit mode");
4380       else if (TARGET_IAMCU_P (opts->x_target_flags))
4381 	warning (0, "-mregparm is ignored for Intel MCU psABI");
4382       if (opts->x_ix86_regparm > REGPARM_MAX)
4383 	{
4384 	  error ("-mregparm=%d is not between 0 and %d",
4385 		 opts->x_ix86_regparm, REGPARM_MAX);
4386 	  opts->x_ix86_regparm = 0;
4387 	}
4388     }
4389   if (TARGET_IAMCU_P (opts->x_target_flags)
4390       || TARGET_64BIT_P (opts->x_ix86_isa_flags))
4391     opts->x_ix86_regparm = REGPARM_MAX;
4392 
4393   /* Default align_* from the processor table.  */
4394   ix86_default_align (opts);
4395 
4396   /* Provide default for -mbranch-cost= value.  */
4397   if (!opts_set->x_ix86_branch_cost)
4398     opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
4399 
4400   if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4401     {
4402       opts->x_target_flags
4403 	|= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
4404 
4405       /* Enable by default the SSE and MMX builtins.  Do allow the user to
4406 	 explicitly disable any of these.  In particular, disabling SSE and
4407 	 MMX for kernel code is extremely useful.  */
4408       if (!ix86_arch_specified)
4409 	opts->x_ix86_isa_flags
4410 	  |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
4411 	       | TARGET_SUBTARGET64_ISA_DEFAULT)
4412 	      & ~opts->x_ix86_isa_flags_explicit);
4413 
4414       if (TARGET_RTD_P (opts->x_target_flags))
4415 	warning (0,
4416 		 main_args_p
4417 		 ? G_("%<-mrtd%> is ignored in 64bit mode")
4418 		 : G_("%<target(\"rtd\")%> is ignored in 64bit mode"));
4419     }
4420   else
4421     {
4422       opts->x_target_flags
4423 	|= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
4424 
4425       if (!ix86_arch_specified)
4426         opts->x_ix86_isa_flags
4427 	  |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
4428 
4429       /* The i386 ABI does not specify a red zone.  It still makes sense to use
4430          one when the programmer takes care that the stack is not destroyed.  */
4431       if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
4432         opts->x_target_flags |= MASK_NO_RED_ZONE;
4433     }
4434 
4435   /* Keep nonleaf frame pointers.  */
4436   if (opts->x_flag_omit_frame_pointer)
4437     opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
4438   else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
4439     opts->x_flag_omit_frame_pointer = 1;
4440 
4441   /* If we're doing fast math, we don't care about comparison order
4442      wrt NaNs.  This lets us use a shorter comparison sequence.  */
4443   if (opts->x_flag_finite_math_only)
4444     opts->x_target_flags &= ~MASK_IEEE_FP;
4445 
4446   /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
4447      since the insns won't need emulation.  */
4448   if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
4449     opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
4450 
4451   /* Likewise, if the target doesn't have a 387, or we've specified
4452      software floating point, don't use 387 inline intrinsics.  */
4453   if (!TARGET_80387_P (opts->x_target_flags))
4454     opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
4455 
4456   /* Turn on MMX builtins for -msse.  */
4457   if (TARGET_SSE_P (opts->x_ix86_isa_flags))
4458     opts->x_ix86_isa_flags
4459       |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
4460 
4461   /* Enable SSE prefetch.  */
4462   if (TARGET_SSE_P (opts->x_ix86_isa_flags)
4463       || (TARGET_PRFCHW_P (opts->x_ix86_isa_flags)
4464 	  && !TARGET_3DNOW_P (opts->x_ix86_isa_flags))
4465       || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
4466     x86_prefetch_sse = true;
4467 
4468   /* Enable popcnt instruction for -msse4.2 or -mabm.  */
4469   if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
4470       || TARGET_ABM_P (opts->x_ix86_isa_flags))
4471     opts->x_ix86_isa_flags
4472       |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
4473 
4474   /* Enable lzcnt instruction for -mabm.  */
4475   if (TARGET_ABM_P (opts->x_ix86_isa_flags))
4476     opts->x_ix86_isa_flags
4477       |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
4478 
4479   /* Disable BMI, BMI2 and TBM instructions for -m16.  */
4480   if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
4481     opts->x_ix86_isa_flags
4482       &= ~((OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_TBM)
4483 	   & ~opts->x_ix86_isa_flags_explicit);
4484 
4485   /* Validate -mpreferred-stack-boundary= value or default it to
4486      PREFERRED_STACK_BOUNDARY_DEFAULT.  */
4487   ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4488   if (opts_set->x_ix86_preferred_stack_boundary_arg)
4489     {
4490       int min = TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2;
4491       int max = TARGET_SEH ? 4 : 12;
4492 
4493       if (opts->x_ix86_preferred_stack_boundary_arg < min
4494 	  || opts->x_ix86_preferred_stack_boundary_arg > max)
4495 	{
4496 	  if (min == max)
4497 	    error ("-mpreferred-stack-boundary is not supported "
4498 		   "for this target");
4499 	  else
4500 	    error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4501 		   opts->x_ix86_preferred_stack_boundary_arg, min, max);
4502 	}
4503       else
4504 	ix86_preferred_stack_boundary
4505 	  = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
4506     }
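
  /* The argument of -mpreferred-stack-boundary= is the base-2 logarithm of
     the boundary in bytes: e.g. an argument of 4 yields
     (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. a 16-byte boundary, while the
     minima of 2 and 3 above correspond to 4- and 8-byte alignment.  */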
4507 
4508   /* Set the default value for -mstackrealign.  */
4509   if (!opts_set->x_ix86_force_align_arg_pointer)
4510     opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4511 
4512   ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4513 
4514   /* Validate -mincoming-stack-boundary= value or default it to
4515      MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY.  */
4516   ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4517   if (opts_set->x_ix86_incoming_stack_boundary_arg)
4518     {
4519       int min = TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2;
4520 
4521       if (opts->x_ix86_incoming_stack_boundary_arg < min
4522 	  || opts->x_ix86_incoming_stack_boundary_arg > 12)
4523 	error ("-mincoming-stack-boundary=%d is not between %d and 12",
4524 	       opts->x_ix86_incoming_stack_boundary_arg, min);
4525       else
4526 	{
4527 	  ix86_user_incoming_stack_boundary
4528 	    = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4529 	  ix86_incoming_stack_boundary
4530 	    = ix86_user_incoming_stack_boundary;
4531 	}
4532     }
4533 
4534 #ifndef NO_PROFILE_COUNTERS
4535   if (flag_nop_mcount)
4536     error ("-mnop-mcount is not compatible with this target");
4537 #endif
4538   if (flag_nop_mcount && flag_pic)
4539     error ("-mnop-mcount is not implemented for -fPIC");
4540 
4541   /* Accept -msseregparm only if at least SSE support is enabled.  */
4542   if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4543       && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4544     error (main_args_p
4545 	   ? G_("%<-msseregparm%> used without SSE enabled")
4546 	   : G_("%<target(\"sseregparm\")%> used without SSE enabled"));
4547 
4548   if (opts_set->x_ix86_fpmath)
4549     {
4550       if (opts->x_ix86_fpmath & FPMATH_SSE)
4551 	{
4552 	  if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4553 	    {
4554 	      if (TARGET_80387_P (opts->x_target_flags))
4555 		{
4556 		  warning (0, "SSE instruction set disabled, using 387 arithmetics");
4557 		  opts->x_ix86_fpmath = FPMATH_387;
4558 		}
4559 	    }
4560 	  else if ((opts->x_ix86_fpmath & FPMATH_387)
4561 		   && !TARGET_80387_P (opts->x_target_flags))
4562 	    {
4563 	      warning (0, "387 instruction set disabled, using SSE arithmetics");
4564 	      opts->x_ix86_fpmath = FPMATH_SSE;
4565 	    }
4566 	}
4567     }
4568   /* For all chips supporting SSE2, -mfpmath=sse performs better than
4569      fpmath=387.  The latter is nevertheless the default on many targets,
4570      since the extra 80-bit precision of temporaries is considered part of
4571      the ABI.  Overwrite the default at least for -ffast-math.
4572      TODO: -mfpmath=both seems to produce code that performs the same with
4573      slightly smaller binaries.  It is however not clear whether register
4574      allocation is ready for this setting.
4575      Also, -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
4576      codegen.  We may switch to 387 with -ffast-math for size-optimized
4577      functions.  */
4578   else if (fast_math_flags_set_p (&global_options)
4579 	   && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4580     opts->x_ix86_fpmath = FPMATH_SSE;
4581   else
4582     opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4583 
4584   /* Use an external vectorized library when vectorizing intrinsic calls.  */
4585   if (opts_set->x_ix86_veclibabi_type)
4586     switch (opts->x_ix86_veclibabi_type)
4587       {
4588       case ix86_veclibabi_type_svml:
4589 	ix86_veclib_handler = ix86_veclibabi_svml;
4590 	break;
4591 
4592       case ix86_veclibabi_type_acml:
4593 	ix86_veclib_handler = ix86_veclibabi_acml;
4594 	break;
4595 
4596       default:
4597 	gcc_unreachable ();
4598       }
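
  /* SVML is Intel's Short Vector Math Library and ACML is the AMD Core Math
     Library; selecting -mveclibabi=svml or -mveclibabi=acml makes the
     vectorizer route calls to the supported math functions through the
     ix86_veclibabi_* handler installed above.  */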
4599 
4600   if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4601       && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4602     opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4603 
4604   /* If stack probes are required, the space used for large function
4605      arguments on the stack must also be probed, so enable
4606      -maccumulate-outgoing-args so this happens in the prologue.  */
4607   if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4608       && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4609     {
4610       if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4611 	warning (0,
4612 		 main_args_p
4613 		 ? G_("stack probing requires %<-maccumulate-outgoing-args%> "
4614 		      "for correctness")
4615 		 : G_("stack probing requires "
4616 		      "%<target(\"accumulate-outgoing-args\")%> for "
4617 		      "correctness"));
4618       opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4619     }
4620 
4621   /* Stack realignment without -maccumulate-outgoing-args requires %ebp,
4622      so enable -maccumulate-outgoing-args when %ebp is fixed.  */
4623   if (fixed_regs[BP_REG]
4624       && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4625     {
4626       if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4627 	warning (0,
4628 		 main_args_p
4629 		 ? G_("fixed ebp register requires "
4630 		      "%<-maccumulate-outgoing-args%>")
4631 		 : G_("fixed ebp register requires "
4632 		      "%<target(\"accumulate-outgoing-args\")%>"));
4633       opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4634     }
4635 
4636   /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
4637   {
4638     char *p;
4639     ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4640     p = strchr (internal_label_prefix, 'X');
4641     internal_label_prefix_len = p - internal_label_prefix;
4642     *p = '\0';
4643   }
4644 
4645   /* When the scheduling description is not available, disable the scheduler
4646      pass so it won't slow down compilation and make x87 code slower.  */
4647   if (!TARGET_SCHEDULE)
4648     opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4649 
4650   maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4651 			 ix86_tune_cost->simultaneous_prefetches,
4652 			 opts->x_param_values,
4653 			 opts_set->x_param_values);
4654   maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4655 			 ix86_tune_cost->prefetch_block,
4656 			 opts->x_param_values,
4657 			 opts_set->x_param_values);
4658   maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4659 			 ix86_tune_cost->l1_cache_size,
4660 			 opts->x_param_values,
4661 			 opts_set->x_param_values);
4662   maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4663 			 ix86_tune_cost->l2_cache_size,
4664 			 opts->x_param_values,
4665 			 opts_set->x_param_values);
4666 
4667   /* Enable software prefetching at -O3 for CPUs where it is helpful.  */
4668   if (opts->x_flag_prefetch_loop_arrays < 0
4669       && HAVE_prefetch
4670       && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4671       && !opts->x_optimize_size
4672       && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4673     opts->x_flag_prefetch_loop_arrays = 1;
4674 
4675   /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4676      can be optimized to ap = __builtin_next_arg (0).  */
4677   if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4678     targetm.expand_builtin_va_start = NULL;
4679 
4680   if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4681     {
4682       ix86_gen_leave = gen_leave_rex64;
4683       if (Pmode == DImode)
4684 	{
4685 	  ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4686 	  ix86_gen_tls_local_dynamic_base_64
4687 	    = gen_tls_local_dynamic_base_64_di;
4688 	}
4689       else
4690 	{
4691 	  ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4692 	  ix86_gen_tls_local_dynamic_base_64
4693 	    = gen_tls_local_dynamic_base_64_si;
4694 	}
4695     }
4696   else
4697     ix86_gen_leave = gen_leave;
4698 
4699   if (Pmode == DImode)
4700     {
4701       ix86_gen_add3 = gen_adddi3;
4702       ix86_gen_sub3 = gen_subdi3;
4703       ix86_gen_sub3_carry = gen_subdi3_carry;
4704       ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4705       ix86_gen_andsp = gen_anddi3;
4706       ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4707       ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4708       ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4709       ix86_gen_monitor = gen_sse3_monitor_di;
4710       ix86_gen_monitorx = gen_monitorx_di;
4711       ix86_gen_clzero = gen_clzero_di;
4712     }
4713   else
4714     {
4715       ix86_gen_add3 = gen_addsi3;
4716       ix86_gen_sub3 = gen_subsi3;
4717       ix86_gen_sub3_carry = gen_subsi3_carry;
4718       ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4719       ix86_gen_andsp = gen_andsi3;
4720       ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4721       ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4722       ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4723       ix86_gen_monitor = gen_sse3_monitor_si;
4724       ix86_gen_monitorx = gen_monitorx_si;
4725       ix86_gen_clzero = gen_clzero_si;
4726     }
4727 
4728 #ifdef USE_IX86_CLD
4729   /* Use -mcld by default for 32-bit code if configured with --enable-cld.  */
4730   if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4731     opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4732 #endif
4733 
4734   /* Set the default value for -mfentry.  */
4735   if (!opts_set->x_flag_fentry)
4736     opts->x_flag_fentry = TARGET_SEH;
4737   else
4738     {
4739       if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic
4740 	  && opts->x_flag_fentry)
4741 	sorry ("-mfentry isn%'t supported for 32-bit in combination "
4742 	       "with -fpic");
4743       else if (TARGET_SEH && !opts->x_flag_fentry)
4744 	sorry ("-mno-fentry isn%'t compatible with SEH");
4745     }
4746 
4747   if (TARGET_SEH && TARGET_CALL_MS2SYSV_XLOGUES)
4748     sorry ("-mcall-ms2sysv-xlogues isn%'t currently supported with SEH");
4749 
4750   if (!(opts_set->x_target_flags & MASK_VZEROUPPER)
4751       && TARGET_EMIT_VZEROUPPER)
4752     opts->x_target_flags |= MASK_VZEROUPPER;
4753   if (!(opts_set->x_target_flags & MASK_STV))
4754     opts->x_target_flags |= MASK_STV;
4755   /* Disable STV if -mpreferred-stack-boundary={2,3} or
4756      -mincoming-stack-boundary={2,3} or -mstackrealign - the needed
4757      stack realignment is an extra cost the pass doesn't take into
4758      account, and the pass can't realign the stack.  */
4759   if (ix86_preferred_stack_boundary < 128
4760       || ix86_incoming_stack_boundary < 128
4761       || opts->x_ix86_force_align_arg_pointer)
4762     opts->x_target_flags &= ~MASK_STV;
4763   if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4764       && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4765     opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4766   if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4767       && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4768     opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4769 
4770   /* Enable 128-bit AVX instruction generation
4771      for the auto-vectorizer.  */
4772   if (TARGET_AVX128_OPTIMAL
4773       && (opts_set->x_prefer_vector_width_type == PVW_NONE))
4774     opts->x_prefer_vector_width_type = PVW_AVX128;
4775 
4776   /* Use 256-bit AVX instruction generation
4777      in the auto-vectorizer.  */
4778   if (ix86_tune_features[X86_TUNE_AVX256_OPTIMAL]
4779       && (opts_set->x_prefer_vector_width_type == PVW_NONE))
4780     opts->x_prefer_vector_width_type = PVW_AVX256;
4781 
4782   if (opts->x_ix86_recip_name)
4783     {
4784       char *p = ASTRDUP (opts->x_ix86_recip_name);
4785       char *q;
4786       unsigned int mask, i;
4787       bool invert;
4788 
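      /* Sketch of the accepted syntax (the option names themselves come
	 from the recip_options[] table): something like "-mrecip=all,!sqrt"
	 sets every bit via the "all"/"default" handling below and then the
	 leading '!' clears the bit looked up for "sqrt".  */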
4789       while ((q = strtok (p, ",")) != NULL)
4790 	{
4791 	  p = NULL;
4792 	  if (*q == '!')
4793 	    {
4794 	      invert = true;
4795 	      q++;
4796 	    }
4797 	  else
4798 	    invert = false;
4799 
4800 	  if (!strcmp (q, "default"))
4801 	    mask = RECIP_MASK_ALL;
4802 	  else
4803 	    {
4804 	      for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4805 		if (!strcmp (q, recip_options[i].string))
4806 		  {
4807 		    mask = recip_options[i].mask;
4808 		    break;
4809 		  }
4810 
4811 	      if (i == ARRAY_SIZE (recip_options))
4812 		{
4813 		  error ("unknown option for -mrecip=%s", q);
4814 		  invert = false;
4815 		  mask = RECIP_MASK_NONE;
4816 		}
4817 	    }
4818 
4819 	  opts->x_recip_mask_explicit |= mask;
4820 	  if (invert)
4821 	    opts->x_recip_mask &= ~mask;
4822 	  else
4823 	    opts->x_recip_mask |= mask;
4824 	}
4825     }
4826 
4827   if (TARGET_RECIP_P (opts->x_target_flags))
4828     opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4829   else if (opts_set->x_target_flags & MASK_RECIP)
4830     opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4831 
4832   /* Default long double to 64-bit for 32-bit Bionic and to __float128
4833      for 64-bit Bionic.  Also default long double to 64-bit for Intel
4834      MCU psABI.  */
4835   if ((TARGET_HAS_BIONIC || TARGET_IAMCU)
4836       && !(opts_set->x_target_flags
4837 	   & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4838     opts->x_target_flags |= (TARGET_64BIT
4839 			     ? MASK_LONG_DOUBLE_128
4840 			     : MASK_LONG_DOUBLE_64);
4841 
4842   /* Only one of them can be active.  */
4843   gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4844 	      || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4845 
4846   /* Handle stack protector */
4847   if (!opts_set->x_ix86_stack_protector_guard)
4848     opts->x_ix86_stack_protector_guard
4849       = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4850 
4851 #ifdef TARGET_THREAD_SSP_OFFSET
4852   ix86_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4853 #endif
4854 
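  /* Syntax sketch (illustrative): -mstack-protector-guard-offset=0x28.
     The value is parsed with strtol/strtoll using base 0, so decimal,
     octal and hexadecimal all work, and it must fit in a signed 32-bit
     displacement (see the range check below).  */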
4855   if (global_options_set.x_ix86_stack_protector_guard_offset_str)
4856     {
4857       char *endp;
4858       const char *str = ix86_stack_protector_guard_offset_str;
4859 
4860       errno = 0;
4861       int64_t offset;
4862 
4863 #if defined(INT64_T_IS_LONG)
4864       offset = strtol (str, &endp, 0);
4865 #else
4866       offset = strtoll (str, &endp, 0);
4867 #endif
4868 
4869       if (!*str || *endp || errno)
4870 	error ("%qs is not a valid number "
4871 	       "in -mstack-protector-guard-offset=", str);
4872 
4873       if (!IN_RANGE (offset, HOST_WIDE_INT_C (-0x80000000),
4874 		     HOST_WIDE_INT_C (0x7fffffff)))
4875 	error ("%qs is not a valid offset "
4876 	       "in -mstack-protector-guard-offset=", str);
4877 
4878       ix86_stack_protector_guard_offset = offset;
4879     }
4880 
4881   ix86_stack_protector_guard_reg = DEFAULT_TLS_SEG_REG;
4882 
4883   /* The kernel uses a different segment register for performance
4884      reasons; a system call would not have to trash the userspace
4885      segment register, which would be expensive.  */
4886   if (ix86_cmodel == CM_KERNEL)
4887     ix86_stack_protector_guard_reg = ADDR_SPACE_SEG_GS;
4888 
4889   if (global_options_set.x_ix86_stack_protector_guard_reg_str)
4890     {
4891       const char *str = ix86_stack_protector_guard_reg_str;
4892       addr_space_t seg = ADDR_SPACE_GENERIC;
4893 
4894       /* Discard optional register prefix.  */
4895       if (str[0] == '%')
4896 	str++;
4897 
4898       if (strlen (str) == 2 && str[1] == 's')
4899 	{
4900 	  if (str[0] == 'f')
4901 	    seg = ADDR_SPACE_SEG_FS;
4902 	  else if (str[0] == 'g')
4903 	    seg = ADDR_SPACE_SEG_GS;
4904 	}
4905 
4906       if (seg == ADDR_SPACE_GENERIC)
4907 	error ("%qs is not a valid base register "
4908 	       "in -mstack-protector-guard-reg=",
4909 	       ix86_stack_protector_guard_reg_str);
4910 
4911       ix86_stack_protector_guard_reg = seg;
4912     }
4913 
4914   /* Handle -mmemcpy-strategy= and -mmemset-strategy=  */
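  /* The strategy string is a comma-separated list of alg:max_size:dest_align
     triplets, e.g. "rep_byte:16:noalign,libcall:-1:align" (illustrative; the
     accepted algorithm names are validated by
     ix86_parse_stringop_strategy_string).  */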
4915   if (opts->x_ix86_tune_memcpy_strategy)
4916     {
4917       char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4918       ix86_parse_stringop_strategy_string (str, false);
4919       free (str);
4920     }
4921 
4922   if (opts->x_ix86_tune_memset_strategy)
4923     {
4924       char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4925       ix86_parse_stringop_strategy_string (str, true);
4926       free (str);
4927     }
4928 
4929   /* Save the initial options in case the user specifies function-specific
4930      options.  */
4931   if (main_args_p)
4932     target_option_default_node = target_option_current_node
4933       = build_target_option_node (opts);
4934 
4935   if (opts->x_flag_cf_protection != CF_NONE)
4936     opts->x_flag_cf_protection =
4937       (cf_protection_level) (opts->x_flag_cf_protection | CF_SET);
4938 
4939   if (ix86_tune_features [X86_TUNE_AVOID_128FMA_CHAINS])
4940     maybe_set_param_value (PARAM_AVOID_FMA_MAX_BITS, 128,
4941 			   opts->x_param_values,
4942 			   opts_set->x_param_values);
4943 
4944   return true;
4945 }
4946 
4947 /* Implement the TARGET_OPTION_OVERRIDE hook.  */
4948 
4949 static void
4950 ix86_option_override (void)
4951 {
4952   ix86_option_override_internal (true, &global_options, &global_options_set);
4953 }
4954 
4955 /* Implement the TARGET_OFFLOAD_OPTIONS hook.  */
4956 static char *
4957 ix86_offload_options (void)
4958 {
4959   if (TARGET_LP64)
4960     return xstrdup ("-foffload-abi=lp64");
4961   return xstrdup ("-foffload-abi=ilp32");
4962 }
4963 
4964 /* Update register usage after having seen the compiler flags.  */
4965 
4966 static void
4967 ix86_conditional_register_usage (void)
4968 {
4969   int i, c_mask;
4970 
4971   /* If there are no caller-saved registers, preserve all registers,
4972      except fixed_regs and registers used for the function return value,
4973      since aggregate_value_p checks call_used_regs[regno] on the return
4974      value.  */
4975   if (cfun && cfun->machine->no_caller_saved_registers)
4976     for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4977       if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
4978 	call_used_regs[i] = 0;
4979 
4980   /* For 32-bit targets, squash the REX registers.  */
4981   if (! TARGET_64BIT)
4982     {
4983       for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4984 	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4985       for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4986 	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4987       for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4988 	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4989     }
4990 
4991   /*  See the definition of CALL_USED_REGISTERS in i386.h.  */
4992   c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
4993 
4994   CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4995 
4996   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4997     {
4998       /* Set/reset conditionally defined registers from
4999 	 CALL_USED_REGISTERS initializer.  */
5000       if (call_used_regs[i] > 1)
5001 	call_used_regs[i] = !!(call_used_regs[i] & c_mask);
5002 
5003       /* Calculate registers of CLOBBERED_REGS register set
5004 	 as call used registers from GENERAL_REGS register set.  */
5005       if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
5006 	  && call_used_regs[i])
5007 	SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
5008     }
5009 
5010   /* If MMX is disabled, squash the registers.  */
5011   if (! TARGET_MMX)
5012     for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5013       if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
5014 	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5015 
5016   /* If SSE is disabled, squash the registers.  */
5017   if (! TARGET_SSE)
5018     for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5019       if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
5020 	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5021 
5022   /* If the FPU is disabled, squash the registers.  */
5023   if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
5024     for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5025       if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
5026 	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5027 
5028   /* If AVX512F is disabled, squash the registers.  */
5029   if (! TARGET_AVX512F)
5030     {
5031       for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
5032 	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5033 
5034       for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
5035 	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5036     }
5037 
5038   /* If MPX is disabled, squash the registers.  */
5039   if (! TARGET_MPX)
5040     for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
5041       fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5042 }
5043 
5044 /* Canonicalize a comparison from one we don't have to one we do have.  */
5045 
5046 static void
5047 ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5048 			      bool op0_preserve_value)
5049 {
5050   /* The order of operands in an x87 ficom compare is forced by combine in
5051      the simplify_comparison () function.  The float operator is treated as
5052      RTX_OBJ with precedence over other operators and is always put first.
5053      Swap the condition and operands to match the ficom instruction.  */
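  /* For example, assuming a setcc/jcc exists for the swapped code:
     (gt (float (mem)) (reg)) becomes (lt (reg) (float (mem))), i.e.
     swap_condition turns GT into LT and the memory operand lands where
     ficom expects it.  */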
5054   if (!op0_preserve_value
5055       && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
5056     {
5057       enum rtx_code scode = swap_condition ((enum rtx_code) *code);
5058 
5059       /* We are called only for compares that are split to the SAHF
5060 	 instruction.  Ensure that we have a setcc/jcc insn for the swapped
	 condition.  */
5061       if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
5062 	{
5063 	  std::swap (*op0, *op1);
5064 	  *code = (int) scode;
5065 	}
5066     }
5067 }
5068 
5069 /* Save the current options */
5070 
5071 static void
5072 ix86_function_specific_save (struct cl_target_option *ptr,
5073 			     struct gcc_options *opts)
5074 {
5075   ptr->arch = ix86_arch;
5076   ptr->schedule = ix86_schedule;
5077   ptr->prefetch_sse = x86_prefetch_sse;
5078   ptr->tune = ix86_tune;
5079   ptr->branch_cost = ix86_branch_cost;
5080   ptr->tune_defaulted = ix86_tune_defaulted;
5081   ptr->arch_specified = ix86_arch_specified;
5082   ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
5083   ptr->x_ix86_isa_flags2_explicit = opts->x_ix86_isa_flags2_explicit;
5084   ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
5085   ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
5086   ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
5087   ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
5088   ptr->x_ix86_abi = opts->x_ix86_abi;
5089   ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
5090   ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
5091   ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
5092   ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
5093   ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
5094   ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
5095   ptr->x_ix86_pmode = opts->x_ix86_pmode;
5096   ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
5097   ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
5098   ptr->x_ix86_regparm = opts->x_ix86_regparm;
5099   ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
5100   ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
5101   ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
5102   ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
5103   ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
5104   ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
5105   ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
5106   ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
5107   ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
5108   ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
5109 
5110   /* The fields are char but the variables are not; make sure the
5111      values fit in the fields.  */
5112   gcc_assert (ptr->arch == ix86_arch);
5113   gcc_assert (ptr->schedule == ix86_schedule);
5114   gcc_assert (ptr->tune == ix86_tune);
5115   gcc_assert (ptr->branch_cost == ix86_branch_cost);
5116 }
5117 
5118 /* Restore the current options */
5119 
5120 static void
5121 ix86_function_specific_restore (struct gcc_options *opts,
5122 				struct cl_target_option *ptr)
5123 {
5124   enum processor_type old_tune = ix86_tune;
5125   enum processor_type old_arch = ix86_arch;
5126   unsigned HOST_WIDE_INT ix86_arch_mask;
5127   int i;
5128 
5129   /* We don't change -fPIC.  */
5130   opts->x_flag_pic = flag_pic;
5131 
5132   ix86_arch = (enum processor_type) ptr->arch;
5133   ix86_schedule = (enum attr_cpu) ptr->schedule;
5134   ix86_tune = (enum processor_type) ptr->tune;
5135   x86_prefetch_sse = ptr->prefetch_sse;
5136   opts->x_ix86_branch_cost = ptr->branch_cost;
5137   ix86_tune_defaulted = ptr->tune_defaulted;
5138   ix86_arch_specified = ptr->arch_specified;
5139   opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
5140   opts->x_ix86_isa_flags2_explicit = ptr->x_ix86_isa_flags2_explicit;
5141   opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
5142   opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
5143   opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
5144   opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
5145   opts->x_ix86_abi = ptr->x_ix86_abi;
5146   opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
5147   opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
5148   opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
5149   opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
5150   opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
5151   opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
5152   opts->x_ix86_pmode = ptr->x_ix86_pmode;
5153   opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
5154   opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
5155   opts->x_ix86_regparm = ptr->x_ix86_regparm;
5156   opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
5157   opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
5158   opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
5159   opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
5160   opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
5161   opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
5162   opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
5163   opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
5164   opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
5165   opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
5166   ix86_tune_cost = processor_target_table[ix86_tune].cost;
5167   /* TODO: ix86_cost should be chosen at instruction or function granularity
5168      so for cold code we use size_cost even in !optimize_size compilation.  */
5169   if (opts->x_optimize_size)
5170     ix86_cost = &ix86_size_cost;
5171   else
5172     ix86_cost = ix86_tune_cost;
5173 
5174   /* Recreate the arch feature tests if the arch changed */
5175   if (old_arch != ix86_arch)
5176     {
5177       ix86_arch_mask = HOST_WIDE_INT_1U << ix86_arch;
5178       for (i = 0; i < X86_ARCH_LAST; ++i)
5179 	ix86_arch_features[i]
5180 	  = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
5181     }
5182 
5183   /* Recreate the tune optimization tests */
5184   if (old_tune != ix86_tune)
5185     set_ix86_tune_features (ix86_tune, false);
5186 }
5187 
5188 /* Adjust target options after streaming them in.  This is mainly about
5189    reconciling them with global options.  */
5190 
5191 static void
5192 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
5193 {
5194   /* flag_pic is a global option, but ix86_cmodel is a target-saved option
5195      partly computed from flag_pic.  If flag_pic is on, adjust x_ix86_cmodel
5196      for PIC, or error out.  */
5197   if (flag_pic)
5198     switch (ptr->x_ix86_cmodel)
5199       {
5200       case CM_SMALL:
5201 	ptr->x_ix86_cmodel = CM_SMALL_PIC;
5202 	break;
5203 
5204       case CM_MEDIUM:
5205 	ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
5206 	break;
5207 
5208       case CM_LARGE:
5209 	ptr->x_ix86_cmodel = CM_LARGE_PIC;
5210 	break;
5211 
5212       case CM_KERNEL:
5213 	error ("code model %s does not support PIC mode", "kernel");
5214 	break;
5215 
5216       default:
5217 	break;
5218       }
5219   else
5220     switch (ptr->x_ix86_cmodel)
5221       {
5222       case CM_SMALL_PIC:
5223 	ptr->x_ix86_cmodel = CM_SMALL;
5224 	break;
5225 
5226       case CM_MEDIUM_PIC:
5227 	ptr->x_ix86_cmodel = CM_MEDIUM;
5228 	break;
5229 
5230       case CM_LARGE_PIC:
5231 	ptr->x_ix86_cmodel = CM_LARGE;
5232 	break;
5233 
5234       default:
5235 	break;
5236       }
5237 }
5238 
5239 /* Print the current options */
5240 
5241 static void
5242 ix86_function_specific_print (FILE *file, int indent,
5243 			      struct cl_target_option *ptr)
5244 {
5245   char *target_string
5246     = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_ix86_isa_flags2,
5247 			  ptr->x_target_flags, ptr->x_ix86_target_flags,
5248 			  NULL, NULL, ptr->x_ix86_fpmath, false);
5249 
5250   gcc_assert (ptr->arch < PROCESSOR_max);
5251   fprintf (file, "%*sarch = %d (%s)\n",
5252 	   indent, "",
5253 	   ptr->arch, processor_target_table[ptr->arch].name);
5254 
5255   gcc_assert (ptr->tune < PROCESSOR_max);
5256   fprintf (file, "%*stune = %d (%s)\n",
5257 	   indent, "",
5258 	   ptr->tune, processor_target_table[ptr->tune].name);
5259 
5260   fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
5261 
5262   if (target_string)
5263     {
5264       fprintf (file, "%*s%s\n", indent, "", target_string);
5265       free (target_string);
5266     }
5267 }
5268 
5269 
5270 /* Inner function to process the attribute((target(...))): take an argument
5271    and set the current options from it.  If we have a list, recursively go
5272    over the list.  */
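/* For example, __attribute__((target("avx2,no-sse4a,arch=haswell"))) is
   handled here: the string is split on commas, a leading "no-" negates an
   entry, and each piece is matched against the attrs[] table below (the
   particular names used in this example are illustrative).  */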
5273 
5274 static bool
5275 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
5276 				     struct gcc_options *opts,
5277 				     struct gcc_options *opts_set,
5278 				     struct gcc_options *enum_opts_set)
5279 {
5280   char *next_optstr;
5281   bool ret = true;
5282 
5283 #define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
5284 #define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
5285 #define IX86_ATTR_ENUM(S,O)  { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
5286 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
5287 #define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no,  O, M }
5288 
5289   enum ix86_opt_type
5290   {
5291     ix86_opt_unknown,
5292     ix86_opt_yes,
5293     ix86_opt_no,
5294     ix86_opt_str,
5295     ix86_opt_enum,
5296     ix86_opt_isa
5297   };
5298 
5299   static const struct
5300   {
5301     const char *string;
5302     size_t len;
5303     enum ix86_opt_type type;
5304     int opt;
5305     int mask;
5306   } attrs[] = {
5307     /* isa options */
5308     IX86_ATTR_ISA ("pconfig",	OPT_mpconfig),
5309     IX86_ATTR_ISA ("wbnoinvd",	OPT_mwbnoinvd),
5310     IX86_ATTR_ISA ("sgx",	OPT_msgx),
5311     IX86_ATTR_ISA ("avx5124fmaps", OPT_mavx5124fmaps),
5312     IX86_ATTR_ISA ("avx5124vnniw", OPT_mavx5124vnniw),
5313     IX86_ATTR_ISA ("avx512vpopcntdq", OPT_mavx512vpopcntdq),
5314     IX86_ATTR_ISA ("avx512vbmi2", OPT_mavx512vbmi2),
5315     IX86_ATTR_ISA ("avx512vnni", OPT_mavx512vnni),
5316     IX86_ATTR_ISA ("avx512bitalg", OPT_mavx512bitalg),
5317 
5318     IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
5319     IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
5320     IX86_ATTR_ISA ("avx512vl",	OPT_mavx512vl),
5321     IX86_ATTR_ISA ("avx512bw",	OPT_mavx512bw),
5322     IX86_ATTR_ISA ("avx512dq",	OPT_mavx512dq),
5323     IX86_ATTR_ISA ("avx512er",	OPT_mavx512er),
5324     IX86_ATTR_ISA ("avx512pf",	OPT_mavx512pf),
5325     IX86_ATTR_ISA ("avx512cd",	OPT_mavx512cd),
5326     IX86_ATTR_ISA ("avx512f",	OPT_mavx512f),
5327     IX86_ATTR_ISA ("avx2",	OPT_mavx2),
5328     IX86_ATTR_ISA ("fma",	OPT_mfma),
5329     IX86_ATTR_ISA ("xop",	OPT_mxop),
5330     IX86_ATTR_ISA ("fma4",	OPT_mfma4),
5331     IX86_ATTR_ISA ("f16c",	OPT_mf16c),
5332     IX86_ATTR_ISA ("avx",	OPT_mavx),
5333     IX86_ATTR_ISA ("sse4",	OPT_msse4),
5334     IX86_ATTR_ISA ("sse4.2",	OPT_msse4_2),
5335     IX86_ATTR_ISA ("sse4.1",	OPT_msse4_1),
5336     IX86_ATTR_ISA ("sse4a",	OPT_msse4a),
5337     IX86_ATTR_ISA ("ssse3",	OPT_mssse3),
5338     IX86_ATTR_ISA ("sse3",	OPT_msse3),
5339     IX86_ATTR_ISA ("aes",	OPT_maes),
5340     IX86_ATTR_ISA ("sha",	OPT_msha),
5341     IX86_ATTR_ISA ("pclmul",	OPT_mpclmul),
5342     IX86_ATTR_ISA ("sse2",	OPT_msse2),
5343     IX86_ATTR_ISA ("sse",	OPT_msse),
5344     IX86_ATTR_ISA ("3dnowa",	OPT_m3dnowa),
5345     IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
5346     IX86_ATTR_ISA ("mmx",	OPT_mmmx),
5347     IX86_ATTR_ISA ("rtm",	OPT_mrtm),
5348     IX86_ATTR_ISA ("prfchw",	OPT_mprfchw),
5349     IX86_ATTR_ISA ("rdseed",	OPT_mrdseed),
5350     IX86_ATTR_ISA ("adx",	OPT_madx),
5351     IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
5352     IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
5353     IX86_ATTR_ISA ("xsaves",	OPT_mxsaves),
5354     IX86_ATTR_ISA ("xsavec",	OPT_mxsavec),
5355     IX86_ATTR_ISA ("xsaveopt",	OPT_mxsaveopt),
5356     IX86_ATTR_ISA ("xsave",	OPT_mxsave),
5357     IX86_ATTR_ISA ("abm",	OPT_mabm),
5358     IX86_ATTR_ISA ("bmi",	OPT_mbmi),
5359     IX86_ATTR_ISA ("bmi2",	OPT_mbmi2),
5360     IX86_ATTR_ISA ("lzcnt",	OPT_mlzcnt),
5361     IX86_ATTR_ISA ("tbm",	OPT_mtbm),
5362     IX86_ATTR_ISA ("popcnt",	OPT_mpopcnt),
5363     IX86_ATTR_ISA ("cx16",	OPT_mcx16),
5364     IX86_ATTR_ISA ("sahf",	OPT_msahf),
5365     IX86_ATTR_ISA ("movbe",	OPT_mmovbe),
5366     IX86_ATTR_ISA ("crc32",	OPT_mcrc32),
5367     IX86_ATTR_ISA ("fsgsbase",	OPT_mfsgsbase),
5368     IX86_ATTR_ISA ("rdrnd",	OPT_mrdrnd),
5369     IX86_ATTR_ISA ("mwaitx",	OPT_mmwaitx),
5370     IX86_ATTR_ISA ("clzero",	OPT_mclzero),
5371     IX86_ATTR_ISA ("pku",	OPT_mpku),
5372     IX86_ATTR_ISA ("lwp",	OPT_mlwp),
5373     IX86_ATTR_ISA ("hle",	OPT_mhle),
5374     IX86_ATTR_ISA ("fxsr",	OPT_mfxsr),
5375     IX86_ATTR_ISA ("mpx",	OPT_mmpx),
5376     IX86_ATTR_ISA ("clwb",	OPT_mclwb),
5377     IX86_ATTR_ISA ("rdpid",	OPT_mrdpid),
5378     IX86_ATTR_ISA ("gfni",	OPT_mgfni),
5379     IX86_ATTR_ISA ("shstk",	OPT_mshstk),
5380     IX86_ATTR_ISA ("vaes",	OPT_mvaes),
5381     IX86_ATTR_ISA ("vpclmulqdq", OPT_mvpclmulqdq),
5382     IX86_ATTR_ISA ("movdiri", OPT_mmovdiri),
5383     IX86_ATTR_ISA ("movdir64b", OPT_mmovdir64b),
5384 
5385     /* enum options */
5386     IX86_ATTR_ENUM ("fpmath=",	OPT_mfpmath_),
5387 
5388     /* string options */
5389     IX86_ATTR_STR ("arch=",	IX86_FUNCTION_SPECIFIC_ARCH),
5390     IX86_ATTR_STR ("tune=",	IX86_FUNCTION_SPECIFIC_TUNE),
5391 
5392     /* flag options */
5393     IX86_ATTR_YES ("cld",
5394 		   OPT_mcld,
5395 		   MASK_CLD),
5396 
5397     IX86_ATTR_NO ("fancy-math-387",
5398 		  OPT_mfancy_math_387,
5399 		  MASK_NO_FANCY_MATH_387),
5400 
5401     IX86_ATTR_YES ("ieee-fp",
5402 		   OPT_mieee_fp,
5403 		   MASK_IEEE_FP),
5404 
5405     IX86_ATTR_YES ("inline-all-stringops",
5406 		   OPT_minline_all_stringops,
5407 		   MASK_INLINE_ALL_STRINGOPS),
5408 
5409     IX86_ATTR_YES ("inline-stringops-dynamically",
5410 		   OPT_minline_stringops_dynamically,
5411 		   MASK_INLINE_STRINGOPS_DYNAMICALLY),
5412 
5413     IX86_ATTR_NO ("align-stringops",
5414 		  OPT_mno_align_stringops,
5415 		  MASK_NO_ALIGN_STRINGOPS),
5416 
5417     IX86_ATTR_YES ("recip",
5418 		   OPT_mrecip,
5419 		   MASK_RECIP),
5420 
5421   };
5422 
5423   /* If this is a list, recurse to get the options.  */
5424   if (TREE_CODE (args) == TREE_LIST)
5425     {
5426       bool ret = true;
5427 
5428       for (; args; args = TREE_CHAIN (args))
5429 	if (TREE_VALUE (args)
5430 	    && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
5431 						     p_strings, opts, opts_set,
5432 						     enum_opts_set))
5433 	  ret = false;
5434 
5435       return ret;
5436     }
5437 
5438   else if (TREE_CODE (args) != STRING_CST)
5439     {
5440       error ("attribute %<target%> argument not a string");
5441       return false;
5442     }
5443 
5444   /* Handle multiple arguments separated by commas.  */
5445   next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
5446 
5447   while (next_optstr && *next_optstr != '\0')
5448     {
5449       char *p = next_optstr;
5450       char *orig_p = p;
5451       char *comma = strchr (next_optstr, ',');
5452       const char *opt_string;
5453       size_t len, opt_len;
5454       int opt;
5455       bool opt_set_p;
5456       char ch;
5457       unsigned i;
5458       enum ix86_opt_type type = ix86_opt_unknown;
5459       int mask = 0;
5460 
5461       if (comma)
5462 	{
5463 	  *comma = '\0';
5464 	  len = comma - next_optstr;
5465 	  next_optstr = comma + 1;
5466 	}
5467       else
5468 	{
5469 	  len = strlen (p);
5470 	  next_optstr = NULL;
5471 	}
5472 
5473       /* Recognize no-xxx.  */
5474       if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
5475 	{
5476 	  opt_set_p = false;
5477 	  p += 3;
5478 	  len -= 3;
5479 	}
5480       else
5481 	opt_set_p = true;
5482 
5483       /* Find the option.  */
5484       ch = *p;
5485       opt = N_OPTS;
5486       for (i = 0; i < ARRAY_SIZE (attrs); i++)
5487 	{
5488 	  type = attrs[i].type;
5489 	  opt_len = attrs[i].len;
5490 	  if (ch == attrs[i].string[0]
5491 	      && ((type != ix86_opt_str && type != ix86_opt_enum)
5492 		  ? len == opt_len
5493 		  : len > opt_len)
5494 	      && memcmp (p, attrs[i].string, opt_len) == 0)
5495 	    {
5496 	      opt = attrs[i].opt;
5497 	      mask = attrs[i].mask;
5498 	      opt_string = attrs[i].string;
5499 	      break;
5500 	    }
5501 	}
5502 
5503       /* Process the option.  */
5504       if (opt == N_OPTS)
5505 	{
5506 	  error ("attribute(target(\"%s\")) is unknown", orig_p);
5507 	  ret = false;
5508 	}
5509 
5510       else if (type == ix86_opt_isa)
5511 	{
5512 	  struct cl_decoded_option decoded;
5513 
5514 	  generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
5515 	  ix86_handle_option (opts, opts_set,
5516 			      &decoded, input_location);
5517 	}
5518 
5519       else if (type == ix86_opt_yes || type == ix86_opt_no)
5520 	{
5521 	  if (type == ix86_opt_no)
5522 	    opt_set_p = !opt_set_p;
5523 
5524 	  if (opt_set_p)
5525 	    opts->x_target_flags |= mask;
5526 	  else
5527 	    opts->x_target_flags &= ~mask;
5528 	}
5529 
5530       else if (type == ix86_opt_str)
5531 	{
5532 	  if (p_strings[opt])
5533 	    {
5534 	      error ("option(\"%s\") was already specified", opt_string);
5535 	      ret = false;
5536 	    }
5537 	  else
5538 	    p_strings[opt] = xstrdup (p + opt_len);
5539 	}
5540 
5541       else if (type == ix86_opt_enum)
5542 	{
5543 	  bool arg_ok;
5544 	  int value;
5545 
5546 	  arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
5547 	  if (arg_ok)
5548 	    set_option (opts, enum_opts_set, opt, value,
5549 			p + opt_len, DK_UNSPECIFIED, input_location,
5550 			global_dc);
5551 	  else
5552 	    {
5553 	      error ("attribute(target(\"%s\")) is unknown", orig_p);
5554 	      ret = false;
5555 	    }
5556 	}
5557 
5558       else
5559 	gcc_unreachable ();
5560     }
5561 
5562   return ret;
5563 }
5564 
5565 /* Release allocated strings.  */
5566 static void
5567 release_options_strings (char **option_strings)
5568 {
5569   /* Free up memory allocated to hold the strings */
5570   for (unsigned i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
5571     free (option_strings[i]);
5572 }
5573 
5574 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */
5575 
5576 tree
5577 ix86_valid_target_attribute_tree (tree args,
5578 				  struct gcc_options *opts,
5579 				  struct gcc_options *opts_set)
5580 {
5581   const char *orig_arch_string = opts->x_ix86_arch_string;
5582   const char *orig_tune_string = opts->x_ix86_tune_string;
5583   enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
5584   int orig_tune_defaulted = ix86_tune_defaulted;
5585   int orig_arch_specified = ix86_arch_specified;
5586   char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
5587   tree t = NULL_TREE;
5588   struct cl_target_option *def
5589     = TREE_TARGET_OPTION (target_option_default_node);
5590   struct gcc_options enum_opts_set;
5591 
5592   memset (&enum_opts_set, 0, sizeof (enum_opts_set));
5593 
5594   /* Process each of the options on the chain.  */
5595   if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
5596 					     opts_set, &enum_opts_set))
5597     return error_mark_node;
5598 
5599   /* If the changed options are different from the default, rerun
5600      ix86_option_override_internal, and then save the options away.
5601      The string options are attribute options, and will be undone
5602      when we copy the save structure.  */
5603   if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
5604       || opts->x_ix86_isa_flags2 != def->x_ix86_isa_flags2
5605       || opts->x_target_flags != def->x_target_flags
5606       || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
5607       || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
5608       || enum_opts_set.x_ix86_fpmath)
5609     {
5610       /* If we are using the default tune= or arch=, undo the string assigned,
5611 	 and use the default.  */
5612       if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
5613 	{
5614 	  opts->x_ix86_arch_string
5615 	    = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]);
5616 
5617 	  /* If arch= is set,  clear all bits in x_ix86_isa_flags,
5618 	     except for ISA_64BIT, ABI_64, ABI_X32, and CODE16.  */
5619 	  opts->x_ix86_isa_flags &= (OPTION_MASK_ISA_64BIT
5620 				     | OPTION_MASK_ABI_64
5621 				     | OPTION_MASK_ABI_X32
5622 				     | OPTION_MASK_CODE16);
5623 	  opts->x_ix86_isa_flags2 = 0;
5624 	}
5625       else if (!orig_arch_specified)
5626 	opts->x_ix86_arch_string = NULL;
5627 
5628       if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
5629 	opts->x_ix86_tune_string
5630 	  = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]);
5631       else if (orig_tune_defaulted)
5632 	opts->x_ix86_tune_string = NULL;
5633 
5634       /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
5635       if (enum_opts_set.x_ix86_fpmath)
5636 	opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5637 
5638       /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
5639       bool r = ix86_option_override_internal (false, opts, opts_set);
5640       if (!r)
5641 	{
5642 	  release_options_strings (option_strings);
5643 	  return error_mark_node;
5644 	}
5645 
5646       /* Add any builtin functions with the new isa if any.  */
5647       ix86_add_new_builtins (opts->x_ix86_isa_flags, opts->x_ix86_isa_flags2);
5648 
5649       /* Save the current options unless we are validating options for
5650 	 #pragma.  */
5651       t = build_target_option_node (opts);
5652 
5653       opts->x_ix86_arch_string = orig_arch_string;
5654       opts->x_ix86_tune_string = orig_tune_string;
5655       opts_set->x_ix86_fpmath = orig_fpmath_set;
5656 
5657       release_options_strings (option_strings);
5658     }
5659 
5660   return t;
5661 }
5662 
5663 /* Hook to validate attribute((target("string"))).  */
5664 
5665 static bool
5666 ix86_valid_target_attribute_p (tree fndecl,
5667 			       tree ARG_UNUSED (name),
5668 			       tree args,
5669 			       int ARG_UNUSED (flags))
5670 {
5671   struct gcc_options func_options;
5672   tree new_target, new_optimize;
5673   bool ret = true;
5674 
5675   /* attribute((target("default"))) does nothing, beyond
5676      affecting multi-versioning.  */
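  /* For example, with function multi-versioning a clone declared
     __attribute__((target("default"))) coexists with, say, a
     __attribute__((target("avx2"))) clone and is picked by the resolver at
     run time; the "default" variant needs no option processing here.  */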
5677   if (TREE_VALUE (args)
5678       && TREE_CODE (TREE_VALUE (args)) == STRING_CST
5679       && TREE_CHAIN (args) == NULL_TREE
5680       && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
5681     return true;
5682 
5683   tree old_optimize = build_optimization_node (&global_options);
5684 
5685   /* Get the optimization options of the current function.  */
5686   tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
5687 
5688   if (!func_optimize)
5689     func_optimize = old_optimize;
5690 
5691   /* Init func_options.  */
5692   memset (&func_options, 0, sizeof (func_options));
5693   init_options_struct (&func_options, NULL);
5694   lang_hooks.init_options_struct (&func_options);
5695 
5696   cl_optimization_restore (&func_options,
5697 			   TREE_OPTIMIZATION (func_optimize));
5698 
5699   /* Initialize func_options to the default before its target options can
5700      be set.  */
5701   cl_target_option_restore (&func_options,
5702 			    TREE_TARGET_OPTION (target_option_default_node));
5703 
5704   new_target = ix86_valid_target_attribute_tree (args, &func_options,
5705 						 &global_options_set);
5706 
5707   new_optimize = build_optimization_node (&func_options);
5708 
5709   if (new_target == error_mark_node)
5710     ret = false;
5711 
5712   else if (fndecl && new_target)
5713     {
5714       DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
5715 
5716       if (old_optimize != new_optimize)
5717 	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
5718     }
5719 
5720   finalize_options_struct (&func_options);
5721 
5722   return ret;
5723 }
5724 
5725 
5726 /* Hook to determine if one function can safely inline another.  */
5727 
5728 static bool
5729 ix86_can_inline_p (tree caller, tree callee)
5730 {
5731   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5732   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5733 
5734   /* Changes of those flags can be tolerated for always_inline functions.
5735      Let's hope the user knows what they are doing.  */
5736   const unsigned HOST_WIDE_INT always_inline_safe_mask
5737 	 = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
5738 	    | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
5739 	    | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
5740 	    | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
5741 	    | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
5742 	    | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
5743 	    | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);
5744 
5745 
5746   if (!callee_tree)
5747     callee_tree = target_option_default_node;
5748   if (!caller_tree)
5749     caller_tree = target_option_default_node;
5750   if (callee_tree == caller_tree)
5751     return true;
5752 
5753   struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5754   struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5755   bool ret = false;
5756   bool always_inline =
5757      (DECL_DISREGARD_INLINE_LIMITS (callee)
5758       && lookup_attribute ("always_inline",
5759 			   DECL_ATTRIBUTES (callee)));
5760 
5761   /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
5762      function can inline an SSE2 function but an SSE2 function can't inline
5763      an SSE4 function.  */
5764   if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5765        != callee_opts->x_ix86_isa_flags)
5766       || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
5767 	  != callee_opts->x_ix86_isa_flags2))
5768     ret = false;
5769 
5770   /* See if we have the same non-isa options.  */
5771   else if ((!always_inline
5772 	    && caller_opts->x_target_flags != callee_opts->x_target_flags)
5773 	   || (caller_opts->x_target_flags & ~always_inline_safe_mask)
5774 	       != (callee_opts->x_target_flags & ~always_inline_safe_mask))
5775     ret = false;
5776 
5777   /* See if arch, tune, etc. are the same.  */
5778   else if (caller_opts->arch != callee_opts->arch)
5779     ret = false;
5780 
5781   else if (!always_inline && caller_opts->tune != callee_opts->tune)
5782     ret = false;
5783 
5784   else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
5785 	   /* If the callee doesn't use FP expressions, differences in
5786 	      ix86_fpmath can be ignored.  We are called from FEs
5787 	      for multi-versioning call optimization, so beware of
5788 	      ipa_fn_summaries not being available.  */
5789 	   && (! ipa_fn_summaries
5790 	       || ipa_fn_summaries->get
5791 	       (cgraph_node::get (callee))->fp_expressions))
5792     ret = false;
5793 
5794   else if (!always_inline
5795 	   && caller_opts->branch_cost != callee_opts->branch_cost)
5796     ret = false;
5797 
5798   else
5799     ret = true;
5800 
5801   return ret;
5802 }
5803 
5804 
5805 /* Remember the last target of ix86_set_current_function.  */
5806 static GTY(()) tree ix86_previous_fndecl;
5807 
5808 /* Set targets globals to the default (or current #pragma GCC target
5809    if active).  Invalidate ix86_previous_fndecl cache.  */
5810 
5811 void
5812 ix86_reset_previous_fndecl (void)
5813 {
5814   tree new_tree = target_option_current_node;
5815   cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5816   if (TREE_TARGET_GLOBALS (new_tree))
5817     restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5818   else if (new_tree == target_option_default_node)
5819     restore_target_globals (&default_target_globals);
5820   else
5821     TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5822   ix86_previous_fndecl = NULL_TREE;
5823 }
5824 
5825 /* Set the func_type field from the function FNDECL.  */
5826 
5827 static void
5828 ix86_set_func_type (tree fndecl)
5829 {
5830   if (cfun->machine->func_type == TYPE_UNKNOWN)
5831     {
5832       if (lookup_attribute ("interrupt",
5833 			    TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
5834 	{
5835 	  if (ix86_function_naked (fndecl))
5836 	    error_at (DECL_SOURCE_LOCATION (fndecl),
5837 		      "interrupt and naked attributes are not compatible");
5838 
5839 	  int nargs = 0;
5840 	  for (tree arg = DECL_ARGUMENTS (fndecl);
5841 	       arg;
5842 	       arg = TREE_CHAIN (arg))
5843 	    nargs++;
5844 	  cfun->machine->no_caller_saved_registers = true;
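	  /* Per the GCC interrupt-attribute convention, an interrupt handler
	     takes a single pointer to the interrupt frame, while an exception
	     handler also receives an error code, e.g. (sketch, not from this
	     file): void f (void *frame, uword_t error_code); two arguments
	     therefore select TYPE_EXCEPTION below.  */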
5845 	  cfun->machine->func_type
5846 	    = nargs == 2 ? TYPE_EXCEPTION : TYPE_INTERRUPT;
5847 
5848 	  ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
5849 
5850 	  /* Only dwarf2out.c can handle -WORD(AP) as a pointer argument.  */
5851 	  if (write_symbols != NO_DEBUG && write_symbols != DWARF2_DEBUG)
5852 	    sorry ("Only DWARF debug format is supported for interrupt "
5853 		   "service routine.");
5854 	}
5855       else
5856 	{
5857 	  cfun->machine->func_type = TYPE_NORMAL;
5858 	  if (lookup_attribute ("no_caller_saved_registers",
5859 				TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
5860 	    cfun->machine->no_caller_saved_registers = true;
5861 	}
5862     }
5863 }
5864 
5865 /* Set the indirect_branch_type field from the function FNDECL.  */
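/* For example, __attribute__((indirect_branch("thunk-extern"))) makes the
   function's indirect calls and jumps go through an externally supplied
   retpoline-style thunk, mirroring the -mindirect-branch= command-line
   values parsed below (illustrative).  */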
5866 
5867 static void
5868 ix86_set_indirect_branch_type (tree fndecl)
5869 {
5870   if (cfun->machine->indirect_branch_type == indirect_branch_unset)
5871     {
5872       tree attr = lookup_attribute ("indirect_branch",
5873 				    DECL_ATTRIBUTES (fndecl));
5874       if (attr != NULL)
5875 	{
5876 	  tree args = TREE_VALUE (attr);
5877 	  if (args == NULL)
5878 	    gcc_unreachable ();
5879 	  tree cst = TREE_VALUE (args);
5880 	  if (strcmp (TREE_STRING_POINTER (cst), "keep") == 0)
5881 	    cfun->machine->indirect_branch_type = indirect_branch_keep;
5882 	  else if (strcmp (TREE_STRING_POINTER (cst), "thunk") == 0)
5883 	    cfun->machine->indirect_branch_type = indirect_branch_thunk;
5884 	  else if (strcmp (TREE_STRING_POINTER (cst), "thunk-inline") == 0)
5885 	    cfun->machine->indirect_branch_type = indirect_branch_thunk_inline;
5886 	  else if (strcmp (TREE_STRING_POINTER (cst), "thunk-extern") == 0)
5887 	    cfun->machine->indirect_branch_type = indirect_branch_thunk_extern;
5888 	  else
5889 	    gcc_unreachable ();
5890 	}
5891       else
5892 	cfun->machine->indirect_branch_type = ix86_indirect_branch;
5893 
5894       /* -mcmodel=large is not compatible with -mindirect-branch=thunk
5895 	 nor -mindirect-branch=thunk-extern.  */
5896       if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
5897 	  && ((cfun->machine->indirect_branch_type
5898 	       == indirect_branch_thunk_extern)
5899 	      || (cfun->machine->indirect_branch_type
5900 		  == indirect_branch_thunk)))
5901 	error ("%<-mindirect-branch=%s%> and %<-mcmodel=large%> are not "
5902 	       "compatible",
5903 	       ((cfun->machine->indirect_branch_type
5904 		 == indirect_branch_thunk_extern)
5905 		? "thunk-extern" : "thunk"));
5906 
5907       /* -mindirect-branch=thunk-extern, -fcf-protection=branch and
5908 	 -fcheck-pointer-bounds are not compatible.  */
5909       if ((cfun->machine->indirect_branch_type
5910 	   == indirect_branch_thunk_extern)
5911 	  && flag_check_pointer_bounds
5912 	  && (flag_cf_protection & CF_BRANCH) != 0)
5913 	error ("%<-mindirect-branch=thunk-extern%>, "
5914 	       "%<-fcf-protection=branch%> and "
5915 	       "%<-fcheck-pointer-bounds%> are not compatible");
5916     }
5917 
5918   if (cfun->machine->function_return_type == indirect_branch_unset)
5919     {
5920       tree attr = lookup_attribute ("function_return",
5921 				    DECL_ATTRIBUTES (fndecl));
5922       if (attr != NULL)
5923 	{
5924 	  tree args = TREE_VALUE (attr);
5925 	  if (args == NULL)
5926 	    gcc_unreachable ();
5927 	  tree cst = TREE_VALUE (args);
5928 	  if (strcmp (TREE_STRING_POINTER (cst), "keep") == 0)
5929 	    cfun->machine->function_return_type = indirect_branch_keep;
5930 	  else if (strcmp (TREE_STRING_POINTER (cst), "thunk") == 0)
5931 	    cfun->machine->function_return_type = indirect_branch_thunk;
5932 	  else if (strcmp (TREE_STRING_POINTER (cst), "thunk-inline") == 0)
5933 	    cfun->machine->function_return_type = indirect_branch_thunk_inline;
5934 	  else if (strcmp (TREE_STRING_POINTER (cst), "thunk-extern") == 0)
5935 	    cfun->machine->function_return_type = indirect_branch_thunk_extern;
5936 	  else
5937 	    gcc_unreachable ();
5938 	}
5939       else
5940 	cfun->machine->function_return_type = ix86_function_return;
5941 
5942       /* -mcmodel=large is not compatible with -mfunction-return=thunk
5943 	 nor -mfunction-return=thunk-extern.  */
5944       if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
5945 	  && ((cfun->machine->function_return_type
5946 	       == indirect_branch_thunk_extern)
5947 	      || (cfun->machine->function_return_type
5948 		  == indirect_branch_thunk)))
5949 	error ("%<-mfunction-return=%s%> and %<-mcmodel=large%> are not "
5950 	       "compatible",
5951 	       ((cfun->machine->function_return_type
5952 		 == indirect_branch_thunk_extern)
5953 		? "thunk-extern" : "thunk"));
5954     }
5955 }
5956 
5957 /* Establish appropriate back-end context for processing the function
5958    FNDECL.  The argument might be NULL to indicate processing at top
5959    level, outside of any function scope.  */
5960 static void
5961 ix86_set_current_function (tree fndecl)
5962 {
5963   /* Only change the context if the function changes.  This hook is called
5964      several times in the course of compiling a function, and we don't want to
5965      slow things down too much or call target_reinit when it isn't safe.  */
5966   if (fndecl == ix86_previous_fndecl)
5967     {
5968       /* There may be 2 function bodies for the same function FNDECL,
5969 	 one is extern inline and one isn't.  Call ix86_set_func_type
5970 	 to set the func_type field.  */
5971       if (fndecl != NULL_TREE)
5972 	{
5973 	  ix86_set_func_type (fndecl);
5974 	  ix86_set_indirect_branch_type (fndecl);
5975 	}
5976       return;
5977     }
5978 
5979   tree old_tree;
5980   if (ix86_previous_fndecl == NULL_TREE)
5981     old_tree = target_option_current_node;
5982   else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
5983     old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
5984   else
5985     old_tree = target_option_default_node;
5986 
5987   if (fndecl == NULL_TREE)
5988     {
5989       if (old_tree != target_option_current_node)
5990 	ix86_reset_previous_fndecl ();
5991       return;
5992     }
5993 
5994   ix86_set_func_type (fndecl);
5995   ix86_set_indirect_branch_type (fndecl);
5996 
5997   tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
5998   if (new_tree == NULL_TREE)
5999     new_tree = target_option_default_node;
6000 
6001   if (old_tree != new_tree)
6002     {
6003       cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6004       if (TREE_TARGET_GLOBALS (new_tree))
6005 	restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
6006       else if (new_tree == target_option_default_node)
6007 	restore_target_globals (&default_target_globals);
6008       else
6009 	TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
6010     }
6011   ix86_previous_fndecl = fndecl;
6012 
6013   static bool prev_no_caller_saved_registers;
6014 
6015   /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
6016      Avoid expensive re-initialization of init_regs each time we switch
6017      function context.  */
6018   if (TARGET_64BIT
6019       && (call_used_regs[SI_REG]
6020 	  == (cfun->machine->call_abi == MS_ABI)))
6021     reinit_regs ();
6022   /* Need to re-initialize init_regs if caller-saved registers are
6023      changed.  */
6024   else if (prev_no_caller_saved_registers
6025 	   != cfun->machine->no_caller_saved_registers)
6026     reinit_regs ();
6027 
6028   if (cfun->machine->func_type != TYPE_NORMAL
6029       || cfun->machine->no_caller_saved_registers)
6030     {
6031       /* Don't allow MPX, SSE, MMX or x87 instructions since they
6032 	 may change the processor state.  */
6033       const char *isa;
6034       if (TARGET_MPX)
6035 	isa = "MPX";
6036       else if (TARGET_SSE)
6037 	isa = "SSE";
6038       else if (TARGET_MMX)
6039 	isa = "MMX/3Dnow";
6040       else if (TARGET_80387)
6041 	isa = "80387";
6042       else
6043 	isa = NULL;
6044       if (isa != NULL)
6045 	{
6046 	  if (cfun->machine->func_type != TYPE_NORMAL)
6047 	    sorry ("%s instructions aren't allowed in %s service routine",
6048 		   isa, (cfun->machine->func_type == TYPE_EXCEPTION
6049 			 ? "exception" : "interrupt"));
6050 	  else
6051 	    sorry ("%s instructions aren't allowed in function with "
6052 		   "no_caller_saved_registers attribute", isa);
6053 	  /* Don't issue the same error twice.  */
6054 	  cfun->machine->func_type = TYPE_NORMAL;
6055 	  cfun->machine->no_caller_saved_registers = false;
6056 	}
6057     }
6058 
6059   prev_no_caller_saved_registers
6060     = cfun->machine->no_caller_saved_registers;
6061 }
6062 
6063 
6064 /* Return true if this goes in large data/bss.  */
6065 
6066 static bool
6067 ix86_in_large_data_p (tree exp)
6068 {
6069   if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
6070     return false;
6071 
6072   if (exp == NULL_TREE)
6073     return false;
6074 
6075   /* Functions are never large data.  */
6076   if (TREE_CODE (exp) == FUNCTION_DECL)
6077     return false;
6078 
6079   /* Automatic variables are never large data.  */
6080   if (VAR_P (exp) && !is_global_var (exp))
6081     return false;
6082 
6083   if (VAR_P (exp) && DECL_SECTION_NAME (exp))
6084     {
6085       const char *section = DECL_SECTION_NAME (exp);
6086       if (strcmp (section, ".ldata") == 0
6087 	  || strcmp (section, ".lbss") == 0)
6088 	return true;
6089       return false;
6090     }
6091   else
6092     {
6093       HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
6094 
6095       /* If this is an incomplete type with size 0, then we can't put it
6096 	 in data because it might be too big when completed.  Also,
6097 	 int_size_in_bytes returns -1 if the size can vary or is larger than
6098 	 an integer, in which case it is also safer to assume that it goes in
6099 	 large data.  */
6100       if (size <= 0 || size > ix86_section_threshold)
6101 	return true;
6102     }
6103 
6104   return false;
6105 }
6106 
6107 /* i386-specific section flag to mark large sections.  */
6108 #define SECTION_LARGE SECTION_MACH_DEP
6109 
6110 /* Switch to the appropriate section for output of DECL.
6111    DECL is either a `VAR_DECL' node or a constant of some sort.
6112    RELOC indicates whether forming the initial value of DECL requires
6113    link-time relocations.  */
6114 
6115 ATTRIBUTE_UNUSED static section *
6116 x86_64_elf_select_section (tree decl, int reloc,
6117 			   unsigned HOST_WIDE_INT align)
6118 {
6119   if (ix86_in_large_data_p (decl))
6120     {
6121       const char *sname = NULL;
6122       unsigned int flags = SECTION_WRITE | SECTION_LARGE;
6123       switch (categorize_decl_for_section (decl, reloc))
6124 	{
6125 	case SECCAT_DATA:
6126 	  sname = ".ldata";
6127 	  break;
6128 	case SECCAT_DATA_REL:
6129 	  sname = ".ldata.rel";
6130 	  break;
6131 	case SECCAT_DATA_REL_LOCAL:
6132 	  sname = ".ldata.rel.local";
6133 	  break;
6134 	case SECCAT_DATA_REL_RO:
6135 	  sname = ".ldata.rel.ro";
6136 	  break;
6137 	case SECCAT_DATA_REL_RO_LOCAL:
6138 	  sname = ".ldata.rel.ro.local";
6139 	  break;
6140 	case SECCAT_BSS:
6141 	  sname = ".lbss";
6142 	  flags |= SECTION_BSS;
6143 	  break;
6144 	case SECCAT_RODATA:
6145 	case SECCAT_RODATA_MERGE_STR:
6146 	case SECCAT_RODATA_MERGE_STR_INIT:
6147 	case SECCAT_RODATA_MERGE_CONST:
6148 	  sname = ".lrodata";
6149 	  flags &= ~SECTION_WRITE;
6150 	  break;
6151 	case SECCAT_SRODATA:
6152 	case SECCAT_SDATA:
6153 	case SECCAT_SBSS:
6154 	  gcc_unreachable ();
6155 	case SECCAT_TEXT:
6156 	case SECCAT_TDATA:
6157 	case SECCAT_TBSS:
6158 	  /* We don't split these for the medium model.  Place them into
6159 	     default sections and hope for the best.  */
6160 	  break;
6161 	}
6162       if (sname)
6163 	{
6164 	  /* We might get called with string constants, but get_named_section
6165 	     doesn't like them as they are not DECLs.  Also, we need to set
6166 	     flags in that case.  */
6167 	  if (!DECL_P (decl))
6168 	    return get_section (sname, flags, NULL);
6169 	  return get_named_section (decl, sname, reloc);
6170 	}
6171     }
6172   return default_elf_select_section (decl, reloc, align);
6173 }
6174 
6175 /* Select a set of attributes for section NAME based on the properties
6176    of DECL and whether or not RELOC indicates that DECL's initializer
6177    might contain runtime relocations.  */
6178 
6179 static unsigned int ATTRIBUTE_UNUSED
6180 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
6181 {
6182   unsigned int flags = default_section_type_flags (decl, name, reloc);
6183 
6184   if (ix86_in_large_data_p (decl))
6185     flags |= SECTION_LARGE;
6186 
6187   if (decl == NULL_TREE
6188       && (strcmp (name, ".ldata.rel.ro") == 0
6189 	  || strcmp (name, ".ldata.rel.ro.local") == 0))
6190     flags |= SECTION_RELRO;
6191 
6192   if (strcmp (name, ".lbss") == 0
6193       || strncmp (name, ".lbss.", 6) == 0
6194       || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
6195     flags |= SECTION_BSS;
6196 
6197   return flags;
6198 }
6199 
6200 /* Build up a unique section name, expressed as a
6201    STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
6202    RELOC indicates whether the initial value of EXP requires
6203    link-time relocations.  */
6204 
6205 static void ATTRIBUTE_UNUSED
6206 x86_64_elf_unique_section (tree decl, int reloc)
6207 {
6208   if (ix86_in_large_data_p (decl))
6209     {
6210       const char *prefix = NULL;
6211       /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
6212       bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
6213 
6214       switch (categorize_decl_for_section (decl, reloc))
6215 	{
6216 	case SECCAT_DATA:
6217 	case SECCAT_DATA_REL:
6218 	case SECCAT_DATA_REL_LOCAL:
6219 	case SECCAT_DATA_REL_RO:
6220 	case SECCAT_DATA_REL_RO_LOCAL:
6221           prefix = one_only ? ".ld" : ".ldata";
6222 	  break;
6223 	case SECCAT_BSS:
6224           prefix = one_only ? ".lb" : ".lbss";
6225 	  break;
6226 	case SECCAT_RODATA:
6227 	case SECCAT_RODATA_MERGE_STR:
6228 	case SECCAT_RODATA_MERGE_STR_INIT:
6229 	case SECCAT_RODATA_MERGE_CONST:
6230           prefix = one_only ? ".lr" : ".lrodata";
6231 	  break;
6232 	case SECCAT_SRODATA:
6233 	case SECCAT_SDATA:
6234 	case SECCAT_SBSS:
6235 	  gcc_unreachable ();
6236 	case SECCAT_TEXT:
6237 	case SECCAT_TDATA:
6238 	case SECCAT_TBSS:
6239 	  /* We don't split these for the medium model.  Place them into
6240 	     default sections and hope for the best.  */
6241 	  break;
6242 	}
6243       if (prefix)
6244 	{
6245 	  const char *name, *linkonce;
6246 	  char *string;
6247 
6248 	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
6249 	  name = targetm.strip_name_encoding (name);
6250 
6251 	  /* If we're using one_only, then there needs to be a .gnu.linkonce
6252      	     prefix to the section name.  */
6253 	  linkonce = one_only ? ".gnu.linkonce" : "";
6254 
6255 	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));
6256 
6257 	  set_decl_section_name (decl, string);
6258 	  return;
6259 	}
6260     }
6261   default_unique_section (decl, reloc);
6262 }
6263 
6264 #ifdef COMMON_ASM_OP
6265 
6266 #ifndef LARGECOMM_SECTION_ASM_OP
6267 #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
6268 #endif
6269 
6270 /* This says how to output assembler code to declare an
6271    uninitialized external linkage data object.
6272 
6273    For medium-model x86-64 we need to use the LARGECOMM_SECTION_ASM_OP
6274    directive for large objects.  */
6275 void
6276 x86_elf_aligned_decl_common (FILE *file, tree decl,
6277 			const char *name, unsigned HOST_WIDE_INT size,
6278 			int align)
6279 {
6280   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
6281       && size > (unsigned int)ix86_section_threshold)
6282     {
6283       switch_to_section (get_named_section (decl, ".lbss", 0));
6284       fputs (LARGECOMM_SECTION_ASM_OP, file);
6285     }
6286   else
6287     fputs (COMMON_ASM_OP, file);
6288   assemble_name (file, name);
6289   fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
6290 	   size, align / BITS_PER_UNIT);
6291 }
6292 #endif
6293 
6294 /* Utility function for targets to use in implementing
6295    ASM_OUTPUT_ALIGNED_BSS.  */
6296 
6297 void
6298 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
6299 		       	unsigned HOST_WIDE_INT size, int align)
6300 {
6301   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
6302       && size > (unsigned int)ix86_section_threshold)
6303     switch_to_section (get_named_section (decl, ".lbss", 0));
6304   else
6305     switch_to_section (bss_section);
6306   ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
6307 #ifdef ASM_DECLARE_OBJECT_NAME
6308   last_assemble_variable_decl = decl;
6309   ASM_DECLARE_OBJECT_NAME (file, name, decl);
6310 #else
6311   /* Standard thing is just output label for the object.  */
6312   ASM_OUTPUT_LABEL (file, name);
6313 #endif /* ASM_DECLARE_OBJECT_NAME */
6314   ASM_OUTPUT_SKIP (file, size ? size : 1);
6315 }
6316 
6317 /* Decide whether we must probe the stack before any space allocation
6318    on this target.  It's essentially TARGET_STACK_PROBE except when
6319    -fstack-check causes the stack to be already probed differently.  */
6320 
6321 bool
6322 ix86_target_stack_probe (void)
6323 {
6324   /* Do not probe the stack twice if static stack checking is enabled.  */
6325   if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
6326     return false;
6327 
6328   return TARGET_STACK_PROBE;
6329 }
6330 
6331 /* Decide whether we can make a sibling call to a function.  DECL is the
6332    declaration of the function being targeted by the call and EXP is the
6333    CALL_EXPR representing the call.  */
6334 
6335 static bool
6336 ix86_function_ok_for_sibcall (tree decl, tree exp)
6337 {
6338   tree type, decl_or_type;
6339   rtx a, b;
6340   bool bind_global = decl && !targetm.binds_local_p (decl);
6341 
6342   if (ix86_function_naked (current_function_decl))
6343     return false;
6344 
6345   /* Sibling call isn't OK if there are no caller-saved registers
6346      since all registers must be preserved before return.  */
6347   if (cfun->machine->no_caller_saved_registers)
6348     return false;
6349 
6350   /* If we are generating position-independent code, we cannot sibcall
6351      optimize direct calls to global functions, as the PLT requires
6352      %ebx be live. (Darwin does not have a PLT.)  */
6353   if (!TARGET_MACHO
6354       && !TARGET_64BIT
6355       && flag_pic
6356       && flag_plt
6357       && bind_global)
6358     return false;
6359 
6360   /* If we need to align the outgoing stack, then sibcalling would
6361      unalign the stack, which may break the called function.  */
6362   if (ix86_minimum_incoming_stack_boundary (true)
6363       < PREFERRED_STACK_BOUNDARY)
6364     return false;
6365 
6366   if (decl)
6367     {
6368       decl_or_type = decl;
6369       type = TREE_TYPE (decl);
6370     }
6371   else
6372     {
6373       /* We're looking at the CALL_EXPR, we need the type of the function.  */
6374       type = CALL_EXPR_FN (exp);		/* pointer expression */
6375       type = TREE_TYPE (type);			/* pointer type */
6376       type = TREE_TYPE (type);			/* function type */
6377       decl_or_type = type;
6378     }
6379 
6380   /* Check that the return value locations are the same.  For example,
6381      if we are returning floats on the 80387 register stack, we cannot
6382      make a sibcall from a function that doesn't return a float to a
6383      function that does or, conversely, from a function that does return
6384      a float to a function that doesn't; the necessary stack adjustment
6385      would not be executed.  This is also the place we notice
6386      differences in the return value ABI.  Note that it is ok for one
6387      of the functions to have void return type as long as the return
6388      value of the other is passed in a register.  */
6389   a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
6390   b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6391 			   cfun->decl, false);
6392   if (STACK_REG_P (a) || STACK_REG_P (b))
6393     {
6394       if (!rtx_equal_p (a, b))
6395 	return false;
6396     }
6397   else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6398     ;
6399   else if (!rtx_equal_p (a, b))
6400     return false;
6401 
6402   if (TARGET_64BIT)
6403     {
6404       /* The SYSV ABI has more call-clobbered registers;
6405 	 disallow sibcalls from MS to SYSV.  */
6406       if (cfun->machine->call_abi == MS_ABI
6407 	  && ix86_function_type_abi (type) == SYSV_ABI)
6408 	return false;
6409     }
6410   else
6411     {
6412       /* If this call is indirect, we'll need to be able to use a
6413 	 call-clobbered register for the address of the target function.
6414 	 Make sure that all such registers are not used for passing
6415 	 parameters.  Note that DLLIMPORT functions and call to global
6416 	 function via GOT slot are indirect.  */
6417       if (!decl
6418 	  || (bind_global && flag_pic && !flag_plt)
6419 	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
6420 	  || flag_force_indirect_call)
6421 	{
6422 	  /* Check if regparm >= 3 since arg_reg_available is set to
6423 	     false if regparm == 0.  If regparm is 1 or 2, there is
6424 	     always a call-clobbered register available.
6425 
6426 	     ??? The symbol indirect call doesn't need a call-clobbered
6427 	     register.  But we don't know if this is a symbol indirect
6428 	     call or not here.  */
6429 	  if (ix86_function_regparm (type, decl) >= 3
6430 	      && !cfun->machine->arg_reg_available)
6431 	    return false;
6432 	}
6433     }
6434 
6435   /* Otherwise okay.  That also includes certain types of indirect calls.  */
6436   return true;
6437 }
6438 
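/* A hedged illustration, not part of the GCC sources, of the 64-bit ABI
   check above:

       void sysv_target (void);
       void __attribute__((ms_abi)) wrapper (void)
       {
	 sysv_target ();	<- MS_ABI caller, SYSV_ABI callee: the SysV
				   ABI clobbers more registers, so the call
				   is never turned into a sibcall.
       }

   Whether a tail call is attempted at all also depends on the optimization
   level; the snippet only illustrates the call_abi comparison.  */
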
6439 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
6440    and "sseregparm" calling convention attributes;
6441    arguments as in struct attribute_spec.handler.  */
6442 
6443 static tree
6444 ix86_handle_cconv_attribute (tree *node, tree name, tree args, int,
6445 			     bool *no_add_attrs)
6446 {
6447   if (TREE_CODE (*node) != FUNCTION_TYPE
6448       && TREE_CODE (*node) != METHOD_TYPE
6449       && TREE_CODE (*node) != FIELD_DECL
6450       && TREE_CODE (*node) != TYPE_DECL)
6451     {
6452       warning (OPT_Wattributes, "%qE attribute only applies to functions",
6453 	       name);
6454       *no_add_attrs = true;
6455       return NULL_TREE;
6456     }
6457 
6458   /* Can combine regparm with all attributes except fastcall and thiscall.  */
6459   if (is_attribute_p ("regparm", name))
6460     {
6461       tree cst;
6462 
6463       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6464         {
6465 	  error ("fastcall and regparm attributes are not compatible");
6466 	}
6467 
6468       if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6469 	{
6470 	  error ("regparm and thiscall attributes are not compatible");
6471 	}
6472 
6473       cst = TREE_VALUE (args);
6474       if (TREE_CODE (cst) != INTEGER_CST)
6475 	{
6476 	  warning (OPT_Wattributes,
6477 		   "%qE attribute requires an integer constant argument",
6478 		   name);
6479 	  *no_add_attrs = true;
6480 	}
6481       else if (compare_tree_int (cst, REGPARM_MAX) > 0)
6482 	{
6483 	  warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
6484 		   name, REGPARM_MAX);
6485 	  *no_add_attrs = true;
6486 	}
6487 
6488       return NULL_TREE;
6489     }
6490 
6491   if (TARGET_64BIT)
6492     {
6493       /* Do not warn when emulating the MS ABI.  */
6494       if ((TREE_CODE (*node) != FUNCTION_TYPE
6495 	   && TREE_CODE (*node) != METHOD_TYPE)
6496 	  || ix86_function_type_abi (*node) != MS_ABI)
6497 	warning (OPT_Wattributes, "%qE attribute ignored",
6498 	         name);
6499       *no_add_attrs = true;
6500       return NULL_TREE;
6501     }
6502 
6503   /* Can combine fastcall with sseregparm.  */
6504   if (is_attribute_p ("fastcall", name))
6505     {
6506       if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6507         {
6508 	  error ("fastcall and cdecl attributes are not compatible");
6509 	}
6510       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6511         {
6512 	  error ("fastcall and stdcall attributes are not compatible");
6513 	}
6514       if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
6515         {
6516 	  error ("fastcall and regparm attributes are not compatible");
6517 	}
6518       if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6519 	{
6520 	  error ("fastcall and thiscall attributes are not compatible");
6521 	}
6522     }
6523 
6524   /* Can combine stdcall with regparm and sseregparm.  */
6526   else if (is_attribute_p ("stdcall", name))
6527     {
6528       if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6529         {
6530 	  error ("stdcall and cdecl attributes are not compatible");
6531 	}
6532       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6533         {
6534 	  error ("stdcall and fastcall attributes are not compatible");
6535 	}
6536       if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6537 	{
6538 	  error ("stdcall and thiscall attributes are not compatible");
6539 	}
6540     }
6541 
6542   /* Can combine cdecl with regparm and sseregparm.  */
6543   else if (is_attribute_p ("cdecl", name))
6544     {
6545       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6546         {
6547 	  error ("stdcall and cdecl attributes are not compatible");
6548 	}
6549       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6550         {
6551 	  error ("fastcall and cdecl attributes are not compatible");
6552 	}
6553       if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6554 	{
6555 	  error ("cdecl and thiscall attributes are not compatible");
6556 	}
6557     }
6558   else if (is_attribute_p ("thiscall", name))
6559     {
6560       if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
6561 	warning (OPT_Wattributes, "%qE attribute is used for non-class method",
6562 	         name);
6563       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6564 	{
6565 	  error ("stdcall and thiscall attributes are not compatible");
6566 	}
6567       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6568 	{
6569 	  error ("fastcall and thiscall attributes are not compatible");
6570 	}
6571       if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6572 	{
6573 	  error ("cdecl and thiscall attributes are not compatible");
6574 	}
6575     }
6576 
6577   /* Can combine sseregparm with all attributes.  */
6578 
6579   return NULL_TREE;
6580 }
6581 
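/* A small usage sketch, not part of the GCC sources, of the checks above on
   a 32-bit target:

       void f (int) __attribute__((stdcall, regparm (2)));	accepted
       void g (int) __attribute__((fastcall, cdecl));		error: fastcall
								and cdecl are
								not compatible

   On a 64-bit target (outside the MS ABI), stdcall, fastcall and cdecl are
   simply ignored with a -Wattributes warning, as the TARGET_64BIT branch
   above shows.  */
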
6582 /* The transactional memory builtins are implicitly regparm or fastcall
6583    depending on the ABI.  Override the generic do-nothing attribute that
6584    these builtins were declared with, and replace it with one of the two
6585    attributes that we expect elsewhere.  */
6586 
6587 static tree
6588 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
6589 				  int flags, bool *no_add_attrs)
6590 {
6591   tree alt;
6592 
6593   /* In no case do we want to add the placeholder attribute.  */
6594   *no_add_attrs = true;
6595 
6596   /* The 64-bit ABI is unchanged for transactional memory.  */
6597   if (TARGET_64BIT)
6598     return NULL_TREE;
6599 
6600   /* ??? Is there a better way to validate 32-bit windows?  We have
6601      cfun->machine->call_abi, but that seems to be set only for 64-bit.  */
6602   if (CHECK_STACK_LIMIT > 0)
6603     alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
6604   else
6605     {
6606       alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
6607       alt = tree_cons (get_identifier ("regparm"), alt, NULL);
6608     }
6609   decl_attributes (node, alt, flags);
6610 
6611   return NULL_TREE;
6612 }
6613 
6614 /* This function determines from TYPE the calling-convention.  */
6615 
6616 unsigned int
6617 ix86_get_callcvt (const_tree type)
6618 {
6619   unsigned int ret = 0;
6620   bool is_stdarg;
6621   tree attrs;
6622 
6623   if (TARGET_64BIT)
6624     return IX86_CALLCVT_CDECL;
6625 
6626   attrs = TYPE_ATTRIBUTES (type);
6627   if (attrs != NULL_TREE)
6628     {
6629       if (lookup_attribute ("cdecl", attrs))
6630 	ret |= IX86_CALLCVT_CDECL;
6631       else if (lookup_attribute ("stdcall", attrs))
6632 	ret |= IX86_CALLCVT_STDCALL;
6633       else if (lookup_attribute ("fastcall", attrs))
6634 	ret |= IX86_CALLCVT_FASTCALL;
6635       else if (lookup_attribute ("thiscall", attrs))
6636 	ret |= IX86_CALLCVT_THISCALL;
6637 
6638       /* Regparm isn't allowed for thiscall and fastcall.  */
6639       if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
6640 	{
6641 	  if (lookup_attribute ("regparm", attrs))
6642 	    ret |= IX86_CALLCVT_REGPARM;
6643 	  if (lookup_attribute ("sseregparm", attrs))
6644 	    ret |= IX86_CALLCVT_SSEREGPARM;
6645 	}
6646 
6647       if (IX86_BASE_CALLCVT(ret) != 0)
6648 	return ret;
6649     }
6650 
6651   is_stdarg = stdarg_p (type);
6652   if (TARGET_RTD && !is_stdarg)
6653     return IX86_CALLCVT_STDCALL | ret;
6654 
6655   if (ret != 0
6656       || is_stdarg
6657       || TREE_CODE (type) != METHOD_TYPE
6658       || ix86_function_type_abi (type) != MS_ABI)
6659     return IX86_CALLCVT_CDECL | ret;
6660 
6661   return IX86_CALLCVT_THISCALL;
6662 }
6663 
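/* For illustration only, not part of the GCC sources: on a 32-bit target
   without -mrtd, ix86_get_callcvt maps

       void f (int);				   -> IX86_CALLCVT_CDECL
       void __attribute__((stdcall)) g (int);	   -> IX86_CALLCVT_STDCALL
       void __attribute__((regparm (2))) h (int); -> IX86_CALLCVT_CDECL
						      | IX86_CALLCVT_REGPARM

   With -mrtd, a non-variadic type that names no convention of its own is
   mapped to IX86_CALLCVT_STDCALL instead, as the TARGET_RTD branch shows.  */
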
6664 /* Return 0 if the attributes for two types are incompatible, 1 if they
6665    are compatible, and 2 if they are nearly compatible (which causes a
6666    warning to be generated).  */
6667 
6668 static int
6669 ix86_comp_type_attributes (const_tree type1, const_tree type2)
6670 {
6671   unsigned int ccvt1, ccvt2;
6672 
6673   if (TREE_CODE (type1) != FUNCTION_TYPE
6674       && TREE_CODE (type1) != METHOD_TYPE)
6675     return 1;
6676 
6677   ccvt1 = ix86_get_callcvt (type1);
6678   ccvt2 = ix86_get_callcvt (type2);
6679   if (ccvt1 != ccvt2)
6680     return 0;
6681   if (ix86_function_regparm (type1, NULL)
6682       != ix86_function_regparm (type2, NULL))
6683     return 0;
6684 
6685   return 1;
6686 }
6687 
6688 /* Return the regparm value for a function with the indicated TYPE and DECL.
6689    DECL may be NULL when calling function indirectly
6690    or considering a libcall.  */
6691 
6692 static int
6693 ix86_function_regparm (const_tree type, const_tree decl)
6694 {
6695   tree attr;
6696   int regparm;
6697   unsigned int ccvt;
6698 
6699   if (TARGET_64BIT)
6700     return (ix86_function_type_abi (type) == SYSV_ABI
6701 	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
6702   ccvt = ix86_get_callcvt (type);
6703   regparm = ix86_regparm;
6704 
6705   if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
6706     {
6707       attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
6708       if (attr)
6709 	{
6710 	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
6711 	  return regparm;
6712 	}
6713     }
6714   else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6715     return 2;
6716   else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6717     return 1;
6718 
6719   /* Use register calling convention for local functions when possible.  */
6720   if (decl
6721       && TREE_CODE (decl) == FUNCTION_DECL)
6722     {
6723       cgraph_node *target = cgraph_node::get (decl);
6724       if (target)
6725 	target = target->function_symbol ();
6726 
6727       /* Caller and callee must agree on the calling convention, so
6728 	 checking just the current optimize setting here would mean that
6729 	 with __attribute__((optimize (...))) the caller could use the
6730 	 regparm convention while the callee does not, or vice versa.
6731 	 Instead look at whether the callee itself is optimized or not.  */
6732       if (target && opt_for_fn (target->decl, optimize)
6733 	  && !(profile_flag && !flag_fentry))
6734 	{
6735 	  cgraph_local_info *i = &target->local;
6736 	  if (i && i->local && i->can_change_signature)
6737 	    {
6738 	      int local_regparm, globals = 0, regno;
6739 
6740 	      /* Make sure no regparm register is taken by a
6741 		 fixed register variable.  */
6742 	      for (local_regparm = 0; local_regparm < REGPARM_MAX;
6743 		   local_regparm++)
6744 		if (fixed_regs[local_regparm])
6745 		  break;
6746 
6747 	      /* We don't want to use regparm(3) for nested functions as
6748 		 these use a static chain pointer in the third argument.  */
6749 	      if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
6750 		local_regparm = 2;
6751 
6752 	      /* Save a register for the split stack.  */
6753 	      if (flag_split_stack)
6754 		{
6755 		  if (local_regparm == 3)
6756 		    local_regparm = 2;
6757 		  else if (local_regparm == 2
6758 			   && DECL_STATIC_CHAIN (target->decl))
6759 		    local_regparm = 1;
6760 		}
6761 
6762 	      /* Each fixed register usage increases register pressure,
6763 		 so fewer registers should be used for argument passing.
6764 		 This functionality can be overridden by an explicit
6765 		 regparm value.  */
6766 	      for (regno = AX_REG; regno <= DI_REG; regno++)
6767 		if (fixed_regs[regno])
6768 		  globals++;
6769 
6770 	      local_regparm
6771 		= globals < local_regparm ? local_regparm - globals : 0;
6772 
6773 	      if (local_regparm > regparm)
6774 		regparm = local_regparm;
6775 	    }
6776 	}
6777     }
6778 
6779   return regparm;
6780 }
6781 
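/* A worked illustration, not part of the GCC sources, of the lookup above
   (32-bit only):

       void __attribute__((regparm (3))) f (int a, int b, int c);

   ix86_function_regparm returns 3, so A, B and C travel in %eax, %edx and
   %ecx.  For a local (static, address never taken) function compiled with
   optimization, the code above may choose a nonzero regparm value on its
   own, lowered again for fixed registers, the static chain and
   -fsplit-stack.  */
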
6782 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
6783    DFmode (2) arguments in SSE registers for a function with the
6784    indicated TYPE and DECL.  DECL may be NULL when calling a function
6785    indirectly or considering a libcall.  Return -1 if any FP parameter
6786    should be rejected by error.  This is used in situations where we imply
6787    the SSE calling convention but the function is called from another
6788    function with SSE disabled.  Otherwise return 0.  */
6789 
6790 static int
6791 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
6792 {
6793   gcc_assert (!TARGET_64BIT);
6794 
6795   /* Use SSE registers to pass SFmode and DFmode arguments if requested
6796      by the sseregparm attribute.  */
6797   if (TARGET_SSEREGPARM
6798       || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
6799     {
6800       if (!TARGET_SSE)
6801 	{
6802 	  if (warn)
6803 	    {
6804 	      if (decl)
6805 		error ("calling %qD with attribute sseregparm without "
6806 		       "SSE/SSE2 enabled", decl);
6807 	      else
6808 		error ("calling %qT with attribute sseregparm without "
6809 		       "SSE/SSE2 enabled", type);
6810 	    }
6811 	  return 0;
6812 	}
6813 
6814       return 2;
6815     }
6816 
6817   if (!decl)
6818     return 0;
6819 
6820   cgraph_node *target = cgraph_node::get (decl);
6821   if (target)
6822     target = target->function_symbol ();
6823 
6824   /* For local functions, pass up to SSE_REGPARM_MAX SFmode
6825      (and DFmode for SSE2) arguments in SSE registers.  */
6826   if (target
6827       /* TARGET_SSE_MATH */
6828       && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
6829       && opt_for_fn (target->decl, optimize)
6830       && !(profile_flag && !flag_fentry))
6831     {
6832       cgraph_local_info *i = &target->local;
6833       if (i && i->local && i->can_change_signature)
6834 	{
6835 	  /* Refuse to produce wrong code when local function with SSE enabled
6836 	     is called from SSE disabled function.
6837 	     FIXME: We need a way to detect these cases cross-ltrans partition
6838 	     and avoid using SSE calling conventions on local functions called
6839 	     from function with SSE disabled.  For now at least delay the
6840 	     warning until we know we are going to produce wrong code.
6841 	     See PR66047  */
6842 	  if (!TARGET_SSE && warn)
6843 	    return -1;
6844 	  return TARGET_SSE2_P (target_opts_for_fn (target->decl)
6845 				->x_ix86_isa_flags) ? 2 : 1;
6846 	}
6847     }
6848 
6849   return 0;
6850 }
6851 
6852 /* Return true if EAX is live at the start of the function.  Used by
6853    ix86_expand_prologue to determine if we need special help before
6854    calling allocate_stack_worker.  */
6855 
6856 static bool
6857 ix86_eax_live_at_start_p (void)
6858 {
6859   /* Cheat.  Don't bother working forward from ix86_function_regparm
6860      to the function type to whether an actual argument is located in
6861      eax.  Instead just look at cfg info, which is still close enough
6862      to correct at this point.  This gives false positives for broken
6863      functions that might use uninitialized data that happens to be
6864      allocated in eax, but who cares?  */
6865   return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
6866 }
6867 
6868 static bool
6869 ix86_keep_aggregate_return_pointer (tree fntype)
6870 {
6871   tree attr;
6872 
6873   if (!TARGET_64BIT)
6874     {
6875       attr = lookup_attribute ("callee_pop_aggregate_return",
6876 			       TYPE_ATTRIBUTES (fntype));
6877       if (attr)
6878 	return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
6879 
6880       /* For 32-bit MS-ABI the default is to keep aggregate
6881          return pointer.  */
6882       if (ix86_function_type_abi (fntype) == MS_ABI)
6883 	return true;
6884     }
6885   return KEEP_AGGREGATE_RETURN_POINTER != 0;
6886 }
6887 
6888 /* Value is the number of bytes of arguments automatically
6889    popped when returning from a subroutine call.
6890    FUNDECL is the declaration node of the function (as a tree),
6891    FUNTYPE is the data type of the function (as a tree),
6892    or for a library call it is an identifier node for the subroutine name.
6893    SIZE is the number of bytes of arguments passed on the stack.
6894 
6895    On the 80386, the RTD insn may be used to pop them if the number
6896      of args is fixed, but if the number is variable then the caller
6897      must pop them all.  RTD can't be used for library calls now
6898      because the library is compiled with the Unix compiler.
6899    Use of RTD is a selectable option, since it is incompatible with
6900    standard Unix calling sequences.  If the option is not selected,
6901    the caller must always pop the args.
6902 
6903    The attribute stdcall is equivalent to RTD on a per module basis.  */
6904 
6905 static poly_int64
6906 ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
6907 {
6908   unsigned int ccvt;
6909 
6910   /* None of the 64-bit ABIs pop arguments.  */
6911   if (TARGET_64BIT)
6912     return 0;
6913 
6914   ccvt = ix86_get_callcvt (funtype);
6915 
6916   if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
6917 	       | IX86_CALLCVT_THISCALL)) != 0
6918       && ! stdarg_p (funtype))
6919     return size;
6920 
6921   /* Lose any fake structure return argument if it is passed on the stack.  */
6922   if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
6923       && !ix86_keep_aggregate_return_pointer (funtype))
6924     {
6925       int nregs = ix86_function_regparm (funtype, fundecl);
6926       if (nregs == 0)
6927 	return GET_MODE_SIZE (Pmode);
6928     }
6929 
6930   return 0;
6931 }
6932 
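/* A hedged example, not part of the GCC sources, of the callee-pop rule
   above with -m32:

       void __attribute__((stdcall)) f (int a, int b);

   Eight bytes of arguments are passed on the stack, ix86_return_pops_args
   returns 8, and F returns with "ret $8"; the caller does not adjust %esp
   afterwards.  Declaring F variadic would fall back to the caller-pops
   (cdecl) behaviour instead.  */
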
6933 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.  */
6934 
6935 static bool
6936 ix86_legitimate_combined_insn (rtx_insn *insn)
6937 {
6938   int i;
6939 
6940   /* Check operand constraints in case hard registers were propagated
6941      into insn pattern.  This check prevents combine pass from
6942      generating insn patterns with invalid hard register operands.
6943      These invalid insns can eventually confuse reload to error out
6944      with a spill failure.  See also PRs 46829 and 46843.  */
6945 
6946   gcc_assert (INSN_CODE (insn) >= 0);
6947 
6948   extract_insn (insn);
6949   preprocess_constraints (insn);
6950 
6951   int n_operands = recog_data.n_operands;
6952   int n_alternatives = recog_data.n_alternatives;
6953   for (i = 0; i < n_operands; i++)
6954     {
6955       rtx op = recog_data.operand[i];
6956       machine_mode mode = GET_MODE (op);
6957       const operand_alternative *op_alt;
6958       int offset = 0;
6959       bool win;
6960       int j;
6961 
6962       /* A unary operator may be accepted by the predicate, but it
6963 	 is irrelevant for matching constraints.  */
6964       if (UNARY_P (op))
6965 	op = XEXP (op, 0);
6966 
6967       if (SUBREG_P (op))
6968 	{
6969 	  if (REG_P (SUBREG_REG (op))
6970 	      && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
6971 	    offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
6972 					  GET_MODE (SUBREG_REG (op)),
6973 					  SUBREG_BYTE (op),
6974 					  GET_MODE (op));
6975 	  op = SUBREG_REG (op);
6976 	}
6977 
6978       if (!(REG_P (op) && HARD_REGISTER_P (op)))
6979 	continue;
6980 
6981       op_alt = recog_op_alt;
6982 
6983       /* Operand has no constraints, anything is OK.  */
6984       win = !n_alternatives;
6985 
6986       alternative_mask preferred = get_preferred_alternatives (insn);
6987       for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6988 	{
6989 	  if (!TEST_BIT (preferred, j))
6990 	    continue;
6991 	  if (op_alt[i].anything_ok
6992 	      || (op_alt[i].matches != -1
6993 		  && operands_match_p
6994 		  (recog_data.operand[i],
6995 		   recog_data.operand[op_alt[i].matches]))
6996 	      || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6997 	    {
6998 	      win = true;
6999 	      break;
7000 	    }
7001 	}
7002 
7003       if (!win)
7004 	return false;
7005     }
7006 
7007   return true;
7008 }
7009 
7010 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
7011 
7012 static unsigned HOST_WIDE_INT
7013 ix86_asan_shadow_offset (void)
7014 {
7015   return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
7016 				     : HOST_WIDE_INT_C (0x7fff8000))
7017 		     : (HOST_WIDE_INT_1 << 29);
7018 }
7019 
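/* For reference (the standard AddressSanitizer mapping, not something this
   file defines): the sanitizer computes

       shadow_address = (address >> 3) + ix86_asan_shadow_offset ()

   so each shadow byte describes 8 bytes of application memory.  The
   0x7fff8000 constant is the usual offset for LP64 Linux targets; Mach-O
   and non-LP64 targets get the shifted values returned above.  */
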
7020 /* Argument support functions.  */
7021 
7022 /* Return true when register may be used to pass function parameters.  */
7023 bool
7024 ix86_function_arg_regno_p (int regno)
7025 {
7026   int i;
7027   enum calling_abi call_abi;
7028   const int *parm_regs;
7029 
7030   if (TARGET_MPX && BND_REGNO_P (regno))
7031     return true;
7032 
7033   if (!TARGET_64BIT)
7034     {
7035       if (TARGET_MACHO)
7036         return (regno < REGPARM_MAX
7037                 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
7038       else
7039         return (regno < REGPARM_MAX
7040 	        || (TARGET_MMX && MMX_REGNO_P (regno)
7041 	  	    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
7042 	        || (TARGET_SSE && SSE_REGNO_P (regno)
7043 		    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
7044     }
7045 
7046   if (TARGET_SSE && SSE_REGNO_P (regno)
7047       && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
7048     return true;
7049 
7050   /* TODO: The function should depend on current function ABI but
7051      builtins.c would need updating then. Therefore we use the
7052      default ABI.  */
7053   call_abi = ix86_cfun_abi ();
7054 
7055   /* RAX is used as hidden argument to va_arg functions.  */
7056   if (call_abi == SYSV_ABI && regno == AX_REG)
7057     return true;
7058 
7059   if (call_abi == MS_ABI)
7060     parm_regs = x86_64_ms_abi_int_parameter_registers;
7061   else
7062     parm_regs = x86_64_int_parameter_registers;
7063 
7064   for (i = 0; i < (call_abi == MS_ABI
7065 		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
7066     if (regno == parm_regs[i])
7067       return true;
7068   return false;
7069 }
7070 
7071 /* Return true if we do not know how to pass TYPE solely in registers.  */
7072 
7073 static bool
7074 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
7075 {
7076   if (must_pass_in_stack_var_size_or_pad (mode, type))
7077     return true;
7078 
7079   /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
7080      The layout_type routine is crafty and tries to trick us into passing
7081      currently unsupported vector types on the stack by using TImode.  */
7082   return (!TARGET_64BIT && mode == TImode
7083 	  && type && TREE_CODE (type) != VECTOR_TYPE);
7084 }
7085 
7086 /* Return the size, in bytes, of the area reserved for arguments passed
7087    in registers for the function represented by FNDECL, depending on the
7088    ABI format used.  */
7089 int
7090 ix86_reg_parm_stack_space (const_tree fndecl)
7091 {
7092   enum calling_abi call_abi = SYSV_ABI;
7093   if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
7094     call_abi = ix86_function_abi (fndecl);
7095   else
7096     call_abi = ix86_function_type_abi (fndecl);
7097   if (TARGET_64BIT && call_abi == MS_ABI)
7098     return 32;
7099   return 0;
7100 }
7101 
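/* Quick illustration, not part of the GCC sources: for a 64-bit ms_abi
   function such as

       void __attribute__((ms_abi)) f (int, int, int, int);

   the caller must reserve 32 bytes of register-parameter "home space" on
   the stack, which is exactly the value returned above; SysV callers
   reserve nothing.  */
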
7102 /* We add this as a workaround in order to use libc_has_function
7103    hook in i386.md.  */
7104 bool
7105 ix86_libc_has_function (enum function_class fn_class)
7106 {
7107   return targetm.libc_has_function (fn_class);
7108 }
7109 
7110 /* Returns value SYSV_ABI, MS_ABI dependent on fntype,
7111    specifying the call abi used.  */
7112 enum calling_abi
7113 ix86_function_type_abi (const_tree fntype)
7114 {
7115   enum calling_abi abi = ix86_abi;
7116 
7117   if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
7118     return abi;
7119 
7120   if (abi == SYSV_ABI
7121       && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
7122     {
7123       static int warned;
7124       if (TARGET_X32 && !warned)
7125 	{
7126 	  error ("X32 does not support ms_abi attribute");
7127 	  warned = 1;
7128 	}
7129 
7130       abi = MS_ABI;
7131     }
7132   else if (abi == MS_ABI
7133 	   && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
7134     abi = SYSV_ABI;
7135 
7136   return abi;
7137 }
7138 
7139 static enum calling_abi
7140 ix86_function_abi (const_tree fndecl)
7141 {
7142   return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
7143 }
7144 
7145 /* Returns value SYSV_ABI, MS_ABI dependent on cfun,
7146    specifying the call abi used.  */
7147 enum calling_abi
7148 ix86_cfun_abi (void)
7149 {
7150   return cfun ? cfun->machine->call_abi : ix86_abi;
7151 }
7152 
7153 static bool
7154 ix86_function_ms_hook_prologue (const_tree fn)
7155 {
7156   if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
7157     {
7158       if (decl_function_context (fn) != NULL_TREE)
7159 	error_at (DECL_SOURCE_LOCATION (fn),
7160 		  "ms_hook_prologue is not compatible with nested function");
7161       else
7162         return true;
7163     }
7164   return false;
7165 }
7166 
7167 static bool
7168 ix86_function_naked (const_tree fn)
7169 {
7170   if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
7171     return true;
7172 
7173   return false;
7174 }
7175 
7176 /* Write the extra assembler code needed to declare a function properly.  */
7177 
7178 void
7179 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
7180 				tree decl)
7181 {
7182   bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
7183 
7184   if (is_ms_hook)
7185     {
7186       int i, filler_count = (TARGET_64BIT ? 32 : 16);
7187       unsigned int filler_cc = 0xcccccccc;
7188 
7189       for (i = 0; i < filler_count; i += 4)
7190         fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
7191     }
7192 
7193 #ifdef SUBTARGET_ASM_UNWIND_INIT
7194   SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
7195 #endif
7196 
7197   ASM_OUTPUT_LABEL (asm_out_file, fname);
7198 
7199   /* Output magic byte marker, if hot-patch attribute is set.  */
7200   if (is_ms_hook)
7201     {
7202       if (TARGET_64BIT)
7203 	{
7204 	  /* leaq [%rsp + 0], %rsp  */
7205 	  fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
7206 		 asm_out_file);
7207 	}
7208       else
7209 	{
7210           /* movl.s %edi, %edi
7211 	     push   %ebp
7212 	     movl.s %esp, %ebp */
7213 	  fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", asm_out_file);
7214 	}
7215     }
7216 }
7217 
7218 /* Implementation of the call ABI switching target hook.  The call
7219    register sets specific to FNDECL are set up here.  See also
7220    ix86_conditional_register_usage for more details.  */
7221 void
7222 ix86_call_abi_override (const_tree fndecl)
7223 {
7224   cfun->machine->call_abi = ix86_function_abi (fndecl);
7225 }
7226 
7227 /* Return true if a pseudo register should be created and used to hold
7228    the GOT address for PIC code.  */
7229 bool
7230 ix86_use_pseudo_pic_reg (void)
7231 {
7232   if ((TARGET_64BIT
7233        && (ix86_cmodel == CM_SMALL_PIC
7234 	   || TARGET_PECOFF))
7235       || !flag_pic)
7236     return false;
7237   return true;
7238 }
7239 
7240 /* Initialize large model PIC register.  */
7241 
7242 static void
7243 ix86_init_large_pic_reg (unsigned int tmp_regno)
7244 {
7245   rtx_code_label *label;
7246   rtx tmp_reg;
7247 
7248   gcc_assert (Pmode == DImode);
7249   label = gen_label_rtx ();
7250   emit_label (label);
7251   LABEL_PRESERVE_P (label) = 1;
7252   tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
7253   gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
7254   emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
7255 				label));
7256   emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
7257   emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
7258 			    pic_offset_table_rtx, tmp_reg));
7259   const char *name = LABEL_NAME (label);
7260   PUT_CODE (label, NOTE);
7261   NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
7262   NOTE_DELETED_LABEL_NAME (label) = name;
7263 }
7264 
7265 /* Create and initialize PIC register if required.  */
7266 static void
7267 ix86_init_pic_reg (void)
7268 {
7269   edge entry_edge;
7270   rtx_insn *seq;
7271 
7272   if (!ix86_use_pseudo_pic_reg ())
7273     return;
7274 
7275   start_sequence ();
7276 
7277   if (TARGET_64BIT)
7278     {
7279       if (ix86_cmodel == CM_LARGE_PIC)
7280 	ix86_init_large_pic_reg (R11_REG);
7281       else
7282 	emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
7283     }
7284   else
7285     {
7286       /* If there is a future mcount call in the function, it is more profitable
7287 	  to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM.  */
7288       rtx reg = crtl->profile
7289 		? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
7290 		: pic_offset_table_rtx;
7291       rtx_insn *insn = emit_insn (gen_set_got (reg));
7292       RTX_FRAME_RELATED_P (insn) = 1;
7293       if (crtl->profile)
7294         emit_move_insn (pic_offset_table_rtx, reg);
7295       add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
7296     }
7297 
7298   seq = get_insns ();
7299   end_sequence ();
7300 
7301   entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
7302   insert_insn_on_edge (seq, entry_edge);
7303   commit_one_edge_insertion (entry_edge);
7304 }
7305 
7306 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7307    for a call to a function whose data type is FNTYPE.
7308    For a library call, FNTYPE is 0.  */
7309 
7310 void
7311 init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
7312 		      tree fntype,	/* tree ptr for function decl */
7313 		      rtx libname,	/* SYMBOL_REF of library name or 0 */
7314 		      tree fndecl,
7315 		      int caller)
7316 {
7317   struct cgraph_local_info *i = NULL;
7318   struct cgraph_node *target = NULL;
7319 
7320   memset (cum, 0, sizeof (*cum));
7321 
7322   if (fndecl)
7323     {
7324       target = cgraph_node::get (fndecl);
7325       if (target)
7326 	{
7327 	  target = target->function_symbol ();
7328 	  i = cgraph_node::local_info (target->decl);
7329 	  cum->call_abi = ix86_function_abi (target->decl);
7330 	}
7331       else
7332 	cum->call_abi = ix86_function_abi (fndecl);
7333     }
7334   else
7335     cum->call_abi = ix86_function_type_abi (fntype);
7336 
7337   cum->caller = caller;
7338 
7339   /* Set up the number of registers to use for passing arguments.  */
7340   cum->nregs = ix86_regparm;
7341   if (TARGET_64BIT)
7342     {
7343       cum->nregs = (cum->call_abi == SYSV_ABI
7344                    ? X86_64_REGPARM_MAX
7345                    : X86_64_MS_REGPARM_MAX);
7346     }
7347   if (TARGET_SSE)
7348     {
7349       cum->sse_nregs = SSE_REGPARM_MAX;
7350       if (TARGET_64BIT)
7351         {
7352           cum->sse_nregs = (cum->call_abi == SYSV_ABI
7353                            ? X86_64_SSE_REGPARM_MAX
7354                            : X86_64_MS_SSE_REGPARM_MAX);
7355         }
7356     }
7357   if (TARGET_MMX)
7358     cum->mmx_nregs = MMX_REGPARM_MAX;
7359   cum->warn_avx512f = true;
7360   cum->warn_avx = true;
7361   cum->warn_sse = true;
7362   cum->warn_mmx = true;
7363 
7364   /* Because the type might mismatch between caller and callee, we need to
7365      use the actual type of the function for local calls.
7366      FIXME: cgraph_analyze can be told to actually record if a function uses
7367      va_start, so for local functions maybe_vaarg can be made aggressive
7368      enough to help K&R code.
7369      FIXME: once the type system is fixed, we won't need this code anymore.  */
7370   if (i && i->local && i->can_change_signature)
7371     fntype = TREE_TYPE (target->decl);
7372   cum->stdarg = stdarg_p (fntype);
7373   cum->maybe_vaarg = (fntype
7374 		      ? (!prototype_p (fntype) || stdarg_p (fntype))
7375 		      : !libname);
7376 
7377   cum->bnd_regno = FIRST_BND_REG;
7378   cum->bnds_in_bt = 0;
7379   cum->force_bnd_pass = 0;
7380   cum->decl = fndecl;
7381 
7382   cum->warn_empty = !warn_abi || cum->stdarg;
7383   if (!cum->warn_empty && fntype)
7384     {
7385       function_args_iterator iter;
7386       tree argtype;
7387       bool seen_empty_type = false;
7388       FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
7389 	{
7390 	  if (argtype == error_mark_node || VOID_TYPE_P (argtype))
7391 	    break;
7392 	  if (TYPE_EMPTY_P (argtype))
7393 	    seen_empty_type = true;
7394 	  else if (seen_empty_type)
7395 	    {
7396 	      cum->warn_empty = true;
7397 	      break;
7398 	    }
7399 	}
7400     }
7401 
7402   if (!TARGET_64BIT)
7403     {
7404       /* If there are variable arguments, then we won't pass anything
7405          in registers in 32-bit mode. */
7406       if (stdarg_p (fntype))
7407 	{
7408 	  cum->nregs = 0;
7409 	  /* Since in 32-bit mode variable arguments are always passed on
7410 	     the stack, there is a scratch register available for an
7411 	     indirect sibcall.  */
7412 	  cfun->machine->arg_reg_available = true;
7413 	  cum->sse_nregs = 0;
7414 	  cum->mmx_nregs = 0;
7415 	  cum->warn_avx512f = false;
7416 	  cum->warn_avx = false;
7417 	  cum->warn_sse = false;
7418 	  cum->warn_mmx = false;
7419 	  return;
7420 	}
7421 
7422       /* Use ecx and edx registers if function has fastcall attribute,
7423 	 else look for regparm information.  */
7424       if (fntype)
7425 	{
7426 	  unsigned int ccvt = ix86_get_callcvt (fntype);
7427 	  if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
7428 	    {
7429 	      cum->nregs = 1;
7430 	      cum->fastcall = 1; /* Same first register as in fastcall.  */
7431 	    }
7432 	  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
7433 	    {
7434 	      cum->nregs = 2;
7435 	      cum->fastcall = 1;
7436 	    }
7437 	  else
7438 	    cum->nregs = ix86_function_regparm (fntype, fndecl);
7439 	}
7440 
7441       /* Set up the number of SSE registers used for passing SFmode
7442 	 and DFmode arguments.  Warn for mismatching ABI.  */
7443       cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
7444     }
7445 
7446   cfun->machine->arg_reg_available = (cum->nregs > 0);
7447 }
7448 
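/* A small sketch, not part of the GCC sources, of the 32-bit register
   setup below:

       void __attribute__((fastcall)) f (int a, int b, int c);

   ix86_get_callcvt reports IX86_CALLCVT_FASTCALL, so cum->nregs becomes 2
   and cum->fastcall is set: A and B go in %ecx and %edx while C is pushed
   on the stack.  A variadic prototype would instead zero all the register
   counts, as handled at the top of the !TARGET_64BIT block.  */
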
7449 /* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
7450    But in the case of vector types, it is some vector mode.
7451 
7452    When we have only some of our vector isa extensions enabled, then there
7453    are some modes for which vector_mode_supported_p is false.  For these
7454    modes, the generic vector support in gcc will choose some non-vector mode
7455    in order to implement the type.  By computing the natural mode, we'll
7456    select the proper ABI location for the operand and not depend on whatever
7457    the middle-end decides to do with these vector types.
7458 
7459    The middle-end can't deal with vector types > 16 bytes.  In this
7460    case, we return the original mode and warn about the ABI change if
7461    CUM isn't NULL.
7462 
7463    If IN_RETURN is true, warn about the ABI change if the vector mode
7464    isn't available for the function return value.  */
7465 
7466 static machine_mode
7467 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
7468 		   bool in_return)
7469 {
7470   machine_mode mode = TYPE_MODE (type);
7471 
7472   if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
7473     {
7474       HOST_WIDE_INT size = int_size_in_bytes (type);
7475       if ((size == 8 || size == 16 || size == 32 || size == 64)
7476 	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
7477 	  && TYPE_VECTOR_SUBPARTS (type) > 1)
7478 	{
7479 	  machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
7480 
7481 	  /* There are no XFmode vector modes.  */
7482 	  if (innermode == XFmode)
7483 	    return mode;
7484 
7485 	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
7486 	    mode = MIN_MODE_VECTOR_FLOAT;
7487 	  else
7488 	    mode = MIN_MODE_VECTOR_INT;
7489 
7490 	  /* Get the mode which has this inner mode and number of units.  */
7491 	  FOR_EACH_MODE_FROM (mode, mode)
7492 	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
7493 		&& GET_MODE_INNER (mode) == innermode)
7494 	      {
7495 		if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
7496 		  {
7497 		    static bool warnedavx512f;
7498 		    static bool warnedavx512f_ret;
7499 
7500 		    if (cum && cum->warn_avx512f && !warnedavx512f)
7501 		      {
7502 			if (warning (OPT_Wpsabi, "AVX512F vector argument "
7503 				     "without AVX512F enabled changes the ABI"))
7504 			  warnedavx512f = true;
7505 		      }
7506 		    else if (in_return && !warnedavx512f_ret)
7507 		      {
7508 			if (warning (OPT_Wpsabi, "AVX512F vector return "
7509 				     "without AVX512F enabled changes the ABI"))
7510 			  warnedavx512f_ret = true;
7511 		      }
7512 
7513 		    return TYPE_MODE (type);
7514 		  }
7515 		else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
7516 		  {
7517 		    static bool warnedavx;
7518 		    static bool warnedavx_ret;
7519 
7520 		    if (cum && cum->warn_avx && !warnedavx)
7521 		      {
7522 			if (warning (OPT_Wpsabi, "AVX vector argument "
7523 				     "without AVX enabled changes the ABI"))
7524 			  warnedavx = true;
7525 		      }
7526 		    else if (in_return && !warnedavx_ret)
7527 		      {
7528 			if (warning (OPT_Wpsabi, "AVX vector return "
7529 				     "without AVX enabled changes the ABI"))
7530 			  warnedavx_ret = true;
7531 		      }
7532 
7533 		    return TYPE_MODE (type);
7534 		  }
7535 		else if (((size == 8 && TARGET_64BIT) || size == 16)
7536 			 && !TARGET_SSE
7537 			 && !TARGET_IAMCU)
7538 		  {
7539 		    static bool warnedsse;
7540 		    static bool warnedsse_ret;
7541 
7542 		    if (cum && cum->warn_sse && !warnedsse)
7543 		      {
7544 			if (warning (OPT_Wpsabi, "SSE vector argument "
7545 				     "without SSE enabled changes the ABI"))
7546 			  warnedsse = true;
7547 		      }
7548 		    else if (!TARGET_64BIT && in_return && !warnedsse_ret)
7549 		      {
7550 			if (warning (OPT_Wpsabi, "SSE vector return "
7551 				     "without SSE enabled changes the ABI"))
7552 			  warnedsse_ret = true;
7553 		      }
7554 		  }
7555 		else if ((size == 8 && !TARGET_64BIT)
7556 			 && (!cfun
7557 			     || cfun->machine->func_type == TYPE_NORMAL)
7558 			 && !TARGET_MMX
7559 			 && !TARGET_IAMCU)
7560 		  {
7561 		    static bool warnedmmx;
7562 		    static bool warnedmmx_ret;
7563 
7564 		    if (cum && cum->warn_mmx && !warnedmmx)
7565 		      {
7566 			if (warning (OPT_Wpsabi, "MMX vector argument "
7567 				     "without MMX enabled changes the ABI"))
7568 			  warnedmmx = true;
7569 		      }
7570 		    else if (in_return && !warnedmmx_ret)
7571 		      {
7572 			if (warning (OPT_Wpsabi, "MMX vector return "
7573 				     "without MMX enabled changes the ABI"))
7574 			  warnedmmx_ret = true;
7575 		      }
7576 		  }
7577 		return mode;
7578 	      }
7579 
7580 	  gcc_unreachable ();
7581 	}
7582     }
7583 
7584   return mode;
7585 }
7586 
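/* An example, not part of the GCC sources, of the -Wpsabi situation handled
   above (the compile flags are assumptions of the example):

       typedef int v8si __attribute__((vector_size (32)));
       v8si add (v8si a, v8si b) { return a + b; }

   Compiled without -mavx, the 32-byte vector argument triggers the "AVX
   vector argument without AVX enabled changes the ABI" warning and the
   function falls back to TYPE_MODE, so the argument ends up passed in
   memory rather than in a YMM register.  */
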
7587 /* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
7588    this may not agree with the mode that the type system has chosen for the
7589    register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
7590    go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */
7591 
7592 static rtx
7593 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
7594 		     unsigned int regno)
7595 {
7596   rtx tmp;
7597 
7598   if (orig_mode != BLKmode)
7599     tmp = gen_rtx_REG (orig_mode, regno);
7600   else
7601     {
7602       tmp = gen_rtx_REG (mode, regno);
7603       tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
7604       tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
7605     }
7606 
7607   return tmp;
7608 }
7609 
7610 /* x86-64 register passing implementation.  See the x86-64 ABI for details.
7611    The goal of this code is to classify each 8-byte chunk of an incoming
7612    argument by register class and assign registers accordingly.  */
7613 
7614 /* Return the union class of CLASS1 and CLASS2.
7615    See the x86-64 PS ABI for details.  */
7616 
7617 static enum x86_64_reg_class
7618 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
7619 {
7620   /* Rule #1: If both classes are equal, this is the resulting class.  */
7621   if (class1 == class2)
7622     return class1;
7623 
7624   /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
7625      the other class.  */
7626   if (class1 == X86_64_NO_CLASS)
7627     return class2;
7628   if (class2 == X86_64_NO_CLASS)
7629     return class1;
7630 
7631   /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
7632   if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
7633     return X86_64_MEMORY_CLASS;
7634 
7635   /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
7636   if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
7637       || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
7638     return X86_64_INTEGERSI_CLASS;
7639   if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
7640       || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
7641     return X86_64_INTEGER_CLASS;
7642 
7643   /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
7644      MEMORY is used.  */
7645   if (class1 == X86_64_X87_CLASS
7646       || class1 == X86_64_X87UP_CLASS
7647       || class1 == X86_64_COMPLEX_X87_CLASS
7648       || class2 == X86_64_X87_CLASS
7649       || class2 == X86_64_X87UP_CLASS
7650       || class2 == X86_64_COMPLEX_X87_CLASS)
7651     return X86_64_MEMORY_CLASS;
7652 
7653   /* Rule #6: Otherwise class SSE is used.  */
7654   return X86_64_SSE_CLASS;
7655 }
7656 
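/* A worked example, not part of the GCC sources, of the rules above:

       struct s { int i; float f; };        a single 8-byte chunk

   The int half classifies as an INTEGER subclass and the float half as an
   SSE subclass; rule #4 merges the chunk to the INTEGER class, so the whole
   struct is passed in one general-purpose register.  A struct of two floats
   instead merges to the SSE class (rule #6) and is passed in an XMM
   register.  */
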
7657 /* Classify the argument of type TYPE and mode MODE.
7658    CLASSES will be filled by the register class used to pass each word
7659    of the operand.  The number of words is returned.  In case the parameter
7660    should be passed in memory, 0 is returned. As a special case for zero
7661    sized containers, classes[0] will be NO_CLASS and 1 is returned.
7662 
7663    BIT_OFFSET is used internally for handling records and specifies the
7664    offset in bits, modulo 512, to avoid overflow cases.
7665 
7666    See the x86-64 PS ABI for details.
7667 */
7668 
7669 static int
7670 classify_argument (machine_mode mode, const_tree type,
7671 		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
7672 {
7673   HOST_WIDE_INT bytes =
7674     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7675   int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
7676 
7677   /* Variable sized entities are always passed/returned in memory.  */
7678   if (bytes < 0)
7679     return 0;
7680 
7681   if (mode != VOIDmode
7682       && targetm.calls.must_pass_in_stack (mode, type))
7683     return 0;
7684 
7685   if (type && AGGREGATE_TYPE_P (type))
7686     {
7687       int i;
7688       tree field;
7689       enum x86_64_reg_class subclasses[MAX_CLASSES];
7690 
7691       /* On x86-64 we pass structures larger than 64 bytes on the stack.  */
7692       if (bytes > 64)
7693 	return 0;
7694 
7695       for (i = 0; i < words; i++)
7696 	classes[i] = X86_64_NO_CLASS;
7697 
7698       /* Zero-sized arrays or structures are NO_CLASS.  We return 0 to
7699 	 signal the memory class, so handle this as a special case.  */
7700       if (!words)
7701 	{
7702 	  classes[0] = X86_64_NO_CLASS;
7703 	  return 1;
7704 	}
7705 
7706       /* Classify each field of record and merge classes.  */
7707       switch (TREE_CODE (type))
7708 	{
7709 	case RECORD_TYPE:
7710 	  /* And now merge the fields of structure.  */
7711 	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7712 	    {
7713 	      if (TREE_CODE (field) == FIELD_DECL)
7714 		{
7715 		  int num;
7716 
7717 		  if (TREE_TYPE (field) == error_mark_node)
7718 		    continue;
7719 
7720 		  /* Bitfields are always classified as integer.  Handle them
7721 		     early, since later code would consider them to be
7722 		     misaligned integers.  */
7723 		  if (DECL_BIT_FIELD (field))
7724 		    {
7725 		      for (i = (int_bit_position (field)
7726 				+ (bit_offset % 64)) / 8 / 8;
7727 			   i < ((int_bit_position (field) + (bit_offset % 64))
7728 			        + tree_to_shwi (DECL_SIZE (field))
7729 				+ 63) / 8 / 8; i++)
7730 			classes[i] =
7731 			  merge_classes (X86_64_INTEGER_CLASS,
7732 					 classes[i]);
7733 		    }
7734 		  else
7735 		    {
7736 		      int pos;
7737 
7738 		      type = TREE_TYPE (field);
7739 
7740 		      /* Flexible array member is ignored.  */
7741 		      if (TYPE_MODE (type) == BLKmode
7742 			  && TREE_CODE (type) == ARRAY_TYPE
7743 			  && TYPE_SIZE (type) == NULL_TREE
7744 			  && TYPE_DOMAIN (type) != NULL_TREE
7745 			  && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
7746 			      == NULL_TREE))
7747 			{
7748 			  static bool warned;
7749 
7750 			  if (!warned && warn_psabi)
7751 			    {
7752 			      warned = true;
7753 			      inform (input_location,
7754 				      "the ABI of passing struct with"
7755 				      " a flexible array member has"
7756 				      " changed in GCC 4.4");
7757 			    }
7758 			  continue;
7759 			}
7760 		      num = classify_argument (TYPE_MODE (type), type,
7761 					       subclasses,
7762 					       (int_bit_position (field)
7763 						+ bit_offset) % 512);
7764 		      if (!num)
7765 			return 0;
7766 		      pos = (int_bit_position (field)
7767 			     + (bit_offset % 64)) / 8 / 8;
7768 		      for (i = 0; i < num && (i + pos) < words; i++)
7769 			classes[i + pos] =
7770 			  merge_classes (subclasses[i], classes[i + pos]);
7771 		    }
7772 		}
7773 	    }
7774 	  break;
7775 
7776 	case ARRAY_TYPE:
7777 	  /* Arrays are handled as small records.  */
7778 	  {
7779 	    int num;
7780 	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
7781 				     TREE_TYPE (type), subclasses, bit_offset);
7782 	    if (!num)
7783 	      return 0;
7784 
7785 	    /* The partial classes are now full classes.  */
7786 	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
7787 	      subclasses[0] = X86_64_SSE_CLASS;
7788 	    if (subclasses[0] == X86_64_INTEGERSI_CLASS
7789 		&& !((bit_offset % 64) == 0 && bytes == 4))
7790 	      subclasses[0] = X86_64_INTEGER_CLASS;
7791 
7792 	    for (i = 0; i < words; i++)
7793 	      classes[i] = subclasses[i % num];
7794 
7795 	    break;
7796 	  }
7797 	case UNION_TYPE:
7798 	case QUAL_UNION_TYPE:
7799 	  /* Unions are similar to RECORD_TYPE but offset is always 0.
7800 	     */
7801 	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7802 	    {
7803 	      if (TREE_CODE (field) == FIELD_DECL)
7804 		{
7805 		  int num;
7806 
7807 		  if (TREE_TYPE (field) == error_mark_node)
7808 		    continue;
7809 
7810 		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
7811 					   TREE_TYPE (field), subclasses,
7812 					   bit_offset);
7813 		  if (!num)
7814 		    return 0;
7815 		  for (i = 0; i < num && i < words; i++)
7816 		    classes[i] = merge_classes (subclasses[i], classes[i]);
7817 		}
7818 	    }
7819 	  break;
7820 
7821 	default:
7822 	  gcc_unreachable ();
7823 	}
7824 
7825       if (words > 2)
7826 	{
7827 	  /* When the size exceeds 16 bytes, if the first eightbyte isn't
7828 	     X86_64_SSE_CLASS or any of the remaining ones isn't
7829 	     X86_64_SSEUP_CLASS, everything should be passed in
7830 	     memory.  */
7831 	  if (classes[0] != X86_64_SSE_CLASS)
7832 	      return 0;
7833 
7834 	  for (i = 1; i < words; i++)
7835 	    if (classes[i] != X86_64_SSEUP_CLASS)
7836 	      return 0;
7837 	}
7838 
7839       /* Final merger cleanup.  */
7840       for (i = 0; i < words; i++)
7841 	{
7842 	  /* If one class is MEMORY, everything should be passed in
7843 	     memory.  */
7844 	  if (classes[i] == X86_64_MEMORY_CLASS)
7845 	    return 0;
7846 
7847 	  /* The X86_64_SSEUP_CLASS should be always preceded by
7848 	     X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
7849 	  if (classes[i] == X86_64_SSEUP_CLASS
7850 	      && classes[i - 1] != X86_64_SSE_CLASS
7851 	      && classes[i - 1] != X86_64_SSEUP_CLASS)
7852 	    {
7853 	      /* The first one should never be X86_64_SSEUP_CLASS.  */
7854 	      gcc_assert (i != 0);
7855 	      classes[i] = X86_64_SSE_CLASS;
7856 	    }
7857 
7858 	  /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
7859 	     everything should be passed in memory.  */
7860 	  if (classes[i] == X86_64_X87UP_CLASS
7861 	      && (classes[i - 1] != X86_64_X87_CLASS))
7862 	    {
7863 	      static bool warned;
7864 
7865 	      /* The first one should never be X86_64_X87UP_CLASS.  */
7866 	      gcc_assert (i != 0);
7867 	      if (!warned && warn_psabi)
7868 		{
7869 		  warned = true;
7870 		  inform (input_location,
7871 			  "the ABI of passing union with long double"
7872 			  " has changed in GCC 4.4");
7873 		}
7874 	      return 0;
7875 	    }
7876 	}
7877       return words;
7878     }
7879 
7880   /* Compute the alignment needed.  We align all types to their natural
7881      boundaries, with the exception of XFmode, which is aligned to 64 bits.  */
7882   if (mode != VOIDmode && mode != BLKmode)
7883     {
7884       int mode_alignment = GET_MODE_BITSIZE (mode);
7885 
7886       if (mode == XFmode)
7887 	mode_alignment = 128;
7888       else if (mode == XCmode)
7889 	mode_alignment = 256;
7890       if (COMPLEX_MODE_P (mode))
7891 	mode_alignment /= 2;
7892       /* Misaligned fields are always returned in memory.  */
7893       if (bit_offset % mode_alignment)
7894 	return 0;
7895     }
7896 
7897   /* For V1xx modes, just use the base mode.  */
7898   if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
7899       && GET_MODE_UNIT_SIZE (mode) == bytes)
7900     mode = GET_MODE_INNER (mode);
7901 
7902   /* Classification of atomic types.  */
7903   switch (mode)
7904     {
7905     case E_SDmode:
7906     case E_DDmode:
7907       classes[0] = X86_64_SSE_CLASS;
7908       return 1;
7909     case E_TDmode:
7910       classes[0] = X86_64_SSE_CLASS;
7911       classes[1] = X86_64_SSEUP_CLASS;
7912       return 2;
7913     case E_DImode:
7914     case E_SImode:
7915     case E_HImode:
7916     case E_QImode:
7917     case E_CSImode:
7918     case E_CHImode:
7919     case E_CQImode:
7920       {
7921 	int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
7922 
7923 	/* Analyze last 128 bits only.  */
7924 	size = (size - 1) & 0x7f;
7925 
7926 	if (size < 32)
7927 	  {
7928 	    classes[0] = X86_64_INTEGERSI_CLASS;
7929 	    return 1;
7930 	  }
7931 	else if (size < 64)
7932 	  {
7933 	    classes[0] = X86_64_INTEGER_CLASS;
7934 	    return 1;
7935 	  }
7936 	else if (size < 64+32)
7937 	  {
7938 	    classes[0] = X86_64_INTEGER_CLASS;
7939 	    classes[1] = X86_64_INTEGERSI_CLASS;
7940 	    return 2;
7941 	  }
7942 	else if (size < 64+64)
7943 	  {
7944 	    classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7945 	    return 2;
7946 	  }
7947 	else
7948 	  gcc_unreachable ();
7949       }
7950     case E_CDImode:
7951     case E_TImode:
7952       classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7953       return 2;
7954     case E_COImode:
7955     case E_OImode:
7956       /* OImode shouldn't be used directly.  */
7957       gcc_unreachable ();
7958     case E_CTImode:
7959       return 0;
7960     case E_SFmode:
7961       if (!(bit_offset % 64))
7962 	classes[0] = X86_64_SSESF_CLASS;
7963       else
7964 	classes[0] = X86_64_SSE_CLASS;
7965       return 1;
7966     case E_DFmode:
7967       classes[0] = X86_64_SSEDF_CLASS;
7968       return 1;
7969     case E_XFmode:
7970       classes[0] = X86_64_X87_CLASS;
7971       classes[1] = X86_64_X87UP_CLASS;
7972       return 2;
7973     case E_TFmode:
7974       classes[0] = X86_64_SSE_CLASS;
7975       classes[1] = X86_64_SSEUP_CLASS;
7976       return 2;
7977     case E_SCmode:
7978       classes[0] = X86_64_SSE_CLASS;
7979       if (!(bit_offset % 64))
7980 	return 1;
7981       else
7982 	{
7983 	  static bool warned;
7984 
7985 	  if (!warned && warn_psabi)
7986 	    {
7987 	      warned = true;
7988 	      inform (input_location,
7989 		      "the ABI of passing structure with complex float"
7990 		      " member has changed in GCC 4.4");
7991 	    }
7992 	  classes[1] = X86_64_SSESF_CLASS;
7993 	  return 2;
7994 	}
7995     case E_DCmode:
7996       classes[0] = X86_64_SSEDF_CLASS;
7997       classes[1] = X86_64_SSEDF_CLASS;
7998       return 2;
7999     case E_XCmode:
8000       classes[0] = X86_64_COMPLEX_X87_CLASS;
8001       return 1;
8002     case E_TCmode:
8003       /* This mode is larger than 16 bytes.  */
8004       return 0;
8005     case E_V8SFmode:
8006     case E_V8SImode:
8007     case E_V32QImode:
8008     case E_V16HImode:
8009     case E_V4DFmode:
8010     case E_V4DImode:
8011       classes[0] = X86_64_SSE_CLASS;
8012       classes[1] = X86_64_SSEUP_CLASS;
8013       classes[2] = X86_64_SSEUP_CLASS;
8014       classes[3] = X86_64_SSEUP_CLASS;
8015       return 4;
8016     case E_V8DFmode:
8017     case E_V16SFmode:
8018     case E_V8DImode:
8019     case E_V16SImode:
8020     case E_V32HImode:
8021     case E_V64QImode:
8022       classes[0] = X86_64_SSE_CLASS;
8023       classes[1] = X86_64_SSEUP_CLASS;
8024       classes[2] = X86_64_SSEUP_CLASS;
8025       classes[3] = X86_64_SSEUP_CLASS;
8026       classes[4] = X86_64_SSEUP_CLASS;
8027       classes[5] = X86_64_SSEUP_CLASS;
8028       classes[6] = X86_64_SSEUP_CLASS;
8029       classes[7] = X86_64_SSEUP_CLASS;
8030       return 8;
8031     case E_V4SFmode:
8032     case E_V4SImode:
8033     case E_V16QImode:
8034     case E_V8HImode:
8035     case E_V2DFmode:
8036     case E_V2DImode:
8037       classes[0] = X86_64_SSE_CLASS;
8038       classes[1] = X86_64_SSEUP_CLASS;
8039       return 2;
8040     case E_V1TImode:
8041     case E_V1DImode:
8042     case E_V2SFmode:
8043     case E_V2SImode:
8044     case E_V4HImode:
8045     case E_V8QImode:
8046       classes[0] = X86_64_SSE_CLASS;
8047       return 1;
8048     case E_BLKmode:
8049     case E_VOIDmode:
8050       return 0;
8051     default:
8052       gcc_assert (VECTOR_MODE_P (mode));
8053 
8054       if (bytes > 16)
8055 	return 0;
8056 
8057       gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
8058 
8059       if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
8060 	classes[0] = X86_64_INTEGERSI_CLASS;
8061       else
8062 	classes[0] = X86_64_INTEGER_CLASS;
8063       classes[1] = X86_64_INTEGER_CLASS;
8064       return 1 + (bytes > 8);
8065     }
8066 }
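
/* As a rough illustration of the classification above (SysV AMD64 psABI),
   consider a 16-byte structure such as

       struct s { double d; int i; };

   It occupies two eightbytes: the first merges to an SSE class (the
   double) and the second to an integer class (the int), so the struct
   travels in one XMM register plus one general-purpose register when
   enough registers are free.  A structure larger than 16 bytes that is
   not a single wide SSE vector fails the SSE/SSEUP check above, making
   classify_argument return 0, i.e. pass the argument in memory.  */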
8067 
8068 /* Examine the argument and set the number of registers required in each
8069    class.  Return true iff the parameter should be passed in memory.  */
8070 
8071 static bool
8072 examine_argument (machine_mode mode, const_tree type, int in_return,
8073 		  int *int_nregs, int *sse_nregs)
8074 {
8075   enum x86_64_reg_class regclass[MAX_CLASSES];
8076   int n = classify_argument (mode, type, regclass, 0);
8077 
8078   *int_nregs = 0;
8079   *sse_nregs = 0;
8080 
8081   if (!n)
8082     return true;
8083   for (n--; n >= 0; n--)
8084     switch (regclass[n])
8085       {
8086       case X86_64_INTEGER_CLASS:
8087       case X86_64_INTEGERSI_CLASS:
8088 	(*int_nregs)++;
8089 	break;
8090       case X86_64_SSE_CLASS:
8091       case X86_64_SSESF_CLASS:
8092       case X86_64_SSEDF_CLASS:
8093 	(*sse_nregs)++;
8094 	break;
8095       case X86_64_NO_CLASS:
8096       case X86_64_SSEUP_CLASS:
8097 	break;
8098       case X86_64_X87_CLASS:
8099       case X86_64_X87UP_CLASS:
8100       case X86_64_COMPLEX_X87_CLASS:
8101 	if (!in_return)
8102 	  return true;
8103 	break;
8104       case X86_64_MEMORY_CLASS:
8105 	gcc_unreachable ();
8106       }
8107 
8108   return false;
8109 }
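
/* For the 16-byte struct illustrated above, examine_argument reports
   *int_nregs == 1 and *sse_nregs == 1 and returns false (register
   passing); any argument classify_argument sends to memory makes it
   return true, and the x87 classes force memory for arguments but are
   allowed for return values.  This is only a sketch of the common
   cases.  */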
8110 
8111 /* Construct a container for the argument used by the GCC interface.  See
8112    FUNCTION_ARG for the detailed description.  */
8113 
8114 static rtx
8115 construct_container (machine_mode mode, machine_mode orig_mode,
8116 		     const_tree type, int in_return, int nintregs, int nsseregs,
8117 		     const int *intreg, int sse_regno)
8118 {
8119   /* The following variables hold the static issued_error state.  */
8120   static bool issued_sse_arg_error;
8121   static bool issued_sse_ret_error;
8122   static bool issued_x87_ret_error;
8123 
8124   machine_mode tmpmode;
8125   int bytes =
8126     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
8127   enum x86_64_reg_class regclass[MAX_CLASSES];
8128   int n;
8129   int i;
8130   int nexps = 0;
8131   int needed_sseregs, needed_intregs;
8132   rtx exp[MAX_CLASSES];
8133   rtx ret;
8134 
8135   n = classify_argument (mode, type, regclass, 0);
8136   if (!n)
8137     return NULL;
8138   if (examine_argument (mode, type, in_return, &needed_intregs,
8139 			&needed_sseregs))
8140     return NULL;
8141   if (needed_intregs > nintregs || needed_sseregs > nsseregs)
8142     return NULL;
8143 
8144   /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
8145      some less clueful developer tries to use floating-point anyway.  */
8146   if (needed_sseregs && !TARGET_SSE)
8147     {
8148       if (in_return)
8149 	{
8150 	  if (!issued_sse_ret_error)
8151 	    {
8152 	      error ("SSE register return with SSE disabled");
8153 	      issued_sse_ret_error = true;
8154 	    }
8155 	}
8156       else if (!issued_sse_arg_error)
8157 	{
8158 	  error ("SSE register argument with SSE disabled");
8159 	  issued_sse_arg_error = true;
8160 	}
8161       return NULL;
8162     }
8163 
8164   /* Likewise, error if the ABI requires us to return values in the
8165      x87 registers and the user specified -mno-80387.  */
8166   if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
8167     for (i = 0; i < n; i++)
8168       if (regclass[i] == X86_64_X87_CLASS
8169 	  || regclass[i] == X86_64_X87UP_CLASS
8170 	  || regclass[i] == X86_64_COMPLEX_X87_CLASS)
8171 	{
8172 	  if (!issued_x87_ret_error)
8173 	    {
8174 	      error ("x87 register return with x87 disabled");
8175 	      issued_x87_ret_error = true;
8176 	    }
8177 	  return NULL;
8178 	}
8179 
8180   /* First construct the simple cases.  Avoid SCmode, since we want to use
8181      a single register to pass this type.  */
8182   if (n == 1 && mode != SCmode)
8183     switch (regclass[0])
8184       {
8185       case X86_64_INTEGER_CLASS:
8186       case X86_64_INTEGERSI_CLASS:
8187 	return gen_rtx_REG (mode, intreg[0]);
8188       case X86_64_SSE_CLASS:
8189       case X86_64_SSESF_CLASS:
8190       case X86_64_SSEDF_CLASS:
8191 	if (mode != BLKmode)
8192 	  return gen_reg_or_parallel (mode, orig_mode,
8193 				      SSE_REGNO (sse_regno));
8194 	break;
8195       case X86_64_X87_CLASS:
8196       case X86_64_COMPLEX_X87_CLASS:
8197 	return gen_rtx_REG (mode, FIRST_STACK_REG);
8198       case X86_64_NO_CLASS:
8199 	/* Zero sized array, struct or class.  */
8200 	return NULL;
8201       default:
8202 	gcc_unreachable ();
8203       }
8204   if (n == 2
8205       && regclass[0] == X86_64_SSE_CLASS
8206       && regclass[1] == X86_64_SSEUP_CLASS
8207       && mode != BLKmode)
8208     return gen_reg_or_parallel (mode, orig_mode,
8209 				SSE_REGNO (sse_regno));
8210   if (n == 4
8211       && regclass[0] == X86_64_SSE_CLASS
8212       && regclass[1] == X86_64_SSEUP_CLASS
8213       && regclass[2] == X86_64_SSEUP_CLASS
8214       && regclass[3] == X86_64_SSEUP_CLASS
8215       && mode != BLKmode)
8216     return gen_reg_or_parallel (mode, orig_mode,
8217 				SSE_REGNO (sse_regno));
8218   if (n == 8
8219       && regclass[0] == X86_64_SSE_CLASS
8220       && regclass[1] == X86_64_SSEUP_CLASS
8221       && regclass[2] == X86_64_SSEUP_CLASS
8222       && regclass[3] == X86_64_SSEUP_CLASS
8223       && regclass[4] == X86_64_SSEUP_CLASS
8224       && regclass[5] == X86_64_SSEUP_CLASS
8225       && regclass[6] == X86_64_SSEUP_CLASS
8226       && regclass[7] == X86_64_SSEUP_CLASS
8227       && mode != BLKmode)
8228     return gen_reg_or_parallel (mode, orig_mode,
8229 				SSE_REGNO (sse_regno));
8230   if (n == 2
8231       && regclass[0] == X86_64_X87_CLASS
8232       && regclass[1] == X86_64_X87UP_CLASS)
8233     return gen_rtx_REG (XFmode, FIRST_STACK_REG);
8234 
8235   if (n == 2
8236       && regclass[0] == X86_64_INTEGER_CLASS
8237       && regclass[1] == X86_64_INTEGER_CLASS
8238       && (mode == CDImode || mode == TImode)
8239       && intreg[0] + 1 == intreg[1])
8240     return gen_rtx_REG (mode, intreg[0]);
8241 
8242   /* Otherwise figure out the entries of the PARALLEL.  */
8243   for (i = 0; i < n; i++)
8244     {
8245       int pos;
8246 
8247       switch (regclass[i])
8248         {
8249 	  case X86_64_NO_CLASS:
8250 	    break;
8251 	  case X86_64_INTEGER_CLASS:
8252 	  case X86_64_INTEGERSI_CLASS:
8253 	    /* Merge TImodes on aligned occasions here too.  */
8254 	    if (i * 8 + 8 > bytes)
8255 	      {
8256 		unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
8257 		if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
8258 		  /* We've requested a number of bits (e.g. 24) for which
8259 		     there is no integer mode.  Use DImode.  */
8260 		  tmpmode = DImode;
8261 	      }
8262 	    else if (regclass[i] == X86_64_INTEGERSI_CLASS)
8263 	      tmpmode = SImode;
8264 	    else
8265 	      tmpmode = DImode;
8266 	    exp [nexps++]
8267 	      = gen_rtx_EXPR_LIST (VOIDmode,
8268 				   gen_rtx_REG (tmpmode, *intreg),
8269 				   GEN_INT (i*8));
8270 	    intreg++;
8271 	    break;
8272 	  case X86_64_SSESF_CLASS:
8273 	    exp [nexps++]
8274 	      = gen_rtx_EXPR_LIST (VOIDmode,
8275 				   gen_rtx_REG (SFmode,
8276 						SSE_REGNO (sse_regno)),
8277 				   GEN_INT (i*8));
8278 	    sse_regno++;
8279 	    break;
8280 	  case X86_64_SSEDF_CLASS:
8281 	    exp [nexps++]
8282 	      = gen_rtx_EXPR_LIST (VOIDmode,
8283 				   gen_rtx_REG (DFmode,
8284 						SSE_REGNO (sse_regno)),
8285 				   GEN_INT (i*8));
8286 	    sse_regno++;
8287 	    break;
8288 	  case X86_64_SSE_CLASS:
8289 	    pos = i;
8290 	    switch (n)
8291 	      {
8292 	      case 1:
8293 		tmpmode = DImode;
8294 		break;
8295 	      case 2:
8296 		if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
8297 		  {
8298 		    tmpmode = TImode;
8299 		    i++;
8300 		  }
8301 		else
8302 		  tmpmode = DImode;
8303 		break;
8304 	      case 4:
8305 		gcc_assert (i == 0
8306 			    && regclass[1] == X86_64_SSEUP_CLASS
8307 			    && regclass[2] == X86_64_SSEUP_CLASS
8308 			    && regclass[3] == X86_64_SSEUP_CLASS);
8309 		tmpmode = OImode;
8310 		i += 3;
8311 		break;
8312 	      case 8:
8313 		gcc_assert (i == 0
8314 			    && regclass[1] == X86_64_SSEUP_CLASS
8315 			    && regclass[2] == X86_64_SSEUP_CLASS
8316 			    && regclass[3] == X86_64_SSEUP_CLASS
8317 			    && regclass[4] == X86_64_SSEUP_CLASS
8318 			    && regclass[5] == X86_64_SSEUP_CLASS
8319 			    && regclass[6] == X86_64_SSEUP_CLASS
8320 			    && regclass[7] == X86_64_SSEUP_CLASS);
8321 		tmpmode = XImode;
8322 		i += 7;
8323 		break;
8324 	      default:
8325 		gcc_unreachable ();
8326 	      }
8327 	    exp [nexps++]
8328 	      = gen_rtx_EXPR_LIST (VOIDmode,
8329 				   gen_rtx_REG (tmpmode,
8330 						SSE_REGNO (sse_regno)),
8331 				   GEN_INT (pos*8));
8332 	    sse_regno++;
8333 	    break;
8334 	  default:
8335 	    gcc_unreachable ();
8336 	}
8337     }
8338 
8339   /* Empty aligned struct, union or class.  */
8340   if (nexps == 0)
8341     return NULL;
8342 
8343   ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
8344   for (i = 0; i < nexps; i++)
8345     XVECEXP (ret, 0, i) = exp [i];
8346   return ret;
8347 }
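
/* A sketch of the RTL this produces: for the two-eightbyte struct used
   in the illustrations above, the container is approximately

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI di) (const_int 8))])

   i.e. one EXPR_LIST per eightbyte, pairing the register with the byte
   offset of that piece inside the argument.  */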
8348 
8349 /* Update the data in CUM to advance over an argument of mode MODE
8350    and data type TYPE.  (TYPE is null for libcalls where that information
8351    may not be available.)
8352 
8353    Return the number of integer registers advanced over.  */
8354 
8355 static int
8356 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
8357 			 const_tree type, HOST_WIDE_INT bytes,
8358 			 HOST_WIDE_INT words)
8359 {
8360   int res = 0;
8361   bool error_p = false;
8362 
8363   if (TARGET_IAMCU)
8364     {
8365       /* Intel MCU psABI passes scalars and aggregates no larger than 8
8366 	 bytes in registers.  */
8367       if (!VECTOR_MODE_P (mode) && bytes <= 8)
8368 	goto pass_in_reg;
8369       return res;
8370     }
8371 
8372   switch (mode)
8373     {
8374     default:
8375       break;
8376 
8377     case E_BLKmode:
8378       if (bytes < 0)
8379 	break;
8380       /* FALLTHRU */
8381 
8382     case E_DImode:
8383     case E_SImode:
8384     case E_HImode:
8385     case E_QImode:
8386 pass_in_reg:
8387       cum->words += words;
8388       cum->nregs -= words;
8389       cum->regno += words;
8390       if (cum->nregs >= 0)
8391 	res = words;
8392       if (cum->nregs <= 0)
8393 	{
8394 	  cum->nregs = 0;
8395 	  cfun->machine->arg_reg_available = false;
8396 	  cum->regno = 0;
8397 	}
8398       break;
8399 
8400     case E_OImode:
8401       /* OImode shouldn't be used directly.  */
8402       gcc_unreachable ();
8403 
8404     case E_DFmode:
8405       if (cum->float_in_sse == -1)
8406 	error_p = true;
8407       if (cum->float_in_sse < 2)
8408 	break;
8409       /* FALLTHRU */
8410     case E_SFmode:
8411       if (cum->float_in_sse == -1)
8412 	error_p = true;
8413       if (cum->float_in_sse < 1)
8414 	break;
8415       /* FALLTHRU */
8416 
8417     case E_V8SFmode:
8418     case E_V8SImode:
8419     case E_V64QImode:
8420     case E_V32HImode:
8421     case E_V16SImode:
8422     case E_V8DImode:
8423     case E_V16SFmode:
8424     case E_V8DFmode:
8425     case E_V32QImode:
8426     case E_V16HImode:
8427     case E_V4DFmode:
8428     case E_V4DImode:
8429     case E_TImode:
8430     case E_V16QImode:
8431     case E_V8HImode:
8432     case E_V4SImode:
8433     case E_V2DImode:
8434     case E_V4SFmode:
8435     case E_V2DFmode:
8436       if (!type || !AGGREGATE_TYPE_P (type))
8437 	{
8438 	  cum->sse_words += words;
8439 	  cum->sse_nregs -= 1;
8440 	  cum->sse_regno += 1;
8441 	  if (cum->sse_nregs <= 0)
8442 	    {
8443 	      cum->sse_nregs = 0;
8444 	      cum->sse_regno = 0;
8445 	    }
8446 	}
8447       break;
8448 
8449     case E_V8QImode:
8450     case E_V4HImode:
8451     case E_V2SImode:
8452     case E_V2SFmode:
8453     case E_V1TImode:
8454     case E_V1DImode:
8455       if (!type || !AGGREGATE_TYPE_P (type))
8456 	{
8457 	  cum->mmx_words += words;
8458 	  cum->mmx_nregs -= 1;
8459 	  cum->mmx_regno += 1;
8460 	  if (cum->mmx_nregs <= 0)
8461 	    {
8462 	      cum->mmx_nregs = 0;
8463 	      cum->mmx_regno = 0;
8464 	    }
8465 	}
8466       break;
8467     }
8468   if (error_p)
8469     {
8470       cum->float_in_sse = 0;
8471       error ("calling %qD with SSE calling convention without "
8472 	     "SSE/SSE2 enabled", cum->decl);
8473       sorry ("this is a GCC bug that can be worked around by adding "
8474 	     "attribute used to function called");
8475     }
8476 
8477   return res;
8478 }
8479 
8480 static int
8481 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
8482 			 const_tree type, HOST_WIDE_INT words, bool named)
8483 {
8484   int int_nregs, sse_nregs;
8485 
8486   /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack.  */
8487   if (!named && (VALID_AVX512F_REG_MODE (mode)
8488 		 || VALID_AVX256_REG_MODE (mode)))
8489     return 0;
8490 
8491   if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
8492       && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
8493     {
8494       cum->nregs -= int_nregs;
8495       cum->sse_nregs -= sse_nregs;
8496       cum->regno += int_nregs;
8497       cum->sse_regno += sse_nregs;
8498       return int_nregs;
8499     }
8500   else
8501     {
8502       int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
8503       cum->words = ROUND_UP (cum->words, align);
8504       cum->words += words;
8505       return 0;
8506     }
8507 }
8508 
8509 static int
8510 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
8511 			    HOST_WIDE_INT words)
8512 {
8513   /* Otherwise, this should be passed indirectly.  */
8514   gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
8515 
8516   cum->words += words;
8517   if (cum->nregs > 0)
8518     {
8519       cum->nregs -= 1;
8520       cum->regno += 1;
8521       return 1;
8522     }
8523   return 0;
8524 }
8525 
8526 /* Update the data in CUM to advance over an argument of mode MODE and
8527    data type TYPE.  (TYPE is null for libcalls where that information
8528    may not be available.)  */
8529 
8530 static void
8531 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
8532 			   const_tree type, bool named)
8533 {
8534   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8535   HOST_WIDE_INT bytes, words;
8536   int nregs;
8537 
8538   /* The argument of interrupt handler is a special case and is
8539      handled in ix86_function_arg.  */
8540   if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
8541     return;
8542 
8543   if (mode == BLKmode)
8544     bytes = int_size_in_bytes (type);
8545   else
8546     bytes = GET_MODE_SIZE (mode);
8547   words = CEIL (bytes, UNITS_PER_WORD);
8548 
8549   if (type)
8550     mode = type_natural_mode (type, NULL, false);
8551 
8552   if ((type && POINTER_BOUNDS_TYPE_P (type))
8553       || POINTER_BOUNDS_MODE_P (mode))
8554     {
8555       /* If we pass bounds in BT then just update the remaining bounds count.  */
8556       if (cum->bnds_in_bt)
8557 	{
8558 	  cum->bnds_in_bt--;
8559 	  return;
8560 	}
8561 
8562       /* Update the remaining number of bounds to force.  */
8563       if (cum->force_bnd_pass)
8564 	cum->force_bnd_pass--;
8565 
8566       cum->bnd_regno++;
8567 
8568       return;
8569     }
8570 
8571   /* The first arg not going to Bounds Tables resets this counter.  */
8572   cum->bnds_in_bt = 0;
8573   /* For unnamed args we always pass bounds to avoid a bounds mismatch when
8574      the passed and received types do not match.  If bounds do not follow an
8575      unnamed arg, still pretend the required number of bounds were passed.  */
8576   if (cum->force_bnd_pass)
8577     {
8578       cum->bnd_regno += cum->force_bnd_pass;
8579       cum->force_bnd_pass = 0;
8580     }
8581 
8582   if (TARGET_64BIT)
8583     {
8584       enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8585 
8586       if (call_abi == MS_ABI)
8587 	nregs = function_arg_advance_ms_64 (cum, bytes, words);
8588       else
8589 	nregs = function_arg_advance_64 (cum, mode, type, words, named);
8590     }
8591   else
8592     nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
8593 
8594   /* For stdarg we expect bounds to be passed for each value passed
8595      in a register.  */
8596   if (cum->stdarg)
8597     cum->force_bnd_pass = nregs;
8598   /* For pointers passed in memory we expect bounds to be passed in the
8599      Bounds Table.  */
8600   if (!nregs)
8601     {
8602       /* Track if there are outgoing arguments on stack.  */
8603       if (cum->caller)
8604 	cfun->machine->outgoing_args_on_stack = true;
8605 
8606       if (flag_check_pointer_bounds)
8607 	cum->bnds_in_bt = chkp_type_bounds_count (type);
8608     }
8609 }
8610 
8611 /* Define where to put the arguments to a function.
8612    Value is zero to push the argument on the stack,
8613    or a hard register in which to store the argument.
8614 
8615    MODE is the argument's machine mode.
8616    TYPE is the data type of the argument (as a tree).
8617     This is null for libcalls where that information may
8618     not be available.
8619    CUM is a variable of type CUMULATIVE_ARGS which gives info about
8620     the preceding args and about the function being called.
8621    NAMED is nonzero if this argument is a named parameter
8622     (otherwise it is an extra parameter matching an ellipsis).  */
8623 
8624 static rtx
8625 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
8626 		 machine_mode orig_mode, const_tree type,
8627 		 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
8628 {
8629   bool error_p = false;
8630 
8631   /* Avoid the AL settings for the Unix64 ABI.  */
8632   if (mode == VOIDmode)
8633     return constm1_rtx;
8634 
8635   if (TARGET_IAMCU)
8636     {
8637       /* Intel MCU psABI passes scalars and aggregates no larger than 8
8638 	 bytes in registers.  */
8639       if (!VECTOR_MODE_P (mode) && bytes <= 8)
8640 	goto pass_in_reg;
8641       return NULL_RTX;
8642     }
8643 
8644   switch (mode)
8645     {
8646     default:
8647       break;
8648 
8649     case E_BLKmode:
8650       if (bytes < 0)
8651 	break;
8652       /* FALLTHRU */
8653     case E_DImode:
8654     case E_SImode:
8655     case E_HImode:
8656     case E_QImode:
8657 pass_in_reg:
8658       if (words <= cum->nregs)
8659 	{
8660 	  int regno = cum->regno;
8661 
8662 	  /* Fastcall allocates the first two DWORD (SImode) or
8663 	     smaller arguments to ECX and EDX if they aren't
8664 	     aggregate types.  */
8665 	  if (cum->fastcall)
8666 	    {
8667 	      if (mode == BLKmode
8668 		  || mode == DImode
8669 		  || (type && AGGREGATE_TYPE_P (type)))
8670 	        break;
8671 
8672 	      /* ECX, not EAX, is the first allocated register.  */
8673 	      if (regno == AX_REG)
8674 		regno = CX_REG;
8675 	    }
8676 	  return gen_rtx_REG (mode, regno);
8677 	}
8678       break;
8679 
8680     case E_DFmode:
8681       if (cum->float_in_sse == -1)
8682 	error_p = true;
8683       if (cum->float_in_sse < 2)
8684 	break;
8685       /* FALLTHRU */
8686     case E_SFmode:
8687       if (cum->float_in_sse == -1)
8688 	error_p = true;
8689       if (cum->float_in_sse < 1)
8690 	break;
8691       /* FALLTHRU */
8692     case E_TImode:
8693       /* In 32bit, we pass TImode in xmm registers.  */
8694     case E_V16QImode:
8695     case E_V8HImode:
8696     case E_V4SImode:
8697     case E_V2DImode:
8698     case E_V4SFmode:
8699     case E_V2DFmode:
8700       if (!type || !AGGREGATE_TYPE_P (type))
8701 	{
8702 	  if (cum->sse_nregs)
8703 	    return gen_reg_or_parallel (mode, orig_mode,
8704 				        cum->sse_regno + FIRST_SSE_REG);
8705 	}
8706       break;
8707 
8708     case E_OImode:
8709     case E_XImode:
8710       /* OImode and XImode shouldn't be used directly.  */
8711       gcc_unreachable ();
8712 
8713     case E_V64QImode:
8714     case E_V32HImode:
8715     case E_V16SImode:
8716     case E_V8DImode:
8717     case E_V16SFmode:
8718     case E_V8DFmode:
8719     case E_V8SFmode:
8720     case E_V8SImode:
8721     case E_V32QImode:
8722     case E_V16HImode:
8723     case E_V4DFmode:
8724     case E_V4DImode:
8725       if (!type || !AGGREGATE_TYPE_P (type))
8726 	{
8727 	  if (cum->sse_nregs)
8728 	    return gen_reg_or_parallel (mode, orig_mode,
8729 				        cum->sse_regno + FIRST_SSE_REG);
8730 	}
8731       break;
8732 
8733     case E_V8QImode:
8734     case E_V4HImode:
8735     case E_V2SImode:
8736     case E_V2SFmode:
8737     case E_V1TImode:
8738     case E_V1DImode:
8739       if (!type || !AGGREGATE_TYPE_P (type))
8740 	{
8741 	  if (cum->mmx_nregs)
8742 	    return gen_reg_or_parallel (mode, orig_mode,
8743 				        cum->mmx_regno + FIRST_MMX_REG);
8744 	}
8745       break;
8746     }
8747   if (error_p)
8748     {
8749       cum->float_in_sse = 0;
8750       error ("calling %qD with SSE calling convention without "
8751 	     "SSE/SSE2 enabled", cum->decl);
8752       sorry ("this is a GCC bug that can be worked around by adding "
8753 	     "attribute used to function called");
8754     }
8755 
8756   return NULL_RTX;
8757 }
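
/* In rough terms for the 32-bit case above: scalar integer arguments
   only land in registers when a regparm-style convention is in effect,
   in which case they are allocated EAX first, then EDX, then ECX
   (fastcall starts with ECX instead, as handled above).  Non-aggregate
   SSE and MMX vector arguments take the first free XMM or MMX
   registers, tracked by cum->sse_nregs and cum->mmx_nregs.  */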
8758 
8759 static rtx
8760 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
8761 		 machine_mode orig_mode, const_tree type, bool named)
8762 {
8763   /* Handle a hidden AL argument containing number of registers
8764      for varargs x86-64 functions.  */
8765   if (mode == VOIDmode)
8766     return GEN_INT (cum->maybe_vaarg
8767 		    ? (cum->sse_nregs < 0
8768 		       ? X86_64_SSE_REGPARM_MAX
8769 		       : cum->sse_regno)
8770 		    : -1);
8771 
8772   switch (mode)
8773     {
8774     default:
8775       break;
8776 
8777     case E_V8SFmode:
8778     case E_V8SImode:
8779     case E_V32QImode:
8780     case E_V16HImode:
8781     case E_V4DFmode:
8782     case E_V4DImode:
8783     case E_V16SFmode:
8784     case E_V16SImode:
8785     case E_V64QImode:
8786     case E_V32HImode:
8787     case E_V8DFmode:
8788     case E_V8DImode:
8789       /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack.  */
8790       if (!named)
8791 	return NULL;
8792       break;
8793     }
8794 
8795   return construct_container (mode, orig_mode, type, 0, cum->nregs,
8796 			      cum->sse_nregs,
8797 			      &x86_64_int_parameter_registers [cum->regno],
8798 			      cum->sse_regno);
8799 }
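
/* A sketch of the 64-bit SysV case: named integer-class arguments are
   drawn from RDI, RSI, RDX, RCX, R8 and R9 and SSE-class arguments
   from XMM0-XMM7, via construct_container above.  The VOIDmode case
   produces the hidden value loaded into AL before a varargs call,
   i.e. the number of vector registers used.  */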
8800 
8801 static rtx
8802 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
8803 		    machine_mode orig_mode, bool named,
8804 		    HOST_WIDE_INT bytes)
8805 {
8806   unsigned int regno;
8807 
8808   /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
8809      We use the value -2 to specify that the current function call is MSABI.  */
8810   if (mode == VOIDmode)
8811     return GEN_INT (-2);
8812 
8813   /* If we've run out of registers, it goes on the stack.  */
8814   if (cum->nregs == 0)
8815     return NULL_RTX;
8816 
8817   regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
8818 
8819   /* Only floating point modes are passed in anything but integer regs.  */
8820   if (TARGET_SSE && (mode == SFmode || mode == DFmode))
8821     {
8822       if (named)
8823 	regno = cum->regno + FIRST_SSE_REG;
8824       else
8825 	{
8826 	  rtx t1, t2;
8827 
8828 	  /* Unnamed floating parameters are passed in both the
8829 	     SSE and integer registers.  */
8830 	  t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
8831 	  t2 = gen_rtx_REG (mode, regno);
8832 	  t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
8833 	  t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
8834 	  return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
8835 	}
8836     }
8837   /* Handle aggregate types passed in a register.  */
8838   if (orig_mode == BLKmode)
8839     {
8840       if (bytes > 0 && bytes <= 8)
8841         mode = (bytes > 4 ? DImode : SImode);
8842       if (mode == BLKmode)
8843         mode = DImode;
8844     }
8845 
8846   return gen_reg_or_parallel (mode, orig_mode, regno);
8847 }
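
/* A sketch of the Microsoft x64 case above: the first four arguments
   use the slots RCX, RDX, R8 and R9, with SFmode/DFmode arguments
   taking the matching XMM0-XMM3 slot instead (or both registers when
   unnamed, as built above).  Aggregates whose size is not 1, 2, 4 or
   8 bytes never reach this point in a register; they are passed by
   reference, see ix86_pass_by_reference below.  */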
8848 
8849 /* Return where to put the arguments to a function.
8850    Return zero to push the argument on the stack, or a hard register in which to store the argument.
8851 
8852    MODE is the argument's machine mode.  TYPE is the data type of the
8853    argument.  It is null for libcalls where that information may not be
8854    available.  CUM gives information about the preceding args and about
8855    the function being called.  NAMED is nonzero if this argument is a
8856    named parameter (otherwise it is an extra parameter matching an
8857    ellipsis).  */
8858 
8859 static rtx
8860 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
8861 		   const_tree type, bool named)
8862 {
8863   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8864   machine_mode mode = omode;
8865   HOST_WIDE_INT bytes, words;
8866   rtx arg;
8867 
8868   if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
8869     {
8870       gcc_assert (type != NULL_TREE);
8871       if (POINTER_TYPE_P (type))
8872 	{
8873 	  /* This is the pointer argument.  */
8874 	  gcc_assert (TYPE_MODE (type) == Pmode);
8875 	  /* It is at -WORD(AP) in the current frame in interrupt and
8876 	     exception handlers.  */
8877 	  arg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
8878 	}
8879       else
8880 	{
8881 	  gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
8882 		      && TREE_CODE (type) == INTEGER_TYPE
8883 		      && TYPE_MODE (type) == word_mode);
8884 	  /* The error code is the word-mode integer argument at
8885 	     -2 * WORD(AP) in the current frame of the exception
8886 	     handler.  */
8887 	  arg = gen_rtx_MEM (word_mode,
8888 			     plus_constant (Pmode,
8889 					    arg_pointer_rtx,
8890 					    -2 * UNITS_PER_WORD));
8891 	}
8892       return arg;
8893     }
8894 
8895   /* All pointer bounds arguments are handled separately here.  */
8896   if ((type && POINTER_BOUNDS_TYPE_P (type))
8897       || POINTER_BOUNDS_MODE_P (mode))
8898     {
8899       /* Return NULL if bounds are forced to go in Bounds Table.  */
8900       if (cum->bnds_in_bt)
8901 	arg = NULL;
8902       /* Return the next available bound reg if any.  */
8903       else if (cum->bnd_regno <= LAST_BND_REG)
8904 	arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
8905       /* Return the next special slot number otherwise.  */
8906       else
8907 	arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
8908 
8909       return arg;
8910     }
8911 
8912   if (mode == BLKmode)
8913     bytes = int_size_in_bytes (type);
8914   else
8915     bytes = GET_MODE_SIZE (mode);
8916   words = CEIL (bytes, UNITS_PER_WORD);
8917 
8918   /* To simplify the code below, represent vector types with a vector mode
8919      even if MMX/SSE are not active.  */
8920   if (type && TREE_CODE (type) == VECTOR_TYPE)
8921     mode = type_natural_mode (type, cum, false);
8922 
8923   if (TARGET_64BIT)
8924     {
8925       enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8926 
8927       if (call_abi == MS_ABI)
8928 	arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
8929       else
8930 	arg = function_arg_64 (cum, mode, omode, type, named);
8931     }
8932   else
8933     arg = function_arg_32 (cum, mode, omode, type, bytes, words);
8934 
8935   /* Track if there are outgoing arguments on stack.  */
8936   if (arg == NULL_RTX && cum->caller)
8937     cfun->machine->outgoing_args_on_stack = true;
8938 
8939   return arg;
8940 }
8941 
8942 /* A C expression that indicates when an argument must be passed by
8943    reference.  If nonzero for an argument, a copy of that argument is
8944    made in memory and a pointer to the argument is passed instead of
8945    the argument itself.  The pointer is passed in whatever way is
8946    appropriate for passing a pointer to that type.  */
8947 
8948 static bool
8949 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
8950 			const_tree type, bool)
8951 {
8952   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8953 
8954   /* Bounds are never passed by reference.  */
8955   if ((type && POINTER_BOUNDS_TYPE_P (type))
8956       || POINTER_BOUNDS_MODE_P (mode))
8957     return false;
8958 
8959   if (TARGET_64BIT)
8960     {
8961       enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8962 
8963       /* See Windows x64 Software Convention.  */
8964       if (call_abi == MS_ABI)
8965 	{
8966 	  HOST_WIDE_INT msize = GET_MODE_SIZE (mode);
8967 
8968 	  if (type)
8969 	    {
8970 	      /* Arrays are passed by reference.  */
8971 	      if (TREE_CODE (type) == ARRAY_TYPE)
8972 		return true;
8973 
8974 	      if (RECORD_OR_UNION_TYPE_P (type))
8975 		{
8976 		  /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
8977 		     are passed by reference.  */
8978 		  msize = int_size_in_bytes (type);
8979 		}
8980 	    }
8981 
8982 	  /* __m128 is passed by reference.  */
8983 	  return msize != 1 && msize != 2 && msize != 4 && msize != 8;
8984 	}
8985       else if (type && int_size_in_bytes (type) == -1)
8986 	return true;
8987     }
8988 
8989   return false;
8990 }
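
/* For example, under the MS ABI a 16-byte __m128 or a 12-byte struct
   is passed by reference, while an 8-byte struct travels by value in a
   register or stack slot; under the 64-bit SysV ABI only types of
   variable size (int_size_in_bytes == -1) are forced through a
   reference here, and 32-bit targets never pass by reference via this
   hook.  */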
8991 
8992 /* Return true when TYPE should be 128bit aligned for 32bit argument
8993    passing ABI.  XXX: This function is obsolete and is only used for
8994    checking psABI compatibility with previous versions of GCC.  */
8995 
8996 static bool
8997 ix86_compat_aligned_value_p (const_tree type)
8998 {
8999   machine_mode mode = TYPE_MODE (type);
9000   if (((TARGET_SSE && SSE_REG_MODE_P (mode))
9001        || mode == TDmode
9002        || mode == TFmode
9003        || mode == TCmode)
9004       && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
9005     return true;
9006   if (TYPE_ALIGN (type) < 128)
9007     return false;
9008 
9009   if (AGGREGATE_TYPE_P (type))
9010     {
9011       /* Walk the aggregates recursively.  */
9012       switch (TREE_CODE (type))
9013 	{
9014 	case RECORD_TYPE:
9015 	case UNION_TYPE:
9016 	case QUAL_UNION_TYPE:
9017 	  {
9018 	    tree field;
9019 
9020 	    /* Walk all the structure fields.  */
9021 	    for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
9022 	      {
9023 		if (TREE_CODE (field) == FIELD_DECL
9024 		    && ix86_compat_aligned_value_p (TREE_TYPE (field)))
9025 		  return true;
9026 	      }
9027 	    break;
9028 	  }
9029 
9030 	case ARRAY_TYPE:
9031 	  /* Just for use if some language passes arrays by value.  */
9032 	  if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
9033 	    return true;
9034 	  break;
9035 
9036 	default:
9037 	  gcc_unreachable ();
9038 	}
9039     }
9040   return false;
9041 }
9042 
9043 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
9044    XXX: This function is obsolete and is only used for checking psABI
9045    compatibility with previous versions of GCC.  */
9046 
9047 static unsigned int
9048 ix86_compat_function_arg_boundary (machine_mode mode,
9049 				   const_tree type, unsigned int align)
9050 {
9051   /* In 32bit, only _Decimal128 and __float128 are aligned to their
9052      natural boundaries.  */
9053   if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
9054     {
9055       /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
9056 	 make an exception for SSE modes since these require 128bit
9057 	 alignment.
9058 
9059 	 The handling here differs from field_alignment.  ICC aligns MMX
9060 	 arguments to 4 byte boundaries, while structure fields are aligned
9061 	 to 8 byte boundaries.  */
9062       if (!type)
9063 	{
9064 	  if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
9065 	    align = PARM_BOUNDARY;
9066 	}
9067       else
9068 	{
9069 	  if (!ix86_compat_aligned_value_p (type))
9070 	    align = PARM_BOUNDARY;
9071 	}
9072     }
9073   if (align > BIGGEST_ALIGNMENT)
9074     align = BIGGEST_ALIGNMENT;
9075   return align;
9076 }
9077 
9078 /* Return true when TYPE should be 128bit aligned for 32bit argument
9079    passing ABI.  */
9080 
9081 static bool
9082 ix86_contains_aligned_value_p (const_tree type)
9083 {
9084   machine_mode mode = TYPE_MODE (type);
9085 
9086   if (mode == XFmode || mode == XCmode)
9087     return false;
9088 
9089   if (TYPE_ALIGN (type) < 128)
9090     return false;
9091 
9092   if (AGGREGATE_TYPE_P (type))
9093     {
9094       /* Walk the aggregates recursively.  */
9095       switch (TREE_CODE (type))
9096 	{
9097 	case RECORD_TYPE:
9098 	case UNION_TYPE:
9099 	case QUAL_UNION_TYPE:
9100 	  {
9101 	    tree field;
9102 
9103 	    /* Walk all the structure fields.  */
9104 	    for (field = TYPE_FIELDS (type);
9105 		 field;
9106 		 field = DECL_CHAIN (field))
9107 	      {
9108 		if (TREE_CODE (field) == FIELD_DECL
9109 		    && ix86_contains_aligned_value_p (TREE_TYPE (field)))
9110 		  return true;
9111 	      }
9112 	    break;
9113 	  }
9114 
9115 	case ARRAY_TYPE:
9116 	  /* Just for use if some language passes arrays by value.  */
9117 	  if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
9118 	    return true;
9119 	  break;
9120 
9121 	default:
9122 	  gcc_unreachable ();
9123 	}
9124     }
9125   else
9126     return TYPE_ALIGN (type) >= 128;
9127 
9128   return false;
9129 }
9130 
9131 /* Gives the alignment boundary, in bits, of an argument with the
9132    specified mode and type.  */
9133 
9134 static unsigned int
9135 ix86_function_arg_boundary (machine_mode mode, const_tree type)
9136 {
9137   unsigned int align;
9138   if (type)
9139     {
9140       /* Since the main variant type is used for the call, convert the
9141 	 type to its main variant.  */
9142       type = TYPE_MAIN_VARIANT (type);
9143       align = TYPE_ALIGN (type);
9144       if (TYPE_EMPTY_P (type))
9145 	return PARM_BOUNDARY;
9146     }
9147   else
9148     align = GET_MODE_ALIGNMENT (mode);
9149   if (align < PARM_BOUNDARY)
9150     align = PARM_BOUNDARY;
9151   else
9152     {
9153       static bool warned;
9154       unsigned int saved_align = align;
9155 
9156       if (!TARGET_64BIT)
9157 	{
9158 	  /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
9159 	  if (!type)
9160 	    {
9161 	      if (mode == XFmode || mode == XCmode)
9162 		align = PARM_BOUNDARY;
9163 	    }
9164 	  else if (!ix86_contains_aligned_value_p (type))
9165 	    align = PARM_BOUNDARY;
9166 
9167 	  if (align < 128)
9168 	    align = PARM_BOUNDARY;
9169 	}
9170 
9171       if (warn_psabi
9172 	  && !warned
9173 	  && align != ix86_compat_function_arg_boundary (mode, type,
9174 							 saved_align))
9175 	{
9176 	  warned = true;
9177 	  inform (input_location,
9178 		  "The ABI for passing parameters with %d-byte"
9179 		  " alignment has changed in GCC 4.6",
9180 		  align / BITS_PER_UNIT);
9181 	}
9182     }
9183 
9184   return align;
9185 }
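
/* For instance, a __m128 argument reports a 128-bit boundary, while on
   32-bit targets a plain double or long double argument is knocked
   back to PARM_BOUNDARY, matching the i386 ABI; the psABI note above
   fires when the result differs from the pre-GCC 4.6 computation.  */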
9186 
9187 /* Return true if N is a possible register number of function value.  */
9188 
9189 static bool
9190 ix86_function_value_regno_p (const unsigned int regno)
9191 {
9192   switch (regno)
9193     {
9194     case AX_REG:
9195       return true;
9196     case DX_REG:
9197       return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
9198     case DI_REG:
9199     case SI_REG:
9200       return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
9201 
9202     case BND0_REG:
9203     case BND1_REG:
9204       return chkp_function_instrumented_p (current_function_decl);
9205 
9206       /* Complex values are returned in %st(0)/%st(1) pair.  */
9207     case ST0_REG:
9208     case ST1_REG:
9209       /* TODO: The function should depend on the current function ABI, but
9210 	 builtins.c would then need updating.  Therefore we use the
9211 	 default ABI.  */
9212       if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
9213 	return false;
9214       return TARGET_FLOAT_RETURNS_IN_80387;
9215 
9216       /* Complex values are returned in %xmm0/%xmm1 pair.  */
9217     case XMM0_REG:
9218     case XMM1_REG:
9219       return TARGET_SSE;
9220 
9221     case MM0_REG:
9222       if (TARGET_MACHO || TARGET_64BIT)
9223 	return false;
9224       return TARGET_MMX;
9225     }
9226 
9227   return false;
9228 }
9229 
9230 /* Define how to find the value returned by a function.
9231    VALTYPE is the data type of the value (as a tree).
9232    If the precise function being called is known, FUNC is its FUNCTION_DECL;
9233    otherwise, FUNC is 0.  */
9234 
9235 static rtx
9236 function_value_32 (machine_mode orig_mode, machine_mode mode,
9237 		   const_tree fntype, const_tree fn)
9238 {
9239   unsigned int regno;
9240 
9241   /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
9242      we normally prevent this case when mmx is not available.  However
9243      some ABIs may require the result to be returned like DImode.  */
9244   if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
9245     regno = FIRST_MMX_REG;
9246 
9247   /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
9248      we prevent this case when sse is not available.  However some ABIs
9249      may require the result to be returned like integer TImode.  */
9250   else if (mode == TImode
9251 	   || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
9252     regno = FIRST_SSE_REG;
9253 
9254   /* 32-byte vector modes in %ymm0.   */
9255   else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
9256     regno = FIRST_SSE_REG;
9257 
9258   /* 64-byte vector modes in %zmm0.   */
9259   else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
9260     regno = FIRST_SSE_REG;
9261 
9262   /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
9263   else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
9264     regno = FIRST_FLOAT_REG;
9265   else
9266     /* Most things go in %eax.  */
9267     regno = AX_REG;
9268 
9269   /* Override FP return register with %xmm0 for local functions when
9270      SSE math is enabled or for functions with sseregparm attribute.  */
9271   if ((fn || fntype) && (mode == SFmode || mode == DFmode))
9272     {
9273       int sse_level = ix86_function_sseregparm (fntype, fn, false);
9274       if (sse_level == -1)
9275 	{
9276 	  error ("calling %qD with SSE calling convention without "
9277 		 "SSE/SSE2 enabled", fn);
9278 	  sorry ("this is a GCC bug that can be worked around by adding "
9279 		 "attribute used to function called");
9280 	}
9281       else if ((sse_level >= 1 && mode == SFmode)
9282 	       || (sse_level == 2 && mode == DFmode))
9283 	regno = FIRST_SSE_REG;
9284     }
9285 
9286   /* OImode shouldn't be used directly.  */
9287   gcc_assert (mode != OImode);
9288 
9289   return gen_rtx_REG (orig_mode, regno);
9290 }
9291 
9292 static rtx
9293 function_value_64 (machine_mode orig_mode, machine_mode mode,
9294 		   const_tree valtype)
9295 {
9296   rtx ret;
9297 
9298   /* Handle libcalls, which don't provide a type node.  */
9299   if (valtype == NULL)
9300     {
9301       unsigned int regno;
9302 
9303       switch (mode)
9304 	{
9305 	case E_SFmode:
9306 	case E_SCmode:
9307 	case E_DFmode:
9308 	case E_DCmode:
9309 	case E_TFmode:
9310 	case E_SDmode:
9311 	case E_DDmode:
9312 	case E_TDmode:
9313 	  regno = FIRST_SSE_REG;
9314 	  break;
9315 	case E_XFmode:
9316 	case E_XCmode:
9317 	  regno = FIRST_FLOAT_REG;
9318 	  break;
9319 	case E_TCmode:
9320 	  return NULL;
9321 	default:
9322 	  regno = AX_REG;
9323 	}
9324 
9325       return gen_rtx_REG (mode, regno);
9326     }
9327   else if (POINTER_TYPE_P (valtype))
9328     {
9329       /* Pointers are always returned in word_mode.  */
9330       mode = word_mode;
9331     }
9332 
9333   ret = construct_container (mode, orig_mode, valtype, 1,
9334 			     X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
9335 			     x86_64_int_return_registers, 0);
9336 
9337   /* For zero sized structures, construct_container returns NULL, but we
9338      need to keep the rest of the compiler happy by returning a meaningful value.  */
9339   if (!ret)
9340     ret = gen_rtx_REG (orig_mode, AX_REG);
9341 
9342   return ret;
9343 }
9344 
9345 static rtx
9346 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
9347 		      const_tree valtype)
9348 {
9349   unsigned int regno = AX_REG;
9350 
9351   if (TARGET_SSE)
9352     {
9353       switch (GET_MODE_SIZE (mode))
9354 	{
9355 	case 16:
9356 	  if (valtype != NULL_TREE
9357 	      && !VECTOR_INTEGER_TYPE_P (valtype)
9359 	      && !INTEGRAL_TYPE_P (valtype)
9360 	      && !VECTOR_FLOAT_TYPE_P (valtype))
9361 	    break;
9362 	  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
9363 	      && !COMPLEX_MODE_P (mode))
9364 	    regno = FIRST_SSE_REG;
9365 	  break;
9366 	case 8:
9367 	case 4:
9368 	  if (mode == SFmode || mode == DFmode)
9369 	    regno = FIRST_SSE_REG;
9370 	  break;
9371 	default:
9372 	  break;
9373         }
9374     }
9375   return gen_rtx_REG (orig_mode, regno);
9376 }
9377 
9378 static rtx
9379 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
9380 		       machine_mode orig_mode, machine_mode mode)
9381 {
9382   const_tree fn, fntype;
9383 
9384   fn = NULL_TREE;
9385   if (fntype_or_decl && DECL_P (fntype_or_decl))
9386     fn = fntype_or_decl;
9387   fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
9388 
9389   if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
9390       || POINTER_BOUNDS_MODE_P (mode))
9391     return gen_rtx_REG (BNDmode, FIRST_BND_REG);
9392   else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
9393     return function_value_ms_64 (orig_mode, mode, valtype);
9394   else if (TARGET_64BIT)
9395     return function_value_64 (orig_mode, mode, valtype);
9396   else
9397     return function_value_32 (orig_mode, mode, fntype, fn);
9398 }
9399 
9400 static rtx
9401 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
9402 {
9403   machine_mode mode, orig_mode;
9404 
9405   orig_mode = TYPE_MODE (valtype);
9406   mode = type_natural_mode (valtype, NULL, true);
9407   return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
9408 }
9409 
9410 /*  Return an RTX representing a place where a function returns
9411     or receives pointer bounds, or NULL if no bounds are returned.
9412 
9413     VALTYPE is a data type of a value returned by the function.
9414 
9415     FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
9416     or FUNCTION_TYPE of the function.
9417 
9418     If OUTGOING is false, return a place in which the caller will
9419     see the return value.  Otherwise, return a place where a
9420     function returns a value.  */
9421 
9422 static rtx
9423 ix86_function_value_bounds (const_tree valtype,
9424 			    const_tree fntype_or_decl ATTRIBUTE_UNUSED,
9425 			    bool outgoing ATTRIBUTE_UNUSED)
9426 {
9427   rtx res = NULL_RTX;
9428 
9429   if (BOUNDED_TYPE_P (valtype))
9430     res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
9431   else if (chkp_type_has_pointer (valtype))
9432     {
9433       bitmap slots;
9434       rtx bounds[2];
9435       bitmap_iterator bi;
9436       unsigned i, bnd_no = 0;
9437 
9438       bitmap_obstack_initialize (NULL);
9439       slots = BITMAP_ALLOC (NULL);
9440       chkp_find_bound_slots (valtype, slots);
9441 
9442       EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
9443 	{
9444 	  rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
9445 	  rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
9446 	  gcc_assert (bnd_no < 2);
9447 	  bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
9448 	}
9449 
9450       res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
9451 
9452       BITMAP_FREE (slots);
9453       bitmap_obstack_release (NULL);
9454     }
9455   else
9456     res = NULL_RTX;
9457 
9458   return res;
9459 }
9460 
9461 /* Pointer function arguments and return values are promoted to
9462    word_mode for normal functions.  */
9463 
9464 static machine_mode
9465 ix86_promote_function_mode (const_tree type, machine_mode mode,
9466 			    int *punsignedp, const_tree fntype,
9467 			    int for_return)
9468 {
9469   if (cfun->machine->func_type == TYPE_NORMAL
9470       && type != NULL_TREE
9471       && POINTER_TYPE_P (type))
9472     {
9473       *punsignedp = POINTERS_EXTEND_UNSIGNED;
9474       return word_mode;
9475     }
9476   return default_promote_function_mode (type, mode, punsignedp, fntype,
9477 					for_return);
9478 }
9479 
9480 /* Return true if a structure, union or array with MODE containing FIELD
9481    should be accessed using BLKmode.  */
9482 
9483 static bool
9484 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
9485 {
9486   /* Union with XFmode must be in BLKmode.  */
9487   return (mode == XFmode
9488 	  && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
9489 	      || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
9490 }
9491 
9492 rtx
9493 ix86_libcall_value (machine_mode mode)
9494 {
9495   return ix86_function_value_1 (NULL, NULL, mode, mode);
9496 }
9497 
9498 /* Return true iff type is returned in memory.  */
9499 
9500 static bool
9501 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9502 {
9503 #ifdef SUBTARGET_RETURN_IN_MEMORY
9504   return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
9505 #else
9506   const machine_mode mode = type_natural_mode (type, NULL, true);
9507   HOST_WIDE_INT size;
9508 
9509   if (POINTER_BOUNDS_TYPE_P (type))
9510     return false;
9511 
9512   if (TARGET_64BIT)
9513     {
9514       if (ix86_function_type_abi (fntype) == MS_ABI)
9515 	{
9516 	  size = int_size_in_bytes (type);
9517 
9518 	  /* __m128 is returned in xmm0.  */
9519 	  if ((!type || VECTOR_INTEGER_TYPE_P (type)
9520 	       || INTEGRAL_TYPE_P (type)
9521 	       || VECTOR_FLOAT_TYPE_P (type))
9522 	      && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
9523 	      && !COMPLEX_MODE_P (mode)
9524 	      && (GET_MODE_SIZE (mode) == 16 || size == 16))
9525 	    return false;
9526 
9527 	  /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes.  */
9528 	  return size != 1 && size != 2 && size != 4 && size != 8;
9529 	}
9530       else
9531 	{
9532 	  int needed_intregs, needed_sseregs;
9533 
9534 	  return examine_argument (mode, type, 1,
9535 				   &needed_intregs, &needed_sseregs);
9536 	}
9537     }
9538   else
9539     {
9540       size = int_size_in_bytes (type);
9541 
9542       /* Intel MCU psABI returns scalars and aggregates no larger than 8
9543 	 bytes in registers.  */
9544       if (TARGET_IAMCU)
9545 	return VECTOR_MODE_P (mode) || size < 0 || size > 8;
9546 
9547       if (mode == BLKmode)
9548 	return true;
9549 
9550       if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
9551 	return false;
9552 
9553       if (VECTOR_MODE_P (mode) || mode == TImode)
9554 	{
9555 	  /* User-created vectors small enough to fit in EAX.  */
9556 	  if (size < 8)
9557 	    return false;
9558 
9559 	  /* Unless the ABI prescribes otherwise,
9560 	     MMX/3dNow values are returned in MM0 if available.  */
9561 
9562 	  if (size == 8)
9563 	    return TARGET_VECT8_RETURNS || !TARGET_MMX;
9564 
9565 	  /* SSE values are returned in XMM0 if available.  */
9566 	  if (size == 16)
9567 	    return !TARGET_SSE;
9568 
9569 	  /* AVX values are returned in YMM0 if available.  */
9570 	  if (size == 32)
9571 	    return !TARGET_AVX;
9572 
9573 	  /* AVX512F values are returned in ZMM0 if available.  */
9574 	  if (size == 64)
9575 	    return !TARGET_AVX512F;
9576 	}
9577 
9578       if (mode == XFmode)
9579 	return false;
9580 
9581       if (size > 12)
9582 	return true;
9583 
9584       /* OImode shouldn't be used directly.  */
9585       gcc_assert (mode != OImode);
9586 
9587       return false;
9588     }
9589 #endif
9590 }
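
/* A few concrete outcomes of the 32-bit branch above: 16-byte vectors
   are returned in XMM0 whenever SSE is enabled, 8-byte vectors in MM0
   when MMX is available and TARGET_VECT8_RETURNS is not set, long
   double comes back in st(0) rather than memory, and anything wider
   than 12 bytes that did not match a vector case goes to memory.  */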
9591 
9592 
9593 /* Create the va_list data type.  */
9594 
9595 static tree
9596 ix86_build_builtin_va_list_64 (void)
9597 {
9598   tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
9599 
9600   record = lang_hooks.types.make_type (RECORD_TYPE);
9601   type_decl = build_decl (BUILTINS_LOCATION,
9602 			  TYPE_DECL, get_identifier ("__va_list_tag"), record);
9603 
9604   f_gpr = build_decl (BUILTINS_LOCATION,
9605 		      FIELD_DECL, get_identifier ("gp_offset"),
9606 		      unsigned_type_node);
9607   f_fpr = build_decl (BUILTINS_LOCATION,
9608 		      FIELD_DECL, get_identifier ("fp_offset"),
9609 		      unsigned_type_node);
9610   f_ovf = build_decl (BUILTINS_LOCATION,
9611 		      FIELD_DECL, get_identifier ("overflow_arg_area"),
9612 		      ptr_type_node);
9613   f_sav = build_decl (BUILTINS_LOCATION,
9614 		      FIELD_DECL, get_identifier ("reg_save_area"),
9615 		      ptr_type_node);
9616 
9617   va_list_gpr_counter_field = f_gpr;
9618   va_list_fpr_counter_field = f_fpr;
9619 
9620   DECL_FIELD_CONTEXT (f_gpr) = record;
9621   DECL_FIELD_CONTEXT (f_fpr) = record;
9622   DECL_FIELD_CONTEXT (f_ovf) = record;
9623   DECL_FIELD_CONTEXT (f_sav) = record;
9624 
9625   TYPE_STUB_DECL (record) = type_decl;
9626   TYPE_NAME (record) = type_decl;
9627   TYPE_FIELDS (record) = f_gpr;
9628   DECL_CHAIN (f_gpr) = f_fpr;
9629   DECL_CHAIN (f_fpr) = f_ovf;
9630   DECL_CHAIN (f_ovf) = f_sav;
9631 
9632   layout_type (record);
9633 
9634   TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
9635 					NULL_TREE, TYPE_ATTRIBUTES (record));
9636 
9637   /* The correct type is an array type of one element.  */
9638   return build_array_type (record, build_index_type (size_zero_node));
9639 }
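
/* Illustrative sketch (not part of the implementation): the record built
   above corresponds to the user-visible SysV x86-64 va_list, roughly

       typedef struct __va_list_tag
       {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag;
       typedef __va_list_tag __builtin_va_list[1];

   gp_offset and fp_offset index into reg_save_area, while
   overflow_arg_area points at the stack-passed arguments.  */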
9640 
9641 /* Set up the builtin va_list data type and, for 64-bit, the additional
9642    calling-convention-specific va_list data types.  */
9643 
9644 static tree
9645 ix86_build_builtin_va_list (void)
9646 {
9647   if (TARGET_64BIT)
9648     {
9649       /* Initialize ABI specific va_list builtin types.
9650 
9651 	 In lto1, we can encounter two va_list types:
9652 	 - one as a result of the type-merge across TUs, and
9653 	 - the one constructed here.
9654 	 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
9655 	 a type identity check in canonical_va_list_type based on
9656 	 TYPE_MAIN_VARIANT (which we used to have) will not work.
9657 	 Instead, we tag each va_list_type_node with its unique attribute, and
9658 	 look for the attribute in the type identity check in
9659 	 canonical_va_list_type.
9660 
9661 	 Tagging sysv_va_list_type_node directly with the attribute is
9662 	 problematic since it's an array of one record, which will decay into a
9663 	 pointer to a record when used as a parameter (see build_va_arg comments
9664 	 for an example), dropping the attribute in the process.  So we tag the
9665 	 record instead.  */
9666 
9667       /* For SYSV_ABI we use an array of one record.  */
9668       sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
9669 
9670       /* For MS_ABI we use plain pointer to argument area.  */
9671       tree char_ptr_type = build_pointer_type (char_type_node);
9672       tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
9673 			     TYPE_ATTRIBUTES (char_ptr_type));
9674       ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
9675 
9676       return ((ix86_abi == MS_ABI)
9677 	      ? ms_va_list_type_node
9678 	      : sysv_va_list_type_node);
9679     }
9680   else
9681     {
9682       /* For i386 we use plain pointer to argument area.  */
9683       return build_pointer_type (char_type_node);
9684     }
9685 }
9686 
9687 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */
9688 
9689 static void
9690 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
9691 {
9692   rtx save_area, mem;
9693   alias_set_type set;
9694   int i, max;
9695 
9696   /* GPR size of varargs save area.  */
9697   if (cfun->va_list_gpr_size)
9698     ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
9699   else
9700     ix86_varargs_gpr_size = 0;
9701 
9702   /* FPR size of varargs save area.  We don't need it if we don't pass
9703      anything in SSE registers.  */
9704   if (TARGET_SSE && cfun->va_list_fpr_size)
9705     ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
9706   else
9707     ix86_varargs_fpr_size = 0;
9708 
9709   if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
9710     return;
9711 
9712   save_area = frame_pointer_rtx;
9713   set = get_varargs_alias_set ();
9714 
9715   max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
9716   if (max > X86_64_REGPARM_MAX)
9717     max = X86_64_REGPARM_MAX;
9718 
9719   for (i = cum->regno; i < max; i++)
9720     {
9721       mem = gen_rtx_MEM (word_mode,
9722 			 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
9723       MEM_NOTRAP_P (mem) = 1;
9724       set_mem_alias_set (mem, set);
9725       emit_move_insn (mem,
9726 		      gen_rtx_REG (word_mode,
9727 				   x86_64_int_parameter_registers[i]));
9728     }
9729 
9730   if (ix86_varargs_fpr_size)
9731     {
9732       machine_mode smode;
9733       rtx_code_label *label;
9734       rtx test;
9735 
9736       /* Now emit code to save SSE registers.  The AX parameter contains number
9737 	 of SSE parameter registers used to call this function, though all we
9738 	 actually check here is the zero/non-zero status.  */
9739 
9740       label = gen_label_rtx ();
9741       test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
9742       emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
9743 				      label));
9744 
9745       /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
9746 	 we used movdqa (i.e. TImode) instead?  Perhaps even better would
9747 	 be if we could determine the real mode of the data, via a hook
9748 	 into pass_stdarg.  Ignore all that for now.  */
9749       smode = V4SFmode;
9750       if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
9751 	crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
9752 
9753       max = cum->sse_regno + cfun->va_list_fpr_size / 16;
9754       if (max > X86_64_SSE_REGPARM_MAX)
9755 	max = X86_64_SSE_REGPARM_MAX;
9756 
9757       for (i = cum->sse_regno; i < max; ++i)
9758 	{
9759 	  mem = plus_constant (Pmode, save_area,
9760 			       i * 16 + ix86_varargs_gpr_size);
9761 	  mem = gen_rtx_MEM (smode, mem);
9762 	  MEM_NOTRAP_P (mem) = 1;
9763 	  set_mem_alias_set (mem, set);
9764 	  set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
9765 
9766 	  emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
9767 	}
9768 
9769       emit_label (label);
9770     }
9771 }
9772 
9773 static void
9774 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
9775 {
9776   alias_set_type set = get_varargs_alias_set ();
9777   int i;
9778 
9779   /* Reset to zero, as there might have been a sysv va_arg used
9780      before.  */
9781   ix86_varargs_gpr_size = 0;
9782   ix86_varargs_fpr_size = 0;
9783 
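  /* Note: this spills the remaining integer parameter registers into the
     caller-allocated register parameter ("home") area of the MS ABI, so
     stdarg code can walk all arguments as one contiguous stack block.  */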
9784   for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
9785     {
9786       rtx reg, mem;
9787 
9788       mem = gen_rtx_MEM (Pmode,
9789 			 plus_constant (Pmode, virtual_incoming_args_rtx,
9790 					i * UNITS_PER_WORD));
9791       MEM_NOTRAP_P (mem) = 1;
9792       set_mem_alias_set (mem, set);
9793 
9794       reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
9795       emit_move_insn (mem, reg);
9796     }
9797 }
9798 
9799 static void
9800 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
9801 			     tree type, int *, int no_rtl)
9802 {
9803   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9804   CUMULATIVE_ARGS next_cum;
9805   tree fntype;
9806 
9807   /* This argument doesn't appear to be used anymore.  Which is good,
9808      because the old code here didn't suppress rtl generation.  */
9809   gcc_assert (!no_rtl);
9810 
9811   if (!TARGET_64BIT)
9812     return;
9813 
9814   fntype = TREE_TYPE (current_function_decl);
9815 
9816   /* For varargs, we do not want to skip the dummy va_dcl argument.
9817      For stdargs, we do want to skip the last named argument.  */
9818   next_cum = *cum;
9819   if (stdarg_p (fntype))
9820     ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
9821 			       true);
9822 
9823   if (cum->call_abi == MS_ABI)
9824     setup_incoming_varargs_ms_64 (&next_cum);
9825   else
9826     setup_incoming_varargs_64 (&next_cum);
9827 }
9828 
9829 static void
9830 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
9831 				   machine_mode mode,
9832 				   tree type,
9833 				   int *pretend_size ATTRIBUTE_UNUSED,
9834 				   int no_rtl)
9835 {
9836   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9837   CUMULATIVE_ARGS next_cum;
9838   tree fntype;
9839   rtx save_area;
9840   int bnd_reg, i, max;
9841 
9842   gcc_assert (!no_rtl);
9843 
9844   /* Do nothing if we use plain pointer to argument area.  */
9845   if (!TARGET_64BIT || cum->call_abi == MS_ABI)
9846     return;
9847 
9848   fntype = TREE_TYPE (current_function_decl);
9849 
9850   /* For varargs, we do not want to skip the dummy va_dcl argument.
9851      For stdargs, we do want to skip the last named argument.  */
9852   next_cum = *cum;
9853   if (stdarg_p (fntype))
9854     ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
9855 			       true);
9856   save_area = frame_pointer_rtx;
9857 
9858   max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
9859   if (max > X86_64_REGPARM_MAX)
9860     max = X86_64_REGPARM_MAX;
9861 
9862   bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
9863   if (chkp_function_instrumented_p (current_function_decl))
9864     for (i = cum->regno; i < max; i++)
9865       {
9866 	rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
9867 	rtx ptr = gen_rtx_REG (Pmode,
9868 			       x86_64_int_parameter_registers[i]);
9869 	rtx bounds;
9870 
9871 	if (bnd_reg <= LAST_BND_REG)
9872 	  bounds = gen_rtx_REG (BNDmode, bnd_reg);
9873 	else
9874 	  {
9875 	    rtx ldx_addr =
9876 	      plus_constant (Pmode, arg_pointer_rtx,
9877 			     (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
9878 	    bounds = gen_reg_rtx (BNDmode);
9879 	    emit_insn (BNDmode == BND64mode
9880 		       ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
9881 		       : gen_bnd32_ldx (bounds, ldx_addr, ptr));
9882 	  }
9883 
9884 	emit_insn (BNDmode == BND64mode
9885 		   ? gen_bnd64_stx (addr, ptr, bounds)
9886 		   : gen_bnd32_stx (addr, ptr, bounds));
9887 
9888 	bnd_reg++;
9889       }
9890 }
9891 
9892 
9893 /* Check whether TYPE is a va_list of kind char *.  */
9894 
9895 static bool
9896 is_va_list_char_pointer (tree type)
9897 {
9898   tree canonic;
9899 
9900   /* For 32-bit it is always true.  */
9901   if (!TARGET_64BIT)
9902     return true;
9903   canonic = ix86_canonical_va_list_type (type);
9904   return (canonic == ms_va_list_type_node
9905           || (ix86_abi == MS_ABI && canonic == va_list_type_node));
9906 }
9907 
9908 /* Implement va_start.  */
9909 
9910 static void
9911 ix86_va_start (tree valist, rtx nextarg)
9912 {
9913   HOST_WIDE_INT words, n_gpr, n_fpr;
9914   tree f_gpr, f_fpr, f_ovf, f_sav;
9915   tree gpr, fpr, ovf, sav, t;
9916   tree type;
9917   rtx ovf_rtx;
9918 
9919   if (flag_split_stack
9920       && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9921     {
9922       unsigned int scratch_regno;
9923 
9924       /* When we are splitting the stack, we can't refer to the stack
9925 	 arguments using internal_arg_pointer, because they may be on
9926 	 the old stack.  The split stack prologue will arrange to
9927 	 leave a pointer to the old stack arguments in a scratch
9928 	 register, which we here copy to a pseudo-register.  The split
9929 	 stack prologue can't set the pseudo-register directly because
9930 	 it (the prologue) runs before any registers have been saved.  */
9931 
9932       scratch_regno = split_stack_prologue_scratch_regno ();
9933       if (scratch_regno != INVALID_REGNUM)
9934 	{
9935 	  rtx reg;
9936 	  rtx_insn *seq;
9937 
9938 	  reg = gen_reg_rtx (Pmode);
9939 	  cfun->machine->split_stack_varargs_pointer = reg;
9940 
9941 	  start_sequence ();
9942 	  emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
9943 	  seq = get_insns ();
9944 	  end_sequence ();
9945 
9946 	  push_topmost_sequence ();
9947 	  emit_insn_after (seq, entry_of_function ());
9948 	  pop_topmost_sequence ();
9949 	}
9950     }
9951 
9952   /* Only 64bit target needs something special.  */
9953   if (is_va_list_char_pointer (TREE_TYPE (valist)))
9954     {
9955       if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9956 	std_expand_builtin_va_start (valist, nextarg);
9957       else
9958 	{
9959 	  rtx va_r, next;
9960 
9961 	  va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
9962 	  next = expand_binop (ptr_mode, add_optab,
9963 			       cfun->machine->split_stack_varargs_pointer,
9964 			       crtl->args.arg_offset_rtx,
9965 			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
9966 	  convert_move (va_r, next, 0);
9967 
9968 	  /* Store zero bounds for va_list.  */
9969 	  if (chkp_function_instrumented_p (current_function_decl))
9970 	    chkp_expand_bounds_reset_for_mem (valist,
9971 					      make_tree (TREE_TYPE (valist),
9972 							 next));
9973 
9974 	}
9975       return;
9976     }
9977 
9978   f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9979   f_fpr = DECL_CHAIN (f_gpr);
9980   f_ovf = DECL_CHAIN (f_fpr);
9981   f_sav = DECL_CHAIN (f_ovf);
9982 
9983   valist = build_simple_mem_ref (valist);
9984   TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
9985   /* The following should be folded into the MEM_REF offset.  */
9986   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
9987 		f_gpr, NULL_TREE);
9988   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
9989 		f_fpr, NULL_TREE);
9990   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
9991 		f_ovf, NULL_TREE);
9992   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
9993 		f_sav, NULL_TREE);
9994 
9995   /* Count number of gp and fp argument registers used.  */
9996   words = crtl->args.info.words;
9997   n_gpr = crtl->args.info.regno;
9998   n_fpr = crtl->args.info.sse_regno;
9999 
10000   if (cfun->va_list_gpr_size)
10001     {
10002       type = TREE_TYPE (gpr);
10003       t = build2 (MODIFY_EXPR, type,
10004 		  gpr, build_int_cst (type, n_gpr * 8));
10005       TREE_SIDE_EFFECTS (t) = 1;
10006       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10007     }
10008 
10009   if (TARGET_SSE && cfun->va_list_fpr_size)
10010     {
10011       type = TREE_TYPE (fpr);
10012       t = build2 (MODIFY_EXPR, type, fpr,
10013 		  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
10014       TREE_SIDE_EFFECTS (t) = 1;
10015       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10016     }
10017 
10018   /* Find the overflow area.  */
10019   type = TREE_TYPE (ovf);
10020   if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
10021     ovf_rtx = crtl->args.internal_arg_pointer;
10022   else
10023     ovf_rtx = cfun->machine->split_stack_varargs_pointer;
10024   t = make_tree (type, ovf_rtx);
10025   if (words != 0)
10026     t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
10027 
10028   /* Store zero bounds for overflow area pointer.  */
10029   if (chkp_function_instrumented_p (current_function_decl))
10030     chkp_expand_bounds_reset_for_mem (ovf, t);
10031 
10032   t = build2 (MODIFY_EXPR, type, ovf, t);
10033   TREE_SIDE_EFFECTS (t) = 1;
10034   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10035 
10036   if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
10037     {
10038       /* Find the register save area.
10039 	 The function prologue saves it right above the stack frame.  */
10040       type = TREE_TYPE (sav);
10041       t = make_tree (type, frame_pointer_rtx);
10042       if (!ix86_varargs_gpr_size)
10043 	t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
10044 
10045       /* Store zero bounds for save area pointer.  */
10046       if (chkp_function_instrumented_p (current_function_decl))
10047 	chkp_expand_bounds_reset_for_mem (sav, t);
10048 
10049       t = build2 (MODIFY_EXPR, type, sav, t);
10050       TREE_SIDE_EFFECTS (t) = 1;
10051       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10052     }
10053 }
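
/* After ix86_va_start, the va_list fields hold (a sketch, derived from the
   assignments above, for the fields that are actually in use):
   gp_offset = 8 * <named GP regs used>, fp_offset = 8 * X86_64_REGPARM_MAX
   + 16 * <named SSE regs used>, overflow_arg_area points just past the
   named stack arguments, and reg_save_area points at the register save
   area laid out by the prologue.  */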
10054 
10055 /* Implement va_arg.  */
10056 
10057 static tree
10058 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
10059 		      gimple_seq *post_p)
10060 {
10061   static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
10062   tree f_gpr, f_fpr, f_ovf, f_sav;
10063   tree gpr, fpr, ovf, sav, t;
10064   int size, rsize;
10065   tree lab_false, lab_over = NULL_TREE;
10066   tree addr, t2;
10067   rtx container;
10068   int indirect_p = 0;
10069   tree ptrtype;
10070   machine_mode nat_mode;
10071   unsigned int arg_boundary;
10072 
10073   /* Only 64bit target needs something special.  */
10074   if (is_va_list_char_pointer (TREE_TYPE (valist)))
10075     return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
10076 
10077   f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
10078   f_fpr = DECL_CHAIN (f_gpr);
10079   f_ovf = DECL_CHAIN (f_fpr);
10080   f_sav = DECL_CHAIN (f_ovf);
10081 
10082   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
10083 		valist, f_gpr, NULL_TREE);
10084 
10085   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
10086   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
10087   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
10088 
10089   indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
10090   if (indirect_p)
10091     type = build_pointer_type (type);
10092   size = arg_int_size_in_bytes (type);
10093   rsize = CEIL (size, UNITS_PER_WORD);
10094 
10095   nat_mode = type_natural_mode (type, NULL, false);
10096   switch (nat_mode)
10097     {
10098     case E_V8SFmode:
10099     case E_V8SImode:
10100     case E_V32QImode:
10101     case E_V16HImode:
10102     case E_V4DFmode:
10103     case E_V4DImode:
10104     case E_V16SFmode:
10105     case E_V16SImode:
10106     case E_V64QImode:
10107     case E_V32HImode:
10108     case E_V8DFmode:
10109     case E_V8DImode:
10110       /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
10111       if (!TARGET_64BIT_MS_ABI)
10112 	{
10113 	  container = NULL;
10114 	  break;
10115 	}
10116       /* FALLTHRU */
10117 
10118     default:
10119       container = construct_container (nat_mode, TYPE_MODE (type),
10120 				       type, 0, X86_64_REGPARM_MAX,
10121 				       X86_64_SSE_REGPARM_MAX, intreg,
10122 				       0);
10123       break;
10124     }
10125 
10126   /* Pull the value out of the saved registers.  */
10127 
10128   addr = create_tmp_var (ptr_type_node, "addr");
10129 
10130   if (container)
10131     {
10132       int needed_intregs, needed_sseregs;
10133       bool need_temp;
10134       tree int_addr, sse_addr;
10135 
10136       lab_false = create_artificial_label (UNKNOWN_LOCATION);
10137       lab_over = create_artificial_label (UNKNOWN_LOCATION);
10138 
10139       examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
10140 
10141       need_temp = (!REG_P (container)
10142 		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
10143 		       || TYPE_ALIGN (type) > 128));
10144 
10145       /* In case we are passing a structure, verify that it is a consecutive
10146          block in the register save area.  If not, we need to do moves.  */
10147       if (!need_temp && !REG_P (container))
10148 	{
10149 	  /* Verify that all registers are strictly consecutive  */
10150 	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
10151 	    {
10152 	      int i;
10153 
10154 	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
10155 		{
10156 		  rtx slot = XVECEXP (container, 0, i);
10157 		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
10158 		      || INTVAL (XEXP (slot, 1)) != i * 16)
10159 		    need_temp = true;
10160 		}
10161 	    }
10162 	  else
10163 	    {
10164 	      int i;
10165 
10166 	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
10167 		{
10168 		  rtx slot = XVECEXP (container, 0, i);
10169 		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
10170 		      || INTVAL (XEXP (slot, 1)) != i * 8)
10171 		    need_temp = true;
10172 		}
10173 	    }
10174 	}
10175       if (!need_temp)
10176 	{
10177 	  int_addr = addr;
10178 	  sse_addr = addr;
10179 	}
10180       else
10181 	{
10182 	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
10183 	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
10184 	}
10185 
10186       /* First ensure that we fit completely in registers.  */
10187       if (needed_intregs)
10188 	{
10189 	  t = build_int_cst (TREE_TYPE (gpr),
10190 			     (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
10191 	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
10192 	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
10193 	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
10194 	  gimplify_and_add (t, pre_p);
10195 	}
10196       if (needed_sseregs)
10197 	{
10198 	  t = build_int_cst (TREE_TYPE (fpr),
10199 			     (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
10200 			     + X86_64_REGPARM_MAX * 8);
10201 	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
10202 	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
10203 	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
10204 	  gimplify_and_add (t, pre_p);
10205 	}
10206 
10207       /* Compute index to start of area used for integer regs.  */
10208       if (needed_intregs)
10209 	{
10210 	  /* int_addr = gpr + sav; */
10211 	  t = fold_build_pointer_plus (sav, gpr);
10212 	  gimplify_assign (int_addr, t, pre_p);
10213 	}
10214       if (needed_sseregs)
10215 	{
10216 	  /* sse_addr = fpr + sav; */
10217 	  t = fold_build_pointer_plus (sav, fpr);
10218 	  gimplify_assign (sse_addr, t, pre_p);
10219 	}
10220       if (need_temp)
10221 	{
10222 	  int i, prev_size = 0;
10223 	  tree temp = create_tmp_var (type, "va_arg_tmp");
10224 
10225 	  /* addr = &temp; */
10226 	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
10227 	  gimplify_assign (addr, t, pre_p);
10228 
10229 	  for (i = 0; i < XVECLEN (container, 0); i++)
10230 	    {
10231 	      rtx slot = XVECEXP (container, 0, i);
10232 	      rtx reg = XEXP (slot, 0);
10233 	      machine_mode mode = GET_MODE (reg);
10234 	      tree piece_type;
10235 	      tree addr_type;
10236 	      tree daddr_type;
10237 	      tree src_addr, src;
10238 	      int src_offset;
10239 	      tree dest_addr, dest;
10240 	      int cur_size = GET_MODE_SIZE (mode);
10241 
10242 	      gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
10243 	      prev_size = INTVAL (XEXP (slot, 1));
10244 	      if (prev_size + cur_size > size)
10245 		{
10246 		  cur_size = size - prev_size;
10247 		  unsigned int nbits = cur_size * BITS_PER_UNIT;
10248 		  if (!int_mode_for_size (nbits, 1).exists (&mode))
10249 		    mode = QImode;
10250 		}
10251 	      piece_type = lang_hooks.types.type_for_mode (mode, 1);
10252 	      if (mode == GET_MODE (reg))
10253 		addr_type = build_pointer_type (piece_type);
10254 	      else
10255 		addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
10256 							 true);
10257 	      daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
10258 							true);
10259 
10260 	      if (SSE_REGNO_P (REGNO (reg)))
10261 		{
10262 		  src_addr = sse_addr;
10263 		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
10264 		}
10265 	      else
10266 		{
10267 		  src_addr = int_addr;
10268 		  src_offset = REGNO (reg) * 8;
10269 		}
10270 	      src_addr = fold_convert (addr_type, src_addr);
10271 	      src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
10272 
10273 	      dest_addr = fold_convert (daddr_type, addr);
10274 	      dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
10275 	      if (cur_size == GET_MODE_SIZE (mode))
10276 		{
10277 		  src = build_va_arg_indirect_ref (src_addr);
10278 		  dest = build_va_arg_indirect_ref (dest_addr);
10279 
10280 		  gimplify_assign (dest, src, pre_p);
10281 		}
10282 	      else
10283 		{
10284 		  tree copy
10285 		    = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
10286 				       3, dest_addr, src_addr,
10287 				       size_int (cur_size));
10288 		  gimplify_and_add (copy, pre_p);
10289 		}
10290 	      prev_size += cur_size;
10291 	    }
10292 	}
10293 
10294       if (needed_intregs)
10295 	{
10296 	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
10297 		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
10298 	  gimplify_assign (gpr, t, pre_p);
10299 	}
10300 
10301       if (needed_sseregs)
10302 	{
10303 	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
10304 		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
10305 	  gimplify_assign (unshare_expr (fpr), t, pre_p);
10306 	}
10307 
10308       gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
10309 
10310       gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
10311     }
10312 
10313   /* ... otherwise out of the overflow area.  */
10314 
10315   /* When we align a parameter on the stack for the caller, if the parameter
10316      alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
10317      aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We match the callee
10318      here with the caller.  */
10319   arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
10320   if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
10321     arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
10322 
10323   /* Care for on-stack alignment if needed.  */
10324   if (arg_boundary <= 64 || size == 0)
10325     t = ovf;
10326   else
10327     {
10328       HOST_WIDE_INT align = arg_boundary / 8;
10329       t = fold_build_pointer_plus_hwi (ovf, align - 1);
10330       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
10331 		  build_int_cst (TREE_TYPE (t), -align));
10332     }
10333 
10334   gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
10335   gimplify_assign (addr, t, pre_p);
10336 
10337   t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
10338   gimplify_assign (unshare_expr (ovf), t, pre_p);
10339 
10340   if (container)
10341     gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
10342 
10343   ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
10344   addr = fold_convert (ptrtype, addr);
10345 
10346   if (indirect_p)
10347     addr = build_va_arg_indirect_ref (addr);
10348   return build_va_arg_indirect_ref (addr);
10349 }
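
/* A rough C-level sketch of the code emitted above for a by-value argument
   that fits in general-purpose registers, where ap points to the
   __va_list_tag (the SSE and mixed cases follow the same pattern using
   fp_offset and 16-byte slots):

     if (ap->gp_offset <= 8 * (X86_64_REGPARM_MAX - needed_intregs))
       {
         addr = ap->reg_save_area + ap->gp_offset;
         ap->gp_offset += 8 * needed_intregs;
       }
     else
       {
         addr = align (ap->overflow_arg_area, arg_boundary);
         ap->overflow_arg_area = addr + rsize * UNITS_PER_WORD;
       }
     result = *(TYPE *) addr;
*/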
10350 
10351 /* Return true if OPNUM's MEM should be matched
10352    in movabs* patterns.  */
10353 
10354 bool
10355 ix86_check_movabs (rtx insn, int opnum)
10356 {
10357   rtx set, mem;
10358 
10359   set = PATTERN (insn);
10360   if (GET_CODE (set) == PARALLEL)
10361     set = XVECEXP (set, 0, 0);
10362   gcc_assert (GET_CODE (set) == SET);
10363   mem = XEXP (set, opnum);
10364   while (SUBREG_P (mem))
10365     mem = SUBREG_REG (mem);
10366   gcc_assert (MEM_P (mem));
10367   return volatile_ok || !MEM_VOLATILE_P (mem);
10368 }
10369 
10370 /* Return false if INSN contains a MEM with a non-default address space.  */
10371 bool
10372 ix86_check_no_addr_space (rtx insn)
10373 {
10374   subrtx_var_iterator::array_type array;
10375   FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
10376     {
10377       rtx x = *iter;
10378       if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
10379 	return false;
10380     }
10381   return true;
10382 }
10383 
10384 /* Initialize the table of extra 80387 mathematical constants.  */
10385 
10386 static void
10387 init_ext_80387_constants (void)
10388 {
10389   static const char * cst[5] =
10390   {
10391     "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
10392     "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
10393     "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
10394     "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
10395     "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
10396   };
10397   int i;
10398 
10399   for (i = 0; i < 5; i++)
10400     {
10401       real_from_string (&ext_80387_constants_table[i], cst[i]);
10402       /* Ensure each constant is rounded to XFmode precision.  */
10403       real_convert (&ext_80387_constants_table[i],
10404 		    XFmode, &ext_80387_constants_table[i]);
10405     }
10406 
10407   ext_80387_constants_init = 1;
10408 }
10409 
10410 /* Return non-zero if the constant is something that
10411    can be loaded with a special instruction.  */
10412 
10413 int
10414 standard_80387_constant_p (rtx x)
10415 {
10416   machine_mode mode = GET_MODE (x);
10417 
10418   const REAL_VALUE_TYPE *r;
10419 
10420   if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
10421     return -1;
10422 
10423   if (x == CONST0_RTX (mode))
10424     return 1;
10425   if (x == CONST1_RTX (mode))
10426     return 2;
10427 
10428   r = CONST_DOUBLE_REAL_VALUE (x);
10429 
10430   /* For XFmode constants, try to find a special 80387 instruction when
10431      optimizing for size or on those CPUs that benefit from them.  */
10432   if (mode == XFmode
10433       && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
10434     {
10435       int i;
10436 
10437       if (! ext_80387_constants_init)
10438 	init_ext_80387_constants ();
10439 
10440       for (i = 0; i < 5; i++)
10441         if (real_identical (r, &ext_80387_constants_table[i]))
10442 	  return i + 3;
10443     }
10444 
10445   /* Load of the constant -0.0 or -1.0 will be split as
10446      fldz;fchs or fld1;fchs sequence.  */
10447   if (real_isnegzero (r))
10448     return 8;
10449   if (real_identical (r, &dconstm1))
10450     return 9;
10451 
10452   return 0;
10453 }
10454 
10455 /* Return the opcode of the special instruction to be used to load
10456    the constant X.  */
10457 
10458 const char *
10459 standard_80387_constant_opcode (rtx x)
10460 {
10461   switch (standard_80387_constant_p (x))
10462     {
10463     case 1:
10464       return "fldz";
10465     case 2:
10466       return "fld1";
10467     case 3:
10468       return "fldlg2";
10469     case 4:
10470       return "fldln2";
10471     case 5:
10472       return "fldl2e";
10473     case 6:
10474       return "fldl2t";
10475     case 7:
10476       return "fldpi";
10477     case 8:
10478     case 9:
10479       return "#";
10480     default:
10481       gcc_unreachable ();
10482     }
10483 }
10484 
10485 /* Return the CONST_DOUBLE representing the 80387 constant that is
10486    loaded by the specified special instruction.  The argument IDX
10487    matches the return value from standard_80387_constant_p.  */
10488 
10489 rtx
10490 standard_80387_constant_rtx (int idx)
10491 {
10492   int i;
10493 
10494   if (! ext_80387_constants_init)
10495     init_ext_80387_constants ();
10496 
10497   switch (idx)
10498     {
10499     case 3:
10500     case 4:
10501     case 5:
10502     case 6:
10503     case 7:
10504       i = idx - 3;
10505       break;
10506 
10507     default:
10508       gcc_unreachable ();
10509     }
10510 
10511   return const_double_from_real_value (ext_80387_constants_table[i],
10512 				       XFmode);
10513 }
10514 
10515 /* Return 1 if X is all zero bits and 2 if X is all one bits
10516    in a supported SSE/AVX vector mode; return 0 otherwise.  */
10517 
10518 int
10519 standard_sse_constant_p (rtx x, machine_mode pred_mode)
10520 {
10521   machine_mode mode;
10522 
10523   if (!TARGET_SSE)
10524     return 0;
10525 
10526   mode = GET_MODE (x);
10527 
10528   if (x == const0_rtx || const0_operand (x, mode))
10529     return 1;
10530 
10531   if (x == constm1_rtx || vector_all_ones_operand (x, mode))
10532     {
10533       /* VOIDmode integer constant, get mode from the predicate.  */
10534       if (mode == VOIDmode)
10535 	mode = pred_mode;
10536 
10537       switch (GET_MODE_SIZE (mode))
10538 	{
10539 	case 64:
10540 	  if (TARGET_AVX512F)
10541 	    return 2;
10542 	  break;
10543 	case 32:
10544 	  if (TARGET_AVX2)
10545 	    return 2;
10546 	  break;
10547 	case 16:
10548 	  if (TARGET_SSE2)
10549 	    return 2;
10550 	  break;
10551 	case 0:
10552 	  /* VOIDmode */
10553 	  gcc_unreachable ();
10554 	default:
10555 	  break;
10556 	}
10557     }
10558 
10559   return 0;
10560 }
10561 
10562 /* Return the opcode of the special instruction to be used to load
10563    the constant operands[1] into operands[0].  */
10564 
10565 const char *
10566 standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
10567 {
10568   machine_mode mode;
10569   rtx x = operands[1];
10570 
10571   gcc_assert (TARGET_SSE);
10572 
10573   mode = GET_MODE (x);
10574 
10575   if (x == const0_rtx || const0_operand (x, mode))
10576     {
10577       switch (get_attr_mode (insn))
10578 	{
10579 	case MODE_TI:
10580 	  if (!EXT_REX_SSE_REG_P (operands[0]))
10581 	    return "%vpxor\t%0, %d0";
10582 	  /* FALLTHRU */
10583 	case MODE_XI:
10584 	case MODE_OI:
10585 	  if (EXT_REX_SSE_REG_P (operands[0]))
10586 	    return (TARGET_AVX512VL
10587 		    ? "vpxord\t%x0, %x0, %x0"
10588 		    : "vpxord\t%g0, %g0, %g0");
10589 	  return "vpxor\t%x0, %x0, %x0";
10590 
10591 	case MODE_V2DF:
10592 	  if (!EXT_REX_SSE_REG_P (operands[0]))
10593 	    return "%vxorpd\t%0, %d0";
10594 	  /* FALLTHRU */
10595 	case MODE_V8DF:
10596 	case MODE_V4DF:
10597 	  if (!EXT_REX_SSE_REG_P (operands[0]))
10598 	    return "vxorpd\t%x0, %x0, %x0";
10599 	  else if (TARGET_AVX512DQ)
10600 	    return (TARGET_AVX512VL
10601 		    ? "vxorpd\t%x0, %x0, %x0"
10602 		    : "vxorpd\t%g0, %g0, %g0");
10603 	  else
10604 	    return (TARGET_AVX512VL
10605 		    ? "vpxorq\t%x0, %x0, %x0"
10606 		    : "vpxorq\t%g0, %g0, %g0");
10607 
10608 	case MODE_V4SF:
10609 	  if (!EXT_REX_SSE_REG_P (operands[0]))
10610 	    return "%vxorps\t%0, %d0";
10611 	  /* FALLTHRU */
10612 	case MODE_V16SF:
10613 	case MODE_V8SF:
10614 	  if (!EXT_REX_SSE_REG_P (operands[0]))
10615 	    return "vxorps\t%x0, %x0, %x0";
10616 	  else if (TARGET_AVX512DQ)
10617 	    return (TARGET_AVX512VL
10618 		    ? "vxorps\t%x0, %x0, %x0"
10619 		    : "vxorps\t%g0, %g0, %g0");
10620 	  else
10621 	    return (TARGET_AVX512VL
10622 		    ? "vpxord\t%x0, %x0, %x0"
10623 		    : "vpxord\t%g0, %g0, %g0");
10624 
10625 	default:
10626 	  gcc_unreachable ();
10627 	}
10628     }
10629   else if (x == constm1_rtx || vector_all_ones_operand (x, mode))
10630     {
10631       enum attr_mode insn_mode = get_attr_mode (insn);
10632 
10633       switch (insn_mode)
10634 	{
10635 	case MODE_XI:
10636 	case MODE_V8DF:
10637 	case MODE_V16SF:
10638 	  gcc_assert (TARGET_AVX512F);
10639 	  return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
10640 
10641 	case MODE_OI:
10642 	case MODE_V4DF:
10643 	case MODE_V8SF:
10644 	  gcc_assert (TARGET_AVX2);
10645 	  /* FALLTHRU */
10646 	case MODE_TI:
10647 	case MODE_V2DF:
10648 	case MODE_V4SF:
10649 	  gcc_assert (TARGET_SSE2);
10650 	  if (!EXT_REX_SSE_REG_P (operands[0]))
10651 	    return (TARGET_AVX
10652 		    ? "vpcmpeqd\t%0, %0, %0"
10653 		    : "pcmpeqd\t%0, %0");
10654 	  else if (TARGET_AVX512VL)
10655 	    return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
10656 	  else
10657 	    return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
10658 
10659 	default:
10660 	  gcc_unreachable ();
10661 	}
10662    }
10663 
10664   gcc_unreachable ();
10665 }
10666 
10667 /* Returns true if INSN can be transformed from a memory load
10668    to a supported FP constant load.  */
10669 
10670 bool
10671 ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
10672 {
10673   rtx src = find_constant_src (insn);
10674 
10675   gcc_assert (REG_P (dst));
10676 
10677   if (src == NULL
10678       || (SSE_REGNO_P (REGNO (dst))
10679 	  && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
10680       || (STACK_REGNO_P (REGNO (dst))
10681 	   && standard_80387_constant_p (src) < 1))
10682     return false;
10683 
10684   return true;
10685 }
10686 
10687 /* Return true if OP contains a symbol reference.  */
10688 
10689 bool
10690 symbolic_reference_mentioned_p (rtx op)
10691 {
10692   const char *fmt;
10693   int i;
10694 
10695   if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
10696     return true;
10697 
10698   fmt = GET_RTX_FORMAT (GET_CODE (op));
10699   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
10700     {
10701       if (fmt[i] == 'E')
10702 	{
10703 	  int j;
10704 
10705 	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
10706 	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
10707 	      return true;
10708 	}
10709 
10710       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
10711 	return true;
10712     }
10713 
10714   return false;
10715 }
10716 
10717 /* Return true if it is appropriate to emit `ret' instructions in the
10718    body of a function.  Do this only if the epilogue is simple, needing a
10719    couple of insns.  Prior to reloading, we can't tell how many registers
10720    must be saved, so return false then.  Return false if there is no frame
10721    marker to de-allocate.  */
10722 
10723 bool
10724 ix86_can_use_return_insn_p (void)
10725 {
10726   if (ix86_function_naked (current_function_decl))
10727     return false;
10728 
10729   /* Don't use `ret' instruction in interrupt handler.  */
10730   if (! reload_completed
10731       || frame_pointer_needed
10732       || cfun->machine->func_type != TYPE_NORMAL)
10733     return false;
10734 
10735   /* Don't allow more than 32k pop, since that's all we can do
10736      with one instruction.  */
10737   if (crtl->args.pops_args && crtl->args.size >= 32768)
10738     return false;
10739 
10740   struct ix86_frame &frame = cfun->machine->frame;
10741   return (frame.stack_pointer_offset == UNITS_PER_WORD
10742 	  && (frame.nregs + frame.nsseregs) == 0);
10743 }
10744 
10745 /* Value should be nonzero if functions must have frame pointers.
10746    Zero means the frame pointer need not be set up (and parms may
10747    be accessed via the stack pointer) in functions that seem suitable.  */
10748 
10749 static bool
10750 ix86_frame_pointer_required (void)
10751 {
10752   /* If we accessed previous frames, then the generated code expects
10753      to be able to access the saved ebp value in our frame.  */
10754   if (cfun->machine->accesses_prev_frame)
10755     return true;
10756 
10757   /* Several x86 OSes need a frame pointer for other reasons,
10758      usually pertaining to setjmp.  */
10759   if (SUBTARGET_FRAME_POINTER_REQUIRED)
10760     return true;
10761 
10762   /* For older 32-bit runtimes, setjmp requires a valid frame pointer.  */
10763   if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
10764     return true;
10765 
10766   /* With Win64 SEH, very large frames need a frame pointer, as the maximum
10767      stack allocation is 4GB.  */
10768   if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
10769     return true;
10770 
10771   /* SSE saves require a frame pointer when the stack is misaligned.  */
10772   if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
10773     return true;
10774 
10775   /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
10776      turns off the frame pointer by default.  Turn it back on now if
10777      we've not got a leaf function.  */
10778   if (TARGET_OMIT_LEAF_FRAME_POINTER
10779       && (!crtl->is_leaf
10780 	  || ix86_current_function_calls_tls_descriptor))
10781     return true;
10782 
10783   if (crtl->profile && !flag_fentry)
10784     return true;
10785 
10786   return false;
10787 }
10788 
10789 /* Record that the current function accesses previous call frames.  */
10790 
10791 void
10792 ix86_setup_frame_addresses (void)
10793 {
10794   cfun->machine->accesses_prev_frame = 1;
10795 }
10796 
10797 #ifndef USE_HIDDEN_LINKONCE
10798 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
10799 #  define USE_HIDDEN_LINKONCE 1
10800 # else
10801 #  define USE_HIDDEN_LINKONCE 0
10802 # endif
10803 #endif
10804 
10805 /* Label count for call and return thunks.  It is used to make unique
10806    labels in call and return thunks.  */
10807 static int indirectlabelno;
10808 
10809 /* True if call thunk function is needed.  */
10810 static bool indirect_thunk_needed = false;
10811 /* True if call thunk function with the BND prefix is needed.  */
10812 static bool indirect_thunk_bnd_needed = false;
10813 
10814 /* Bit masks of integer registers, which contain branch target, used
10815    by call thunk functions.  */
10816 static int indirect_thunks_used;
10817 /* Bit masks of integer registers, which contain branch target, used
10818    by call thunk functions with the BND prefix.  */
10819 static int indirect_thunks_bnd_used;
10820 
10821 /* True if return thunk function is needed.  */
10822 static bool indirect_return_needed = false;
10823 /* True if return thunk function with the BND prefix is needed.  */
10824 static bool indirect_return_bnd_needed = false;
10825 
10826 /* True if return thunk function via CX is needed.  */
10827 static bool indirect_return_via_cx;
10828 /* True if return thunk function via CX with the BND prefix is
10829    needed.  */
10830 static bool indirect_return_via_cx_bnd;
10831 
10832 #ifndef INDIRECT_LABEL
10833 # define INDIRECT_LABEL "LIND"
10834 #endif
10835 
10836 /* Indicate what prefix is needed for an indirect branch.  */
10837 enum indirect_thunk_prefix
10838 {
10839   indirect_thunk_prefix_none,
10840   indirect_thunk_prefix_bnd,
10841   indirect_thunk_prefix_nt
10842 };
10843 
10844 /* Return the prefix needed for an indirect branch INSN.  */
10845 
10846 enum indirect_thunk_prefix
10847 indirect_thunk_need_prefix (rtx_insn *insn)
10848 {
10849   enum indirect_thunk_prefix need_prefix;
10850   if (ix86_bnd_prefixed_insn_p (insn))
10851     need_prefix = indirect_thunk_prefix_bnd;
10852   else if ((cfun->machine->indirect_branch_type
10853 	    == indirect_branch_thunk_extern)
10854 	   && ix86_notrack_prefixed_insn_p (insn))
10855     {
10856       /* NOTRACK prefix is only used with external thunk so that it
10857 	 can be properly updated to support CET at run-time.  */
10858       need_prefix = indirect_thunk_prefix_nt;
10859     }
10860   else
10861     need_prefix = indirect_thunk_prefix_none;
10862   return need_prefix;
10863 }
10864 
10865 /* Fills in the label name that should be used for the indirect thunk.  */
10866 
10867 static void
10868 indirect_thunk_name (char name[32], unsigned int regno,
10869 		     enum indirect_thunk_prefix need_prefix,
10870 		     bool ret_p)
10871 {
10872   if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
10873     gcc_unreachable ();
10874 
10875   if (USE_HIDDEN_LINKONCE)
10876     {
10877       const char *prefix;
10878 
10879       if (need_prefix == indirect_thunk_prefix_bnd)
10880 	prefix = "_bnd";
10881       else if (need_prefix == indirect_thunk_prefix_nt
10882 	       && regno != INVALID_REGNUM)
10883 	{
10884 	  /* NOTRACK prefix is only used with external thunk via
10885 	     register so that NOTRACK prefix can be added to indirect
10886 	     branch via register to support CET at run-time.  */
10887 	  prefix = "_nt";
10888 	}
10889       else
10890 	prefix = "";
10891 
10892       const char *ret = ret_p ? "return" : "indirect";
10893 
10894       if (regno != INVALID_REGNUM)
10895 	{
10896 	  const char *reg_prefix;
10897 	  if (LEGACY_INT_REGNO_P (regno))
10898 	    reg_prefix = TARGET_64BIT ? "r" : "e";
10899 	  else
10900 	    reg_prefix = "";
10901 	  sprintf (name, "__x86_%s_thunk%s_%s%s",
10902 		   ret, prefix, reg_prefix, reg_names[regno]);
10903 	}
10904       else
10905 	sprintf (name, "__x86_%s_thunk%s", ret, prefix);
10906     }
10907   else
10908     {
10909       if (regno != INVALID_REGNUM)
10910 	{
10911 	  if (need_prefix == indirect_thunk_prefix_bnd)
10912 	    ASM_GENERATE_INTERNAL_LABEL (name, "LITBR", regno);
10913 	  else
10914 	    ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
10915 	}
10916       else
10917 	{
10918 	  if (ret_p)
10919 	    {
10920 	      if (need_prefix == indirect_thunk_prefix_bnd)
10921 		ASM_GENERATE_INTERNAL_LABEL (name, "LRTB", 0);
10922 	      else
10923 		ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
10924 	    }
10925 	  else
10926 	    {
10927 	      if (need_prefix == indirect_thunk_prefix_bnd)
10928 		ASM_GENERATE_INTERNAL_LABEL (name, "LITB", 0);
10929 	      else
10930 		ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
10931 	    }
10932 	}
10933     }
10934 }
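
/* For example, with USE_HIDDEN_LINKONCE the names generated above are of
   the form "__x86_indirect_thunk" (target address on top of the stack),
   "__x86_indirect_thunk_rax" (target address in %rax) and
   "__x86_return_thunk" (function return).  */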
10935 
10936 /* Output a call and return thunk for indirect branch.  If NEED_PREFIX is
10937    indirect_thunk_prefix_bnd, the BND prefix is needed.  If REGNO is not
10938    INVALID_REGNUM, the function address is in REGNO and the thunk looks like:
10939 
10940 	call	L2
10941    L1:
10942 	pause
10943 	lfence
10944 	jmp	L1
10945    L2:
10946 	mov	%REG, (%sp)
10947 	ret
10948 
10949    Otherwise, the function address is on the top of stack and the
10950    call and return thunk looks like:
10951 
10952 	call L2
10953   L1:
10954 	pause
10955 	lfence
10956 	jmp L1
10957   L2:
10958 	lea WORD_SIZE(%sp), %sp
10959 	ret
10960  */
10961 
10962 static void
10963 output_indirect_thunk (enum indirect_thunk_prefix need_prefix,
10964 		       unsigned int regno)
10965 {
10966   char indirectlabel1[32];
10967   char indirectlabel2[32];
10968 
10969   ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
10970 			       indirectlabelno++);
10971   ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
10972 			       indirectlabelno++);
10973 
10974   /* Call */
10975   if (need_prefix == indirect_thunk_prefix_bnd)
10976     fputs ("\tbnd call\t", asm_out_file);
10977   else
10978     fputs ("\tcall\t", asm_out_file);
10979   assemble_name_raw (asm_out_file, indirectlabel2);
10980   fputc ('\n', asm_out_file);
10981 
10982   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
10983 
10984   /* AMD and Intel CPUs each prefer a different instruction as loop filler.
10985      Using both pause + lfence is a compromise solution.  */
10986   fprintf (asm_out_file, "\tpause\n\tlfence\n");
10987 
10988   /* Jump.  */
10989   fputs ("\tjmp\t", asm_out_file);
10990   assemble_name_raw (asm_out_file, indirectlabel1);
10991   fputc ('\n', asm_out_file);
10992 
10993   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
10994 
10995   if (regno != INVALID_REGNUM)
10996     {
10997       /* MOV.  */
10998       rtx xops[2];
10999       xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
11000       xops[1] = gen_rtx_REG (word_mode, regno);
11001       output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
11002     }
11003   else
11004     {
11005       /* LEA.  */
11006       rtx xops[2];
11007       xops[0] = stack_pointer_rtx;
11008       xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11009       output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
11010     }
11011 
11012   if (need_prefix == indirect_thunk_prefix_bnd)
11013     fputs ("\tbnd ret\n", asm_out_file);
11014   else
11015     fputs ("\tret\n", asm_out_file);
11016 }
11017 
11018 /* Output a function with a call and return thunk for indirect branch.
11019    If NEED_PREFIX is indirect_thunk_prefix_bnd, the BND prefix is needed.
11020    If REGNO != INVALID_REGNUM, the function address is in REGNO.  Otherwise,
11021    the function address is on the top of the stack.  The thunk is used for
11022    function return if RET_P is true.  */
11023 
11024 static void
11025 output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
11026 				unsigned int regno, bool ret_p)
11027 {
11028   char name[32];
11029   tree decl;
11030 
11031   /* Create __x86_indirect_thunk/__x86_indirect_thunk_bnd.  */
11032   indirect_thunk_name (name, regno, need_prefix, ret_p);
11033   decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11034 		     get_identifier (name),
11035 		     build_function_type_list (void_type_node, NULL_TREE));
11036   DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11037 				   NULL_TREE, void_type_node);
11038   TREE_PUBLIC (decl) = 1;
11039   TREE_STATIC (decl) = 1;
11040   DECL_IGNORED_P (decl) = 1;
11041 
11042 #if TARGET_MACHO
11043   if (TARGET_MACHO)
11044     {
11045       switch_to_section (darwin_sections[picbase_thunk_section]);
11046       fputs ("\t.weak_definition\t", asm_out_file);
11047       assemble_name (asm_out_file, name);
11048       fputs ("\n\t.private_extern\t", asm_out_file);
11049       assemble_name (asm_out_file, name);
11050       putc ('\n', asm_out_file);
11051       ASM_OUTPUT_LABEL (asm_out_file, name);
11052       DECL_WEAK (decl) = 1;
11053     }
11054   else
11055 #endif
11056     if (USE_HIDDEN_LINKONCE)
11057       {
11058 	cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
11059 
11060 	targetm.asm_out.unique_section (decl, 0);
11061 	switch_to_section (get_named_section (decl, NULL, 0));
11062 
11063 	targetm.asm_out.globalize_label (asm_out_file, name);
11064 	fputs ("\t.hidden\t", asm_out_file);
11065 	assemble_name (asm_out_file, name);
11066 	putc ('\n', asm_out_file);
11067 	ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
11068       }
11069     else
11070       {
11071 	switch_to_section (text_section);
11072 	ASM_OUTPUT_LABEL (asm_out_file, name);
11073       }
11074 
11075   DECL_INITIAL (decl) = make_node (BLOCK);
11076   current_function_decl = decl;
11077   allocate_struct_function (decl, false);
11078   init_function_start (decl);
11079   /* We're about to hide the function body from callees of final_* by
11080      emitting it directly; tell them we're a thunk, if they care.  */
11081   cfun->is_thunk = true;
11082   first_function_block_is_cold = false;
11083   /* Make sure unwind info is emitted for the thunk if needed.  */
11084   final_start_function (emit_barrier (), asm_out_file, 1);
11085 
11086   output_indirect_thunk (need_prefix, regno);
11087 
11088   final_end_function ();
11089   init_insn_lengths ();
11090   free_after_compilation (cfun);
11091   set_cfun (NULL);
11092   current_function_decl = NULL;
11093 }
11094 
11095 static int pic_labels_used;
11096 
11097 /* Fills in the label name that should be used for a pc thunk for
11098    the given register.  */
11099 
11100 static void
11101 get_pc_thunk_name (char name[32], unsigned int regno)
11102 {
11103   gcc_assert (!TARGET_64BIT);
11104 
11105   if (USE_HIDDEN_LINKONCE)
11106     sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
11107   else
11108     ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
11109 }
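
/* For example, for %ebx this yields "__x86.get_pc_thunk.bx" when hidden
   linkonce sections are usable.  */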
11110 
11111 
11112 /* Emit, at the end of compilation, the indirect branch thunks and the
11113    -fpic pc thunks that load a register with their return address.  */
11114 
11115 static void
11116 ix86_code_end (void)
11117 {
11118   rtx xops[2];
11119   unsigned int regno;
11120 
11121   if (indirect_return_needed)
11122     output_indirect_thunk_function (indirect_thunk_prefix_none,
11123 				    INVALID_REGNUM, true);
11124   if (indirect_return_bnd_needed)
11125     output_indirect_thunk_function (indirect_thunk_prefix_bnd,
11126 				    INVALID_REGNUM, true);
11127 
11128   if (indirect_return_via_cx)
11129     output_indirect_thunk_function (indirect_thunk_prefix_none,
11130 				    CX_REG, true);
11131   if (indirect_return_via_cx_bnd)
11132     output_indirect_thunk_function (indirect_thunk_prefix_bnd,
11133 				    CX_REG, true);
11134 
11135   if (indirect_thunk_needed)
11136     output_indirect_thunk_function (indirect_thunk_prefix_none,
11137 				    INVALID_REGNUM, false);
11138   if (indirect_thunk_bnd_needed)
11139     output_indirect_thunk_function (indirect_thunk_prefix_bnd,
11140 				    INVALID_REGNUM, false);
11141 
11142   for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
11143     {
11144       unsigned int i = regno - FIRST_REX_INT_REG + LAST_INT_REG + 1;
11145       if ((indirect_thunks_used & (1 << i)))
11146 	output_indirect_thunk_function (indirect_thunk_prefix_none,
11147 					regno, false);
11148 
11149       if ((indirect_thunks_bnd_used & (1 << i)))
11150 	output_indirect_thunk_function (indirect_thunk_prefix_bnd,
11151 					regno, false);
11152     }
11153 
11154   for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
11155     {
11156       char name[32];
11157       tree decl;
11158 
11159       if ((indirect_thunks_used & (1 << regno)))
11160 	output_indirect_thunk_function (indirect_thunk_prefix_none,
11161 					regno, false);
11162 
11163       if ((indirect_thunks_bnd_used & (1 << regno)))
11164 	output_indirect_thunk_function (indirect_thunk_prefix_bnd,
11165 					regno, false);
11166 
11167       if (!(pic_labels_used & (1 << regno)))
11168 	continue;
11169 
11170       get_pc_thunk_name (name, regno);
11171 
11172       decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11173 			 get_identifier (name),
11174 			 build_function_type_list (void_type_node, NULL_TREE));
11175       DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11176 				       NULL_TREE, void_type_node);
11177       TREE_PUBLIC (decl) = 1;
11178       TREE_STATIC (decl) = 1;
11179       DECL_IGNORED_P (decl) = 1;
11180 
11181 #if TARGET_MACHO
11182       if (TARGET_MACHO)
11183 	{
11184 	  switch_to_section (darwin_sections[picbase_thunk_section]);
11185 	  fputs ("\t.weak_definition\t", asm_out_file);
11186 	  assemble_name (asm_out_file, name);
11187 	  fputs ("\n\t.private_extern\t", asm_out_file);
11188 	  assemble_name (asm_out_file, name);
11189 	  putc ('\n', asm_out_file);
11190 	  ASM_OUTPUT_LABEL (asm_out_file, name);
11191 	  DECL_WEAK (decl) = 1;
11192 	}
11193       else
11194 #endif
11195       if (USE_HIDDEN_LINKONCE)
11196 	{
11197 	  cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
11198 
11199 	  targetm.asm_out.unique_section (decl, 0);
11200 	  switch_to_section (get_named_section (decl, NULL, 0));
11201 
11202 	  targetm.asm_out.globalize_label (asm_out_file, name);
11203 	  fputs ("\t.hidden\t", asm_out_file);
11204 	  assemble_name (asm_out_file, name);
11205 	  putc ('\n', asm_out_file);
11206 	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
11207 	}
11208       else
11209 	{
11210 	  switch_to_section (text_section);
11211 	  ASM_OUTPUT_LABEL (asm_out_file, name);
11212 	}
11213 
11214       DECL_INITIAL (decl) = make_node (BLOCK);
11215       current_function_decl = decl;
11216       allocate_struct_function (decl, false);
11217       init_function_start (decl);
11218       /* We're about to hide the function body from callees of final_* by
11219 	 emitting it directly; tell them we're a thunk, if they care.  */
11220       cfun->is_thunk = true;
11221       first_function_block_is_cold = false;
11222       /* Make sure unwind info is emitted for the thunk if needed.  */
11223       final_start_function (emit_barrier (), asm_out_file, 1);
11224 
11225       /* Pad stack IP move with 4 instructions (two NOPs count
11226 	 as one instruction).  */
11227       if (TARGET_PAD_SHORT_FUNCTION)
11228 	{
11229 	  int i = 8;
11230 
11231 	  while (i--)
11232 	    fputs ("\tnop\n", asm_out_file);
11233 	}
11234 
11235       xops[0] = gen_rtx_REG (Pmode, regno);
11236       xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
11237       output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
11238       output_asm_insn ("%!ret", NULL);
11239       final_end_function ();
11240       init_insn_lengths ();
11241       free_after_compilation (cfun);
11242       set_cfun (NULL);
11243       current_function_decl = NULL;
11244     }
11245 
11246   if (flag_split_stack)
11247     file_end_indicate_split_stack ();
11248 }
11249 
11250 /* Emit code for the SET_GOT patterns.  */
11251 
11252 const char *
11253 output_set_got (rtx dest, rtx label)
11254 {
11255   rtx xops[3];
11256 
11257   xops[0] = dest;
11258 
11259   if (TARGET_VXWORKS_RTP && flag_pic)
11260     {
11261       /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
11262       xops[2] = gen_rtx_MEM (Pmode,
11263 			     gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
11264       output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
11265 
11266       /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
11267 	 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
11268 	 an unadorned address.  */
11269       xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
11270       SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
11271       output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
11272       return "";
11273     }
11274 
11275   xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
11276 
11277   if (flag_pic)
11278     {
11279       char name[32];
11280       get_pc_thunk_name (name, REGNO (dest));
11281       pic_labels_used |= 1 << REGNO (dest);
11282 
11283       xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
11284       xops[2] = gen_rtx_MEM (QImode, xops[2]);
11285       output_asm_insn ("%!call\t%X2", xops);
11286 
11287 #if TARGET_MACHO
11288       /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
11289          This is what will be referenced by the Mach-O PIC subsystem.  */
11290       if (machopic_should_output_picbase_label () || !label)
11291 	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
11292 
11293       /* When we are restoring the pic base at the site of a nonlocal label,
11294          and we decided to emit the pic base above, we will still output a
11295          local label used for calculating the correction offset (even though
11296          the offset will be 0 in that case).  */
11297       if (label)
11298         targetm.asm_out.internal_label (asm_out_file, "L",
11299 					   CODE_LABEL_NUMBER (label));
11300 #endif
11301     }
11302   else
11303     {
11304       if (TARGET_MACHO)
11305 	/* We don't need a pic base, we're not producing pic.  */
11306 	gcc_unreachable ();
11307 
11308       xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
11309       output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
11310       targetm.asm_out.internal_label (asm_out_file, "L",
11311 				      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
11312     }
11313 
11314   if (!TARGET_MACHO)
11315     output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
11316 
11317   return "";
11318 }
11319 
11320 /* Generate a "push" pattern for input ARG.  */
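/* Illustrative note: on a 64-bit target the returned pattern is roughly
     (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI arg))
   and the frame-state fields in cfun->machine->fs are updated here to
   account for the implicit UNITS_PER_WORD stack adjustment.  */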
11321 
11322 static rtx
11323 gen_push (rtx arg)
11324 {
11325   struct machine_function *m = cfun->machine;
11326 
11327   if (m->fs.cfa_reg == stack_pointer_rtx)
11328     m->fs.cfa_offset += UNITS_PER_WORD;
11329   m->fs.sp_offset += UNITS_PER_WORD;
11330 
11331   if (REG_P (arg) && GET_MODE (arg) != word_mode)
11332     arg = gen_rtx_REG (word_mode, REGNO (arg));
11333 
11334   return gen_rtx_SET (gen_rtx_MEM (word_mode,
11335 				   gen_rtx_PRE_DEC (Pmode,
11336 						    stack_pointer_rtx)),
11337 		      arg);
11338 }
11339 
11340 /* Generate a "pop" pattern for input ARG.  */
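/* Illustrative note: the mirror image of gen_push, returning roughly
     (set (reg:DI arg) (mem:DI (post_inc:DI (reg:DI sp))))
   on a 64-bit target; unlike gen_push it does not update
   cfun->machine->fs.  */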
11341 
11342 static rtx
11343 gen_pop (rtx arg)
11344 {
11345   if (REG_P (arg) && GET_MODE (arg) != word_mode)
11346     arg = gen_rtx_REG (word_mode, REGNO (arg));
11347 
11348   return gen_rtx_SET (arg,
11349 		      gen_rtx_MEM (word_mode,
11350 				   gen_rtx_POST_INC (Pmode,
11351 						     stack_pointer_rtx)));
11352 }
11353 
11354 /* Return the regno of an unused call-clobbered register available
11355    for the entire function, or INVALID_REGNUM if there is none.  */
11356 
11357 static unsigned int
11358 ix86_select_alt_pic_regnum (void)
11359 {
11360   if (ix86_use_pseudo_pic_reg ())
11361     return INVALID_REGNUM;
11362 
11363   if (crtl->is_leaf
11364       && !crtl->profile
11365       && !ix86_current_function_calls_tls_descriptor)
11366     {
11367       int i, drap;
11368       /* Can't use the same register for both PIC and DRAP.  */
11369       if (crtl->drap_reg)
11370 	drap = REGNO (crtl->drap_reg);
11371       else
11372 	drap = -1;
11373       for (i = 2; i >= 0; --i)
11374         if (i != drap && !df_regs_ever_live_p (i))
11375 	  return i;
11376     }
11377 
11378   return INVALID_REGNUM;
11379 }
11380 
11381 /* Return true if REGNO is used by the epilogue.  */
11382 
11383 bool
11384 ix86_epilogue_uses (int regno)
11385 {
11386   /* If there are no caller-saved registers, we preserve all registers,
11387      except for MMX and x87 registers which aren't supported when saving
11388      and restoring registers.  Don't explicitly save SP register since
11389      it is always preserved.  */
11390   return (epilogue_completed
11391 	  && cfun->machine->no_caller_saved_registers
11392 	  && !fixed_regs[regno]
11393 	  && !STACK_REGNO_P (regno)
11394 	  && !MMX_REGNO_P (regno));
11395 }
11396 
11397 /* Return nonzero if register REGNO can be used as a scratch register
11398    in peephole2.  */
11399 
11400 static bool
11401 ix86_hard_regno_scratch_ok (unsigned int regno)
11402 {
11403   /* If there are no caller-saved registers, we can't use any register
11404      as a scratch register after epilogue and use REGNO as scratch
11405      register only if it has been used before to avoid saving and
11406      restoring it.  */
11407   return (!cfun->machine->no_caller_saved_registers
11408 	  || (!epilogue_completed
11409 	      && df_regs_ever_live_p (regno)));
11410 }
11411 
11412 /* Return true if register class CL should be an additional allocno
11413    class.  */
11414 
11415 static bool
11416 ix86_additional_allocno_class_p (reg_class_t cl)
11417 {
11418   return cl == MOD4_SSE_REGS;
11419 }
11420 
11421 /* Return TRUE if we need to save REGNO.  */
11422 
11423 static bool
11424 ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
11425 {
11426   /* If there are no caller-saved registers, we preserve all registers,
11427      except for MMX and x87 registers which aren't supported when saving
11428      and restoring registers.  Don't explicitly save SP register since
11429      it is always preserved.  */
11430   if (cfun->machine->no_caller_saved_registers)
11431     {
11432       /* Don't preserve registers used for function return value.  */
11433       rtx reg = crtl->return_rtx;
11434       if (reg)
11435 	{
11436 	  unsigned int i = REGNO (reg);
11437 	  unsigned int nregs = REG_NREGS (reg);
11438 	  while (nregs-- > 0)
11439 	    if ((i + nregs) == regno)
11440 	      return false;
11441 
11442 	  reg = crtl->return_bnd;
11443 	  if (reg)
11444 	    {
11445 	      i = REGNO (reg);
11446 	      nregs = REG_NREGS (reg);
11447 	      while (nregs-- > 0)
11448 		if ((i + nregs) == regno)
11449 		  return false;
11450 	    }
11451 	}
11452 
11453       return (df_regs_ever_live_p (regno)
11454 	      && !fixed_regs[regno]
11455 	      && !STACK_REGNO_P (regno)
11456 	      && !MMX_REGNO_P (regno)
11457 	      && (regno != HARD_FRAME_POINTER_REGNUM
11458 		  || !frame_pointer_needed));
11459     }
11460 
11461   if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
11462       && pic_offset_table_rtx)
11463     {
11464       if (ix86_use_pseudo_pic_reg ())
11465 	{
11466 	  /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
11467 	  _mcount in prologue.  */
11468 	  if (!TARGET_64BIT && flag_pic && crtl->profile)
11469 	    return true;
11470 	}
11471       else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11472 	       || crtl->profile
11473 	       || crtl->calls_eh_return
11474 	       || crtl->uses_const_pool
11475 	       || cfun->has_nonlocal_label)
11476         return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
11477     }
11478 
11479   if (crtl->calls_eh_return && maybe_eh_return)
11480     {
11481       unsigned i;
11482       for (i = 0; ; i++)
11483 	{
11484 	  unsigned test = EH_RETURN_DATA_REGNO (i);
11485 	  if (test == INVALID_REGNUM)
11486 	    break;
11487 	  if (test == regno)
11488 	    return true;
11489 	}
11490     }
11491 
11492   if (ignore_outlined && cfun->machine->call_ms2sysv)
11493     {
11494       unsigned count = cfun->machine->call_ms2sysv_extra_regs
11495 		       + xlogue_layout::MIN_REGS;
11496       if (xlogue_layout::is_stub_managed_reg (regno, count))
11497 	return false;
11498     }
11499 
11500   if (crtl->drap_reg
11501       && regno == REGNO (crtl->drap_reg)
11502       && !cfun->machine->no_drap_save_restore)
11503     return true;
11504 
11505   return (df_regs_ever_live_p (regno)
11506 	  && !call_used_regs[regno]
11507 	  && !fixed_regs[regno]
11508 	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
11509 }
11510 
11511 /* Return the number of saved general purpose registers.  */
11512 
11513 static int
11514 ix86_nsaved_regs (void)
11515 {
11516   int nregs = 0;
11517   int regno;
11518 
11519   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11520     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
11521       nregs ++;
11522   return nregs;
11523 }
11524 
11525 /* Return number of saved SSE registers.  */
11526 
11527 static int
11528 ix86_nsaved_sseregs (void)
11529 {
11530   int nregs = 0;
11531   int regno;
11532 
11533   if (!TARGET_64BIT_MS_ABI)
11534     return 0;
11535   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11536     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
11537       nregs ++;
11538   return nregs;
11539 }
11540 
11541 /* Given FROM and TO register numbers, say whether this elimination is
11542    allowed.  If stack alignment is needed, we can only replace argument
11543    pointer with hard frame pointer, or replace frame pointer with stack
11544    pointer.  Otherwise, frame pointer elimination is automatically
11545    handled and all other eliminations are valid.  */
11546 
11547 static bool
11548 ix86_can_eliminate (const int from, const int to)
11549 {
11550   if (stack_realign_fp)
11551     return ((from == ARG_POINTER_REGNUM
11552 	     && to == HARD_FRAME_POINTER_REGNUM)
11553 	    || (from == FRAME_POINTER_REGNUM
11554 		&& to == STACK_POINTER_REGNUM));
11555   else
11556     return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
11557 }
11558 
11559 /* Return the offset between two registers, one to be eliminated, and the other
11560    its replacement, at the start of a routine.  */
11561 
11562 HOST_WIDE_INT
11563 ix86_initial_elimination_offset (int from, int to)
11564 {
11565   struct ix86_frame &frame = cfun->machine->frame;
11566 
11567   if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
11568     return frame.hard_frame_pointer_offset;
11569   else if (from == FRAME_POINTER_REGNUM
11570 	   && to == HARD_FRAME_POINTER_REGNUM)
11571     return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
11572   else
11573     {
11574       gcc_assert (to == STACK_POINTER_REGNUM);
11575 
11576       if (from == ARG_POINTER_REGNUM)
11577 	return frame.stack_pointer_offset;
11578 
11579       gcc_assert (from == FRAME_POINTER_REGNUM);
11580       return frame.stack_pointer_offset - frame.frame_pointer_offset;
11581     }
11582 }
11583 
11584 /* In a dynamically-aligned function, we can't know the offset from
11585    stack pointer to frame pointer, so we must ensure that setjmp
11586    eliminates fp against the hard fp (%ebp) rather than trying to
11587    index from %esp up to the top of the frame across a gap that is
11588    of unknown (at compile-time) size.  */
11589 static rtx
11590 ix86_builtin_setjmp_frame_value (void)
11591 {
11592   return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
11593 }
11594 
11595 /* Emit a one-time warning for unsupported msabi-to-sysv prologues/epilogues.  */
11596 static void warn_once_call_ms2sysv_xlogues (const char *feature)
11597 {
11598   static bool warned_once = false;
11599   if (!warned_once)
11600     {
11601       warning (0, "-mcall-ms2sysv-xlogues is not compatible with %s",
11602 	       feature);
11603       warned_once = true;
11604     }
11605 }
11606 
11607 /* Return the probing interval for -fstack-clash-protection.  */
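/* For example, assuming the usual default exponent of 12 for both the
   stack-clash-protection-probe-interval parameter and
   STACK_CHECK_PROBE_INTERVAL_EXP, the interval is 4096 bytes (one page);
   the result is always a power of two.  */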
11608 
11609 static HOST_WIDE_INT
11610 get_probe_interval (void)
11611 {
11612   if (flag_stack_clash_protection)
11613     return (HOST_WIDE_INT_1U
11614 	    << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL));
11615   else
11616     return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
11617 }
11618 
11619 /* When using -fsplit-stack, the allocation routines set a field in
11620    the TCB to the bottom of the stack plus this much space, measured
11621    in bytes.  */
11622 
11623 #define SPLIT_STACK_AVAILABLE 256
11624 
11625 /* Fill in the ix86_frame structure describing the frame of the current function.  */
11626 
11627 static void
11628 ix86_compute_frame_layout (void)
11629 {
11630   struct ix86_frame *frame = &cfun->machine->frame;
11631   struct machine_function *m = cfun->machine;
11632   unsigned HOST_WIDE_INT stack_alignment_needed;
11633   HOST_WIDE_INT offset;
11634   unsigned HOST_WIDE_INT preferred_alignment;
11635   HOST_WIDE_INT size = get_frame_size ();
11636   HOST_WIDE_INT to_allocate;
11637 
11638   /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
11639    * ms_abi functions that call a sysv function.  We now need to prune away
11640    * cases where it should be disabled.  */
11641   if (TARGET_64BIT && m->call_ms2sysv)
11642     {
11643       gcc_assert (TARGET_64BIT_MS_ABI);
11644       gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
11645       gcc_assert (!TARGET_SEH);
11646       gcc_assert (TARGET_SSE);
11647       gcc_assert (!ix86_using_red_zone ());
11648 
11649       if (crtl->calls_eh_return)
11650 	{
11651 	  gcc_assert (!reload_completed);
11652 	  m->call_ms2sysv = false;
11653 	  warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
11654 	}
11655 
11656       else if (ix86_static_chain_on_stack)
11657 	{
11658 	  gcc_assert (!reload_completed);
11659 	  m->call_ms2sysv = false;
11660 	  warn_once_call_ms2sysv_xlogues ("static call chains");
11661 	}
11662 
11663       /* Finally, compute which registers the stub will manage.  */
11664       else
11665 	{
11666 	  unsigned count = xlogue_layout::count_stub_managed_regs ();
11667 	  m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
11668 	  m->call_ms2sysv_pad_in = 0;
11669 	}
11670     }
11671 
11672   frame->nregs = ix86_nsaved_regs ();
11673   frame->nsseregs = ix86_nsaved_sseregs ();
11674 
11675   /* The 64-bit MS ABI seems to require stack alignment to always be 16,
11676      except for function prologues, leaf functions and when the default
11677      incoming stack boundary is overridden at the command line or via the
11678      force_align_arg_pointer attribute.  */
11679   if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
11680       && (!crtl->is_leaf || cfun->calls_alloca != 0
11681 	  || ix86_current_function_calls_tls_descriptor
11682 	  || ix86_incoming_stack_boundary < 128))
11683     {
11684       crtl->preferred_stack_boundary = 128;
11685       crtl->stack_alignment_needed = 128;
11686     }
11687 
11688   stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
11689   preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
11690 
11691   gcc_assert (!size || stack_alignment_needed);
11692   gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
11693   gcc_assert (preferred_alignment <= stack_alignment_needed);
11694 
11695   /* The only ABI saving SSE regs should be 64-bit ms_abi.  */
11696   gcc_assert (TARGET_64BIT || !frame->nsseregs);
11697   if (TARGET_64BIT && m->call_ms2sysv)
11698     {
11699       gcc_assert (stack_alignment_needed >= 16);
11700       gcc_assert (!frame->nsseregs);
11701     }
11702 
11703   /* For SEH we have to limit the amount of code movement into the prologue.
11704      At present we do this via a BLOCKAGE, at which point there's very little
11705      scheduling that can be done, which means that there's very little point
11706      in doing anything except PUSHs.  */
11707   if (TARGET_SEH)
11708     m->use_fast_prologue_epilogue = false;
11709   else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
11710     {
11711       int count = frame->nregs;
11712       struct cgraph_node *node = cgraph_node::get (current_function_decl);
11713 
11714       /* The fast prologue uses move instead of push to save registers.  This
11715          is significantly longer, but also executes faster as modern hardware
11716          can execute the moves in parallel, but can't do that for push/pop.
11717 
11718 	 Be careful about choosing which prologue to emit:  when the function
11719 	 takes many instructions to execute, we may as well use the slow
11720 	 version, likewise when the function is known to be outside a hot spot
11721 	 (which is known with profile feedback only).  Weight the size of the
11722 	 function by the number of registers to save, as it is cheap to use
11723 	 one or two push instructions but very slow to use many of them.  */
11724       if (count)
11725 	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
11726       if (node->frequency < NODE_FREQUENCY_NORMAL
11727 	  || (flag_branch_probabilities
11728 	      && node->frequency < NODE_FREQUENCY_HOT))
11729 	m->use_fast_prologue_epilogue = false;
11730       else
11731 	m->use_fast_prologue_epilogue
11732 	   = !expensive_function_p (count);
11733     }
11734 
11735   frame->save_regs_using_mov
11736     = (TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue
11737        /* If static stack checking is enabled and done with probes,
11738 	  the registers need to be saved before allocating the frame.  */
11739        && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
11740 
11741   /* Skip return address and error code in exception handler.  */
11742   offset = INCOMING_FRAME_SP_OFFSET;
11743 
11744   /* Skip pushed static chain.  */
11745   if (ix86_static_chain_on_stack)
11746     offset += UNITS_PER_WORD;
11747 
11748   /* Skip saved base pointer.  */
11749   if (frame_pointer_needed)
11750     offset += UNITS_PER_WORD;
11751   frame->hfp_save_offset = offset;
11752 
11753   /* The traditional frame pointer location is at the top of the frame.  */
11754   frame->hard_frame_pointer_offset = offset;
11755 
11756   /* Register save area */
11757   offset += frame->nregs * UNITS_PER_WORD;
11758   frame->reg_save_offset = offset;
11759 
11760   /* On SEH target, registers are pushed just before the frame pointer
11761      location.  */
11762   if (TARGET_SEH)
11763     frame->hard_frame_pointer_offset = offset;
11764 
11765   /* Calculate the size of the va-arg area (not including padding, if any).  */
11766   frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
11767 
11768   /* Also adjust stack_realign_offset for the largest alignment of
11769      stack slot actually used.  */
11770   if (stack_realign_fp
11771       || (cfun->machine->max_used_stack_alignment != 0
11772 	  && (offset % cfun->machine->max_used_stack_alignment) != 0))
11773     {
11774       /* We may need a 16-byte aligned stack for the remainder of the
11775 	 register save area, but the stack frame for the local function
11776 	 may require a greater alignment if using AVX, AVX2 or AVX-512.  In order
11777 	 to avoid wasting space, we first calculate the space needed for
11778 	 the rest of the register saves, add that to the stack pointer,
11779 	 and then realign the stack to the boundary of the start of the
11780 	 frame for the local function.  */
11781       HOST_WIDE_INT space_needed = 0;
11782       HOST_WIDE_INT sse_reg_space_needed = 0;
11783 
11784       if (TARGET_64BIT)
11785 	{
11786 	  if (m->call_ms2sysv)
11787 	    {
11788 	      m->call_ms2sysv_pad_in = 0;
11789 	      space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
11790 	    }
11791 
11792 	  else if (frame->nsseregs)
11793 	    /* The only ABI that has saved SSE registers (Win64) also has a
11794 	       16-byte aligned default stack.  However, many programs violate
11795 	       the ABI, and Wine64 forces stack realignment to compensate.  */
11796 	    space_needed = frame->nsseregs * 16;
11797 
11798 	  sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
11799 
11800 	  /* 64-bit frame->va_arg_size should always be a multiple of 16, but
11801 	     rounding to be pedantic.  */
11802 	  space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
11803 	}
11804       else
11805 	space_needed = frame->va_arg_size;
11806 
11807       /* Record the allocation size required prior to the realignment AND.  */
11808       frame->stack_realign_allocate = space_needed;
11809 
11810       /* The re-aligned stack starts at frame->stack_realign_offset.  Values
11811 	 before this point are not directly comparable with values below
11812 	 this point.  Use sp_valid_at to determine if the stack pointer is
11813 	 valid for a given offset, fp_valid_at for the frame pointer, or
11814 	 choose_baseaddr to have a base register chosen for you.
11815 
11816 	 Note that the result of (frame->stack_realign_offset
11817 	 & (stack_alignment_needed - 1)) may not equal zero.  */
11818       offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
11819       frame->stack_realign_offset = offset - space_needed;
11820       frame->sse_reg_save_offset = frame->stack_realign_offset
11821 							+ sse_reg_space_needed;
11822     }
11823   else
11824     {
11825       frame->stack_realign_offset = offset;
11826 
11827       if (TARGET_64BIT && m->call_ms2sysv)
11828 	{
11829 	  m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
11830 	  offset += xlogue_layout::get_instance ().get_stack_space_used ();
11831 	}
11832 
11833       /* Align and set SSE register save area.  */
11834       else if (frame->nsseregs)
11835 	{
11836 	  /* If the incoming stack boundary is at least 16 bytes, or DRAP is
11837 	     required and the DRAP re-alignment boundary is at least 16 bytes,
11838 	     then we want the SSE register save area properly aligned.  */
11839 	  if (ix86_incoming_stack_boundary >= 128
11840 		  || (stack_realign_drap && stack_alignment_needed >= 16))
11841 	    offset = ROUND_UP (offset, 16);
11842 	  offset += frame->nsseregs * 16;
11843 	}
11844       frame->sse_reg_save_offset = offset;
11845       offset += frame->va_arg_size;
11846     }
11847 
11848   /* Align the start of the frame for the local function.  When a function
11849      call is removed, it may become a leaf function.  But if arguments may
11850      be passed on the stack, we still need to align the stack when there is
11851      no tail call.  */
11852   if (m->call_ms2sysv
11853       || frame->va_arg_size != 0
11854       || size != 0
11855       || !crtl->is_leaf
11856       || (!crtl->tail_call_emit
11857 	  && cfun->machine->outgoing_args_on_stack)
11858       || cfun->calls_alloca
11859       || ix86_current_function_calls_tls_descriptor)
11860     offset = ROUND_UP (offset, stack_alignment_needed);
11861 
11862   /* Frame pointer points here.  */
11863   frame->frame_pointer_offset = offset;
11864 
11865   offset += size;
11866 
11867   /* Add outgoing arguments area.  Can be skipped if we eliminated
11868      all the function calls as dead code.
11869      Skipping is however impossible when the function calls alloca.  The
11870      alloca expander assumes that the last crtl->outgoing_args_size bytes
11871      of the stack frame are unused.  */
11872   if (ACCUMULATE_OUTGOING_ARGS
11873       && (!crtl->is_leaf || cfun->calls_alloca
11874 	  || ix86_current_function_calls_tls_descriptor))
11875     {
11876       offset += crtl->outgoing_args_size;
11877       frame->outgoing_arguments_size = crtl->outgoing_args_size;
11878     }
11879   else
11880     frame->outgoing_arguments_size = 0;
11881 
11882   /* Align stack boundary.  Only needed if we're calling another function
11883      or using alloca.  */
11884   if (!crtl->is_leaf || cfun->calls_alloca
11885       || ix86_current_function_calls_tls_descriptor)
11886     offset = ROUND_UP (offset, preferred_alignment);
11887 
11888   /* We've reached end of stack frame.  */
11889   frame->stack_pointer_offset = offset;
11890 
11891   /* Size prologue needs to allocate.  */
11892   to_allocate = offset - frame->sse_reg_save_offset;
11893 
11894   if ((!to_allocate && frame->nregs <= 1)
11895       || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
11896       /* If stack clash probing needs a loop, then it needs a
11897 	 scratch register.  But the returned register is only guaranteed
11898 	 to be safe to use after register saves are complete.  So if
11899 	 stack clash protections are enabled and the allocated frame is
11900 	 larger than the probe interval, then use pushes to save
11901 	 callee saved registers.  */
11902       || (flag_stack_clash_protection && to_allocate > get_probe_interval ()))
11903     frame->save_regs_using_mov = false;
11904 
11905   if (ix86_using_red_zone ()
11906       && crtl->sp_is_unchanging
11907       && crtl->is_leaf
11908       && !ix86_pc_thunk_call_expanded
11909       && !ix86_current_function_calls_tls_descriptor)
11910     {
11911       frame->red_zone_size = to_allocate;
11912       if (frame->save_regs_using_mov)
11913 	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
11914       if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
11915 	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
11916     }
11917   else
11918     frame->red_zone_size = 0;
11919   frame->stack_pointer_offset -= frame->red_zone_size;
11920 
11921   /* The SEH frame pointer location is near the bottom of the frame.
11922      This is enforced by the fact that the difference between the
11923      stack pointer and the frame pointer is limited to 240 bytes in
11924      the unwind data structure.  */
11925   if (TARGET_SEH)
11926     {
11927       HOST_WIDE_INT diff;
11928 
11929       /* If we can leave the frame pointer where it is, do so.  Also, returns
11930 	 the establisher frame for __builtin_frame_address (0).  */
11931       diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
11932       if (diff <= SEH_MAX_FRAME_SIZE
11933 	  && (diff > 240 || (diff & 15) != 0)
11934 	  && !crtl->accesses_prior_frames)
11935 	{
11936 	  /* Ideally we'd determine what portion of the local stack frame
11937 	     (within the constraint of the lowest 240) is most heavily used.
11938 	     But without that complication, simply bias the frame pointer
11939 	     by 128 bytes so as to maximize the amount of the local stack
11940 	     frame that is addressable with 8-bit offsets.  */
11941 	  frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
11942 	}
11943     }
11944 }
11945 
11946 /* This is semi-inlined memory_address_length, but simplified
11947    since we know that we're always dealing with reg+offset, and
11948    to avoid having to create and discard all that rtl.  */
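/* Purely illustrative worked examples of the length estimate below:
   0(%ebp) still needs a one-byte displacement, giving 1; 0x100(%esp)
   needs a four-byte displacement plus a SIB byte, giving 5; 0(%eax)
   needs neither, giving 0.  */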
11949 
11950 static inline int
11951 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
11952 {
11953   int len = 4;
11954 
11955   if (offset == 0)
11956     {
11957       /* EBP and R13 cannot be encoded without an offset.  */
11958       len = (regno == BP_REG || regno == R13_REG);
11959     }
11960   else if (IN_RANGE (offset, -128, 127))
11961     len = 1;
11962 
11963   /* ESP and R12 must be encoded with a SIB byte.  */
11964   if (regno == SP_REG || regno == R12_REG)
11965     len++;
11966 
11967   return len;
11968 }
11969 
11970 /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
11971    the frame save area.  The register is saved at CFA - CFA_OFFSET.  */
11972 
11973 static bool
11974 sp_valid_at (HOST_WIDE_INT cfa_offset)
11975 {
11976   const struct machine_frame_state &fs = cfun->machine->fs;
11977   if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
11978     {
11979       /* Validate that the cfa_offset isn't in a "no-man's land".  */
11980       gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
11981       return false;
11982     }
11983   return fs.sp_valid;
11984 }
11985 
11986 /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
11987    the frame save area.  The register is saved at CFA - CFA_OFFSET.  */
11988 
11989 static inline bool
11990 fp_valid_at (HOST_WIDE_INT cfa_offset)
11991 {
11992   const struct machine_frame_state &fs = cfun->machine->fs;
11993   if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
11994     {
11995       /* Validate that the cfa_offset isn't in a "no-man's land".  */
11996       gcc_assert (cfa_offset >= fs.sp_realigned_offset);
11997       return false;
11998     }
11999   return fs.fp_valid;
12000 }
12001 
12002 /* Choose a base register based upon alignment requested, speed and/or
12003    size.  */
12004 
12005 static void
12006 choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
12007 		HOST_WIDE_INT &base_offset,
12008 		unsigned int align_reqested, unsigned int *align)
12009 {
12010   const struct machine_function *m = cfun->machine;
12011   unsigned int hfp_align;
12012   unsigned int drap_align;
12013   unsigned int sp_align;
12014   bool hfp_ok  = fp_valid_at (cfa_offset);
12015   bool drap_ok = m->fs.drap_valid;
12016   bool sp_ok   = sp_valid_at (cfa_offset);
12017 
12018   hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
12019 
12020   /* Filter out any registers that don't meet the requested alignment
12021      criteria.  */
12022   if (align_reqested)
12023     {
12024       if (m->fs.realigned)
12025 	hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
12026       /* SEH unwind code does not currently support REG_CFA_EXPRESSION
12027 	 notes (which we would need to use a realigned stack pointer),
12028 	 so disable on SEH targets.  */
12029       else if (m->fs.sp_realigned)
12030 	sp_align = crtl->stack_alignment_needed;
12031 
12032       hfp_ok = hfp_ok && hfp_align >= align_reqested;
12033       drap_ok = drap_ok && drap_align >= align_reqested;
12034       sp_ok = sp_ok && sp_align >= align_reqested;
12035     }
12036 
12037   if (m->use_fast_prologue_epilogue)
12038     {
12039       /* Choose the base register most likely to allow the most scheduling
12040          opportunities.  Generally FP is valid throughout the function,
12041          while DRAP must be reloaded within the epilogue.  But choose either
12042          over the SP, whose addressing has a larger encoding.  */
12043 
12044       if (hfp_ok)
12045 	{
12046 	  base_reg = hard_frame_pointer_rtx;
12047 	  base_offset = m->fs.fp_offset - cfa_offset;
12048 	}
12049       else if (drap_ok)
12050 	{
12051 	  base_reg = crtl->drap_reg;
12052 	  base_offset = 0 - cfa_offset;
12053 	}
12054       else if (sp_ok)
12055 	{
12056 	  base_reg = stack_pointer_rtx;
12057 	  base_offset = m->fs.sp_offset - cfa_offset;
12058 	}
12059     }
12060   else
12061     {
12062       HOST_WIDE_INT toffset;
12063       int len = 16, tlen;
12064 
12065       /* Choose the base register with the smallest address encoding.
12066          With a tie, choose FP > DRAP > SP.  */
12067       if (sp_ok)
12068 	{
12069 	  base_reg = stack_pointer_rtx;
12070 	  base_offset = m->fs.sp_offset - cfa_offset;
12071           len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
12072 	}
12073       if (drap_ok)
12074 	{
12075 	  toffset = 0 - cfa_offset;
12076 	  tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
12077 	  if (tlen <= len)
12078 	    {
12079 	      base_reg = crtl->drap_reg;
12080 	      base_offset = toffset;
12081 	      len = tlen;
12082 	    }
12083 	}
12084       if (hfp_ok)
12085 	{
12086 	  toffset = m->fs.fp_offset - cfa_offset;
12087 	  tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
12088 	  if (tlen <= len)
12089 	    {
12090 	      base_reg = hard_frame_pointer_rtx;
12091 	      base_offset = toffset;
12092 	      len = tlen;
12093 	    }
12094 	}
12095     }
12096 
12097     /* Set the align return value.  */
12098     if (align)
12099       {
12100 	if (base_reg == stack_pointer_rtx)
12101 	  *align = sp_align;
12102 	else if (base_reg == crtl->drap_reg)
12103 	  *align = drap_align;
12104 	else if (base_reg == hard_frame_pointer_rtx)
12105 	  *align = hfp_align;
12106       }
12107 }
12108 
12109 /* Return an RTX that points to CFA_OFFSET within the stack frame and
12110    the alignment of the address.  If ALIGN is non-null, it should point to
12111    an alignment value (in bits) that is preferred or zero and will
12112    receive the alignment of the base register that was selected,
12113    irrespective of whether or not CFA_OFFSET is a multiple of that
12114    alignment value.  If it is possible for the base register offset to be
12115    non-immediate then SCRATCH_REGNO should specify a scratch register to
12116    use.
12117 
12118    The valid base registers are taken from CFUN->MACHINE->FS.  */
12119 
12120 static rtx
12121 choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
12122 		 unsigned int scratch_regno = INVALID_REGNUM)
12123 {
12124   rtx base_reg = NULL;
12125   HOST_WIDE_INT base_offset = 0;
12126 
12127   /* If a specific alignment is requested, try to get a base register
12128      with that alignment first.  */
12129   if (align && *align)
12130     choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
12131 
12132   if (!base_reg)
12133     choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
12134 
12135   gcc_assert (base_reg != NULL);
12136 
12137   rtx base_offset_rtx = GEN_INT (base_offset);
12138 
12139   if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
12140     {
12141       gcc_assert (scratch_regno != INVALID_REGNUM);
12142 
12143       rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12144       emit_move_insn (scratch_reg, base_offset_rtx);
12145 
12146       return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
12147     }
12148 
12149   return plus_constant (Pmode, base_reg, base_offset);
12150 }
12151 
12152 /* Emit code to save registers in the prologue.  */
12153 
12154 static void
12155 ix86_emit_save_regs (void)
12156 {
12157   unsigned int regno;
12158   rtx_insn *insn;
12159 
12160   for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
12161     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
12162       {
12163 	insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
12164 	RTX_FRAME_RELATED_P (insn) = 1;
12165       }
12166 }
12167 
12168 /* Emit a single register save at CFA - CFA_OFFSET.  */
12169 
12170 static void
12171 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
12172 			      HOST_WIDE_INT cfa_offset)
12173 {
12174   struct machine_function *m = cfun->machine;
12175   rtx reg = gen_rtx_REG (mode, regno);
12176   rtx mem, addr, base, insn;
12177   unsigned int align = GET_MODE_ALIGNMENT (mode);
12178 
12179   addr = choose_baseaddr (cfa_offset, &align);
12180   mem = gen_frame_mem (mode, addr);
12181 
12182   /* The location alignment depends upon the base register.  */
12183   align = MIN (GET_MODE_ALIGNMENT (mode), align);
12184   gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
12185   set_mem_align (mem, align);
12186 
12187   insn = emit_insn (gen_rtx_SET (mem, reg));
12188   RTX_FRAME_RELATED_P (insn) = 1;
12189 
12190   base = addr;
12191   if (GET_CODE (base) == PLUS)
12192     base = XEXP (base, 0);
12193   gcc_checking_assert (REG_P (base));
12194 
12195   /* When saving registers into a re-aligned local stack frame, avoid
12196      any tricky guessing by dwarf2out.  */
12197   if (m->fs.realigned)
12198     {
12199       gcc_checking_assert (stack_realign_drap);
12200 
12201       if (regno == REGNO (crtl->drap_reg))
12202 	{
12203 	  /* A bit of a hack.  We force the DRAP register to be saved in
12204 	     the re-aligned stack frame, which provides us with a copy
12205 	     of the CFA that will last past the prologue.  Install it.  */
12206 	  gcc_checking_assert (cfun->machine->fs.fp_valid);
12207 	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
12208 				cfun->machine->fs.fp_offset - cfa_offset);
12209 	  mem = gen_rtx_MEM (mode, addr);
12210 	  add_reg_note (insn, REG_CFA_DEF_CFA, mem);
12211 	}
12212       else
12213 	{
12214 	  /* The frame pointer is a stable reference within the
12215 	     aligned frame.  Use it.  */
12216 	  gcc_checking_assert (cfun->machine->fs.fp_valid);
12217 	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
12218 				cfun->machine->fs.fp_offset - cfa_offset);
12219 	  mem = gen_rtx_MEM (mode, addr);
12220 	  add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
12221 	}
12222     }
12223 
12224   else if (base == stack_pointer_rtx && m->fs.sp_realigned
12225 	   && cfa_offset >= m->fs.sp_realigned_offset)
12226     {
12227       gcc_checking_assert (stack_realign_fp);
12228       add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
12229     }
12230 
12231   /* The memory may not be relative to the current CFA register,
12232      which means that we may need to generate a new pattern for
12233      use by the unwind info.  */
12234   else if (base != m->fs.cfa_reg)
12235     {
12236       addr = plus_constant (Pmode, m->fs.cfa_reg,
12237 			    m->fs.cfa_offset - cfa_offset);
12238       mem = gen_rtx_MEM (mode, addr);
12239       add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
12240     }
12241 }
12242 
12243 /* Emit code to save registers using MOV insns.
12244    First register is stored at CFA - CFA_OFFSET.  */
12245 static void
12246 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
12247 {
12248   unsigned int regno;
12249 
12250   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12251     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
12252       {
12253         ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
12254 	cfa_offset -= UNITS_PER_WORD;
12255       }
12256 }
12257 
12258 /* Emit code to save SSE registers using MOV insns.
12259    First register is stored at CFA - CFA_OFFSET.  */
12260 static void
12261 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
12262 {
12263   unsigned int regno;
12264 
12265   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12266     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
12267       {
12268 	ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
12269 	cfa_offset -= GET_MODE_SIZE (V4SFmode);
12270       }
12271 }
12272 
12273 static GTY(()) rtx queued_cfa_restores;
12274 
12275 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
12276    manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
12277    Don't add the note if the previously saved value will be left untouched
12278    within the stack red zone until return, as unwinders can find the same value
12279    in the register and on the stack.  */
12280 
12281 static void
12282 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
12283 {
12284   if (!crtl->shrink_wrapped
12285       && cfa_offset <= cfun->machine->fs.red_zone_offset)
12286     return;
12287 
12288   if (insn)
12289     {
12290       add_reg_note (insn, REG_CFA_RESTORE, reg);
12291       RTX_FRAME_RELATED_P (insn) = 1;
12292     }
12293   else
12294     queued_cfa_restores
12295       = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
12296 }
12297 
12298 /* Add queued REG_CFA_RESTORE notes if any to INSN.  */
12299 
12300 static void
12301 ix86_add_queued_cfa_restore_notes (rtx insn)
12302 {
12303   rtx last;
12304   if (!queued_cfa_restores)
12305     return;
12306   for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
12307     ;
12308   XEXP (last, 1) = REG_NOTES (insn);
12309   REG_NOTES (insn) = queued_cfa_restores;
12310   queued_cfa_restores = NULL_RTX;
12311   RTX_FRAME_RELATED_P (insn) = 1;
12312 }
12313 
12314 /* Expand prologue or epilogue stack adjustment.
12315    The pattern exists to put a dependency on all ebp-based memory accesses.
12316    STYLE should be negative if instructions should be marked as frame related,
12317    zero if the %r11 register is live and cannot be freely used, and positive
12318    otherwise.  */
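/* Typical prologue usage, as in the stack-clash code further below:

     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				GEN_INT (-size), -1,
				m->fs.cfa_reg == stack_pointer_rtx);

   i.e. a frame-related subtraction from the stack pointer that also
   updates the recorded CFA while the CFA is still based on the stack
   pointer.  */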
12319 
12320 static rtx
12321 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
12322 			   int style, bool set_cfa)
12323 {
12324   struct machine_function *m = cfun->machine;
12325   rtx insn;
12326   bool add_frame_related_expr = false;
12327 
12328   if (Pmode == SImode)
12329     insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
12330   else if (x86_64_immediate_operand (offset, DImode))
12331     insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
12332   else
12333     {
12334       rtx tmp;
12335       /* r11 is used by indirect sibcall return as well, set before the
12336 	 epilogue and used after the epilogue.  */
12337       if (style)
12338         tmp = gen_rtx_REG (DImode, R11_REG);
12339       else
12340 	{
12341 	  gcc_assert (src != hard_frame_pointer_rtx
12342 		      && dest != hard_frame_pointer_rtx);
12343 	  tmp = hard_frame_pointer_rtx;
12344 	}
12345       insn = emit_insn (gen_rtx_SET (tmp, offset));
12346       if (style < 0)
12347 	add_frame_related_expr = true;
12348 
12349       insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
12350     }
12351 
12352   insn = emit_insn (insn);
12353   if (style >= 0)
12354     ix86_add_queued_cfa_restore_notes (insn);
12355 
12356   if (set_cfa)
12357     {
12358       rtx r;
12359 
12360       gcc_assert (m->fs.cfa_reg == src);
12361       m->fs.cfa_offset += INTVAL (offset);
12362       m->fs.cfa_reg = dest;
12363 
12364       r = gen_rtx_PLUS (Pmode, src, offset);
12365       r = gen_rtx_SET (dest, r);
12366       add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
12367       RTX_FRAME_RELATED_P (insn) = 1;
12368     }
12369   else if (style < 0)
12370     {
12371       RTX_FRAME_RELATED_P (insn) = 1;
12372       if (add_frame_related_expr)
12373 	{
12374 	  rtx r = gen_rtx_PLUS (Pmode, src, offset);
12375 	  r = gen_rtx_SET (dest, r);
12376 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
12377 	}
12378     }
12379 
12380   if (dest == stack_pointer_rtx)
12381     {
12382       HOST_WIDE_INT ooffset = m->fs.sp_offset;
12383       bool valid = m->fs.sp_valid;
12384       bool realigned = m->fs.sp_realigned;
12385 
12386       if (src == hard_frame_pointer_rtx)
12387 	{
12388 	  valid = m->fs.fp_valid;
12389 	  realigned = false;
12390 	  ooffset = m->fs.fp_offset;
12391 	}
12392       else if (src == crtl->drap_reg)
12393 	{
12394 	  valid = m->fs.drap_valid;
12395 	  realigned = false;
12396 	  ooffset = 0;
12397 	}
12398       else
12399 	{
12400 	  /* Else there are two possibilities: SP itself, which we set
12401 	     up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
12402 	     taken care of by hand along the eh_return path.  */
12403 	  gcc_checking_assert (src == stack_pointer_rtx
12404 			       || offset == const0_rtx);
12405 	}
12406 
12407       m->fs.sp_offset = ooffset - INTVAL (offset);
12408       m->fs.sp_valid = valid;
12409       m->fs.sp_realigned = realigned;
12410     }
12411   return insn;
12412 }
12413 
12414 /* Find an available register to be used as the dynamic realign argument
12415    pointer register.  Such a register will be written in the prologue and
12416    used at the beginning of the function body, so it must not be
12417 	1. parameter passing register.
12418 	2. GOT pointer.
12419    We reuse static-chain register if it is available.  Otherwise, we
12420    use DI for i386 and R13 for x86-64.  We chose R13 since it has
12421    shorter encoding.
12422 
12423    Return: the regno of chosen register.  */
12424 
12425 static unsigned int
12426 find_drap_reg (void)
12427 {
12428   tree decl = cfun->decl;
12429 
12430   /* Always use callee-saved register if there are no caller-saved
12431      registers.  */
12432   if (TARGET_64BIT)
12433     {
12434       /* Use R13 for nested functions or functions that need a static chain.
12435 	 Since a function with a tail call may use any caller-saved
12436 	 register in the epilogue, DRAP must not use a caller-saved
12437 	 register in that case.  */
12438       if (DECL_STATIC_CHAIN (decl)
12439 	  || cfun->machine->no_caller_saved_registers
12440 	  || crtl->tail_call_emit)
12441 	return R13_REG;
12442 
12443       return R10_REG;
12444     }
12445   else
12446     {
12447       /* Use DI for nested functions or functions that need a static chain.
12448 	 Since a function with a tail call may use any caller-saved
12449 	 register in the epilogue, DRAP must not use a caller-saved
12450 	 register in that case.  */
12451       if (DECL_STATIC_CHAIN (decl)
12452 	  || cfun->machine->no_caller_saved_registers
12453 	  || crtl->tail_call_emit)
12454 	return DI_REG;
12455 
12456       /* Reuse static chain register if it isn't used for parameter
12457          passing.  */
12458       if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
12459 	{
12460 	  unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
12461 	  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
12462 	    return CX_REG;
12463 	}
12464       return DI_REG;
12465     }
12466 }
12467 
12468 /* Handle a "force_align_arg_pointer" attribute.  */
12469 
12470 static tree
12471 ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name,
12472 					       tree, int, bool *no_add_attrs)
12473 {
12474   if (TREE_CODE (*node) != FUNCTION_TYPE
12475       && TREE_CODE (*node) != METHOD_TYPE
12476       && TREE_CODE (*node) != FIELD_DECL
12477       && TREE_CODE (*node) != TYPE_DECL)
12478     {
12479       warning (OPT_Wattributes, "%qE attribute only applies to functions",
12480 	       name);
12481       *no_add_attrs = true;
12482     }
12483 
12484   return NULL_TREE;
12485 }
12486 
12487 /* Return minimum incoming stack alignment.  */
12488 
12489 static unsigned int
12490 ix86_minimum_incoming_stack_boundary (bool sibcall)
12491 {
12492   unsigned int incoming_stack_boundary;
12493 
12494   /* Stack of interrupt handler is aligned to 128 bits in 64bit mode.  */
12495   if (cfun->machine->func_type != TYPE_NORMAL)
12496     incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
12497   /* Prefer the one specified at command line. */
12498   else if (ix86_user_incoming_stack_boundary)
12499     incoming_stack_boundary = ix86_user_incoming_stack_boundary;
12500   /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
12501      when -mstackrealign is used, this is not a sibcall check, and the
12502      estimated stack alignment is 128 bits.  */
12503   else if (!sibcall
12504 	   && ix86_force_align_arg_pointer
12505 	   && crtl->stack_alignment_estimated == 128)
12506     incoming_stack_boundary = MIN_STACK_BOUNDARY;
12507   else
12508     incoming_stack_boundary = ix86_default_incoming_stack_boundary;
12509 
12510   /* Incoming stack alignment can be changed on individual functions
12511      via force_align_arg_pointer attribute.  We use the smallest
12512      incoming stack boundary.  */
12513   if (incoming_stack_boundary > MIN_STACK_BOUNDARY
12514       && lookup_attribute (ix86_force_align_arg_pointer_string,
12515 			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
12516     incoming_stack_boundary = MIN_STACK_BOUNDARY;
12517 
12518   /* The incoming stack frame has to be aligned at least at
12519      parm_stack_boundary.  */
12520   if (incoming_stack_boundary < crtl->parm_stack_boundary)
12521     incoming_stack_boundary = crtl->parm_stack_boundary;
12522 
12523   /* The stack at the entry of main is aligned by the runtime.  We use
12524      the smallest incoming stack boundary.  */
12525   if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
12526       && DECL_NAME (current_function_decl)
12527       && MAIN_NAME_P (DECL_NAME (current_function_decl))
12528       && DECL_FILE_SCOPE_P (current_function_decl))
12529     incoming_stack_boundary = MAIN_STACK_BOUNDARY;
12530 
12531   return incoming_stack_boundary;
12532 }
12533 
12534 /* Update incoming stack boundary and estimated stack alignment.  */
12535 
12536 static void
12537 ix86_update_stack_boundary (void)
12538 {
12539   ix86_incoming_stack_boundary
12540     = ix86_minimum_incoming_stack_boundary (false);
12541 
12542   /* x86_64 vararg needs 16byte stack alignment for register save
12543      area.  */
12544   if (TARGET_64BIT
12545       && cfun->stdarg
12546       && crtl->stack_alignment_estimated < 128)
12547     crtl->stack_alignment_estimated = 128;
12548 
12549   /* __tls_get_addr needs to be called with 16-byte aligned stack.  */
12550   if (ix86_tls_descriptor_calls_expanded_in_cfun
12551       && crtl->preferred_stack_boundary < 128)
12552     crtl->preferred_stack_boundary = 128;
12553 }
12554 
12555 /* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
12556    needed or an rtx for DRAP otherwise.  */
12557 
12558 static rtx
12559 ix86_get_drap_rtx (void)
12560 {
12561   /* We must use DRAP if there are outgoing arguments on stack and
12562      ACCUMULATE_OUTGOING_ARGS is false.  */
12563   if (ix86_force_drap
12564       || (cfun->machine->outgoing_args_on_stack
12565 	  && !ACCUMULATE_OUTGOING_ARGS))
12566     crtl->need_drap = true;
12567 
12568   if (stack_realign_drap)
12569     {
12570       /* Assign DRAP to vDRAP and return vDRAP.  */
12571       unsigned int regno = find_drap_reg ();
12572       rtx drap_vreg;
12573       rtx arg_ptr;
12574       rtx_insn *seq, *insn;
12575 
12576       arg_ptr = gen_rtx_REG (Pmode, regno);
12577       crtl->drap_reg = arg_ptr;
12578 
12579       start_sequence ();
12580       drap_vreg = copy_to_reg (arg_ptr);
12581       seq = get_insns ();
12582       end_sequence ();
12583 
12584       insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
12585       if (!optimize)
12586 	{
12587 	  add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
12588 	  RTX_FRAME_RELATED_P (insn) = 1;
12589 	}
12590       return drap_vreg;
12591     }
12592   else
12593     return NULL;
12594 }
12595 
12596 /* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
12597 
12598 static rtx
12599 ix86_internal_arg_pointer (void)
12600 {
12601   return virtual_incoming_args_rtx;
12602 }
12603 
12604 struct scratch_reg {
12605   rtx reg;
12606   bool saved;
12607 };
12608 
12609 /* Return a short-lived scratch register for use on function entry.
12610    In 32-bit mode, it is valid only after the registers are saved
12611    in the prologue.  This register must be released by means of
12612    release_scratch_register_on_entry once it is dead.  */
12613 
12614 static void
12615 get_scratch_register_on_entry (struct scratch_reg *sr)
12616 {
12617   int regno;
12618 
12619   sr->saved = false;
12620 
12621   if (TARGET_64BIT)
12622     {
12623       /* We always use R11 in 64-bit mode.  */
12624       regno = R11_REG;
12625     }
12626   else
12627     {
12628       tree decl = current_function_decl, fntype = TREE_TYPE (decl);
12629       bool fastcall_p
12630 	= lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
12631       bool thiscall_p
12632 	= lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
12633       bool static_chain_p = DECL_STATIC_CHAIN (decl);
12634       int regparm = ix86_function_regparm (fntype, decl);
12635       int drap_regno
12636 	= crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
12637 
12638       /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
12639 	  for the static chain register.  */
12640       if ((regparm < 1 || (fastcall_p && !static_chain_p))
12641 	  && drap_regno != AX_REG)
12642 	regno = AX_REG;
12643       /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
12644 	  for the static chain register.  */
12645       else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
12646         regno = AX_REG;
12647       else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
12648 	regno = DX_REG;
12649       /* ecx is the static chain register.  */
12650       else if (regparm < 3 && !fastcall_p && !thiscall_p
12651 	       && !static_chain_p
12652 	       && drap_regno != CX_REG)
12653 	regno = CX_REG;
12654       else if (ix86_save_reg (BX_REG, true, false))
12655 	regno = BX_REG;
12656       /* esi is the static chain register.  */
12657       else if (!(regparm == 3 && static_chain_p)
12658 	       && ix86_save_reg (SI_REG, true, false))
12659 	regno = SI_REG;
12660       else if (ix86_save_reg (DI_REG, true, false))
12661 	regno = DI_REG;
12662       else
12663 	{
12664 	  regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
12665 	  sr->saved = true;
12666 	}
12667     }
12668 
12669   sr->reg = gen_rtx_REG (Pmode, regno);
12670   if (sr->saved)
12671     {
12672       rtx_insn *insn = emit_insn (gen_push (sr->reg));
12673       RTX_FRAME_RELATED_P (insn) = 1;
12674     }
12675 }
12676 
12677 /* Release a scratch register obtained from the preceding function.
12678 
12679    If RELEASE_VIA_POP is true, we just pop the register off the stack
12680    to release it.  This is what non-Linux systems use with -fstack-check.
12681 
12682    Otherwise we use OFFSET to locate the saved register and the
12683    allocated stack space becomes part of the local frame and is
12684    deallocated by the epilogue.  */
12685 
12686 static void
12687 release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
12688 				   bool release_via_pop)
12689 {
12690   if (sr->saved)
12691     {
12692       if (release_via_pop)
12693 	{
12694 	  struct machine_function *m = cfun->machine;
12695 	  rtx x, insn = emit_insn (gen_pop (sr->reg));
12696 
12697 	  /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop.  */
12698 	  RTX_FRAME_RELATED_P (insn) = 1;
12699 	  x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
12700 	  x = gen_rtx_SET (stack_pointer_rtx, x);
12701 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
12702 	  m->fs.sp_offset -= UNITS_PER_WORD;
12703 	}
12704       else
12705 	{
12706 	  rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset));
12707 	  x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
12708 	  emit_insn (x);
12709 	}
12710     }
12711 }
12712 
12713 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
12714 
12715    This differs from the next routine in that it tries hard to prevent
12716    attacks that jump the stack guard.  Thus it is never allowed to allocate
12717    more than PROBE_INTERVAL bytes of stack space without a suitable
12718    probe.
12719 
12720    INT_REGISTERS_SAVED is true if integer registers have already been
12721    pushed on the stack.  */
12722 
12723 static void
12724 ix86_adjust_stack_and_probe_stack_clash (HOST_WIDE_INT size,
12725 					 const bool int_registers_saved)
12726 {
12727   struct machine_function *m = cfun->machine;
12728 
12729   /* If this function does not statically allocate stack space, then
12730      no probes are needed.  */
12731   if (!size)
12732     {
12733       /* However, the allocation of space via pushes for register
12734 	 saves could be viewed as allocating space, but without the
12735 	 need to probe.  */
12736       if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
12737         dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
12738       else
12739 	dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
12740       return;
12741     }
12742 
12743   /* If we are a noreturn function, then we have to consider the
12744      possibility that we're called via a jump rather than a call.
12745 
12746      Thus we don't have the implicit probe generated by saving the
12747      return address into the stack at the call.  Thus, the stack
12748      pointer could be anywhere in the guard page.  The safe thing
12749      to do is emit a probe now.
12750 
12751      The probe can be avoided if we have already emitted any callee
12752      register saves into the stack or have a frame pointer (which will
12753      have been saved as well).  Those saves will function as implicit
12754      probes.
12755 
12756      ?!? This should be revamped to work like aarch64 and s390 where
12757      we track the offset from the most recent probe.  Normally that
12758      offset would be zero.  For a noreturn function we would reset
12759      it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT).   Then
12760      we just probe when we cross PROBE_INTERVAL.  */
12761   if (TREE_THIS_VOLATILE (cfun->decl)
12762       && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
12763     {
12764       /* We can safely use any register here since we're just going to push
12765 	 its value and immediately pop it back.  But we do try and avoid
12766 	 argument passing registers so as not to introduce dependencies in
12767 	 the pipeline.  For 32 bit we use %esi and for 64 bit we use %rax.  */
12768       rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
12769       rtx_insn *insn_push = emit_insn (gen_push (dummy_reg));
12770       rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg));
12771       m->fs.sp_offset -= UNITS_PER_WORD;
12772       if (m->fs.cfa_reg == stack_pointer_rtx)
12773 	{
12774 	  m->fs.cfa_offset -= UNITS_PER_WORD;
12775 	  rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
12776 	  x = gen_rtx_SET (stack_pointer_rtx, x);
12777 	  add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
12778 	  RTX_FRAME_RELATED_P (insn_push) = 1;
12779 	  x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12780 	  x = gen_rtx_SET (stack_pointer_rtx, x);
12781 	  add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
12782 	  RTX_FRAME_RELATED_P (insn_pop) = 1;
12783 	}
12784       emit_insn (gen_blockage ());
12785     }
12786 
12787   /* If we allocate less than the size of the guard statically,
12788      then no probing is necessary, but we do need to allocate
12789      the stack.  */
12790   if (size < (1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE)))
12791     {
12792       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12793 			         GEN_INT (-size), -1,
12794 			         m->fs.cfa_reg == stack_pointer_rtx);
12795       dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
12796       return;
12797     }
12798 
12799   /* We're allocating a large enough stack frame that we need to
12800      emit probes.  Either emit them inline or in a loop depending
12801      on the size.  */
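      /* A worked example for the inline case, assuming a 4096-byte probe
	 interval and SIZE == 10000: the loop below allocates and probes
	 4096 bytes twice (8192 in total), and the remaining 1808 bytes
	 are then allocated without a probe by the residual adjustment.  */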
12802   HOST_WIDE_INT probe_interval = get_probe_interval ();
12803   if (size <= 4 * probe_interval)
12804     {
12805       HOST_WIDE_INT i;
12806       for (i = probe_interval; i <= size; i += probe_interval)
12807 	{
12808 	  /* Allocate PROBE_INTERVAL bytes.  */
12809 	  rtx insn
12810 	    = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12811 					 GEN_INT (-probe_interval), -1,
12812 					 m->fs.cfa_reg == stack_pointer_rtx);
12813 	  add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
12814 
12815 	  /* And probe at *sp.  */
12816 	  emit_stack_probe (stack_pointer_rtx);
12817 	  emit_insn (gen_blockage ());
12818 	}
12819 
12820       /* We need to allocate space for the residual, but we do not need
12821 	 to probe the residual.  */
12822       HOST_WIDE_INT residual = (i - probe_interval - size);
12823       if (residual)
12824 	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12825 				   GEN_INT (residual), -1,
12826 				   m->fs.cfa_reg == stack_pointer_rtx);
12827       dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
12828     }
12829   else
12830     {
12831       /* We expect the GP registers to be saved when probes are used
12832 	 as the probing sequences might need a scratch register and
12833 	 the routine to allocate one assumes the integer registers
12834 	 have already been saved.  */
12835       gcc_assert (int_registers_saved);
12836 
12837       struct scratch_reg sr;
12838       get_scratch_register_on_entry (&sr);
12839 
12840       /* If we needed to save a register, then account for any space
12841 	 that was pushed (we are not going to pop the register when
12842 	 we do the restore).  */
12843       if (sr.saved)
12844 	size -= UNITS_PER_WORD;
12845 
12846       /* Step 1: round SIZE down to a multiple of the interval.  */
12847       HOST_WIDE_INT rounded_size = size & -probe_interval;
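      /* E.g. (illustrative) with a 4 KiB interval, a SIZE of 0x2900 is
	 rounded down to 0x2000; the remaining 0x900 is allocated without
	 a probe in Step 4 below.  */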
12848 
12849       /* Step 2: compute final value of the loop counter.  Use lea if
12850 	 possible.  */
12851       rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
12852       rtx insn;
12853       if (address_no_seg_operand (addr, Pmode))
12854 	insn = emit_insn (gen_rtx_SET (sr.reg, addr));
12855       else
12856 	{
12857 	  emit_move_insn (sr.reg, GEN_INT (-rounded_size));
12858 	  insn = emit_insn (gen_rtx_SET (sr.reg,
12859 					 gen_rtx_PLUS (Pmode, sr.reg,
12860 						       stack_pointer_rtx)));
12861 	}
12862       if (m->fs.cfa_reg == stack_pointer_rtx)
12863 	{
12864 	  add_reg_note (insn, REG_CFA_DEF_CFA,
12865 			plus_constant (Pmode, sr.reg,
12866 				       m->fs.cfa_offset + rounded_size));
12867 	  RTX_FRAME_RELATED_P (insn) = 1;
12868 	}
12869 
12870       /* Step 3: the loop.  */
12871       rtx size_rtx = GEN_INT (rounded_size);
12872       insn = emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg,
12873 							 size_rtx));
12874       if (m->fs.cfa_reg == stack_pointer_rtx)
12875 	{
12876 	  m->fs.cfa_offset += rounded_size;
12877 	  add_reg_note (insn, REG_CFA_DEF_CFA,
12878 			plus_constant (Pmode, stack_pointer_rtx,
12879 				       m->fs.cfa_offset));
12880 	  RTX_FRAME_RELATED_P (insn) = 1;
12881 	}
12882       m->fs.sp_offset += rounded_size;
12883       emit_insn (gen_blockage ());
12884 
12885       /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
12886 	 is equal to ROUNDED_SIZE.  */
12887 
12888       if (size != rounded_size)
12889 	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12890 				   GEN_INT (rounded_size - size), -1,
12891 				   m->fs.cfa_reg == stack_pointer_rtx);
12892       dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
12893 
12894       /* This does not deallocate the space reserved for the scratch
12895 	 register.  That will be deallocated in the epilogue.  */
12896       release_scratch_register_on_entry (&sr, size, false);
12897     }
12898 
12899   /* Make sure nothing is scheduled before we are done.  */
12900   emit_insn (gen_blockage ());
12901 }
12902 
12903 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
12904 
12905    INT_REGISTERS_SAVED is true if integer registers have already been
12906    pushed on the stack.  */
12907 
12908 static void
12909 ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
12910 			     const bool int_registers_saved)
12911 {
12912   /* We skip the probe for the first interval + a small dope of 4 words and
12913      probe that many bytes past the specified size to maintain a protection
12914      area at the bottom of the stack.  */
12915   const int dope = 4 * UNITS_PER_WORD;
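  /* I.e. 32 bytes with 8-byte words or 16 bytes with 4-byte words.  */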
12916   rtx size_rtx = GEN_INT (size), last;
12917 
12918   /* See if we have a constant small number of probes to generate.  If so,
12919      that's the easy case.  The run-time loop is made up of 9 insns in the
12920      generic case while the compile-time loop is made up of 3+2*(n-1) insns
12921      for n # of intervals.  */
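  /* For instance, by the estimate above two intervals need 3 + 2*1 = 5
     inline insns, which is why small sizes prefer the unrolled
     sequence.  */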
12922   if (size <= 4 * get_probe_interval ())
12923     {
12924       HOST_WIDE_INT i, adjust;
12925       bool first_probe = true;
12926 
12927       /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
12928 	 values of N from 1 until it exceeds SIZE.  If only one probe is
12929 	 needed, this will not generate any code.  Then adjust and probe
12930 	 to PROBE_INTERVAL + SIZE.  */
12931       for (i = get_probe_interval (); i < size; i += get_probe_interval ())
12932 	{
12933 	  if (first_probe)
12934 	    {
12935 	      adjust = 2 * get_probe_interval () + dope;
12936 	      first_probe = false;
12937 	    }
12938 	  else
12939 	    adjust = get_probe_interval ();
12940 
12941 	  emit_insn (gen_rtx_SET (stack_pointer_rtx,
12942 				  plus_constant (Pmode, stack_pointer_rtx,
12943 						 -adjust)));
12944 	  emit_stack_probe (stack_pointer_rtx);
12945 	}
12946 
12947       if (first_probe)
12948 	adjust = size + get_probe_interval () + dope;
12949       else
12950         adjust = size + get_probe_interval () - i;
12951 
12952       emit_insn (gen_rtx_SET (stack_pointer_rtx,
12953 			      plus_constant (Pmode, stack_pointer_rtx,
12954 					     -adjust)));
12955       emit_stack_probe (stack_pointer_rtx);
12956 
12957       /* Adjust back to account for the additional first interval.  */
12958       last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
12959 				     plus_constant (Pmode, stack_pointer_rtx,
12960 						    (get_probe_interval ()
12961 						     + dope))));
12962     }
12963 
12964   /* Otherwise, do the same as above, but in a loop.  Note that we must be
12965      extra careful with variables wrapping around because we might be at
12966      the very top (or the very bottom) of the address space and we have
12967      to be able to handle this case properly; in particular, we use an
12968      equality test for the loop condition.  */
12969   else
12970     {
12971       /* We expect the GP registers to be saved when probes are used
12972 	 as the probing sequences might need a scratch register and
12973 	 the routine to allocate one assumes the integer registers
12974 	 have already been saved.  */
12975       gcc_assert (int_registers_saved);
12976 
12977       HOST_WIDE_INT rounded_size;
12978       struct scratch_reg sr;
12979 
12980       get_scratch_register_on_entry (&sr);
12981 
12982       /* If we needed to save a register, then account for any space
12983 	 that was pushed (we are not going to pop the register when
12984 	 we do the restore).  */
12985       if (sr.saved)
12986 	size -= UNITS_PER_WORD;
12987 
12988       /* Step 1: round SIZE to the previous multiple of the interval.  */
12989 
12990       rounded_size = ROUND_DOWN (size, get_probe_interval ());
12991 
12992 
12993       /* Step 2: compute initial and final value of the loop counter.  */
12994 
12995       /* SP = SP_0 + PROBE_INTERVAL.  */
12996       emit_insn (gen_rtx_SET (stack_pointer_rtx,
12997 			      plus_constant (Pmode, stack_pointer_rtx,
12998 					     - (get_probe_interval () + dope))));
12999 
13000       /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE.  */
13001       if (rounded_size <= (HOST_WIDE_INT_1 << 31))
13002 	emit_insn (gen_rtx_SET (sr.reg,
13003 				plus_constant (Pmode, stack_pointer_rtx,
13004 					       -rounded_size)));
13005       else
13006 	{
13007 	  emit_move_insn (sr.reg, GEN_INT (-rounded_size));
13008 	  emit_insn (gen_rtx_SET (sr.reg,
13009 				  gen_rtx_PLUS (Pmode, sr.reg,
13010 						stack_pointer_rtx)));
13011 	}
13012 
13013 
13014       /* Step 3: the loop
13015 
13016 	 do
13017 	   {
13018 	     SP = SP + PROBE_INTERVAL
13019 	     probe at SP
13020 	   }
13021 	 while (SP != LAST_ADDR)
13022 
13023 	 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
13024 	 values of N from 1 until it is equal to ROUNDED_SIZE.  */
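      /* The adjust_stack_and_probe pattern is expected to expand to the
	 sub / or / cmp / jne loop shown in output_adjust_stack_and_probe
	 below; the exact insns come from the machine description.  */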
13025 
13026       emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
13027 
13028 
13029       /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
13030 	 assert at compile-time that SIZE is equal to ROUNDED_SIZE.  */
13031 
13032       if (size != rounded_size)
13033 	{
13034 	  emit_insn (gen_rtx_SET (stack_pointer_rtx,
13035 			          plus_constant (Pmode, stack_pointer_rtx,
13036 						 rounded_size - size)));
13037 	  emit_stack_probe (stack_pointer_rtx);
13038 	}
13039 
13040       /* Adjust back to account for the additional first interval.  */
13041       last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
13042 				     plus_constant (Pmode, stack_pointer_rtx,
13043 						    (get_probe_interval ()
13044 						     + dope))));
13045 
13046       /* This does not deallocate the space reserved for the scratch
13047 	 register.  That will be deallocated in the epilogue.  */
13048       release_scratch_register_on_entry (&sr, size, false);
13049     }
13050 
13051   /* Even if the stack pointer isn't the CFA register, we need to correctly
13052      describe the adjustments made to it, in particular differentiate the
13053      frame-related ones from the frame-unrelated ones.  */
13054   if (size > 0)
13055     {
13056       rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
13057       XVECEXP (expr, 0, 0)
13058 	= gen_rtx_SET (stack_pointer_rtx,
13059 		       plus_constant (Pmode, stack_pointer_rtx, -size));
13060       XVECEXP (expr, 0, 1)
13061 	= gen_rtx_SET (stack_pointer_rtx,
13062 		       plus_constant (Pmode, stack_pointer_rtx,
13063 				      get_probe_interval () + dope + size));
13064       add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
13065       RTX_FRAME_RELATED_P (last) = 1;
13066 
13067       cfun->machine->fs.sp_offset += size;
13068     }
13069 
13070   /* Make sure nothing is scheduled before we are done.  */
13071   emit_insn (gen_blockage ());
13072 }
13073 
13074 /* Adjust the stack pointer up to REG while probing it.  */
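
/* For illustration, the emitted loop looks roughly like this in AT&T
   syntax (label name, operand sizes and register naming depend on the
   target):

	.LPSRL0:
	sub	$PROBE_INTERVAL, %rsp
	or	$0, (%rsp)
	cmp	%reg, %rsp
	jne	.LPSRL0  */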
13075 
13076 const char *
13077 output_adjust_stack_and_probe (rtx reg)
13078 {
13079   static int labelno = 0;
13080   char loop_lab[32];
13081   rtx xops[2];
13082 
13083   ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
13084 
13085   /* Loop.  */
13086   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
13087 
13088   /* SP = SP + PROBE_INTERVAL.  */
13089   xops[0] = stack_pointer_rtx;
13090   xops[1] = GEN_INT (get_probe_interval ());
13091   output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
13092 
13093   /* Probe at SP.  */
13094   xops[1] = const0_rtx;
13095   output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
13096 
13097   /* Test if SP == LAST_ADDR.  */
13098   xops[0] = stack_pointer_rtx;
13099   xops[1] = reg;
13100   output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
13101 
13102   /* Branch.  */
13103   fputs ("\tjne\t", asm_out_file);
13104   assemble_name_raw (asm_out_file, loop_lab);
13105   fputc ('\n', asm_out_file);
13106 
13107   return "";
13108 }
13109 
13110 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
13111    inclusive.  These are offsets from the current stack pointer.
13112 
13113    INT_REGISTERS_SAVED is true if integer registers have already been
13114    pushed on the stack.  */
13115 
13116 static void
13117 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
13118 			     const bool int_registers_saved)
13119 {
13120   /* See if we have a constant small number of probes to generate.  If so,
13121      that's the easy case.  The run-time loop is made up of 6 insns in the
13122      generic case while the compile-time loop is made up of n insns for n #
13123      of intervals.  */
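  /* Illustrative example: with FIRST = 0x1000, SIZE = 0x3000 and a 4 KiB
     interval, the inline path probes at sp - 0x2000, sp - 0x3000 and
     finally sp - 0x4000.  */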
13124   if (size <= 6 * get_probe_interval ())
13125     {
13126       HOST_WIDE_INT i;
13127 
13128       /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
13129 	 it exceeds SIZE.  If only one probe is needed, this will not
13130 	 generate any code.  Then probe at FIRST + SIZE.  */
13131       for (i = get_probe_interval (); i < size; i += get_probe_interval ())
13132 	emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
13133 					 -(first + i)));
13134 
13135       emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
13136 				       -(first + size)));
13137     }
13138 
13139   /* Otherwise, do the same as above, but in a loop.  Note that we must be
13140      extra careful with variables wrapping around because we might be at
13141      the very top (or the very bottom) of the address space and we have
13142      to be able to handle this case properly; in particular, we use an
13143      equality test for the loop condition.  */
13144   else
13145     {
13146       /* We expect the GP registers to be saved when probes are used
13147 	 as the probing sequences might need a scratch register and
13148 	 the routine to allocate one assumes the integer registers
13149 	 have already been saved.  */
13150       gcc_assert (int_registers_saved);
13151 
13152       HOST_WIDE_INT rounded_size, last;
13153       struct scratch_reg sr;
13154 
13155       get_scratch_register_on_entry (&sr);
13156 
13157 
13158       /* Step 1: round SIZE to the previous multiple of the interval.  */
13159 
13160       rounded_size = ROUND_DOWN (size, get_probe_interval ());
13161 
13162 
13163       /* Step 2: compute initial and final value of the loop counter.  */
13164 
13165       /* TEST_OFFSET = FIRST.  */
13166       emit_move_insn (sr.reg, GEN_INT (-first));
13167 
13168       /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
13169       last = first + rounded_size;
13170 
13171 
13172       /* Step 3: the loop
13173 
13174 	 do
13175 	   {
13176 	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
13177 	     probe at TEST_ADDR
13178 	   }
13179 	 while (TEST_ADDR != LAST_ADDR)
13180 
13181          probes at FIRST + N * PROBE_INTERVAL for values of N from 1
13182          until it is equal to ROUNDED_SIZE.  */
13183 
13184       emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
13185 
13186 
13187       /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
13188 	 that SIZE is equal to ROUNDED_SIZE.  */
13189 
13190       if (size != rounded_size)
13191 	emit_stack_probe (plus_constant (Pmode,
13192 					 gen_rtx_PLUS (Pmode,
13193 						       stack_pointer_rtx,
13194 						       sr.reg),
13195 					 rounded_size - size));
13196 
13197       release_scratch_register_on_entry (&sr, size, true);
13198     }
13199 
13200   /* Make sure nothing is scheduled before we are done.  */
13201   emit_insn (gen_blockage ());
13202 }
13203 
13204 /* Probe a range of stack addresses from REG to END, inclusive.  These are
13205    offsets from the current stack pointer.  */
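
/* For illustration, the emitted loop looks roughly like this in AT&T
   syntax, where REG holds a (negative) offset from the stack pointer and
   END is the final offset, typically a constant:

	.LPSRL0:
	sub	$PROBE_INTERVAL, %reg
	or	$0, (%rsp,%reg)
	cmp	END, %reg
	jne	.LPSRL0  */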
13206 
13207 const char *
13208 output_probe_stack_range (rtx reg, rtx end)
13209 {
13210   static int labelno = 0;
13211   char loop_lab[32];
13212   rtx xops[3];
13213 
13214   ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
13215 
13216   /* Loop.  */
13217   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
13218 
13219   /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
13220   xops[0] = reg;
13221   xops[1] = GEN_INT (get_probe_interval ());
13222   output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
13223 
13224   /* Probe at TEST_ADDR.  */
13225   xops[0] = stack_pointer_rtx;
13226   xops[1] = reg;
13227   xops[2] = const0_rtx;
13228   output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
13229 
13230   /* Test if TEST_ADDR == LAST_ADDR.  */
13231   xops[0] = reg;
13232   xops[1] = end;
13233   output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
13234 
13235   /* Branch.  */
13236   fputs ("\tjne\t", asm_out_file);
13237   assemble_name_raw (asm_out_file, loop_lab);
13238   fputc ('\n', asm_out_file);
13239 
13240   return "";
13241 }
13242 
13243 /* Return true if stack frame is required.  Update STACK_ALIGNMENT
13244    to the largest alignment, in bits, of stack slot used if stack
13245    frame is required and CHECK_STACK_SLOT is true.  */
13246 
13247 static bool
13248 ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
13249 				    bool check_stack_slot)
13250 {
13251   HARD_REG_SET set_up_by_prologue, prologue_used;
13252   basic_block bb;
13253 
13254   CLEAR_HARD_REG_SET (prologue_used);
13255   CLEAR_HARD_REG_SET (set_up_by_prologue);
13256   add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
13257   add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
13258   add_to_hard_reg_set (&set_up_by_prologue, Pmode,
13259 		       HARD_FRAME_POINTER_REGNUM);
13260 
13261   /* The preferred stack alignment is the minimum stack alignment.  */
13262   if (stack_alignment > crtl->preferred_stack_boundary)
13263     stack_alignment = crtl->preferred_stack_boundary;
13264 
13265   bool require_stack_frame = false;
13266 
13267   FOR_EACH_BB_FN (bb, cfun)
13268     {
13269       rtx_insn *insn;
13270       FOR_BB_INSNS (bb, insn)
13271 	if (NONDEBUG_INSN_P (insn)
13272 	    && requires_stack_frame_p (insn, prologue_used,
13273 				       set_up_by_prologue))
13274 	  {
13275 	    require_stack_frame = true;
13276 
13277 	    if (check_stack_slot)
13278 	      {
13279 		/* Find the maximum stack alignment.  */
13280 		subrtx_iterator::array_type array;
13281 		FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13282 		  if (MEM_P (*iter)
13283 		      && (reg_mentioned_p (stack_pointer_rtx,
13284 					   *iter)
13285 			  || reg_mentioned_p (frame_pointer_rtx,
13286 					      *iter)))
13287 		    {
13288 		      unsigned int alignment = MEM_ALIGN (*iter);
13289 		      if (alignment > stack_alignment)
13290 			stack_alignment = alignment;
13291 		    }
13292 	      }
13293 	  }
13294     }
13295 
13296   return require_stack_frame;
13297 }
13298 
13299 /* Finalize stack_realign_needed and frame_pointer_needed flags, which
13300    will guide the prologue/epilogue to be generated in the correct form.  */
13301 
13302 static void
13303 ix86_finalize_stack_frame_flags (void)
13304 {
13305   /* Check if stack realign is really needed after reload, and
13306      store the result in cfun.  */
13307   unsigned int incoming_stack_boundary
13308     = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
13309        ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
13310   unsigned int stack_alignment
13311     = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
13312        ? crtl->max_used_stack_slot_alignment
13313        : crtl->stack_alignment_needed);
13314   unsigned int stack_realign
13315     = (incoming_stack_boundary < stack_alignment);
13316   bool recompute_frame_layout_p = false;
13317 
13318   if (crtl->stack_realign_finalized)
13319     {
13320       /* After stack_realign_needed is finalized, we can no longer
13321 	 change it.  */
13322       gcc_assert (crtl->stack_realign_needed == stack_realign);
13323       return;
13324     }
13325 
13326   /* If the only reason for frame_pointer_needed is that we conservatively
13327      assumed stack realignment might be needed or -fno-omit-frame-pointer
13328      is used, but in the end nothing that needed the stack alignment was
13329      spilled and no stack access was needed, clear frame_pointer_needed and say we
13330      don't need stack realignment.  */
13331   if ((stack_realign || !flag_omit_frame_pointer)
13332       && frame_pointer_needed
13333       && crtl->is_leaf
13334       && crtl->sp_is_unchanging
13335       && !ix86_current_function_calls_tls_descriptor
13336       && !crtl->accesses_prior_frames
13337       && !cfun->calls_alloca
13338       && !crtl->calls_eh_return
13339       /* See ira_setup_eliminable_regset for the rationale.  */
13340       && !(STACK_CHECK_MOVING_SP
13341 	   && flag_stack_check
13342 	   && flag_exceptions
13343 	   && cfun->can_throw_non_call_exceptions)
13344       && !ix86_frame_pointer_required ()
13345       && get_frame_size () == 0
13346       && ix86_nsaved_sseregs () == 0
13347       && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
13348     {
13349       if (ix86_find_max_used_stack_alignment (stack_alignment,
13350 					      stack_realign))
13351 	{
13352 	  /* Stack frame is required.  If stack alignment needed is less
13353 	     than incoming stack boundary, don't realign stack.  */
13354 	  stack_realign = incoming_stack_boundary < stack_alignment;
13355 	  if (!stack_realign)
13356 	    {
13357 	      crtl->max_used_stack_slot_alignment
13358 		= incoming_stack_boundary;
13359 	      crtl->stack_alignment_needed
13360 		= incoming_stack_boundary;
13361 	      /* Also update preferred_stack_boundary for leaf
13362 	         functions.  */
13363 	      crtl->preferred_stack_boundary
13364 		= incoming_stack_boundary;
13365 	    }
13366 	}
13367       else
13368 	{
13369 	  /* If drap has been set, but it actually isn't live at the
13370 	     start of the function, there is no reason to set it up.  */
13371 	  if (crtl->drap_reg)
13372 	    {
13373 	      basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
13374 	      if (! REGNO_REG_SET_P (DF_LR_IN (bb),
13375 				     REGNO (crtl->drap_reg)))
13376 		{
13377 		  crtl->drap_reg = NULL_RTX;
13378 		  crtl->need_drap = false;
13379 		}
13380 	    }
13381 	  else
13382 	    cfun->machine->no_drap_save_restore = true;
13383 
13384 	  frame_pointer_needed = false;
13385 	  stack_realign = false;
13386 	  crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
13387 	  crtl->stack_alignment_needed = incoming_stack_boundary;
13388 	  crtl->stack_alignment_estimated = incoming_stack_boundary;
13389 	  if (crtl->preferred_stack_boundary > incoming_stack_boundary)
13390 	    crtl->preferred_stack_boundary = incoming_stack_boundary;
13391 	  df_finish_pass (true);
13392 	  df_scan_alloc (NULL);
13393 	  df_scan_blocks ();
13394 	  df_compute_regs_ever_live (true);
13395 	  df_analyze ();
13396 
13397 	  if (flag_var_tracking)
13398 	    {
13399 	      /* Since frame pointer is no longer available, replace it with
13400 		 stack pointer - UNITS_PER_WORD in debug insns.  */
13401 	      df_ref ref, next;
13402 	      for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
13403 		   ref; ref = next)
13404 		{
13405 		  next = DF_REF_NEXT_REG (ref);
13406 		  if (!DF_REF_INSN_INFO (ref))
13407 		    continue;
13408 
13409 		  /* Make sure the next ref is for a different instruction,
13410 		     so that we're not affected by the rescan.  */
13411 		  rtx_insn *insn = DF_REF_INSN (ref);
13412 		  while (next && DF_REF_INSN (next) == insn)
13413 		    next = DF_REF_NEXT_REG (next);
13414 
13415 		  if (DEBUG_INSN_P (insn))
13416 		    {
13417 		      bool changed = false;
13418 		      for (; ref != next; ref = DF_REF_NEXT_REG (ref))
13419 			{
13420 			  rtx *loc = DF_REF_LOC (ref);
13421 			  if (*loc == hard_frame_pointer_rtx)
13422 			    {
13423 			      *loc = plus_constant (Pmode,
13424 						    stack_pointer_rtx,
13425 						    -UNITS_PER_WORD);
13426 			      changed = true;
13427 			    }
13428 			}
13429 		      if (changed)
13430 			df_insn_rescan (insn);
13431 		    }
13432 		}
13433 	    }
13434 
13435 	  recompute_frame_layout_p = true;
13436 	}
13437     }
13438   else if (crtl->max_used_stack_slot_alignment
13439 	   > crtl->preferred_stack_boundary)
13440     {
13441       /* We don't need to realign stack.  But we still need to keep
13442 	 stack frame properly aligned to satisfy the largest alignment
13443 	 of stack slots.  */
13444       if (ix86_find_max_used_stack_alignment (stack_alignment, true))
13445 	cfun->machine->max_used_stack_alignment
13446 	  = stack_alignment / BITS_PER_UNIT;
13447     }
13448 
13449   if (crtl->stack_realign_needed != stack_realign)
13450     recompute_frame_layout_p = true;
13451   crtl->stack_realign_needed = stack_realign;
13452   crtl->stack_realign_finalized = true;
13453   if (recompute_frame_layout_p)
13454     ix86_compute_frame_layout ();
13455 }
13456 
13457 /* Delete SET_GOT right after entry block if it is allocated to reg.  */
13458 
13459 static void
13460 ix86_elim_entry_set_got (rtx reg)
13461 {
13462   basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
13463   rtx_insn *c_insn = BB_HEAD (bb);
13464   if (!NONDEBUG_INSN_P (c_insn))
13465     c_insn = next_nonnote_nondebug_insn (c_insn);
13466   if (c_insn && NONJUMP_INSN_P (c_insn))
13467     {
13468       rtx pat = PATTERN (c_insn);
13469       if (GET_CODE (pat) == PARALLEL)
13470 	{
13471 	  rtx vec = XVECEXP (pat, 0, 0);
13472 	  if (GET_CODE (vec) == SET
13473 	      && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
13474 	      && REGNO (XEXP (vec, 0)) == REGNO (reg))
13475 	    delete_insn (c_insn);
13476 	}
13477     }
13478 }
13479 
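/* Return a (set ...) rtx that stores REG to, or loads REG from, the frame
   slot at FRAME_REG + OFFSET, depending on STORE.  */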
13480 static rtx
13481 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
13482 {
13483   rtx addr, mem;
13484 
13485   if (offset)
13486     addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
13487   mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
13488   return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
13489 }
13490 
13491 static inline rtx
13492 gen_frame_load (rtx reg, rtx frame_reg, int offset)
13493 {
13494   return gen_frame_set (reg, frame_reg, offset, false);
13495 }
13496 
13497 static inline rtx
13498 gen_frame_store (rtx reg, rtx frame_reg, int offset)
13499 {
13500   return gen_frame_set (reg, frame_reg, offset, true);
13501 }
13502 
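/* Emit the out-of-line ms2sysv register save: point RAX at the save area
   and emit a single PARALLEL that uses the save stub and stores each
   clobbered register at its offset from RAX.  */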
13503 static void
13504 ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
13505 {
13506   struct machine_function *m = cfun->machine;
13507   const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
13508 			  + m->call_ms2sysv_extra_regs;
13509   rtvec v = rtvec_alloc (ncregs + 1);
13510   unsigned int align, i, vi = 0;
13511   rtx_insn *insn;
13512   rtx sym, addr;
13513   rtx rax = gen_rtx_REG (word_mode, AX_REG);
13514   const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
13515 
13516   /* AL should only be live with sysv_abi.  */
13517   gcc_assert (!ix86_eax_live_at_start_p ());
13518   gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);
13519 
13520   /* Set up RAX as the stub's base pointer.  We use stack_realign_offset since
13521      it is valid whether or not we've actually realigned the stack.  */
13522   align = GET_MODE_ALIGNMENT (V4SFmode);
13523   addr = choose_baseaddr (frame.stack_realign_offset
13524 			  + xlogue.get_stub_ptr_offset (), &align, AX_REG);
13525   gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
13526 
13527   emit_insn (gen_rtx_SET (rax, addr));
13528 
13529   /* Get the stub symbol.  */
13530   sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
13531 						  : XLOGUE_STUB_SAVE);
13532   RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
13533 
13534   for (i = 0; i < ncregs; ++i)
13535     {
13536       const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
13537       rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
13538 			     r.regno);
13539       RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
13540     }
13541 
13542   gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
13543 
13544   insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
13545   RTX_FRAME_RELATED_P (insn) = true;
13546 }
13547 
13548 /* Expand the prologue into a bunch of separate insns.  */
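
/* Roughly, the order below is: handle the ms_hook and static-chain-on-stack
   prologues, set up DRAP if realignment via DRAP is needed, push the frame
   pointer, save the integer registers, realign the stack if needed, allocate
   (and possibly probe) the frame, then save the SSE registers or call the
   out-of-line save stub, with the profiling SET_GOT and the SEH/red-zone
   blockages at the end.  This is only a summary; the code is authoritative.  */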
13549 
13550 void
13551 ix86_expand_prologue (void)
13552 {
13553   struct machine_function *m = cfun->machine;
13554   rtx insn, t;
13555   HOST_WIDE_INT allocate;
13556   bool int_registers_saved;
13557   bool sse_registers_saved;
13558   bool save_stub_call_needed;
13559   rtx static_chain = NULL_RTX;
13560 
13561   if (ix86_function_naked (current_function_decl))
13562     return;
13563 
13564   ix86_finalize_stack_frame_flags ();
13565 
13566   /* DRAP should not coexist with stack_realign_fp */
13567   gcc_assert (!(crtl->drap_reg && stack_realign_fp));
13568 
13569   memset (&m->fs, 0, sizeof (m->fs));
13570 
13571   /* Initialize CFA state for before the prologue.  */
13572   m->fs.cfa_reg = stack_pointer_rtx;
13573   m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
13574 
13575   /* Track SP offset to the CFA.  We continue tracking this after we've
13576      swapped the CFA register away from SP.  In the case of re-alignment
13577      this is fudged; we're interested in offsets within the local frame.  */
13578   m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
13579   m->fs.sp_valid = true;
13580   m->fs.sp_realigned = false;
13581 
13582   const struct ix86_frame &frame = cfun->machine->frame;
13583 
13584   if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
13585     {
13586       /* We should have already generated an error for any use of
13587          ms_hook on a nested function.  */
13588       gcc_checking_assert (!ix86_static_chain_on_stack);
13589 
13590       /* Check if profiling is active and we shall use the profiling-before-
13591          prologue variant.  If so, sorry.  */
13592       if (crtl->profile && flag_fentry != 0)
13593         sorry ("ms_hook_prologue attribute isn%'t compatible "
13594 	       "with -mfentry for 32-bit");
13595 
13596       /* In ix86_asm_output_function_label we emitted:
13597 	 8b ff     movl.s %edi,%edi
13598 	 55        push   %ebp
13599 	 8b ec     movl.s %esp,%ebp
13600 
13601 	 This matches the hookable function prologue in Win32 API
13602 	 functions in Microsoft Windows XP Service Pack 2 and newer.
13603 	 Wine uses this to enable Windows apps to hook the Win32 API
13604 	 functions provided by Wine.
13605 
13606 	 What that means is that we've already set up the frame pointer.  */
13607 
13608       if (frame_pointer_needed
13609 	  && !(crtl->drap_reg && crtl->stack_realign_needed))
13610 	{
13611 	  rtx push, mov;
13612 
13613 	  /* We've decided to use the frame pointer already set up.
13614 	     Describe this to the unwinder by pretending that both
13615 	     push and mov insns happen right here.
13616 
13617 	     Putting the unwind info here at the end of the ms_hook
13618 	     is done so that we can make absolutely certain we get
13619 	     the required byte sequence at the start of the function,
13620 	     rather than relying on an assembler that can produce
13621 	     the exact encoding required.
13622 
13623 	     However it does mean (in the unpatched case) that we have
13624 	     a 1 insn window where the asynchronous unwind info is
13625 	     incorrect.  However, if we placed the unwind info at
13626 	     its correct location we would have incorrect unwind info
13627 	     in the patched case.  Which is probably all moot since
13628 	     I don't expect Wine generates dwarf2 unwind info for the
13629 	     system libraries that use this feature.  */
13630 
13631 	  insn = emit_insn (gen_blockage ());
13632 
13633 	  push = gen_push (hard_frame_pointer_rtx);
13634 	  mov = gen_rtx_SET (hard_frame_pointer_rtx,
13635 			     stack_pointer_rtx);
13636 	  RTX_FRAME_RELATED_P (push) = 1;
13637 	  RTX_FRAME_RELATED_P (mov) = 1;
13638 
13639 	  RTX_FRAME_RELATED_P (insn) = 1;
13640 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13641 			gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
13642 
13643 	  /* Note that gen_push incremented m->fs.cfa_offset, even
13644 	     though we didn't emit the push insn here.  */
13645 	  m->fs.cfa_reg = hard_frame_pointer_rtx;
13646 	  m->fs.fp_offset = m->fs.cfa_offset;
13647 	  m->fs.fp_valid = true;
13648 	}
13649       else
13650 	{
13651 	  /* The frame pointer is not needed so pop %ebp again.
13652 	     This leaves us with a pristine state.  */
13653 	  emit_insn (gen_pop (hard_frame_pointer_rtx));
13654 	}
13655     }
13656 
13657   /* The first insn of a function that accepts its static chain on the
13658      stack is to push the register that would be filled in by a direct
13659      call.  This insn will be skipped by the trampoline.  */
13660   else if (ix86_static_chain_on_stack)
13661     {
13662       static_chain = ix86_static_chain (cfun->decl, false);
13663       insn = emit_insn (gen_push (static_chain));
13664       emit_insn (gen_blockage ());
13665 
13666       /* We don't want to interpret this push insn as a register save,
13667 	 only as a stack adjustment.  The real copy of the register as
13668 	 a save will be done later, if needed.  */
13669       t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
13670       t = gen_rtx_SET (stack_pointer_rtx, t);
13671       add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
13672       RTX_FRAME_RELATED_P (insn) = 1;
13673     }
13674 
13675   /* Emit prologue code to adjust stack alignment and setup DRAP, in case
13676      DRAP is needed and stack realignment is really needed after reload.  */
13677   if (stack_realign_drap)
13678     {
13679       int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
13680 
13681       /* Can't use DRAP in interrupt function.  */
13682       if (cfun->machine->func_type != TYPE_NORMAL)
13683 	sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
13684 	       "in interrupt service routine.  This may be worked "
13685 	       "around by avoiding functions with aggregate return.");
13686 
13687       /* Only need to push parameter pointer reg if it is caller saved.  */
13688       if (!call_used_regs[REGNO (crtl->drap_reg)])
13689 	{
13690 	  /* Push arg pointer reg */
13691 	  insn = emit_insn (gen_push (crtl->drap_reg));
13692 	  RTX_FRAME_RELATED_P (insn) = 1;
13693 	}
13694 
13695       /* Grab the argument pointer.  */
13696       t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
13697       insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
13698       RTX_FRAME_RELATED_P (insn) = 1;
13699       m->fs.cfa_reg = crtl->drap_reg;
13700       m->fs.cfa_offset = 0;
13701 
13702       /* Align the stack.  */
13703       insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
13704 					stack_pointer_rtx,
13705 					GEN_INT (-align_bytes)));
13706       RTX_FRAME_RELATED_P (insn) = 1;
13707 
13708       /* Replicate the return address on the stack so that return
13709 	 address can be reached via (argp - 1) slot.  This is needed
13710 	 to implement macro RETURN_ADDR_RTX and intrinsic function
13711 	 expand_builtin_return_addr etc.  */
13712       t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
13713       t = gen_frame_mem (word_mode, t);
13714       insn = emit_insn (gen_push (t));
13715       RTX_FRAME_RELATED_P (insn) = 1;
13716 
13717       /* For the purposes of frame and register save area addressing,
13718 	 we've started over with a new frame.  */
13719       m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
13720       m->fs.realigned = true;
13721 
13722       if (static_chain)
13723 	{
13724 	  /* Replicate static chain on the stack so that static chain
13725 	     can be reached via (argp - 2) slot.  This is needed for
13726 	     nested function with stack realignment.  */
13727 	  insn = emit_insn (gen_push (static_chain));
13728 	  RTX_FRAME_RELATED_P (insn) = 1;
13729 	}
13730     }
13731 
13732   int_registers_saved = (frame.nregs == 0);
13733   sse_registers_saved = (frame.nsseregs == 0);
13734   save_stub_call_needed = (m->call_ms2sysv);
13735   gcc_assert (sse_registers_saved || !save_stub_call_needed);
13736 
13737   if (frame_pointer_needed && !m->fs.fp_valid)
13738     {
13739       /* Note: AT&T enter does NOT have reversed args.  Enter is probably
13740          slower on all targets.  Also sdb didn't like it.  */
13741       insn = emit_insn (gen_push (hard_frame_pointer_rtx));
13742       RTX_FRAME_RELATED_P (insn) = 1;
13743 
13744       /* Push registers now, before setting the frame pointer
13745 	 on SEH target.  */
13746       if (!int_registers_saved
13747 	  && TARGET_SEH
13748 	  && !frame.save_regs_using_mov)
13749 	{
13750 	  ix86_emit_save_regs ();
13751 	  int_registers_saved = true;
13752 	  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
13753 	}
13754 
13755       if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
13756 	{
13757 	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
13758 	  RTX_FRAME_RELATED_P (insn) = 1;
13759 
13760 	  if (m->fs.cfa_reg == stack_pointer_rtx)
13761 	    m->fs.cfa_reg = hard_frame_pointer_rtx;
13762 	  m->fs.fp_offset = m->fs.sp_offset;
13763 	  m->fs.fp_valid = true;
13764 	}
13765     }
13766 
13767   if (!int_registers_saved)
13768     {
13769       /* If saving registers via PUSH, do so now.  */
13770       if (!frame.save_regs_using_mov)
13771 	{
13772 	  ix86_emit_save_regs ();
13773 	  int_registers_saved = true;
13774 	  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
13775 	}
13776 
13777       /* When using red zone we may start register saving before allocating
13778 	 the stack frame saving one cycle of the prologue.  However, avoid
13779 	 doing this if we have to probe the stack; at least on x86_64 the
13780 	 stack probe can turn into a call that clobbers a red zone location. */
13781       else if (ix86_using_red_zone ()
13782 	       && (! TARGET_STACK_PROBE
13783 		   || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
13784 	{
13785 	  ix86_emit_save_regs_using_mov (frame.reg_save_offset);
13786 	  int_registers_saved = true;
13787 	}
13788     }
13789 
13790   if (stack_realign_fp)
13791     {
13792       int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
13793       gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
13794 
13795       /* Record last valid frame pointer offset.  */
13796       m->fs.sp_realigned_fp_last = frame.reg_save_offset;
13797 
13798       /* The computation of the size of the re-aligned stack frame means
13799 	 that we must allocate the size of the register save area before
13800 	 performing the actual alignment.  Otherwise we cannot guarantee
13801 	 that there's enough storage above the realignment point.  */
13802       allocate = frame.reg_save_offset - m->fs.sp_offset
13803 		 + frame.stack_realign_allocate;
13804       if (allocate)
13805         pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13806 				   GEN_INT (-allocate), -1, false);
13807 
13808       /* Align the stack.  */
13809       insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
13810 					stack_pointer_rtx,
13811 					GEN_INT (-align_bytes)));
13812       m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
13813       m->fs.sp_realigned_offset = m->fs.sp_offset
13814 					      - frame.stack_realign_allocate;
13815       /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
13816 	 Beyond this point, stack access should be done via choose_baseaddr or
13817 	 by using sp_valid_at and fp_valid_at to determine the correct base
13818 	 register.  Henceforth, any CFA offset should be thought of as logical
13819 	 and not physical.  */
13820       gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
13821       gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
13822       m->fs.sp_realigned = true;
13823 
13824       /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
13825 	 is needed to describe where a register is saved using a realigned
13826 	 stack pointer, so we need to invalidate the stack pointer for that
13827 	 target.  */
13828       if (TARGET_SEH)
13829 	m->fs.sp_valid = false;
13830 
13831       /* If SP offset is non-immediate after allocation of the stack frame,
13832 	 then emit SSE saves or stub call prior to allocating the rest of the
13833 	 stack frame.  This is less efficient for the out-of-line stub because
13834 	 we can't combine allocations across the call barrier, but it's better
13835 	 than using a scratch register.  */
13836       else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
13837 						   - m->fs.sp_realigned_offset),
13838 					  Pmode))
13839 	{
13840 	  if (!sse_registers_saved)
13841 	    {
13842 	      ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
13843 	      sse_registers_saved = true;
13844 	    }
13845 	  else if (save_stub_call_needed)
13846 	    {
13847 	      ix86_emit_outlined_ms2sysv_save (frame);
13848 	      save_stub_call_needed = false;
13849 	    }
13850 	}
13851     }
13852 
13853   allocate = frame.stack_pointer_offset - m->fs.sp_offset;
13854 
13855   if (flag_stack_usage_info)
13856     {
13857       /* We start to count from ARG_POINTER.  */
13858       HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
13859 
13860       /* If it was realigned, take into account the fake frame.  */
13861       if (stack_realign_drap)
13862 	{
13863 	  if (ix86_static_chain_on_stack)
13864 	    stack_size += UNITS_PER_WORD;
13865 
13866 	  if (!call_used_regs[REGNO (crtl->drap_reg)])
13867 	    stack_size += UNITS_PER_WORD;
13868 
13869 	  /* This over-estimates by 1 minimal-stack-alignment-unit but
13870 	     mitigates that by counting in the new return address slot.  */
13871 	  current_function_dynamic_stack_size
13872 	    += crtl->stack_alignment_needed / BITS_PER_UNIT;
13873 	}
13874 
13875       current_function_static_stack_size = stack_size;
13876     }
13877 
13878   /* On SEH target with very large frame size, allocate an area to save
13879      SSE registers (as the very large allocation won't be described).  */
13880   if (TARGET_SEH
13881       && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
13882       && !sse_registers_saved)
13883     {
13884       HOST_WIDE_INT sse_size =
13885 	frame.sse_reg_save_offset - frame.reg_save_offset;
13886 
13887       gcc_assert (int_registers_saved);
13888 
13889       /* No need to do stack checking as the area will be immediately
13890 	 written.  */
13891       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13892 			         GEN_INT (-sse_size), -1,
13893 				 m->fs.cfa_reg == stack_pointer_rtx);
13894       allocate -= sse_size;
13895       ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
13896       sse_registers_saved = true;
13897     }
13898 
13899   /* The stack has already been decremented by the instruction calling us
13900      so probe if the size is non-negative to preserve the protection area.  */
13901   if (allocate >= 0
13902       && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
13903 	  || flag_stack_clash_protection))
13904     {
13905       if (flag_stack_clash_protection)
13906 	{
13907 	  ix86_adjust_stack_and_probe_stack_clash (allocate,
13908 						   int_registers_saved);
13909 	  allocate = 0;
13910 	}
13911       else if (STACK_CHECK_MOVING_SP)
13912 	{
13913 	  if (!(crtl->is_leaf && !cfun->calls_alloca
13914 		&& allocate <= get_probe_interval ()))
13915 	    {
13916 	      ix86_adjust_stack_and_probe (allocate, int_registers_saved);
13917 	      allocate = 0;
13918 	    }
13919 	}
13920       else
13921 	{
13922 	  HOST_WIDE_INT size = allocate;
13923 
13924 	  if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
13925 	    size = 0x80000000 - get_stack_check_protect () - 1;
13926 
13927 	  if (TARGET_STACK_PROBE)
13928 	    {
13929 	      if (crtl->is_leaf && !cfun->calls_alloca)
13930 		{
13931 		  if (size > get_probe_interval ())
13932 		    ix86_emit_probe_stack_range (0, size, int_registers_saved);
13933 		}
13934 	      else
13935 		ix86_emit_probe_stack_range (0,
13936 					     size + get_stack_check_protect (),
13937 					     int_registers_saved);
13938 	    }
13939 	  else
13940 	    {
13941 	      if (crtl->is_leaf && !cfun->calls_alloca)
13942 		{
13943 		  if (size > get_probe_interval ()
13944 		      && size > get_stack_check_protect ())
13945 		    ix86_emit_probe_stack_range (get_stack_check_protect (),
13946 						 (size
13947 						  - get_stack_check_protect ()),
13948 						 int_registers_saved);
13949 		}
13950 	      else
13951 		ix86_emit_probe_stack_range (get_stack_check_protect (), size,
13952 					     int_registers_saved);
13953 	    }
13954 	}
13955     }
13956 
13957   if (allocate == 0)
13958     ;
13959   else if (!ix86_target_stack_probe ()
13960 	   || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
13961     {
13962       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13963 			         GEN_INT (-allocate), -1,
13964 			         m->fs.cfa_reg == stack_pointer_rtx);
13965     }
13966   else
13967     {
13968       rtx eax = gen_rtx_REG (Pmode, AX_REG);
13969       rtx r10 = NULL;
13970       rtx (*adjust_stack_insn)(rtx, rtx, rtx);
13971       const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
13972       bool eax_live = ix86_eax_live_at_start_p ();
13973       bool r10_live = false;
13974 
13975       if (TARGET_64BIT)
13976         r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
13977 
13978       if (eax_live)
13979 	{
13980 	  insn = emit_insn (gen_push (eax));
13981 	  allocate -= UNITS_PER_WORD;
13982 	  /* Note that SEH directives need to continue tracking the stack
13983 	     pointer even after the frame pointer has been set up.  */
13984 	  if (sp_is_cfa_reg || TARGET_SEH)
13985 	    {
13986 	      if (sp_is_cfa_reg)
13987 		m->fs.cfa_offset += UNITS_PER_WORD;
13988 	      RTX_FRAME_RELATED_P (insn) = 1;
13989 	      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13990 			    gen_rtx_SET (stack_pointer_rtx,
13991 					 plus_constant (Pmode, stack_pointer_rtx,
13992 							-UNITS_PER_WORD)));
13993 	    }
13994 	}
13995 
13996       if (r10_live)
13997 	{
13998 	  r10 = gen_rtx_REG (Pmode, R10_REG);
13999 	  insn = emit_insn (gen_push (r10));
14000 	  allocate -= UNITS_PER_WORD;
14001 	  if (sp_is_cfa_reg || TARGET_SEH)
14002 	    {
14003 	      if (sp_is_cfa_reg)
14004 		m->fs.cfa_offset += UNITS_PER_WORD;
14005 	      RTX_FRAME_RELATED_P (insn) = 1;
14006 	      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
14007 			    gen_rtx_SET (stack_pointer_rtx,
14008 					 plus_constant (Pmode, stack_pointer_rtx,
14009 							-UNITS_PER_WORD)));
14010 	    }
14011 	}
14012 
14013       emit_move_insn (eax, GEN_INT (allocate));
14014       emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
14015 
14016       /* Use the fact that AX still contains ALLOCATE.  */
14017       adjust_stack_insn = (Pmode == DImode
14018 			   ? gen_pro_epilogue_adjust_stack_di_sub
14019 			   : gen_pro_epilogue_adjust_stack_si_sub);
14020 
14021       insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
14022 					   stack_pointer_rtx, eax));
14023 
14024       if (sp_is_cfa_reg || TARGET_SEH)
14025 	{
14026 	  if (sp_is_cfa_reg)
14027 	    m->fs.cfa_offset += allocate;
14028 	  RTX_FRAME_RELATED_P (insn) = 1;
14029 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
14030 			gen_rtx_SET (stack_pointer_rtx,
14031 				     plus_constant (Pmode, stack_pointer_rtx,
14032 						    -allocate)));
14033 	}
14034       m->fs.sp_offset += allocate;
14035 
14036       /* Use stack_pointer_rtx for relative addressing so that code
14037 	 works for realigned stack, too.  */
14038       if (r10_live && eax_live)
14039         {
14040 	  t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
14041 	  emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
14042 			  gen_frame_mem (word_mode, t));
14043 	  t = plus_constant (Pmode, t, UNITS_PER_WORD);
14044 	  emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
14045 			  gen_frame_mem (word_mode, t));
14046 	}
14047       else if (eax_live || r10_live)
14048 	{
14049 	  t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
14050 	  emit_move_insn (gen_rtx_REG (word_mode,
14051 				       (eax_live ? AX_REG : R10_REG)),
14052 			  gen_frame_mem (word_mode, t));
14053 	}
14054     }
14055   gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
14056 
14057   /* If we haven't already set up the frame pointer, do so now.  */
14058   if (frame_pointer_needed && !m->fs.fp_valid)
14059     {
14060       insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
14061 			    GEN_INT (frame.stack_pointer_offset
14062 				     - frame.hard_frame_pointer_offset));
14063       insn = emit_insn (insn);
14064       RTX_FRAME_RELATED_P (insn) = 1;
14065       add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
14066 
14067       if (m->fs.cfa_reg == stack_pointer_rtx)
14068 	m->fs.cfa_reg = hard_frame_pointer_rtx;
14069       m->fs.fp_offset = frame.hard_frame_pointer_offset;
14070       m->fs.fp_valid = true;
14071     }
14072 
14073   if (!int_registers_saved)
14074     ix86_emit_save_regs_using_mov (frame.reg_save_offset);
14075   if (!sse_registers_saved)
14076     ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
14077   else if (save_stub_call_needed)
14078     ix86_emit_outlined_ms2sysv_save (frame);
14079 
14080   /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
14081      in PROLOGUE.  */
14082   if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
14083     {
14084       rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
14085       insn = emit_insn (gen_set_got (pic));
14086       RTX_FRAME_RELATED_P (insn) = 1;
14087       add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
14088       emit_insn (gen_prologue_use (pic));
14089       /* Delete the already emitted SET_GOT if it exists and is allocated to
14090 	 REAL_PIC_OFFSET_TABLE_REGNUM.  */
14091       ix86_elim_entry_set_got (pic);
14092     }
14093 
14094   if (crtl->drap_reg && !crtl->stack_realign_needed)
14095     {
14096       /* vDRAP is set up, but after reload it turns out stack realignment
14097          isn't necessary; here we emit prologue code to set up DRAP
14098          without the stack realignment adjustment.  */
14099       t = choose_baseaddr (0, NULL);
14100       emit_insn (gen_rtx_SET (crtl->drap_reg, t));
14101     }
14102 
14103   /* Prevent instructions from being scheduled into register save push
14104      sequence when access to the redzone area is done through frame pointer.
14105      The offset between the frame pointer and the stack pointer is calculated
14106      relative to the value of the stack pointer at the end of the function
14107      prologue, and moving instructions that access redzone area via frame
14108      pointer inside push sequence violates this assumption.  */
14109   if (frame_pointer_needed && frame.red_zone_size)
14110     emit_insn (gen_memory_blockage ());
14111 
14112   /* SEH requires that the prologue end within 256 bytes of the start of
14113      the function.  Prevent instruction schedules that would extend that.
14114      Further, prevent alloca modifications to the stack pointer from being
14115      combined with prologue modifications.  */
14116   if (TARGET_SEH)
14117     emit_insn (gen_prologue_use (stack_pointer_rtx));
14118 }
14119 
14120 /* Emit code to restore REG using a POP insn.  */
14121 
14122 static void
14123 ix86_emit_restore_reg_using_pop (rtx reg)
14124 {
14125   struct machine_function *m = cfun->machine;
14126   rtx_insn *insn = emit_insn (gen_pop (reg));
14127 
14128   ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
14129   m->fs.sp_offset -= UNITS_PER_WORD;
14130 
14131   if (m->fs.cfa_reg == crtl->drap_reg
14132       && REGNO (reg) == REGNO (crtl->drap_reg))
14133     {
14134       /* Previously we'd represented the CFA as an expression
14135 	 like *(%ebp - 8).  We've just popped that value from
14136 	 the stack, which means we need to reset the CFA to
14137 	 the drap register.  This will remain until we restore
14138 	 the stack pointer.  */
14139       add_reg_note (insn, REG_CFA_DEF_CFA, reg);
14140       RTX_FRAME_RELATED_P (insn) = 1;
14141 
14142       /* This means that the DRAP register is valid for addressing too.  */
14143       m->fs.drap_valid = true;
14144       return;
14145     }
14146 
14147   if (m->fs.cfa_reg == stack_pointer_rtx)
14148     {
14149       rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
14150       x = gen_rtx_SET (stack_pointer_rtx, x);
14151       add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
14152       RTX_FRAME_RELATED_P (insn) = 1;
14153 
14154       m->fs.cfa_offset -= UNITS_PER_WORD;
14155     }
14156 
14157   /* When the frame pointer is the CFA, and we pop it, we are
14158      swapping back to the stack pointer as the CFA.  This happens
14159      for stack frames that don't allocate other data, so we assume
14160      the stack pointer is now pointing at the return address, i.e.
14161      the function entry state, which makes the offset be 1 word.  */
14162   if (reg == hard_frame_pointer_rtx)
14163     {
14164       m->fs.fp_valid = false;
14165       if (m->fs.cfa_reg == hard_frame_pointer_rtx)
14166 	{
14167 	  m->fs.cfa_reg = stack_pointer_rtx;
14168 	  m->fs.cfa_offset -= UNITS_PER_WORD;
14169 
14170 	  add_reg_note (insn, REG_CFA_DEF_CFA,
14171 			gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14172 				      GEN_INT (m->fs.cfa_offset)));
14173 	  RTX_FRAME_RELATED_P (insn) = 1;
14174 	}
14175     }
14176 }
14177 
14178 /* Emit code to restore saved registers using POP insns.  */
14179 
14180 static void
14181 ix86_emit_restore_regs_using_pop (void)
14182 {
14183   unsigned int regno;
14184 
14185   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
14186     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
14187       ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
14188 }
14189 
14190 /* Emit code and notes for the LEAVE instruction.  If INSN is non-null,
14191    omit the emit and only attach the notes to INSN.  */
14192 
14193 static void
14194 ix86_emit_leave (rtx_insn *insn)
14195 {
14196   struct machine_function *m = cfun->machine;
14197   if (!insn)
14198     insn = emit_insn (ix86_gen_leave ());
14199 
14200   ix86_add_queued_cfa_restore_notes (insn);
14201 
14202   gcc_assert (m->fs.fp_valid);
14203   m->fs.sp_valid = true;
14204   m->fs.sp_realigned = false;
14205   m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
14206   m->fs.fp_valid = false;
14207 
14208   if (m->fs.cfa_reg == hard_frame_pointer_rtx)
14209     {
14210       m->fs.cfa_reg = stack_pointer_rtx;
14211       m->fs.cfa_offset = m->fs.sp_offset;
14212 
14213       add_reg_note (insn, REG_CFA_DEF_CFA,
14214 		    plus_constant (Pmode, stack_pointer_rtx,
14215 				   m->fs.sp_offset));
14216       RTX_FRAME_RELATED_P (insn) = 1;
14217     }
14218   ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
14219 			     m->fs.fp_offset);
14220 }
14221 
14222 /* Emit code to restore saved registers using MOV insns.
14223    First register is restored from CFA - CFA_OFFSET.  */
14224 static void
14225 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
14226 				  bool maybe_eh_return)
14227 {
14228   struct machine_function *m = cfun->machine;
14229   unsigned int regno;
14230 
14231   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
14232     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
14233       {
14234 	rtx reg = gen_rtx_REG (word_mode, regno);
14235 	rtx mem;
14236 	rtx_insn *insn;
14237 
14238 	mem = choose_baseaddr (cfa_offset, NULL);
14239 	mem = gen_frame_mem (word_mode, mem);
14240 	insn = emit_move_insn (reg, mem);
14241 
14242         if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
14243 	  {
14244 	    /* Previously we'd represented the CFA as an expression
14245 	       like *(%ebp - 8).  We've just popped that value from
14246 	       the stack, which means we need to reset the CFA to
14247 	       the drap register.  This will remain until we restore
14248 	       the stack pointer.  */
14249 	    add_reg_note (insn, REG_CFA_DEF_CFA, reg);
14250 	    RTX_FRAME_RELATED_P (insn) = 1;
14251 
14252 	    /* This means that the DRAP register is valid for addressing.  */
14253 	    m->fs.drap_valid = true;
14254 	  }
14255 	else
14256 	  ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
14257 
14258 	cfa_offset -= UNITS_PER_WORD;
14259       }
14260 }
14261 
14262 /* Emit code to restore saved SSE registers using MOV insns.
14263    First register is restored from CFA - CFA_OFFSET.  */
14264 static void
14265 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
14266 				      bool maybe_eh_return)
14267 {
14268   unsigned int regno;
14269 
14270   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
14271     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
14272       {
14273 	rtx reg = gen_rtx_REG (V4SFmode, regno);
14274 	rtx mem;
14275 	unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
14276 
14277 	mem = choose_baseaddr (cfa_offset, &align);
14278 	mem = gen_rtx_MEM (V4SFmode, mem);
14279 
14280 	/* The location alignment depends upon the base register.  */
14281 	align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
14282 	gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
14283 	set_mem_align (mem, align);
14284 	emit_insn (gen_rtx_SET (reg, mem));
14285 
14286 	ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
14287 
14288 	cfa_offset -= GET_MODE_SIZE (V4SFmode);
14289       }
14290 }
14291 
14292 static void
14293 ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
14294 				  bool use_call, int style)
14295 {
14296   struct machine_function *m = cfun->machine;
14297   const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
14298 			  + m->call_ms2sysv_extra_regs;
14299   rtvec v;
14300   unsigned int elems_needed, align, i, vi = 0;
14301   rtx_insn *insn;
14302   rtx sym, tmp;
14303   rtx rsi = gen_rtx_REG (word_mode, SI_REG);
14304   rtx r10 = NULL_RTX;
14305   const struct xlogue_layout &xlogue = xlogue_layout::get_instance ();
14306   HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
14307   HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
14308   rtx rsi_frame_load = NULL_RTX;
14309   HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
14310   enum xlogue_stub stub;
14311 
14312   gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
14313 
14314   /* If using a realigned stack, we should never start with padding.  */
14315   gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
14316 
14317   /* Setup RSI as the stub's base pointer.  */
14318   align = GET_MODE_ALIGNMENT (V4SFmode);
14319   tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
14320   gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
14321 
14322   emit_insn (gen_rtx_SET (rsi, tmp));
14323 
14324   /* Get a symbol for the stub.  */
14325   if (frame_pointer_needed)
14326     stub = use_call ? XLOGUE_STUB_RESTORE_HFP
14327 		    : XLOGUE_STUB_RESTORE_HFP_TAIL;
14328   else
14329     stub = use_call ? XLOGUE_STUB_RESTORE
14330 		    : XLOGUE_STUB_RESTORE_TAIL;
14331   sym = xlogue.get_stub_rtx (stub);
14332 
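  /* Size the PARALLEL: one element for each register the stub restores, plus
     a USE of the stub symbol when calling it, or the return, the USE and the
     SP/FP adjustments when jumping to it as the tail call.  */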
14333   elems_needed = ncregs;
14334   if (use_call)
14335     elems_needed += 1;
14336   else
14337     elems_needed += frame_pointer_needed ? 5 : 3;
14338   v = rtvec_alloc (elems_needed);
14339 
14340   /* We call the epilogue stub when we need to pop incoming args or when a
14341      sibling call will be the tail call.  Otherwise, we emit a jmp to the
14342      epilogue stub and the stub itself becomes the tail call.  */
14343   if (use_call)
14344       RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
14345   else
14346     {
14347       RTVEC_ELT (v, vi++) = ret_rtx;
14348       RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
14349       if (frame_pointer_needed)
14350 	{
14351 	  rtx rbp = gen_rtx_REG (DImode, BP_REG);
14352 	  gcc_assert (m->fs.fp_valid);
14353 	  gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
14354 
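	  /* The stub's tail restores RSP from RBP and reloads RBP from its
	     save slot; the BLKmode scratch CLOBBER marks the insn as also
	     clobbering memory.  */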
14355 	  tmp = gen_rtx_PLUS (DImode, rbp, GEN_INT (8));
14356 	  RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
14357 	  RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
14358 	  tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
14359 	  RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
14360 	}
14361       else
14362 	{
14363 	  /* If no hard frame pointer, we set R10 to the SP restore value.  */
14364 	  gcc_assert (!m->fs.fp_valid);
14365 	  gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
14366 	  gcc_assert (m->fs.sp_valid);
14367 
14368 	  r10 = gen_rtx_REG (DImode, R10_REG);
14369 	  tmp = gen_rtx_PLUS (Pmode, rsi, GEN_INT (stub_ptr_offset));
14370 	  emit_insn (gen_rtx_SET (r10, tmp));
14371 
14372 	  RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
14373 	}
14374     }
14375 
14376   /* Generate frame load insns and restore notes.  */
14377   for (i = 0; i < ncregs; ++i)
14378     {
14379       const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
14380       machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
14381       rtx reg, frame_load;
14382 
14383       reg = gen_rtx_REG (mode, r.regno);
14384       frame_load = gen_frame_load (reg, rsi, r.offset);
14385 
14386       /* Save RSI frame load insn & note to add last.  */
14387       if (r.regno == SI_REG)
14388 	{
14389 	  gcc_assert (!rsi_frame_load);
14390 	  rsi_frame_load = frame_load;
14391 	  rsi_restore_offset = r.offset;
14392 	}
14393       else
14394 	{
14395 	  RTVEC_ELT (v, vi++) = frame_load;
14396 	  ix86_add_cfa_restore_note (NULL, reg, r.offset);
14397 	}
14398     }
14399 
14400   /* Add RSI frame load & restore note at the end.  */
14401   gcc_assert (rsi_frame_load);
14402   gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
14403   RTVEC_ELT (v, vi++) = rsi_frame_load;
14404   ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
14405 			     rsi_restore_offset);
14406 
14407   /* Finally, for tail-call w/o a hard frame pointer, set SP to R10.  */
14408   if (!use_call && !frame_pointer_needed)
14409     {
14410       gcc_assert (m->fs.sp_valid);
14411       gcc_assert (!m->fs.sp_realigned);
14412 
14413       /* At this point, R10 should point to frame.stack_realign_offset.  */
14414       if (m->fs.cfa_reg == stack_pointer_rtx)
14415 	m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
14416       m->fs.sp_offset = frame.stack_realign_offset;
14417     }
14418 
14419   gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
14420   tmp = gen_rtx_PARALLEL (VOIDmode, v);
14421   if (use_call)
14422       insn = emit_insn (tmp);
14423   else
14424     {
14425       insn = emit_jump_insn (tmp);
14426       JUMP_LABEL (insn) = ret_rtx;
14427 
14428       if (frame_pointer_needed)
14429 	ix86_emit_leave (insn);
14430       else
14431 	{
14432 	  /* Need CFA adjust note.  */
14433 	  tmp = gen_rtx_SET (stack_pointer_rtx, r10);
14434 	  add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
14435 	}
14436     }
14437 
14438   RTX_FRAME_RELATED_P (insn) = true;
14439   ix86_add_queued_cfa_restore_notes (insn);
14440 
14441   /* If we're not doing a tail-call, we need to adjust the stack.  */
14442   if (use_call && m->fs.sp_valid)
14443     {
14444       HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
14445       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14446 				GEN_INT (dealloc), style,
14447 				m->fs.cfa_reg == stack_pointer_rtx);
14448     }
14449 }
14450 
14451 /* Restore function stack, frame, and registers.  */
14452 
14453 void
14454 ix86_expand_epilogue (int style)
14455 {
14456   struct machine_function *m = cfun->machine;
14457   struct machine_frame_state frame_state_save = m->fs;
14458   bool restore_regs_via_mov;
14459   bool using_drap;
14460   bool restore_stub_is_tail = false;
14461 
14462   if (ix86_function_naked (current_function_decl))
14463     {
14464       /* The program should not reach this point.  */
14465       emit_insn (gen_ud2 ());
14466       return;
14467     }
14468 
14469   ix86_finalize_stack_frame_flags ();
14470   const struct ix86_frame &frame = cfun->machine->frame;
14471 
14472   m->fs.sp_realigned = stack_realign_fp;
14473   m->fs.sp_valid = stack_realign_fp
14474 		   || !frame_pointer_needed
14475 		   || crtl->sp_is_unchanging;
14476   gcc_assert (!m->fs.sp_valid
14477 	      || m->fs.sp_offset == frame.stack_pointer_offset);
14478 
14479   /* The FP must be valid exactly when a frame pointer is needed.  */
14480   gcc_assert (frame_pointer_needed == m->fs.fp_valid);
14481   gcc_assert (!m->fs.fp_valid
14482 	      || m->fs.fp_offset == frame.hard_frame_pointer_offset);
14483 
14484   /* We must have *some* valid pointer to the stack frame.  */
14485   gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
14486 
14487   /* The DRAP is never valid at this point.  */
14488   gcc_assert (!m->fs.drap_valid);
14489 
14490   /* See the comment about red zone and frame
14491      pointer usage in ix86_expand_prologue.  */
14492   if (frame_pointer_needed && frame.red_zone_size)
14493     emit_insn (gen_memory_blockage ());
14494 
14495   using_drap = crtl->drap_reg && crtl->stack_realign_needed;
14496   gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
14497 
14498   /* Determine the CFA offset of the end of the red-zone.  */
14499   m->fs.red_zone_offset = 0;
14500   if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
14501     {
14502       /* The red zone begins below the return address and, in an
14503 	 exception handler, below the error code.  */
14504       m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
14505 
14506       /* When the register save area is in the aligned portion of
14507          the stack, determine the maximum runtime displacement that
14508 	 matches up with the aligned frame.  */
14509       if (stack_realign_drap)
14510 	m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
14511 				  + UNITS_PER_WORD);
14512     }
14513 
14514   HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;
14515 
14516   /* Special care must be taken for the normal return case of a function
14517      using eh_return: the eax and edx registers are marked as saved, but
14518      not restored along this path.  Adjust the save location to match.  */
14519   if (crtl->calls_eh_return && style != 2)
14520     reg_save_offset -= 2 * UNITS_PER_WORD;
14521 
14522   /* EH_RETURN requires the use of moves to function properly.  */
14523   if (crtl->calls_eh_return)
14524     restore_regs_via_mov = true;
14525   /* SEH requires the use of pops to identify the epilogue.  */
14526   else if (TARGET_SEH)
14527     restore_regs_via_mov = false;
14528   /* If we're only restoring one register and sp cannot be used then
14529      use a move instruction to restore the register, since that is
14530      less work than reloading sp and popping the register.  */
14531   else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
14532     restore_regs_via_mov = true;
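  /* When the target prefers move-based epilogues and the fast epilogue
     heuristic is in effect, use moves if more than one register must be
     restored or the stack pointer is not at the register save area.  */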
14533   else if (TARGET_EPILOGUE_USING_MOVE
14534 	   && cfun->machine->use_fast_prologue_epilogue
14535 	   && (frame.nregs > 1
14536 	       || m->fs.sp_offset != reg_save_offset))
14537     restore_regs_via_mov = true;
14538   else if (frame_pointer_needed
14539 	   && !frame.nregs
14540 	   && m->fs.sp_offset != reg_save_offset)
14541     restore_regs_via_mov = true;
14542   else if (frame_pointer_needed
14543 	   && TARGET_USE_LEAVE
14544 	   && cfun->machine->use_fast_prologue_epilogue
14545 	   && frame.nregs == 1)
14546     restore_regs_via_mov = true;
14547   else
14548     restore_regs_via_mov = false;
14549 
14550   if (restore_regs_via_mov || frame.nsseregs)
14551     {
14552       /* Ensure that the entire register save area is addressable via
14553 	 the stack pointer, if we will restore SSE regs via sp.  */
14554       if (TARGET_64BIT
14555 	  && m->fs.sp_offset > 0x7fffffff
14556 	  && sp_valid_at (frame.stack_realign_offset + 1)
14557 	  && (frame.nsseregs + frame.nregs) != 0)
14558 	{
14559 	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14560 				     GEN_INT (m->fs.sp_offset
14561 					      - frame.sse_reg_save_offset),
14562 				     style,
14563 				     m->fs.cfa_reg == stack_pointer_rtx);
14564 	}
14565     }
14566 
14567   /* If there are any SSE registers to restore, then we have to do it
14568      via moves, since there's obviously no pop for SSE regs.  */
14569   if (frame.nsseregs)
14570     ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
14571 					  style == 2);
14572 
14573   if (m->call_ms2sysv)
14574     {
14575       int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
14576 
14577       /* We cannot use a tail-call for the stub if:
14578 	 1. We have to pop incoming args,
14579 	 2. We have additional int regs to restore, or
14580 	 3. A sibling call will be the tail-call, or
14581 	 4. We are emitting an eh_return_internal epilogue.
14582 
14583 	 TODO: Item 4 has not yet been tested!
14584 
14585 	 If any of the above are true, we will call the stub rather than
14586 	 jump to it.  */
14587       restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
14588       ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
14589     }
14590 
14591   /* If using an out-of-line stub that is a tail call, then...  */
14592   if (m->call_ms2sysv && restore_stub_is_tail)
14593     {
14594       /* TODO: paranoid tests. (remove eventually)  */
14595       gcc_assert (m->fs.sp_valid);
14596       gcc_assert (!m->fs.sp_realigned);
14597       gcc_assert (!m->fs.fp_valid);
14598       gcc_assert (!m->fs.realigned);
14599       gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
14600       gcc_assert (!crtl->drap_reg);
14601       gcc_assert (!frame.nregs);
14602     }
14603   else if (restore_regs_via_mov)
14604     {
14605       rtx t;
14606 
14607       if (frame.nregs)
14608 	ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);
14609 
14610       /* eh_return epilogues need %ecx added to the stack pointer.  */
14611       if (style == 2)
14612 	{
14613 	  rtx sa = EH_RETURN_STACKADJ_RTX;
14614 	  rtx_insn *insn;
14615 
14616 	  /* %ecx can't be used for both DRAP register and eh_return.  */
14617 	  if (crtl->drap_reg)
14618 	    gcc_assert (REGNO (crtl->drap_reg) != CX_REG);
14619 
14620 	  /* regparm nested functions don't work with eh_return.  */
14621 	  gcc_assert (!ix86_static_chain_on_stack);
14622 
14623 	  if (frame_pointer_needed)
14624 	    {
14625 	      t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
14626 	      t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
14627 	      emit_insn (gen_rtx_SET (sa, t));
14628 
14629 	      t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
14630 	      insn = emit_move_insn (hard_frame_pointer_rtx, t);
14631 
14632 	      /* Note that we use SA as a temporary CFA, as the return
14633 		 address is at the proper place relative to it.  We
14634 		 pretend this happens at the FP restore insn because
14635 		 prior to this insn the FP would be stored at the wrong
14636 		 offset relative to SA, and after this insn we have no
14637 		 other reasonable register to use for the CFA.  We don't
14638 		 bother resetting the CFA to the SP for the duration of
14639 		 the return insn, unless the control flow instrumentation
14640 		 is done.  In this case the SP is used later and we have
14641 		 to reset CFA to SP.  */
14642 	      add_reg_note (insn, REG_CFA_DEF_CFA,
14643 			    plus_constant (Pmode, sa, UNITS_PER_WORD));
14644 	      ix86_add_queued_cfa_restore_notes (insn);
14645 	      add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
14646 	      RTX_FRAME_RELATED_P (insn) = 1;
14647 
14648 	      m->fs.cfa_reg = sa;
14649 	      m->fs.cfa_offset = UNITS_PER_WORD;
14650 	      m->fs.fp_valid = false;
14651 
14652 	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
14653 					 const0_rtx, style,
14654 					 flag_cf_protection);
14655 	    }
14656 	  else
14657 	    {
14658 	      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
14659 	      t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
14660 	      insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
14661 	      ix86_add_queued_cfa_restore_notes (insn);
14662 
14663 	      gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
14664 	      if (m->fs.cfa_offset != UNITS_PER_WORD)
14665 		{
14666 		  m->fs.cfa_offset = UNITS_PER_WORD;
14667 		  add_reg_note (insn, REG_CFA_DEF_CFA,
14668 				plus_constant (Pmode, stack_pointer_rtx,
14669 					       UNITS_PER_WORD));
14670 		  RTX_FRAME_RELATED_P (insn) = 1;
14671 		}
14672 	    }
14673 	  m->fs.sp_offset = UNITS_PER_WORD;
14674 	  m->fs.sp_valid = true;
14675 	  m->fs.sp_realigned = false;
14676 	}
14677     }
14678   else
14679     {
14680       /* SEH requires that the function end with (1) a stack adjustment
14681 	 if necessary, (2) a sequence of pops, and (3) a return or
14682 	 jump instruction.  Prevent insns from the function body from
14683 	 being scheduled into this sequence.  */
14684       if (TARGET_SEH)
14685 	{
14686 	  /* Prevent a catch region from being adjacent to the standard
14687 	     epilogue sequence.  Unfortunately neither crtl->uses_eh_lsda
14688 	     nor several other flags that would be interesting to test are
14689 	     set up yet.  */
14690 	  if (flag_non_call_exceptions)
14691 	    emit_insn (gen_nops (const1_rtx));
14692 	  else
14693 	    emit_insn (gen_blockage ());
14694 	}
14695 
14696       /* The first step is to deallocate the stack frame so that we can
14697 	 pop the registers.  If the stack pointer was realigned, it needs
14698 	 to be restored now.  Also do this on SEH targets for very large
14699 	 frames, as the emitted instructions aren't allowed by the ABI
14700 	 in epilogues.  */
14701       if (!m->fs.sp_valid || m->fs.sp_realigned
14702  	  || (TARGET_SEH
14703 	      && (m->fs.sp_offset - reg_save_offset
14704 		  >= SEH_MAX_FRAME_SIZE)))
14705 	{
14706 	  pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
14707 				     GEN_INT (m->fs.fp_offset
14708 					      - reg_save_offset),
14709 				     style, false);
14710 	}
14711       else if (m->fs.sp_offset != reg_save_offset)
14712 	{
14713 	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14714 				     GEN_INT (m->fs.sp_offset
14715 					      - reg_save_offset),
14716 				     style,
14717 				     m->fs.cfa_reg == stack_pointer_rtx);
14718 	}
14719 
14720       ix86_emit_restore_regs_using_pop ();
14721     }
14722 
14723   /* If we used a frame pointer and haven't already got rid of it,
14724      then do so now.  */
14725   if (m->fs.fp_valid)
14726     {
14727       /* If the stack pointer is valid and pointing at the frame
14728 	 pointer store address, then we only need a pop.  */
14729       if (sp_valid_at (frame.hfp_save_offset)
14730 	  && m->fs.sp_offset == frame.hfp_save_offset)
14731 	ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
14732       /* Leave results in shorter dependency chains on CPUs that are
14733 	 able to grok it fast.  */
14734       else if (TARGET_USE_LEAVE
14735 	       || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
14736 	       || !cfun->machine->use_fast_prologue_epilogue)
14737 	ix86_emit_leave (NULL);
14738       else
14739         {
14740 	  pro_epilogue_adjust_stack (stack_pointer_rtx,
14741 				     hard_frame_pointer_rtx,
14742 				     const0_rtx, style, !using_drap);
14743 	  ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
14744         }
14745     }
14746 
14747   if (using_drap)
14748     {
14749       int param_ptr_offset = UNITS_PER_WORD;
14750       rtx_insn *insn;
14751 
14752       gcc_assert (stack_realign_drap);
14753 
14754       if (ix86_static_chain_on_stack)
14755 	param_ptr_offset += UNITS_PER_WORD;
14756       if (!call_used_regs[REGNO (crtl->drap_reg)])
14757 	param_ptr_offset += UNITS_PER_WORD;
14758 
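      /* Point the stack pointer at DRAP minus PARAM_PTR_OFFSET, which
	 accounts for the return address plus the static chain and saved
	 DRAP slots when those are present.  */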
14759       insn = emit_insn (gen_rtx_SET
14760 			(stack_pointer_rtx,
14761 			 gen_rtx_PLUS (Pmode,
14762 				       crtl->drap_reg,
14763 				       GEN_INT (-param_ptr_offset))));
14764       m->fs.cfa_reg = stack_pointer_rtx;
14765       m->fs.cfa_offset = param_ptr_offset;
14766       m->fs.sp_offset = param_ptr_offset;
14767       m->fs.realigned = false;
14768 
14769       add_reg_note (insn, REG_CFA_DEF_CFA,
14770 		    gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14771 				  GEN_INT (param_ptr_offset)));
14772       RTX_FRAME_RELATED_P (insn) = 1;
14773 
14774       if (!call_used_regs[REGNO (crtl->drap_reg)])
14775 	ix86_emit_restore_reg_using_pop (crtl->drap_reg);
14776     }
14777 
14778   /* At this point the stack pointer must be valid, and we must have
14779      restored all of the registers.  We may not have deallocated the
14780      entire stack frame.  We've delayed this until now because it may
14781      be possible to merge the local stack deallocation with the
14782      deallocation forced by ix86_static_chain_on_stack.   */
14783   gcc_assert (m->fs.sp_valid);
14784   gcc_assert (!m->fs.sp_realigned);
14785   gcc_assert (!m->fs.fp_valid);
14786   gcc_assert (!m->fs.realigned);
14787   if (m->fs.sp_offset != UNITS_PER_WORD)
14788     {
14789       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14790 				 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
14791 				 style, true);
14792     }
14793   else
14794     ix86_add_queued_cfa_restore_notes (get_last_insn ());
14795 
14796   /* Sibcall epilogues don't want a return instruction.  */
14797   if (style == 0)
14798     {
14799       m->fs = frame_state_save;
14800       return;
14801     }
14802 
14803   if (cfun->machine->func_type != TYPE_NORMAL)
14804     emit_jump_insn (gen_interrupt_return ());
14805   else if (crtl->args.pops_args && crtl->args.size)
14806     {
14807       rtx popc = GEN_INT (crtl->args.pops_args);
14808 
14809       /* i386 can only pop 64K bytes.  If asked to pop more, pop return
14810 	 address, do explicit add, and jump indirectly to the caller.  */
14811 
14812       if (crtl->args.pops_args >= 65536)
14813 	{
14814 	  rtx ecx = gen_rtx_REG (SImode, CX_REG);
14815 	  rtx_insn *insn;
14816 
14817 	  /* There is no "pascal" calling convention in any 64bit ABI.  */
14818 	  gcc_assert (!TARGET_64BIT);
14819 
14820 	  insn = emit_insn (gen_pop (ecx));
14821 	  m->fs.cfa_offset -= UNITS_PER_WORD;
14822 	  m->fs.sp_offset -= UNITS_PER_WORD;
14823 
14824 	  rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
14825 	  x = gen_rtx_SET (stack_pointer_rtx, x);
14826 	  add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
14827 	  add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
14828 	  RTX_FRAME_RELATED_P (insn) = 1;
14829 
14830 	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
14831 				     popc, -1, true);
14832 	  emit_jump_insn (gen_simple_return_indirect_internal (ecx));
14833 	}
14834       else
14835 	emit_jump_insn (gen_simple_return_pop_internal (popc));
14836     }
14837   else if (!m->call_ms2sysv || !restore_stub_is_tail)
14838     {
14839       /* In case of return from EH a simple return cannot be used
14840 	 as a return address will be compared with a shadow stack
14841 	 return address.  Use indirect jump instead.  */
14842       if (style == 2 && flag_cf_protection)
14843 	{
14844 	  /* Register used in indirect jump must be in word_mode.  But
14845 	     Pmode may not be the same as word_mode for x32.  */
14846 	  rtx ecx = gen_rtx_REG (word_mode, CX_REG);
14847 	  rtx_insn *insn;
14848 
14849 	  insn = emit_insn (gen_pop (ecx));
14850 	  m->fs.cfa_offset -= UNITS_PER_WORD;
14851 	  m->fs.sp_offset -= UNITS_PER_WORD;
14852 
14853 	  rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
14854 	  x = gen_rtx_SET (stack_pointer_rtx, x);
14855 	  add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
14856 	  add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
14857 	  RTX_FRAME_RELATED_P (insn) = 1;
14858 
14859 	  emit_jump_insn (gen_simple_return_indirect_internal (ecx));
14860 	}
14861       else
14862 	emit_jump_insn (gen_simple_return_internal ());
14863     }
14864 
14865   /* Restore the state back to the state from the prologue,
14866      so that it's correct for the next epilogue.  */
14867   m->fs = frame_state_save;
14868 }
14869 
14870 /* Reset from the function's potential modifications.  */
14871 
14872 static void
14873 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
14874 {
14875   if (pic_offset_table_rtx
14876       && !ix86_use_pseudo_pic_reg ())
14877     SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
14878 
14879   if (TARGET_MACHO)
14880     {
14881       rtx_insn *insn = get_last_insn ();
14882       rtx_insn *deleted_debug_label = NULL;
14883 
14884       /* Mach-O doesn't support labels at the end of objects, so if
14885          it looks like we might want one, take special action.
14886         First, collect any sequence of deleted debug labels.  */
14887       while (insn
14888 	     && NOTE_P (insn)
14889 	     && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
14890 	{
14891 	  /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
14892 	     notes only, instead set their CODE_LABEL_NUMBER to -1,
14893 	     otherwise there would be code generation differences
14894 	     in between -g and -g0.  */
14895 	  if (NOTE_P (insn) && NOTE_KIND (insn)
14896 	      == NOTE_INSN_DELETED_DEBUG_LABEL)
14897 	    deleted_debug_label = insn;
14898 	  insn = PREV_INSN (insn);
14899 	}
14900 
14901       /* If we have:
14902 	 label:
14903 	    barrier
14904 	  then this needs to be detected, so skip past the barrier.  */
14905 
14906       if (insn && BARRIER_P (insn))
14907 	insn = PREV_INSN (insn);
14908 
14909       /* Up to now we've only seen notes or barriers.  */
14910       if (insn)
14911 	{
14912 	  if (LABEL_P (insn)
14913 	      || (NOTE_P (insn)
14914 		  && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
14915 	    /* Trailing label.  */
14916 	    fputs ("\tnop\n", file);
14917 	  else if (cfun && ! cfun->is_thunk)
14918 	    {
14919 	      /* See if we have a completely empty function body, skipping
14920 	         the special case of the picbase thunk emitted as asm.  */
14921 	      while (insn && ! INSN_P (insn))
14922 		insn = PREV_INSN (insn);
14923 	      /* If we don't find any insns, we've got an empty function body;
14924 		 I.e. completely empty - without a return or branch.  This is
14925 		 taken as the case where a function body has been removed
14926 		 because it contains an inline __builtin_unreachable().  GCC
14927 		 declares that reaching __builtin_unreachable() means UB so
14928 		 we're not obliged to do anything special; however, we want
14929 		 non-zero-sized function bodies.  To meet this, and help the
14930 		 user out, let's trap the case.  */
14931 	      if (insn == NULL)
14932 		fputs ("\tud2\n", file);
14933 	    }
14934 	}
14935       else if (deleted_debug_label)
14936 	for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
14937 	  if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
14938 	    CODE_LABEL_NUMBER (insn) = -1;
14939     }
14940 }
14941 
14942 /* Return a scratch register to use in the split stack prologue.  The
14943    split stack prologue is used for -fsplit-stack.  It consists of the
14944    first instructions in the function, even before the regular prologue.
14945    The scratch register can be any caller-saved register which is not
14946    used for parameters or for the static chain.  */
14947 
14948 static unsigned int
14949 split_stack_prologue_scratch_regno (void)
14950 {
14951   if (TARGET_64BIT)
14952     return R11_REG;
14953   else
14954     {
14955       bool is_fastcall, is_thiscall;
14956       int regparm;
14957 
14958       is_fastcall = (lookup_attribute ("fastcall",
14959 				       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
14960 		     != NULL);
14961       is_thiscall = (lookup_attribute ("thiscall",
14962 				       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
14963 		     != NULL);
14964       regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
14965 
14966       if (is_fastcall)
14967 	{
14968 	  if (DECL_STATIC_CHAIN (cfun->decl))
14969 	    {
14970 	      sorry ("-fsplit-stack does not support fastcall with "
14971 		     "nested function");
14972 	      return INVALID_REGNUM;
14973 	    }
14974 	  return AX_REG;
14975 	}
14976       else if (is_thiscall)
14977         {
14978 	  if (!DECL_STATIC_CHAIN (cfun->decl))
14979 	    return DX_REG;
14980 	  return AX_REG;
14981 	}
14982       else if (regparm < 3)
14983 	{
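	  /* By default the static chain is passed in %ecx, so %ecx is only
	     free when there is no static chain; otherwise fall back to %edx
	     unless it already carries a register parameter.  */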
14984 	  if (!DECL_STATIC_CHAIN (cfun->decl))
14985 	    return CX_REG;
14986 	  else
14987 	    {
14988 	      if (regparm >= 2)
14989 		{
14990 		  sorry ("-fsplit-stack does not support 2 register "
14991 			 "parameters for a nested function");
14992 		  return INVALID_REGNUM;
14993 		}
14994 	      return DX_REG;
14995 	    }
14996 	}
14997       else
14998 	{
14999 	  /* FIXME: We could make this work by pushing a register
15000 	     around the addition and comparison.  */
15001 	  sorry ("-fsplit-stack does not support 3 register parameters");
15002 	  return INVALID_REGNUM;
15003 	}
15004     }
15005 }
15006 
15007 /* A SYMBOL_REF for the function which allocates new stack space for
15008    -fsplit-stack.  */
15009 
15010 static GTY(()) rtx split_stack_fn;
15011 
15012 /* A SYMBOL_REF for the more-stack helper function used when the large
15013    code model is in effect.  */
15014 
15015 static GTY(()) rtx split_stack_fn_large;
15016 
15017 /* Return location of the stack guard value in the TLS block.  */
15018 
15019 rtx
15020 ix86_split_stack_guard (void)
15021 {
15022   int offset;
15023   addr_space_t as = DEFAULT_TLS_SEG_REG;
15024   rtx r;
15025 
15026   gcc_assert (flag_split_stack);
15027 
15028 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15029   offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15030 #else
15031   gcc_unreachable ();
15032 #endif
15033 
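  /* The guard value lives at a fixed offset inside the thread control
     block; build a constant memory reference to it in the TLS segment
     address space.  */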
15034   r = GEN_INT (offset);
15035   r = gen_const_mem (Pmode, r);
15036   set_mem_addr_space (r, as);
15037 
15038   return r;
15039 }
15040 
15041 /* Handle -fsplit-stack.  These are the first instructions in the
15042    function, even before the regular prologue.  */
15043 
15044 void
15045 ix86_expand_split_stack_prologue (void)
15046 {
15047   HOST_WIDE_INT allocate;
15048   unsigned HOST_WIDE_INT args_size;
15049   rtx_code_label *label;
15050   rtx limit, current, allocate_rtx, call_insn, call_fusage;
15051   rtx scratch_reg = NULL_RTX;
15052   rtx_code_label *varargs_label = NULL;
15053   rtx fn;
15054 
15055   gcc_assert (flag_split_stack && reload_completed);
15056 
15057   ix86_finalize_stack_frame_flags ();
15058   struct ix86_frame &frame = cfun->machine->frame;
15059   allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
15060 
15061   /* This is the label we will branch to if we have enough stack
15062      space.  We expect the basic block reordering pass to reverse this
15063      branch if optimizing, so that we branch in the unlikely case.  */
15064   label = gen_label_rtx ();
15065 
15066   /* We need to compare the stack pointer minus the frame size with
15067      the stack boundary in the TCB.  The stack boundary always gives
15068      us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
15069      can compare directly.  Otherwise we need to do an addition.  */
15070 
15071   limit = ix86_split_stack_guard ();
15072 
15073   if (allocate < SPLIT_STACK_AVAILABLE)
15074     current = stack_pointer_rtx;
15075   else
15076     {
15077       unsigned int scratch_regno;
15078       rtx offset;
15079 
15080       /* We need a scratch register to hold the stack pointer minus
15081 	 the required frame size.  Since this is the very start of the
15082 	 function, the scratch register can be any caller-saved
15083 	 register which is not used for parameters.  */
15084       offset = GEN_INT (- allocate);
15085       scratch_regno = split_stack_prologue_scratch_regno ();
15086       if (scratch_regno == INVALID_REGNUM)
15087 	return;
15088       scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
15089       if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
15090 	{
15091 	  /* We don't use ix86_gen_add3 in this case because it will
15092 	     want to split to lea, but when not optimizing the insn
15093 	     will not be split after this point.  */
15094 	  emit_insn (gen_rtx_SET (scratch_reg,
15095 				  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
15096 						offset)));
15097 	}
15098       else
15099 	{
15100 	  emit_move_insn (scratch_reg, offset);
15101 	  emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
15102 				    stack_pointer_rtx));
15103 	}
15104       current = scratch_reg;
15105     }
15106 
15107   ix86_expand_branch (GEU, current, limit, label);
15108   rtx_insn *jump_insn = get_last_insn ();
15109   JUMP_LABEL (jump_insn) = label;
15110 
15111   /* Mark the jump as very likely to be taken.  */
15112   add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());
15113 
15114   if (split_stack_fn == NULL_RTX)
15115     {
15116       split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
15117       SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
15118     }
15119   fn = split_stack_fn;
15120 
15121   /* Get more stack space.  We pass in the desired stack space and the
15122      size of the arguments to copy to the new stack.  In 32-bit mode
15123      we push the parameters; __morestack will return on a new stack
15124      anyhow.  In 64-bit mode we pass the parameters in r10 and
15125      r11.  */
15126   allocate_rtx = GEN_INT (allocate);
15127   args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
15128   call_fusage = NULL_RTX;
15129   rtx pop = NULL_RTX;
15130   if (TARGET_64BIT)
15131     {
15132       rtx reg10, reg11;
15133 
15134       reg10 = gen_rtx_REG (Pmode, R10_REG);
15135       reg11 = gen_rtx_REG (Pmode, R11_REG);
15136 
15137       /* If this function uses a static chain, it will be in %r10.
15138 	 Preserve it across the call to __morestack.  */
15139       if (DECL_STATIC_CHAIN (cfun->decl))
15140 	{
15141 	  rtx rax;
15142 
15143 	  rax = gen_rtx_REG (word_mode, AX_REG);
15144 	  emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
15145 	  use_reg (&call_fusage, rax);
15146 	}
15147 
15148       if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
15149           && !TARGET_PECOFF)
15150 	{
15151 	  HOST_WIDE_INT argval;
15152 
15153 	  gcc_assert (Pmode == DImode);
15154 	  /* When using the large model we need to load the address
15155 	     into a register, and we've run out of registers.  So we
15156 	     switch to a different calling convention, and we call a
15157 	     different function: __morestack_large.  We pass the
15158 	     argument size in the upper 32 bits of r10 and pass the
15159 	     frame size in the lower 32 bits.  */
15160 	  gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
15161 	  gcc_assert ((args_size & 0xffffffff) == args_size);
15162 
15163 	  if (split_stack_fn_large == NULL_RTX)
15164 	    {
15165 	      split_stack_fn_large =
15166 	        gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
15167 	      SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
15168 	    }
15169 	  if (ix86_cmodel == CM_LARGE_PIC)
15170 	    {
15171 	      rtx_code_label *label;
15172 	      rtx x;
15173 
15174 	      label = gen_label_rtx ();
15175 	      emit_label (label);
15176 	      LABEL_PRESERVE_P (label) = 1;
15177 	      emit_insn (gen_set_rip_rex64 (reg10, label));
15178 	      emit_insn (gen_set_got_offset_rex64 (reg11, label));
15179 	      emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
15180 	      x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
15181 				  UNSPEC_GOT);
15182 	      x = gen_rtx_CONST (Pmode, x);
15183 	      emit_move_insn (reg11, x);
15184 	      x = gen_rtx_PLUS (Pmode, reg10, reg11);
15185 	      x = gen_const_mem (Pmode, x);
15186 	      emit_move_insn (reg11, x);
15187 	    }
15188 	  else
15189 	    emit_move_insn (reg11, split_stack_fn_large);
15190 
15191 	  fn = reg11;
15192 
15193 	  argval = ((args_size << 16) << 16) + allocate;
15194 	  emit_move_insn (reg10, GEN_INT (argval));
15195 	}
15196       else
15197 	{
15198 	  emit_move_insn (reg10, allocate_rtx);
15199 	  emit_move_insn (reg11, GEN_INT (args_size));
15200 	  use_reg (&call_fusage, reg11);
15201 	}
15202 
15203       use_reg (&call_fusage, reg10);
15204     }
15205   else
15206     {
15207       rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
15208       add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
15209       insn = emit_insn (gen_push (allocate_rtx));
15210       add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
15211       pop = GEN_INT (2 * UNITS_PER_WORD);
15212     }
15213   call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
15214 				GEN_INT (UNITS_PER_WORD), constm1_rtx,
15215 				pop, false);
15216   add_function_usage_to (call_insn, call_fusage);
15217   if (!TARGET_64BIT)
15218     add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
15219   /* Indicate that this function can't jump to non-local gotos.  */
15220   make_reg_eh_region_note_nothrow_nononlocal (as_a <rtx_insn *> (call_insn));
15221 
15222   /* In order to make call/return prediction work right, we now need
15223      to execute a return instruction.  See
15224      libgcc/config/i386/morestack.S for the details on how this works.
15225 
15226      For flow purposes gcc must not see this as a return
15227      instruction--we need control flow to continue at the subsequent
15228      label.  Therefore, we use an unspec.  */
15229   gcc_assert (crtl->args.pops_args < 65536);
15230   rtx_insn *ret_insn
15231     = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
15232 
15233   if ((flag_cf_protection & CF_BRANCH))
15234     {
15235       /* Insert ENDBR since __morestack will jump back here via indirect
15236 	 call.  */
15237       rtx cet_eb = gen_nop_endbr ();
15238       emit_insn_after (cet_eb, ret_insn);
15239     }
15240 
15241   /* If we are in 64-bit mode and this function uses a static chain,
15242      we saved %r10 in %rax before calling __morestack.  */
15243   if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
15244     emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
15245 		    gen_rtx_REG (word_mode, AX_REG));
15246 
15247   /* If this function calls va_start, we need to store a pointer to
15248      the arguments on the old stack, because they may not have been
15249      all copied to the new stack.  At this point the old stack can be
15250      found at the frame pointer value used by __morestack, because
15251      __morestack has set that up before calling back to us.  Here we
15252      store that pointer in a scratch register, and in
15253      ix86_expand_prologue we store the scratch register in a stack
15254      slot.  */
15255   if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
15256     {
15257       unsigned int scratch_regno;
15258       rtx frame_reg;
15259       int words;
15260 
15261       scratch_regno = split_stack_prologue_scratch_regno ();
15262       scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
15263       frame_reg = gen_rtx_REG (Pmode, BP_REG);
15264 
15265       /* 64-bit:
15266 	 fp -> old fp value
15267 	       return address within this function
15268 	       return address of caller of this function
15269 	       stack arguments
15270 	 So we add three words to get to the stack arguments.
15271 
15272 	 32-bit:
15273 	 fp -> old fp value
15274 	       return address within this function
15275                first argument to __morestack
15276                second argument to __morestack
15277                return address of caller of this function
15278                stack arguments
15279          So we add five words to get to the stack arguments.
15280       */
15281       words = TARGET_64BIT ? 3 : 5;
15282       emit_insn (gen_rtx_SET (scratch_reg,
15283 			      gen_rtx_PLUS (Pmode, frame_reg,
15284 					    GEN_INT (words * UNITS_PER_WORD))));
15285 
15286       varargs_label = gen_label_rtx ();
15287       emit_jump_insn (gen_jump (varargs_label));
15288       JUMP_LABEL (get_last_insn ()) = varargs_label;
15289 
15290       emit_barrier ();
15291     }
15292 
15293   emit_label (label);
15294   LABEL_NUSES (label) = 1;
15295 
15296   /* If this function calls va_start, we now have to set the scratch
15297      register for the case where we do not call __morestack.  In this
15298      case we need to set it based on the stack pointer.  */
15299   if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
15300     {
15301       emit_insn (gen_rtx_SET (scratch_reg,
15302 			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
15303 					    GEN_INT (UNITS_PER_WORD))));
15304 
15305       emit_label (varargs_label);
15306       LABEL_NUSES (varargs_label) = 1;
15307     }
15308 }
15309 
15310 /* We may have to tell the dataflow pass that the split stack prologue
15311    is initializing a scratch register.  */
15312 
15313 static void
15314 ix86_live_on_entry (bitmap regs)
15315 {
15316   if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
15317     {
15318       gcc_assert (flag_split_stack);
15319       bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
15320     }
15321 }
15322 
15323 /* Extract the parts of an RTL expression that is a valid memory address
15324    for an instruction.  Return 0 if the structure of the address is
15325    grossly off.  Return -1 if the address contains ASHIFT, so it is not
15326    strictly valid, but still used to compute the length of the lea instruction.  */
15327 
15328 int
15329 ix86_decompose_address (rtx addr, struct ix86_address *out)
15330 {
15331   rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
15332   rtx base_reg, index_reg;
15333   HOST_WIDE_INT scale = 1;
15334   rtx scale_rtx = NULL_RTX;
15335   rtx tmp;
15336   int retval = 1;
15337   addr_space_t seg = ADDR_SPACE_GENERIC;
15338 
15339   /* Allow zero-extended SImode addresses,
15340      they will be emitted with addr32 prefix.  */
15341   if (TARGET_64BIT && GET_MODE (addr) == DImode)
15342     {
15343       if (GET_CODE (addr) == ZERO_EXTEND
15344 	  && GET_MODE (XEXP (addr, 0)) == SImode)
15345 	{
15346 	  addr = XEXP (addr, 0);
15347 	  if (CONST_INT_P (addr))
15348 	    return 0;
15349 	}
15350       else if (GET_CODE (addr) == AND
15351 	       && const_32bit_mask (XEXP (addr, 1), DImode))
15352 	{
15353 	  addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
15354 	  if (addr == NULL_RTX)
15355 	    return 0;
15356 
15357 	  if (CONST_INT_P (addr))
15358 	    return 0;
15359 	}
15360     }
15361 
15362   /* Allow SImode subregs of DImode addresses,
15363      they will be emitted with addr32 prefix.  */
15364   if (TARGET_64BIT && GET_MODE (addr) == SImode)
15365     {
15366       if (SUBREG_P (addr)
15367 	  && GET_MODE (SUBREG_REG (addr)) == DImode)
15368 	{
15369 	  addr = SUBREG_REG (addr);
15370 	  if (CONST_INT_P (addr))
15371 	    return 0;
15372 	}
15373     }
15374 
15375   if (REG_P (addr))
15376     base = addr;
15377   else if (SUBREG_P (addr))
15378     {
15379       if (REG_P (SUBREG_REG (addr)))
15380 	base = addr;
15381       else
15382 	return 0;
15383     }
15384   else if (GET_CODE (addr) == PLUS)
15385     {
15386       rtx addends[4], op;
15387       int n = 0, i;
15388 
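      /* Peel the operands of a nested PLUS chain into at most four addends,
	 then classify each addend as base, index, scale, segment or
	 displacement below.  */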
15389       op = addr;
15390       do
15391 	{
15392 	  if (n >= 4)
15393 	    return 0;
15394 	  addends[n++] = XEXP (op, 1);
15395 	  op = XEXP (op, 0);
15396 	}
15397       while (GET_CODE (op) == PLUS);
15398       if (n >= 4)
15399 	return 0;
15400       addends[n] = op;
15401 
15402       for (i = n; i >= 0; --i)
15403 	{
15404 	  op = addends[i];
15405 	  switch (GET_CODE (op))
15406 	    {
15407 	    case MULT:
15408 	      if (index)
15409 		return 0;
15410 	      index = XEXP (op, 0);
15411 	      scale_rtx = XEXP (op, 1);
15412 	      break;
15413 
15414 	    case ASHIFT:
15415 	      if (index)
15416 		return 0;
15417 	      index = XEXP (op, 0);
15418 	      tmp = XEXP (op, 1);
15419 	      if (!CONST_INT_P (tmp))
15420 		return 0;
15421 	      scale = INTVAL (tmp);
15422 	      if ((unsigned HOST_WIDE_INT) scale > 3)
15423 		return 0;
15424 	      scale = 1 << scale;
15425 	      break;
15426 
15427 	    case ZERO_EXTEND:
15428 	      op = XEXP (op, 0);
15429 	      if (GET_CODE (op) != UNSPEC)
15430 		return 0;
15431 	      /* FALLTHRU */
15432 
15433 	    case UNSPEC:
15434 	      if (XINT (op, 1) == UNSPEC_TP
15435 	          && TARGET_TLS_DIRECT_SEG_REFS
15436 	          && seg == ADDR_SPACE_GENERIC)
15437 		seg = DEFAULT_TLS_SEG_REG;
15438 	      else
15439 		return 0;
15440 	      break;
15441 
15442 	    case SUBREG:
15443 	      if (!REG_P (SUBREG_REG (op)))
15444 		return 0;
15445 	      /* FALLTHRU */
15446 
15447 	    case REG:
15448 	      if (!base)
15449 		base = op;
15450 	      else if (!index)
15451 		index = op;
15452 	      else
15453 		return 0;
15454 	      break;
15455 
15456 	    case CONST:
15457 	    case CONST_INT:
15458 	    case SYMBOL_REF:
15459 	    case LABEL_REF:
15460 	      if (disp)
15461 		return 0;
15462 	      disp = op;
15463 	      break;
15464 
15465 	    default:
15466 	      return 0;
15467 	    }
15468 	}
15469     }
15470   else if (GET_CODE (addr) == MULT)
15471     {
15472       index = XEXP (addr, 0);		/* index*scale */
15473       scale_rtx = XEXP (addr, 1);
15474     }
15475   else if (GET_CODE (addr) == ASHIFT)
15476     {
15477       /* We're called for lea too, which implements ashift on occasion.  */
15478       index = XEXP (addr, 0);
15479       tmp = XEXP (addr, 1);
15480       if (!CONST_INT_P (tmp))
15481 	return 0;
15482       scale = INTVAL (tmp);
15483       if ((unsigned HOST_WIDE_INT) scale > 3)
15484 	return 0;
15485       scale = 1 << scale;
15486       retval = -1;
15487     }
15488   else
15489     disp = addr;			/* displacement */
15490 
15491   if (index)
15492     {
15493       if (REG_P (index))
15494 	;
15495       else if (SUBREG_P (index)
15496 	       && REG_P (SUBREG_REG (index)))
15497 	;
15498       else
15499 	return 0;
15500     }
15501 
15502   /* Extract the integral value of scale.  */
15503   if (scale_rtx)
15504     {
15505       if (!CONST_INT_P (scale_rtx))
15506 	return 0;
15507       scale = INTVAL (scale_rtx);
15508     }
15509 
15510   base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
15511   index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
15512 
15513   /* Avoid useless 0 displacement.  */
15514   if (disp == const0_rtx && (base || index))
15515     disp = NULL_RTX;
15516 
15517   /* Allow arg pointer and stack pointer as index if there is no scaling.  */
15518   if (base_reg && index_reg && scale == 1
15519       && (REGNO (index_reg) == ARG_POINTER_REGNUM
15520 	  || REGNO (index_reg) == FRAME_POINTER_REGNUM
15521 	  || REGNO (index_reg) == SP_REG))
15522     {
15523       std::swap (base, index);
15524       std::swap (base_reg, index_reg);
15525     }
15526 
15527   /* Special case: %ebp cannot be encoded as a base without a displacement.
15528      Similarly %r13.  */
15529   if (!disp && base_reg
15530       && (REGNO (base_reg) == ARG_POINTER_REGNUM
15531 	  || REGNO (base_reg) == FRAME_POINTER_REGNUM
15532 	  || REGNO (base_reg) == BP_REG
15533 	  || REGNO (base_reg) == R13_REG))
15534     disp = const0_rtx;
15535 
15536   /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
15537      Avoid this by transforming it to [%esi+0].
15538      Reload calls address legitimization without cfun defined, so we need
15539      to test cfun for being non-NULL.  */
15540   if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
15541       && base_reg && !index_reg && !disp
15542       && REGNO (base_reg) == SI_REG)
15543     disp = const0_rtx;
15544 
15545   /* Special case: encode reg+reg instead of reg*2.  */
15546   if (!base && index && scale == 2)
15547     base = index, base_reg = index_reg, scale = 1;
15548 
15549   /* Special case: scaling cannot be encoded without base or displacement.  */
15550   if (!base && !disp && index && scale != 1)
15551     disp = const0_rtx;
15552 
15553   out->base = base;
15554   out->index = index;
15555   out->disp = disp;
15556   out->scale = scale;
15557   out->seg = seg;
15558 
15559   return retval;
15560 }
15561 
15562 /* Return cost of the memory address x.
15563    For i386, it is better to use a complex address than let gcc copy
15564    the address into a reg and make a new pseudo.  But not if the address
15565    requires two regs - that would mean more pseudos with longer
15566    lifetimes.  */
15567 static int
15568 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
15569 {
15570   struct ix86_address parts;
15571   int cost = 1;
15572   int ok = ix86_decompose_address (x, &parts);
15573 
15574   gcc_assert (ok);
15575 
15576   if (parts.base && SUBREG_P (parts.base))
15577     parts.base = SUBREG_REG (parts.base);
15578   if (parts.index && SUBREG_P (parts.index))
15579     parts.index = SUBREG_REG (parts.index);
15580 
15581   /* Attempt to minimize number of registers in the address by increasing
15582      address cost for each used register.  We don't increase address cost
15583      for "pic_offset_table_rtx".  When a memopt with "pic_offset_table_rtx"
15584      is not invariant itself it most likely means that base or index is not
15585      invariant.  Therefore only "pic_offset_table_rtx" could be hoisted out,
15586      which is not profitable for x86.  */
15587   if (parts.base
15588       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
15589       && (current_pass->type == GIMPLE_PASS
15590 	  || !pic_offset_table_rtx
15591 	  || !REG_P (parts.base)
15592 	  || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
15593     cost++;
15594 
15595   if (parts.index
15596       && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
15597       && (current_pass->type == GIMPLE_PASS
15598 	  || !pic_offset_table_rtx
15599 	  || !REG_P (parts.index)
15600 	  || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
15601     cost++;
15602 
15603   /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
15604      since its predecode logic can't detect the length of such instructions
15605      and they degenerate to vector decoding.  Increase the cost of such
15606      addresses here.  The penalty is at least 2 cycles.  It may be worthwhile
15607      to split such addresses or even refuse them altogether.
15608 
15609      Following addressing modes are affected:
15610       [base+scale*index]
15611       [scale*index+disp]
15612       [base+index]
15613 
15614      The first and last cases may be avoidable by explicitly coding a zero
15615      displacement into the memory address, but I don't have an AMD-K6 machine
15616      handy to check this theory.  */
15617 
15618   if (TARGET_K6
15619       && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
15620 	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
15621 	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
15622     cost += 10;
15623 
15624   return cost;
15625 }
15626 
15627 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
15628    this is used to form addresses to local data when -fPIC is in
15629    use.  */
15630 
15631 static bool
15632 darwin_local_data_pic (rtx disp)
15633 {
15634   return (GET_CODE (disp) == UNSPEC
15635 	  && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
15636 }
15637 
15638 /* True if operand X should be loaded from GOT.  */
15639 
15640 bool
15641 ix86_force_load_from_GOT_p (rtx x)
15642 {
15643   return ((TARGET_64BIT || HAVE_AS_IX86_GOT32X)
15644 	  && !TARGET_PECOFF && !TARGET_MACHO
15645 	  && !flag_plt && !flag_pic
15646 	  && ix86_cmodel != CM_LARGE
15647 	  && GET_CODE (x) == SYMBOL_REF
15648 	  && SYMBOL_REF_FUNCTION_P (x)
15649 	  && !SYMBOL_REF_LOCAL_P (x));
15650 }
15651 
15652 /* Determine if a given RTX is a valid constant.  We already know this
15653    satisfies CONSTANT_P.  */
15654 
15655 static bool
15656 ix86_legitimate_constant_p (machine_mode mode, rtx x)
15657 {
15658   /* Pointer bounds constants are not valid.  */
15659   if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
15660     return false;
15661 
15662   switch (GET_CODE (x))
15663     {
15664     case CONST:
15665       x = XEXP (x, 0);
15666 
15667       if (GET_CODE (x) == PLUS)
15668 	{
15669 	  if (!CONST_INT_P (XEXP (x, 1)))
15670 	    return false;
15671 	  x = XEXP (x, 0);
15672 	}
15673 
15674       if (TARGET_MACHO && darwin_local_data_pic (x))
15675 	return true;
15676 
15677       /* Only some unspecs are valid as "constants".  */
15678       if (GET_CODE (x) == UNSPEC)
15679 	switch (XINT (x, 1))
15680 	  {
15681 	  case UNSPEC_GOT:
15682 	  case UNSPEC_GOTOFF:
15683 	  case UNSPEC_PLTOFF:
15684 	    return TARGET_64BIT;
15685 	  case UNSPEC_TPOFF:
15686 	  case UNSPEC_NTPOFF:
15687 	    x = XVECEXP (x, 0, 0);
15688 	    return (GET_CODE (x) == SYMBOL_REF
15689 		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
15690 	  case UNSPEC_DTPOFF:
15691 	    x = XVECEXP (x, 0, 0);
15692 	    return (GET_CODE (x) == SYMBOL_REF
15693 		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
15694 	  default:
15695 	    return false;
15696 	  }
15697 
15698       /* We must have drilled down to a symbol.  */
15699       if (GET_CODE (x) == LABEL_REF)
15700 	return true;
15701       if (GET_CODE (x) != SYMBOL_REF)
15702 	return false;
15703       /* FALLTHRU */
15704 
15705     case SYMBOL_REF:
15706       /* TLS symbols are never valid.  */
15707       if (SYMBOL_REF_TLS_MODEL (x))
15708 	return false;
15709 
15710       /* DLLIMPORT symbols are never valid.  */
15711       if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15712 	  && SYMBOL_REF_DLLIMPORT_P (x))
15713 	return false;
15714 
15715 #if TARGET_MACHO
15716       /* mdynamic-no-pic */
15717       if (MACHO_DYNAMIC_NO_PIC_P)
15718 	return machopic_symbol_defined_p (x);
15719 #endif
15720 
15721       /* External function address should be loaded
15722 	 via the GOT slot to avoid PLT.  */
15723       if (ix86_force_load_from_GOT_p (x))
15724 	return false;
15725 
15726       break;
15727 
15728     CASE_CONST_SCALAR_INT:
15729       switch (mode)
15730 	{
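	/* TImode integer constants are always fine on 64-bit targets; TImode
	   on 32-bit targets and the wider OImode/XImode constants must be
	   standard SSE constants.  */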
15731 	case E_TImode:
15732 	  if (TARGET_64BIT)
15733 	    return true;
15734 	  /* FALLTHRU */
15735 	case E_OImode:
15736 	case E_XImode:
15737 	  if (!standard_sse_constant_p (x, mode))
15738 	    return false;
15739 	default:
15740 	  break;
15741 	}
15742       break;
15743 
15744     case CONST_VECTOR:
15745       if (!standard_sse_constant_p (x, mode))
15746 	return false;
15747 
15748     default:
15749       break;
15750     }
15751 
15752   /* Otherwise we handle everything else in the move patterns.  */
15753   return true;
15754 }
15755 
15756 /* Determine if it's legal to put X into the constant pool.  This
15757    is not possible for the address of thread-local symbols, which
15758    is checked above.  */
15759 
15760 static bool
15761 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
15762 {
15763   /* We can put any immediate constant in memory.  */
15764   switch (GET_CODE (x))
15765     {
15766     CASE_CONST_ANY:
15767       return false;
15768 
15769     default:
15770       break;
15771     }
15772 
15773   return !ix86_legitimate_constant_p (mode, x);
15774 }
15775 
15776 /* Return true if the symbol is marked as dllimport or as a stub
15777    variable; otherwise return false.  */
15778 
15779 static bool
15780 is_imported_p (rtx x)
15781 {
15782   if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
15783       || GET_CODE (x) != SYMBOL_REF)
15784     return false;
15785 
15786   return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
15787 }
15788 
15789 
15790 /* Nonzero if the constant value X is a legitimate general operand
15791    when generating PIC code.  It is given that flag_pic is on and
15792    that X satisfies CONSTANT_P.  */
15793 
15794 bool
15795 legitimate_pic_operand_p (rtx x)
15796 {
15797   rtx inner;
15798 
15799   switch (GET_CODE (x))
15800     {
15801     case CONST:
15802       inner = XEXP (x, 0);
15803       if (GET_CODE (inner) == PLUS
15804 	  && CONST_INT_P (XEXP (inner, 1)))
15805 	inner = XEXP (inner, 0);
15806 
15807       /* Only some unspecs are valid as "constants".  */
15808       if (GET_CODE (inner) == UNSPEC)
15809 	switch (XINT (inner, 1))
15810 	  {
15811 	  case UNSPEC_GOT:
15812 	  case UNSPEC_GOTOFF:
15813 	  case UNSPEC_PLTOFF:
15814 	    return TARGET_64BIT;
15815 	  case UNSPEC_TPOFF:
15816 	    x = XVECEXP (inner, 0, 0);
15817 	    return (GET_CODE (x) == SYMBOL_REF
15818 		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
15819 	  case UNSPEC_MACHOPIC_OFFSET:
15820 	    return legitimate_pic_address_disp_p (x);
15821 	  default:
15822 	    return false;
15823 	  }
15824       /* FALLTHRU */
15825 
15826     case SYMBOL_REF:
15827     case LABEL_REF:
15828       return legitimate_pic_address_disp_p (x);
15829 
15830     default:
15831       return true;
15832     }
15833 }
15834 
15835 /* Determine if a given CONST RTX is a valid memory displacement
15836    in PIC mode.  */
15837 
15838 bool
15839 legitimate_pic_address_disp_p (rtx disp)
15840 {
15841   bool saw_plus;
15842 
15843   /* In 64bit mode we can allow direct addresses of symbols and labels
15844      when they are not dynamic symbols.  */
15845   if (TARGET_64BIT)
15846     {
15847       rtx op0 = disp, op1;
15848 
15849       switch (GET_CODE (disp))
15850 	{
15851 	case LABEL_REF:
15852 	  return true;
15853 
15854 	case CONST:
15855 	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
15856 	    break;
15857 	  op0 = XEXP (XEXP (disp, 0), 0);
15858 	  op1 = XEXP (XEXP (disp, 0), 1);
15859 	  if (!CONST_INT_P (op1))
15860 	    break;
15861 	  if (GET_CODE (op0) == UNSPEC
15862 	      && (XINT (op0, 1) == UNSPEC_DTPOFF
15863 		  || XINT (op0, 1) == UNSPEC_NTPOFF)
15864 	      && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
15865 	    return true;
15866 	  if (INTVAL (op1) >= 16*1024*1024
15867 	      || INTVAL (op1) < -16*1024*1024)
15868 	    break;
15869 	  if (GET_CODE (op0) == LABEL_REF)
15870 	    return true;
15871 	  if (GET_CODE (op0) == CONST
15872 	      && GET_CODE (XEXP (op0, 0)) == UNSPEC
15873 	      && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
15874 	    return true;
15875 	  if (GET_CODE (op0) == UNSPEC
15876 	      && XINT (op0, 1) == UNSPEC_PCREL)
15877 	    return true;
15878 	  if (GET_CODE (op0) != SYMBOL_REF)
15879 	    break;
15880 	  /* FALLTHRU */
15881 
15882 	case SYMBOL_REF:
15883 	  /* TLS references should always be enclosed in UNSPEC.
15884 	     A dllimported symbol must always be resolved.  */
15885 	  if (SYMBOL_REF_TLS_MODEL (op0)
15886 	      || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
15887 	    return false;
15888 
15889 	  if (TARGET_PECOFF)
15890 	    {
15891 	      if (is_imported_p (op0))
15892 		return true;
15893 
15894 	      if (SYMBOL_REF_FAR_ADDR_P (op0)
15895 		  || !SYMBOL_REF_LOCAL_P (op0))
15896 		break;
15897 
15898 	      /* Function symbols need to be resolved only for
15899 	         the large model.
15900 	         For the small model we don't need to resolve anything
15901 	         here.  */
15902 	      if ((ix86_cmodel != CM_LARGE_PIC
15903 	           && SYMBOL_REF_FUNCTION_P (op0))
15904 		  || ix86_cmodel == CM_SMALL_PIC)
15905 		return true;
15906 	      /* Non-external symbols don't need to be resolved for
15907 	         the large and medium models.  */
15908 	      if ((ix86_cmodel == CM_LARGE_PIC
15909 		   || ix86_cmodel == CM_MEDIUM_PIC)
15910 		  && !SYMBOL_REF_EXTERNAL_P (op0))
15911 		return true;
15912 	    }
15913 	  else if (!SYMBOL_REF_FAR_ADDR_P (op0)
15914 		   && (SYMBOL_REF_LOCAL_P (op0)
15915 		       || (HAVE_LD_PIE_COPYRELOC
15916 			   && flag_pie
15917 			   && !SYMBOL_REF_WEAK (op0)
15918 			   && !SYMBOL_REF_FUNCTION_P (op0)))
15919 		   && ix86_cmodel != CM_LARGE_PIC)
15920 	    return true;
15921 	  break;
15922 
15923 	default:
15924 	  break;
15925 	}
15926     }
15927   if (GET_CODE (disp) != CONST)
15928     return false;
15929   disp = XEXP (disp, 0);
15930 
15931   if (TARGET_64BIT)
15932     {
15933       /* It is not safe to allow PLUS expressions; this limits the allowed
15934          displacement range of GOT references.  We should not need these anyway.  */
15935       if (GET_CODE (disp) != UNSPEC
15936 	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
15937 	      && XINT (disp, 1) != UNSPEC_GOTOFF
15938 	      && XINT (disp, 1) != UNSPEC_PCREL
15939 	      && XINT (disp, 1) != UNSPEC_PLTOFF))
15940 	return false;
15941 
15942       if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
15943 	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
15944 	return false;
15945       return true;
15946     }
15947 
15948   saw_plus = false;
15949   if (GET_CODE (disp) == PLUS)
15950     {
15951       if (!CONST_INT_P (XEXP (disp, 1)))
15952 	return false;
15953       disp = XEXP (disp, 0);
15954       saw_plus = true;
15955     }
15956 
15957   if (TARGET_MACHO && darwin_local_data_pic (disp))
15958     return true;
15959 
15960   if (GET_CODE (disp) != UNSPEC)
15961     return false;
15962 
15963   switch (XINT (disp, 1))
15964     {
15965     case UNSPEC_GOT:
15966       if (saw_plus)
15967 	return false;
15968       /* We need to check for both symbols and labels because VxWorks loads
15969 	 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
15970 	 details.  */
15971       return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
15972 	      || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
15973     case UNSPEC_GOTOFF:
15974       /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
15975 	 While the ABI also specifies a 32bit relocation, we don't produce it
15976 	 in the small PIC model at all.  */
15977       if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
15978 	   || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
15979 	  && !TARGET_64BIT)
15980         return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
15981       return false;
15982     case UNSPEC_GOTTPOFF:
15983     case UNSPEC_GOTNTPOFF:
15984     case UNSPEC_INDNTPOFF:
15985       if (saw_plus)
15986 	return false;
15987       disp = XVECEXP (disp, 0, 0);
15988       return (GET_CODE (disp) == SYMBOL_REF
15989 	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
15990     case UNSPEC_NTPOFF:
15991       disp = XVECEXP (disp, 0, 0);
15992       return (GET_CODE (disp) == SYMBOL_REF
15993 	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
15994     case UNSPEC_DTPOFF:
15995       disp = XVECEXP (disp, 0, 0);
15996       return (GET_CODE (disp) == SYMBOL_REF
15997 	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
15998     }
15999 
16000   return false;
16001 }
16002 
16003 /* Determine if OP is a suitable RTX for an address register.
16004    Return the naked register if a register or a register subreg is
16005    found, otherwise return NULL_RTX.  */
16006 
16007 static rtx
16008 ix86_validate_address_register (rtx op)
16009 {
16010   machine_mode mode = GET_MODE (op);
16011 
16012   /* Only SImode or DImode registers can form the address.  */
16013   if (mode != SImode && mode != DImode)
16014     return NULL_RTX;
16015 
16016   if (REG_P (op))
16017     return op;
16018   else if (SUBREG_P (op))
16019     {
16020       rtx reg = SUBREG_REG (op);
16021 
16022       if (!REG_P (reg))
16023 	return NULL_RTX;
16024 
16025       mode = GET_MODE (reg);
16026 
16027       /* Don't allow SUBREGs that span more than a word.  It can
16028 	 lead to spill failures when the register is one word out
16029 	 of a two word structure.  */
16030       if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16031 	return NULL_RTX;
16032 
16033       /* Allow only SUBREGs of non-eliminable hard registers.  */
16034       if (register_no_elim_operand (reg, mode))
16035 	return reg;
16036     }
16037 
16038   /* Op is not a register.  */
16039   return NULL_RTX;
16040 }
16041 
16042 /* Recognizes RTL expressions that are valid memory addresses for an
16043    instruction.  The MODE argument is the machine mode for the MEM
16044    expression that wants to use this address.
16045 
16046    It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
16047    convert common non-canonical forms to canonical form so that they will
16048    be recognized.  */
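/* As an example, the memory operand in

     movl 4(%ebx,%ecx,8), %eax

   decomposes into base = %ebx, index = %ecx, scale = 8, disp = 4,
   which is the canonical base + index*scale + disp form that the
   checks below validate piece by piece.  */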
16049 
16050 static bool
16051 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
16052 {
16053   struct ix86_address parts;
16054   rtx base, index, disp;
16055   HOST_WIDE_INT scale;
16056   addr_space_t seg;
16057 
16058   if (ix86_decompose_address (addr, &parts) <= 0)
16059     /* Decomposition failed.  */
16060     return false;
16061 
16062   base = parts.base;
16063   index = parts.index;
16064   disp = parts.disp;
16065   scale = parts.scale;
16066   seg = parts.seg;
16067 
16068   /* Validate base register.  */
16069   if (base)
16070     {
16071       rtx reg = ix86_validate_address_register (base);
16072 
16073       if (reg == NULL_RTX)
16074 	return false;
16075 
16076       if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
16077 	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
16078 	/* Base is not valid.  */
16079 	return false;
16080     }
16081 
16082   /* Validate index register.  */
16083   if (index)
16084     {
16085       rtx reg = ix86_validate_address_register (index);
16086 
16087       if (reg == NULL_RTX)
16088 	return false;
16089 
16090       if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
16091 	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
16092 	/* Index is not valid.  */
16093 	return false;
16094     }
16095 
16096   /* Index and base should have the same mode.  */
16097   if (base && index
16098       && GET_MODE (base) != GET_MODE (index))
16099     return false;
16100 
16101   /* Address override works only on the (%reg) part of %fs:(%reg).  */
16102   if (seg != ADDR_SPACE_GENERIC
16103       && ((base && GET_MODE (base) != word_mode)
16104 	  || (index && GET_MODE (index) != word_mode)))
16105     return false;
16106 
16107   /* Validate scale factor.  */
16108   if (scale != 1)
16109     {
16110       if (!index)
16111 	/* Scale without index.  */
16112 	return false;
16113 
16114       if (scale != 2 && scale != 4 && scale != 8)
16115 	/* Scale is not a valid multiplier.  */
16116 	return false;
16117     }
16118 
16119   /* Validate displacement.  */
16120   if (disp)
16121     {
16122       if (GET_CODE (disp) == CONST
16123 	  && GET_CODE (XEXP (disp, 0)) == UNSPEC
16124 	  && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
16125 	switch (XINT (XEXP (disp, 0), 1))
16126 	  {
16127 	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
16128 	     when used.  While the ABI also specifies 32bit relocations, we
16129 	     don't produce them at all and use IP relative addressing instead.
16130 	     Allow GOT in 32bit mode for both PIC and non-PIC if the symbol
16131 	     should be loaded via the GOT.  */
16132 	  case UNSPEC_GOT:
16133 	    if (!TARGET_64BIT
16134 		&& ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
16135 	      goto is_legitimate_pic;
16136 	    /* FALLTHRU */
16137 	  case UNSPEC_GOTOFF:
16138 	    gcc_assert (flag_pic);
16139 	    if (!TARGET_64BIT)
16140 	      goto is_legitimate_pic;
16141 
16142 	    /* 64bit address unspec.  */
16143 	    return false;
16144 
16145 	  case UNSPEC_GOTPCREL:
16146 	    if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
16147 	      goto is_legitimate_pic;
16148 	    /* FALLTHRU */
16149 	  case UNSPEC_PCREL:
16150 	    gcc_assert (flag_pic);
16151 	    goto is_legitimate_pic;
16152 
16153 	  case UNSPEC_GOTTPOFF:
16154 	  case UNSPEC_GOTNTPOFF:
16155 	  case UNSPEC_INDNTPOFF:
16156 	  case UNSPEC_NTPOFF:
16157 	  case UNSPEC_DTPOFF:
16158 	    break;
16159 
16160 	  default:
16161 	    /* Invalid address unspec.  */
16162 	    return false;
16163 	  }
16164 
16165       else if (SYMBOLIC_CONST (disp)
16166 	       && (flag_pic
16167 		   || (TARGET_MACHO
16168 #if TARGET_MACHO
16169 		       && MACHOPIC_INDIRECT
16170 		       && !machopic_operand_p (disp)
16171 #endif
16172 	       )))
16173 	{
16174 
16175 	is_legitimate_pic:
16176 	  if (TARGET_64BIT && (index || base))
16177 	    {
16178 	      /* foo@dtpoff(%rX) is ok.  */
16179 	      if (GET_CODE (disp) != CONST
16180 		  || GET_CODE (XEXP (disp, 0)) != PLUS
16181 		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
16182 		  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
16183 		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
16184 		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
16185 		/* Non-constant pic memory reference.  */
16186 		return false;
16187 	    }
16188 	  else if ((!TARGET_MACHO || flag_pic)
16189 		    && ! legitimate_pic_address_disp_p (disp))
16190 	    /* Displacement is an invalid pic construct.  */
16191 	    return false;
16192 #if TARGET_MACHO
16193 	  else if (MACHO_DYNAMIC_NO_PIC_P
16194 		   && !ix86_legitimate_constant_p (Pmode, disp))
16195 	    /* Displacement must be referenced via non_lazy_pointer.  */
16196 	    return false;
16197 #endif
16198 
16199           /* This code used to verify that a symbolic pic displacement
16200 	     includes the pic_offset_table_rtx register.
16201 
16202 	     While this is a good idea, unfortunately these constructs may
16203 	     be created by the "adds using lea" optimization for incorrect
16204 	     code like:
16205 
16206 	     int a;
16207 	     int foo(int i)
16208 	       {
16209 	         return *(&a+i);
16210 	       }
16211 
16212 	     This code is nonsensical, but results in addressing the
16213 	     GOT table with a pic_offset_table_rtx base.  We can't
16214 	     just refuse it easily, since it gets matched by the
16215 	     "addsi3" pattern, which later gets split to lea when the
16216 	     output register differs from the input.  While this
16217 	     could be handled by a separate addsi pattern for this case
16218 	     that never results in lea, disabling this test seems the
16219 	     easier and correct fix for the crash.  */
16220 	}
16221       else if (GET_CODE (disp) != LABEL_REF
16222 	       && !CONST_INT_P (disp)
16223 	       && (GET_CODE (disp) != CONST
16224 		   || !ix86_legitimate_constant_p (Pmode, disp))
16225 	       && (GET_CODE (disp) != SYMBOL_REF
16226 		   || !ix86_legitimate_constant_p (Pmode, disp)))
16227 	/* Displacement is not constant.  */
16228 	return false;
16229       else if (TARGET_64BIT
16230 	       && !x86_64_immediate_operand (disp, VOIDmode))
16231 	/* Displacement is out of range.  */
16232 	return false;
16233       /* In x32 mode, constant addresses are sign extended to 64bit, so
16234 	 we have to reject addresses in the range 0x80000000 to 0xffffffff.  */
16235       else if (TARGET_X32 && !(index || base)
16236 	       && CONST_INT_P (disp)
16237 	       && val_signbit_known_set_p (SImode, INTVAL (disp)))
16238 	return false;
16239     }
16240 
16241   /* Everything looks valid.  */
16242   return true;
16243 }
16244 
16245 /* Determine if a given RTX is a valid constant address.  */
16246 
16247 bool
16248 constant_address_p (rtx x)
16249 {
16250   return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
16251 }
16252 
16253 /* Return a unique alias set for the GOT.  */
16254 
16255 static alias_set_type
16256 ix86_GOT_alias_set (void)
16257 {
16258   static alias_set_type set = -1;
16259   if (set == -1)
16260     set = new_alias_set ();
16261   return set;
16262 }
16263 
16264 /* Return a legitimate reference for ORIG (an address) using the
16265    register REG.  If REG is 0, a new pseudo is generated.
16266 
16267    There are two types of references that must be handled:
16268 
16269    1. Global data references must load the address from the GOT, via
16270       the PIC reg.  An insn is emitted to do this load, and the reg is
16271       returned.
16272 
16273    2. Static data references, constant pool addresses, and code labels
16274       compute the address as an offset from the GOT, whose base is in
16275       the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
16276       differentiate them from global data objects.  The returned
16277       address is the PIC reg + an unspec constant.
16278 
16279    TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
16280    reg also appears in the address.  */
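/* For instance, in 32-bit PIC code a global symbol SYM is typically
   turned into the memory load

     (mem (plus pic_offset_table_rtx
		(const (unspec [(symbol_ref "sym")] UNSPEC_GOT))))

   i.e. sym@GOT(%ebx), while a local symbol becomes the address

     (plus pic_offset_table_rtx
	   (const (unspec [(symbol_ref "sym")] UNSPEC_GOTOFF)))

   i.e. sym@GOTOFF(%ebx), computed without a memory load.  */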
16281 
16282 static rtx
16283 legitimize_pic_address (rtx orig, rtx reg)
16284 {
16285   rtx addr = orig;
16286   rtx new_rtx = orig;
16287 
16288 #if TARGET_MACHO
16289   if (TARGET_MACHO && !TARGET_64BIT)
16290     {
16291       if (reg == 0)
16292 	reg = gen_reg_rtx (Pmode);
16293       /* Use the generic Mach-O PIC machinery.  */
16294       return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
16295     }
16296 #endif
16297 
16298   if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
16299     {
16300       rtx tmp = legitimize_pe_coff_symbol (addr, true);
16301       if (tmp)
16302         return tmp;
16303     }
16304 
16305   if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
16306     new_rtx = addr;
16307   else if ((!TARGET_64BIT
16308 	    || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
16309 	   && !TARGET_PECOFF
16310 	   && gotoff_operand (addr, Pmode))
16311     {
16312       /* This symbol may be referenced via a displacement
16313 	 from the PIC base address (@GOTOFF).  */
16314       if (GET_CODE (addr) == CONST)
16315 	addr = XEXP (addr, 0);
16316 
16317       if (GET_CODE (addr) == PLUS)
16318 	  {
16319             new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
16320 				      UNSPEC_GOTOFF);
16321 	    new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
16322 	  }
16323 	else
16324           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
16325 
16326       new_rtx = gen_rtx_CONST (Pmode, new_rtx);
16327 
16328       if (TARGET_64BIT)
16329 	new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
16330 
16331       if (reg != 0)
16332 	{
16333  	  gcc_assert (REG_P (reg));
16334 	  new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
16335 					 new_rtx, reg, 1, OPTAB_DIRECT);
16336  	}
16337       else
16338 	new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
16339     }
16340   else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
16341 	   /* We can't use @GOTOFF for text labels
16342 	      on VxWorks, see gotoff_operand.  */
16343 	   || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
16344     {
16345       rtx tmp = legitimize_pe_coff_symbol (addr, true);
16346       if (tmp)
16347         return tmp;
16348 
16349       /* For x64 PE-COFF there is no GOT table,
16350 	 so we use the address directly.  */
16351       if (TARGET_64BIT && TARGET_PECOFF)
16352 	{
16353 	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
16354 	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
16355 	}
16356       else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
16357 	{
16358 	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
16359 				    UNSPEC_GOTPCREL);
16360 	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
16361 	  new_rtx = gen_const_mem (Pmode, new_rtx);
16362 	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
16363 	}
16364       else
16365 	{
16366 	  /* This symbol must be referenced via a load
16367 	     from the Global Offset Table (@GOT).  */
16368 	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
16369 	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
16370 	  if (TARGET_64BIT)
16371 	    new_rtx = force_reg (Pmode, new_rtx);
16372 	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
16373 	  new_rtx = gen_const_mem (Pmode, new_rtx);
16374 	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
16375 	}
16376 
16377       new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
16378     }
16379   else
16380     {
16381       if (CONST_INT_P (addr)
16382 	  && !x86_64_immediate_operand (addr, VOIDmode))
16383 	new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
16384       else if (GET_CODE (addr) == CONST)
16385 	{
16386 	  addr = XEXP (addr, 0);
16387 
16388 	  /* We must match stuff we generate before.  Assume the only
16389 	     unspecs that can get here are ours.  Not that we could do
16390 	     anything with them anyway....  */
16391 	  if (GET_CODE (addr) == UNSPEC
16392 	      || (GET_CODE (addr) == PLUS
16393 		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
16394 	    return orig;
16395 	  gcc_assert (GET_CODE (addr) == PLUS);
16396 	}
16397 
16398       if (GET_CODE (addr) == PLUS)
16399 	{
16400 	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
16401 
16402 	  /* Check first to see if this is a constant
16403 	     offset from a @GOTOFF symbol reference.  */
16404 	  if (!TARGET_PECOFF
16405 	      && gotoff_operand (op0, Pmode)
16406 	      && CONST_INT_P (op1))
16407 	    {
16408 	      if (!TARGET_64BIT)
16409 		{
16410 		  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
16411 					    UNSPEC_GOTOFF);
16412 		  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
16413 		  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
16414 
16415 		  if (reg != 0)
16416 		    {
16417 		      gcc_assert (REG_P (reg));
16418 		      new_rtx = expand_simple_binop (Pmode, PLUS,
16419 						     pic_offset_table_rtx,
16420 						     new_rtx, reg, 1,
16421 						     OPTAB_DIRECT);
16422 		    }
16423 		  else
16424 		    new_rtx
16425 		      = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
16426 		}
16427 	      else
16428 		{
16429 		  if (INTVAL (op1) < -16*1024*1024
16430 		      || INTVAL (op1) >= 16*1024*1024)
16431 		    {
16432 		      if (!x86_64_immediate_operand (op1, Pmode))
16433 			op1 = force_reg (Pmode, op1);
16434 
16435 		      new_rtx
16436 			= gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
16437 		    }
16438 		}
16439 	    }
16440 	  else
16441 	    {
16442 	      rtx base = legitimize_pic_address (op0, reg);
16443 	      machine_mode mode = GET_MODE (base);
16444 	      new_rtx
16445 	        = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
16446 
16447 	      if (CONST_INT_P (new_rtx))
16448 		{
16449 		  if (INTVAL (new_rtx) < -16*1024*1024
16450 		      || INTVAL (new_rtx) >= 16*1024*1024)
16451 		    {
16452 		      if (!x86_64_immediate_operand (new_rtx, mode))
16453 			new_rtx = force_reg (mode, new_rtx);
16454 
16455 		      new_rtx
16456 		        = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
16457 		    }
16458 		  else
16459 		    new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
16460 		}
16461 	      else
16462 		{
16463 		  /* For %rip addressing, we have to use
16464 		     just disp32, with neither base nor index.  */
16465 		  if (TARGET_64BIT
16466 		      && (GET_CODE (base) == SYMBOL_REF
16467 			  || GET_CODE (base) == LABEL_REF))
16468 		    base = force_reg (mode, base);
16469 		  if (GET_CODE (new_rtx) == PLUS
16470 		      && CONSTANT_P (XEXP (new_rtx, 1)))
16471 		    {
16472 		      base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
16473 		      new_rtx = XEXP (new_rtx, 1);
16474 		    }
16475 		  new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
16476 		}
16477 	    }
16478 	}
16479     }
16480   return new_rtx;
16481 }
16482 
16483 /* Load the thread pointer.  If TO_REG is true, force it into a register.  */
16484 
16485 static rtx
16486 get_thread_pointer (machine_mode tp_mode, bool to_reg)
16487 {
16488   rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
16489 
16490   if (GET_MODE (tp) != tp_mode)
16491     {
16492       gcc_assert (GET_MODE (tp) == SImode);
16493       gcc_assert (tp_mode == DImode);
16494 
16495       tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
16496     }
16497 
16498   if (to_reg)
16499     tp = copy_to_mode_reg (tp_mode, tp);
16500 
16501   return tp;
16502 }
16503 
16504 /* Construct the SYMBOL_REF for the tls_get_addr function.  */
16505 
16506 static GTY(()) rtx ix86_tls_symbol;
16507 
16508 static rtx
16509 ix86_tls_get_addr (void)
16510 {
16511   if (!ix86_tls_symbol)
16512     {
16513       const char *sym
16514 	= ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
16515 	   ? "___tls_get_addr" : "__tls_get_addr");
16516 
16517       ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
16518     }
16519 
16520   if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
16521     {
16522       rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
16523 				   UNSPEC_PLTOFF);
16524       return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
16525 			   gen_rtx_CONST (Pmode, unspec));
16526     }
16527 
16528   return ix86_tls_symbol;
16529 }
16530 
16531 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */
16532 
16533 static GTY(()) rtx ix86_tls_module_base_symbol;
16534 
16535 rtx
16536 ix86_tls_module_base (void)
16537 {
16538   if (!ix86_tls_module_base_symbol)
16539     {
16540       ix86_tls_module_base_symbol
16541 	= gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
16542 
16543       SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
16544 	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
16545     }
16546 
16547   return ix86_tls_module_base_symbol;
16548 }
16549 
16550 /* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
16551    false if we expect this to be used for a memory address and true if
16552    we expect to load the address into a register.  */
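/* The sequences emitted below follow the usual ELF TLS models; e.g. for
   the local-exec model on x86-64 the address of VAR is formed roughly as

     movq %fs:0, %reg
     leaq var@tpoff(%reg), %reg

   (or referenced directly as %fs:var@tpoff when direct segment
   references are allowed), while the initial-exec model first loads the
   offset from the GOT via var@gottpoff(%rip).  */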
16553 
16554 static rtx
16555 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
16556 {
16557   rtx dest, base, off;
16558   rtx pic = NULL_RTX, tp = NULL_RTX;
16559   machine_mode tp_mode = Pmode;
16560   int type;
16561 
16562   /* Fall back to the global dynamic model if the toolchain cannot
16563      support local dynamic.  */
16564   if (TARGET_SUN_TLS && !TARGET_64BIT
16565       && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
16566       && model == TLS_MODEL_LOCAL_DYNAMIC)
16567     model = TLS_MODEL_GLOBAL_DYNAMIC;
16568 
16569   switch (model)
16570     {
16571     case TLS_MODEL_GLOBAL_DYNAMIC:
16572       dest = gen_reg_rtx (Pmode);
16573 
16574       if (!TARGET_64BIT)
16575 	{
16576 	  if (flag_pic && !TARGET_PECOFF)
16577 	    pic = pic_offset_table_rtx;
16578 	  else
16579 	    {
16580 	      pic = gen_reg_rtx (Pmode);
16581 	      emit_insn (gen_set_got (pic));
16582 	    }
16583 	}
16584 
16585       if (TARGET_GNU2_TLS)
16586 	{
16587 	  if (TARGET_64BIT)
16588 	    emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
16589 	  else
16590 	    emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
16591 
16592 	  tp = get_thread_pointer (Pmode, true);
16593 	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
16594 
16595 	  if (GET_MODE (x) != Pmode)
16596 	    x = gen_rtx_ZERO_EXTEND (Pmode, x);
16597 
16598 	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
16599 	}
16600       else
16601 	{
16602 	  rtx caddr = ix86_tls_get_addr ();
16603 
16604 	  if (TARGET_64BIT)
16605 	    {
16606 	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
16607 	      rtx_insn *insns;
16608 
16609 	      start_sequence ();
16610 	      emit_call_insn
16611 		(ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
16612 	      insns = get_insns ();
16613 	      end_sequence ();
16614 
16615 	      if (GET_MODE (x) != Pmode)
16616 		x = gen_rtx_ZERO_EXTEND (Pmode, x);
16617 
16618 	      RTL_CONST_CALL_P (insns) = 1;
16619 	      emit_libcall_block (insns, dest, rax, x);
16620 	    }
16621 	  else
16622 	    emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
16623 	}
16624       break;
16625 
16626     case TLS_MODEL_LOCAL_DYNAMIC:
16627       base = gen_reg_rtx (Pmode);
16628 
16629       if (!TARGET_64BIT)
16630 	{
16631 	  if (flag_pic)
16632 	    pic = pic_offset_table_rtx;
16633 	  else
16634 	    {
16635 	      pic = gen_reg_rtx (Pmode);
16636 	      emit_insn (gen_set_got (pic));
16637 	    }
16638 	}
16639 
16640       if (TARGET_GNU2_TLS)
16641 	{
16642 	  rtx tmp = ix86_tls_module_base ();
16643 
16644 	  if (TARGET_64BIT)
16645 	    emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
16646 	  else
16647 	    emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
16648 
16649 	  tp = get_thread_pointer (Pmode, true);
16650 	  set_unique_reg_note (get_last_insn (), REG_EQUAL,
16651 			       gen_rtx_MINUS (Pmode, tmp, tp));
16652 	}
16653       else
16654 	{
16655 	  rtx caddr = ix86_tls_get_addr ();
16656 
16657 	  if (TARGET_64BIT)
16658 	    {
16659 	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
16660 	      rtx_insn *insns;
16661 	      rtx eqv;
16662 
16663 	      start_sequence ();
16664 	      emit_call_insn
16665 		(ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
16666 	      insns = get_insns ();
16667 	      end_sequence ();
16668 
16669 	      /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
16670 		 share the LD_BASE result with other LD model accesses.  */
16671 	      eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
16672 				    UNSPEC_TLS_LD_BASE);
16673 
16674 	      RTL_CONST_CALL_P (insns) = 1;
16675 	      emit_libcall_block (insns, base, rax, eqv);
16676 	    }
16677 	  else
16678 	    emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
16679 	}
16680 
16681       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
16682       off = gen_rtx_CONST (Pmode, off);
16683 
16684       dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
16685 
16686       if (TARGET_GNU2_TLS)
16687 	{
16688 	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
16689 
16690 	  if (GET_MODE (x) != Pmode)
16691 	    x = gen_rtx_ZERO_EXTEND (Pmode, x);
16692 
16693 	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
16694 	}
16695       break;
16696 
16697     case TLS_MODEL_INITIAL_EXEC:
16698       if (TARGET_64BIT)
16699 	{
16700 	  if (TARGET_SUN_TLS && !TARGET_X32)
16701 	    {
16702 	      /* The Sun linker took the AMD64 TLS spec literally
16703 		 and can only handle %rax as the destination of the
16704 		 initial-exec code sequence.  */
16705 
16706 	      dest = gen_reg_rtx (DImode);
16707 	      emit_insn (gen_tls_initial_exec_64_sun (dest, x));
16708 	      return dest;
16709 	    }
16710 
16711 	  /* Generate DImode references to avoid %fs:(%reg32)
16712 	     problems and a linker IE->LE relaxation bug.  */
16713 	  tp_mode = DImode;
16714 	  pic = NULL;
16715 	  type = UNSPEC_GOTNTPOFF;
16716 	}
16717       else if (flag_pic)
16718 	{
16719 	  pic = pic_offset_table_rtx;
16720 	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
16721 	}
16722       else if (!TARGET_ANY_GNU_TLS)
16723 	{
16724 	  pic = gen_reg_rtx (Pmode);
16725 	  emit_insn (gen_set_got (pic));
16726 	  type = UNSPEC_GOTTPOFF;
16727 	}
16728       else
16729 	{
16730 	  pic = NULL;
16731 	  type = UNSPEC_INDNTPOFF;
16732 	}
16733 
16734       off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
16735       off = gen_rtx_CONST (tp_mode, off);
16736       if (pic)
16737 	off = gen_rtx_PLUS (tp_mode, pic, off);
16738       off = gen_const_mem (tp_mode, off);
16739       set_mem_alias_set (off, ix86_GOT_alias_set ());
16740 
16741       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
16742 	{
16743 	  base = get_thread_pointer (tp_mode,
16744 				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
16745 	  off = force_reg (tp_mode, off);
16746 	  dest = gen_rtx_PLUS (tp_mode, base, off);
16747 	  if (tp_mode != Pmode)
16748 	    dest = convert_to_mode (Pmode, dest, 1);
16749 	}
16750       else
16751 	{
16752 	  base = get_thread_pointer (Pmode, true);
16753 	  dest = gen_reg_rtx (Pmode);
16754 	  emit_insn (ix86_gen_sub3 (dest, base, off));
16755 	}
16756       break;
16757 
16758     case TLS_MODEL_LOCAL_EXEC:
16759       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
16760 			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
16761 			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
16762       off = gen_rtx_CONST (Pmode, off);
16763 
16764       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
16765 	{
16766 	  base = get_thread_pointer (Pmode,
16767 				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
16768 	  return gen_rtx_PLUS (Pmode, base, off);
16769 	}
16770       else
16771 	{
16772 	  base = get_thread_pointer (Pmode, true);
16773 	  dest = gen_reg_rtx (Pmode);
16774 	  emit_insn (ix86_gen_sub3 (dest, base, off));
16775 	}
16776       break;
16777 
16778     default:
16779       gcc_unreachable ();
16780     }
16781 
16782   return dest;
16783 }
16784 
16785 /* Return true if OP refers to a TLS address.  */
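/* Concretely, this scans OP for a MEM whose address contains the
   thread-pointer marker (unspec [(const_int 0)] UNSPEC_TP), possibly
   zero-extended, anywhere in its chain of PLUS terms.  */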
16786 bool
16787 ix86_tls_address_pattern_p (rtx op)
16788 {
16789   subrtx_var_iterator::array_type array;
16790   FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
16791     {
16792       rtx op = *iter;
16793       if (MEM_P (op))
16794 	{
16795 	  rtx *x = &XEXP (op, 0);
16796 	  while (GET_CODE (*x) == PLUS)
16797 	    {
16798 	      int i;
16799 	      for (i = 0; i < 2; i++)
16800 		{
16801 		  rtx u = XEXP (*x, i);
16802 		  if (GET_CODE (u) == ZERO_EXTEND)
16803 		    u = XEXP (u, 0);
16804 		  if (GET_CODE (u) == UNSPEC
16805 		      && XINT (u, 1) == UNSPEC_TP)
16806 		    return true;
16807 		}
16808 	      x = &XEXP (*x, 0);
16809 	    }
16810 
16811 	  iter.skip_subrtxes ();
16812 	}
16813     }
16814 
16815   return false;
16816 }
16817 
16818 /* Rewrite *LOC so that it refers to the default TLS address space.  */
16819 void
16820 ix86_rewrite_tls_address_1 (rtx *loc)
16821 {
16822   subrtx_ptr_iterator::array_type array;
16823   FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
16824     {
16825       rtx *loc = *iter;
16826       if (MEM_P (*loc))
16827 	{
16828 	  rtx addr = XEXP (*loc, 0);
16829 	  rtx *x = &addr;
16830 	  while (GET_CODE (*x) == PLUS)
16831 	    {
16832 	      int i;
16833 	      for (i = 0; i < 2; i++)
16834 		{
16835 		  rtx u = XEXP (*x, i);
16836 		  if (GET_CODE (u) == ZERO_EXTEND)
16837 		    u = XEXP (u, 0);
16838 		  if (GET_CODE (u) == UNSPEC
16839 		      && XINT (u, 1) == UNSPEC_TP)
16840 		    {
16841 		      addr_space_t as = DEFAULT_TLS_SEG_REG;
16842 
16843 		      *x = XEXP (*x, 1 - i);
16844 
16845 		      *loc = replace_equiv_address_nv (*loc, addr, true);
16846 		      set_mem_addr_space (*loc, as);
16847 		      return;
16848 		    }
16849 		}
16850 	      x = &XEXP (*x, 0);
16851 	    }
16852 
16853 	  iter.skip_subrtxes ();
16854 	}
16855     }
16856 }
16857 
16858 /* Rewrite an instruction pattern involving a TLS address
16859    so that it refers to the default TLS address space.  */
16860 rtx
16861 ix86_rewrite_tls_address (rtx pattern)
16862 {
16863   pattern = copy_insn (pattern);
16864   ix86_rewrite_tls_address_1 (&pattern);
16865   return pattern;
16866 }
16867 
16868 /* Create or return the unique __imp_DECL dllimport symbol corresponding
16869    to symbol DECL if BEIMPORT is true.  Otherwise create or return the
16870    unique refptr-DECL symbol corresponding to symbol DECL.  */
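/* For a dllimported symbol FOO the resulting decl's RTL is roughly

     (mem (symbol_ref "*__imp_foo"))

   so references go through the import-table pointer rather than FOO
   itself; the refptr variant uses a "refptr." style prefix instead.  */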
16871 
16872 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
16873 {
16874   static inline hashval_t hash (tree_map *m) { return m->hash; }
16875   static inline bool
16876   equal (tree_map *a, tree_map *b)
16877   {
16878     return a->base.from == b->base.from;
16879   }
16880 
16881   static int
16882   keep_cache_entry (tree_map *&m)
16883   {
16884     return ggc_marked_p (m->base.from);
16885   }
16886 };
16887 
16888 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
16889 
16890 static tree
16891 get_dllimport_decl (tree decl, bool beimport)
16892 {
16893   struct tree_map *h, in;
16894   const char *name;
16895   const char *prefix;
16896   size_t namelen, prefixlen;
16897   char *imp_name;
16898   tree to;
16899   rtx rtl;
16900 
16901   if (!dllimport_map)
16902     dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
16903 
16904   in.hash = htab_hash_pointer (decl);
16905   in.base.from = decl;
16906   tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
16907   h = *loc;
16908   if (h)
16909     return h->to;
16910 
16911   *loc = h = ggc_alloc<tree_map> ();
16912   h->hash = in.hash;
16913   h->base.from = decl;
16914   h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
16915 			   VAR_DECL, NULL, ptr_type_node);
16916   DECL_ARTIFICIAL (to) = 1;
16917   DECL_IGNORED_P (to) = 1;
16918   DECL_EXTERNAL (to) = 1;
16919   TREE_READONLY (to) = 1;
16920 
16921   name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
16922   name = targetm.strip_name_encoding (name);
16923   if (beimport)
16924     prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
16925       ? "*__imp_" : "*__imp__";
16926   else
16927     prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
16928   namelen = strlen (name);
16929   prefixlen = strlen (prefix);
16930   imp_name = (char *) alloca (namelen + prefixlen + 1);
16931   memcpy (imp_name, prefix, prefixlen);
16932   memcpy (imp_name + prefixlen, name, namelen + 1);
16933 
16934   name = ggc_alloc_string (imp_name, namelen + prefixlen);
16935   rtl = gen_rtx_SYMBOL_REF (Pmode, name);
16936   SET_SYMBOL_REF_DECL (rtl, to);
16937   SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
16938   if (!beimport)
16939     {
16940       SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
16941 #ifdef SUB_TARGET_RECORD_STUB
16942       SUB_TARGET_RECORD_STUB (name);
16943 #endif
16944     }
16945 
16946   rtl = gen_const_mem (Pmode, rtl);
16947   set_mem_alias_set (rtl, ix86_GOT_alias_set ());
16948 
16949   SET_DECL_RTL (to, rtl);
16950   SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
16951 
16952   return to;
16953 }
16954 
16955 /* Expand SYMBOL into its corresponding far-address symbol.
16956    WANT_REG is true if we require the result be a register.  */
16957 
16958 static rtx
16959 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
16960 {
16961   tree imp_decl;
16962   rtx x;
16963 
16964   gcc_assert (SYMBOL_REF_DECL (symbol));
16965   imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
16966 
16967   x = DECL_RTL (imp_decl);
16968   if (want_reg)
16969     x = force_reg (Pmode, x);
16970   return x;
16971 }
16972 
16973 /* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
16974    true if we require the result be a register.  */
16975 
16976 static rtx
16977 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
16978 {
16979   tree imp_decl;
16980   rtx x;
16981 
16982   gcc_assert (SYMBOL_REF_DECL (symbol));
16983   imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
16984 
16985   x = DECL_RTL (imp_decl);
16986   if (want_reg)
16987     x = force_reg (Pmode, x);
16988   return x;
16989 }
16990 
16991 /* Expand ADDR into its corresponding dllimport or refptr symbol.  INREG
16992    is true if we require the result to be a register.  */
16993 
16994 static rtx
16995 legitimize_pe_coff_symbol (rtx addr, bool inreg)
16996 {
16997   if (!TARGET_PECOFF)
16998     return NULL_RTX;
16999 
17000   if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
17001     {
17002       if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
17003 	return legitimize_dllimport_symbol (addr, inreg);
17004       if (GET_CODE (addr) == CONST
17005 	  && GET_CODE (XEXP (addr, 0)) == PLUS
17006 	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
17007 	  && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
17008 	{
17009 	  rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
17010 	  return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
17011 	}
17012     }
17013 
17014   if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
17015     return NULL_RTX;
17016   if (GET_CODE (addr) == SYMBOL_REF
17017       && !is_imported_p (addr)
17018       && SYMBOL_REF_EXTERNAL_P (addr)
17019       && SYMBOL_REF_DECL (addr))
17020     return legitimize_pe_coff_extern_decl (addr, inreg);
17021 
17022   if (GET_CODE (addr) == CONST
17023       && GET_CODE (XEXP (addr, 0)) == PLUS
17024       && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
17025       && !is_imported_p (XEXP (XEXP (addr, 0), 0))
17026       && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
17027       && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
17028     {
17029       rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
17030       return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
17031     }
17032   return NULL_RTX;
17033 }
17034 
17035 /* Try machine-dependent ways of modifying an illegitimate address
17036    to be legitimate.  If we find one, return the new, valid address.
17037    This macro is used in only one place: `memory_address' in explow.c.
17038 
17039    OLDX is the address as it was before break_out_memory_refs was called.
17040    In some cases it is useful to look at this to decide what needs to be done.
17041 
17042    It is always safe for this macro to do nothing.  It exists to recognize
17043    opportunities to optimize the output.
17044 
17045    For the 80386, we handle X+REG by loading X into a register R and
17046    using R+REG.  R will go in a general reg and indexing will be used.
17047    However, if REG is a broken-out memory address or multiplication,
17048    nothing needs to be done because REG can certainly go in a general reg.
17049 
17050    When -fpic is used, special handling is needed for symbolic references.
17051    See comments by legitimize_pic_address in i386.c for details.  */
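/* For instance, a shift-by-constant address such as

     (plus (ashift (reg) (const_int 2)) (reg))

   is canonicalized below into the multiply form

     (plus (mult (reg) (const_int 4)) (reg))

   which ix86_legitimate_address_p then recognizes as index*scale + base.  */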
17052 
17053 static rtx
17054 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
17055 {
17056   bool changed = false;
17057   unsigned log;
17058 
17059   log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
17060   if (log)
17061     return legitimize_tls_address (x, (enum tls_model) log, false);
17062   if (GET_CODE (x) == CONST
17063       && GET_CODE (XEXP (x, 0)) == PLUS
17064       && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
17065       && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
17066     {
17067       rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
17068 				      (enum tls_model) log, false);
17069       return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
17070     }
17071 
17072   if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
17073     {
17074       rtx tmp = legitimize_pe_coff_symbol (x, true);
17075       if (tmp)
17076         return tmp;
17077     }
17078 
17079   if (flag_pic && SYMBOLIC_CONST (x))
17080     return legitimize_pic_address (x, 0);
17081 
17082 #if TARGET_MACHO
17083   if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
17084     return machopic_indirect_data_reference (x, 0);
17085 #endif
17086 
17087   /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
17088   if (GET_CODE (x) == ASHIFT
17089       && CONST_INT_P (XEXP (x, 1))
17090       && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
17091     {
17092       changed = true;
17093       log = INTVAL (XEXP (x, 1));
17094       x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
17095 			GEN_INT (1 << log));
17096     }
17097 
17098   if (GET_CODE (x) == PLUS)
17099     {
17100       /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
17101 
17102       if (GET_CODE (XEXP (x, 0)) == ASHIFT
17103 	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
17104 	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
17105 	{
17106 	  changed = true;
17107 	  log = INTVAL (XEXP (XEXP (x, 0), 1));
17108 	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
17109 				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
17110 				      GEN_INT (1 << log));
17111 	}
17112 
17113       if (GET_CODE (XEXP (x, 1)) == ASHIFT
17114 	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
17115 	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
17116 	{
17117 	  changed = true;
17118 	  log = INTVAL (XEXP (XEXP (x, 1), 1));
17119 	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
17120 				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
17121 				      GEN_INT (1 << log));
17122 	}
17123 
17124       /* Put multiply first if it isn't already.  */
17125       if (GET_CODE (XEXP (x, 1)) == MULT)
17126 	{
17127 	  std::swap (XEXP (x, 0), XEXP (x, 1));
17128 	  changed = true;
17129 	}
17130 
17131       /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
17132 	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
17133 	 created by virtual register instantiation, register elimination, and
17134 	 similar optimizations.  */
17135       if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
17136 	{
17137 	  changed = true;
17138 	  x = gen_rtx_PLUS (Pmode,
17139 			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
17140 					  XEXP (XEXP (x, 1), 0)),
17141 			    XEXP (XEXP (x, 1), 1));
17142 	}
17143 
17144       /* Canonicalize
17145 	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
17146 	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
17147       else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
17148 	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17149 	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
17150 	       && CONSTANT_P (XEXP (x, 1)))
17151 	{
17152 	  rtx constant;
17153 	  rtx other = NULL_RTX;
17154 
17155 	  if (CONST_INT_P (XEXP (x, 1)))
17156 	    {
17157 	      constant = XEXP (x, 1);
17158 	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
17159 	    }
17160 	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
17161 	    {
17162 	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
17163 	      other = XEXP (x, 1);
17164 	    }
17165 	  else
17166 	    constant = 0;
17167 
17168 	  if (constant)
17169 	    {
17170 	      changed = true;
17171 	      x = gen_rtx_PLUS (Pmode,
17172 				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
17173 					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
17174 				plus_constant (Pmode, other,
17175 					       INTVAL (constant)));
17176 	    }
17177 	}
17178 
17179       if (changed && ix86_legitimate_address_p (mode, x, false))
17180 	return x;
17181 
17182       if (GET_CODE (XEXP (x, 0)) == MULT)
17183 	{
17184 	  changed = true;
17185 	  XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
17186 	}
17187 
17188       if (GET_CODE (XEXP (x, 1)) == MULT)
17189 	{
17190 	  changed = true;
17191 	  XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
17192 	}
17193 
17194       if (changed
17195 	  && REG_P (XEXP (x, 1))
17196 	  && REG_P (XEXP (x, 0)))
17197 	return x;
17198 
17199       if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
17200 	{
17201 	  changed = true;
17202 	  x = legitimize_pic_address (x, 0);
17203 	}
17204 
17205       if (changed && ix86_legitimate_address_p (mode, x, false))
17206 	return x;
17207 
17208       if (REG_P (XEXP (x, 0)))
17209 	{
17210 	  rtx temp = gen_reg_rtx (Pmode);
17211 	  rtx val  = force_operand (XEXP (x, 1), temp);
17212 	  if (val != temp)
17213 	    {
17214 	      val = convert_to_mode (Pmode, val, 1);
17215 	      emit_move_insn (temp, val);
17216 	    }
17217 
17218 	  XEXP (x, 1) = temp;
17219 	  return x;
17220 	}
17221 
17222       else if (REG_P (XEXP (x, 1)))
17223 	{
17224 	  rtx temp = gen_reg_rtx (Pmode);
17225 	  rtx val  = force_operand (XEXP (x, 0), temp);
17226 	  if (val != temp)
17227 	    {
17228 	      val = convert_to_mode (Pmode, val, 1);
17229 	      emit_move_insn (temp, val);
17230 	    }
17231 
17232 	  XEXP (x, 0) = temp;
17233 	  return x;
17234 	}
17235     }
17236 
17237   return x;
17238 }
17239 
17240 /* Print an integer constant expression in assembler syntax.  Addition
17241    and subtraction are the only arithmetic that may appear in these
17242    expressions.  FILE is the stdio stream to write to, X is the rtx, and
17243    CODE is the operand print code from the output string.  */
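/* For example, (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)) is
   printed as "foo@GOTOFF", and on x86-64 UNSPEC_GOTPCREL is printed as
   "foo@GOTPCREL(%rip)" in AT&T syntax; see the UNSPEC switch below.  */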
17244 
17245 static void
17246 output_pic_addr_const (FILE *file, rtx x, int code)
17247 {
17248   char buf[256];
17249 
17250   switch (GET_CODE (x))
17251     {
17252     case PC:
17253       gcc_assert (flag_pic);
17254       putc ('.', file);
17255       break;
17256 
17257     case SYMBOL_REF:
17258       if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
17259 	output_addr_const (file, x);
17260       else
17261 	{
17262 	  const char *name = XSTR (x, 0);
17263 
17264 	  /* Mark the decl as referenced so that cgraph will
17265 	     output the function.  */
17266 	  if (SYMBOL_REF_DECL (x))
17267 	    mark_decl_referenced (SYMBOL_REF_DECL (x));
17268 
17269 #if TARGET_MACHO
17270 	  if (MACHOPIC_INDIRECT
17271 	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
17272 	    name = machopic_indirection_name (x, /*stub_p=*/true);
17273 #endif
17274 	  assemble_name (file, name);
17275 	}
17276       if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
17277 	  && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
17278 	fputs ("@PLT", file);
17279       break;
17280 
17281     case LABEL_REF:
17282       x = XEXP (x, 0);
17283       /* FALLTHRU */
17284     case CODE_LABEL:
17285       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
17286       assemble_name (asm_out_file, buf);
17287       break;
17288 
17289     case CONST_INT:
17290       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
17291       break;
17292 
17293     case CONST:
17294       /* This used to output parentheses around the expression,
17295 	 but that does not work on the 386 (either ATT or BSD assembler).  */
17296       output_pic_addr_const (file, XEXP (x, 0), code);
17297       break;
17298 
17299     case CONST_DOUBLE:
17300       /* We can't handle floating point constants;
17301 	 TARGET_PRINT_OPERAND must handle them.  */
17302       output_operand_lossage ("floating constant misused");
17303       break;
17304 
17305     case PLUS:
17306       /* Some assemblers need integer constants to appear first.  */
17307       if (CONST_INT_P (XEXP (x, 0)))
17308 	{
17309 	  output_pic_addr_const (file, XEXP (x, 0), code);
17310 	  putc ('+', file);
17311 	  output_pic_addr_const (file, XEXP (x, 1), code);
17312 	}
17313       else
17314 	{
17315 	  gcc_assert (CONST_INT_P (XEXP (x, 1)));
17316 	  output_pic_addr_const (file, XEXP (x, 1), code);
17317 	  putc ('+', file);
17318 	  output_pic_addr_const (file, XEXP (x, 0), code);
17319 	}
17320       break;
17321 
17322     case MINUS:
17323       if (!TARGET_MACHO)
17324 	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
17325       output_pic_addr_const (file, XEXP (x, 0), code);
17326       putc ('-', file);
17327       output_pic_addr_const (file, XEXP (x, 1), code);
17328       if (!TARGET_MACHO)
17329 	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
17330       break;
17331 
17332     case UNSPEC:
17333       gcc_assert (XVECLEN (x, 0) == 1);
17334       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
17335       switch (XINT (x, 1))
17336 	{
17337 	case UNSPEC_GOT:
17338 	  fputs ("@GOT", file);
17339 	  break;
17340 	case UNSPEC_GOTOFF:
17341 	  fputs ("@GOTOFF", file);
17342 	  break;
17343 	case UNSPEC_PLTOFF:
17344 	  fputs ("@PLTOFF", file);
17345 	  break;
17346 	case UNSPEC_PCREL:
17347 	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
17348 		 "(%rip)" : "[rip]", file);
17349 	  break;
17350 	case UNSPEC_GOTPCREL:
17351 	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
17352 		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
17353 	  break;
17354 	case UNSPEC_GOTTPOFF:
17355 	  /* FIXME: This might be @TPOFF in Sun ld too.  */
17356 	  fputs ("@gottpoff", file);
17357 	  break;
17358 	case UNSPEC_TPOFF:
17359 	  fputs ("@tpoff", file);
17360 	  break;
17361 	case UNSPEC_NTPOFF:
17362 	  if (TARGET_64BIT)
17363 	    fputs ("@tpoff", file);
17364 	  else
17365 	    fputs ("@ntpoff", file);
17366 	  break;
17367 	case UNSPEC_DTPOFF:
17368 	  fputs ("@dtpoff", file);
17369 	  break;
17370 	case UNSPEC_GOTNTPOFF:
17371 	  if (TARGET_64BIT)
17372 	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
17373 		   "@gottpoff(%rip)": "@gottpoff[rip]", file);
17374 	  else
17375 	    fputs ("@gotntpoff", file);
17376 	  break;
17377 	case UNSPEC_INDNTPOFF:
17378 	  fputs ("@indntpoff", file);
17379 	  break;
17380 #if TARGET_MACHO
17381 	case UNSPEC_MACHOPIC_OFFSET:
17382 	  putc ('-', file);
17383 	  machopic_output_function_base_name (file);
17384 	  break;
17385 #endif
17386 	default:
17387 	  output_operand_lossage ("invalid UNSPEC as operand");
17388 	  break;
17389 	}
17390        break;
17391 
17392     default:
17393       output_operand_lossage ("invalid expression as operand");
17394     }
17395 }
17396 
17397 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
17398    We need to emit DTP-relative relocations.  */
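/* For SIZE == 4 this emits something like ".long foo@dtpoff"; for
   SIZE == 8 a trailing ", 0" pads the 32-bit DTP-relative value to
   64 bits.  */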
17399 
17400 static void ATTRIBUTE_UNUSED
17401 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
17402 {
17403   fputs (ASM_LONG, file);
17404   output_addr_const (file, x);
17405   fputs ("@dtpoff", file);
17406   switch (size)
17407     {
17408     case 4:
17409       break;
17410     case 8:
17411       fputs (", 0", file);
17412       break;
17413     default:
17414       gcc_unreachable ();
17415    }
17416 }
17417 
17418 /* Return true if X is a representation of the PIC register.  This copes
17419    with calls from ix86_find_base_term, where the register might have
17420    been replaced by a cselib value.  */
17421 
17422 static bool
17423 ix86_pic_register_p (rtx x)
17424 {
17425   if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
17426     return (pic_offset_table_rtx
17427 	    && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
17428   else if (!REG_P (x))
17429     return false;
17430   else if (pic_offset_table_rtx)
17431     {
17432       if (REGNO (x) == REGNO (pic_offset_table_rtx))
17433 	return true;
17434       if (HARD_REGISTER_P (x)
17435 	  && !HARD_REGISTER_P (pic_offset_table_rtx)
17436 	  && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
17437 	return true;
17438       return false;
17439     }
17440   else
17441     return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
17442 }
17443 
17444 /* Helper function for ix86_delegitimize_address.
17445    Attempt to delegitimize TLS local-exec accesses.  */
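/* E.g. a %fs-relative access whose displacement wraps
   (unspec [(symbol_ref "x")] UNSPEC_NTPOFF) is rewritten back to a
   plain reference to "x", with any base, index and constant offset
   re-attached, which keeps debug output readable.  */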
17446 
17447 static rtx
17448 ix86_delegitimize_tls_address (rtx orig_x)
17449 {
17450   rtx x = orig_x, unspec;
17451   struct ix86_address addr;
17452 
17453   if (!TARGET_TLS_DIRECT_SEG_REFS)
17454     return orig_x;
17455   if (MEM_P (x))
17456     x = XEXP (x, 0);
17457   if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
17458     return orig_x;
17459   if (ix86_decompose_address (x, &addr) == 0
17460       || addr.seg != DEFAULT_TLS_SEG_REG
17461       || addr.disp == NULL_RTX
17462       || GET_CODE (addr.disp) != CONST)
17463     return orig_x;
17464   unspec = XEXP (addr.disp, 0);
17465   if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
17466     unspec = XEXP (unspec, 0);
17467   if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
17468     return orig_x;
17469   x = XVECEXP (unspec, 0, 0);
17470   gcc_assert (GET_CODE (x) == SYMBOL_REF);
17471   if (unspec != XEXP (addr.disp, 0))
17472     x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
17473   if (addr.index)
17474     {
17475       rtx idx = addr.index;
17476       if (addr.scale != 1)
17477 	idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
17478       x = gen_rtx_PLUS (Pmode, idx, x);
17479     }
17480   if (addr.base)
17481     x = gen_rtx_PLUS (Pmode, addr.base, x);
17482   if (MEM_P (orig_x))
17483     x = replace_equiv_address_nv (orig_x, x);
17484   return x;
17485 }
17486 
17487 /* In the name of slightly smaller debug output, and to cater to
17488    general assembler lossage, recognize PIC+GOTOFF and turn it back
17489    into a direct symbol reference.
17490 
17491    On Darwin, this is necessary to avoid a crash, because Darwin
17492    has a different PIC label for each routine but the DWARF debugging
17493    information is not associated with any particular routine, so it's
17494    necessary to remove references to the PIC label from RTL stored by
17495    the DWARF output code.
17496 
17497    This helper is used in the normal ix86_delegitimize_address
17498    entrypoint (e.g. used in the target delegitimization hook) and
17499    in ix86_find_base_term.  As compile time memory optimization, we
17500    in ix86_find_base_term.  As a compile-time memory optimization, we
17501    avoid allocating rtxes that will not change the outcome for the
17502    callers (find_base_value and find_base_term).  */
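/* The typical 32-bit transformation is from

     (plus pic_offset_table_rtx
	   (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))

   back to just (symbol_ref "foo"), with any register or constant
   addend re-attached around the result.  */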
17503 static inline rtx
17504 ix86_delegitimize_address_1 (rtx x, bool base_term_p)
17505 {
17506   rtx orig_x = delegitimize_mem_from_attrs (x);
17507   /* addend is NULL or some rtx if x is something+GOTOFF where
17508      something doesn't include the PIC register.  */
17509   rtx addend = NULL_RTX;
17510   /* reg_addend is NULL or a multiple of some register.  */
17511   rtx reg_addend = NULL_RTX;
17512   /* const_addend is NULL or a const_int.  */
17513   rtx const_addend = NULL_RTX;
17514   /* This is the result, or NULL.  */
17515   rtx result = NULL_RTX;
17516 
17517   x = orig_x;
17518 
17519   if (MEM_P (x))
17520     x = XEXP (x, 0);
17521 
17522   if (TARGET_64BIT)
17523     {
17524       if (GET_CODE (x) == CONST
17525           && GET_CODE (XEXP (x, 0)) == PLUS
17526           && GET_MODE (XEXP (x, 0)) == Pmode
17527           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
17528           && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
17529           && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
17530         {
17531 	  /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
17532 	     base.  A CONST can't be arg_pointer_rtx based.  */
17533 	  if (base_term_p && MEM_P (orig_x))
17534 	    return orig_x;
17535 	  rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
17536 	  x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
17537 	  if (MEM_P (orig_x))
17538 	    x = replace_equiv_address_nv (orig_x, x);
17539 	  return x;
17540 	}
17541 
17542       if (GET_CODE (x) == CONST
17543 	  && GET_CODE (XEXP (x, 0)) == UNSPEC
17544 	  && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
17545 	      || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
17546 	  && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
17547 	{
17548 	  x = XVECEXP (XEXP (x, 0), 0, 0);
17549 	  if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
17550 	    {
17551 	      x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
17552 	      if (x == NULL_RTX)
17553 		return orig_x;
17554 	    }
17555 	  return x;
17556 	}
17557 
17558       if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
17559 	return ix86_delegitimize_tls_address (orig_x);
17560 
17561       /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
17562 	 and -mcmodel=medium -fpic.  */
17563     }
17564 
17565   if (GET_CODE (x) != PLUS
17566       || GET_CODE (XEXP (x, 1)) != CONST)
17567     return ix86_delegitimize_tls_address (orig_x);
17568 
17569   if (ix86_pic_register_p (XEXP (x, 0)))
17570     /* %ebx + GOT/GOTOFF */
17571     ;
17572   else if (GET_CODE (XEXP (x, 0)) == PLUS)
17573     {
17574       /* %ebx + %reg * scale + GOT/GOTOFF */
17575       reg_addend = XEXP (x, 0);
17576       if (ix86_pic_register_p (XEXP (reg_addend, 0)))
17577 	reg_addend = XEXP (reg_addend, 1);
17578       else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
17579 	reg_addend = XEXP (reg_addend, 0);
17580       else
17581 	{
17582 	  reg_addend = NULL_RTX;
17583 	  addend = XEXP (x, 0);
17584 	}
17585     }
17586   else
17587     addend = XEXP (x, 0);
17588 
17589   x = XEXP (XEXP (x, 1), 0);
17590   if (GET_CODE (x) == PLUS
17591       && CONST_INT_P (XEXP (x, 1)))
17592     {
17593       const_addend = XEXP (x, 1);
17594       x = XEXP (x, 0);
17595     }
17596 
17597   if (GET_CODE (x) == UNSPEC
17598       && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
17599 	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
17600 	  || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
17601 	      && !MEM_P (orig_x) && !addend)))
17602     result = XVECEXP (x, 0, 0);
17603 
17604   if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
17605       && !MEM_P (orig_x))
17606     result = XVECEXP (x, 0, 0);
17607 
17608   if (! result)
17609     return ix86_delegitimize_tls_address (orig_x);
17610 
17611   /* For (PLUS something CONST_INT) both find_base_{value,term} just
17612      recurse on the first operand.  */
17613   if (const_addend && !base_term_p)
17614     result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
17615   if (reg_addend)
17616     result = gen_rtx_PLUS (Pmode, reg_addend, result);
17617   if (addend)
17618     {
17619       /* If the rest of original X doesn't involve the PIC register, add
17620 	 addend and subtract pic_offset_table_rtx.  This can happen e.g.
17621 	 for code like:
17622 	 leal (%ebx, %ecx, 4), %ecx
17623 	 ...
17624 	 movl foo@GOTOFF(%ecx), %edx
17625 	 in which case we return (%ecx - %ebx) + foo
17626 	 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
17627 	 and reload has completed.  Don't do the latter for debug,
17628 	 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly.  */
17629       if (pic_offset_table_rtx
17630 	  && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
17631         result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
17632 						     pic_offset_table_rtx),
17633 			       result);
17634       else if (base_term_p
17635 	       && pic_offset_table_rtx
17636 	       && !TARGET_MACHO
17637 	       && !TARGET_VXWORKS_RTP)
17638 	{
17639 	  rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
17640 	  tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
17641 	  result = gen_rtx_PLUS (Pmode, tmp, result);
17642 	}
17643       else
17644 	return orig_x;
17645     }
17646   if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
17647     {
17648       result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
17649       if (result == NULL_RTX)
17650 	return orig_x;
17651     }
17652   return result;
17653 }
17654 
17655 /* The normal instantiation of the above template.  */
17656 
17657 static rtx
17658 ix86_delegitimize_address (rtx x)
17659 {
17660   return ix86_delegitimize_address_1 (x, false);
17661 }
17662 
17663 /* If X is a machine specific address (i.e. a symbol or label being
17664    referenced as a displacement from the GOT implemented using an
17665    UNSPEC), then return the base term.  Otherwise return X.  */
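/* Editor's example (a sketch of the 64-bit case handled below): for
   (const (unspec [(symbol_ref "foo")] UNSPEC_GOTPCREL)) the base term
   returned is the (symbol_ref "foo") itself.  */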
17666 
17667 rtx
17668 ix86_find_base_term (rtx x)
17669 {
17670   rtx term;
17671 
17672   if (TARGET_64BIT)
17673     {
17674       if (GET_CODE (x) != CONST)
17675 	return x;
17676       term = XEXP (x, 0);
17677       if (GET_CODE (term) == PLUS
17678 	  && CONST_INT_P (XEXP (term, 1)))
17679 	term = XEXP (term, 0);
17680       if (GET_CODE (term) != UNSPEC
17681 	  || (XINT (term, 1) != UNSPEC_GOTPCREL
17682 	      && XINT (term, 1) != UNSPEC_PCREL))
17683 	return x;
17684 
17685       return XVECEXP (term, 0, 0);
17686     }
17687 
17688   return ix86_delegitimize_address_1 (x, true);
17689 }
17690 
17691 /* Return true if X shouldn't be emitted into the debug info.
17692    Disallow UNSPECs other than @gotoff - we can't easily emit the
17693    _GLOBAL_OFFSET_TABLE_ symbol into the .debug_info section, so we
17694    don't delegitimize it but instead assemble it as @gotoff.
17695    Disallow the _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
17696    assembles it as a _GLOBAL_OFFSET_TABLE_-. expression.  */
17697 
17698 static bool
17699 ix86_const_not_ok_for_debug_p (rtx x)
17700 {
17701   if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
17702     return true;
17703 
17704   if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
17705     return true;
17706 
17707   return false;
17708 }
17709 
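/* Print to FILE the condition-code suffix (e.g. "e", "ne", "g", "b")
   for comparison CODE in flags mode MODE.  REVERSE inverts the
   condition and FP selects the floating-point spelling where the two
   differ.  (Editor's summary of the switch below.)  */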
17710 static void
17711 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
17712 		    bool fp, FILE *file)
17713 {
17714   const char *suffix;
17715 
17716   if (mode == CCFPmode)
17717     {
17718       code = ix86_fp_compare_code_to_integer (code);
17719       mode = CCmode;
17720     }
17721   if (reverse)
17722     code = reverse_condition (code);
17723 
17724   switch (code)
17725     {
17726     case EQ:
17727       gcc_assert (mode != CCGZmode);
17728       switch (mode)
17729 	{
17730 	case E_CCAmode:
17731 	  suffix = "a";
17732 	  break;
17733 	case E_CCCmode:
17734 	  suffix = "c";
17735 	  break;
17736 	case E_CCOmode:
17737 	  suffix = "o";
17738 	  break;
17739 	case E_CCPmode:
17740 	  suffix = "p";
17741 	  break;
17742 	case E_CCSmode:
17743 	  suffix = "s";
17744 	  break;
17745 	default:
17746 	  suffix = "e";
17747 	  break;
17748 	}
17749       break;
17750     case NE:
17751       gcc_assert (mode != CCGZmode);
17752       switch (mode)
17753 	{
17754 	case E_CCAmode:
17755 	  suffix = "na";
17756 	  break;
17757 	case E_CCCmode:
17758 	  suffix = "nc";
17759 	  break;
17760 	case E_CCOmode:
17761 	  suffix = "no";
17762 	  break;
17763 	case E_CCPmode:
17764 	  suffix = "np";
17765 	  break;
17766 	case E_CCSmode:
17767 	  suffix = "ns";
17768 	  break;
17769 	default:
17770 	  suffix = "ne";
17771 	  break;
17772 	}
17773       break;
17774     case GT:
17775       gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
17776       suffix = "g";
17777       break;
17778     case GTU:
17779       /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
17780 	 Those same assemblers have the same but opposite lossage on cmov.  */
17781       if (mode == CCmode)
17782 	suffix = fp ? "nbe" : "a";
17783       else
17784 	gcc_unreachable ();
17785       break;
17786     case LT:
17787       switch (mode)
17788 	{
17789 	case E_CCNOmode:
17790 	case E_CCGOCmode:
17791 	  suffix = "s";
17792 	  break;
17793 
17794 	case E_CCmode:
17795 	case E_CCGCmode:
17796 	case E_CCGZmode:
17797 	  suffix = "l";
17798 	  break;
17799 
17800 	default:
17801 	  gcc_unreachable ();
17802 	}
17803       break;
17804     case LTU:
17805       if (mode == CCmode || mode == CCGZmode)
17806 	suffix = "b";
17807       else if (mode == CCCmode)
17808 	suffix = fp ? "b" : "c";
17809       else
17810 	gcc_unreachable ();
17811       break;
17812     case GE:
17813       switch (mode)
17814 	{
17815 	case E_CCNOmode:
17816 	case E_CCGOCmode:
17817 	  suffix = "ns";
17818 	  break;
17819 
17820 	case E_CCmode:
17821 	case E_CCGCmode:
17822 	case E_CCGZmode:
17823 	  suffix = "ge";
17824 	  break;
17825 
17826 	default:
17827 	  gcc_unreachable ();
17828 	}
17829       break;
17830     case GEU:
17831       if (mode == CCmode || mode == CCGZmode)
17832 	suffix = "nb";
17833       else if (mode == CCCmode)
17834 	suffix = fp ? "nb" : "nc";
17835       else
17836 	gcc_unreachable ();
17837       break;
17838     case LE:
17839       gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
17840       suffix = "le";
17841       break;
17842     case LEU:
17843       if (mode == CCmode)
17844 	suffix = "be";
17845       else
17846 	gcc_unreachable ();
17847       break;
17848     case UNORDERED:
17849       suffix = fp ? "u" : "p";
17850       break;
17851     case ORDERED:
17852       suffix = fp ? "nu" : "np";
17853       break;
17854     default:
17855       gcc_unreachable ();
17856     }
17857   fputs (suffix, file);
17858 }
17859 
17860 /* Print the name of register X to FILE based on its machine mode and number.
17861    If CODE is 'w', pretend the mode is HImode.
17862    If CODE is 'b', pretend the mode is QImode.
17863    If CODE is 'k', pretend the mode is SImode.
17864    If CODE is 'q', pretend the mode is DImode.
17865    If CODE is 'x', pretend the mode is V4SFmode.
17866    If CODE is 't', pretend the mode is V8SFmode.
17867    If CODE is 'g', pretend the mode is V16SFmode.
17868    If CODE is 'h', pretend the reg is the 'high' byte register.
17869    If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.
17870    If CODE is 'd', duplicate the operand for an AVX instruction.
17871    If CODE is 'V', print naked full integer register name without %.
17872  */
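/* Editor's example of the mapping below (AT&T dialect assumed):
   (reg:SI ax) prints as "%eax"; with code 'q' (in 64-bit code) it
   prints as "%rax", with 'b' as "%al", and with 'h' as "%ah"; an SSE
   register with code 't' gets a "%ymm" spelling.  */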
17873 
17874 void
17875 print_reg (rtx x, int code, FILE *file)
17876 {
17877   const char *reg;
17878   int msize;
17879   unsigned int regno;
17880   bool duplicated;
17881 
17882   if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
17883     putc ('%', file);
17884 
17885   if (x == pc_rtx)
17886     {
17887       gcc_assert (TARGET_64BIT);
17888       fputs ("rip", file);
17889       return;
17890     }
17891 
17892   if (code == 'y' && STACK_TOP_P (x))
17893     {
17894       fputs ("st(0)", file);
17895       return;
17896     }
17897 
17898   if (code == 'w')
17899     msize = 2;
17900   else if (code == 'b')
17901     msize = 1;
17902   else if (code == 'k')
17903     msize = 4;
17904   else if (code == 'q')
17905     msize = 8;
17906   else if (code == 'h')
17907     msize = 0;
17908   else if (code == 'x')
17909     msize = 16;
17910   else if (code == 't')
17911     msize = 32;
17912   else if (code == 'g')
17913     msize = 64;
17914   else
17915     msize = GET_MODE_SIZE (GET_MODE (x));
17916 
17917   regno = REGNO (x);
17918 
17919   if (regno == ARG_POINTER_REGNUM
17920       || regno == FRAME_POINTER_REGNUM
17921       || regno == FPSR_REG
17922       || regno == FPCR_REG)
17923     {
17924       output_operand_lossage
17925 	("invalid use of register '%s'", reg_names[regno]);
17926       return;
17927     }
17928   else if (regno == FLAGS_REG)
17929     {
17930       output_operand_lossage ("invalid use of asm flag output");
17931       return;
17932     }
17933 
17934   if (code == 'V')
17935     {
17936       if (GENERAL_REGNO_P (regno))
17937 	msize = GET_MODE_SIZE (word_mode);
17938       else
17939 	error ("'V' modifier on non-integer register");
17940     }
17941 
17942   duplicated = code == 'd' && TARGET_AVX;
17943 
17944   switch (msize)
17945     {
17946     case 16:
17947     case 12:
17948     case 8:
17949       if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
17950 	warning (0, "unsupported size for integer register");
17951       /* FALLTHRU */
17952     case 4:
17953       if (LEGACY_INT_REGNO_P (regno))
17954 	putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
17955       /* FALLTHRU */
17956     case 2:
17957     normal:
17958       reg = hi_reg_name[regno];
17959       break;
17960     case 1:
17961       if (regno >= ARRAY_SIZE (qi_reg_name))
17962 	goto normal;
17963       if (!ANY_QI_REGNO_P (regno))
17964 	error ("unsupported size for integer register");
17965       reg = qi_reg_name[regno];
17966       break;
17967     case 0:
17968       if (regno >= ARRAY_SIZE (qi_high_reg_name))
17969 	goto normal;
17970       reg = qi_high_reg_name[regno];
17971       break;
17972     case 32:
17973     case 64:
17974       if (SSE_REGNO_P (regno))
17975 	{
17976 	  gcc_assert (!duplicated);
17977 	  putc (msize == 32 ? 'y' : 'z', file);
17978 	  reg = hi_reg_name[regno] + 1;
17979 	  break;
17980 	}
17981       goto normal;
17982     default:
17983       gcc_unreachable ();
17984     }
17985 
17986   fputs (reg, file);
17987 
17988   /* Irritatingly, AMD extended registers use a
17989      different naming convention: "r%d[bwd]".  */
17990   if (REX_INT_REGNO_P (regno))
17991     {
17992       gcc_assert (TARGET_64BIT);
17993       switch (msize)
17994 	{
17995 	  case 0:
17996 	    error ("extended registers have no high halves");
17997 	    break;
17998 	  case 1:
17999 	    putc ('b', file);
18000 	    break;
18001 	  case 2:
18002 	    putc ('w', file);
18003 	    break;
18004 	  case 4:
18005 	    putc ('d', file);
18006 	    break;
18007 	  case 8:
18008 	    /* no suffix */
18009 	    break;
18010 	  default:
18011 	    error ("unsupported operand size for extended register");
18012 	    break;
18013 	}
18014       return;
18015     }
18016 
18017   if (duplicated)
18018     {
18019       if (ASSEMBLER_DIALECT == ASM_ATT)
18020 	fprintf (file, ", %%%s", reg);
18021       else
18022 	fprintf (file, ", %s", reg);
18023     }
18024 }
18025 
18026 /* Meaning of CODE:
18027    L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
18028    C -- print opcode suffix for set/cmov insn.
18029    c -- like C, but print reversed condition
18030    F,f -- likewise, but for floating-point.
18031    O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
18032 	otherwise nothing
18033    R -- print embedded rounding and sae.
18034    r -- print only sae.
18035    z -- print the opcode suffix for the size of the current operand.
18036    Z -- likewise, with special suffixes for x87 instructions.
18037    * -- print a star (in certain assembler syntax)
18038    A -- print an absolute memory reference.
18039    E -- print address with DImode register names if TARGET_64BIT.
18040    w -- print the operand as if it's a "word" (HImode) even if it isn't.
18041    s -- print a shift double count, followed by the assemblers argument
18042    s -- print a shift double count, followed by the assembler's argument
18043    b -- print the QImode name of the register for the indicated operand.
18044 	%b0 would print %al if operands[0] is reg 0.
18045    w --  likewise, print the HImode name of the register.
18046    k --  likewise, print the SImode name of the register.
18047    q --  likewise, print the DImode name of the register.
18048    x --  likewise, print the V4SFmode name of the register.
18049    t --  likewise, print the V8SFmode name of the register.
18050    g --  likewise, print the V16SFmode name of the register.
18051    h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
18052    y -- print "st(0)" instead of "st" as a register.
18053    d -- print duplicated register operand for AVX instruction.
18054    D -- print condition for SSE cmp instruction.
18055    P -- if PIC, print an @PLT suffix.
18056    p -- print raw symbol name.
18057    X -- don't print any sort of PIC '@' suffix for a symbol.
18058    & -- print some in-use local-dynamic symbol name.
18059    H -- print a memory address offset by 8; used for sse high-parts
18060    Y -- print condition for XOP pcom* instruction.
18061    V -- print naked full integer register name without %.
18062    + -- print a branch hint as 'cs' or 'ds' prefix
18063    ; -- print a semicolon (after prefixes, due to a bug in older gas).
18064    ~ -- print "i" if TARGET_AVX2, "f" otherwise.
18065    ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
18066    ! -- print MPX prefix for jxx/call/ret instructions if required.
18067  */
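/* Editor's example of how the codes above appear in insn templates
   (a hedged sketch, not an actual pattern from the machine
   description): a template such as "mov%z0\t{%1, %0|%0, %1}" would
   let %z0 pick the "b"/"w"/"l"/"q" suffix from operand 0's mode in
   AT&T output, while the {att|intel} braces select the operand order
   for the current assembler dialect.  */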
18068 
18069 void
18070 ix86_print_operand (FILE *file, rtx x, int code)
18071 {
18072   if (code)
18073     {
18074       switch (code)
18075 	{
18076 	case 'A':
18077 	  switch (ASSEMBLER_DIALECT)
18078 	    {
18079 	    case ASM_ATT:
18080 	      putc ('*', file);
18081 	      break;
18082 
18083 	    case ASM_INTEL:
18084 	      /* Intel syntax. For absolute addresses, registers should not
18085 		 be surrounded by brackets.  */
18086 	      if (!REG_P (x))
18087 		{
18088 		  putc ('[', file);
18089 		  ix86_print_operand (file, x, 0);
18090 		  putc (']', file);
18091 		  return;
18092 		}
18093 	      break;
18094 
18095 	    default:
18096 	      gcc_unreachable ();
18097 	    }
18098 
18099 	  ix86_print_operand (file, x, 0);
18100 	  return;
18101 
18102 	case 'E':
18103 	  /* Wrap address in an UNSPEC to declare special handling.  */
18104 	  if (TARGET_64BIT)
18105 	    x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
18106 
18107 	  output_address (VOIDmode, x);
18108 	  return;
18109 
18110 	case 'L':
18111 	  if (ASSEMBLER_DIALECT == ASM_ATT)
18112 	    putc ('l', file);
18113 	  return;
18114 
18115 	case 'W':
18116 	  if (ASSEMBLER_DIALECT == ASM_ATT)
18117 	    putc ('w', file);
18118 	  return;
18119 
18120 	case 'B':
18121 	  if (ASSEMBLER_DIALECT == ASM_ATT)
18122 	    putc ('b', file);
18123 	  return;
18124 
18125 	case 'Q':
18126 	  if (ASSEMBLER_DIALECT == ASM_ATT)
18127 	    putc ('l', file);
18128 	  return;
18129 
18130 	case 'S':
18131 	  if (ASSEMBLER_DIALECT == ASM_ATT)
18132 	    putc ('s', file);
18133 	  return;
18134 
18135 	case 'T':
18136 	  if (ASSEMBLER_DIALECT == ASM_ATT)
18137 	    putc ('t', file);
18138 	  return;
18139 
18140 	case 'O':
18141 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
18142 	  if (ASSEMBLER_DIALECT != ASM_ATT)
18143 	    return;
18144 
18145 	  switch (GET_MODE_SIZE (GET_MODE (x)))
18146 	    {
18147 	    case 2:
18148 	      putc ('w', file);
18149 	      break;
18150 
18151 	    case 4:
18152 	      putc ('l', file);
18153 	      break;
18154 
18155 	    case 8:
18156 	      putc ('q', file);
18157 	      break;
18158 
18159 	    default:
18160 	      output_operand_lossage ("invalid operand size for operand "
18161 				      "code 'O'");
18162 	      return;
18163 	    }
18164 
18165 	  putc ('.', file);
18166 #endif
18167 	  return;
18168 
18169 	case 'z':
18170 	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
18171 	    {
18172 	      /* Opcodes don't get size suffixes if using Intel opcodes.  */
18173 	      if (ASSEMBLER_DIALECT == ASM_INTEL)
18174 		return;
18175 
18176 	      switch (GET_MODE_SIZE (GET_MODE (x)))
18177 		{
18178 		case 1:
18179 		  putc ('b', file);
18180 		  return;
18181 
18182 		case 2:
18183 		  putc ('w', file);
18184 		  return;
18185 
18186 		case 4:
18187 		  putc ('l', file);
18188 		  return;
18189 
18190 		case 8:
18191 		  putc ('q', file);
18192 		  return;
18193 
18194 		default:
18195 		  output_operand_lossage ("invalid operand size for operand "
18196 					  "code 'z'");
18197 		  return;
18198 		}
18199 	    }
18200 
18201 	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
18202 	    warning (0, "non-integer operand used with operand code 'z'");
18203 	  /* FALLTHRU */
18204 
18205 	case 'Z':
18206 	  /* 387 opcodes don't get size suffixes if using Intel opcodes.  */
18207 	  if (ASSEMBLER_DIALECT == ASM_INTEL)
18208 	    return;
18209 
18210 	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
18211 	    {
18212 	      switch (GET_MODE_SIZE (GET_MODE (x)))
18213 		{
18214 		case 2:
18215 #ifdef HAVE_AS_IX86_FILDS
18216 		  putc ('s', file);
18217 #endif
18218 		  return;
18219 
18220 		case 4:
18221 		  putc ('l', file);
18222 		  return;
18223 
18224 		case 8:
18225 #ifdef HAVE_AS_IX86_FILDQ
18226 		  putc ('q', file);
18227 #else
18228 		  fputs ("ll", file);
18229 #endif
18230 		  return;
18231 
18232 		default:
18233 		  break;
18234 		}
18235 	    }
18236 	  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
18237 	    {
18238 	      /* 387 opcodes don't get size suffixes
18239 		 if the operands are registers.  */
18240 	      if (STACK_REG_P (x))
18241 		return;
18242 
18243 	      switch (GET_MODE_SIZE (GET_MODE (x)))
18244 		{
18245 		case 4:
18246 		  putc ('s', file);
18247 		  return;
18248 
18249 		case 8:
18250 		  putc ('l', file);
18251 		  return;
18252 
18253 		case 12:
18254 		case 16:
18255 		  putc ('t', file);
18256 		  return;
18257 
18258 		default:
18259 		  break;
18260 		}
18261 	    }
18262 	  else
18263 	    {
18264 	      output_operand_lossage ("invalid operand type used with "
18265 				      "operand code 'Z'");
18266 	      return;
18267 	    }
18268 
18269 	  output_operand_lossage ("invalid operand size for operand code 'Z'");
18270 	  return;
18271 
18272 	case 'd':
18273 	case 'b':
18274 	case 'w':
18275 	case 'k':
18276 	case 'q':
18277 	case 'h':
18278 	case 't':
18279 	case 'g':
18280 	case 'y':
18281 	case 'x':
18282 	case 'X':
18283 	case 'P':
18284 	case 'p':
18285 	case 'V':
18286 	  break;
18287 
18288 	case 's':
18289 	  if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
18290 	    {
18291 	      ix86_print_operand (file, x, 0);
18292 	      fputs (", ", file);
18293 	    }
18294 	  return;
18295 
18296 	case 'Y':
18297 	  switch (GET_CODE (x))
18298 	    {
18299 	    case NE:
18300 	      fputs ("neq", file);
18301 	      break;
18302 	    case EQ:
18303 	      fputs ("eq", file);
18304 	      break;
18305 	    case GE:
18306 	    case GEU:
18307 	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
18308 	      break;
18309 	    case GT:
18310 	    case GTU:
18311 	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
18312 	      break;
18313 	    case LE:
18314 	    case LEU:
18315 	      fputs ("le", file);
18316 	      break;
18317 	    case LT:
18318 	    case LTU:
18319 	      fputs ("lt", file);
18320 	      break;
18321 	    case UNORDERED:
18322 	      fputs ("unord", file);
18323 	      break;
18324 	    case ORDERED:
18325 	      fputs ("ord", file);
18326 	      break;
18327 	    case UNEQ:
18328 	      fputs ("ueq", file);
18329 	      break;
18330 	    case UNGE:
18331 	      fputs ("nlt", file);
18332 	      break;
18333 	    case UNGT:
18334 	      fputs ("nle", file);
18335 	      break;
18336 	    case UNLE:
18337 	      fputs ("ule", file);
18338 	      break;
18339 	    case UNLT:
18340 	      fputs ("ult", file);
18341 	      break;
18342 	    case LTGT:
18343 	      fputs ("une", file);
18344 	      break;
18345 	    default:
18346 	      output_operand_lossage ("operand is not a condition code, "
18347 				      "invalid operand code 'Y'");
18348 	      return;
18349 	    }
18350 	  return;
18351 
18352 	case 'D':
18353 	  /* A little bit of braindamage here.  The SSE compare instructions
18354 	     use completely different names for the comparisons than the
18355 	     fp conditional moves do.  */
18356 	  switch (GET_CODE (x))
18357 	    {
18358 	    case UNEQ:
18359 	      if (TARGET_AVX)
18360 		{
18361 		  fputs ("eq_us", file);
18362 		  break;
18363 		}
18364 	     /* FALLTHRU */
18365 	    case EQ:
18366 	      fputs ("eq", file);
18367 	      break;
18368 	    case UNLT:
18369 	      if (TARGET_AVX)
18370 		{
18371 		  fputs ("nge", file);
18372 		  break;
18373 		}
18374 	     /* FALLTHRU */
18375 	    case LT:
18376 	      fputs ("lt", file);
18377 	      break;
18378 	    case UNLE:
18379 	      if (TARGET_AVX)
18380 		{
18381 		  fputs ("ngt", file);
18382 		  break;
18383 		}
18384 	     /* FALLTHRU */
18385 	    case LE:
18386 	      fputs ("le", file);
18387 	      break;
18388 	    case UNORDERED:
18389 	      fputs ("unord", file);
18390 	      break;
18391 	    case LTGT:
18392 	      if (TARGET_AVX)
18393 		{
18394 		  fputs ("neq_oq", file);
18395 		  break;
18396 		}
18397 	     /* FALLTHRU */
18398 	    case NE:
18399 	      fputs ("neq", file);
18400 	      break;
18401 	    case GE:
18402 	      if (TARGET_AVX)
18403 		{
18404 		  fputs ("ge", file);
18405 		  break;
18406 		}
18407 	     /* FALLTHRU */
18408 	    case UNGE:
18409 	      fputs ("nlt", file);
18410 	      break;
18411 	    case GT:
18412 	      if (TARGET_AVX)
18413 		{
18414 		  fputs ("gt", file);
18415 		  break;
18416 		}
18417 	     /* FALLTHRU */
18418 	    case UNGT:
18419 	      fputs ("nle", file);
18420 	      break;
18421 	    case ORDERED:
18422 	      fputs ("ord", file);
18423 	      break;
18424 	    default:
18425 	      output_operand_lossage ("operand is not a condition code, "
18426 				      "invalid operand code 'D'");
18427 	      return;
18428 	    }
18429 	  return;
18430 
18431 	case 'F':
18432 	case 'f':
18433 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
18434 	  if (ASSEMBLER_DIALECT == ASM_ATT)
18435 	    putc ('.', file);
18436 	  gcc_fallthrough ();
18437 #endif
18438 
18439 	case 'C':
18440 	case 'c':
18441 	  if (!COMPARISON_P (x))
18442 	    {
18443 	      output_operand_lossage ("operand is not a condition code, "
18444 				      "invalid operand code '%c'", code);
18445 	      return;
18446 	    }
18447 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
18448 			      code == 'c' || code == 'f',
18449 			      code == 'F' || code == 'f',
18450 			      file);
18451 	  return;
18452 
18453 	case 'H':
18454 	  if (!offsettable_memref_p (x))
18455 	    {
18456 	      output_operand_lossage ("operand is not an offsettable memory "
18457 				      "reference, invalid operand code 'H'");
18458 	      return;
18459 	    }
18460 	  /* It doesn't actually matter what mode we use here, as we're
18461 	     only going to use this for printing.  */
18462 	  x = adjust_address_nv (x, DImode, 8);
18463 	  /* Output 'qword ptr' for intel assembler dialect.  */
18464 	  if (ASSEMBLER_DIALECT == ASM_INTEL)
18465 	    code = 'q';
18466 	  break;
18467 
18468 	case 'K':
18469 	  if (!CONST_INT_P (x))
18470 	    {
18471 	      output_operand_lossage ("operand is not an integer, invalid "
18472 				      "operand code 'K'");
18473 	      return;
18474 	    }
18475 
18476 	  if (INTVAL (x) & IX86_HLE_ACQUIRE)
18477 #ifdef HAVE_AS_IX86_HLE
18478 	    fputs ("xacquire ", file);
18479 #else
18480 	    fputs ("\n" ASM_BYTE "0xf2\n\t", file);
18481 #endif
18482 	  else if (INTVAL (x) & IX86_HLE_RELEASE)
18483 #ifdef HAVE_AS_IX86_HLE
18484 	    fputs ("xrelease ", file);
18485 #else
18486 	    fputs ("\n" ASM_BYTE "0xf3\n\t", file);
18487 #endif
18488 	  /* We do not want to print value of the operand.  */
18489 	  return;
18490 
18491 	case 'N':
18492 	  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
18493 	    fputs ("{z}", file);
18494 	  return;
18495 
18496 	case 'r':
18497 	  if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
18498 	    {
18499 	      output_operand_lossage ("operand is not a specific integer, "
18500 				      "invalid operand code 'r'");
18501 	      return;
18502 	    }
18503 
18504 	  if (ASSEMBLER_DIALECT == ASM_INTEL)
18505 	    fputs (", ", file);
18506 
18507 	  fputs ("{sae}", file);
18508 
18509 	  if (ASSEMBLER_DIALECT == ASM_ATT)
18510 	    fputs (", ", file);
18511 
18512 	  return;
18513 
18514 	case 'R':
18515 	  if (!CONST_INT_P (x))
18516 	    {
18517 	      output_operand_lossage ("operand is not an integer, invalid "
18518 				      "operand code 'R'");
18519 	      return;
18520 	    }
18521 
18522 	  if (ASSEMBLER_DIALECT == ASM_INTEL)
18523 	    fputs (", ", file);
18524 
18525 	  switch (INTVAL (x))
18526 	    {
18527 	    case ROUND_NEAREST_INT | ROUND_SAE:
18528 	      fputs ("{rn-sae}", file);
18529 	      break;
18530 	    case ROUND_NEG_INF | ROUND_SAE:
18531 	      fputs ("{rd-sae}", file);
18532 	      break;
18533 	    case ROUND_POS_INF | ROUND_SAE:
18534 	      fputs ("{ru-sae}", file);
18535 	      break;
18536 	    case ROUND_ZERO | ROUND_SAE:
18537 	      fputs ("{rz-sae}", file);
18538 	      break;
18539 	    default:
18540 	      output_operand_lossage ("operand is not a specific integer, "
18541 				      "invalid operand code 'R'");
18542 	    }
18543 
18544 	  if (ASSEMBLER_DIALECT == ASM_ATT)
18545 	    fputs (", ", file);
18546 
18547 	  return;
18548 
18549 	case '*':
18550 	  if (ASSEMBLER_DIALECT == ASM_ATT)
18551 	    putc ('*', file);
18552 	  return;
18553 
18554 	case '&':
18555 	  {
18556 	    const char *name = get_some_local_dynamic_name ();
18557 	    if (name == NULL)
18558 	      output_operand_lossage ("'%%&' used without any "
18559 				      "local dynamic TLS references");
18560 	    else
18561 	      assemble_name (file, name);
18562 	    return;
18563 	  }
18564 
18565 	case '+':
18566 	  {
18567 	    rtx x;
18568 
18569 	    if (!optimize
18570 	        || optimize_function_for_size_p (cfun)
18571 		|| !TARGET_BRANCH_PREDICTION_HINTS)
18572 	      return;
18573 
18574 	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
18575 	    if (x)
18576 	      {
18577 		int pred_val = profile_probability::from_reg_br_prob_note
18578 				 (XINT (x, 0)).to_reg_br_prob_base ();
18579 
18580 		if (pred_val < REG_BR_PROB_BASE * 45 / 100
18581 		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
18582 		  {
18583 		    bool taken = pred_val > REG_BR_PROB_BASE / 2;
18584 		    bool cputaken
18585 		      = final_forward_branch_p (current_output_insn) == 0;
18586 
18587 		    /* Emit hints only in the case where the default branch
18588 		       prediction heuristics would fail.  */
18589 		    if (taken != cputaken)
18590 		      {
18591 			/* We use 3e (DS) prefix for taken branches and
18592 			   2e (CS) prefix for not taken branches.  */
18593 			if (taken)
18594 			  fputs ("ds ; ", file);
18595 			else
18596 			  fputs ("cs ; ", file);
18597 		      }
18598 		  }
18599 	      }
18600 	    return;
18601 	  }
18602 
18603 	case ';':
18604 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
18605 	  putc (';', file);
18606 #endif
18607 	  return;
18608 
18609 	case '~':
18610 	  putc (TARGET_AVX2 ? 'i' : 'f', file);
18611 	  return;
18612 
18613 	case '^':
18614 	  if (TARGET_64BIT && Pmode != word_mode)
18615 	    fputs ("addr32 ", file);
18616 	  return;
18617 
18618 	case '!':
18619 	  if (ix86_bnd_prefixed_insn_p (current_output_insn))
18620 	    fputs ("bnd ", file);
18621 	  if (ix86_notrack_prefixed_insn_p (current_output_insn))
18622 	    fputs ("notrack ", file);
18623 	  return;
18624 
18625 	default:
18626 	  output_operand_lossage ("invalid operand code '%c'", code);
18627 	}
18628     }
18629 
18630   if (REG_P (x))
18631     print_reg (x, code, file);
18632 
18633   else if (MEM_P (x))
18634     {
18635       rtx addr = XEXP (x, 0);
18636 
18637       /* No `byte ptr' prefix for call instructions ... */
18638       if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
18639 	{
18640 	  machine_mode mode = GET_MODE (x);
18641 	  const char *size;
18642 
18643 	  /* Check for explicit size override codes.  */
18644 	  if (code == 'b')
18645 	    size = "BYTE";
18646 	  else if (code == 'w')
18647 	    size = "WORD";
18648 	  else if (code == 'k')
18649 	    size = "DWORD";
18650 	  else if (code == 'q')
18651 	    size = "QWORD";
18652 	  else if (code == 'x')
18653 	    size = "XMMWORD";
18654 	  else if (code == 't')
18655 	    size = "YMMWORD";
18656 	  else if (code == 'g')
18657 	    size = "ZMMWORD";
18658 	  else if (mode == BLKmode)
18659 	    /* ... or BLKmode operands, when not overridden.  */
18660 	    size = NULL;
18661 	  else
18662 	    switch (GET_MODE_SIZE (mode))
18663 	      {
18664 	      case 1: size = "BYTE"; break;
18665 	      case 2: size = "WORD"; break;
18666 	      case 4: size = "DWORD"; break;
18667 	      case 8: size = "QWORD"; break;
18668 	      case 12: size = "TBYTE"; break;
18669 	      case 16:
18670 		if (mode == XFmode)
18671 		  size = "TBYTE";
18672 		else
18673 		  size = "XMMWORD";
18674 		break;
18675 	      case 32: size = "YMMWORD"; break;
18676 	      case 64: size = "ZMMWORD"; break;
18677 	      default:
18678 		gcc_unreachable ();
18679 	      }
18680 	  if (size)
18681 	    {
18682 	      fputs (size, file);
18683 	      fputs (" PTR ", file);
18684 	    }
18685 	}
18686 
18687       if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
18688 	output_operand_lossage ("invalid constraints for operand");
18689       else
18690 	ix86_print_operand_address_as
18691 	  (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
18692     }
18693 
18694   else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
18695     {
18696       long l;
18697 
18698       REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
18699 
18700       if (ASSEMBLER_DIALECT == ASM_ATT)
18701 	putc ('$', file);
18702       /* Sign extend 32bit SFmode immediate to 8 bytes.  */
18703       if (code == 'q')
18704 	fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
18705 		 (unsigned long long) (int) l);
18706       else
18707 	fprintf (file, "0x%08x", (unsigned int) l);
18708     }
18709 
18710   else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
18711     {
18712       long l[2];
18713 
18714       REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
18715 
18716       if (ASSEMBLER_DIALECT == ASM_ATT)
18717 	putc ('$', file);
18718       fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
18719     }
18720 
18721   /* These float cases don't actually occur as immediate operands.  */
18722   else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
18723     {
18724       char dstr[30];
18725 
18726       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
18727       fputs (dstr, file);
18728     }
18729 
18730   else
18731     {
18732       /* We have patterns that allow zero sets of memory, for instance.
18733 	 In 64-bit mode, we should probably support all 8-byte vectors,
18734 	 since we can in fact encode that into an immediate.  */
18735       if (GET_CODE (x) == CONST_VECTOR)
18736 	{
18737 	  if (x != CONST0_RTX (GET_MODE (x)))
18738 	    output_operand_lossage ("invalid vector immediate");
18739 	  x = const0_rtx;
18740 	}
18741 
18742       if (code != 'P' && code != 'p')
18743 	{
18744 	  if (CONST_INT_P (x))
18745 	    {
18746 	      if (ASSEMBLER_DIALECT == ASM_ATT)
18747 		putc ('$', file);
18748 	    }
18749 	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
18750 		   || GET_CODE (x) == LABEL_REF)
18751 	    {
18752 	      if (ASSEMBLER_DIALECT == ASM_ATT)
18753 		putc ('$', file);
18754 	      else
18755 		fputs ("OFFSET FLAT:", file);
18756 	    }
18757 	}
18758       if (CONST_INT_P (x))
18759 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
18760       else if (flag_pic || MACHOPIC_INDIRECT)
18761 	output_pic_addr_const (file, x, code);
18762       else
18763 	output_addr_const (file, x);
18764     }
18765 }
18766 
18767 static bool
18768 ix86_print_operand_punct_valid_p (unsigned char code)
18769 {
18770   return (code == '*' || code == '+' || code == '&' || code == ';'
18771 	  || code == '~' || code == '^' || code == '!');
18772 }
18773 
18774 /* Print a memory operand whose address is ADDR.  */
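/* Editor's example of the two dialects handled below: an address with
   base %ebp, index %eax, scale 4 and displacement 16 is emitted as
   "16(%ebp,%eax,4)" in AT&T syntax and as "[ebp+16+eax*4]" in Intel
   syntax.  */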
18775 
18776 static void
18777 ix86_print_operand_address_as (FILE *file, rtx addr,
18778 			       addr_space_t as, bool no_rip)
18779 {
18780   struct ix86_address parts;
18781   rtx base, index, disp;
18782   int scale;
18783   int ok;
18784   bool vsib = false;
18785   int code = 0;
18786 
18787   if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
18788     {
18789       ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
18790       gcc_assert (parts.index == NULL_RTX);
18791       parts.index = XVECEXP (addr, 0, 1);
18792       parts.scale = INTVAL (XVECEXP (addr, 0, 2));
18793       addr = XVECEXP (addr, 0, 0);
18794       vsib = true;
18795     }
18796   else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
18797     {
18798       gcc_assert (TARGET_64BIT);
18799       ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
18800       code = 'q';
18801     }
18802   else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
18803     {
18804       ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
18805       gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
18806       if (parts.base != NULL_RTX)
18807 	{
18808 	  parts.index = parts.base;
18809 	  parts.scale = 1;
18810 	}
18811       parts.base = XVECEXP (addr, 0, 0);
18812       addr = XVECEXP (addr, 0, 0);
18813     }
18814   else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
18815     {
18816       ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
18817       gcc_assert (parts.index == NULL_RTX);
18818       parts.index = XVECEXP (addr, 0, 1);
18819       addr = XVECEXP (addr, 0, 0);
18820     }
18821   else
18822     ok = ix86_decompose_address (addr, &parts);
18823 
18824   gcc_assert (ok);
18825 
18826   base = parts.base;
18827   index = parts.index;
18828   disp = parts.disp;
18829   scale = parts.scale;
18830 
18831   if (ADDR_SPACE_GENERIC_P (as))
18832     as = parts.seg;
18833   else
18834     gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
18835 
18836   if (!ADDR_SPACE_GENERIC_P (as))
18837     {
18838       const char *string;
18839 
18840       if (as == ADDR_SPACE_SEG_FS)
18841 	string = (ASSEMBLER_DIALECT == ASM_ATT ? "%fs:" : "fs:");
18842       else if (as == ADDR_SPACE_SEG_GS)
18843 	string = (ASSEMBLER_DIALECT == ASM_ATT ? "%gs:" : "gs:");
18844       else
18845 	gcc_unreachable ();
18846       fputs (string, file);
18847     }
18848 
18849   /* Use one byte shorter RIP relative addressing for 64bit mode.  */
18850   if (TARGET_64BIT && !base && !index && !no_rip)
18851     {
18852       rtx symbol = disp;
18853 
18854       if (GET_CODE (disp) == CONST
18855 	  && GET_CODE (XEXP (disp, 0)) == PLUS
18856 	  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
18857 	symbol = XEXP (XEXP (disp, 0), 0);
18858 
18859       if (GET_CODE (symbol) == LABEL_REF
18860 	  || (GET_CODE (symbol) == SYMBOL_REF
18861 	      && SYMBOL_REF_TLS_MODEL (symbol) == 0))
18862 	base = pc_rtx;
18863     }
18864 
18865   if (!base && !index)
18866     {
18867       /* A displacement-only address requires special attention.  */
18868       if (CONST_INT_P (disp))
18869 	{
18870 	  if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
18871 	    fputs ("ds:", file);
18872 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
18873 	}
18874       /* Load the external function address via the GOT slot to avoid PLT.  */
18875       else if (GET_CODE (disp) == CONST
18876 	       && GET_CODE (XEXP (disp, 0)) == UNSPEC
18877 	       && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
18878 		   || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
18879 	       && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
18880 	output_pic_addr_const (file, disp, 0);
18881       else if (flag_pic)
18882 	output_pic_addr_const (file, disp, 0);
18883       else
18884 	output_addr_const (file, disp);
18885     }
18886   else
18887     {
18888       /* Print SImode register names to force addr32 prefix.  */
18889       if (SImode_address_operand (addr, VOIDmode))
18890 	{
18891 	  if (flag_checking)
18892 	    {
18893 	      gcc_assert (TARGET_64BIT);
18894 	      switch (GET_CODE (addr))
18895 		{
18896 		case SUBREG:
18897 		  gcc_assert (GET_MODE (addr) == SImode);
18898 		  gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
18899 		  break;
18900 		case ZERO_EXTEND:
18901 		case AND:
18902 		  gcc_assert (GET_MODE (addr) == DImode);
18903 		  break;
18904 		default:
18905 		  gcc_unreachable ();
18906 		}
18907 	    }
18908 	  gcc_assert (!code);
18909 	  code = 'k';
18910 	}
18911       else if (code == 0
18912 	       && TARGET_X32
18913 	       && disp
18914 	       && CONST_INT_P (disp)
18915 	       && INTVAL (disp) < -16*1024*1024)
18916 	{
18917 	  /* X32 runs in 64-bit mode, where displacement, DISP, in
18918 	     address DISP(%r64), is encoded as 32-bit immediate sign-
18919 	     extended from 32-bit to 64-bit.  For -0x40000300(%r64),
18920 	     address is %r64 + 0xffffffffbffffd00.  When %r64 <
18921 	     0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
18922 	     which is invalid for x32.  The correct address is %r64
18923 	     - 0x40000300 == 0xf7ffdd64.  To properly encode
18924 	     -0x40000300(%r64) for x32, we zero-extend negative
18925 	     displacement by forcing addr32 prefix which truncates
18926 	     0xfffffffff7ffdd64 to 0xf7ffdd64.  In theory, we should
18927 	     zero-extend all negative displacements, including -1(%rsp).
18928 	     However, for small negative displacements, sign-extension
18929 	     won't cause overflow.  We only zero-extend negative
18930 	     displacements if they are < -16*1024*1024, which is also used
18931 	     to check legitimate address displacements for PIC.  */
18932 	  code = 'k';
18933 	}
18934 
18935       /* Since the upper 32 bits of RSP are always zero for x32,
18936 	 we can encode %esp as %rsp to avoid 0x67 prefix if
18937 	 there is no index register.  */
18938       if (TARGET_X32 && Pmode == SImode
18939 	  && !index && base && REG_P (base) && REGNO (base) == SP_REG)
18940 	code = 'q';
18941 
18942       if (ASSEMBLER_DIALECT == ASM_ATT)
18943 	{
18944 	  if (disp)
18945 	    {
18946 	      if (flag_pic)
18947 		output_pic_addr_const (file, disp, 0);
18948 	      else if (GET_CODE (disp) == LABEL_REF)
18949 		output_asm_label (disp);
18950 	      else
18951 		output_addr_const (file, disp);
18952 	    }
18953 
18954 	  putc ('(', file);
18955 	  if (base)
18956 	    print_reg (base, code, file);
18957 	  if (index)
18958 	    {
18959 	      putc (',', file);
18960 	      print_reg (index, vsib ? 0 : code, file);
18961 	      if (scale != 1 || vsib)
18962 		fprintf (file, ",%d", scale);
18963 	    }
18964 	  putc (')', file);
18965 	}
18966       else
18967 	{
18968 	  rtx offset = NULL_RTX;
18969 
18970 	  if (disp)
18971 	    {
18972 	      /* Pull out the offset of a symbol; print any symbol itself.  */
18973 	      if (GET_CODE (disp) == CONST
18974 		  && GET_CODE (XEXP (disp, 0)) == PLUS
18975 		  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
18976 		{
18977 		  offset = XEXP (XEXP (disp, 0), 1);
18978 		  disp = gen_rtx_CONST (VOIDmode,
18979 					XEXP (XEXP (disp, 0), 0));
18980 		}
18981 
18982 	      if (flag_pic)
18983 		output_pic_addr_const (file, disp, 0);
18984 	      else if (GET_CODE (disp) == LABEL_REF)
18985 		output_asm_label (disp);
18986 	      else if (CONST_INT_P (disp))
18987 		offset = disp;
18988 	      else
18989 		output_addr_const (file, disp);
18990 	    }
18991 
18992 	  putc ('[', file);
18993 	  if (base)
18994 	    {
18995 	      print_reg (base, code, file);
18996 	      if (offset)
18997 		{
18998 		  if (INTVAL (offset) >= 0)
18999 		    putc ('+', file);
19000 		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
19001 		}
19002 	    }
19003 	  else if (offset)
19004 	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
19005 	  else
19006 	    putc ('0', file);
19007 
19008 	  if (index)
19009 	    {
19010 	      putc ('+', file);
19011 	      print_reg (index, vsib ? 0 : code, file);
19012 	      if (scale != 1 || vsib)
19013 		fprintf (file, "*%d", scale);
19014 	    }
19015 	  putc (']', file);
19016 	}
19017     }
19018 }
19019 
19020 static void
19021 ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
19022 {
19023   ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
19024 }
19025 
19026 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */
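/* Editor's example for the handler below: the rtx
   (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF) is printed as
   "foo@gotoff", while UNSPEC_NTPOFF becomes "foo@ntpoff" (or
   "foo@tpoff" in 64-bit code).  */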
19027 
19028 static bool
19029 i386_asm_output_addr_const_extra (FILE *file, rtx x)
19030 {
19031   rtx op;
19032 
19033   if (GET_CODE (x) != UNSPEC)
19034     return false;
19035 
19036   op = XVECEXP (x, 0, 0);
19037   switch (XINT (x, 1))
19038     {
19039     case UNSPEC_GOTOFF:
19040       output_addr_const (file, op);
19041       fputs ("@gotoff", file);
19042       break;
19043     case UNSPEC_GOTTPOFF:
19044       output_addr_const (file, op);
19045       /* FIXME: This might be @TPOFF in Sun ld.  */
19046       fputs ("@gottpoff", file);
19047       break;
19048     case UNSPEC_TPOFF:
19049       output_addr_const (file, op);
19050       fputs ("@tpoff", file);
19051       break;
19052     case UNSPEC_NTPOFF:
19053       output_addr_const (file, op);
19054       if (TARGET_64BIT)
19055 	fputs ("@tpoff", file);
19056       else
19057 	fputs ("@ntpoff", file);
19058       break;
19059     case UNSPEC_DTPOFF:
19060       output_addr_const (file, op);
19061       fputs ("@dtpoff", file);
19062       break;
19063     case UNSPEC_GOTNTPOFF:
19064       output_addr_const (file, op);
19065       if (TARGET_64BIT)
19066 	fputs (ASSEMBLER_DIALECT == ASM_ATT ?
19067 	       "@gottpoff(%rip)" : "@gottpoff[rip]", file);
19068       else
19069 	fputs ("@gotntpoff", file);
19070       break;
19071     case UNSPEC_INDNTPOFF:
19072       output_addr_const (file, op);
19073       fputs ("@indntpoff", file);
19074       break;
19075 #if TARGET_MACHO
19076     case UNSPEC_MACHOPIC_OFFSET:
19077       output_addr_const (file, op);
19078       putc ('-', file);
19079       machopic_output_function_base_name (file);
19080       break;
19081 #endif
19082 
19083     default:
19084       return false;
19085     }
19086 
19087   return true;
19088 }
19089 
19090 /* Split one or more double-mode RTL references into pairs of half-mode
19091    references.  The RTL can be REG, offsettable MEM, integer constant, or
19092    CONST_DOUBLE.  "operands" is a pointer to an array of double-mode RTLs to
19093    split and "num" is its length.  lo_half and hi_half are output arrays
19094    that parallel "operands".  */
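/* Editor's sketch of the splitting below: a DImode operand is split
   into SImode halves via simplify_gen_subreg at byte offsets 0 and 4,
   while a DImode MEM is split with adjust_address into pieces at the
   original address and at address + 4.  */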
19095 
19096 void
19097 split_double_mode (machine_mode mode, rtx operands[],
19098 		   int num, rtx lo_half[], rtx hi_half[])
19099 {
19100   machine_mode half_mode;
19101   unsigned int byte;
19102 
19103   switch (mode)
19104     {
19105     case E_TImode:
19106       half_mode = DImode;
19107       break;
19108     case E_DImode:
19109       half_mode = SImode;
19110       break;
19111     default:
19112       gcc_unreachable ();
19113     }
19114 
19115   byte = GET_MODE_SIZE (half_mode);
19116 
19117   while (num--)
19118     {
19119       rtx op = operands[num];
19120 
19121       /* simplify_subreg refuses to split volatile memory addresses,
19122          but we still have to handle them.  */
19123       if (MEM_P (op))
19124 	{
19125 	  lo_half[num] = adjust_address (op, half_mode, 0);
19126 	  hi_half[num] = adjust_address (op, half_mode, byte);
19127 	}
19128       else
19129 	{
19130 	  lo_half[num] = simplify_gen_subreg (half_mode, op,
19131 					      GET_MODE (op) == VOIDmode
19132 					      ? mode : GET_MODE (op), 0);
19133 	  hi_half[num] = simplify_gen_subreg (half_mode, op,
19134 					      GET_MODE (op) == VOIDmode
19135 					      ? mode : GET_MODE (op), byte);
19136 	}
19137     }
19138 }
19139 
19140 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
19141    MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
19142    is the expression of the binary operation.  The output may either be
19143    emitted here, or returned to the caller, like all output_* functions.
19144 
19145    There is no guarantee that the operands are the same mode, as they
19146    might be within FLOAT or FLOAT_EXTEND expressions.  */
19147 
19148 #ifndef SYSV386_COMPAT
19149 /* Set to 1 for compatibility with brain-damaged assemblers.  No-one
19150    wants to fix the assemblers because that causes incompatibility
19151    with gcc.  No-one wants to fix gcc because that causes
19152    incompatibility with assemblers...  You can use the option of
19153    -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
19154 #define SYSV386_COMPAT 1
19155 #endif
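/* Editor's example of the strings assembled below (a sketch, assuming
   an SSE SFmode addition without AVX): the returned template is
   "%vaddss\t{%2, %0|%0, %2}"; the x87 paths instead build "fadd",
   "fsub" etc. plus the operand/popping variants chosen further down.  */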
19156 
19157 const char *
19158 output_387_binary_op (rtx_insn *insn, rtx *operands)
19159 {
19160   static char buf[40];
19161   const char *p;
19162   bool is_sse
19163     = (SSE_REG_P (operands[0])
19164        || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));
19165 
19166   if (is_sse)
19167     p = "%v";
19168   else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
19169 	   || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
19170     p = "fi";
19171   else
19172     p = "f";
19173 
19174   strcpy (buf, p);
19175 
19176   switch (GET_CODE (operands[3]))
19177     {
19178     case PLUS:
19179       p = "add"; break;
19180     case MINUS:
19181       p = "sub"; break;
19182     case MULT:
19183       p = "mul"; break;
19184     case DIV:
19185       p = "div"; break;
19186     default:
19187       gcc_unreachable ();
19188     }
19189 
19190   strcat (buf, p);
19191 
19192   if (is_sse)
19193    {
19194      p = (GET_MODE (operands[0]) == SFmode) ? "ss" : "sd";
19195      strcat (buf, p);
19196 
19197      if (TARGET_AVX)
19198        p = "\t{%2, %1, %0|%0, %1, %2}";
19199      else
19200        p = "\t{%2, %0|%0, %2}";
19201 
19202      strcat (buf, p);
19203      return buf;
19204    }
19205 
19206   /* Even if we do not want to check the inputs, this documents the input
19207      constraints, which helps in understanding the following code.  */
19208   if (flag_checking)
19209     {
19210       if (STACK_REG_P (operands[0])
19211 	  && ((REG_P (operands[1])
19212 	       && REGNO (operands[0]) == REGNO (operands[1])
19213 	       && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
19214 	      || (REG_P (operands[2])
19215 		  && REGNO (operands[0]) == REGNO (operands[2])
19216 		  && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
19217 	  && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
19218 	; /* ok */
19219       else
19220 	gcc_unreachable ();
19221     }
19222 
19223   switch (GET_CODE (operands[3]))
19224     {
19225     case MULT:
19226     case PLUS:
19227       if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
19228 	std::swap (operands[1], operands[2]);
19229 
19230       /* We know operands[0] == operands[1].  */
19231 
19232       if (MEM_P (operands[2]))
19233 	{
19234 	  p = "%Z2\t%2";
19235 	  break;
19236 	}
19237 
19238       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
19239 	{
19240 	  if (STACK_TOP_P (operands[0]))
19241 	    /* How is it that we are storing to a dead operand[2]?
19242 	       Well, presumably operands[1] is dead too.  We can't
19243 	       store the result to st(0) as st(0) gets popped on this
19244 	       instruction.  Instead store to operands[2] (which I
19245 	       think has to be st(1)).  st(1) will be popped later.
19246 	       gcc <= 2.8.1 didn't have this check and generated
19247 	       assembly code that the Unixware assembler rejected.  */
19248 	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
19249 	  else
19250 	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
19251 	  break;
19252 	}
19253 
19254       if (STACK_TOP_P (operands[0]))
19255 	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
19256       else
19257 	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
19258       break;
19259 
19260     case MINUS:
19261     case DIV:
19262       if (MEM_P (operands[1]))
19263 	{
19264 	  p = "r%Z1\t%1";
19265 	  break;
19266 	}
19267 
19268       if (MEM_P (operands[2]))
19269 	{
19270 	  p = "%Z2\t%2";
19271 	  break;
19272 	}
19273 
19274       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
19275 	{
19276 #if SYSV386_COMPAT
19277 	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
19278 	     derived assemblers, confusingly reverse the direction of
19279 	     the operation for fsub{r} and fdiv{r} when the
19280 	     destination register is not st(0).  The Intel assembler
19281 	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
19282 	     figure out what the hardware really does.  */
19283 	  if (STACK_TOP_P (operands[0]))
19284 	    p = "{p\t%0, %2|rp\t%2, %0}";
19285 	  else
19286 	    p = "{rp\t%2, %0|p\t%0, %2}";
19287 #else
19288 	  if (STACK_TOP_P (operands[0]))
19289 	    /* As above for fmul/fadd, we can't store to st(0).  */
19290 	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
19291 	  else
19292 	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
19293 #endif
19294 	  break;
19295 	}
19296 
19297       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
19298 	{
19299 #if SYSV386_COMPAT
19300 	  if (STACK_TOP_P (operands[0]))
19301 	    p = "{rp\t%0, %1|p\t%1, %0}";
19302 	  else
19303 	    p = "{p\t%1, %0|rp\t%0, %1}";
19304 #else
19305 	  if (STACK_TOP_P (operands[0]))
19306 	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
19307 	  else
19308 	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
19309 #endif
19310 	  break;
19311 	}
19312 
19313       if (STACK_TOP_P (operands[0]))
19314 	{
19315 	  if (STACK_TOP_P (operands[1]))
19316 	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
19317 	  else
19318 	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
19319 	  break;
19320 	}
19321       else if (STACK_TOP_P (operands[1]))
19322 	{
19323 #if SYSV386_COMPAT
19324 	  p = "{\t%1, %0|r\t%0, %1}";
19325 #else
19326 	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
19327 #endif
19328 	}
19329       else
19330 	{
19331 #if SYSV386_COMPAT
19332 	  p = "{r\t%2, %0|\t%0, %2}";
19333 #else
19334 	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
19335 #endif
19336 	}
19337       break;
19338 
19339     default:
19340       gcc_unreachable ();
19341     }
19342 
19343   strcat (buf, p);
19344   return buf;
19345 }
19346 
19347 /* Return needed mode for entity in optimize_mode_switching pass.  */
19348 
19349 static int
19350 ix86_dirflag_mode_needed (rtx_insn *insn)
19351 {
19352   if (CALL_P (insn))
19353     {
19354       if (cfun->machine->func_type == TYPE_NORMAL)
19355 	return X86_DIRFLAG_ANY;
19356       else
19357 	/* No need to emit CLD in interrupt handler for TARGET_CLD.  */
19358 	return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
19359     }
19360 
19361   if (recog_memoized (insn) < 0)
19362     return X86_DIRFLAG_ANY;
19363 
19364   if (get_attr_type (insn) == TYPE_STR)
19365     {
19366       /* Emit cld instruction if stringops are used in the function.  */
19367       if (cfun->machine->func_type == TYPE_NORMAL)
19368 	return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
19369       else
19370 	return X86_DIRFLAG_RESET;
19371     }
19372 
19373   return X86_DIRFLAG_ANY;
19374 }
19375 
19376 /* Check if a 256bit or 512bit AVX register is referenced inside of EXP.  */
19377 
19378 static bool
19379 ix86_check_avx_upper_register (const_rtx exp)
19380 {
19381   if (SUBREG_P (exp))
19382     exp = SUBREG_REG (exp);
19383 
19384   return (REG_P (exp)
19385 	&& (VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp))
19386 	|| VALID_AVX512F_REG_OR_XI_MODE (GET_MODE (exp))));
19387 }
19388 
19389 /* Return needed mode for entity in optimize_mode_switching pass.  */
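/* Editor's note on the AVX_U128 entity handled below: it tracks
   whether the upper halves of the 256-bit/512-bit registers may be
   dirty, so that mode switching can place vzeroupper before calls and
   returns and avoid the SSE/AVX transition penalty.  */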
19390 
19391 static int
19392 ix86_avx_u128_mode_needed (rtx_insn *insn)
19393 {
19394   if (CALL_P (insn))
19395     {
19396       rtx link;
19397 
19398       /* The needed mode is set to AVX_U128_CLEAN if no 256bit or
19399 	 512bit modes are used in the function arguments.  */
19400       for (link = CALL_INSN_FUNCTION_USAGE (insn);
19401 	   link;
19402 	   link = XEXP (link, 1))
19403 	{
19404 	  if (GET_CODE (XEXP (link, 0)) == USE)
19405 	    {
19406 	      rtx arg = XEXP (XEXP (link, 0), 0);
19407 
19408 	      if (ix86_check_avx_upper_register (arg))
19409 		return AVX_U128_DIRTY;
19410 	    }
19411 	}
19412 
19413       return AVX_U128_CLEAN;
19414     }
19415 
19416   /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
19417      Hardware changes state only when a 256bit register is written to,
19418      but we need to prevent the compiler from moving the optimal insertion
19419      point above an eventual read from a 256bit or 512bit register.  */
19420   subrtx_iterator::array_type array;
19421   FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
19422     if (ix86_check_avx_upper_register (*iter))
19423       return AVX_U128_DIRTY;
19424 
19425   return AVX_U128_ANY;
19426 }
19427 
19428 /* Return mode that i387 must be switched into
19429    prior to the execution of insn.  */
19430 
19431 static int
19432 ix86_i387_mode_needed (int entity, rtx_insn *insn)
19433 {
19434   enum attr_i387_cw mode;
19435 
19436   /* The mode UNINITIALIZED is used to store the control word after a
19437      function call or ASM pattern.  The mode ANY specifies that the function
19438      has no requirements on the control word and makes no changes to the
19439      bits we are interested in.  */
19440 
19441   if (CALL_P (insn)
19442       || (NONJUMP_INSN_P (insn)
19443 	  && (asm_noperands (PATTERN (insn)) >= 0
19444 	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
19445     return I387_CW_UNINITIALIZED;
19446 
19447   if (recog_memoized (insn) < 0)
19448     return I387_CW_ANY;
19449 
19450   mode = get_attr_i387_cw (insn);
19451 
19452   switch (entity)
19453     {
19454     case I387_TRUNC:
19455       if (mode == I387_CW_TRUNC)
19456 	return mode;
19457       break;
19458 
19459     case I387_FLOOR:
19460       if (mode == I387_CW_FLOOR)
19461 	return mode;
19462       break;
19463 
19464     case I387_CEIL:
19465       if (mode == I387_CW_CEIL)
19466 	return mode;
19467       break;
19468 
19469     case I387_MASK_PM:
19470       if (mode == I387_CW_MASK_PM)
19471 	return mode;
19472       break;
19473 
19474     default:
19475       gcc_unreachable ();
19476     }
19477 
19478   return I387_CW_ANY;
19479 }
19480 
19481 /* Return mode that entity must be switched into
19482    prior to the execution of insn.  */
19483 
19484 static int
19485 ix86_mode_needed (int entity, rtx_insn *insn)
19486 {
19487   switch (entity)
19488     {
19489     case X86_DIRFLAG:
19490       return ix86_dirflag_mode_needed (insn);
19491     case AVX_U128:
19492       return ix86_avx_u128_mode_needed (insn);
19493     case I387_TRUNC:
19494     case I387_FLOOR:
19495     case I387_CEIL:
19496     case I387_MASK_PM:
19497       return ix86_i387_mode_needed (entity, insn);
19498     default:
19499       gcc_unreachable ();
19500     }
19501   return 0;
19502 }
19503 
19504 /* Check if a 256bit or 512bit AVX register is referenced in stores.   */
19505 
19506 static void
19507 ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
19508 {
19509   if (ix86_check_avx_upper_register (dest))
19510     {
19511       bool *used = (bool *) data;
19512       *used = true;
19513     }
19514 }
19515 
19516 /* Calculate mode of upper 128bit AVX registers after the insn.  */
19517 
19518 static int
19519 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
19520 {
19521   rtx pat = PATTERN (insn);
19522 
19523   if (vzeroupper_operation (pat, VOIDmode)
19524       || vzeroall_operation (pat, VOIDmode))
19525     return AVX_U128_CLEAN;
19526 
19527   /* We know that the state is clean after a CALL insn if the function
19528      return value does not use a 256bit or 512bit register.  */
19529   if (CALL_P (insn))
19530     {
19531       bool avx_upper_reg_found = false;
19532       note_stores (pat, ix86_check_avx_upper_stores, &avx_upper_reg_found);
19533 
19534       return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
19535     }
19536 
19537   /* Otherwise, return current mode.  Remember that if insn
19538      references AVX 256bit or 512bit registers, the mode was already
19539      changed to DIRTY from MODE_NEEDED.  */
19540   return mode;
19541 }
19542 
19543 /* Return the mode that an insn results in.  */
19544 
19545 static int
19546 ix86_mode_after (int entity, int mode, rtx_insn *insn)
19547 {
19548   switch (entity)
19549     {
19550     case X86_DIRFLAG:
19551       return mode;
19552     case AVX_U128:
19553       return ix86_avx_u128_mode_after (mode, insn);
19554     case I387_TRUNC:
19555     case I387_FLOOR:
19556     case I387_CEIL:
19557     case I387_MASK_PM:
19558       return mode;
19559     default:
19560       gcc_unreachable ();
19561     }
19562 }
19563 
19564 static int
19565 ix86_dirflag_mode_entry (void)
19566 {
19567   /* For TARGET_CLD or in the interrupt handler we can't assume
19568      direction flag state at function entry.  */
19569   if (TARGET_CLD
19570       || cfun->machine->func_type != TYPE_NORMAL)
19571     return X86_DIRFLAG_ANY;
19572 
19573   return X86_DIRFLAG_RESET;
19574 }
19575 
19576 static int
19577 ix86_avx_u128_mode_entry (void)
19578 {
19579   tree arg;
19580 
19581   /* Entry mode is set to AVX_U128_DIRTY if there are
19582      256bit or 512bit modes used in function arguments.  */
19583   for (arg = DECL_ARGUMENTS (current_function_decl); arg;
19584        arg = TREE_CHAIN (arg))
19585     {
19586       rtx incoming = DECL_INCOMING_RTL (arg);
19587 
19588       if (incoming && ix86_check_avx_upper_register (incoming))
19589 	return AVX_U128_DIRTY;
19590     }
19591 
19592   return AVX_U128_CLEAN;
19593 }
19594 
19595 /* Return a mode that ENTITY is assumed to be
19596    switched to at function entry.  */
19597 
19598 static int
19599 ix86_mode_entry (int entity)
19600 {
19601   switch (entity)
19602     {
19603     case X86_DIRFLAG:
19604       return ix86_dirflag_mode_entry ();
19605     case AVX_U128:
19606       return ix86_avx_u128_mode_entry ();
19607     case I387_TRUNC:
19608     case I387_FLOOR:
19609     case I387_CEIL:
19610     case I387_MASK_PM:
19611       return I387_CW_ANY;
19612     default:
19613       gcc_unreachable ();
19614     }
19615 }
19616 
19617 static int
19618 ix86_avx_u128_mode_exit (void)
19619 {
19620   rtx reg = crtl->return_rtx;
19621 
19622   /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
19623      or 512bit modes used in the function return register.  */
19624   if (reg && ix86_check_avx_upper_register (reg))
19625     return AVX_U128_DIRTY;
19626 
19627   return AVX_U128_CLEAN;
19628 }
19629 
19630 /* Return a mode that ENTITY is assumed to be
19631    switched to at function exit.  */
19632 
19633 static int
19634 ix86_mode_exit (int entity)
19635 {
19636   switch (entity)
19637     {
19638     case X86_DIRFLAG:
19639       return X86_DIRFLAG_ANY;
19640     case AVX_U128:
19641       return ix86_avx_u128_mode_exit ();
19642     case I387_TRUNC:
19643     case I387_FLOOR:
19644     case I387_CEIL:
19645     case I387_MASK_PM:
19646       return I387_CW_ANY;
19647     default:
19648       gcc_unreachable ();
19649     }
19650 }
19651 
19652 static int
19653 ix86_mode_priority (int, int n)
19654 {
19655   return n;
19656 }
19657 
19658 /* Output code to initialize control word copies used by trunc?f?i and
19659    rounding patterns.  MODE selects the variant to set up: the current
19660    control word is saved and a modified copy stored in MODE's stack slot.  */
19661 
19662 static void
19663 emit_i387_cw_initialization (int mode)
19664 {
19665   rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
19666   rtx new_mode;
19667 
19668   enum ix86_stack_slot slot;
19669 
19670   rtx reg = gen_reg_rtx (HImode);
19671 
19672   emit_insn (gen_x86_fnstcw_1 (stored_mode));
19673   emit_move_insn (reg, copy_rtx (stored_mode));
19674 
19675   switch (mode)
19676     {
19677     case I387_CW_TRUNC:
19678       /* round toward zero (truncate) */
19679       emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
19680       slot = SLOT_CW_TRUNC;
19681       break;
19682 
19683     case I387_CW_FLOOR:
19684       /* round down toward -oo */
19685       emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
19686       emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
19687       slot = SLOT_CW_FLOOR;
19688       break;
19689 
19690     case I387_CW_CEIL:
19691       /* round up toward +oo */
19692       emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
19693       emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
19694       slot = SLOT_CW_CEIL;
19695       break;
19696 
19697     case I387_CW_MASK_PM:
19698       /* mask precision exception for nearbyint() */
19699       emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
19700       slot = SLOT_CW_MASK_PM;
19701       break;
19702 
19703     default:
19704       gcc_unreachable ();
19705     }
19706 
19707   gcc_assert (slot < MAX_386_STACK_LOCALS);
19708 
19709   new_mode = assign_386_stack_local (HImode, slot);
19710   emit_move_insn (new_mode, reg);
19711 }
19712 
19713 /* Emit vzeroupper.  */
19714 
19715 void
19716 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
19717 {
19718   int i;
19719 
19720   /* Cancel automatic vzeroupper insertion if there are
19721      live call-saved SSE registers at the insertion point.  */
19722 
19723   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
19724     if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
19725       return;
19726 
19727   if (TARGET_64BIT)
19728     for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
19729       if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
19730 	return;
19731 
19732   emit_insn (gen_avx_vzeroupper ());
19733 }
19734 
19737 /* Generate one or more insns to set ENTITY to MODE.  REGS_LIVE
19738    is the set of hard registers live at the point where the insn(s)
19739    are to be inserted.  */
19740 
19741 static void
19742 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
19743 		    HARD_REG_SET regs_live)
19744 {
19745   switch (entity)
19746     {
19747     case X86_DIRFLAG:
19748       if (mode == X86_DIRFLAG_RESET)
19749 	emit_insn (gen_cld ());
19750       break;
19751     case AVX_U128:
19752       if (mode == AVX_U128_CLEAN)
19753 	ix86_avx_emit_vzeroupper (regs_live);
19754       break;
19755     case I387_TRUNC:
19756     case I387_FLOOR:
19757     case I387_CEIL:
19758     case I387_MASK_PM:
19759       if (mode != I387_CW_ANY
19760 	  && mode != I387_CW_UNINITIALIZED)
19761 	emit_i387_cw_initialization (mode);
19762       break;
19763     default:
19764       gcc_unreachable ();
19765     }
19766 }
19767 
19768 /* Output code for INSN to convert a float to a signed int.  OPERANDS
19769    are the insn operands.  The output may be [HSD]Imode and the input
19770    operand may be [SDX]Fmode.  */
19771 
19772 const char *
19773 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
19774 {
19775   bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
19776   bool dimode_p = GET_MODE (operands[0]) == DImode;
19777   int round_mode = get_attr_i387_cw (insn);
19778 
19779   static char buf[40];
19780   const char *p;
19781 
19782   /* Jump through a hoop or two for DImode, since the hardware has no
19783      non-popping instruction.  We used to do this a different way, but
19784      that was somewhat fragile and broke with post-reload splitters.  */
19785   if ((dimode_p || fisttp) && !stack_top_dies)
19786     output_asm_insn ("fld\t%y1", operands);
19787 
19788   gcc_assert (STACK_TOP_P (operands[1]));
19789   gcc_assert (MEM_P (operands[0]));
19790   gcc_assert (GET_MODE (operands[1]) != TFmode);
19791 
19792   if (fisttp)
19793     return "fisttp%Z0\t%0";
19794 
19795   strcpy (buf, "fist");
19796 
19797   if (round_mode != I387_CW_ANY)
19798     output_asm_insn ("fldcw\t%3", operands);
19799 
19800   p = "p%Z0\t%0";
19801   strcat (buf, p + !(stack_top_dies || dimode_p));
19802 
19803   output_asm_insn (buf, operands);
19804 
19805   if (round_mode != I387_CW_ANY)
19806     output_asm_insn ("fldcw\t%2", operands);
19807 
19808   return "";
19809 }
19810 
19811 /* Output code for x87 ffreep insn.  The OPNO argument, which may only
19812    have the values zero or one, indicates the ffreep insn's operand
19813    from the OPERANDS array.  */
19814 
19815 static const char *
19816 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
19817 {
19818   if (TARGET_USE_FFREEP)
19819 #ifdef HAVE_AS_IX86_FFREEP
19820     return opno ? "ffreep\t%y1" : "ffreep\t%y0";
19821 #else
19822     {
19823       static char retval[32];
19824       int regno = REGNO (operands[opno]);
19825 
19826       gcc_assert (STACK_REGNO_P (regno));
19827 
19828       regno -= FIRST_STACK_REG;
19829 
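      /* ffreep %st(N) encodes as the two bytes 0xdf 0xc0+N; when the
	 assembler does not know the mnemonic, emit the insn as raw
	 data (a little-endian short, hence 0xc<N>df below).  */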
19830       snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
19831       return retval;
19832     }
19833 #endif
19834 
19835   return opno ? "fstp\t%y1" : "fstp\t%y0";
19836 }
19837 
19838 
19839 /* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
19840    should be used.  UNORDERED_P is true when fucom should be used.  */
19841 
19842 const char *
19843 output_fp_compare (rtx_insn *insn, rtx *operands,
19844 		   bool eflags_p, bool unordered_p)
19845 {
19846   rtx *xops = eflags_p ? &operands[0] : &operands[1];
19847   bool stack_top_dies;
19848 
19849   static char buf[40];
19850   const char *p;
19851 
19852   gcc_assert (STACK_TOP_P (xops[0]));
19853 
19854   stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
19855 
19856   if (eflags_p)
19857     {
19858       p = unordered_p ? "fucomi" : "fcomi";
19859       strcpy (buf, p);
19860 
19861       p = "p\t{%y1, %0|%0, %y1}";
19862       strcat (buf, p + !stack_top_dies);
19863 
19864       return buf;
19865     }
19866 
19867   if (STACK_REG_P (xops[1])
19868       && stack_top_dies
19869       && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
19870     {
19871       gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);
19872 
19873       /* If both the top of the 387 stack and the other operand (also a
19874 	 stack register) die, then this must be a `fcompp' float
19875 	 compare.  */
19876       p = unordered_p ? "fucompp" : "fcompp";
19877       strcpy (buf, p);
19878     }
19879   else if (const0_operand (xops[1], VOIDmode))
19880     {
19881       gcc_assert (!unordered_p);
19882       strcpy (buf, "ftst");
19883     }
19884   else
19885     {
19886       if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
19887 	{
19888 	  gcc_assert (!unordered_p);
19889 	  p = "ficom";
19890 	}
19891       else
19892 	p = unordered_p ? "fucom" : "fcom";
19893 
19894       strcpy (buf, p);
19895 
19896       p = "p%Z2\t%y2";
19897       strcat (buf, p + !stack_top_dies);
19898     }
19899 
19900   output_asm_insn (buf, operands);
19901   return "fnstsw\t%0";
19902 }
19903 
19904 void
19905 ix86_output_addr_vec_elt (FILE *file, int value)
19906 {
19907   const char *directive = ASM_LONG;
19908 
19909 #ifdef ASM_QUAD
19910   if (TARGET_LP64)
19911     directive = ASM_QUAD;
19912 #else
19913   gcc_assert (!TARGET_64BIT);
19914 #endif
19915 
19916   fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
19917 }
19918 
19919 void
19920 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
19921 {
19922   const char *directive = ASM_LONG;
19923 
19924 #ifdef ASM_QUAD
19925   if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
19926     directive = ASM_QUAD;
19927 #else
19928   gcc_assert (!TARGET_64BIT);
19929 #endif
19930   /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
19931   if (TARGET_64BIT || TARGET_VXWORKS_RTP)
19932     fprintf (file, "%s%s%d-%s%d\n",
19933 	     directive, LPREFIX, value, LPREFIX, rel);
19934   else if (HAVE_AS_GOTOFF_IN_DATA)
19935     fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
19936 #if TARGET_MACHO
19937   else if (TARGET_MACHO)
19938     {
19939       fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
19940       machopic_output_function_base_name (file);
19941       putc ('\n', file);
19942     }
19943 #endif
19944   else
19945     asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
19946 		 GOT_SYMBOL_NAME, LPREFIX, value);
19947 }
19948 
19949 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
19950    for the target.  */
19951 
19952 void
19953 ix86_expand_clear (rtx dest)
19954 {
19955   rtx tmp;
19956 
19957   /* We play register width games, which are only valid after reload.  */
19958   gcc_assert (reload_completed);
19959 
19960   /* Avoid HImode and its attendant prefix byte.  */
19961   if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
19962     dest = gen_rtx_REG (SImode, REGNO (dest));
19963   tmp = gen_rtx_SET (dest, const0_rtx);
19964 
19965   if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
19966     {
19967       rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19968       tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
19969     }
19970 
19971   emit_insn (tmp);
19972 }
19973 
19974 void
19975 ix86_expand_move (machine_mode mode, rtx operands[])
19976 {
19977   rtx op0, op1;
19978   rtx tmp, addend = NULL_RTX;
19979   enum tls_model model;
19980 
19981   op0 = operands[0];
19982   op1 = operands[1];
19983 
19984   switch (GET_CODE (op1))
19985     {
19986     case CONST:
19987       tmp = XEXP (op1, 0);
19988 
19989       if (GET_CODE (tmp) != PLUS
19990 	  || GET_CODE (XEXP (tmp, 0)) != SYMBOL_REF)
19991 	break;
19992 
19993       op1 = XEXP (tmp, 0);
19994       addend = XEXP (tmp, 1);
19995       /* FALLTHRU */
19996 
19997     case SYMBOL_REF:
19998       model = SYMBOL_REF_TLS_MODEL (op1);
19999 
20000       if (model)
20001 	op1 = legitimize_tls_address (op1, model, true);
20002       else if (ix86_force_load_from_GOT_p (op1))
20003 	{
20004 	  /* Load the external function address via GOT slot to avoid PLT.  */
20005 	  op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1),
20006 				(TARGET_64BIT
20007 				 ? UNSPEC_GOTPCREL
20008 				 : UNSPEC_GOT));
20009 	  op1 = gen_rtx_CONST (Pmode, op1);
20010 	  op1 = gen_const_mem (Pmode, op1);
20011 	  set_mem_alias_set (op1, ix86_GOT_alias_set ());
20012 	}
20013       else
20014 	{
20015 	  tmp = legitimize_pe_coff_symbol (op1, addend != NULL_RTX);
20016 	  if (tmp)
20017 	    {
20018 	      op1 = tmp;
20019 	      if (!addend)
20020 		break;
20021 	    }
20022 	  else
20023 	    {
20024 	      op1 = operands[1];
20025 	      break;
20026 	    }
20027 	}
20028 
20029       if (addend)
20030 	{
20031 	  op1 = force_operand (op1, NULL_RTX);
20032 	  op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
20033 				     op0, 1, OPTAB_DIRECT);
20034 	}
20035       else
20036 	op1 = force_operand (op1, op0);
20037 
20038       if (op1 == op0)
20039 	return;
20040 
20041       op1 = convert_to_mode (mode, op1, 1);
20042 
20043     default:
20044       break;
20045     }
20046 
20047   if ((flag_pic || MACHOPIC_INDIRECT)
20048       && symbolic_operand (op1, mode))
20049     {
20050       if (TARGET_MACHO && !TARGET_64BIT)
20051 	{
20052 #if TARGET_MACHO
20053 	  /* dynamic-no-pic */
20054 	  if (MACHOPIC_INDIRECT)
20055 	    {
20056 	      rtx temp = (op0 && REG_P (op0) && mode == Pmode)
20057 			 ? op0 : gen_reg_rtx (Pmode);
20058 	      op1 = machopic_indirect_data_reference (op1, temp);
20059 	      if (MACHOPIC_PURE)
20060 		op1 = machopic_legitimize_pic_address (op1, mode,
20061 						       temp == op1 ? 0 : temp);
20062 	    }
20063 	  if (op0 != op1 && GET_CODE (op0) != MEM)
20064 	    {
20065 	      rtx insn = gen_rtx_SET (op0, op1);
20066 	      emit_insn (insn);
20067 	      return;
20068 	    }
20069 	  if (GET_CODE (op0) == MEM)
20070 	    op1 = force_reg (Pmode, op1);
20071 	  else
20072 	    {
20073 	      rtx temp = op0;
20074 	      if (GET_CODE (temp) != REG)
20075 		temp = gen_reg_rtx (Pmode);
20076 	      temp = legitimize_pic_address (op1, temp);
20077 	      if (temp == op0)
20078 		return;
20079 	      op1 = temp;
20080 	    }
20081       /* dynamic-no-pic */
20082 #endif
20083 	}
20084       else
20085 	{
20086 	  if (MEM_P (op0))
20087 	    op1 = force_reg (mode, op1);
20088 	  else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
20089 	    {
20090 	      rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
20091 	      op1 = legitimize_pic_address (op1, reg);
20092 	      if (op0 == op1)
20093 		return;
20094 	      op1 = convert_to_mode (mode, op1, 1);
20095 	    }
20096 	}
20097     }
20098   else
20099     {
20100       if (MEM_P (op0)
20101 	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
20102 	      || !push_operand (op0, mode))
20103 	  && MEM_P (op1))
20104 	op1 = force_reg (mode, op1);
20105 
20106       if (push_operand (op0, mode)
20107 	  && ! general_no_elim_operand (op1, mode))
20108 	op1 = copy_to_mode_reg (mode, op1);
20109 
20110       /* Force large constants in 64bit compilation into register
20111 	 to get them CSEed.  */
20112       if (can_create_pseudo_p ()
20113 	  && (mode == DImode) && TARGET_64BIT
20114 	  && immediate_operand (op1, mode)
20115 	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
20116 	  && !register_operand (op0, mode)
20117 	  && optimize)
20118 	op1 = copy_to_mode_reg (mode, op1);
20119 
20120       if (can_create_pseudo_p ()
20121 	  && CONST_DOUBLE_P (op1))
20122 	{
20123 	  /* If we are loading a floating point constant to a register,
20124 	     force the value to memory now, since we'll get better code
20125 	     out the back end.  */
20126 
20127 	  op1 = validize_mem (force_const_mem (mode, op1));
20128 	  if (!register_operand (op0, mode))
20129 	    {
20130 	      rtx temp = gen_reg_rtx (mode);
20131 	      emit_insn (gen_rtx_SET (temp, op1));
20132 	      emit_move_insn (op0, temp);
20133 	      return;
20134 	    }
20135 	}
20136     }
20137 
20138   emit_insn (gen_rtx_SET (op0, op1));
20139 }
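
/* For instance (an illustrative sketch): with -fno-plt (or the "noplt"
   attribute), taking the address of or calling an external function goes
   through the path above that wraps the symbol in UNSPEC_GOTPCREL, so
   64bit code loads it as

	movq	ext_fn@GOTPCREL(%rip), %rax

   instead of materializing the address of a PLT stub (ext_fn being a
   hypothetical external function).  */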
20140 
20141 void
20142 ix86_expand_vector_move (machine_mode mode, rtx operands[])
20143 {
20144   rtx op0 = operands[0], op1 = operands[1];
20145   /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for the IA MCU
20146      psABI, since its biggest alignment is 4 bytes.  */
20147   unsigned int align = (TARGET_IAMCU
20148 			? GET_MODE_BITSIZE (mode)
20149 			: GET_MODE_ALIGNMENT (mode));
20150 
20151   if (push_operand (op0, VOIDmode))
20152     op0 = emit_move_resolve_push (mode, op0);
20153 
20154   /* Force constants other than zero into memory.  We do not know how
20155      the instructions used to build constants modify the upper 64 bits
20156      of the register; once we have that information we may be able
20157      to handle some of them more efficiently.  */
20158   if (can_create_pseudo_p ()
20159       && (CONSTANT_P (op1)
20160 	  || (SUBREG_P (op1)
20161 	      && CONSTANT_P (SUBREG_REG (op1))))
20162       && ((register_operand (op0, mode)
20163 	   && !standard_sse_constant_p (op1, mode))
20164 	  /* ix86_expand_vector_move_misalign() does not like constants.  */
20165 	  || (SSE_REG_MODE_P (mode)
20166 	      && MEM_P (op0)
20167 	      && MEM_ALIGN (op0) < align)))
20168     {
20169       if (SUBREG_P (op1))
20170 	{
20171 	  machine_mode imode = GET_MODE (SUBREG_REG (op1));
20172 	  rtx r = force_const_mem (imode, SUBREG_REG (op1));
20173 	  if (r)
20174 	    r = validize_mem (r);
20175 	  else
20176 	    r = force_reg (imode, SUBREG_REG (op1));
20177 	  op1 = simplify_gen_subreg (mode, r, imode, SUBREG_BYTE (op1));
20178 	}
20179       else
20180 	op1 = validize_mem (force_const_mem (mode, op1));
20181     }
20182 
20183   /* We need to check memory alignment for SSE mode since attribute
20184      can make operands unaligned.  */
20185   if (can_create_pseudo_p ()
20186       && SSE_REG_MODE_P (mode)
20187       && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
20188 	  || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
20189     {
20190       rtx tmp[2];
20191 
20192       /* ix86_expand_vector_move_misalign() does not like both
20193 	 arguments in memory.  */
20194       if (!register_operand (op0, mode)
20195 	  && !register_operand (op1, mode))
20196 	op1 = force_reg (mode, op1);
20197 
20198       tmp[0] = op0; tmp[1] = op1;
20199       ix86_expand_vector_move_misalign (mode, tmp);
20200       return;
20201     }
20202 
20203   /* Make operand1 a register if it isn't already.  */
20204   if (can_create_pseudo_p ()
20205       && !register_operand (op0, mode)
20206       && !register_operand (op1, mode))
20207     {
20208       emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
20209       return;
20210     }
20211 
20212   emit_insn (gen_rtx_SET (op0, op1));
20213 }
20214 
20215 /* Split 32-byte AVX unaligned load and store if needed.  */
20216 
20217 static void
20218 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
20219 {
20220   rtx m;
20221   rtx (*extract) (rtx, rtx, rtx);
20222   machine_mode mode;
20223 
20224   if ((MEM_P (op1) && !TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
20225       || (MEM_P (op0) && !TARGET_AVX256_SPLIT_UNALIGNED_STORE))
20226     {
20227       emit_insn (gen_rtx_SET (op0, op1));
20228       return;
20229     }
20230 
20231   rtx orig_op0 = NULL_RTX;
20232   mode = GET_MODE (op0);
20233   switch (GET_MODE_CLASS (mode))
20234     {
20235     case MODE_VECTOR_INT:
20236     case MODE_INT:
20237       if (mode != V32QImode)
20238 	{
20239 	  if (!MEM_P (op0))
20240 	    {
20241 	      orig_op0 = op0;
20242 	      op0 = gen_reg_rtx (V32QImode);
20243 	    }
20244 	  else
20245 	    op0 = gen_lowpart (V32QImode, op0);
20246 	  op1 = gen_lowpart (V32QImode, op1);
20247 	  mode = V32QImode;
20248 	}
20249       break;
20250     case MODE_VECTOR_FLOAT:
20251       break;
20252     default:
20253       gcc_unreachable ();
20254     }
20255 
20256   switch (mode)
20257     {
20258     default:
20259       gcc_unreachable ();
20260     case E_V32QImode:
20261       extract = gen_avx_vextractf128v32qi;
20262       mode = V16QImode;
20263       break;
20264     case E_V8SFmode:
20265       extract = gen_avx_vextractf128v8sf;
20266       mode = V4SFmode;
20267       break;
20268     case E_V4DFmode:
20269       extract = gen_avx_vextractf128v4df;
20270       mode = V2DFmode;
20271       break;
20272     }
20273 
20274   if (MEM_P (op1))
20275     {
20276       rtx r = gen_reg_rtx (mode);
20277       m = adjust_address (op1, mode, 0);
20278       emit_move_insn (r, m);
20279       m = adjust_address (op1, mode, 16);
20280       r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
20281       emit_move_insn (op0, r);
20282     }
20283   else if (MEM_P (op0))
20284     {
20285       m = adjust_address (op0, mode, 0);
20286       emit_insn (extract (m, op1, const0_rtx));
20287       m = adjust_address (op0, mode, 16);
20288       emit_insn (extract (m, copy_rtx (op1), const1_rtx));
20289     }
20290   else
20291     gcc_unreachable ();
20292 
20293   if (orig_op0)
20294     emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
20295 }
20296 
20297 /* Implement the movmisalign patterns for SSE.  Non-SSE modes go
20298    straight to ix86_expand_vector_move.  */
20299 /* Code generation for scalar reg-reg moves of single and double precision data:
20300      if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
20301        movaps reg, reg
20302      else
20303        movss reg, reg
20304      if (x86_sse_partial_reg_dependency == true)
20305        movapd reg, reg
20306      else
20307        movsd reg, reg
20308 
20309    Code generation for scalar loads of double precision data:
20310      if (x86_sse_split_regs == true)
20311        movlpd mem, reg      (gas syntax)
20312      else
20313        movsd mem, reg
20314 
20315    Code generation for unaligned packed loads of single precision data
20316    (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
20317      if (x86_sse_unaligned_move_optimal)
20318        movups mem, reg
20319 
20320      if (x86_sse_partial_reg_dependency == true)
20321        {
20322          xorps  reg, reg
20323          movlps mem, reg
20324          movhps mem+8, reg
20325        }
20326      else
20327        {
20328          movlps mem, reg
20329          movhps mem+8, reg
20330        }
20331 
20332    Code generation for unaligned packed loads of double precision data
20333    (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
20334      if (x86_sse_unaligned_move_optimal)
20335        movupd mem, reg
20336 
20337      if (x86_sse_split_regs == true)
20338        {
20339          movlpd mem, reg
20340          movhpd mem+8, reg
20341        }
20342      else
20343        {
20344          movsd  mem, reg
20345          movhpd mem+8, reg
20346        }
20347  */
20348 
20349 void
20350 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
20351 {
20352   rtx op0, op1, m;
20353 
20354   op0 = operands[0];
20355   op1 = operands[1];
20356 
20357   /* Use unaligned load/store for AVX512 or when optimizing for size.  */
20358   if (GET_MODE_SIZE (mode) == 64 || optimize_insn_for_size_p ())
20359     {
20360       emit_insn (gen_rtx_SET (op0, op1));
20361       return;
20362     }
20363 
20364   if (TARGET_AVX)
20365     {
20366       if (GET_MODE_SIZE (mode) == 32)
20367 	ix86_avx256_split_vector_move_misalign (op0, op1);
20368       else
20369 	/* Always use 128-bit mov<mode>_internal pattern for AVX.  */
20370 	emit_insn (gen_rtx_SET (op0, op1));
20371       return;
20372     }
20373 
20374   if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
20375       || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
20376     {
20377       emit_insn (gen_rtx_SET (op0, op1));
20378       return;
20379     }
20380 
20381   /* ??? If we have typed data, then it would appear that using
20382      movdqu is the only way to get unaligned data loaded with
20383      integer type.  */
20384   if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
20385     {
20386       emit_insn (gen_rtx_SET (op0, op1));
20387       return;
20388     }
20389 
20390   if (MEM_P (op1))
20391     {
20392       if (TARGET_SSE2 && mode == V2DFmode)
20393         {
20394           rtx zero;
20395 
20396 	  /* When SSE registers are split into halves, we can avoid
20397 	     writing to the top half twice.  */
20398 	  if (TARGET_SSE_SPLIT_REGS)
20399 	    {
20400 	      emit_clobber (op0);
20401 	      zero = op0;
20402 	    }
20403 	  else
20404 	    {
20405 	      /* ??? Not sure about the best option for the Intel chips.
20406 		 The following would seem to satisfy; the register is
20407 		 entirely cleared, breaking the dependency chain.  We
20408 		 then store to the upper half, with a dependency depth
20409 		 of one.  A rumor has it that Intel recommends two movsd
20410 		 followed by an unpacklpd, but this is unconfirmed.  And
20411 		 given that the dependency depth of the unpacklpd would
20412 		 still be one, I'm not sure why this would be better.  */
20413 	      zero = CONST0_RTX (V2DFmode);
20414 	    }
20415 
20416 	  m = adjust_address (op1, DFmode, 0);
20417 	  emit_insn (gen_sse2_loadlpd (op0, zero, m));
20418 	  m = adjust_address (op1, DFmode, 8);
20419 	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
20420 	}
20421       else
20422         {
20423 	  rtx t;
20424 
20425 	  if (mode != V4SFmode)
20426 	    t = gen_reg_rtx (V4SFmode);
20427 	  else
20428 	    t = op0;
20429 
20430 	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
20431 	    emit_move_insn (t, CONST0_RTX (V4SFmode));
20432 	  else
20433 	    emit_clobber (t);
20434 
20435 	  m = adjust_address (op1, V2SFmode, 0);
20436 	  emit_insn (gen_sse_loadlps (t, t, m));
20437 	  m = adjust_address (op1, V2SFmode, 8);
20438 	  emit_insn (gen_sse_loadhps (t, t, m));
20439 	  if (mode != V4SFmode)
20440 	    emit_move_insn (op0, gen_lowpart (mode, t));
20441 	}
20442     }
20443   else if (MEM_P (op0))
20444     {
20445       if (TARGET_SSE2 && mode == V2DFmode)
20446 	{
20447 	  m = adjust_address (op0, DFmode, 0);
20448 	  emit_insn (gen_sse2_storelpd (m, op1));
20449 	  m = adjust_address (op0, DFmode, 8);
20450 	  emit_insn (gen_sse2_storehpd (m, op1));
20451 	}
20452       else
20453 	{
20454 	  if (mode != V4SFmode)
20455 	    op1 = gen_lowpart (V4SFmode, op1);
20456 
20457 	  m = adjust_address (op0, V2SFmode, 0);
20458 	  emit_insn (gen_sse_storelps (m, op1));
20459 	  m = adjust_address (op0, V2SFmode, 8);
20460 	  emit_insn (gen_sse_storehps (m, copy_rtx (op1)));
20461 	}
20462     }
20463   else
20464     gcc_unreachable ();
20465 }
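
/* A source-level example (sketch) that can reach the paths above when
   compiled with plain SSE and no alignment guarantee on the pointer:

     typedef float v4sf __attribute__ ((vector_size (16)));

     v4sf
     load4 (const float *p)
     {
       v4sf v;
       __builtin_memcpy (&v, p, sizeof v);
       return v;
     }

   Depending on the tuning flags checked above this becomes a single
   movups, or a movlps/movhps pair (possibly preceded by an xorps to
   break the partial register dependency).  */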
20466 
20467 /* Helper function of ix86_fixup_binary_operands to canonicalize
20468    operand order.  Returns true if the operands should be swapped.  */
20469 
20470 static bool
20471 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
20472 			     rtx operands[])
20473 {
20474   rtx dst = operands[0];
20475   rtx src1 = operands[1];
20476   rtx src2 = operands[2];
20477 
20478   /* If the operation is not commutative, we can't do anything.  */
20479   if (GET_RTX_CLASS (code) != RTX_COMM_ARITH
20480       && GET_RTX_CLASS (code) != RTX_COMM_COMPARE)
20481     return false;
20482 
20483   /* Highest priority is that src1 should match dst.  */
20484   if (rtx_equal_p (dst, src1))
20485     return false;
20486   if (rtx_equal_p (dst, src2))
20487     return true;
20488 
20489   /* Next highest priority is that immediate constants come second.  */
20490   if (immediate_operand (src2, mode))
20491     return false;
20492   if (immediate_operand (src1, mode))
20493     return true;
20494 
20495   /* Lowest priority is that memory references should come second.  */
20496   if (MEM_P (src2))
20497     return false;
20498   if (MEM_P (src1))
20499     return true;
20500 
20501   return false;
20502 }
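
/* For example (an illustrative sketch): for a statement like

     x = y + x;

   the destination matches src2, so the operands are swapped here; the
   resulting two-address add can then use x as both an input and the
   output, rather than first copying y into x and clobbering an input
   that is still needed.  */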
20503 
20504 
20505 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
20506    destination to use for the operation.  If different from the true
20507    destination in operands[0], a copy operation will be required.  */
20508 
20509 rtx
20510 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
20511 			    rtx operands[])
20512 {
20513   rtx dst = operands[0];
20514   rtx src1 = operands[1];
20515   rtx src2 = operands[2];
20516 
20517   /* Canonicalize operand order.  */
20518   if (ix86_swap_binary_operands_p (code, mode, operands))
20519     {
20520       /* It is invalid to swap operands of different modes.  */
20521       gcc_assert (GET_MODE (src1) == GET_MODE (src2));
20522 
20523       std::swap (src1, src2);
20524     }
20525 
20526   /* Both source operands cannot be in memory.  */
20527   if (MEM_P (src1) && MEM_P (src2))
20528     {
20529       /* Optimization: Only read from memory once.  */
20530       if (rtx_equal_p (src1, src2))
20531 	{
20532 	  src2 = force_reg (mode, src2);
20533 	  src1 = src2;
20534 	}
20535       else if (rtx_equal_p (dst, src1))
20536 	src2 = force_reg (mode, src2);
20537       else
20538 	src1 = force_reg (mode, src1);
20539     }
20540 
20541   /* If the destination is memory, and we do not have matching source
20542      operands, do things in registers.  */
20543   if (MEM_P (dst) && !rtx_equal_p (dst, src1))
20544     dst = gen_reg_rtx (mode);
20545 
20546   /* Source 1 cannot be a constant.  */
20547   if (CONSTANT_P (src1))
20548     src1 = force_reg (mode, src1);
20549 
20550   /* Source 1 cannot be a non-matching memory.  */
20551   if (MEM_P (src1) && !rtx_equal_p (dst, src1))
20552     src1 = force_reg (mode, src1);
20553 
20554   /* Improve address combine.  */
20555   if (code == PLUS
20556       && GET_MODE_CLASS (mode) == MODE_INT
20557       && MEM_P (src2))
20558     src2 = force_reg (mode, src2);
20559 
20560   operands[1] = src1;
20561   operands[2] = src2;
20562   return dst;
20563 }
20564 
20565 /* Similarly, but assume that the destination has already been
20566    set up properly.  */
20567 
20568 void
20569 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
20570 				    machine_mode mode, rtx operands[])
20571 {
20572   rtx dst = ix86_fixup_binary_operands (code, mode, operands);
20573   gcc_assert (dst == operands[0]);
20574 }
20575 
20576 /* Attempt to expand a binary operator.  Make the expansion closer to the
20577    actual machine than just general_operand, which would allow 3 separate
20578    memory references (one output, two input) in a single insn.  */
20579 
20580 void
20581 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
20582 			     rtx operands[])
20583 {
20584   rtx src1, src2, dst, op, clob;
20585 
20586   dst = ix86_fixup_binary_operands (code, mode, operands);
20587   src1 = operands[1];
20588   src2 = operands[2];
20589 
20590  /* Emit the instruction.  */
20591 
20592   op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2));
20593 
20594   if (reload_completed
20595       && code == PLUS
20596       && !rtx_equal_p (dst, src1))
20597     {
20598       /* This is going to be an LEA; avoid splitting it later.  */
20599       emit_insn (op);
20600     }
20601   else
20602     {
20603       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20604       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
20605     }
20606 
20607   /* Fix up the destination if needed.  */
20608   if (dst != operands[0])
20609     emit_move_insn (operands[0], dst);
20610 }
20611 
20612 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
20613    the given OPERANDS.  */
20614 
20615 void
20616 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
20617 				     rtx operands[])
20618 {
20619   rtx op1 = NULL_RTX, op2 = NULL_RTX;
20620   if (SUBREG_P (operands[1]))
20621     {
20622       op1 = operands[1];
20623       op2 = operands[2];
20624     }
20625   else if (SUBREG_P (operands[2]))
20626     {
20627       op1 = operands[2];
20628       op2 = operands[1];
20629     }
20630   /* Optimize (__m128i) d | (__m128i) e and similar code
20631      when d and e are float vectors into float vector logical
20632      insn.  In C/C++ without using intrinsics there is no other way
20633      to express vector logical operation on float vectors than
20634      to cast them temporarily to integer vectors.  */
20635   if (op1
20636       && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
20637       && (SUBREG_P (op2) || GET_CODE (op2) == CONST_VECTOR)
20638       && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
20639       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
20640       && SUBREG_BYTE (op1) == 0
20641       && (GET_CODE (op2) == CONST_VECTOR
20642 	  || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
20643 	      && SUBREG_BYTE (op2) == 0))
20644       && can_create_pseudo_p ())
20645     {
20646       rtx dst;
20647       switch (GET_MODE (SUBREG_REG (op1)))
20648 	{
20649 	case E_V4SFmode:
20650 	case E_V8SFmode:
20651 	case E_V16SFmode:
20652 	case E_V2DFmode:
20653 	case E_V4DFmode:
20654 	case E_V8DFmode:
20655 	  dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
20656 	  if (GET_CODE (op2) == CONST_VECTOR)
20657 	    {
20658 	      op2 = gen_lowpart (GET_MODE (dst), op2);
20659 	      op2 = force_reg (GET_MODE (dst), op2);
20660 	    }
20661 	  else
20662 	    {
20663 	      op1 = operands[1];
20664 	      op2 = SUBREG_REG (operands[2]);
20665 	      if (!vector_operand (op2, GET_MODE (dst)))
20666 		op2 = force_reg (GET_MODE (dst), op2);
20667 	    }
20668 	  op1 = SUBREG_REG (op1);
20669 	  if (!vector_operand (op1, GET_MODE (dst)))
20670 	    op1 = force_reg (GET_MODE (dst), op1);
20671 	  emit_insn (gen_rtx_SET (dst,
20672 				  gen_rtx_fmt_ee (code, GET_MODE (dst),
20673 						  op1, op2)));
20674 	  emit_move_insn (operands[0], gen_lowpart (mode, dst));
20675 	  return;
20676 	default:
20677 	  break;
20678 	}
20679     }
20680   if (!vector_operand (operands[1], mode))
20681     operands[1] = force_reg (mode, operands[1]);
20682   if (!vector_operand (operands[2], mode))
20683     operands[2] = force_reg (mode, operands[2]);
20684   ix86_fixup_binary_operands_no_copy (code, mode, operands);
20685   emit_insn (gen_rtx_SET (operands[0],
20686 			  gen_rtx_fmt_ee (code, mode, operands[1],
20687 					  operands[2])));
20688 }
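
/* The C idiom this recognizes looks like the following (sketch, using
   the GCC vector extensions):

     typedef float v4sf __attribute__ ((vector_size (16)));
     typedef long long v2di __attribute__ ((vector_size (16)));

     v4sf
     or_sign (v4sf a, v4sf b)
     {
       return (v4sf) ((v2di) a | (v2di) b);
     }

   Without the transformation above the OR would be emitted in the
   integer domain (por); rewriting it on the float-typed operands allows
   an orps instead and avoids a domain-crossing penalty.  */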
20689 
20690 /* Return TRUE or FALSE depending on whether the binary operator meets the
20691    appropriate constraints.  */
20692 
20693 bool
20694 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
20695 			 rtx operands[3])
20696 {
20697   rtx dst = operands[0];
20698   rtx src1 = operands[1];
20699   rtx src2 = operands[2];
20700 
20701   /* Both source operands cannot be in memory.  */
20702   if (MEM_P (src1) && MEM_P (src2))
20703     return false;
20704 
20705   /* Canonicalize operand order for commutative operators.  */
20706   if (ix86_swap_binary_operands_p (code, mode, operands))
20707     std::swap (src1, src2);
20708 
20709   /* If the destination is memory, we must have a matching source operand.  */
20710   if (MEM_P (dst) && !rtx_equal_p (dst, src1))
20711     return false;
20712 
20713   /* Source 1 cannot be a constant.  */
20714   if (CONSTANT_P (src1))
20715     return false;
20716 
20717   /* Source 1 cannot be a non-matching memory.  */
20718   if (MEM_P (src1) && !rtx_equal_p (dst, src1))
20719     /* Support "andhi/andsi/anddi" as a zero-extending move.  */
20720     return (code == AND
20721 	    && (mode == HImode
20722 		|| mode == SImode
20723 		|| (TARGET_64BIT && mode == DImode))
20724 	    && satisfies_constraint_L (src2));
20725 
20726   return true;
20727 }
20728 
20729 /* Attempt to expand a unary operator.  Make the expansion closer to the
20730    actual machine than just general_operand, which would allow 2 separate
20731    memory references (one output, one input) in a single insn.  */
20732 
20733 void
20734 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
20735 			    rtx operands[])
20736 {
20737   bool matching_memory = false;
20738   rtx src, dst, op, clob;
20739 
20740   dst = operands[0];
20741   src = operands[1];
20742 
20743   /* If the destination is memory, and we do not have matching source
20744      operands, do things in registers.  */
20745   if (MEM_P (dst))
20746     {
20747       if (rtx_equal_p (dst, src))
20748 	matching_memory = true;
20749       else
20750 	dst = gen_reg_rtx (mode);
20751     }
20752 
20753   /* When source operand is memory, destination must match.  */
20754   if (MEM_P (src) && !matching_memory)
20755     src = force_reg (mode, src);
20756 
20757   /* Emit the instruction.  */
20758 
20759   op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src));
20760 
20761   if (code == NOT)
20762     emit_insn (op);
20763   else
20764     {
20765       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20766       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
20767     }
20768 
20769   /* Fix up the destination if needed.  */
20770   if (dst != operands[0])
20771     emit_move_insn (operands[0], dst);
20772 }
20773 
20774 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
20775    divisor are within the range [0-255].  */
20776 
20777 void
20778 ix86_split_idivmod (machine_mode mode, rtx operands[],
20779 		    bool signed_p)
20780 {
20781   rtx_code_label *end_label, *qimode_label;
20782   rtx div, mod;
20783   rtx_insn *insn;
20784   rtx scratch, tmp0, tmp1, tmp2;
20785   rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
20786   rtx (*gen_zero_extend) (rtx, rtx);
20787   rtx (*gen_test_ccno_1) (rtx, rtx);
20788 
20789   switch (mode)
20790     {
20791     case E_SImode:
20792       if (GET_MODE (operands[0]) == SImode)
20793 	{
20794 	  if (GET_MODE (operands[1]) == SImode)
20795 	    gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
20796 	  else
20797 	    gen_divmod4_1
20798 	      = signed_p ? gen_divmodsi4_zext_2 : gen_udivmodsi4_zext_2;
20799 	  gen_zero_extend = gen_zero_extendqisi2;
20800 	}
20801       else
20802 	{
20803 	  gen_divmod4_1
20804 	    = signed_p ? gen_divmodsi4_zext_1 : gen_udivmodsi4_zext_1;
20805 	  gen_zero_extend = gen_zero_extendqidi2;
20806 	}
20807       gen_test_ccno_1 = gen_testsi_ccno_1;
20808       break;
20809     case E_DImode:
20810       gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
20811       gen_test_ccno_1 = gen_testdi_ccno_1;
20812       gen_zero_extend = gen_zero_extendqidi2;
20813       break;
20814     default:
20815       gcc_unreachable ();
20816     }
20817 
20818   end_label = gen_label_rtx ();
20819   qimode_label = gen_label_rtx ();
20820 
20821   scratch = gen_reg_rtx (mode);
20822 
20823   /* Use 8bit unsigned divmod if dividend and divisor are within
20824      the range [0-255].  */
20825   emit_move_insn (scratch, operands[2]);
20826   scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
20827 				 scratch, 1, OPTAB_DIRECT);
20828   emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
20829   tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
20830   tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
20831   tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
20832 			       gen_rtx_LABEL_REF (VOIDmode, qimode_label),
20833 			       pc_rtx);
20834   insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0));
20835   predict_jump (REG_BR_PROB_BASE * 50 / 100);
20836   JUMP_LABEL (insn) = qimode_label;
20837 
20838   /* Generate original signed/unsigned divmod.  */
20839   div = gen_divmod4_1 (operands[0], operands[1],
20840 		       operands[2], operands[3]);
20841   emit_insn (div);
20842 
20843   /* Branch to the end.  */
20844   emit_jump_insn (gen_jump (end_label));
20845   emit_barrier ();
20846 
20847   /* Generate 8bit unsigned divide.  */
20848   emit_label (qimode_label);
20849   /* Don't use operands[0] for result of 8bit divide since not all
20850      registers support QImode ZERO_EXTRACT.  */
20851   tmp0 = lowpart_subreg (HImode, scratch, mode);
20852   tmp1 = lowpart_subreg (HImode, operands[2], mode);
20853   tmp2 = lowpart_subreg (QImode, operands[3], mode);
20854   emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
20855 
20856   if (signed_p)
20857     {
20858       div = gen_rtx_DIV (mode, operands[2], operands[3]);
20859       mod = gen_rtx_MOD (mode, operands[2], operands[3]);
20860     }
20861   else
20862     {
20863       div = gen_rtx_UDIV (mode, operands[2], operands[3]);
20864       mod = gen_rtx_UMOD (mode, operands[2], operands[3]);
20865     }
20866   if (mode == SImode)
20867     {
20868       if (GET_MODE (operands[0]) != SImode)
20869 	div = gen_rtx_ZERO_EXTEND (DImode, div);
20870       if (GET_MODE (operands[1]) != SImode)
20871 	mod = gen_rtx_ZERO_EXTEND (DImode, mod);
20872     }
20873 
20874   /* Extract remainder from AH.  */
20875   tmp1 = gen_rtx_ZERO_EXTRACT (GET_MODE (operands[1]),
20876 			       tmp0, GEN_INT (8), GEN_INT (8));
20877   if (REG_P (operands[1]))
20878     insn = emit_move_insn (operands[1], tmp1);
20879   else
20880     {
20881       /* Need a new scratch register since the old one has result
20882 	 of 8bit divide.  */
20883       scratch = gen_reg_rtx (GET_MODE (operands[1]));
20884       emit_move_insn (scratch, tmp1);
20885       insn = emit_move_insn (operands[1], scratch);
20886     }
20887   set_unique_reg_note (insn, REG_EQUAL, mod);
20888 
20889   /* Zero extend quotient from AL.  */
20890   tmp1 = gen_lowpart (QImode, tmp0);
20891   insn = emit_insn (gen_zero_extend (operands[0], tmp1));
20892   set_unique_reg_note (insn, REG_EQUAL, div);
20893 
20894   emit_label (end_label);
20895 }
20896 
20897 #define LEA_MAX_STALL (3)
20898 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
20899 
20900 /* Increase given DISTANCE in half-cycles according to
20901    dependencies between PREV and NEXT instructions.
20902    Add 1 half-cycle if there is no dependency and
20903    go to the next cycle if there is some dependency.  */
20904 
20905 static unsigned int
20906 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
20907 {
20908   df_ref def, use;
20909 
20910   if (!prev || !next)
20911     return distance + (distance & 1) + 2;
20912 
20913   if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
20914     return distance + 1;
20915 
20916   FOR_EACH_INSN_USE (use, next)
20917     FOR_EACH_INSN_DEF (def, prev)
20918       if (!DF_REF_IS_ARTIFICIAL (def)
20919 	  && DF_REF_REGNO (use) == DF_REF_REGNO (def))
20920 	return distance + (distance & 1) + 2;
20921 
20922   return distance + 1;
20923 }
20924 
20925 /* Function checks if instruction INSN defines register number
20926    REGNO1 or REGNO2.  */
20927 
20928 static bool
20929 insn_defines_reg (unsigned int regno1, unsigned int regno2,
20930 		  rtx_insn *insn)
20931 {
20932   df_ref def;
20933 
20934   FOR_EACH_INSN_DEF (def, insn)
20935     if (DF_REF_REG_DEF_P (def)
20936 	&& !DF_REF_IS_ARTIFICIAL (def)
20937 	&& (regno1 == DF_REF_REGNO (def)
20938 	    || regno2 == DF_REF_REGNO (def)))
20939       return true;
20940 
20941   return false;
20942 }
20943 
20944 /* Function checks if instruction INSN uses register number
20945    REGNO as a part of address expression.  */
20946 
20947 static bool
20948 insn_uses_reg_mem (unsigned int regno, rtx insn)
20949 {
20950   df_ref use;
20951 
20952   FOR_EACH_INSN_USE (use, insn)
20953     if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
20954       return true;
20955 
20956   return false;
20957 }
20958 
20959 /* Search backward for non-agu definition of register number REGNO1
20960    or register number REGNO2 in basic block starting from instruction
20961    START up to head of basic block or instruction INSN.
20962 
20963    Function puts true value into *FOUND var if definition was found
20964    and false otherwise.
20965 
20966    Distance in half-cycles between START and found instruction or head
20967    of BB is added to DISTANCE and returned.  */
20968 
20969 static int
20970 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
20971 			       rtx_insn *insn, int distance,
20972 			       rtx_insn *start, bool *found)
20973 {
20974   basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
20975   rtx_insn *prev = start;
20976   rtx_insn *next = NULL;
20977 
20978   *found = false;
20979 
20980   while (prev
20981 	 && prev != insn
20982 	 && distance < LEA_SEARCH_THRESHOLD)
20983     {
20984       if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
20985 	{
20986 	  distance = increase_distance (prev, next, distance);
20987 	  if (insn_defines_reg (regno1, regno2, prev))
20988 	    {
20989 	      if (recog_memoized (prev) < 0
20990 		  || get_attr_type (prev) != TYPE_LEA)
20991 		{
20992 		  *found = true;
20993 		  return distance;
20994 		}
20995 	    }
20996 
20997 	  next = prev;
20998 	}
20999       if (prev == BB_HEAD (bb))
21000 	break;
21001 
21002       prev = PREV_INSN (prev);
21003     }
21004 
21005   return distance;
21006 }
21007 
21008 /* Search backward for non-agu definition of register number REGNO1
21009    or register number REGNO2 in INSN's basic block until
21010    1. Pass LEA_SEARCH_THRESHOLD instructions, or
21011    2. Reach neighbor BBs boundary, or
21012    3. Reach agu definition.
21013    Returns the distance between the non-agu definition point and INSN.
21014    If no definition point, returns -1.  */
21015 
21016 static int
21017 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
21018 			 rtx_insn *insn)
21019 {
21020   basic_block bb = BLOCK_FOR_INSN (insn);
21021   int distance = 0;
21022   bool found = false;
21023 
21024   if (insn != BB_HEAD (bb))
21025     distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
21026 					      distance, PREV_INSN (insn),
21027 					      &found);
21028 
21029   if (!found && distance < LEA_SEARCH_THRESHOLD)
21030     {
21031       edge e;
21032       edge_iterator ei;
21033       bool simple_loop = false;
21034 
21035       FOR_EACH_EDGE (e, ei, bb->preds)
21036 	if (e->src == bb)
21037 	  {
21038 	    simple_loop = true;
21039 	    break;
21040 	  }
21041 
21042       if (simple_loop)
21043 	distance = distance_non_agu_define_in_bb (regno1, regno2,
21044 						  insn, distance,
21045 						  BB_END (bb), &found);
21046       else
21047 	{
21048 	  int shortest_dist = -1;
21049 	  bool found_in_bb = false;
21050 
21051 	  FOR_EACH_EDGE (e, ei, bb->preds)
21052 	    {
21053 	      int bb_dist
21054 		= distance_non_agu_define_in_bb (regno1, regno2,
21055 						 insn, distance,
21056 						 BB_END (e->src),
21057 						 &found_in_bb);
21058 	      if (found_in_bb)
21059 		{
21060 		  if (shortest_dist < 0)
21061 		    shortest_dist = bb_dist;
21062 		  else if (bb_dist > 0)
21063 		    shortest_dist = MIN (bb_dist, shortest_dist);
21064 
21065 		  found = true;
21066 		}
21067 	    }
21068 
21069 	  distance = shortest_dist;
21070 	}
21071     }
21072 
21073   /* get_attr_type may modify recog data.  We want to make sure
21074      that recog data is valid for instruction INSN, on which
21075      distance_non_agu_define is called.  INSN is unchanged here.  */
21076   extract_insn_cached (insn);
21077 
21078   if (!found)
21079     return -1;
21080 
21081   return distance >> 1;
21082 }
21083 
21084 /* Return the distance in half-cycles between INSN and the next
21085    insn that uses register number REGNO in a memory address, added
21086    to DISTANCE.  Return -1 if REGNO is set.
21087 
21088    Put true value into *FOUND if register usage was found and
21089    false otherwise.
21090    Put true value into *REDEFINED if register redefinition was
21091    found and false otherwise.  */
21092 
21093 static int
21094 distance_agu_use_in_bb (unsigned int regno,
21095 			rtx_insn *insn, int distance, rtx_insn *start,
21096 			bool *found, bool *redefined)
21097 {
21098   basic_block bb = NULL;
21099   rtx_insn *next = start;
21100   rtx_insn *prev = NULL;
21101 
21102   *found = false;
21103   *redefined = false;
21104 
21105   if (start != NULL_RTX)
21106     {
21107       bb = BLOCK_FOR_INSN (start);
21108       if (start != BB_HEAD (bb))
21109 	/* If insn and start belong to the same bb, set prev to insn,
21110 	   so the call to increase_distance will increase the distance
21111 	   between insns by 1.  */
21112 	prev = insn;
21113     }
21114 
21115   while (next
21116 	 && next != insn
21117 	 && distance < LEA_SEARCH_THRESHOLD)
21118     {
21119       if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
21120 	{
21121 	  distance = increase_distance(prev, next, distance);
21122 	  if (insn_uses_reg_mem (regno, next))
21123 	    {
21124 	      /* Return DISTANCE if OP0 is used in memory
21125 		 address in NEXT.  */
21126 	      *found = true;
21127 	      return distance;
21128 	    }
21129 
21130 	  if (insn_defines_reg (regno, INVALID_REGNUM, next))
21131 	    {
21132 	      /* Return -1 if OP0 is set in NEXT.  */
21133 	      *redefined = true;
21134 	      return -1;
21135 	    }
21136 
21137 	  prev = next;
21138 	}
21139 
21140       if (next == BB_END (bb))
21141 	break;
21142 
21143       next = NEXT_INSN (next);
21144     }
21145 
21146   return distance;
21147 }
21148 
21149 /* Return the distance between INSN and the next insn that uses
21150    register number REGNO0 in a memory address.  Return -1 if no such
21151    use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */
21152 
21153 static int
21154 distance_agu_use (unsigned int regno0, rtx_insn *insn)
21155 {
21156   basic_block bb = BLOCK_FOR_INSN (insn);
21157   int distance = 0;
21158   bool found = false;
21159   bool redefined = false;
21160 
21161   if (insn != BB_END (bb))
21162     distance = distance_agu_use_in_bb (regno0, insn, distance,
21163 				       NEXT_INSN (insn),
21164 				       &found, &redefined);
21165 
21166   if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
21167     {
21168       edge e;
21169       edge_iterator ei;
21170       bool simple_loop = false;
21171 
21172       FOR_EACH_EDGE (e, ei, bb->succs)
21173         if (e->dest == bb)
21174 	  {
21175 	    simple_loop = true;
21176 	    break;
21177 	  }
21178 
21179       if (simple_loop)
21180 	distance = distance_agu_use_in_bb (regno0, insn,
21181 					   distance, BB_HEAD (bb),
21182 					   &found, &redefined);
21183       else
21184 	{
21185 	  int shortest_dist = -1;
21186 	  bool found_in_bb = false;
21187 	  bool redefined_in_bb = false;
21188 
21189 	  FOR_EACH_EDGE (e, ei, bb->succs)
21190 	    {
21191 	      int bb_dist
21192 		= distance_agu_use_in_bb (regno0, insn,
21193 					  distance, BB_HEAD (e->dest),
21194 					  &found_in_bb, &redefined_in_bb);
21195 	      if (found_in_bb)
21196 		{
21197 		  if (shortest_dist < 0)
21198 		    shortest_dist = bb_dist;
21199 		  else if (bb_dist > 0)
21200 		    shortest_dist = MIN (bb_dist, shortest_dist);
21201 
21202 		  found = true;
21203 		}
21204 	    }
21205 
21206 	  distance = shortest_dist;
21207 	}
21208     }
21209 
21210   if (!found || redefined)
21211     return -1;
21212 
21213   return distance >> 1;
21214 }
21215 
21216 /* Define this macro to tune LEA priority vs ADD; it takes effect when
21217    there is a choice between LEA and ADD:
21218    Negative value: ADD is preferred over LEA
21219    Zero: Neutral
21220    Positive value: LEA is preferred over ADD.  */
21221 #define IX86_LEA_PRIORITY 0
21222 
21223 /* Return true if usage of lea INSN has performance advantage
21224    over a sequence of instructions.  Instructions sequence has
21225    SPLIT_COST cycles higher latency than lea latency.  */
21226 
21227 static bool
21228 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
21229 		      unsigned int regno2, int split_cost, bool has_scale)
21230 {
21231   int dist_define, dist_use;
21232 
21233   /* For Silvermont, if a 2-source or 3-source LEA is used for a
21234      non-destructive destination, or because the SCALE capability
21235      is wanted, the use of LEA is justified.  */
21236   if (TARGET_SILVERMONT || TARGET_INTEL)
21237     {
21238       if (has_scale)
21239 	return true;
21240       if (split_cost < 1)
21241 	return false;
21242       if (regno0 == regno1 || regno0 == regno2)
21243 	return false;
21244       return true;
21245     }
21246 
21247   dist_define = distance_non_agu_define (regno1, regno2, insn);
21248   dist_use = distance_agu_use (regno0, insn);
21249 
21250   if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
21251     {
21252       /* If there is no non-AGU operand definition, no AGU
21253 	 operand usage and the split cost is 0, then both the lea
21254 	 and non-lea variants have the same priority.  Currently
21255 	 we prefer lea for 64-bit code and non-lea for 32-bit
21256 	 code.  */
21257       if (dist_use < 0 && split_cost == 0)
21258 	return TARGET_64BIT || IX86_LEA_PRIORITY;
21259       else
21260 	return true;
21261     }
21262 
21263   /* With a longer definition distance, lea is preferable.  Here we
21264      adjust the distance to take into account the splitting cost and
21265      the lea priority.  */
21266   dist_define += split_cost + IX86_LEA_PRIORITY;
21267 
21268   /* If there is no use in a memory address then we just check
21269      that the split cost exceeds the AGU stall.  */
21270   if (dist_use < 0)
21271     return dist_define > LEA_MAX_STALL;
21272 
21273   /* If this insn has both a backward non-AGU dependence and a forward
21274      AGU dependence, the one with the shorter distance takes effect.  */
21275   return dist_define >= dist_use;
21276 }
21277 
21278 /* Return true if it is legal to clobber flags by INSN and
21279    false otherwise.  */
21280 
21281 static bool
21282 ix86_ok_to_clobber_flags (rtx_insn *insn)
21283 {
21284   basic_block bb = BLOCK_FOR_INSN (insn);
21285   df_ref use;
21286   bitmap live;
21287 
21288   while (insn)
21289     {
21290       if (NONDEBUG_INSN_P (insn))
21291 	{
21292 	  FOR_EACH_INSN_USE (use, insn)
21293 	    if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
21294 	      return false;
21295 
21296 	  if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
21297 	    return true;
21298 	}
21299 
21300       if (insn == BB_END (bb))
21301 	break;
21302 
21303       insn = NEXT_INSN (insn);
21304     }
21305 
21306   live = df_get_live_out (bb);
21307   return !REGNO_REG_SET_P (live, FLAGS_REG);
21308 }
21309 
21310 /* Return true if we need to split op0 = op1 + op2 into a sequence of
21311    move and add to avoid AGU stalls.  */
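/* For illustration: on an AGU-sensitive target an insn such as
     lea    (%rsi,%rdi), %rax
   may be split into
     mov    %rsi, %rax
     add    %rdi, %rax
   so that the addition executes on the ALU rather than the AGU.  */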
21312 
21313 bool
21314 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
21315 {
21316   unsigned int regno0, regno1, regno2;
21317 
21318   /* Check if we need to optimize.  */
21319   if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
21320     return false;
21321 
21322   /* Check it is correct to split here.  */
21323   if (!ix86_ok_to_clobber_flags (insn))
21324     return false;
21325 
21326   regno0 = true_regnum (operands[0]);
21327   regno1 = true_regnum (operands[1]);
21328   regno2 = true_regnum (operands[2]);
21329 
21330   /* We only need to split adds with a non-destructive
21331      destination operand.  */
21332   if (regno0 == regno1 || regno0 == regno2)
21333     return false;
21334   else
21335     return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
21336 }
21337 
21338 /* Return true if we should emit an lea instruction instead of a mov
21339    instruction.  */
21340 
21341 bool
21342 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
21343 {
21344   unsigned int regno0, regno1;
21345 
21346   /* Check if we need to optimize.  */
21347   if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
21348     return false;
21349 
21350   /* Use lea for reg to reg moves only.  */
21351   if (!REG_P (operands[0]) || !REG_P (operands[1]))
21352     return false;
21353 
21354   regno0 = true_regnum (operands[0]);
21355   regno1 = true_regnum (operands[1]);
21356 
21357   return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
21358 }
21359 
21360 /* Return true if we need to split the lea into a sequence of
21361    instructions to avoid AGU stalls.  */
21362 
21363 bool
21364 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
21365 {
21366   unsigned int regno0, regno1, regno2;
21367   int split_cost;
21368   struct ix86_address parts;
21369   int ok;
21370 
21371   /* Check if we need to optimize.  */
21372   if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
21373     return false;
21374 
21375   /* The "at least two components" test below might not catch simple
21376      move or zero extension insns if parts.base is non-NULL and parts.disp
21377      is const0_rtx as the only components in the address, e.g. if the
21378      register is %rbp or %r13.  As this test is much cheaper and moves or
21379      zero extensions are the common case, do this check first.  */
21380   if (REG_P (operands[1])
21381       || (SImode_address_operand (operands[1], VOIDmode)
21382 	  && REG_P (XEXP (operands[1], 0))))
21383     return false;
21384 
21385   /* Check if it is OK to split here.  */
21386   if (!ix86_ok_to_clobber_flags (insn))
21387     return false;
21388 
21389   ok = ix86_decompose_address (operands[1], &parts);
21390   gcc_assert (ok);
21391 
21392   /* There should be at least two components in the address.  */
21393   if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
21394       + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
21395     return false;
21396 
21397   /* We should not split into an add if a non-legitimate PIC
21398      operand is used as the displacement.  */
21399   if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
21400     return false;
21401 
21402   regno0 = true_regnum (operands[0]);
21403   regno1 = INVALID_REGNUM;
21404   regno2 = INVALID_REGNUM;
21405 
21406   if (parts.base)
21407     regno1 = true_regnum (parts.base);
21408   if (parts.index)
21409     regno2 = true_regnum (parts.index);
21410 
21411   split_cost = 0;
21412 
21413   /* Compute how many cycles we will add to the execution time
21414      if we split the lea into a sequence of instructions.  */
21415   if (parts.base || parts.index)
21416     {
21417       /* Have to use a mov instruction if the non-destructive
21418 	 destination form is used.  */
21419       if (regno1 != regno0 && regno2 != regno0)
21420 	split_cost += 1;
21421 
21422       /* Have to add index to base if both exist.  */
21423       if (parts.base && parts.index)
21424 	split_cost += 1;
21425 
21426       /* Have to use shift and adds if scale is 2 or greater.  */
21427       if (parts.scale > 1)
21428 	{
21429 	  if (regno0 != regno1)
21430 	    split_cost += 1;
21431 	  else if (regno2 == regno0)
21432 	    split_cost += 4;
21433 	  else
21434 	    split_cost += parts.scale;
21435 	}
21436 
21437       /* Have to use an add instruction with an immediate if
21438 	 disp is non-zero.  */
21439       if (parts.disp && parts.disp != const0_rtx)
21440 	split_cost += 1;
21441 
21442       /* Subtract the price of lea.  */
21443       split_cost -= 1;
21444     }
21445 
21446   return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
21447 				parts.scale > 1);
21448 }
21449 
21450 /* Emit x86 binary operand CODE in mode MODE, where the first operand
21451    matches destination.  RTX includes clobber of FLAGS_REG.  */
21452 
21453 static void
21454 ix86_emit_binop (enum rtx_code code, machine_mode mode,
21455 		 rtx dst, rtx src)
21456 {
21457   rtx op, clob;
21458 
21459   op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src));
21460   clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
21461 
21462   emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
21463 }
21464 
21465 /* Return true if regno1's definition is nearer to INSN than regno2's.  */
21466 
21467 static bool
21468 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
21469 {
21470   rtx_insn *prev = insn;
21471   rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
21472 
21473   if (insn == start)
21474     return false;
21475   while (prev && prev != start)
21476     {
21477       if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
21478 	{
21479 	  prev = PREV_INSN (prev);
21480 	  continue;
21481 	}
21482       if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
21483 	return true;
21484       else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
21485 	return false;
21486       prev = PREV_INSN (prev);
21487     }
21488 
21489   /* None of the regs is defined in the bb.  */
21490   return false;
21491 }
21492 
21493 /* Split lea instructions into a sequence of instructions
21494    which are executed on the ALU to avoid AGU stalls.
21495    It is assumed that it is allowed to clobber the flags register
21496    at the lea position.  */
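/* For example, assuming the flags may be clobbered,
     lea    0x8(%rbx,%rcx,4), %rax
   can be replaced by the ALU sequence
     mov    %rcx, %rax
     shl    $2, %rax
     add    %rbx, %rax
     add    $0x8, %rax
   (scale 4 is handled by the shift below, via exact_log2).  */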
21497 
21498 void
21499 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
21500 {
21501   unsigned int regno0, regno1, regno2;
21502   struct ix86_address parts;
21503   rtx target, tmp;
21504   int ok, adds;
21505 
21506   ok = ix86_decompose_address (operands[1], &parts);
21507   gcc_assert (ok);
21508 
21509   target = gen_lowpart (mode, operands[0]);
21510 
21511   regno0 = true_regnum (target);
21512   regno1 = INVALID_REGNUM;
21513   regno2 = INVALID_REGNUM;
21514 
21515   if (parts.base)
21516     {
21517       parts.base = gen_lowpart (mode, parts.base);
21518       regno1 = true_regnum (parts.base);
21519     }
21520 
21521   if (parts.index)
21522     {
21523       parts.index = gen_lowpart (mode, parts.index);
21524       regno2 = true_regnum (parts.index);
21525     }
21526 
21527   if (parts.disp)
21528     parts.disp = gen_lowpart (mode, parts.disp);
21529 
21530   if (parts.scale > 1)
21531     {
21532       /* Case r1 = r1 + ...  */
21533       if (regno1 == regno0)
21534 	{
21535 	  /* If we have a case r1 = r1 + C * r2 then we
21536 	     would have to use multiplication, which is very
21537 	     expensive.  Assume the cost model is wrong if we
21538 	     get such a case here.  */
21539 	  gcc_assert (regno2 != regno0);
21540 
21541 	  for (adds = parts.scale; adds > 0; adds--)
21542 	    ix86_emit_binop (PLUS, mode, target, parts.index);
21543 	}
21544       else
21545 	{
21546 	  /* r1 = r2 + r3 * C case.  Need to move r3 into r1.  */
21547 	  if (regno0 != regno2)
21548 	    emit_insn (gen_rtx_SET (target, parts.index));
21549 
21550 	  /* Use shift for scaling.  */
21551 	  ix86_emit_binop (ASHIFT, mode, target,
21552 			   GEN_INT (exact_log2 (parts.scale)));
21553 
21554 	  if (parts.base)
21555 	    ix86_emit_binop (PLUS, mode, target, parts.base);
21556 
21557 	  if (parts.disp && parts.disp != const0_rtx)
21558 	    ix86_emit_binop (PLUS, mode, target, parts.disp);
21559 	}
21560     }
21561   else if (!parts.base && !parts.index)
21562     {
21563       gcc_assert (parts.disp);
21564       emit_insn (gen_rtx_SET (target, parts.disp));
21565     }
21566   else
21567     {
21568       if (!parts.base)
21569 	{
21570 	  if (regno0 != regno2)
21571 	    emit_insn (gen_rtx_SET (target, parts.index));
21572 	}
21573       else if (!parts.index)
21574 	{
21575 	  if (regno0 != regno1)
21576 	    emit_insn (gen_rtx_SET (target, parts.base));
21577 	}
21578       else
21579 	{
21580 	  if (regno0 == regno1)
21581 	    tmp = parts.index;
21582 	  else if (regno0 == regno2)
21583 	    tmp = parts.base;
21584 	  else
21585 	    {
21586 	      rtx tmp1;
21587 
21588 	      /* Find the better operand for the SET instruction, depending
21589 		 on which definition is farther from the insn.  */
21590 	      if (find_nearest_reg_def (insn, regno1, regno2))
21591 		tmp = parts.index, tmp1 = parts.base;
21592 	      else
21593 		tmp = parts.base, tmp1 = parts.index;
21594 
21595 	      emit_insn (gen_rtx_SET (target, tmp));
21596 
21597 	      if (parts.disp && parts.disp != const0_rtx)
21598 		ix86_emit_binop (PLUS, mode, target, parts.disp);
21599 
21600 	      ix86_emit_binop (PLUS, mode, target, tmp1);
21601 	      return;
21602 	    }
21603 
21604 	  ix86_emit_binop (PLUS, mode, target, tmp);
21605 	}
21606 
21607       if (parts.disp && parts.disp != const0_rtx)
21608 	ix86_emit_binop (PLUS, mode, target, parts.disp);
21609     }
21610 }
21611 
21612 /* Return true if it is ok to optimize an ADD operation to an LEA
21613    operation to avoid flag register consumption.  For most processors,
21614    ADD is faster than LEA.  For processors like BONNELL, if the
21615    destination register of the LEA holds an actual address which will be
21616    used soon, LEA is better; otherwise ADD is better.  */
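/* For example, a following "mov (%rax), %rdx" uses %rax as an address; on
   such a target, forming %rax with lea keeps the address computation on
   the AGU and avoids the stall an ALU add would cause, while a plain add
   is preferred when no nearby address use exists.  */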
21617 
21618 bool
21619 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
21620 {
21621   unsigned int regno0 = true_regnum (operands[0]);
21622   unsigned int regno1 = true_regnum (operands[1]);
21623   unsigned int regno2 = true_regnum (operands[2]);
21624 
21625   /* If a = b + c (a != b && a != c), we must use the lea form.  */
21626   if (regno0 != regno1 && regno0 != regno2)
21627     return true;
21628 
21629   if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
21630     return false;
21631 
21632   return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
21633 }
21634 
21635 /* Return true if the destination reg of SET_BODY is the shift count of
21636    USE_BODY.  */
21637 
21638 static bool
21639 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
21640 {
21641   rtx set_dest;
21642   rtx shift_rtx;
21643   int i;
21644 
21645   /* Retrieve destination of SET_BODY.  */
21646   switch (GET_CODE (set_body))
21647     {
21648     case SET:
21649       set_dest = SET_DEST (set_body);
21650       if (!set_dest || !REG_P (set_dest))
21651 	return false;
21652       break;
21653     case PARALLEL:
21654       for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
21655 	if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
21656 					  use_body))
21657 	  return true;
21658       /* FALLTHROUGH */
21659     default:
21660       return false;
21661     }
21662 
21663   /* Retrieve shift count of USE_BODY.  */
21664   switch (GET_CODE (use_body))
21665     {
21666     case SET:
21667       shift_rtx = XEXP (use_body, 1);
21668       break;
21669     case PARALLEL:
21670       for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
21671 	if (ix86_dep_by_shift_count_body (set_body,
21672 					  XVECEXP (use_body, 0, i)))
21673 	  return true;
21674       /* FALLTHROUGH */
21675     default:
21676       return false;
21677     }
21678 
21679   if (shift_rtx
21680       && (GET_CODE (shift_rtx) == ASHIFT
21681 	  || GET_CODE (shift_rtx) == LSHIFTRT
21682 	  || GET_CODE (shift_rtx) == ASHIFTRT
21683 	  || GET_CODE (shift_rtx) == ROTATE
21684 	  || GET_CODE (shift_rtx) == ROTATERT))
21685     {
21686       rtx shift_count = XEXP (shift_rtx, 1);
21687 
21688 	  /* Return true if the shift count is the dest of SET_BODY.  */
21689       if (REG_P (shift_count))
21690 	{
21691 	  /* Add this check since the function can be invoked before register
21692 	     allocation by the pre-reload scheduler.  */
21693 	  if (reload_completed
21694 	      && true_regnum (set_dest) == true_regnum (shift_count))
21695 	    return true;
21696 	  else if (REGNO (set_dest) == REGNO (shift_count))
21697 	    return true;
21698 	}
21699     }
21700 
21701   return false;
21702 }
21703 
21704 /* Return true if the destination reg of SET_INSN is the shift count of
21705    USE_INSN.  */
21706 
21707 bool
21708 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
21709 {
21710   return ix86_dep_by_shift_count_body (PATTERN (set_insn),
21711 				       PATTERN (use_insn));
21712 }
21713 
21714 /* Return TRUE or FALSE depending on whether the unary operator meets the
21715    appropriate constraints.  */
21716 
21717 bool
21718 ix86_unary_operator_ok (enum rtx_code,
21719 			machine_mode,
21720 			rtx operands[2])
21721 {
21722   /* If one of operands is memory, source and destination must match.  */
21723   if ((MEM_P (operands[0])
21724        || MEM_P (operands[1]))
21725       && ! rtx_equal_p (operands[0], operands[1]))
21726     return false;
21727   return true;
21728 }
21729 
21730 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
21731    are ok, keeping in mind the possible movddup alternative.  */
21732 
21733 bool
21734 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
21735 {
21736   if (MEM_P (operands[0]))
21737     return rtx_equal_p (operands[0], operands[1 + high]);
21738   if (MEM_P (operands[1]) && MEM_P (operands[2]))
21739     return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
21740   return true;
21741 }
21742 
21743 /* Post-reload splitter for converting an SF or DFmode value in an
21744    SSE register into an unsigned SImode.  */
21745 
21746 void
21747 ix86_split_convert_uns_si_sse (rtx operands[])
21748 {
21749   machine_mode vecmode;
21750   rtx value, large, zero_or_two31, input, two31, x;
21751 
21752   large = operands[1];
21753   zero_or_two31 = operands[2];
21754   input = operands[3];
21755   two31 = operands[4];
21756   vecmode = GET_MODE (large);
21757   value = gen_rtx_REG (vecmode, REGNO (operands[0]));
21758 
21759   /* Load up the value into the low element.  We must ensure that the other
21760      elements are valid floats -- zero is the easiest such value.  */
21761   if (MEM_P (input))
21762     {
21763       if (vecmode == V4SFmode)
21764 	emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
21765       else
21766 	emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
21767     }
21768   else
21769     {
21770       input = gen_rtx_REG (vecmode, REGNO (input));
21771       emit_move_insn (value, CONST0_RTX (vecmode));
21772       if (vecmode == V4SFmode)
21773 	emit_insn (gen_sse_movss (value, value, input));
21774       else
21775 	emit_insn (gen_sse2_movsd (value, value, input));
21776     }
21777 
21778   emit_move_insn (large, two31);
21779   emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
21780 
21781   x = gen_rtx_fmt_ee (LE, vecmode, large, value);
21782   emit_insn (gen_rtx_SET (large, x));
21783 
21784   x = gen_rtx_AND (vecmode, zero_or_two31, large);
21785   emit_insn (gen_rtx_SET (zero_or_two31, x));
21786 
21787   x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
21788   emit_insn (gen_rtx_SET (value, x));
21789 
21790   large = gen_rtx_REG (V4SImode, REGNO (large));
21791   emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
21792 
21793   x = gen_rtx_REG (V4SImode, REGNO (value));
21794   if (vecmode == V4SFmode)
21795     emit_insn (gen_fix_truncv4sfv4si2 (x, value));
21796   else
21797     emit_insn (gen_sse2_cvttpd2dq (x, value));
21798   value = x;
21799 
21800   emit_insn (gen_xorv4si3 (value, value, large));
21801 }
21802 
21803 /* Convert an unsigned DImode value into a DFmode, using only SSE.
21804    Expects the 64-bit DImode to be supplied in a pair of integral
21805    registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
21806    -mfpmath=sse, !optimize_size only.  */
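/* The conversion below works on the two 32-bit halves of the input:
   each half is glued to a fixed exponent word so that the low half
   becomes 0x1.0p52 + lo and the high half becomes 0x1.0p84 + hi * 2^32;
   subtracting the biases and summing gives the result.  For example,
   input 0x0000000100000002 yields 4294967296.0 + 2.0 = 4294967298.0.  */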
21807 
21808 void
21809 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
21810 {
21811   REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
21812   rtx int_xmm, fp_xmm;
21813   rtx biases, exponents;
21814   rtx x;
21815 
21816   int_xmm = gen_reg_rtx (V4SImode);
21817   if (TARGET_INTER_UNIT_MOVES_TO_VEC)
21818     emit_insn (gen_movdi_to_sse (int_xmm, input));
21819   else if (TARGET_SSE_SPLIT_REGS)
21820     {
21821       emit_clobber (int_xmm);
21822       emit_move_insn (gen_lowpart (DImode, int_xmm), input);
21823     }
21824   else
21825     {
21826       x = gen_reg_rtx (V2DImode);
21827       ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
21828       emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
21829     }
21830 
21831   x = gen_rtx_CONST_VECTOR (V4SImode,
21832 			    gen_rtvec (4, GEN_INT (0x43300000UL),
21833 				       GEN_INT (0x45300000UL),
21834 				       const0_rtx, const0_rtx));
21835   exponents = validize_mem (force_const_mem (V4SImode, x));
21836 
21837   /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
21838   emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
21839 
21840   /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
21841      yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
21842      Similarly (0x45300000UL ## fp_value_hi_xmm) yields
21843      (0x1.0p84 + double(fp_value_hi_xmm)).
21844      Note these exponents differ by 32.  */
21845 
21846   fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
21847 
21848   /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
21849      in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
21850   real_ldexp (&bias_lo_rvt, &dconst1, 52);
21851   real_ldexp (&bias_hi_rvt, &dconst1, 84);
21852   biases = const_double_from_real_value (bias_lo_rvt, DFmode);
21853   x = const_double_from_real_value (bias_hi_rvt, DFmode);
21854   biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
21855   biases = validize_mem (force_const_mem (V2DFmode, biases));
21856   emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
21857 
21858   /* Add the upper and lower DFmode values together.  */
21859   if (TARGET_SSE3)
21860     emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
21861   else
21862     {
21863       x = copy_to_mode_reg (V2DFmode, fp_xmm);
21864       emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
21865       emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
21866     }
21867 
21868   ix86_expand_vector_extract (false, target, fp_xmm, 0);
21869 }
21870 
21871 /* Not used, but eases macroization of patterns.  */
21872 void
21873 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
21874 {
21875   gcc_unreachable ();
21876 }
21877 
21878 /* Convert an unsigned SImode value into a DFmode.  Only currently used
21879    for SSE, but applicable anywhere.  */
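/* The trick below: add -2^31 to the input with integer wrap-around, do a
   signed SImode->DFmode conversion, and then add 0x1.0p31 back in DFmode.
   For example, input 0x80000003 (2147483651) wraps to 3, converts to 3.0,
   and adding 2147483648.0 gives 2147483651.0.  */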
21880 
21881 void
21882 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
21883 {
21884   REAL_VALUE_TYPE TWO31r;
21885   rtx x, fp;
21886 
21887   x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
21888 			   NULL, 1, OPTAB_DIRECT);
21889 
21890   fp = gen_reg_rtx (DFmode);
21891   emit_insn (gen_floatsidf2 (fp, x));
21892 
21893   real_ldexp (&TWO31r, &dconst1, 31);
21894   x = const_double_from_real_value (TWO31r, DFmode);
21895 
21896   x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
21897   if (x != target)
21898     emit_move_insn (target, x);
21899 }
21900 
21901 /* Convert a signed DImode value into a DFmode.  Only used for SSE in
21902    32-bit mode; otherwise we have a direct convert instruction.  */
21903 
21904 void
21905 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
21906 {
21907   REAL_VALUE_TYPE TWO32r;
21908   rtx fp_lo, fp_hi, x;
21909 
21910   fp_lo = gen_reg_rtx (DFmode);
21911   fp_hi = gen_reg_rtx (DFmode);
21912 
21913   emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
21914 
21915   real_ldexp (&TWO32r, &dconst1, 32);
21916   x = const_double_from_real_value (TWO32r, DFmode);
21917   fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
21918 
21919   ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
21920 
21921   x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
21922 			   0, OPTAB_DIRECT);
21923   if (x != target)
21924     emit_move_insn (target, x);
21925 }
21926 
21927 /* Convert an unsigned SImode value into an SFmode, using only SSE.
21928    For x86_32, -mfpmath=sse, !optimize_size only.  */
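/* The value is split into its low and high 16-bit halves, each half is
   converted exactly to SFmode, and the result is hi * 0x1.0p16 + lo.
   For example, input 0x00012345 gives 1.0 * 65536.0 + 9029.0 = 74565.0.  */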
21929 void
21930 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
21931 {
21932   REAL_VALUE_TYPE ONE16r;
21933   rtx fp_hi, fp_lo, int_hi, int_lo, x;
21934 
21935   real_ldexp (&ONE16r, &dconst1, 16);
21936   x = const_double_from_real_value (ONE16r, SFmode);
21937   int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
21938 				      NULL, 0, OPTAB_DIRECT);
21939   int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
21940 				      NULL, 0, OPTAB_DIRECT);
21941   fp_hi = gen_reg_rtx (SFmode);
21942   fp_lo = gen_reg_rtx (SFmode);
21943   emit_insn (gen_floatsisf2 (fp_hi, int_hi));
21944   emit_insn (gen_floatsisf2 (fp_lo, int_lo));
21945   fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
21946 			       0, OPTAB_DIRECT);
21947   fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
21948 			       0, OPTAB_DIRECT);
21949   if (!rtx_equal_p (target, fp_hi))
21950     emit_move_insn (target, fp_hi);
21951 }
21952 
21953 /* floatunsv{4,8}siv{4,8}sf2 expander.  Expand code to convert
21954    a vector of unsigned ints VAL to vector of floats TARGET.  */
21955 
21956 void
21957 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
21958 {
21959   rtx tmp[8];
21960   REAL_VALUE_TYPE TWO16r;
21961   machine_mode intmode = GET_MODE (val);
21962   machine_mode fltmode = GET_MODE (target);
21963   rtx (*cvt) (rtx, rtx);
21964 
21965   if (intmode == V4SImode)
21966     cvt = gen_floatv4siv4sf2;
21967   else
21968     cvt = gen_floatv8siv8sf2;
21969   tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
21970   tmp[0] = force_reg (intmode, tmp[0]);
21971   tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
21972 				OPTAB_DIRECT);
21973   tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
21974 				NULL_RTX, 1, OPTAB_DIRECT);
21975   tmp[3] = gen_reg_rtx (fltmode);
21976   emit_insn (cvt (tmp[3], tmp[1]));
21977   tmp[4] = gen_reg_rtx (fltmode);
21978   emit_insn (cvt (tmp[4], tmp[2]));
21979   real_ldexp (&TWO16r, &dconst1, 16);
21980   tmp[5] = const_double_from_real_value (TWO16r, SFmode);
21981   tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
21982   tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
21983 				OPTAB_DIRECT);
21984   tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
21985 				OPTAB_DIRECT);
21986   if (tmp[7] != target)
21987     emit_move_insn (target, tmp[7]);
21988 }
21989 
21990 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
21991    pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
21992    This is done by doing just a signed conversion if < 0x1p31, and otherwise by
21993    subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards.  */
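/* For example, the V4SFmode element 3221225472.0 (0x1.8p31) is >= 0x1p31,
   so 0x1p31 is subtracted to give 1073741824.0; the signed truncation then
   produces 0x40000000, and xoring in the 0x80000000 bit from *XORP restores
   the unsigned result 0xC0000000.  */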
21994 
21995 rtx
21996 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
21997 {
21998   REAL_VALUE_TYPE TWO31r;
21999   rtx two31r, tmp[4];
22000   machine_mode mode = GET_MODE (val);
22001   machine_mode scalarmode = GET_MODE_INNER (mode);
22002   machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
22003   rtx (*cmp) (rtx, rtx, rtx, rtx);
22004   int i;
22005 
22006   for (i = 0; i < 3; i++)
22007     tmp[i] = gen_reg_rtx (mode);
22008   real_ldexp (&TWO31r, &dconst1, 31);
22009   two31r = const_double_from_real_value (TWO31r, scalarmode);
22010   two31r = ix86_build_const_vector (mode, 1, two31r);
22011   two31r = force_reg (mode, two31r);
22012   switch (mode)
22013     {
22014     case E_V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
22015     case E_V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
22016     case E_V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
22017     case E_V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
22018     default: gcc_unreachable ();
22019     }
22020   tmp[3] = gen_rtx_LE (mode, two31r, val);
22021   emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
22022   tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
22023 				0, OPTAB_DIRECT);
22024   if (intmode == V4SImode || TARGET_AVX2)
22025     *xorp = expand_simple_binop (intmode, ASHIFT,
22026 				 gen_lowpart (intmode, tmp[0]),
22027 				 GEN_INT (31), NULL_RTX, 0,
22028 				 OPTAB_DIRECT);
22029   else
22030     {
22031       rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31);
22032       two31 = ix86_build_const_vector (intmode, 1, two31);
22033       *xorp = expand_simple_binop (intmode, AND,
22034 				   gen_lowpart (intmode, tmp[0]),
22035 				   two31, NULL_RTX, 0,
22036 				   OPTAB_DIRECT);
22037     }
22038   return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
22039 			      0, OPTAB_DIRECT);
22040 }
22041 
22042 /* A subroutine of ix86_build_signbit_mask.  If VECT is true,
22043    then replicate the value for all elements of the vector
22044    register.  */
22045 
22046 rtx
22047 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
22048 {
22049   int i, n_elt;
22050   rtvec v;
22051   machine_mode scalar_mode;
22052 
22053   switch (mode)
22054     {
22055     case E_V64QImode:
22056     case E_V32QImode:
22057     case E_V16QImode:
22058     case E_V32HImode:
22059     case E_V16HImode:
22060     case E_V8HImode:
22061     case E_V16SImode:
22062     case E_V8SImode:
22063     case E_V4SImode:
22064     case E_V8DImode:
22065     case E_V4DImode:
22066     case E_V2DImode:
22067       gcc_assert (vect);
22068       /* FALLTHRU */
22069     case E_V16SFmode:
22070     case E_V8SFmode:
22071     case E_V4SFmode:
22072     case E_V8DFmode:
22073     case E_V4DFmode:
22074     case E_V2DFmode:
22075       n_elt = GET_MODE_NUNITS (mode);
22076       v = rtvec_alloc (n_elt);
22077       scalar_mode = GET_MODE_INNER (mode);
22078 
22079       RTVEC_ELT (v, 0) = value;
22080 
22081       for (i = 1; i < n_elt; ++i)
22082 	RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
22083 
22084       return gen_rtx_CONST_VECTOR (mode, v);
22085 
22086     default:
22087       gcc_unreachable ();
22088     }
22089 }
22090 
22091 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
22092    and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
22093    for an SSE register.  If VECT is true, then replicate the mask for
22094    all elements of the vector register.  If INVERT is true, then create
22095    a mask excluding the sign bit.  */
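/* For example, for V4SFmode with VECT and !INVERT this builds the constant
   { -0.0f, -0.0f, -0.0f, -0.0f } (0x80000000 in each lane), while INVERT
   gives 0x7fffffff per lane, which clears the sign bit when ANDed in.  */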
22096 
22097 rtx
22098 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
22099 {
22100   machine_mode vec_mode, imode;
22101   wide_int w;
22102   rtx mask, v;
22103 
22104   switch (mode)
22105     {
22106     case E_V16SImode:
22107     case E_V16SFmode:
22108     case E_V8SImode:
22109     case E_V4SImode:
22110     case E_V8SFmode:
22111     case E_V4SFmode:
22112       vec_mode = mode;
22113       imode = SImode;
22114       break;
22115 
22116     case E_V8DImode:
22117     case E_V4DImode:
22118     case E_V2DImode:
22119     case E_V8DFmode:
22120     case E_V4DFmode:
22121     case E_V2DFmode:
22122       vec_mode = mode;
22123       imode = DImode;
22124       break;
22125 
22126     case E_TImode:
22127     case E_TFmode:
22128       vec_mode = VOIDmode;
22129       imode = TImode;
22130       break;
22131 
22132     default:
22133       gcc_unreachable ();
22134     }
22135 
22136   machine_mode inner_mode = GET_MODE_INNER (mode);
22137   w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
22138 			   GET_MODE_BITSIZE (inner_mode));
22139   if (invert)
22140     w = wi::bit_not (w);
22141 
22142   /* Force this value into the low part of a fp vector constant.  */
22143   mask = immed_wide_int_const (w, imode);
22144   mask = gen_lowpart (inner_mode, mask);
22145 
22146   if (vec_mode == VOIDmode)
22147     return force_reg (inner_mode, mask);
22148 
22149   v = ix86_build_const_vector (vec_mode, vect, mask);
22150   return force_reg (vec_mode, v);
22151 }
22152 
22153 /* Generate code for floating point ABS or NEG.  */
22154 
22155 void
22156 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
22157 				rtx operands[])
22158 {
22159   rtx mask, set, dst, src;
22160   bool use_sse = false;
22161   bool vector_mode = VECTOR_MODE_P (mode);
22162   machine_mode vmode = mode;
22163 
22164   if (vector_mode)
22165     use_sse = true;
22166   else if (mode == TFmode)
22167     use_sse = true;
22168   else if (TARGET_SSE_MATH)
22169     {
22170       use_sse = SSE_FLOAT_MODE_P (mode);
22171       if (mode == SFmode)
22172 	vmode = V4SFmode;
22173       else if (mode == DFmode)
22174 	vmode = V2DFmode;
22175     }
22176 
22177   /* NEG and ABS performed with SSE use bitwise mask operations.
22178      Create the appropriate mask now.  */
22179   if (use_sse)
22180     mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
22181   else
22182     mask = NULL_RTX;
22183 
22184   dst = operands[0];
22185   src = operands[1];
22186 
22187   set = gen_rtx_fmt_e (code, mode, src);
22188   set = gen_rtx_SET (dst, set);
22189 
22190   if (mask)
22191     {
22192       rtx use, clob;
22193       rtvec par;
22194 
22195       use = gen_rtx_USE (VOIDmode, mask);
22196       if (vector_mode)
22197 	par = gen_rtvec (2, set, use);
22198       else
22199 	{
22200           clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
22201 	  par = gen_rtvec (3, set, use, clob);
22202         }
22203       emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
22204     }
22205   else
22206     emit_insn (set);
22207 }
22208 
22209 /* Expand a copysign operation.  Special case operand 0 being a constant.  */
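/* The underlying identity is copysign (a, b) = (a & ~signbit) | (b & signbit),
   computed with masks from ix86_build_signbit_mask.  When the first source
   operand is a constant, |a| is folded at expand time, so only the extraction
   of b's sign bit and the final OR remain for the post-reload splitter.  */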
22210 
22211 void
22212 ix86_expand_copysign (rtx operands[])
22213 {
22214   machine_mode mode, vmode;
22215   rtx dest, op0, op1, mask, nmask;
22216 
22217   dest = operands[0];
22218   op0 = operands[1];
22219   op1 = operands[2];
22220 
22221   mode = GET_MODE (dest);
22222 
22223   if (mode == SFmode)
22224     vmode = V4SFmode;
22225   else if (mode == DFmode)
22226     vmode = V2DFmode;
22227   else
22228     vmode = mode;
22229 
22230   if (CONST_DOUBLE_P (op0))
22231     {
22232       rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
22233 
22234       if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
22235 	op0 = simplify_unary_operation (ABS, mode, op0, mode);
22236 
22237       if (mode == SFmode || mode == DFmode)
22238 	{
22239 	  if (op0 == CONST0_RTX (mode))
22240 	    op0 = CONST0_RTX (vmode);
22241 	  else
22242 	    {
22243 	      rtx v = ix86_build_const_vector (vmode, false, op0);
22244 
22245 	      op0 = force_reg (vmode, v);
22246 	    }
22247 	}
22248       else if (op0 != CONST0_RTX (mode))
22249 	op0 = force_reg (mode, op0);
22250 
22251       mask = ix86_build_signbit_mask (vmode, 0, 0);
22252 
22253       if (mode == SFmode)
22254 	copysign_insn = gen_copysignsf3_const;
22255       else if (mode == DFmode)
22256 	copysign_insn = gen_copysigndf3_const;
22257       else
22258 	copysign_insn = gen_copysigntf3_const;
22259 
22260       emit_insn (copysign_insn (dest, op0, op1, mask));
22261     }
22262   else
22263     {
22264       rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
22265 
22266       nmask = ix86_build_signbit_mask (vmode, 0, 1);
22267       mask = ix86_build_signbit_mask (vmode, 0, 0);
22268 
22269       if (mode == SFmode)
22270 	copysign_insn = gen_copysignsf3_var;
22271       else if (mode == DFmode)
22272 	copysign_insn = gen_copysigndf3_var;
22273       else
22274 	copysign_insn = gen_copysigntf3_var;
22275 
22276       emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
22277     }
22278 }
22279 
22280 /* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
22281    be a constant, and so has already been expanded into a vector constant.  */
22282 
22283 void
22284 ix86_split_copysign_const (rtx operands[])
22285 {
22286   machine_mode mode, vmode;
22287   rtx dest, op0, mask, x;
22288 
22289   dest = operands[0];
22290   op0 = operands[1];
22291   mask = operands[3];
22292 
22293   mode = GET_MODE (dest);
22294   vmode = GET_MODE (mask);
22295 
22296   dest = lowpart_subreg (vmode, dest, mode);
22297   x = gen_rtx_AND (vmode, dest, mask);
22298   emit_insn (gen_rtx_SET (dest, x));
22299 
22300   if (op0 != CONST0_RTX (vmode))
22301     {
22302       x = gen_rtx_IOR (vmode, dest, op0);
22303       emit_insn (gen_rtx_SET (dest, x));
22304     }
22305 }
22306 
22307 /* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
22308    so we have to do two masks.  */
22309 
22310 void
22311 ix86_split_copysign_var (rtx operands[])
22312 {
22313   machine_mode mode, vmode;
22314   rtx dest, scratch, op0, op1, mask, nmask, x;
22315 
22316   dest = operands[0];
22317   scratch = operands[1];
22318   op0 = operands[2];
22319   op1 = operands[3];
22320   nmask = operands[4];
22321   mask = operands[5];
22322 
22323   mode = GET_MODE (dest);
22324   vmode = GET_MODE (mask);
22325 
22326   if (rtx_equal_p (op0, op1))
22327     {
22328       /* Shouldn't happen often (it's useless, obviously), but when it does
22329 	 we'd generate incorrect code if we continue below.  */
22330       emit_move_insn (dest, op0);
22331       return;
22332     }
22333 
22334   if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
22335     {
22336       gcc_assert (REGNO (op1) == REGNO (scratch));
22337 
22338       x = gen_rtx_AND (vmode, scratch, mask);
22339       emit_insn (gen_rtx_SET (scratch, x));
22340 
22341       dest = mask;
22342       op0 = lowpart_subreg (vmode, op0, mode);
22343       x = gen_rtx_NOT (vmode, dest);
22344       x = gen_rtx_AND (vmode, x, op0);
22345       emit_insn (gen_rtx_SET (dest, x));
22346     }
22347   else
22348     {
22349       if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
22350 	{
22351 	  x = gen_rtx_AND (vmode, scratch, mask);
22352 	}
22353       else						/* alternative 2,4 */
22354 	{
22355           gcc_assert (REGNO (mask) == REGNO (scratch));
22356           op1 = lowpart_subreg (vmode, op1, mode);
22357 	  x = gen_rtx_AND (vmode, scratch, op1);
22358 	}
22359       emit_insn (gen_rtx_SET (scratch, x));
22360 
22361       if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
22362 	{
22363 	  dest = lowpart_subreg (vmode, op0, mode);
22364 	  x = gen_rtx_AND (vmode, dest, nmask);
22365 	}
22366       else						/* alternative 3,4 */
22367 	{
22368           gcc_assert (REGNO (nmask) == REGNO (dest));
22369 	  dest = nmask;
22370 	  op0 = lowpart_subreg (vmode, op0, mode);
22371 	  x = gen_rtx_AND (vmode, dest, op0);
22372 	}
22373       emit_insn (gen_rtx_SET (dest, x));
22374     }
22375 
22376   x = gen_rtx_IOR (vmode, dest, scratch);
22377   emit_insn (gen_rtx_SET (dest, x));
22378 }
22379 
22380 /* Return TRUE or FALSE depending on whether the first SET in INSN
22381    has source and destination with matching CC modes, and that the
22382    CC mode is at least as constrained as REQ_MODE.  */
22383 
22384 bool
22385 ix86_match_ccmode (rtx insn, machine_mode req_mode)
22386 {
22387   rtx set;
22388   machine_mode set_mode;
22389 
22390   set = PATTERN (insn);
22391   if (GET_CODE (set) == PARALLEL)
22392     set = XVECEXP (set, 0, 0);
22393   gcc_assert (GET_CODE (set) == SET);
22394   gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
22395 
22396   set_mode = GET_MODE (SET_DEST (set));
22397   switch (set_mode)
22398     {
22399     case E_CCNOmode:
22400       if (req_mode != CCNOmode
22401 	  && (req_mode != CCmode
22402 	      || XEXP (SET_SRC (set), 1) != const0_rtx))
22403 	return false;
22404       break;
22405     case E_CCmode:
22406       if (req_mode == CCGCmode)
22407 	return false;
22408       /* FALLTHRU */
22409     case E_CCGCmode:
22410       if (req_mode == CCGOCmode || req_mode == CCNOmode)
22411 	return false;
22412       /* FALLTHRU */
22413     case E_CCGOCmode:
22414       if (req_mode == CCZmode)
22415 	return false;
22416       /* FALLTHRU */
22417     case E_CCZmode:
22418       break;
22419 
22420     case E_CCGZmode:
22421 
22422     case E_CCAmode:
22423     case E_CCCmode:
22424     case E_CCOmode:
22425     case E_CCPmode:
22426     case E_CCSmode:
22427       if (set_mode != req_mode)
22428 	return false;
22429       break;
22430 
22431     default:
22432       gcc_unreachable ();
22433     }
22434 
22435   return GET_MODE (SET_SRC (set)) == set_mode;
22436 }
22437 
22438 /* Generate insn patterns to do an integer compare of OPERANDS.  */
22439 
22440 static rtx
22441 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
22442 {
22443   machine_mode cmpmode;
22444   rtx tmp, flags;
22445 
22446   cmpmode = SELECT_CC_MODE (code, op0, op1);
22447   flags = gen_rtx_REG (cmpmode, FLAGS_REG);
22448 
22449   /* This is very simple, but making the interface the same as in the
22450      FP case makes the rest of the code easier.  */
22451   tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
22452   emit_insn (gen_rtx_SET (flags, tmp));
22453 
22454   /* Return the test that should be put into the flags user, i.e.
22455      the bcc, scc, or cmov instruction.  */
22456   return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
22457 }
22458 
22459 /* Figure out whether to use unordered fp comparisons.  */
22460 
22461 static bool
22462 ix86_unordered_fp_compare (enum rtx_code code)
22463 {
22464   if (!TARGET_IEEE_FP)
22465     return false;
22466 
22467   switch (code)
22468     {
22469     case GT:
22470     case GE:
22471     case LT:
22472     case LE:
22473       return false;
22474 
22475     case EQ:
22476     case NE:
22477 
22478     case LTGT:
22479     case UNORDERED:
22480     case ORDERED:
22481     case UNLT:
22482     case UNLE:
22483     case UNGT:
22484     case UNGE:
22485     case UNEQ:
22486       return true;
22487 
22488     default:
22489       gcc_unreachable ();
22490     }
22491 }
22492 
22493 machine_mode
22494 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
22495 {
22496   machine_mode mode = GET_MODE (op0);
22497 
22498   if (SCALAR_FLOAT_MODE_P (mode))
22499     {
22500       gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
22501       return CCFPmode;
22502     }
22503 
22504   switch (code)
22505     {
22506       /* Only zero flag is needed.  */
22507     case EQ:			/* ZF=0 */
22508     case NE:			/* ZF!=0 */
22509       return CCZmode;
22510       /* Codes needing carry flag.  */
22511     case GEU:			/* CF=0 */
22512     case LTU:			/* CF=1 */
22513       /* Detect overflow checks.  They need just the carry flag.  */
22514       if (GET_CODE (op0) == PLUS
22515 	  && (rtx_equal_p (op1, XEXP (op0, 0))
22516 	      || rtx_equal_p (op1, XEXP (op0, 1))))
22517 	return CCCmode;
22518       else
22519 	return CCmode;
22520     case GTU:			/* CF=0 & ZF=0 */
22521     case LEU:			/* CF=1 | ZF=1 */
22522       return CCmode;
22523       /* Codes possibly doable only with the sign flag when
22524          comparing against zero.  */
22525     case GE:			/* SF=OF   or   SF=0 */
22526     case LT:			/* SF<>OF  or   SF=1 */
22527       if (op1 == const0_rtx)
22528 	return CCGOCmode;
22529       else
22530 	/* For other cases Carry flag is not required.  */
22531 	return CCGCmode;
22532       /* Codes doable only with the sign flag when comparing
22533          against zero, but we miss the jump instruction for it,
22534          so we need to use relational tests against the overflow flag,
22535          which thus needs to be zero.  */
22536     case GT:			/* ZF=0 & SF=OF */
22537     case LE:			/* ZF=1 | SF<>OF */
22538       if (op1 == const0_rtx)
22539 	return CCNOmode;
22540       else
22541 	return CCGCmode;
22542       /* The strcmp pattern does (use flags), and combine may ask us for the
22543 	 proper mode.  */
22544     case USE:
22545       return CCmode;
22546     default:
22547       gcc_unreachable ();
22548     }
22549 }
22550 
22551 /* Return the fixed registers used for condition codes.  */
22552 
22553 static bool
22554 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
22555 {
22556   *p1 = FLAGS_REG;
22557   *p2 = FPSR_REG;
22558   return true;
22559 }
22560 
22561 /* If two condition code modes are compatible, return a condition code
22562    mode which is compatible with both.  Otherwise, return
22563    VOIDmode.  */
22564 
22565 static machine_mode
22566 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
22567 {
22568   if (m1 == m2)
22569     return m1;
22570 
22571   if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
22572     return VOIDmode;
22573 
22574   if ((m1 == CCGCmode && m2 == CCGOCmode)
22575       || (m1 == CCGOCmode && m2 == CCGCmode))
22576     return CCGCmode;
22577 
22578   if ((m1 == CCNOmode && m2 == CCGOCmode)
22579       || (m1 == CCGOCmode && m2 == CCNOmode))
22580     return CCNOmode;
22581 
22582   if (m1 == CCZmode
22583       && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
22584     return m2;
22585   else if (m2 == CCZmode
22586 	   && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
22587     return m1;
22588 
22589   switch (m1)
22590     {
22591     default:
22592       gcc_unreachable ();
22593 
22594     case E_CCmode:
22595     case E_CCGCmode:
22596     case E_CCGOCmode:
22597     case E_CCNOmode:
22598     case E_CCAmode:
22599     case E_CCCmode:
22600     case E_CCOmode:
22601     case E_CCPmode:
22602     case E_CCSmode:
22603     case E_CCZmode:
22604       switch (m2)
22605 	{
22606 	default:
22607 	  return VOIDmode;
22608 
22609 	case E_CCmode:
22610 	case E_CCGCmode:
22611 	case E_CCGOCmode:
22612 	case E_CCNOmode:
22613 	case E_CCAmode:
22614 	case E_CCCmode:
22615 	case E_CCOmode:
22616 	case E_CCPmode:
22617 	case E_CCSmode:
22618 	case E_CCZmode:
22619 	  return CCmode;
22620 	}
22621 
22622     case E_CCFPmode:
22623       /* These are only compatible with themselves, which we already
22624 	 checked above.  */
22625       return VOIDmode;
22626     }
22627 }
22628 
22629 
22630 /* Return a comparison we can do that is equivalent to
22631    swap_condition (code), apart possibly from orderedness.
22632    Never change orderedness if TARGET_IEEE_FP, returning
22633    UNKNOWN in that case if necessary.  */
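/* For example, swap_condition (GT) is LT, but the table below returns UNLT:
   the two differ only for unordered operands, which is acceptable when
   !TARGET_IEEE_FP; with TARGET_IEEE_FP we return UNKNOWN instead.  */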
22634 
22635 static enum rtx_code
22636 ix86_fp_swap_condition (enum rtx_code code)
22637 {
22638   switch (code)
22639     {
22640     case GT:                   /* GTU - CF=0 & ZF=0 */
22641       return TARGET_IEEE_FP ? UNKNOWN : UNLT;
22642     case GE:                   /* GEU - CF=0 */
22643       return TARGET_IEEE_FP ? UNKNOWN : UNLE;
22644     case UNLT:                 /* LTU - CF=1 */
22645       return TARGET_IEEE_FP ? UNKNOWN : GT;
22646     case UNLE:                 /* LEU - CF=1 | ZF=1 */
22647       return TARGET_IEEE_FP ? UNKNOWN : GE;
22648     default:
22649       return swap_condition (code);
22650     }
22651 }
22652 
22653 /* Return the cost of comparison CODE using the best strategy for performance.
22654    All of the following functions use the number of instructions as the cost
22655    metric.  In the future this should be tweaked to compute bytes for
22656    optimize_size and take into account instruction performance on various CPUs.  */
22657 
22658 static int
22659 ix86_fp_comparison_cost (enum rtx_code code)
22660 {
22661   int arith_cost;
22662 
22663   /* The cost of code using bit-twiddling on %ah.  */
22664   switch (code)
22665     {
22666     case UNLE:
22667     case UNLT:
22668     case LTGT:
22669     case GT:
22670     case GE:
22671     case UNORDERED:
22672     case ORDERED:
22673     case UNEQ:
22674       arith_cost = 4;
22675       break;
22676     case LT:
22677     case NE:
22678     case EQ:
22679     case UNGE:
22680       arith_cost = TARGET_IEEE_FP ? 5 : 4;
22681       break;
22682     case LE:
22683     case UNGT:
22684       arith_cost = TARGET_IEEE_FP ? 6 : 4;
22685       break;
22686     default:
22687       gcc_unreachable ();
22688     }
22689 
22690   switch (ix86_fp_comparison_strategy (code))
22691     {
22692     case IX86_FPCMP_COMI:
22693       return arith_cost > 4 ? 3 : 2;
22694     case IX86_FPCMP_SAHF:
22695       return arith_cost > 4 ? 4 : 3;
22696     default:
22697       return arith_cost;
22698     }
22699 }
22700 
22701 /* Return the strategy to use for floating-point comparisons.  We assume fcomi
22702    is always preferable where available, since that is also true for size
22703    (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */
22704 
22705 enum ix86_fpcmp_strategy
22706 ix86_fp_comparison_strategy (enum rtx_code)
22707 {
22708   /* Do fcomi/sahf based test when profitable.  */
22709 
22710   if (TARGET_CMOVE)
22711     return IX86_FPCMP_COMI;
22712 
22713   if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
22714     return IX86_FPCMP_SAHF;
22715 
22716   return IX86_FPCMP_ARITH;
22717 }
22718 
22719 /* Swap, force into registers, or otherwise massage the two operands
22720    to a fp comparison.  The operands are updated in place; the new
22721    comparison code is returned.  */
22722 
22723 static enum rtx_code
22724 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
22725 {
22726   bool unordered_compare = ix86_unordered_fp_compare (code);
22727   rtx op0 = *pop0, op1 = *pop1;
22728   machine_mode op_mode = GET_MODE (op0);
22729   bool is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
22730 
22731   /* All of the unordered compare instructions only work on registers.
22732      The same is true of the fcomi compare instructions.  The XFmode
22733      compare instructions require registers except when comparing
22734      against zero or when converting operand 1 from fixed point to
22735      floating point.  */
22736 
22737   if (!is_sse
22738       && (unordered_compare
22739 	  || (op_mode == XFmode
22740 	      && ! (standard_80387_constant_p (op0) == 1
22741 		    || standard_80387_constant_p (op1) == 1)
22742 	      && GET_CODE (op1) != FLOAT)
22743 	  || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
22744     {
22745       op0 = force_reg (op_mode, op0);
22746       op1 = force_reg (op_mode, op1);
22747     }
22748   else
22749     {
22750       /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
22751 	 things around if they appear profitable, otherwise force op0
22752 	 into a register.  */
22753 
22754       if (standard_80387_constant_p (op0) == 0
22755 	  || (MEM_P (op0)
22756 	      && ! (standard_80387_constant_p (op1) == 0
22757 		    || MEM_P (op1))))
22758 	{
22759 	  enum rtx_code new_code = ix86_fp_swap_condition (code);
22760 	  if (new_code != UNKNOWN)
22761 	    {
22762 	      std::swap (op0, op1);
22763 	      code = new_code;
22764 	    }
22765 	}
22766 
22767       if (!REG_P (op0))
22768 	op0 = force_reg (op_mode, op0);
22769 
22770       if (CONSTANT_P (op1))
22771 	{
22772 	  int tmp = standard_80387_constant_p (op1);
22773 	  if (tmp == 0)
22774 	    op1 = validize_mem (force_const_mem (op_mode, op1));
22775 	  else if (tmp == 1)
22776 	    {
22777 	      if (TARGET_CMOVE)
22778 		op1 = force_reg (op_mode, op1);
22779 	    }
22780 	  else
22781 	    op1 = force_reg (op_mode, op1);
22782 	}
22783     }
22784 
22785   /* Try to rearrange the comparison to make it cheaper.  */
22786   if (ix86_fp_comparison_cost (code)
22787       > ix86_fp_comparison_cost (swap_condition (code))
22788       && (REG_P (op1) || can_create_pseudo_p ()))
22789     {
22790       std::swap (op0, op1);
22791       code = swap_condition (code);
22792       if (!REG_P (op0))
22793 	op0 = force_reg (op_mode, op0);
22794     }
22795 
22796   *pop0 = op0;
22797   *pop1 = op1;
22798   return code;
22799 }
22800 
22801 /* Convert comparison codes we use to represent FP comparison to integer
22802    code that will result in proper branch.  Return UNKNOWN if no such code
22803    is available.  */
22804 
22805 enum rtx_code
22806 ix86_fp_compare_code_to_integer (enum rtx_code code)
22807 {
22808   switch (code)
22809     {
22810     case GT:
22811       return GTU;
22812     case GE:
22813       return GEU;
22814     case ORDERED:
22815     case UNORDERED:
22816       return code;
22817     case UNEQ:
22818       return EQ;
22819     case UNLT:
22820       return LTU;
22821     case UNLE:
22822       return LEU;
22823     case LTGT:
22824       return NE;
22825     default:
22826       return UNKNOWN;
22827     }
22828 }
22829 
22830 /* Generate insn patterns to do a floating point compare of OPERANDS.  */
22831 
22832 static rtx
22833 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
22834 {
22835   bool unordered_compare = ix86_unordered_fp_compare (code);
22836   machine_mode intcmp_mode;
22837   rtx tmp, tmp2;
22838 
22839   code = ix86_prepare_fp_compare_args (code, &op0, &op1);
22840 
22841   /* Do fcomi/sahf based test when profitable.  */
22842   switch (ix86_fp_comparison_strategy (code))
22843     {
22844     case IX86_FPCMP_COMI:
22845       intcmp_mode = CCFPmode;
22846       tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
22847       if (unordered_compare)
22848 	tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
22849       emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, FLAGS_REG), tmp));
22850       break;
22851 
22852     case IX86_FPCMP_SAHF:
22853       intcmp_mode = CCFPmode;
22854       tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
22855       if (unordered_compare)
22856 	tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
22857       tmp = gen_rtx_SET (gen_rtx_REG (CCFPmode, FLAGS_REG), tmp);
22858       if (!scratch)
22859 	scratch = gen_reg_rtx (HImode);
22860       tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
22861       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
22862       break;
22863 
22864     case IX86_FPCMP_ARITH:
22865       /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
22866       tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
22867       if (unordered_compare)
22868 	tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
22869       tmp = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
22870       if (!scratch)
22871 	scratch = gen_reg_rtx (HImode);
22872       emit_insn (gen_rtx_SET (scratch, tmp));
22873 
22874       /* In the unordered case, we have to check C2 for NaNs, which
22875 	 doesn't happen to work out to anything nice combination-wise.
22876 	 So do some bit twiddling on the value we've got in AH to come
22877 	 up with an appropriate set of condition codes.  */
22878 
22879       intcmp_mode = CCNOmode;
22880       switch (code)
22881 	{
22882 	case GT:
22883 	case UNGT:
22884 	  if (code == GT || !TARGET_IEEE_FP)
22885 	    {
22886 	      emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45)));
22887 	      code = EQ;
22888 	    }
22889 	  else
22890 	    {
22891 	      emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22892 	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
22893 	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
22894 	      intcmp_mode = CCmode;
22895 	      code = GEU;
22896 	    }
22897 	  break;
22898 	case LT:
22899 	case UNLT:
22900 	  if (code == LT && TARGET_IEEE_FP)
22901 	    {
22902 	      emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22903 	      emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
22904 	      intcmp_mode = CCmode;
22905 	      code = EQ;
22906 	    }
22907 	  else
22908 	    {
22909 	      emit_insn (gen_testqi_ext_1_ccno (scratch, const1_rtx));
22910 	      code = NE;
22911 	    }
22912 	  break;
22913 	case GE:
22914 	case UNGE:
22915 	  if (code == GE || !TARGET_IEEE_FP)
22916 	    {
22917 	      emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x05)));
22918 	      code = EQ;
22919 	    }
22920 	  else
22921 	    {
22922 	      emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22923 	      emit_insn (gen_xorqi_ext_1_cc (scratch, scratch, const1_rtx));
22924 	      code = NE;
22925 	    }
22926 	  break;
22927 	case LE:
22928 	case UNLE:
22929 	  if (code == LE && TARGET_IEEE_FP)
22930 	    {
22931 	      emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22932 	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
22933 	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
22934 	      intcmp_mode = CCmode;
22935 	      code = LTU;
22936 	    }
22937 	  else
22938 	    {
22939 	      emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45)));
22940 	      code = NE;
22941 	    }
22942 	  break;
22943 	case EQ:
22944 	case UNEQ:
22945 	  if (code == EQ && TARGET_IEEE_FP)
22946 	    {
22947 	      emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22948 	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
22949 	      intcmp_mode = CCmode;
22950 	      code = EQ;
22951 	    }
22952 	  else
22953 	    {
22954 	      emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40)));
22955 	      code = NE;
22956 	    }
22957 	  break;
22958 	case NE:
22959 	case LTGT:
22960 	  if (code == NE && TARGET_IEEE_FP)
22961 	    {
22962 	      emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
22963 	      emit_insn (gen_xorqi_ext_1_cc (scratch, scratch,
22964 					     GEN_INT (0x40)));
22965 	      code = NE;
22966 	    }
22967 	  else
22968 	    {
22969 	      emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40)));
22970 	      code = EQ;
22971 	    }
22972 	  break;
22973 
22974 	case UNORDERED:
22975 	  emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04)));
22976 	  code = NE;
22977 	  break;
22978 	case ORDERED:
22979 	  emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04)));
22980 	  code = EQ;
22981 	  break;
22982 
22983 	default:
22984 	  gcc_unreachable ();
22985 	}
22986       break;
22987 
22988     default:
22989       gcc_unreachable ();
22990     }
22991 
22992   /* Return the test that should be put into the flags user, i.e.
22993      the bcc, scc, or cmov instruction.  */
22994   return gen_rtx_fmt_ee (code, VOIDmode,
22995 			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
22996 			 const0_rtx);
22997 }
22998 
22999 static rtx
23000 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
23001 {
23002   rtx ret;
23003 
23004   if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
23005     ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
23006 
23007   else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
23008     {
23009       gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
23010       ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
23011     }
23012   else
23013     ret = ix86_expand_int_compare (code, op0, op1);
23014 
23015   return ret;
23016 }
23017 
23018 void
23019 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
23020 {
23021   machine_mode mode = GET_MODE (op0);
23022   rtx tmp;
23023 
23024   /* Handle the special case of a vector comparison with a boolean result;
23025      transform it using the ptest instruction.  */
23026   if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
23027     {
23028       rtx flag = gen_rtx_REG (CCZmode, FLAGS_REG);
23029       machine_mode p_mode = GET_MODE_SIZE (mode) == 32 ? V4DImode : V2DImode;
23030 
23031       gcc_assert (code == EQ || code == NE);
23032       /* Generate XOR since we can't check that one operand is zero vector.  */
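      /* ptest with both operands equal to TMP sets ZF exactly when TMP is
	 all zeros, i.e. when op0 == op1.  */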
23033       tmp = gen_reg_rtx (mode);
23034       emit_insn (gen_rtx_SET (tmp, gen_rtx_XOR (mode, op0, op1)));
23035       tmp = gen_lowpart (p_mode, tmp);
23036       emit_insn (gen_rtx_SET (gen_rtx_REG (CCmode, FLAGS_REG),
23037 			      gen_rtx_UNSPEC (CCmode,
23038 					      gen_rtvec (2, tmp, tmp),
23039 					      UNSPEC_PTEST)));
23040       tmp = gen_rtx_fmt_ee (code, VOIDmode, flag, const0_rtx);
23041       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
23042 				  gen_rtx_LABEL_REF (VOIDmode, label),
23043 				  pc_rtx);
23044       emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
23045       return;
23046     }
23047 
23048   switch (mode)
23049     {
23050     case E_SFmode:
23051     case E_DFmode:
23052     case E_XFmode:
23053     case E_QImode:
23054     case E_HImode:
23055     case E_SImode:
23056       simple:
23057       tmp = ix86_expand_compare (code, op0, op1);
23058       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
23059 				  gen_rtx_LABEL_REF (VOIDmode, label),
23060 				  pc_rtx);
23061       emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
23062       return;
23063 
23064     case E_DImode:
23065       if (TARGET_64BIT)
23066 	goto simple;
23067       /* For 32-bit targets a DImode comparison may be performed in
23068 	 SSE registers.  To allow this we must avoid splitting it into
23069 	 SImode halves, which is achieved by doing the xor in DImode
23070 	 and then comparing with zero (a form recognized by the
23071 	 STV pass).  We don't compare using xor when optimizing
23072 	 for size.  */
23073       if (!optimize_insn_for_size_p ()
23074 	  && TARGET_STV
23075 	  && (code == EQ || code == NE))
23076 	{
23077 	  op0 = force_reg (mode, gen_rtx_XOR (mode, op0, op1));
23078 	  op1 = const0_rtx;
23079 	}
23080       /* FALLTHRU */
23081     case E_TImode:
23082       /* Expand a double-word branch into multiple compare+branch.  */
23083       {
23084 	rtx lo[2], hi[2];
23085 	rtx_code_label *label2;
23086 	enum rtx_code code1, code2, code3;
23087 	machine_mode submode;
23088 
23089 	if (CONSTANT_P (op0) && !CONSTANT_P (op1))
23090 	  {
23091 	    std::swap (op0, op1);
23092 	    code = swap_condition (code);
23093 	  }
23094 
23095 	split_double_mode (mode, &op0, 1, lo+0, hi+0);
23096 	split_double_mode (mode, &op1, 1, lo+1, hi+1);
23097 
23098 	submode = mode == DImode ? SImode : DImode;
23099 
23100 	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
23101 	   avoid two branches.  This costs one extra insn, so disable when
23102 	   optimizing for size.  */
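	/* For example, a 64-bit "a == b" on a 32-bit target becomes
	   ((hi(a) ^ hi(b)) | (lo(a) ^ lo(b))) == 0, a single branch.  */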
23103 
23104 	if ((code == EQ || code == NE)
23105 	    && (!optimize_insn_for_size_p ()
23106 	        || hi[1] == const0_rtx || lo[1] == const0_rtx))
23107 	  {
23108 	    rtx xor0, xor1;
23109 
23110 	    xor1 = hi[0];
23111 	    if (hi[1] != const0_rtx)
23112 	      xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
23113 				   NULL_RTX, 0, OPTAB_WIDEN);
23114 
23115 	    xor0 = lo[0];
23116 	    if (lo[1] != const0_rtx)
23117 	      xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
23118 				   NULL_RTX, 0, OPTAB_WIDEN);
23119 
23120 	    tmp = expand_binop (submode, ior_optab, xor1, xor0,
23121 				NULL_RTX, 0, OPTAB_WIDEN);
23122 
23123 	    ix86_expand_branch (code, tmp, const0_rtx, label);
23124 	    return;
23125 	  }
23126 
23127 	/* Otherwise, if we are doing a less-than or greater-than-or-equal
23128 	   comparison, op1 is a constant and its low word is zero, then we
23129 	   can just examine the high word.  Similarly for a low word of -1
23130 	   and a less-than-or-equal or greater-than comparison.  */
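	/* For example, a DImode (unsigned) x < 0x300000000 on a 32-bit
	   target holds exactly when hi(x) < 3, because the low word of
	   the constant is zero.  */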
23131 
23132 	if (CONST_INT_P (hi[1]))
23133 	  switch (code)
23134 	    {
23135 	    case LT: case LTU: case GE: case GEU:
23136 	      if (lo[1] == const0_rtx)
23137 		{
23138 		  ix86_expand_branch (code, hi[0], hi[1], label);
23139 		  return;
23140 		}
23141 	      break;
23142 	    case LE: case LEU: case GT: case GTU:
23143 	      if (lo[1] == constm1_rtx)
23144 		{
23145 		  ix86_expand_branch (code, hi[0], hi[1], label);
23146 		  return;
23147 		}
23148 	      break;
23149 	    default:
23150 	      break;
23151 	    }
23152 
23153 	/* Emulate comparisons that do not depend on Zero flag with
23154 	   double-word subtraction.  Note that only Overflow, Sign
23155 	   and Carry flags are valid, so swap arguments and condition
23156 	   of comparisons that would otherwise test Zero flag.  */
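	/* The sequence below is a CMP on the low words followed by an SBB
	   on the high words; the flags from the SBB then describe the full
	   double-word subtraction, except that the Zero flag is not
	   meaningful.  */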
23157 
23158 	switch (code)
23159 	  {
23160 	  case LE: case LEU: case GT: case GTU:
23161 	    std::swap (lo[0], lo[1]);
23162 	    std::swap (hi[0], hi[1]);
23163 	    code = swap_condition (code);
23164 	    /* FALLTHRU */
23165 
23166 	  case LT: case LTU: case GE: case GEU:
23167 	    {
23168 	      rtx (*cmp_insn) (rtx, rtx);
23169 	      rtx (*sbb_insn) (rtx, rtx, rtx);
23170 	      bool uns = (code == LTU || code == GEU);
23171 
23172 	      if (TARGET_64BIT)
23173 		{
23174 		  cmp_insn = gen_cmpdi_1;
23175 		  sbb_insn
23176 		    = uns ? gen_subdi3_carry_ccc : gen_subdi3_carry_ccgz;
23177 		}
23178 	      else
23179 		{
23180 		  cmp_insn = gen_cmpsi_1;
23181 		  sbb_insn
23182 		    = uns ? gen_subsi3_carry_ccc : gen_subsi3_carry_ccgz;
23183 		}
23184 
23185 	      if (!nonimmediate_operand (lo[0], submode))
23186 		lo[0] = force_reg (submode, lo[0]);
23187 	      if (!x86_64_general_operand (lo[1], submode))
23188 		lo[1] = force_reg (submode, lo[1]);
23189 
23190 	      if (!register_operand (hi[0], submode))
23191 		hi[0] = force_reg (submode, hi[0]);
23192 	      if ((uns && !nonimmediate_operand (hi[1], submode))
23193 		  || (!uns && !x86_64_general_operand (hi[1], submode)))
23194 		hi[1] = force_reg (submode, hi[1]);
23195 
23196 	      emit_insn (cmp_insn (lo[0], lo[1]));
23197 	      emit_insn (sbb_insn (gen_rtx_SCRATCH (submode), hi[0], hi[1]));
23198 
23199 	      tmp = gen_rtx_REG (uns ? CCCmode : CCGZmode, FLAGS_REG);
23200 
23201 	      ix86_expand_branch (code, tmp, const0_rtx, label);
23202 	      return;
23203 	    }
23204 
23205 	  default:
23206 	    break;
23207 	  }
23208 
23209 	/* Otherwise, we need two or three jumps.  */
23210 
23211 	label2 = gen_label_rtx ();
23212 
23213 	code1 = code;
23214 	code2 = swap_condition (code);
23215 	code3 = unsigned_condition (code);
23216 
23217 	switch (code)
23218 	  {
23219 	  case LT: case GT: case LTU: case GTU:
23220 	    break;
23221 
23222 	  case LE:   code1 = LT;  code2 = GT;  break;
23223 	  case GE:   code1 = GT;  code2 = LT;  break;
23224 	  case LEU:  code1 = LTU; code2 = GTU; break;
23225 	  case GEU:  code1 = GTU; code2 = LTU; break;
23226 
23227 	  case EQ:   code1 = UNKNOWN; code2 = NE;  break;
23228 	  case NE:   code2 = UNKNOWN; break;
23229 
23230 	  default:
23231 	    gcc_unreachable ();
23232 	  }
23233 
23234 	/*
23235 	 * a < b =>
23236 	 *    if (hi(a) < hi(b)) goto true;
23237 	 *    if (hi(a) > hi(b)) goto false;
23238 	 *    if (lo(a) < lo(b)) goto true;
23239 	 *  false:
23240 	 */
23241 
23242 	if (code1 != UNKNOWN)
23243 	  ix86_expand_branch (code1, hi[0], hi[1], label);
23244 	if (code2 != UNKNOWN)
23245 	  ix86_expand_branch (code2, hi[0], hi[1], label2);
23246 
23247 	ix86_expand_branch (code3, lo[0], lo[1], label);
23248 
23249 	if (code2 != UNKNOWN)
23250 	  emit_label (label2);
23251 	return;
23252       }
23253 
23254     default:
23255       gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
23256       goto simple;
23257     }
23258 }
23259 
23260 void
23261 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
23262 {
23263   rtx ret;
23264 
23265   gcc_assert (GET_MODE (dest) == QImode);
23266 
23267   ret = ix86_expand_compare (code, op0, op1);
23268   PUT_MODE (ret, QImode);
23269   emit_insn (gen_rtx_SET (dest, ret));
23270 }
23271 
23272 /* Expand a comparison setting or clearing the carry flag.  Return true when
23273    successful, and set *POP to the comparison for the operation.  */
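/* A comparison that reduces to the carry flag is valuable because it can be
   consumed by sbb/adc; e.g. "sbb reg,reg" materializes 0 or -1 without a
   branch, which ix86_expand_int_movcc below relies on.  */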
23274 static bool
23275 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
23276 {
23277   machine_mode mode =
23278     GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
23279 
23280   /* Do not handle double-mode compares that go through special path.  */
23281   if (mode == (TARGET_64BIT ? TImode : DImode))
23282     return false;
23283 
23284   if (SCALAR_FLOAT_MODE_P (mode))
23285     {
23286       rtx compare_op;
23287       rtx_insn *compare_seq;
23288 
23289       gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
23290 
23291       /* Shortcut:  following common codes never translate
23292 	 into carry flag compares.  */
23293       if (code == EQ || code == NE || code == UNEQ || code == LTGT
23294 	  || code == ORDERED || code == UNORDERED)
23295 	return false;
23296 
23297       /* These comparisons require the zero flag; swap operands so they won't need it.  */
23298       if ((code == GT || code == UNLE || code == LE || code == UNGT)
23299 	  && !TARGET_IEEE_FP)
23300 	{
23301 	  std::swap (op0, op1);
23302 	  code = swap_condition (code);
23303 	}
23304 
23305       /* Try to expand the comparison and verify that we end up with
23306 	 a carry flag based comparison.  This fails only when we decide
23307 	 to expand the comparison using arithmetic, which is not a
23308 	 common scenario.  */
23309       start_sequence ();
23310       compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
23311       compare_seq = get_insns ();
23312       end_sequence ();
23313 
23314       if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode)
23315         code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
23316       else
23317 	code = GET_CODE (compare_op);
23318 
23319       if (code != LTU && code != GEU)
23320 	return false;
23321 
23322       emit_insn (compare_seq);
23323       *pop = compare_op;
23324       return true;
23325     }
23326 
23327   if (!INTEGRAL_MODE_P (mode))
23328     return false;
23329 
23330   switch (code)
23331     {
23332     case LTU:
23333     case GEU:
23334       break;
23335 
23336     /* Convert a==0 into (unsigned)a<1.  */
23337     case EQ:
23338     case NE:
23339       if (op1 != const0_rtx)
23340 	return false;
23341       op1 = const1_rtx;
23342       code = (code == EQ ? LTU : GEU);
23343       break;
23344 
23345     /* Convert a>b into b<a or a>=b+1.  */
23346     case GTU:
23347     case LEU:
23348       if (CONST_INT_P (op1))
23349 	{
23350 	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
23351 	  /* Bail out on overflow.  We still can swap operands but that
23352 	     would force loading of the constant into register.  */
23353 	  if (op1 == const0_rtx
23354 	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
23355 	    return false;
23356 	  code = (code == GTU ? GEU : LTU);
23357 	}
23358       else
23359 	{
23360 	  std::swap (op0, op1);
23361 	  code = (code == GTU ? LTU : GEU);
23362 	}
23363       break;
23364 
23365     /* Convert a>=0 into (unsigned)a<0x80000000.  */
23366     case LT:
23367     case GE:
23368       if (mode == DImode || op1 != const0_rtx)
23369 	return false;
23370       op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
23371       code = (code == LT ? GEU : LTU);
23372       break;
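    /* Likewise, convert a>-1 (resp. a<=-1) into an unsigned comparison
       against the sign-bit constant.  */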
23373     case LE:
23374     case GT:
23375       if (mode == DImode || op1 != constm1_rtx)
23376 	return false;
23377       op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
23378       code = (code == LE ? GEU : LTU);
23379       break;
23380 
23381     default:
23382       return false;
23383     }
23384   /* Swapping operands may cause a constant to appear as the first operand.  */
23385   if (!nonimmediate_operand (op0, VOIDmode))
23386     {
23387       if (!can_create_pseudo_p ())
23388 	return false;
23389       op0 = force_reg (mode, op0);
23390     }
23391   *pop = ix86_expand_compare (code, op0, op1);
23392   gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
23393   return true;
23394 }
23395 
23396 bool
23397 ix86_expand_int_movcc (rtx operands[])
23398 {
23399   enum rtx_code code = GET_CODE (operands[1]), compare_code;
23400   rtx_insn *compare_seq;
23401   rtx compare_op;
23402   machine_mode mode = GET_MODE (operands[0]);
23403   bool sign_bit_compare_p = false;
23404   rtx op0 = XEXP (operands[1], 0);
23405   rtx op1 = XEXP (operands[1], 1);
23406 
23407   if (GET_MODE (op0) == TImode
23408       || (GET_MODE (op0) == DImode
23409 	  && !TARGET_64BIT))
23410     return false;
23411 
23412   start_sequence ();
23413   compare_op = ix86_expand_compare (code, op0, op1);
23414   compare_seq = get_insns ();
23415   end_sequence ();
23416 
23417   compare_code = GET_CODE (compare_op);
23418 
23419   if ((op1 == const0_rtx && (code == GE || code == LT))
23420       || (op1 == constm1_rtx && (code == GT || code == LE)))
23421     sign_bit_compare_p = true;
23422 
23423   /* Don't attempt mode expansion here -- if we had to expand 5 or 6
23424      HImode insns, we'd be swallowed in word prefix ops.  */
23425 
23426   if ((mode != HImode || TARGET_FAST_PREFIX)
23427       && (mode != (TARGET_64BIT ? TImode : DImode))
23428       && CONST_INT_P (operands[2])
23429       && CONST_INT_P (operands[3]))
23430     {
23431       rtx out = operands[0];
23432       HOST_WIDE_INT ct = INTVAL (operands[2]);
23433       HOST_WIDE_INT cf = INTVAL (operands[3]);
23434       HOST_WIDE_INT diff;
23435 
23436       diff = ct - cf;
23437       /* Sign bit compares are better done using shifts than by
23438 	  using sbb.  */
23439       if (sign_bit_compare_p
23440 	  || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
23441 	{
23442 	  /* Detect overlap between destination and compare sources.  */
23443 	  rtx tmp = out;
23444 
23445           if (!sign_bit_compare_p)
23446 	    {
23447 	      rtx flags;
23448 	      bool fpcmp = false;
23449 
23450 	      compare_code = GET_CODE (compare_op);
23451 
23452 	      flags = XEXP (compare_op, 0);
23453 
23454 	      if (GET_MODE (flags) == CCFPmode)
23455 		{
23456 		  fpcmp = true;
23457 		  compare_code
23458 		    = ix86_fp_compare_code_to_integer (compare_code);
23459 		}
23460 
23461 	      /* To simplify the rest of the code, restrict to the GEU case.  */
23462 	      if (compare_code == LTU)
23463 		{
23464 		  std::swap (ct, cf);
23465 		  compare_code = reverse_condition (compare_code);
23466 		  code = reverse_condition (code);
23467 		}
23468 	      else
23469 		{
23470 		  if (fpcmp)
23471 		    PUT_CODE (compare_op,
23472 			      reverse_condition_maybe_unordered
23473 			        (GET_CODE (compare_op)));
23474 		  else
23475 		    PUT_CODE (compare_op,
23476 			      reverse_condition (GET_CODE (compare_op)));
23477 		}
23478 	      diff = ct - cf;
23479 
23480 	      if (reg_overlap_mentioned_p (out, op0)
23481 		  || reg_overlap_mentioned_p (out, op1))
23482 		tmp = gen_reg_rtx (mode);
23483 
23484 	      if (mode == DImode)
23485 		emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
23486 	      else
23487 		emit_insn (gen_x86_movsicc_0_m1	(gen_lowpart (SImode, tmp),
23488 						 flags, compare_op));
23489 	    }
23490 	  else
23491 	    {
23492 	      if (code == GT || code == GE)
23493 		code = reverse_condition (code);
23494 	      else
23495 		{
23496 		  std::swap (ct, cf);
23497 		  diff = ct - cf;
23498 		}
23499 	      tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
23500 	    }
23501 
23502 	  if (diff == 1)
23503 	    {
23504 	      /*
23505 	       * cmpl op0,op1
23506 	       * sbbl dest,dest
23507 	       * [addl dest, ct]
23508 	       *
23509 	       * Size 5 - 8.
23510 	       */
23511 	      if (ct)
23512 		tmp = expand_simple_binop (mode, PLUS,
23513 					   tmp, GEN_INT (ct),
23514 					   copy_rtx (tmp), 1, OPTAB_DIRECT);
23515 	    }
23516 	  else if (cf == -1)
23517 	    {
23518 	      /*
23519 	       * cmpl op0,op1
23520 	       * sbbl dest,dest
23521 	       * orl $ct, dest
23522 	       *
23523 	       * Size 8.
23524 	       */
23525 	      tmp = expand_simple_binop (mode, IOR,
23526 					 tmp, GEN_INT (ct),
23527 					 copy_rtx (tmp), 1, OPTAB_DIRECT);
23528 	    }
23529 	  else if (diff == -1 && ct)
23530 	    {
23531 	      /*
23532 	       * cmpl op0,op1
23533 	       * sbbl dest,dest
23534 	       * notl dest
23535 	       * [addl dest, cf]
23536 	       *
23537 	       * Size 8 - 11.
23538 	       */
23539 	      tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
23540 	      if (cf)
23541 		tmp = expand_simple_binop (mode, PLUS,
23542 					   copy_rtx (tmp), GEN_INT (cf),
23543 					   copy_rtx (tmp), 1, OPTAB_DIRECT);
23544 	    }
23545 	  else
23546 	    {
23547 	      /*
23548 	       * cmpl op0,op1
23549 	       * sbbl dest,dest
23550 	       * [notl dest]
23551 	       * andl cf - ct, dest
23552 	       * [addl dest, ct]
23553 	       *
23554 	       * Size 8 - 11.
23555 	       */
23556 
23557 	      if (cf == 0)
23558 		{
23559 		  cf = ct;
23560 		  ct = 0;
23561 		  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
23562 		}
23563 
23564 	      tmp = expand_simple_binop (mode, AND,
23565 					 copy_rtx (tmp),
23566 					 gen_int_mode (cf - ct, mode),
23567 					 copy_rtx (tmp), 1, OPTAB_DIRECT);
23568 	      if (ct)
23569 		tmp = expand_simple_binop (mode, PLUS,
23570 					   copy_rtx (tmp), GEN_INT (ct),
23571 					   copy_rtx (tmp), 1, OPTAB_DIRECT);
23572 	    }
23573 
23574 	  if (!rtx_equal_p (tmp, out))
23575 	    emit_move_insn (copy_rtx (out), copy_rtx (tmp));
23576 
23577 	  return true;
23578 	}
23579 
23580       if (diff < 0)
23581 	{
23582 	  machine_mode cmp_mode = GET_MODE (op0);
23583 	  enum rtx_code new_code;
23584 
23585 	  if (SCALAR_FLOAT_MODE_P (cmp_mode))
23586 	    {
23587 	      gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
23588 
23589 	      /* We may be reversing an unordered compare to a normal compare,
23590 		 which is not valid in general (we may convert a non-trapping
23591 		 condition to a trapping one); however on i386 we currently
23592 		 emit all comparisons unordered.  */
23593 	      new_code = reverse_condition_maybe_unordered (code);
23594 	    }
23595 	  else
23596 	    new_code = ix86_reverse_condition (code, cmp_mode);
23597 	  if (new_code != UNKNOWN)
23598 	    {
23599 	      std::swap (ct, cf);
23600 	      diff = -diff;
23601 	      code = new_code;
23602 	    }
23603 	}
23604 
23605       compare_code = UNKNOWN;
23606       if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
23607 	  && CONST_INT_P (op1))
23608 	{
23609 	  if (op1 == const0_rtx
23610 	      && (code == LT || code == GE))
23611 	    compare_code = code;
23612 	  else if (op1 == constm1_rtx)
23613 	    {
23614 	      if (code == LE)
23615 		compare_code = LT;
23616 	      else if (code == GT)
23617 		compare_code = GE;
23618 	    }
23619 	}
23620 
23621       /* Optimize dest = (op0 < 0) ? -1 : cf.  */
23622       if (compare_code != UNKNOWN
23623 	  && GET_MODE (op0) == GET_MODE (out)
23624 	  && (cf == -1 || ct == -1))
23625 	{
23626 	  /* If lea code below could be used, only optimize
23627 	     if it results in a 2 insn sequence.  */
23628 
23629 	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
23630 		 || diff == 3 || diff == 5 || diff == 9)
23631 	      || (compare_code == LT && ct == -1)
23632 	      || (compare_code == GE && cf == -1))
23633 	    {
23634 	      /*
23635 	       * notl op1	(if necessary)
23636 	       * sarl $31, op1
23637 	       * orl cf, op1
23638 	       */
23639 	      if (ct != -1)
23640 		{
23641 		  cf = ct;
23642 		  ct = -1;
23643 		  code = reverse_condition (code);
23644 		}
23645 
23646 	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
23647 
23648 	      out = expand_simple_binop (mode, IOR,
23649 					 out, GEN_INT (cf),
23650 					 out, 1, OPTAB_DIRECT);
23651 	      if (out != operands[0])
23652 		emit_move_insn (operands[0], out);
23653 
23654 	      return true;
23655 	    }
23656 	}
23657 
23658 
23659       if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
23660 	   || diff == 3 || diff == 5 || diff == 9)
23661 	  && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
23662 	  && (mode != DImode
23663 	      || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
23664 	{
23665 	  /*
23666 	   * xorl dest,dest
23667 	   * cmpl op1,op2
23668 	   * setcc dest
23669 	   * lea cf(dest*(ct-cf)),dest
23670 	   *
23671 	   * Size 14.
23672 	   *
23673 	   * This also catches the degenerate setcc-only case.
23674 	   */
23675 
23676 	  rtx tmp;
23677 	  int nops;
23678 
23679 	  out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
23680 
23681 	  nops = 0;
23682 	  /* On x86_64 the lea instruction operates on Pmode, so we need
23683 	     to get the arithmetic done in the proper mode to match.  */
23684 	  if (diff == 1)
23685 	    tmp = copy_rtx (out);
23686 	  else
23687 	    {
23688 	      rtx out1;
23689 	      out1 = copy_rtx (out);
23690 	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
23691 	      nops++;
23692 	      if (diff & 1)
23693 		{
23694 		  tmp = gen_rtx_PLUS (mode, tmp, out1);
23695 		  nops++;
23696 		}
23697 	    }
23698 	  if (cf != 0)
23699 	    {
23700 	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
23701 	      nops++;
23702 	    }
23703 	  if (!rtx_equal_p (tmp, out))
23704 	    {
23705 	      if (nops == 1)
23706 		out = force_operand (tmp, copy_rtx (out));
23707 	      else
23708 		emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp)));
23709 	    }
23710 	  if (!rtx_equal_p (out, operands[0]))
23711 	    emit_move_insn (operands[0], copy_rtx (out));
23712 
23713 	  return true;
23714 	}
23715 
23716       /*
23717        * General case:			Jumpful:
23718        *   xorl dest,dest		cmpl op1, op2
23719        *   cmpl op1, op2		movl ct, dest
23720        *   setcc dest			jcc 1f
23721        *   decl dest			movl cf, dest
23722        *   andl (cf-ct),dest		1:
23723        *   addl ct,dest
23724        *
23725        * Size 20.			Size 14.
23726        *
23727        * This is reasonably steep, but branch mispredict costs are
23728        * high on modern cpus, so consider failing only if optimizing
23729        * for space.
23730        */
23731 
23732       if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
23733 	  && BRANCH_COST (optimize_insn_for_speed_p (),
23734 		  	  false) >= 2)
23735 	{
23736 	  if (cf == 0)
23737 	    {
23738 	      machine_mode cmp_mode = GET_MODE (op0);
23739 	      enum rtx_code new_code;
23740 
23741 	      if (SCALAR_FLOAT_MODE_P (cmp_mode))
23742 		{
23743 		  gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
23744 
23745 		  /* We may be reversing an unordered compare to a normal compare,
23746 		     which is not valid in general (we may convert a non-trapping
23747 		     condition to a trapping one); however on i386 we currently
23748 		     emit all comparisons unordered.  */
23749 		  new_code = reverse_condition_maybe_unordered (code);
23750 		}
23751 	      else
23752 		{
23753 		  new_code = ix86_reverse_condition (code, cmp_mode);
23754 		  if (compare_code != UNKNOWN && new_code != UNKNOWN)
23755 		    compare_code = reverse_condition (compare_code);
23756 		}
23757 
23758 	      if (new_code != UNKNOWN)
23759 		{
23760 		  cf = ct;
23761 		  ct = 0;
23762 		  code = new_code;
23763 		}
23764 	    }
23765 
23766 	  if (compare_code != UNKNOWN)
23767 	    {
23768 	      /* notl op1	(if needed)
23769 		 sarl $31, op1
23770 		 andl (cf-ct), op1
23771 		 addl ct, op1
23772 
23773 		 For x < 0 (resp. x <= -1) there will be no notl,
23774 		 so if possible swap the constants to get rid of the
23775 		 complement.
23776 		 True/false will be -1/0 while code below (store flag
23777 		 followed by decrement) is 0/-1, so the constants need
23778 		 to be exchanged once more.  */
23779 
23780 	      if (compare_code == GE || !cf)
23781 		{
23782 		  code = reverse_condition (code);
23783 		  compare_code = LT;
23784 		}
23785 	      else
23786 		std::swap (ct, cf);
23787 
23788 	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
23789 	    }
23790 	  else
23791 	    {
23792 	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
23793 
23794 	      out = expand_simple_binop (mode, PLUS, copy_rtx (out),
23795 					 constm1_rtx,
23796 					 copy_rtx (out), 1, OPTAB_DIRECT);
23797 	    }
23798 
23799 	  out = expand_simple_binop (mode, AND, copy_rtx (out),
23800 				     gen_int_mode (cf - ct, mode),
23801 				     copy_rtx (out), 1, OPTAB_DIRECT);
23802 	  if (ct)
23803 	    out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
23804 				       copy_rtx (out), 1, OPTAB_DIRECT);
23805 	  if (!rtx_equal_p (out, operands[0]))
23806 	    emit_move_insn (operands[0], copy_rtx (out));
23807 
23808 	  return true;
23809 	}
23810     }
23811 
23812   if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
23813     {
23814       /* Try a few things more with specific constants and a variable.  */
23815 
23816       optab op;
23817       rtx var, orig_out, out, tmp;
23818 
23819       if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
23820 	return false;
23821 
23822       /* If one of the two operands is an interesting constant, load a
23823 	 constant with the above and mask it in with a logical operation.  */
23824 
23825       if (CONST_INT_P (operands[2]))
23826 	{
23827 	  var = operands[3];
23828 	  if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
23829 	    operands[3] = constm1_rtx, op = and_optab;
23830 	  else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
23831 	    operands[3] = const0_rtx, op = ior_optab;
23832 	  else
23833 	    return false;
23834 	}
23835       else if (CONST_INT_P (operands[3]))
23836 	{
23837 	  var = operands[2];
23838 	  if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
23839 	    operands[2] = constm1_rtx, op = and_optab;
23840 	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
23841 	    operands[2] = const0_rtx, op = ior_optab;
23842 	  else
23843 	    return false;
23844 	}
23845       else
23846         return false;
23847 
23848       orig_out = operands[0];
23849       tmp = gen_reg_rtx (mode);
23850       operands[0] = tmp;
23851 
23852       /* Recurse to get the constant loaded.  */
23853       if (!ix86_expand_int_movcc (operands))
23854         return false;
23855 
23856       /* Mask in the interesting variable.  */
23857       out = expand_binop (mode, op, var, tmp, orig_out, 0,
23858 			  OPTAB_WIDEN);
23859       if (!rtx_equal_p (out, orig_out))
23860 	emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
23861 
23862       return true;
23863     }
23864 
23865   /*
23866    * For comparison with above,
23867    *
23868    * movl cf,dest
23869    * movl ct,tmp
23870    * cmpl op1,op2
23871    * cmovcc tmp,dest
23872    *
23873    * Size 15.
23874    */
23875 
23876   if (! nonimmediate_operand (operands[2], mode))
23877     operands[2] = force_reg (mode, operands[2]);
23878   if (! nonimmediate_operand (operands[3], mode))
23879     operands[3] = force_reg (mode, operands[3]);
23880 
23881   if (! register_operand (operands[2], VOIDmode)
23882       && (mode == QImode
23883           || ! register_operand (operands[3], VOIDmode)))
23884     operands[2] = force_reg (mode, operands[2]);
23885 
23886   if (mode == QImode
23887       && ! register_operand (operands[3], VOIDmode))
23888     operands[3] = force_reg (mode, operands[3]);
23889 
23890   emit_insn (compare_seq);
23891   emit_insn (gen_rtx_SET (operands[0],
23892 			  gen_rtx_IF_THEN_ELSE (mode,
23893 						compare_op, operands[2],
23894 						operands[3])));
23895   return true;
23896 }
23897 
23898 /* Swap, force into registers, or otherwise massage the two operands
23899    to an sse comparison with a mask result.  Thus we differ a bit from
23900    ix86_prepare_fp_compare_args which expects to produce a flags result.
23901 
23902    The DEST operand exists to help determine whether to commute commutative
23903    operators.  The POP0/POP1 operands are updated in place.  The new
23904    comparison code is returned, or UNKNOWN if not implementable.  */
23905 
23906 static enum rtx_code
23907 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
23908 				  rtx *pop0, rtx *pop1)
23909 {
23910   switch (code)
23911     {
23912     case LTGT:
23913     case UNEQ:
23914       /* AVX supports all the needed comparisons.  */
23915       if (TARGET_AVX)
23916 	break;
23917       /* We have no LTGT as an operator.  We could implement it with
23918 	 NE & ORDERED, but this requires an extra temporary.  It's
23919 	 not clear that it's worth it.  */
23920       return UNKNOWN;
23921 
23922     case LT:
23923     case LE:
23924     case UNGT:
23925     case UNGE:
23926       /* These are supported directly.  */
23927       break;
23928 
23929     case EQ:
23930     case NE:
23931     case UNORDERED:
23932     case ORDERED:
23933       /* AVX has 3 operand comparisons, no need to swap anything.  */
23934       if (TARGET_AVX)
23935 	break;
23936       /* For commutative operators, try to canonicalize the destination
23937 	 operand to be first in the comparison - this helps reload to
23938 	 avoid extra moves.  */
23939       if (!dest || !rtx_equal_p (dest, *pop1))
23940 	break;
23941       /* FALLTHRU */
23942 
23943     case GE:
23944     case GT:
23945     case UNLE:
23946     case UNLT:
23947       /* These are not supported directly before AVX, and furthermore
23948 	 ix86_expand_sse_fp_minmax only optimizes LT/UNGE.  Swap the
23949 	 comparison operands to transform into something that is
23950 	 supported.  */
23951       std::swap (*pop0, *pop1);
23952       code = swap_condition (code);
23953       break;
23954 
23955     default:
23956       gcc_unreachable ();
23957     }
23958 
23959   return code;
23960 }
23961 
23962 /* Detect conditional moves that exactly match min/max operational
23963    semantics.  Note that this is IEEE safe, as long as we don't
23964    interchange the operands.
23965 
23966    Returns FALSE if this conditional move doesn't match a MIN/MAX,
23967    and TRUE if the operation is successful and instructions are emitted.  */
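/* Note that the SSE min/max instructions are asymmetric: on unordered input,
   or when both inputs are zeros of opposite sign, they return the second
   operand, which is why the operand order established here must not be
   disturbed.  */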
23968 
23969 static bool
23970 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
23971 			   rtx cmp_op1, rtx if_true, rtx if_false)
23972 {
23973   machine_mode mode;
23974   bool is_min;
23975   rtx tmp;
23976 
23977   if (code == LT)
23978     ;
23979   else if (code == UNGE)
23980     std::swap (if_true, if_false);
23981   else
23982     return false;
23983 
23984   if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
23985     is_min = true;
23986   else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
23987     is_min = false;
23988   else
23989     return false;
23990 
23991   mode = GET_MODE (dest);
23992 
23993   /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
23994      but MODE may be a vector mode and thus not appropriate.  */
23995   if (!flag_finite_math_only || flag_signed_zeros)
23996     {
23997       int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
23998       rtvec v;
23999 
24000       if_true = force_reg (mode, if_true);
24001       v = gen_rtvec (2, if_true, if_false);
24002       tmp = gen_rtx_UNSPEC (mode, v, u);
24003     }
24004   else
24005     {
24006       code = is_min ? SMIN : SMAX;
24007       tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
24008     }
24009 
24010   emit_insn (gen_rtx_SET (dest, tmp));
24011   return true;
24012 }
24013 
24014 /* Expand an sse vector comparison.  Return the register with the result.  */
24015 
24016 static rtx
24017 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
24018 		     rtx op_true, rtx op_false)
24019 {
24020   machine_mode mode = GET_MODE (dest);
24021   machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
24022 
24023   /* In the general case the result of the comparison can differ from the operands' mode.  */
24024   machine_mode cmp_mode;
24025 
24026   /* In AVX512F the result of comparison is an integer mask.  */
24027   bool maskcmp = false;
24028   rtx x;
24029 
24030   if (GET_MODE_SIZE (cmp_ops_mode) == 64)
24031     {
24032       unsigned int nbits = GET_MODE_NUNITS (cmp_ops_mode);
24033       cmp_mode = int_mode_for_size (nbits, 0).require ();
24034       maskcmp = true;
24035     }
24036   else
24037     cmp_mode = cmp_ops_mode;
24038 
24039 
24040   cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
24041   if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
24042     cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
24043 
24044   if (optimize
24045       || (maskcmp && cmp_mode != mode)
24046       || (op_true && reg_overlap_mentioned_p (dest, op_true))
24047       || (op_false && reg_overlap_mentioned_p (dest, op_false)))
24048     dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
24049 
24050   /* Compare patterns for int modes are unspec in AVX512F only.  */
24051   if (maskcmp && (code == GT || code == EQ))
24052     {
24053       rtx (*gen)(rtx, rtx, rtx);
24054 
24055       switch (cmp_ops_mode)
24056 	{
24057 	case E_V64QImode:
24058 	  gcc_assert (TARGET_AVX512BW);
24059 	  gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
24060 	  break;
24061 	case E_V32HImode:
24062 	  gcc_assert (TARGET_AVX512BW);
24063 	  gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
24064 	  break;
24065 	case E_V16SImode:
24066 	  gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
24067 	  break;
24068 	case E_V8DImode:
24069 	  gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
24070 	  break;
24071 	default:
24072 	  gen = NULL;
24073 	}
24074 
24075       if (gen)
24076 	{
24077 	  emit_insn (gen (dest, cmp_op0, cmp_op1));
24078 	  return dest;
24079 	}
24080     }
24081   x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
24082 
24083   if (cmp_mode != mode && !maskcmp)
24084     {
24085       x = force_reg (cmp_ops_mode, x);
24086       convert_move (dest, x, false);
24087     }
24088   else
24089     emit_insn (gen_rtx_SET (dest, x));
24090 
24091   return dest;
24092 }
24093 
24094 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
24095    operations.  This is used for both scalar and vector conditional moves.  */
24096 
24097 void
24098 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
24099 {
24100   machine_mode mode = GET_MODE (dest);
24101   machine_mode cmpmode = GET_MODE (cmp);
24102 
24103   /* In AVX512F the result of comparison is an integer mask.  */
24104   bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
24105 
24106   rtx t2, t3, x;
24107 
24108   /* If we have an integer mask and FP value then we need
24109      to cast mask to FP mode.  */
24110   if (mode != cmpmode && VECTOR_MODE_P (cmpmode))
24111     {
24112       cmp = force_reg (cmpmode, cmp);
24113       cmp = gen_rtx_SUBREG (mode, cmp, 0);
24114     }
24115 
24116   if (vector_all_ones_operand (op_true, mode)
24117       && rtx_equal_p (op_false, CONST0_RTX (mode))
24118       && !maskcmp)
24119     {
24120       emit_insn (gen_rtx_SET (dest, cmp));
24121     }
24122   else if (op_false == CONST0_RTX (mode)
24123       && !maskcmp)
24124     {
24125       op_true = force_reg (mode, op_true);
24126       x = gen_rtx_AND (mode, cmp, op_true);
24127       emit_insn (gen_rtx_SET (dest, x));
24128     }
24129   else if (op_true == CONST0_RTX (mode)
24130       && !maskcmp)
24131     {
24132       op_false = force_reg (mode, op_false);
24133       x = gen_rtx_NOT (mode, cmp);
24134       x = gen_rtx_AND (mode, x, op_false);
24135       emit_insn (gen_rtx_SET (dest, x));
24136     }
24137   else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
24138       && !maskcmp)
24139     {
24140       op_false = force_reg (mode, op_false);
24141       x = gen_rtx_IOR (mode, cmp, op_false);
24142       emit_insn (gen_rtx_SET (dest, x));
24143     }
24144   else if (TARGET_XOP
24145       && !maskcmp)
24146     {
24147       op_true = force_reg (mode, op_true);
24148 
24149       if (!nonimmediate_operand (op_false, mode))
24150 	op_false = force_reg (mode, op_false);
24151 
24152       emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp,
24153 							  op_true,
24154 							  op_false)));
24155     }
24156   else
24157     {
24158       rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
24159       rtx d = dest;
24160 
24161       if (!nonimmediate_operand (op_true, mode))
24162 	op_true = force_reg (mode, op_true);
24163 
24164       op_false = force_reg (mode, op_false);
24165 
24166       switch (mode)
24167 	{
24168 	case E_V4SFmode:
24169 	  if (TARGET_SSE4_1)
24170 	    gen = gen_sse4_1_blendvps;
24171 	  break;
24172 	case E_V2DFmode:
24173 	  if (TARGET_SSE4_1)
24174 	    gen = gen_sse4_1_blendvpd;
24175 	  break;
24176 	case E_V16QImode:
24177 	case E_V8HImode:
24178 	case E_V4SImode:
24179 	case E_V2DImode:
24180 	  if (TARGET_SSE4_1)
24181 	    {
24182 	      gen = gen_sse4_1_pblendvb;
24183 	      if (mode != V16QImode)
24184 		d = gen_reg_rtx (V16QImode);
24185 	      op_false = gen_lowpart (V16QImode, op_false);
24186 	      op_true = gen_lowpart (V16QImode, op_true);
24187 	      cmp = gen_lowpart (V16QImode, cmp);
24188 	    }
24189 	  break;
24190 	case E_V8SFmode:
24191 	  if (TARGET_AVX)
24192 	    gen = gen_avx_blendvps256;
24193 	  break;
24194 	case E_V4DFmode:
24195 	  if (TARGET_AVX)
24196 	    gen = gen_avx_blendvpd256;
24197 	  break;
24198 	case E_V32QImode:
24199 	case E_V16HImode:
24200 	case E_V8SImode:
24201 	case E_V4DImode:
24202 	  if (TARGET_AVX2)
24203 	    {
24204 	      gen = gen_avx2_pblendvb;
24205 	      if (mode != V32QImode)
24206 		d = gen_reg_rtx (V32QImode);
24207 	      op_false = gen_lowpart (V32QImode, op_false);
24208 	      op_true = gen_lowpart (V32QImode, op_true);
24209 	      cmp = gen_lowpart (V32QImode, cmp);
24210 	    }
24211 	  break;
24212 
24213 	case E_V64QImode:
24214 	  gen = gen_avx512bw_blendmv64qi;
24215 	  break;
24216 	case E_V32HImode:
24217 	  gen = gen_avx512bw_blendmv32hi;
24218 	  break;
24219 	case E_V16SImode:
24220 	  gen = gen_avx512f_blendmv16si;
24221 	  break;
24222 	case E_V8DImode:
24223 	  gen = gen_avx512f_blendmv8di;
24224 	  break;
24225 	case E_V8DFmode:
24226 	  gen = gen_avx512f_blendmv8df;
24227 	  break;
24228 	case E_V16SFmode:
24229 	  gen = gen_avx512f_blendmv16sf;
24230 	  break;
24231 
24232 	default:
24233 	  break;
24234 	}
24235 
24236       if (gen != NULL)
24237 	{
24238 	  emit_insn (gen (d, op_false, op_true, cmp));
24239 	  if (d != dest)
24240 	    emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
24241 	}
24242       else
24243 	{
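	  /* No blend instruction is available; compute
	     (cmp & op_true) | (~cmp & op_false) with explicit logic ops.  */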
24244 	  op_true = force_reg (mode, op_true);
24245 
24246 	  t2 = gen_reg_rtx (mode);
24247 	  if (optimize)
24248 	    t3 = gen_reg_rtx (mode);
24249 	  else
24250 	    t3 = dest;
24251 
24252 	  x = gen_rtx_AND (mode, op_true, cmp);
24253 	  emit_insn (gen_rtx_SET (t2, x));
24254 
24255 	  x = gen_rtx_NOT (mode, cmp);
24256 	  x = gen_rtx_AND (mode, x, op_false);
24257 	  emit_insn (gen_rtx_SET (t3, x));
24258 
24259 	  x = gen_rtx_IOR (mode, t3, t2);
24260 	  emit_insn (gen_rtx_SET (dest, x));
24261 	}
24262     }
24263 }
24264 
24265 /* Expand a floating-point conditional move.  Return true if successful.  */
24266 
24267 bool
24268 ix86_expand_fp_movcc (rtx operands[])
24269 {
24270   machine_mode mode = GET_MODE (operands[0]);
24271   enum rtx_code code = GET_CODE (operands[1]);
24272   rtx tmp, compare_op;
24273   rtx op0 = XEXP (operands[1], 0);
24274   rtx op1 = XEXP (operands[1], 1);
24275 
24276   if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
24277     {
24278       machine_mode cmode;
24279 
24280       /* Since we've no cmove for sse registers, don't force bad register
24281 	 allocation just to gain access to it.  Deny movcc when the
24282 	 comparison mode doesn't match the move mode.  */
24283       cmode = GET_MODE (op0);
24284       if (cmode == VOIDmode)
24285 	cmode = GET_MODE (op1);
24286       if (cmode != mode)
24287 	return false;
24288 
24289       code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
24290       if (code == UNKNOWN)
24291 	return false;
24292 
24293       if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
24294 				     operands[2], operands[3]))
24295 	return true;
24296 
24297       tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
24298 				 operands[2], operands[3]);
24299       ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
24300       return true;
24301     }
24302 
24303   if (GET_MODE (op0) == TImode
24304       || (GET_MODE (op0) == DImode
24305 	  && !TARGET_64BIT))
24306     return false;
24307 
24308   /* The floating point conditional move instructions don't directly
24309      support conditions resulting from a signed integer comparison.  */
24310 
24311   compare_op = ix86_expand_compare (code, op0, op1);
24312   if (!fcmov_comparison_operator (compare_op, VOIDmode))
24313     {
24314       tmp = gen_reg_rtx (QImode);
24315       ix86_expand_setcc (tmp, code, op0, op1);
24316 
24317       compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
24318     }
24319 
24320   emit_insn (gen_rtx_SET (operands[0],
24321 			  gen_rtx_IF_THEN_ELSE (mode, compare_op,
24322 						operands[2], operands[3])));
24323 
24324   return true;
24325 }
24326 
24327 /* Helper for ix86_cmp_code_to_pcmp_immediate for int modes.  */
24328 
24329 static int
24330 ix86_int_cmp_code_to_pcmp_immediate (enum rtx_code code)
24331 {
24332   switch (code)
24333     {
24334     case EQ:
24335       return 0;
24336     case LT:
24337     case LTU:
24338       return 1;
24339     case LE:
24340     case LEU:
24341       return 2;
24342     case NE:
24343       return 4;
24344     case GE:
24345     case GEU:
24346       return 5;
24347     case GT:
24348     case GTU:
24349       return 6;
24350     default:
24351       gcc_unreachable ();
24352     }
24353 }
24354 
24355 /* Helper for ix86_cmp_code_to_pcmp_immediate for fp modes.  */
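/* The values returned below are the VCMPPS/VCMPPD predicate immediates,
   e.g. 0x01 is _CMP_LT_OS and 0x0e is _CMP_GT_OS.  */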
24356 
24357 static int
24358 ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code)
24359 {
24360   switch (code)
24361     {
24362     case EQ:
24363       return 0x00;
24364     case NE:
24365       return 0x04;
24366     case GT:
24367       return 0x0e;
24368     case LE:
24369       return 0x02;
24370     case GE:
24371       return 0x0d;
24372     case LT:
24373       return 0x01;
24374     case UNLE:
24375       return 0x0a;
24376     case UNLT:
24377       return 0x09;
24378     case UNGE:
24379       return 0x05;
24380     case UNGT:
24381       return 0x06;
24382     case UNEQ:
24383       return 0x18;
24384     case LTGT:
24385       return 0x0c;
24386     case ORDERED:
24387       return 0x07;
24388     case UNORDERED:
24389       return 0x03;
24390     default:
24391       gcc_unreachable ();
24392     }
24393 }
24394 
24395 /* Return immediate value to be used in UNSPEC_PCMP
24396    for comparison CODE in MODE.  */
24397 
24398 static int
24399 ix86_cmp_code_to_pcmp_immediate (enum rtx_code code, machine_mode mode)
24400 {
24401   if (FLOAT_MODE_P (mode))
24402     return ix86_fp_cmp_code_to_pcmp_immediate (code);
24403   return ix86_int_cmp_code_to_pcmp_immediate (code);
24404 }
24405 
24406 /* Expand AVX-512 vector comparison.  */
24407 
24408 bool
24409 ix86_expand_mask_vec_cmp (rtx operands[])
24410 {
24411   machine_mode mask_mode = GET_MODE (operands[0]);
24412   machine_mode cmp_mode = GET_MODE (operands[2]);
24413   enum rtx_code code = GET_CODE (operands[1]);
24414   rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode));
24415   int unspec_code;
24416   rtx unspec;
24417 
24418   switch (code)
24419     {
24420     case LEU:
24421     case GTU:
24422     case GEU:
24423     case LTU:
24424       unspec_code = UNSPEC_UNSIGNED_PCMP;
24425       break;
24426 
24427     default:
24428       unspec_code = UNSPEC_PCMP;
24429     }
24430 
24431   unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2],
24432 						 operands[3], imm),
24433 			   unspec_code);
24434   emit_insn (gen_rtx_SET (operands[0], unspec));
24435 
24436   return true;
24437 }
24438 
24439 /* Expand fp vector comparison.  */
24440 
24441 bool
24442 ix86_expand_fp_vec_cmp (rtx operands[])
24443 {
24444   enum rtx_code code = GET_CODE (operands[1]);
24445   rtx cmp;
24446 
24447   code = ix86_prepare_sse_fp_compare_args (operands[0], code,
24448 					   &operands[2], &operands[3]);
24449   if (code == UNKNOWN)
24450     {
24451       rtx temp;
24452       switch (GET_CODE (operands[1]))
24453 	{
24454 	case LTGT:
24455 	  temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[2],
24456 				      operands[3], NULL, NULL);
24457 	  cmp = ix86_expand_sse_cmp (operands[0], NE, operands[2],
24458 				     operands[3], NULL, NULL);
24459 	  code = AND;
24460 	  break;
24461 	case UNEQ:
24462 	  temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[2],
24463 				      operands[3], NULL, NULL);
24464 	  cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[2],
24465 				     operands[3], NULL, NULL);
24466 	  code = IOR;
24467 	  break;
24468 	default:
24469 	  gcc_unreachable ();
24470 	}
24471       cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
24472 				 OPTAB_DIRECT);
24473     }
24474   else
24475     cmp = ix86_expand_sse_cmp (operands[0], code, operands[2], operands[3],
24476 			       operands[1], operands[2]);
24477 
24478   if (operands[0] != cmp)
24479     emit_move_insn (operands[0], cmp);
24480 
24481   return true;
24482 }
24483 
24484 static rtx
24485 ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
24486 			 rtx op_true, rtx op_false, bool *negate)
24487 {
24488   machine_mode data_mode = GET_MODE (dest);
24489   machine_mode mode = GET_MODE (cop0);
24490   rtx x;
24491 
24492   *negate = false;
24493 
24494   /* XOP supports all of the comparisons on all 128-bit vector int types.  */
24495   if (TARGET_XOP
24496       && (mode == V16QImode || mode == V8HImode
24497 	  || mode == V4SImode || mode == V2DImode))
24498     ;
24499   else
24500     {
24501       /* Canonicalize the comparison to EQ, GT, GTU.  */
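      /* Only EQ and (signed) GT exist as native vector compares (GTU is
	 handled by further tricks below); the remaining codes are obtained
	 by swapping the operands and/or negating the result mask.  */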
24502       switch (code)
24503 	{
24504 	case EQ:
24505 	case GT:
24506 	case GTU:
24507 	  break;
24508 
24509 	case NE:
24510 	case LE:
24511 	case LEU:
24512 	  code = reverse_condition (code);
24513 	  *negate = true;
24514 	  break;
24515 
24516 	case GE:
24517 	case GEU:
24518 	  code = reverse_condition (code);
24519 	  *negate = true;
24520 	  /* FALLTHRU */
24521 
24522 	case LT:
24523 	case LTU:
24524 	  std::swap (cop0, cop1);
24525 	  code = swap_condition (code);
24526 	  break;
24527 
24528 	default:
24529 	  gcc_unreachable ();
24530 	}
24531 
24532       /* Only SSE4.1/SSE4.2 supports V2DImode.  */
24533       if (mode == V2DImode)
24534 	{
24535 	  switch (code)
24536 	    {
24537 	    case EQ:
24538 	      /* SSE4.1 supports EQ.  */
24539 	      if (!TARGET_SSE4_1)
24540 		return NULL;
24541 	      break;
24542 
24543 	    case GT:
24544 	    case GTU:
24545 	      /* SSE4.2 supports GT/GTU.  */
24546 	      if (!TARGET_SSE4_2)
24547 		return NULL;
24548 	      break;
24549 
24550 	    default:
24551 	      gcc_unreachable ();
24552 	    }
24553 	}
24554 
24555       /* Unsigned parallel compare is not supported by the hardware.
24556 	 Play some tricks to turn this into a signed comparison
24557 	 against 0.  */
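      /* Two tricks are used below: subtract the sign-bit mask from both
	 operands so that signed GT gives the unsigned ordering, or use
	 unsigned saturating subtraction and test the result against zero
	 (the saturating difference is zero exactly when a <= b unsigned).  */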
24558       if (code == GTU)
24559 	{
24560 	  cop0 = force_reg (mode, cop0);
24561 
24562 	  switch (mode)
24563 	    {
24564 	    case E_V16SImode:
24565 	    case E_V8DImode:
24566 	    case E_V8SImode:
24567 	    case E_V4DImode:
24568 	    case E_V4SImode:
24569 	    case E_V2DImode:
24570 		{
24571 		  rtx t1, t2, mask;
24572 		  rtx (*gen_sub3) (rtx, rtx, rtx);
24573 
24574 		  switch (mode)
24575 		    {
24576 		    case E_V16SImode: gen_sub3 = gen_subv16si3; break;
24577 		    case E_V8DImode: gen_sub3 = gen_subv8di3; break;
24578 		    case E_V8SImode: gen_sub3 = gen_subv8si3; break;
24579 		    case E_V4DImode: gen_sub3 = gen_subv4di3; break;
24580 		    case E_V4SImode: gen_sub3 = gen_subv4si3; break;
24581 		    case E_V2DImode: gen_sub3 = gen_subv2di3; break;
24582 		    default:
24583 		      gcc_unreachable ();
24584 		    }
24585 		  /* Subtract (-(INT MAX) - 1) from both operands to make
24586 		     them signed.  */
24587 		  mask = ix86_build_signbit_mask (mode, true, false);
24588 		  t1 = gen_reg_rtx (mode);
24589 		  emit_insn (gen_sub3 (t1, cop0, mask));
24590 
24591 		  t2 = gen_reg_rtx (mode);
24592 		  emit_insn (gen_sub3 (t2, cop1, mask));
24593 
24594 		  cop0 = t1;
24595 		  cop1 = t2;
24596 		  code = GT;
24597 		}
24598 	      break;
24599 
24600 	    case E_V64QImode:
24601 	    case E_V32HImode:
24602 	    case E_V32QImode:
24603 	    case E_V16HImode:
24604 	    case E_V16QImode:
24605 	    case E_V8HImode:
24606 	      /* Perform a parallel unsigned saturating subtraction.  */
24607 	      x = gen_reg_rtx (mode);
24608 	      emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0,
24609 							   cop1)));
24610 
24611 	      cop0 = x;
24612 	      cop1 = CONST0_RTX (mode);
24613 	      code = EQ;
24614 	      *negate = !*negate;
24615 	      break;
24616 
24617 	    default:
24618 	      gcc_unreachable ();
24619 	    }
24620 	}
24621     }
24622 
24623   if (*negate)
24624     std::swap (op_true, op_false);
24625 
24626   /* Allow the comparison to be done in one mode, but the movcc to
24627      happen in another mode.  */
24628   if (data_mode == mode)
24629     {
24630       x = ix86_expand_sse_cmp (dest, code, cop0, cop1,
24631 			       op_true, op_false);
24632     }
24633   else
24634     {
24635       gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
24636       x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
24637 			       op_true, op_false);
24638       if (GET_MODE (x) == mode)
24639 	x = gen_lowpart (data_mode, x);
24640     }
24641 
24642   return x;
24643 }
24644 
24645 /* Expand integer vector comparison.  */
24646 
24647 bool
24648 ix86_expand_int_vec_cmp (rtx operands[])
24649 {
24650   rtx_code code = GET_CODE (operands[1]);
24651   bool negate = false;
24652   rtx cmp = ix86_expand_int_sse_cmp (operands[0], code, operands[2],
24653 				     operands[3], NULL, NULL, &negate);
24654 
24655   if (!cmp)
24656     return false;
24657 
24658   if (negate)
24659     cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp,
24660 				   CONST0_RTX (GET_MODE (cmp)),
24661 				   NULL, NULL, &negate);
24662 
24663   gcc_assert (!negate);
24664 
24665   if (operands[0] != cmp)
24666     emit_move_insn (operands[0], cmp);
24667 
24668   return true;
24669 }
24670 
24671 /* Expand a floating-point vector conditional move; a vcond operation
24672    rather than a movcc operation.  */
24673 
24674 bool
24675 ix86_expand_fp_vcond (rtx operands[])
24676 {
24677   enum rtx_code code = GET_CODE (operands[3]);
24678   rtx cmp;
24679 
24680   code = ix86_prepare_sse_fp_compare_args (operands[0], code,
24681 					   &operands[4], &operands[5]);
24682   if (code == UNKNOWN)
24683     {
24684       rtx temp;
24685       switch (GET_CODE (operands[3]))
24686 	{
24687 	case LTGT:
24688 	  temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
24689 				      operands[5], operands[0], operands[0]);
24690 	  cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
24691 				     operands[5], operands[1], operands[2]);
24692 	  code = AND;
24693 	  break;
24694 	case UNEQ:
24695 	  temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
24696 				      operands[5], operands[0], operands[0]);
24697 	  cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
24698 				     operands[5], operands[1], operands[2]);
24699 	  code = IOR;
24700 	  break;
24701 	default:
24702 	  gcc_unreachable ();
24703 	}
24704       cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
24705 				 OPTAB_DIRECT);
24706       ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
24707       return true;
24708     }
24709 
24710   if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
24711 				 operands[5], operands[1], operands[2]))
24712     return true;
24713 
24714   cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
24715 			     operands[1], operands[2]);
24716   ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
24717   return true;
24718 }
24719 
24720 /* Expand a signed/unsigned integral vector conditional move.  */
24721 
24722 bool
24723 ix86_expand_int_vcond (rtx operands[])
24724 {
24725   machine_mode data_mode = GET_MODE (operands[0]);
24726   machine_mode mode = GET_MODE (operands[4]);
24727   enum rtx_code code = GET_CODE (operands[3]);
24728   bool negate = false;
24729   rtx x, cop0, cop1;
24730 
24731   cop0 = operands[4];
24732   cop1 = operands[5];
24733 
24734   /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
24735      and x < 0 ? 1 : 0 into (unsigned) x >> 31.  */
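  /* (The shift count used below is the element width minus one, so 31 only
     when the vector elements are SImode.)  */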
24736   if ((code == LT || code == GE)
24737       && data_mode == mode
24738       && cop1 == CONST0_RTX (mode)
24739       && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
24740       && GET_MODE_UNIT_SIZE (data_mode) > 1
24741       && GET_MODE_UNIT_SIZE (data_mode) <= 8
24742       && (GET_MODE_SIZE (data_mode) == 16
24743 	  || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
24744     {
24745       rtx negop = operands[2 - (code == LT)];
24746       int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1;
24747       if (negop == CONST1_RTX (data_mode))
24748 	{
24749 	  rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
24750 					 operands[0], 1, OPTAB_DIRECT);
24751 	  if (res != operands[0])
24752 	    emit_move_insn (operands[0], res);
24753 	  return true;
24754 	}
24755       else if (GET_MODE_INNER (data_mode) != DImode
24756 	       && vector_all_ones_operand (negop, data_mode))
24757 	{
24758 	  rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
24759 					 operands[0], 0, OPTAB_DIRECT);
24760 	  if (res != operands[0])
24761 	    emit_move_insn (operands[0], res);
24762 	  return true;
24763 	}
24764     }
24765 
24766   if (!nonimmediate_operand (cop1, mode))
24767     cop1 = force_reg (mode, cop1);
24768   if (!general_operand (operands[1], data_mode))
24769     operands[1] = force_reg (data_mode, operands[1]);
24770   if (!general_operand (operands[2], data_mode))
24771     operands[2] = force_reg (data_mode, operands[2]);
24772 
24773   x = ix86_expand_int_sse_cmp (operands[0], code, cop0, cop1,
24774 			       operands[1], operands[2], &negate);
24775 
24776   if (!x)
24777     return false;
24778 
24779   ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
24780 			 operands[2-negate]);
24781   return true;
24782 }
24783 
24784 /* AVX512F does support 64-byte integer vector operations,
24785    thus the longest vector we are faced with is V64QImode.  */
24786 #define MAX_VECT_LEN	64
24787 
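/* Description of one vector permutation to expand: TARGET is to receive
   the result of permuting OP0 and OP1 according to the NELT indices in
   PERM, all in mode VMODE.  ONE_OPERAND_P is set when both inputs are the
   same; TESTING_P means only check whether the permutation can be
   expanded, without emitting any insns.  */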
24788 struct expand_vec_perm_d
24789 {
24790   rtx target, op0, op1;
24791   unsigned char perm[MAX_VECT_LEN];
24792   machine_mode vmode;
24793   unsigned char nelt;
24794   bool one_operand_p;
24795   bool testing_p;
24796 };
24797 
24798 static bool
24799 ix86_expand_vec_perm_vpermt2 (rtx target, rtx mask, rtx op0, rtx op1,
24800 			      struct expand_vec_perm_d *d)
24801 {
24802   /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const
24803      expanders, so args are either in d, or in op0, op1 etc.  */
24804   machine_mode mode = GET_MODE (d ? d->op0 : op0);
24805   machine_mode maskmode = mode;
24806   rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
24807 
24808   switch (mode)
24809     {
24810     case E_V8HImode:
24811       if (TARGET_AVX512VL && TARGET_AVX512BW)
24812 	gen = gen_avx512vl_vpermt2varv8hi3;
24813       break;
24814     case E_V16HImode:
24815       if (TARGET_AVX512VL && TARGET_AVX512BW)
24816 	gen = gen_avx512vl_vpermt2varv16hi3;
24817       break;
24818     case E_V64QImode:
24819       if (TARGET_AVX512VBMI)
24820 	gen = gen_avx512bw_vpermt2varv64qi3;
24821       break;
24822     case E_V32HImode:
24823       if (TARGET_AVX512BW)
24824 	gen = gen_avx512bw_vpermt2varv32hi3;
24825       break;
24826     case E_V4SImode:
24827       if (TARGET_AVX512VL)
24828 	gen = gen_avx512vl_vpermt2varv4si3;
24829       break;
24830     case E_V8SImode:
24831       if (TARGET_AVX512VL)
24832 	gen = gen_avx512vl_vpermt2varv8si3;
24833       break;
24834     case E_V16SImode:
24835       if (TARGET_AVX512F)
24836 	gen = gen_avx512f_vpermt2varv16si3;
24837       break;
24838     case E_V4SFmode:
24839       if (TARGET_AVX512VL)
24840 	{
24841 	  gen = gen_avx512vl_vpermt2varv4sf3;
24842 	  maskmode = V4SImode;
24843 	}
24844       break;
24845     case E_V8SFmode:
24846       if (TARGET_AVX512VL)
24847 	{
24848 	  gen = gen_avx512vl_vpermt2varv8sf3;
24849 	  maskmode = V8SImode;
24850 	}
24851       break;
24852     case E_V16SFmode:
24853       if (TARGET_AVX512F)
24854 	{
24855 	  gen = gen_avx512f_vpermt2varv16sf3;
24856 	  maskmode = V16SImode;
24857 	}
24858       break;
24859     case E_V2DImode:
24860       if (TARGET_AVX512VL)
24861 	gen = gen_avx512vl_vpermt2varv2di3;
24862       break;
24863     case E_V4DImode:
24864       if (TARGET_AVX512VL)
24865 	gen = gen_avx512vl_vpermt2varv4di3;
24866       break;
24867     case E_V8DImode:
24868       if (TARGET_AVX512F)
24869 	gen = gen_avx512f_vpermt2varv8di3;
24870       break;
24871     case E_V2DFmode:
24872       if (TARGET_AVX512VL)
24873 	{
24874 	  gen = gen_avx512vl_vpermt2varv2df3;
24875 	  maskmode = V2DImode;
24876 	}
24877       break;
24878     case E_V4DFmode:
24879       if (TARGET_AVX512VL)
24880 	{
24881 	  gen = gen_avx512vl_vpermt2varv4df3;
24882 	  maskmode = V4DImode;
24883 	}
24884       break;
24885     case E_V8DFmode:
24886       if (TARGET_AVX512F)
24887 	{
24888 	  gen = gen_avx512f_vpermt2varv8df3;
24889 	  maskmode = V8DImode;
24890 	}
24891       break;
24892     default:
24893       break;
24894     }
24895 
24896   if (gen == NULL)
24897     return false;
24898 
24899   /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const
24900      expanders, so args are either in d, or in op0, op1 etc.  */
24901   if (d)
24902     {
24903       rtx vec[64];
24904       target = d->target;
24905       op0 = d->op0;
24906       op1 = d->op1;
24907       for (int i = 0; i < d->nelt; ++i)
24908 	vec[i] = GEN_INT (d->perm[i]);
24909       mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
24910     }
24911 
24912   emit_insn (gen (target, force_reg (maskmode, mask), op0, op1));
24913   return true;
24914 }
24915 
24916 /* Expand a variable vector permutation.  */
24917 
24918 void
24919 ix86_expand_vec_perm (rtx operands[])
24920 {
24921   rtx target = operands[0];
24922   rtx op0 = operands[1];
24923   rtx op1 = operands[2];
24924   rtx mask = operands[3];
24925   rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
24926   machine_mode mode = GET_MODE (op0);
24927   machine_mode maskmode = GET_MODE (mask);
24928   int w, e, i;
24929   bool one_operand_shuffle = rtx_equal_p (op0, op1);
24930 
24931   /* Number of elements in the vector.  */
24932   w = GET_MODE_NUNITS (mode);
24933   e = GET_MODE_UNIT_SIZE (mode);
24934   gcc_assert (w <= 64);
24935 
24936   if (TARGET_AVX512F && one_operand_shuffle)
24937     {
24938       rtx (*gen) (rtx, rtx, rtx) = NULL;
24939       switch (mode)
24940 	{
24941 	case E_V16SImode:
24942 	  gen = gen_avx512f_permvarv16si;
24943 	  break;
24944 	case E_V16SFmode:
24945 	  gen = gen_avx512f_permvarv16sf;
24946 	  break;
24947 	case E_V8DImode:
24948 	  gen = gen_avx512f_permvarv8di;
24949 	  break;
24950 	case E_V8DFmode:
24951 	  gen = gen_avx512f_permvarv8df;
24952 	  break;
24953 	default:
24954 	  break;
24955 	}
24956       if (gen != NULL)
24957 	{
24958 	  emit_insn (gen (target, op0, mask));
24959 	  return;
24960 	}
24961     }
24962 
24963   if (ix86_expand_vec_perm_vpermt2 (target, mask, op0, op1, NULL))
24964     return;
24965 
24966   if (TARGET_AVX2)
24967     {
24968       if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
24969 	{
24970 	  /* Unfortunately, the VPERMQ and VPERMPD instructions only support
24971 	     a constant shuffle operand.  With a tiny bit of effort we can
24972 	     use VPERMD instead.  A re-interpretation stall for V4DFmode is
24973 	     unfortunate but there's no avoiding it.
24974 	     Similarly for V16HImode we don't have instructions for variable
24975 	     shuffling, while for V32QImode we can, after preparing suitable
24976 	     masks, use vpshufb; vpshufb; vpermq; vpor.  */
24977 
24978 	  if (mode == V16HImode)
24979 	    {
24980 	      maskmode = mode = V32QImode;
24981 	      w = 32;
24982 	      e = 1;
24983 	    }
24984 	  else
24985 	    {
24986 	      maskmode = mode = V8SImode;
24987 	      w = 8;
24988 	      e = 4;
24989 	    }
24990 	  t1 = gen_reg_rtx (maskmode);
24991 
24992 	  /* Replicate the low bits of the V4DImode mask into V8SImode:
24993 	       mask = { A B C D }
24994 	       t1 = { A A B B C C D D }.  */
24995 	  for (i = 0; i < w / 2; ++i)
24996 	    vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
24997 	  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
24998 	  vt = force_reg (maskmode, vt);
24999 	  mask = gen_lowpart (maskmode, mask);
25000 	  if (maskmode == V8SImode)
25001 	    emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
25002 	  else
25003 	    emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
25004 
25005 	  /* Multiply the shuffle indices by two.  */
25006 	  t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
25007 				    OPTAB_DIRECT);
25008 
25009 	  /* Add one to the odd shuffle indices:
25010 		t1 = { A*2, A*2+1, B*2, B*2+1, ... }.  */
25011 	  for (i = 0; i < w / 2; ++i)
25012 	    {
25013 	      vec[i * 2] = const0_rtx;
25014 	      vec[i * 2 + 1] = const1_rtx;
25015 	    }
25016 	  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
25017 	  vt = validize_mem (force_const_mem (maskmode, vt));
25018 	  t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
25019 				    OPTAB_DIRECT);
25020 
25021 	  /* Continue as if V8SImode (resp. V32QImode) was used initially.  */
25022 	  operands[3] = mask = t1;
25023 	  target = gen_reg_rtx (mode);
25024 	  op0 = gen_lowpart (mode, op0);
25025 	  op1 = gen_lowpart (mode, op1);
25026 	}
25027 
25028       switch (mode)
25029 	{
25030 	case E_V8SImode:
25031 	  /* The VPERMD and VPERMPS instructions already properly ignore
25032 	     the high bits of the shuffle elements.  No need for us to
25033 	     perform an AND ourselves.  */
25034 	  if (one_operand_shuffle)
25035 	    {
25036 	      emit_insn (gen_avx2_permvarv8si (target, op0, mask));
25037 	      if (target != operands[0])
25038 		emit_move_insn (operands[0],
25039 				gen_lowpart (GET_MODE (operands[0]), target));
25040 	    }
25041 	  else
25042 	    {
25043 	      t1 = gen_reg_rtx (V8SImode);
25044 	      t2 = gen_reg_rtx (V8SImode);
25045 	      emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
25046 	      emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
25047 	      goto merge_two;
25048 	    }
25049 	  return;
25050 
25051 	case E_V8SFmode:
25052 	  mask = gen_lowpart (V8SImode, mask);
25053 	  if (one_operand_shuffle)
25054 	    emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
25055 	  else
25056 	    {
25057 	      t1 = gen_reg_rtx (V8SFmode);
25058 	      t2 = gen_reg_rtx (V8SFmode);
25059 	      emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
25060 	      emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
25061 	      goto merge_two;
25062 	    }
25063 	  return;
25064 
25065         case E_V4SImode:
25066 	  /* By combining the two 128-bit input vectors into one 256-bit
25067 	     input vector, we can use VPERMD and VPERMPS for the full
25068 	     two-operand shuffle.  */
25069 	  t1 = gen_reg_rtx (V8SImode);
25070 	  t2 = gen_reg_rtx (V8SImode);
25071 	  emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
25072 	  emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
25073 	  emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
25074 	  emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
25075 	  return;
25076 
25077         case E_V4SFmode:
25078 	  t1 = gen_reg_rtx (V8SFmode);
25079 	  t2 = gen_reg_rtx (V8SImode);
25080 	  mask = gen_lowpart (V4SImode, mask);
25081 	  emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
25082 	  emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
25083 	  emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
25084 	  emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
25085 	  return;
25086 
25087 	case E_V32QImode:
25088 	  t1 = gen_reg_rtx (V32QImode);
25089 	  t2 = gen_reg_rtx (V32QImode);
25090 	  t3 = gen_reg_rtx (V32QImode);
25091 	  vt2 = GEN_INT (-128);
25092 	  vt = gen_const_vec_duplicate (V32QImode, vt2);
25093 	  vt = force_reg (V32QImode, vt);
25094 	  for (i = 0; i < 32; i++)
25095 	    vec[i] = i < 16 ? vt2 : const0_rtx;
25096 	  vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
25097 	  vt2 = force_reg (V32QImode, vt2);
25098 	  /* From mask create two adjusted masks, which contain the same
25099 	     bits as mask in the low 7 bits of each vector element.
25100 	     The first mask will have the most significant bit clear
25101 	     if it requests element from the same 128-bit lane
25102 	     and MSB set if it requests element from the other 128-bit lane.
25103 	     The second mask will have the opposite values of the MSB,
25104 	     and additionally will have its 128-bit lanes swapped.
25105 	     E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
25106 	     t1   { 07 92 9e 09 ... | 17 19 85 1f ... } and
25107 	     t3   { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
25108 	     stands for the other 12 bytes.  */
25109 	  /* The bit telling whether an element comes from the same lane or
25110 	     the other lane is bit 4, so shift it up by 3 to the MSB position.  */
25111 	  t5 = gen_reg_rtx (V4DImode);
25112 	  emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
25113 				    GEN_INT (3)));
25114 	  /* Clear MSB bits from the mask just in case it had them set.  */
25115 	  emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
25116 	  /* After this t1 will have MSB set for elements from other lane.  */
25117 	  emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
25118 	  /* Clear bits other than MSB.  */
25119 	  emit_insn (gen_andv32qi3 (t1, t1, vt));
25120 	  /* Or in the lower bits from mask into t3.  */
25121 	  emit_insn (gen_iorv32qi3 (t3, t1, t2));
25122 	  /* And invert MSB bits in t1, so MSB is set for elements from the same
25123 	     lane.  */
25124 	  emit_insn (gen_xorv32qi3 (t1, t1, vt));
25125 	  /* Swap 128-bit lanes in t3.  */
25126 	  t6 = gen_reg_rtx (V4DImode);
25127 	  emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
25128 					  const2_rtx, GEN_INT (3),
25129 					  const0_rtx, const1_rtx));
25130 	  /* And or in the lower bits from mask into t1.  */
25131 	  emit_insn (gen_iorv32qi3 (t1, t1, t2));
25132 	  if (one_operand_shuffle)
25133 	    {
25134 	      /* Each of these shuffles will put 0s in places where an
25135 		 element from the other 128-bit lane is needed; otherwise
25136 		 it will shuffle in the requested value.  */
25137 	      emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
25138 						gen_lowpart (V32QImode, t6)));
25139 	      emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
25140 	      /* For t3 the 128-bit lanes are swapped again.  */
25141 	      t7 = gen_reg_rtx (V4DImode);
25142 	      emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
25143 					      const2_rtx, GEN_INT (3),
25144 					      const0_rtx, const1_rtx));
25145 	      /* And oring both together leads to the result.  */
25146 	      emit_insn (gen_iorv32qi3 (target, t1,
25147 					gen_lowpart (V32QImode, t7)));
25148 	      if (target != operands[0])
25149 		emit_move_insn (operands[0],
25150 				gen_lowpart (GET_MODE (operands[0]), target));
25151 	      return;
25152 	    }
25153 
25154 	  t4 = gen_reg_rtx (V32QImode);
25155 	  /* Similarly to the one_operand_shuffle code above, just repeated
25156 	     twice, once for each operand.  The merge_two: code will then
25157 	     merge the two results together.  */
25158 	  emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
25159 					    gen_lowpart (V32QImode, t6)));
25160 	  emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
25161 					    gen_lowpart (V32QImode, t6)));
25162 	  emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
25163 	  emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
25164 	  t7 = gen_reg_rtx (V4DImode);
25165 	  emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
25166 					  const2_rtx, GEN_INT (3),
25167 					  const0_rtx, const1_rtx));
25168 	  t8 = gen_reg_rtx (V4DImode);
25169 	  emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
25170 					  const2_rtx, GEN_INT (3),
25171 					  const0_rtx, const1_rtx));
25172 	  emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
25173 	  emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
25174 	  t1 = t4;
25175 	  t2 = t3;
25176 	  goto merge_two;
25177 
25178 	default:
25179 	  gcc_assert (GET_MODE_SIZE (mode) <= 16);
25180 	  break;
25181 	}
25182     }
25183 
25184   if (TARGET_XOP)
25185     {
25186       /* The XOP VPPERM insn supports three inputs.  By ignoring the
25187 	 one_operand_shuffle special case, we avoid creating another
25188 	 set of constant vectors in memory.  */
25189       one_operand_shuffle = false;
25190 
25191       /* mask = mask & {2*w-1, ...} */
25192       vt = GEN_INT (2*w - 1);
25193     }
25194   else
25195     {
25196       /* mask = mask & {w-1, ...} */
25197       vt = GEN_INT (w - 1);
25198     }
25199 
25200   vt = gen_const_vec_duplicate (maskmode, vt);
25201   mask = expand_simple_binop (maskmode, AND, mask, vt,
25202 			      NULL_RTX, 0, OPTAB_DIRECT);
25203 
25204   /* For non-QImode operations, convert the word permutation control
25205      into a byte permutation control.  */
25206   if (mode != V16QImode)
25207     {
25208       mask = expand_simple_binop (maskmode, ASHIFT, mask,
25209 				  GEN_INT (exact_log2 (e)),
25210 				  NULL_RTX, 0, OPTAB_DIRECT);
25211 
25212       /* Convert mask to vector of chars.  */
25213       mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
25214 
25215       /* Replicate each of the input bytes into byte positions:
25216 	 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
25217 	 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
25218 	 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}.  */
25219       for (i = 0; i < 16; ++i)
25220 	vec[i] = GEN_INT (i/e * e);
25221       vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
25222       vt = validize_mem (force_const_mem (V16QImode, vt));
25223       if (TARGET_XOP)
25224 	emit_insn (gen_xop_pperm (mask, mask, mask, vt));
25225       else
25226 	emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
25227 
25228       /* Convert it into the byte positions by doing
25229 	 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...}  */
25230       for (i = 0; i < 16; ++i)
25231 	vec[i] = GEN_INT (i % e);
25232       vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
25233       vt = validize_mem (force_const_mem (V16QImode, vt));
25234       emit_insn (gen_addv16qi3 (mask, mask, vt));
25235     }
25236 
25237   /* The actual shuffle operations all operate on V16QImode.  */
25238   op0 = gen_lowpart (V16QImode, op0);
25239   op1 = gen_lowpart (V16QImode, op1);
25240 
25241   if (TARGET_XOP)
25242     {
25243       if (GET_MODE (target) != V16QImode)
25244 	target = gen_reg_rtx (V16QImode);
25245       emit_insn (gen_xop_pperm (target, op0, op1, mask));
25246       if (target != operands[0])
25247 	emit_move_insn (operands[0],
25248 			gen_lowpart (GET_MODE (operands[0]), target));
25249     }
25250   else if (one_operand_shuffle)
25251     {
25252       if (GET_MODE (target) != V16QImode)
25253 	target = gen_reg_rtx (V16QImode);
25254       emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
25255       if (target != operands[0])
25256 	emit_move_insn (operands[0],
25257 			gen_lowpart (GET_MODE (operands[0]), target));
25258     }
25259   else
25260     {
25261       rtx xops[6];
25262       bool ok;
25263 
25264       /* Shuffle the two input vectors independently.  */
25265       t1 = gen_reg_rtx (V16QImode);
25266       t2 = gen_reg_rtx (V16QImode);
25267       emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
25268       emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
25269 
25270  merge_two:
25271       /* Then merge them together.  The key is whether any given control
25272          element contained a bit set that indicates the second word.  */
25273       mask = operands[3];
25274       vt = GEN_INT (w);
25275       if (maskmode == V2DImode && !TARGET_SSE4_1)
25276 	{
25277 	  /* Without SSE4.1, we don't have V2DImode EQ.  Perform one
25278 	     more shuffle to convert the V2DI input mask into a V4SI
25279 	     input mask.  At that point the masking that expand_int_vcond
25280 	     performs will work as desired.  */
25281 	  rtx t3 = gen_reg_rtx (V4SImode);
25282 	  emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
25283 				        const0_rtx, const0_rtx,
25284 				        const2_rtx, const2_rtx));
25285 	  mask = t3;
25286 	  maskmode = V4SImode;
25287 	  e = w = 4;
25288 	}
25289 
25290       vt = gen_const_vec_duplicate (maskmode, vt);
25291       vt = force_reg (maskmode, vt);
25292       mask = expand_simple_binop (maskmode, AND, mask, vt,
25293 				  NULL_RTX, 0, OPTAB_DIRECT);
25294 
25295       if (GET_MODE (target) != mode)
25296 	target = gen_reg_rtx (mode);
25297       xops[0] = target;
25298       xops[1] = gen_lowpart (mode, t2);
25299       xops[2] = gen_lowpart (mode, t1);
25300       xops[3] = gen_rtx_EQ (maskmode, mask, vt);
25301       xops[4] = mask;
25302       xops[5] = vt;
25303       ok = ix86_expand_int_vcond (xops);
25304       gcc_assert (ok);
25305       if (target != operands[0])
25306 	emit_move_insn (operands[0],
25307 			gen_lowpart (GET_MODE (operands[0]), target));
25308     }
25309 }
25310 
25311 /* Unpack SRC into DEST, which has the next wider integer vector type.
25312    UNSIGNED_P is true if we should do zero extension, else sign extension.
25313    HIGH_P is true if we want the N/2 high elements, else the low elements.  */
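/* For example, with SRC in V8HImode and HIGH_P false, DEST (in V4SImode)
   receives the four low halfwords of SRC, each sign- or zero-extended to
   32 bits according to UNSIGNED_P.  */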
25314 
25315 void
25316 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
25317 {
25318   machine_mode imode = GET_MODE (src);
25319   rtx tmp;
25320 
25321   if (TARGET_SSE4_1)
25322     {
25323       rtx (*unpack)(rtx, rtx);
25324       rtx (*extract)(rtx, rtx) = NULL;
25325       machine_mode halfmode = BLKmode;
25326 
25327       switch (imode)
25328 	{
25329 	case E_V64QImode:
25330 	  if (unsigned_p)
25331 	    unpack = gen_avx512bw_zero_extendv32qiv32hi2;
25332 	  else
25333 	    unpack = gen_avx512bw_sign_extendv32qiv32hi2;
25334 	  halfmode = V32QImode;
25335 	  extract
25336 	    = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
25337 	  break;
25338 	case E_V32QImode:
25339 	  if (unsigned_p)
25340 	    unpack = gen_avx2_zero_extendv16qiv16hi2;
25341 	  else
25342 	    unpack = gen_avx2_sign_extendv16qiv16hi2;
25343 	  halfmode = V16QImode;
25344 	  extract
25345 	    = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
25346 	  break;
25347 	case E_V32HImode:
25348 	  if (unsigned_p)
25349 	    unpack = gen_avx512f_zero_extendv16hiv16si2;
25350 	  else
25351 	    unpack = gen_avx512f_sign_extendv16hiv16si2;
25352 	  halfmode = V16HImode;
25353 	  extract
25354 	    = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
25355 	  break;
25356 	case E_V16HImode:
25357 	  if (unsigned_p)
25358 	    unpack = gen_avx2_zero_extendv8hiv8si2;
25359 	  else
25360 	    unpack = gen_avx2_sign_extendv8hiv8si2;
25361 	  halfmode = V8HImode;
25362 	  extract
25363 	    = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
25364 	  break;
25365 	case E_V16SImode:
25366 	  if (unsigned_p)
25367 	    unpack = gen_avx512f_zero_extendv8siv8di2;
25368 	  else
25369 	    unpack = gen_avx512f_sign_extendv8siv8di2;
25370 	  halfmode = V8SImode;
25371 	  extract
25372 	    = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
25373 	  break;
25374 	case E_V8SImode:
25375 	  if (unsigned_p)
25376 	    unpack = gen_avx2_zero_extendv4siv4di2;
25377 	  else
25378 	    unpack = gen_avx2_sign_extendv4siv4di2;
25379 	  halfmode = V4SImode;
25380 	  extract
25381 	    = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
25382 	  break;
25383 	case E_V16QImode:
25384 	  if (unsigned_p)
25385 	    unpack = gen_sse4_1_zero_extendv8qiv8hi2;
25386 	  else
25387 	    unpack = gen_sse4_1_sign_extendv8qiv8hi2;
25388 	  break;
25389 	case E_V8HImode:
25390 	  if (unsigned_p)
25391 	    unpack = gen_sse4_1_zero_extendv4hiv4si2;
25392 	  else
25393 	    unpack = gen_sse4_1_sign_extendv4hiv4si2;
25394 	  break;
25395 	case E_V4SImode:
25396 	  if (unsigned_p)
25397 	    unpack = gen_sse4_1_zero_extendv2siv2di2;
25398 	  else
25399 	    unpack = gen_sse4_1_sign_extendv2siv2di2;
25400 	  break;
25401 	default:
25402 	  gcc_unreachable ();
25403 	}
25404 
25405       if (GET_MODE_SIZE (imode) >= 32)
25406 	{
25407 	  tmp = gen_reg_rtx (halfmode);
25408 	  emit_insn (extract (tmp, src));
25409 	}
25410       else if (high_p)
25411 	{
25412 	  /* Shift higher 8 bytes to lower 8 bytes.  */
25413 	  tmp = gen_reg_rtx (V1TImode);
25414 	  emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
25415 					 GEN_INT (64)));
25416 	  tmp = gen_lowpart (imode, tmp);
25417 	}
25418       else
25419 	tmp = src;
25420 
25421       emit_insn (unpack (dest, tmp));
25422     }
25423   else
25424     {
25425       rtx (*unpack)(rtx, rtx, rtx);
25426 
25427       switch (imode)
25428 	{
25429 	case E_V16QImode:
25430 	  if (high_p)
25431 	    unpack = gen_vec_interleave_highv16qi;
25432 	  else
25433 	    unpack = gen_vec_interleave_lowv16qi;
25434 	  break;
25435 	case E_V8HImode:
25436 	  if (high_p)
25437 	    unpack = gen_vec_interleave_highv8hi;
25438 	  else
25439 	    unpack = gen_vec_interleave_lowv8hi;
25440 	  break;
25441 	case E_V4SImode:
25442 	  if (high_p)
25443 	    unpack = gen_vec_interleave_highv4si;
25444 	  else
25445 	    unpack = gen_vec_interleave_lowv4si;
25446 	  break;
25447 	default:
25448 	  gcc_unreachable ();
25449 	}
25450 
25451       if (unsigned_p)
25452 	tmp = force_reg (imode, CONST0_RTX (imode));
25453       else
25454 	tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
25455 				   src, pc_rtx, pc_rtx);
25456 
25457       rtx tmp2 = gen_reg_rtx (imode);
25458       emit_insn (unpack (tmp2, src, tmp));
25459       emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
25460     }
25461 }
25462 
25463 /* Expand conditional increment or decrement using adc/sbb instructions.
25464    The default case using setcc followed by the conditional move can be
25465    done by generic code.  */
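/* Roughly, "x += (a < b)" for unsigned operands becomes a compare that
   leaves the condition in the carry flag followed by an adc of 0 into x;
   the decrement variant uses sbb instead.  Which form is chosen depends
   on the comparison code and on whether operands[3] is 1 or -1.  */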
25466 bool
25467 ix86_expand_int_addcc (rtx operands[])
25468 {
25469   enum rtx_code code = GET_CODE (operands[1]);
25470   rtx flags;
25471   rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
25472   rtx compare_op;
25473   rtx val = const0_rtx;
25474   bool fpcmp = false;
25475   machine_mode mode;
25476   rtx op0 = XEXP (operands[1], 0);
25477   rtx op1 = XEXP (operands[1], 1);
25478 
25479   if (operands[3] != const1_rtx
25480       && operands[3] != constm1_rtx)
25481     return false;
25482   if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
25483      return false;
25484   code = GET_CODE (compare_op);
25485 
25486   flags = XEXP (compare_op, 0);
25487 
25488   if (GET_MODE (flags) == CCFPmode)
25489     {
25490       fpcmp = true;
25491       code = ix86_fp_compare_code_to_integer (code);
25492     }
25493 
25494   if (code != LTU)
25495     {
25496       val = constm1_rtx;
25497       if (fpcmp)
25498 	PUT_CODE (compare_op,
25499 		  reverse_condition_maybe_unordered
25500 		    (GET_CODE (compare_op)));
25501       else
25502 	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
25503     }
25504 
25505   mode = GET_MODE (operands[0]);
25506 
25507   /* Construct either adc or sbb insn.  */
25508   if ((code == LTU) == (operands[3] == constm1_rtx))
25509     {
25510       switch (mode)
25511 	{
25512 	  case E_QImode:
25513 	    insn = gen_subqi3_carry;
25514 	    break;
25515 	  case E_HImode:
25516 	    insn = gen_subhi3_carry;
25517 	    break;
25518 	  case E_SImode:
25519 	    insn = gen_subsi3_carry;
25520 	    break;
25521 	  case E_DImode:
25522 	    insn = gen_subdi3_carry;
25523 	    break;
25524 	  default:
25525 	    gcc_unreachable ();
25526 	}
25527     }
25528   else
25529     {
25530       switch (mode)
25531 	{
25532 	  case E_QImode:
25533 	    insn = gen_addqi3_carry;
25534 	    break;
25535 	  case E_HImode:
25536 	    insn = gen_addhi3_carry;
25537 	    break;
25538 	  case E_SImode:
25539 	    insn = gen_addsi3_carry;
25540 	    break;
25541 	  case E_DImode:
25542 	    insn = gen_adddi3_carry;
25543 	    break;
25544 	  default:
25545 	    gcc_unreachable ();
25546 	}
25547     }
25548   emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
25549 
25550   return true;
25551 }
25552 
25553 
25554 /* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
25555    but works for floating point parameters and non-offsettable memories.
25556    For pushes, it returns just stack offsets; the values will be saved
25557    in the right order.  At most four parts are generated.  */
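/* For example, on a 32-bit target a DFmode operand is split into two
   SImode parts and an XFmode operand into three; on a 64-bit target an
   XFmode or TFmode operand is split into two parts.  */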
25558 
25559 static int
25560 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
25561 {
25562   int size;
25563 
25564   if (!TARGET_64BIT)
25565     size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
25566   else
25567     size = (GET_MODE_SIZE (mode) + 4) / 8;
25568 
25569   gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
25570   gcc_assert (size >= 2 && size <= 4);
25571 
25572   /* Optimize constant pool reference to immediates.  This is used by fp
25573      moves, that force all constants to memory to allow combining.  */
25574   if (MEM_P (operand) && MEM_READONLY_P (operand))
25575     operand = avoid_constant_pool_reference (operand);
25576 
25577   if (MEM_P (operand) && !offsettable_memref_p (operand))
25578     {
25579       /* The only non-offsettable memories we handle are pushes.  */
25580       int ok = push_operand (operand, VOIDmode);
25581 
25582       gcc_assert (ok);
25583 
25584       operand = copy_rtx (operand);
25585       PUT_MODE (operand, word_mode);
25586       parts[0] = parts[1] = parts[2] = parts[3] = operand;
25587       return size;
25588     }
25589 
25590   if (GET_CODE (operand) == CONST_VECTOR)
25591     {
25592       scalar_int_mode imode = int_mode_for_mode (mode).require ();
25593       /* Caution: if we looked through a constant pool memory above,
25594 	 the operand may actually have a different mode now.  That's
25595 	 ok, since we want to pun this all the way back to an integer.  */
25596       operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
25597       gcc_assert (operand != NULL);
25598       mode = imode;
25599     }
25600 
25601   if (!TARGET_64BIT)
25602     {
25603       if (mode == DImode)
25604 	split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
25605       else
25606 	{
25607 	  int i;
25608 
25609 	  if (REG_P (operand))
25610 	    {
25611 	      gcc_assert (reload_completed);
25612 	      for (i = 0; i < size; i++)
25613 		parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
25614 	    }
25615 	  else if (offsettable_memref_p (operand))
25616 	    {
25617 	      operand = adjust_address (operand, SImode, 0);
25618 	      parts[0] = operand;
25619 	      for (i = 1; i < size; i++)
25620 		parts[i] = adjust_address (operand, SImode, 4 * i);
25621 	    }
25622 	  else if (CONST_DOUBLE_P (operand))
25623 	    {
25624 	      const REAL_VALUE_TYPE *r;
25625 	      long l[4];
25626 
25627 	      r = CONST_DOUBLE_REAL_VALUE (operand);
25628 	      switch (mode)
25629 		{
25630 		case E_TFmode:
25631 		  real_to_target (l, r, mode);
25632 		  parts[3] = gen_int_mode (l[3], SImode);
25633 		  parts[2] = gen_int_mode (l[2], SImode);
25634 		  break;
25635 		case E_XFmode:
25636 		  /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
25637 		     long double may not be 80-bit.  */
25638 		  real_to_target (l, r, mode);
25639 		  parts[2] = gen_int_mode (l[2], SImode);
25640 		  break;
25641 		case E_DFmode:
25642 		  REAL_VALUE_TO_TARGET_DOUBLE (*r, l);
25643 		  break;
25644 		default:
25645 		  gcc_unreachable ();
25646 		}
25647 	      parts[1] = gen_int_mode (l[1], SImode);
25648 	      parts[0] = gen_int_mode (l[0], SImode);
25649 	    }
25650 	  else
25651 	    gcc_unreachable ();
25652 	}
25653     }
25654   else
25655     {
25656       if (mode == TImode)
25657 	split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
25658       if (mode == XFmode || mode == TFmode)
25659 	{
25660 	  machine_mode upper_mode = mode==XFmode ? SImode : DImode;
25661 	  if (REG_P (operand))
25662 	    {
25663 	      gcc_assert (reload_completed);
25664 	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
25665 	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
25666 	    }
25667 	  else if (offsettable_memref_p (operand))
25668 	    {
25669 	      operand = adjust_address (operand, DImode, 0);
25670 	      parts[0] = operand;
25671 	      parts[1] = adjust_address (operand, upper_mode, 8);
25672 	    }
25673 	  else if (CONST_DOUBLE_P (operand))
25674 	    {
25675 	      long l[4];
25676 
25677 	      real_to_target (l, CONST_DOUBLE_REAL_VALUE (operand), mode);
25678 
25679 	      /* real_to_target puts 32-bit pieces in each long.  */
25680 	      parts[0] = gen_int_mode ((l[0] & HOST_WIDE_INT_C (0xffffffff))
25681 				       | ((l[1] & HOST_WIDE_INT_C (0xffffffff))
25682 					  << 32), DImode);
25683 
25684 	      if (upper_mode == SImode)
25685 	        parts[1] = gen_int_mode (l[2], SImode);
25686 	      else
25687 	        parts[1]
25688 		  = gen_int_mode ((l[2] & HOST_WIDE_INT_C (0xffffffff))
25689 				  | ((l[3] & HOST_WIDE_INT_C (0xffffffff))
25690 				     << 32), DImode);
25691 	    }
25692 	  else
25693 	    gcc_unreachable ();
25694 	}
25695     }
25696 
25697   return size;
25698 }
25699 
25700 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
25701    All required insns are emitted here; no further moves are needed.
25702    Operands 2 onwards hold the destination parts and operands 6 onwards
25703    the corresponding source parts, in the correct order.  */
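/* For example, a TImode move on a 64-bit target is split into two DImode
   moves, emitted in an order chosen so that a destination register that
   overlaps the source is not clobbered before the source is read.  */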
25704 
25705 void
25706 ix86_split_long_move (rtx operands[])
25707 {
25708   rtx part[2][4];
25709   int nparts, i, j;
25710   int push = 0;
25711   int collisions = 0;
25712   machine_mode mode = GET_MODE (operands[0]);
25713   bool collisionparts[4];
25714 
25715   /* The DFmode expanders may ask us to move a double.
25716      For a 64-bit target this is a single move.  By hiding that fact
25717      here we simplify the i386.md splitters.  */
25718   if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
25719     {
25720       /* Optimize constant pool reference to immediates.  This is used by
25721 	 fp moves, that force all constants to memory to allow combining.  */
25722 
25723       if (MEM_P (operands[1])
25724 	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
25725 	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
25726 	operands[1] = get_pool_constant (XEXP (operands[1], 0));
25727       if (push_operand (operands[0], VOIDmode))
25728 	{
25729 	  operands[0] = copy_rtx (operands[0]);
25730 	  PUT_MODE (operands[0], word_mode);
25731 	}
25732       else
25733         operands[0] = gen_lowpart (DImode, operands[0]);
25734       operands[1] = gen_lowpart (DImode, operands[1]);
25735       emit_move_insn (operands[0], operands[1]);
25736       return;
25737     }
25738 
25739   /* The only non-offsettable memory we handle is push.  */
25740   if (push_operand (operands[0], VOIDmode))
25741     push = 1;
25742   else
25743     gcc_assert (!MEM_P (operands[0])
25744 		|| offsettable_memref_p (operands[0]));
25745 
25746   nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
25747   ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
25748 
25749   /* When emitting a push, take care of source operands on the stack.  */
25750   if (push && MEM_P (operands[1])
25751       && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
25752     {
25753       rtx src_base = XEXP (part[1][nparts - 1], 0);
25754 
25755       /* Compensate for the stack decrement by 4.  */
25756       if (!TARGET_64BIT && nparts == 3
25757 	  && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
25758 	src_base = plus_constant (Pmode, src_base, 4);
25759 
25760       /* src_base refers to the stack pointer and is
25761 	 automatically decreased by emitted push.  */
25762       for (i = 0; i < nparts; i++)
25763 	part[1][i] = change_address (part[1][i],
25764 				     GET_MODE (part[1][i]), src_base);
25765     }
25766 
25767   /* We need to do copy in the right order in case an address register
25768      of the source overlaps the destination.  */
25769   if (REG_P (part[0][0]) && MEM_P (part[1][0]))
25770     {
25771       rtx tmp;
25772 
25773       for (i = 0; i < nparts; i++)
25774 	{
25775 	  collisionparts[i]
25776 	    = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
25777 	  if (collisionparts[i])
25778 	    collisions++;
25779 	}
25780 
25781       /* Collision in the middle part can be handled by reordering.  */
25782       if (collisions == 1 && nparts == 3 && collisionparts [1])
25783 	{
25784 	  std::swap (part[0][1], part[0][2]);
25785 	  std::swap (part[1][1], part[1][2]);
25786 	}
25787       else if (collisions == 1
25788 	       && nparts == 4
25789 	       && (collisionparts [1] || collisionparts [2]))
25790 	{
25791 	  if (collisionparts [1])
25792 	    {
25793 	      std::swap (part[0][1], part[0][2]);
25794 	      std::swap (part[1][1], part[1][2]);
25795 	    }
25796 	  else
25797 	    {
25798 	      std::swap (part[0][2], part[0][3]);
25799 	      std::swap (part[1][2], part[1][3]);
25800 	    }
25801 	}
25802 
25803       /* If there are more collisions, we can't handle them by reordering.
25804 	 Do an lea into the last part and use only one colliding move.  */
25805       else if (collisions > 1)
25806 	{
25807 	  rtx base, addr;
25808 
25809 	  collisions = 1;
25810 
25811 	  base = part[0][nparts - 1];
25812 
25813 	  /* Handle the case when the last part isn't valid for lea.
25814 	     Happens in 64-bit mode storing the 12-byte XFmode.  */
25815 	  if (GET_MODE (base) != Pmode)
25816 	    base = gen_rtx_REG (Pmode, REGNO (base));
25817 
25818 	  addr = XEXP (part[1][0], 0);
25819 	  if (TARGET_TLS_DIRECT_SEG_REFS)
25820 	    {
25821 	      struct ix86_address parts;
25822 	      int ok = ix86_decompose_address (addr, &parts);
25823 	      gcc_assert (ok);
25824 	      /* It is not valid to use %gs: or %fs: in lea.  */
25825 	      gcc_assert (parts.seg == ADDR_SPACE_GENERIC);
25826 	    }
25827 	  emit_insn (gen_rtx_SET (base, addr));
25828 	  part[1][0] = replace_equiv_address (part[1][0], base);
25829 	  for (i = 1; i < nparts; i++)
25830 	    {
25831 	      tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
25832 	      part[1][i] = replace_equiv_address (part[1][i], tmp);
25833 	    }
25834 	}
25835     }
25836 
25837   if (push)
25838     {
25839       if (!TARGET_64BIT)
25840 	{
25841 	  if (nparts == 3)
25842 	    {
25843 	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
25844                 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
25845 					  stack_pointer_rtx, GEN_INT (-4)));
25846 	      emit_move_insn (part[0][2], part[1][2]);
25847 	    }
25848 	  else if (nparts == 4)
25849 	    {
25850 	      emit_move_insn (part[0][3], part[1][3]);
25851 	      emit_move_insn (part[0][2], part[1][2]);
25852 	    }
25853 	}
25854       else
25855 	{
25856 	  /* In 64-bit mode we don't have a 32-bit push available.  If this is
25857 	     a register, that is OK - we just use the larger counterpart.  We also
25858 	     retype memory - this comes from an attempt to avoid a REX prefix on
25859 	     moving the second half of a TFmode value.  */
25860 	  if (GET_MODE (part[1][1]) == SImode)
25861 	    {
25862 	      switch (GET_CODE (part[1][1]))
25863 		{
25864 		case MEM:
25865 		  part[1][1] = adjust_address (part[1][1], DImode, 0);
25866 		  break;
25867 
25868 		case REG:
25869 		  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
25870 		  break;
25871 
25872 		default:
25873 		  gcc_unreachable ();
25874 		}
25875 
25876 	      if (GET_MODE (part[1][0]) == SImode)
25877 		part[1][0] = part[1][1];
25878 	    }
25879 	}
25880       emit_move_insn (part[0][1], part[1][1]);
25881       emit_move_insn (part[0][0], part[1][0]);
25882       return;
25883     }
25884 
25885   /* Choose correct order to not overwrite the source before it is copied.  */
25886   if ((REG_P (part[0][0])
25887        && REG_P (part[1][1])
25888        && (REGNO (part[0][0]) == REGNO (part[1][1])
25889 	   || (nparts == 3
25890 	       && REGNO (part[0][0]) == REGNO (part[1][2]))
25891 	   || (nparts == 4
25892 	       && REGNO (part[0][0]) == REGNO (part[1][3]))))
25893       || (collisions > 0
25894 	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
25895     {
25896       for (i = 0, j = nparts - 1; i < nparts; i++, j--)
25897 	{
25898 	  operands[2 + i] = part[0][j];
25899 	  operands[6 + i] = part[1][j];
25900 	}
25901     }
25902   else
25903     {
25904       for (i = 0; i < nparts; i++)
25905 	{
25906 	  operands[2 + i] = part[0][i];
25907 	  operands[6 + i] = part[1][i];
25908 	}
25909     }
25910 
25911   /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
25912   if (optimize_insn_for_size_p ())
25913     {
25914       for (j = 0; j < nparts - 1; j++)
25915 	if (CONST_INT_P (operands[6 + j])
25916 	    && operands[6 + j] != const0_rtx
25917 	    && REG_P (operands[2 + j]))
25918 	  for (i = j; i < nparts - 1; i++)
25919 	    if (CONST_INT_P (operands[7 + i])
25920 		&& INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
25921 	      operands[7 + i] = operands[2 + j];
25922     }
25923 
25924   for (i = 0; i < nparts; i++)
25925     emit_move_insn (operands[2 + i], operands[6 + i]);
25926 
25927   return;
25928 }
25929 
25930 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
25931    left shift by a constant, either using a single shift or
25932    a sequence of add instructions.  */
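/* For example, a left shift of a half-mode part by 1 is emitted as a
   single add of the operand to itself; larger counts use a chain of adds
   only when that is cheaper than a shift and we are not optimizing for
   size.  */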
25933 
25934 static void
25935 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
25936 {
25937   rtx (*insn)(rtx, rtx, rtx);
25938 
25939   if (count == 1
25940       || (count * ix86_cost->add <= ix86_cost->shift_const
25941 	  && !optimize_insn_for_size_p ()))
25942     {
25943       insn = mode == DImode ? gen_addsi3 : gen_adddi3;
25944       while (count-- > 0)
25945 	emit_insn (insn (operand, operand, operand));
25946     }
25947   else
25948     {
25949       insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
25950       emit_insn (insn (operand, operand, GEN_INT (count)));
25951     }
25952 }
25953 
25954 void
25955 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
25956 {
25957   rtx (*gen_ashl3)(rtx, rtx, rtx);
25958   rtx (*gen_shld)(rtx, rtx, rtx);
25959   int half_width = GET_MODE_BITSIZE (mode) >> 1;
25960 
25961   rtx low[2], high[2];
25962   int count;
25963 
25964   if (CONST_INT_P (operands[2]))
25965     {
25966       split_double_mode (mode, operands, 2, low, high);
25967       count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
25968 
25969       if (count >= half_width)
25970 	{
25971 	  emit_move_insn (high[0], low[1]);
25972 	  emit_move_insn (low[0], const0_rtx);
25973 
25974 	  if (count > half_width)
25975 	    ix86_expand_ashl_const (high[0], count - half_width, mode);
25976 	}
25977       else
25978 	{
25979 	  gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
25980 
25981 	  if (!rtx_equal_p (operands[0], operands[1]))
25982 	    emit_move_insn (operands[0], operands[1]);
25983 
25984 	  emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
25985 	  ix86_expand_ashl_const (low[0], count, mode);
25986 	}
25987       return;
25988     }
25989 
25990   split_double_mode (mode, operands, 1, low, high);
25991 
25992   gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
25993 
25994   if (operands[1] == const1_rtx)
25995     {
25996       /* Assuming we've chosen QImode-capable registers, 1 << N can be
25997 	 done with two 32/64-bit shifts, no branches, no cmoves.  */
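      /* The idea: clear both halves, set the low half to 1 when the shift
	 count's half-width bit is clear and the high half to 1 when it is
	 set, then shift both halves by the count (the hardware masks the
	 count to the half width).  */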
25998       if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
25999 	{
26000 	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
26001 
26002 	  ix86_expand_clear (low[0]);
26003 	  ix86_expand_clear (high[0]);
26004 	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
26005 
26006 	  d = gen_lowpart (QImode, low[0]);
26007 	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
26008 	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
26009 	  emit_insn (gen_rtx_SET (d, s));
26010 
26011 	  d = gen_lowpart (QImode, high[0]);
26012 	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
26013 	  s = gen_rtx_NE (QImode, flags, const0_rtx);
26014 	  emit_insn (gen_rtx_SET (d, s));
26015 	}
26016 
26017       /* Otherwise, we can get the same results by manually performing
26018 	 a bit extract operation on bit 5/6, and then performing the two
26019 	 shifts.  The two methods of getting 0/1 into low/high are exactly
26020 	 the same size.  Avoiding the shift in the bit extract case helps
26021 	 pentium4 a bit; no one else seems to care much either way.  */
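      /* Concretely, when MODE is DImode (SImode halves) the tested bit is
	 bit 5 of the count: high = (count >> 5) & 1, low = high ^ 1, and
	 both halves are then shifted by the count, which the hardware
	 masks to the half width.  */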
26022       else
26023 	{
26024 	  machine_mode half_mode;
26025 	  rtx (*gen_lshr3)(rtx, rtx, rtx);
26026 	  rtx (*gen_and3)(rtx, rtx, rtx);
26027 	  rtx (*gen_xor3)(rtx, rtx, rtx);
26028 	  HOST_WIDE_INT bits;
26029 	  rtx x;
26030 
26031 	  if (mode == DImode)
26032 	    {
26033 	      half_mode = SImode;
26034 	      gen_lshr3 = gen_lshrsi3;
26035 	      gen_and3 = gen_andsi3;
26036 	      gen_xor3 = gen_xorsi3;
26037 	      bits = 5;
26038 	    }
26039 	  else
26040 	    {
26041 	      half_mode = DImode;
26042 	      gen_lshr3 = gen_lshrdi3;
26043 	      gen_and3 = gen_anddi3;
26044 	      gen_xor3 = gen_xordi3;
26045 	      bits = 6;
26046 	    }
26047 
26048 	  if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
26049 	    x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
26050 	  else
26051 	    x = gen_lowpart (half_mode, operands[2]);
26052 	  emit_insn (gen_rtx_SET (high[0], x));
26053 
26054 	  emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
26055 	  emit_insn (gen_and3 (high[0], high[0], const1_rtx));
26056 	  emit_move_insn (low[0], high[0]);
26057 	  emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
26058 	}
26059 
26060       emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
26061       emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
26062       return;
26063     }
26064 
26065   if (operands[1] == constm1_rtx)
26066     {
26067       /* For -1 << N, we can avoid the shld instruction, because we
26068 	 know that we're shifting 0...31/63 ones into a -1.  */
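      /* Both halves start out as all-ones; the low half is shifted left
	 by the count below, and the post-shift fixup then moves the low
	 half into the high half and clears the low half whenever the
	 count is at least the half width.  */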
26069       emit_move_insn (low[0], constm1_rtx);
26070       if (optimize_insn_for_size_p ())
26071 	emit_move_insn (high[0], low[0]);
26072       else
26073 	emit_move_insn (high[0], constm1_rtx);
26074     }
26075   else
26076     {
26077       gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
26078 
26079       if (!rtx_equal_p (operands[0], operands[1]))
26080 	emit_move_insn (operands[0], operands[1]);
26081 
26082       split_double_mode (mode, operands, 1, low, high);
26083       emit_insn (gen_shld (high[0], low[0], operands[2]));
26084     }
26085 
26086   emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
26087 
26088   if (TARGET_CMOVE && scratch)
26089     {
26090       rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
26091 	= mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
26092 
26093       ix86_expand_clear (scratch);
26094       emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
26095     }
26096   else
26097     {
26098       rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
26099 	= mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
26100 
26101       emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
26102     }
26103 }
26104 
26105 void
26106 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
26107 {
26108   rtx (*gen_ashr3)(rtx, rtx, rtx)
26109     = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
26110   rtx (*gen_shrd)(rtx, rtx, rtx);
26111   int half_width = GET_MODE_BITSIZE (mode) >> 1;
26112 
26113   rtx low[2], high[2];
26114   int count;
26115 
26116   if (CONST_INT_P (operands[2]))
26117     {
26118       split_double_mode (mode, operands, 2, low, high);
26119       count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
26120 
26121       if (count == GET_MODE_BITSIZE (mode) - 1)
26122 	{
26123 	  emit_move_insn (high[0], high[1]);
26124 	  emit_insn (gen_ashr3 (high[0], high[0],
26125 				GEN_INT (half_width - 1)));
26126 	  emit_move_insn (low[0], high[0]);
26127 
26128 	}
26129       else if (count >= half_width)
26130 	{
26131 	  emit_move_insn (low[0], high[1]);
26132 	  emit_move_insn (high[0], low[0]);
26133 	  emit_insn (gen_ashr3 (high[0], high[0],
26134 				GEN_INT (half_width - 1)));
26135 
26136 	  if (count > half_width)
26137 	    emit_insn (gen_ashr3 (low[0], low[0],
26138 				  GEN_INT (count - half_width)));
26139 	}
26140       else
26141 	{
26142 	  gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
26143 
26144 	  if (!rtx_equal_p (operands[0], operands[1]))
26145 	    emit_move_insn (operands[0], operands[1]);
26146 
26147 	  emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
26148 	  emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
26149 	}
26150     }
26151   else
26152     {
26153       gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
26154 
26155       if (!rtx_equal_p (operands[0], operands[1]))
26156 	emit_move_insn (operands[0], operands[1]);
26157 
26158       split_double_mode (mode, operands, 1, low, high);
26159 
26160       emit_insn (gen_shrd (low[0], high[0], operands[2]));
26161       emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
26162 
26163       if (TARGET_CMOVE && scratch)
26164 	{
26165 	  rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
26166 	    = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
26167 
26168 	  emit_move_insn (scratch, high[0]);
26169 	  emit_insn (gen_ashr3 (scratch, scratch,
26170 				GEN_INT (half_width - 1)));
26171 	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
26172 					  scratch));
26173 	}
26174       else
26175 	{
26176 	  rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
26177 	    = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
26178 
26179 	  emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
26180 	}
26181     }
26182 }
26183 
26184 void
26185 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
26186 {
26187   rtx (*gen_lshr3)(rtx, rtx, rtx)
26188     = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
26189   rtx (*gen_shrd)(rtx, rtx, rtx);
26190   int half_width = GET_MODE_BITSIZE (mode) >> 1;
26191 
26192   rtx low[2], high[2];
26193   int count;
26194 
26195   if (CONST_INT_P (operands[2]))
26196     {
26197       split_double_mode (mode, operands, 2, low, high);
26198       count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
26199 
26200       if (count >= half_width)
26201 	{
26202 	  emit_move_insn (low[0], high[1]);
26203 	  ix86_expand_clear (high[0]);
26204 
26205 	  if (count > half_width)
26206 	    emit_insn (gen_lshr3 (low[0], low[0],
26207 				  GEN_INT (count - half_width)));
26208 	}
26209       else
26210 	{
26211 	  gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
26212 
26213 	  if (!rtx_equal_p (operands[0], operands[1]))
26214 	    emit_move_insn (operands[0], operands[1]);
26215 
26216 	  emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
26217 	  emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
26218 	}
26219     }
26220   else
26221     {
26222       gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
26223 
26224       if (!rtx_equal_p (operands[0], operands[1]))
26225 	emit_move_insn (operands[0], operands[1]);
26226 
26227       split_double_mode (mode, operands, 1, low, high);
26228 
26229       emit_insn (gen_shrd (low[0], high[0], operands[2]));
26230       emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
26231 
26232       if (TARGET_CMOVE && scratch)
26233 	{
26234 	  rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
26235 	    = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
26236 
26237 	  ix86_expand_clear (scratch);
26238 	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
26239 					  scratch));
26240 	}
26241       else
26242 	{
26243 	  rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
26244 	    = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
26245 
26246 	  emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
26247 	}
26248     }
26249 }
26250 
26251 /* Predict just emitted jump instruction to be taken with probability PROB.  */
26252 static void
26253 predict_jump (int prob)
26254 {
26255   rtx_insn *insn = get_last_insn ();
26256   gcc_assert (JUMP_P (insn));
26257   add_reg_br_prob_note (insn, profile_probability::from_reg_br_prob_base (prob));
26258 }
26259 
26260 /* Helper function for the string operations below.  Test whether VARIABLE
26261    is aligned to VALUE bytes.  If it is, jump to the label.  */
26262 static rtx_code_label *
26263 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
26264 {
26265   rtx_code_label *label = gen_label_rtx ();
26266   rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
26267   if (GET_MODE (variable) == DImode)
26268     emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
26269   else
26270     emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
26271   emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
26272 			   1, label);
26273   if (epilogue)
26274     predict_jump (REG_BR_PROB_BASE * 50 / 100);
26275   else
26276     predict_jump (REG_BR_PROB_BASE * 90 / 100);
26277   return label;
26278 }
26279 
26280 /* Decrease COUNTREG by VALUE.  */
26281 static void
26282 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
26283 {
26284   rtx (*gen_add)(rtx, rtx, rtx)
26285     = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
26286 
26287   emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
26288 }
26289 
26290 /* Zero extend possibly SImode EXP to Pmode register.  */
26291 rtx
26292 ix86_zero_extend_to_Pmode (rtx exp)
26293 {
26294   return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
26295 }
26296 
26297 /* Divide COUNTREG by SCALE.  */
26298 static rtx
26299 scale_counter (rtx countreg, int scale)
26300 {
26301   rtx sc;
26302 
26303   if (scale == 1)
26304     return countreg;
26305   if (CONST_INT_P (countreg))
26306     return GEN_INT (INTVAL (countreg) / scale);
26307   gcc_assert (REG_P (countreg));
26308 
26309   sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
26310 			    GEN_INT (exact_log2 (scale)),
26311 			    NULL, 1, OPTAB_DIRECT);
26312   return sc;
26313 }
26314 
26315 /* Return mode for the memcpy/memset loop counter.  Prefer SImode over
26316    DImode for constant loop counts.  */
26317 
26318 static machine_mode
26319 counter_mode (rtx count_exp)
26320 {
26321   if (GET_MODE (count_exp) != VOIDmode)
26322     return GET_MODE (count_exp);
26323   if (!CONST_INT_P (count_exp))
26324     return Pmode;
26325   if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
26326     return DImode;
26327   return SImode;
26328 }
26329 
26330 /* Copy the address to a Pmode register.  This is used for x32 to
26331    truncate DImode TLS address to a SImode register. */
26332 
26333 static rtx
26334 ix86_copy_addr_to_reg (rtx addr)
26335 {
26336   rtx reg;
26337   if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
26338     {
26339       reg = copy_addr_to_reg (addr);
26340       REG_POINTER (reg) = 1;
26341       return reg;
26342     }
26343   else
26344     {
26345       gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
26346       reg = copy_to_mode_reg (DImode, addr);
26347       REG_POINTER (reg) = 1;
26348       return gen_rtx_SUBREG (SImode, reg, 0);
26349     }
26350 }
26351 
26352 /* When ISSETMEM is FALSE, output a simple loop to copy the memory pointed to
26353    by SRCPTR to DESTPTR in chunks of MODE unrolled UNROLL times; the overall
26354    size is COUNT, specified in bytes.  When ISSETMEM is TRUE, output the
26355    equivalent loop to set the memory to VALUE (which is supposed to be in MODE).
26356 
26357    The size is rounded down to a whole number of chunks moved at once.
26358    SRCMEM and DESTMEM provide MEM rtxes to supply proper aliasing info.  */
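/* For example, with MODE == SImode and UNROLL == 4 each loop iteration
   moves (or stores) 16 bytes; any remainder smaller than that is left
   for the caller to handle.  */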
26359 
26360 
26361 static void
26362 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
26363 			       rtx destptr, rtx srcptr, rtx value,
26364 			       rtx count, machine_mode mode, int unroll,
26365 			       int expected_size, bool issetmem)
26366 {
26367   rtx_code_label *out_label, *top_label;
26368   rtx iter, tmp;
26369   machine_mode iter_mode = counter_mode (count);
26370   int piece_size_n = GET_MODE_SIZE (mode) * unroll;
26371   rtx piece_size = GEN_INT (piece_size_n);
26372   rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
26373   rtx size;
26374   int i;
26375 
26376   top_label = gen_label_rtx ();
26377   out_label = gen_label_rtx ();
26378   iter = gen_reg_rtx (iter_mode);
26379 
26380   size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
26381 			      NULL, 1, OPTAB_DIRECT);
26382   /* Those two should combine.  */
26383   if (piece_size == const1_rtx)
26384     {
26385       emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
26386 			       true, out_label);
26387       predict_jump (REG_BR_PROB_BASE * 10 / 100);
26388     }
26389   emit_move_insn (iter, const0_rtx);
26390 
26391   emit_label (top_label);
26392 
26393   tmp = convert_modes (Pmode, iter_mode, iter, true);
26394 
26395   /* This assert could be relaxed - in that case we would need to compute
26396      the smallest power of two containing PIECE_SIZE_N and pass it to
26397      offset_address.  */
26398   gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
26399   destmem = offset_address (destmem, tmp, piece_size_n);
26400   destmem = adjust_address (destmem, mode, 0);
26401 
26402   if (!issetmem)
26403     {
26404       srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
26405       srcmem = adjust_address (srcmem, mode, 0);
26406 
26407       /* When unrolling for chips that reorder memory reads and writes,
26408 	 we can save registers by using a single temporary.
26409 	 Also, using 4 temporaries is overkill in 32-bit mode.  */
26410       if (!TARGET_64BIT && 0)
26411 	{
26412 	  for (i = 0; i < unroll; i++)
26413 	    {
26414 	      if (i)
26415 		{
26416 		  destmem =
26417 		    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
26418 		  srcmem =
26419 		    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
26420 		}
26421 	      emit_move_insn (destmem, srcmem);
26422 	    }
26423 	}
26424       else
26425 	{
26426 	  rtx tmpreg[4];
26427 	  gcc_assert (unroll <= 4);
26428 	  for (i = 0; i < unroll; i++)
26429 	    {
26430 	      tmpreg[i] = gen_reg_rtx (mode);
26431 	      if (i)
26432 		{
26433 		  srcmem =
26434 		    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
26435 		}
26436 	      emit_move_insn (tmpreg[i], srcmem);
26437 	    }
26438 	  for (i = 0; i < unroll; i++)
26439 	    {
26440 	      if (i)
26441 		{
26442 		  destmem =
26443 		    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
26444 		}
26445 	      emit_move_insn (destmem, tmpreg[i]);
26446 	    }
26447 	}
26448     }
26449   else
26450     for (i = 0; i < unroll; i++)
26451       {
26452 	if (i)
26453 	  destmem =
26454 	    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
26455 	emit_move_insn (destmem, value);
26456       }
26457 
26458   tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
26459 			     true, OPTAB_LIB_WIDEN);
26460   if (tmp != iter)
26461     emit_move_insn (iter, tmp);
26462 
26463   emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
26464 			   true, top_label);
26465   if (expected_size != -1)
26466     {
26467       expected_size /= GET_MODE_SIZE (mode) * unroll;
26468       if (expected_size == 0)
26469 	predict_jump (0);
26470       else if (expected_size > REG_BR_PROB_BASE)
26471 	predict_jump (REG_BR_PROB_BASE - 1);
26472       else
26473         predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
26474     }
26475   else
26476     predict_jump (REG_BR_PROB_BASE * 80 / 100);
26477   iter = ix86_zero_extend_to_Pmode (iter);
26478   tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
26479 			     true, OPTAB_LIB_WIDEN);
26480   if (tmp != destptr)
26481     emit_move_insn (destptr, tmp);
26482   if (!issetmem)
26483     {
26484       tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
26485 				 true, OPTAB_LIB_WIDEN);
26486       if (tmp != srcptr)
26487 	emit_move_insn (srcptr, tmp);
26488     }
26489   emit_label (out_label);
26490 }
26491 
26492 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
26493    When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
26494    When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
26495    For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
26496    ORIG_VALUE is the original value passed to memset to fill the memory with.
26497    Other arguments have the same meaning as for the previous function.  */
26498 
26499 static void
26500 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
26501 			   rtx destptr, rtx srcptr, rtx value, rtx orig_value,
26502 			   rtx count,
26503 			   machine_mode mode, bool issetmem)
26504 {
26505   rtx destexp;
26506   rtx srcexp;
26507   rtx countreg;
26508   HOST_WIDE_INT rounded_count;
26509 
26510   /* If possible, it is shorter to use rep movs.
26511      TODO: Maybe it is better to move this logic to decide_alg.  */
26512   if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
26513       && (!issetmem || orig_value == const0_rtx))
26514     mode = SImode;
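  /* As an illustration, a constant memset of 64 zero bytes that arrives here
     with QImode is upgraded to SImode, so the count is scaled down to 16 and
     a 4-byte "rep stos" is emitted instead of a byte-wise one.  */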
26515 
26516   if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
26517     destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
26518 
26519   countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
26520 						       GET_MODE_SIZE (mode)));
26521   if (mode != QImode)
26522     {
26523       destexp = gen_rtx_ASHIFT (Pmode, countreg,
26524 				GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
26525       destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
26526     }
26527   else
26528     destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
26529   if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
26530     {
26531       rounded_count
26532 	= ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
26533       destmem = shallow_copy_rtx (destmem);
26534       set_mem_size (destmem, rounded_count);
26535     }
26536   else if (MEM_SIZE_KNOWN_P (destmem))
26537     clear_mem_size (destmem);
26538 
26539   if (issetmem)
26540     {
26541       value = force_reg (mode, gen_lowpart (mode, value));
26542       emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
26543     }
26544   else
26545     {
26546       if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
26547 	srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
26548       if (mode != QImode)
26549 	{
26550 	  srcexp = gen_rtx_ASHIFT (Pmode, countreg,
26551 				   GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
26552 	  srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
26553 	}
26554       else
26555 	srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
26556       if (CONST_INT_P (count))
26557 	{
26558 	  rounded_count
26559 	    = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
26560 	  srcmem = shallow_copy_rtx (srcmem);
26561 	  set_mem_size (srcmem, rounded_count);
26562 	}
26563       else
26564 	{
26565 	  if (MEM_SIZE_KNOWN_P (srcmem))
26566 	    clear_mem_size (srcmem);
26567 	}
26568       emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
26569 			      destexp, srcexp));
26570     }
26571 }
26572 
26573 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
26574    DESTMEM.
26575    SRCMEM is passed by pointer so that it can be updated on return.
26576    The return value is the updated DESTMEM.  */
26577 static rtx
26578 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
26579 	     HOST_WIDE_INT size_to_move)
26580 {
26581   rtx dst = destmem, src = *srcmem, adjust, tempreg;
26582   enum insn_code code;
26583   machine_mode move_mode;
26584   int piece_size, i;
26585 
26586   /* Find the widest mode in which we could perform moves.
26587      Start with the biggest power of 2 no larger than SIZE_TO_MOVE and halve
26588      it until a move of that size is supported.  */
26589   piece_size = 1 << floor_log2 (size_to_move);
26590   while (!int_mode_for_size (piece_size * BITS_PER_UNIT, 0).exists (&move_mode)
26591 	 || (code = optab_handler (mov_optab, move_mode)) == CODE_FOR_nothing)
26592     {
26593       gcc_assert (piece_size > 1);
26594       piece_size >>= 1;
26595     }
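  /* For example, SIZE_TO_MOVE == 24 starts the search with 16-byte pieces;
     if no 16-byte integer move is available, the loop falls back to 8-byte
     and then smaller pieces.  */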
26596 
26597   /* Find the corresponding vector mode with the same size as MOVE_MODE.
26598      MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.).  */
26599   if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
26600     {
26601       int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
26602       if (!mode_for_vector (word_mode, nunits).exists (&move_mode)
26603 	  || (code = optab_handler (mov_optab, move_mode)) == CODE_FOR_nothing)
26604 	{
26605 	  move_mode = word_mode;
26606 	  piece_size = GET_MODE_SIZE (move_mode);
26607 	  code = optab_handler (mov_optab, move_mode);
26608 	}
26609     }
26610   gcc_assert (code != CODE_FOR_nothing);
26611 
26612   dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
26613   src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
26614 
26615   /* Emit moves.  We'll need SIZE_TO_MOVE/PIECE_SIZE moves.  */
26616   gcc_assert (size_to_move % piece_size == 0);
26617   adjust = GEN_INT (piece_size);
26618   for (i = 0; i < size_to_move; i += piece_size)
26619     {
26620       /* We move from memory to memory, so we'll need to do it via
26621 	 a temporary register.  */
26622       tempreg = gen_reg_rtx (move_mode);
26623       emit_insn (GEN_FCN (code) (tempreg, src));
26624       emit_insn (GEN_FCN (code) (dst, tempreg));
26625 
26626       emit_move_insn (destptr,
26627 		      gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
26628       emit_move_insn (srcptr,
26629 		      gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
26630 
26631       dst = adjust_automodify_address_nv (dst, move_mode, destptr,
26632 					  piece_size);
26633       src = adjust_automodify_address_nv (src, move_mode, srcptr,
26634 					  piece_size);
26635     }
26636 
26637   /* Update DST and SRC rtx.  */
26638   *srcmem = src;
26639   return dst;
26640 }
26641 
26642 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.  */
26643 static void
26644 expand_movmem_epilogue (rtx destmem, rtx srcmem,
26645 			rtx destptr, rtx srcptr, rtx count, int max_size)
26646 {
26647   rtx src, dest;
26648   if (CONST_INT_P (count))
26649     {
26650       HOST_WIDE_INT countval = INTVAL (count);
26651       HOST_WIDE_INT epilogue_size = countval % max_size;
26652       int i;
26653 
26654       /* For now MAX_SIZE should be a power of 2.  This assert could be
26655 	 relaxed, but it'll require a bit more complicated epilogue
26656 	 expanding.  */
26657       gcc_assert ((max_size & (max_size - 1)) == 0);
26658       for (i = max_size; i >= 1; i >>= 1)
26659 	{
26660 	  if (epilogue_size & i)
26661 	    destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
26662 	}
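      /* For example, with MAX_SIZE == 16 an epilogue of 13 bytes is emitted
	 as one 8-byte, one 4-byte and one 1-byte move.  */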
26663       return;
26664     }
26665   if (max_size > 8)
26666     {
26667       count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
26668 				    count, 1, OPTAB_DIRECT);
26669       expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
26670 				     count, QImode, 1, 4, false);
26671       return;
26672     }
26673 
26674   /* When single stringop instructions are available, they cheaply advance
26675      the dest and src pointers for us.  Otherwise we save code size by
26676      maintaining an offset (zero is readily available from the preceding rep
26677      operation) and using x86 addressing modes.  */
26678   if (TARGET_SINGLE_STRINGOP)
26679     {
26680       if (max_size > 4)
26681 	{
26682 	  rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
26683 	  src = change_address (srcmem, SImode, srcptr);
26684 	  dest = change_address (destmem, SImode, destptr);
26685 	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
26686 	  emit_label (label);
26687 	  LABEL_NUSES (label) = 1;
26688 	}
26689       if (max_size > 2)
26690 	{
26691 	  rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
26692 	  src = change_address (srcmem, HImode, srcptr);
26693 	  dest = change_address (destmem, HImode, destptr);
26694 	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
26695 	  emit_label (label);
26696 	  LABEL_NUSES (label) = 1;
26697 	}
26698       if (max_size > 1)
26699 	{
26700 	  rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
26701 	  src = change_address (srcmem, QImode, srcptr);
26702 	  dest = change_address (destmem, QImode, destptr);
26703 	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
26704 	  emit_label (label);
26705 	  LABEL_NUSES (label) = 1;
26706 	}
26707     }
26708   else
26709     {
26710       rtx offset = force_reg (Pmode, const0_rtx);
26711       rtx tmp;
26712 
26713       if (max_size > 4)
26714 	{
26715 	  rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
26716 	  src = change_address (srcmem, SImode, srcptr);
26717 	  dest = change_address (destmem, SImode, destptr);
26718 	  emit_move_insn (dest, src);
26719 	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
26720 				     true, OPTAB_LIB_WIDEN);
26721 	  if (tmp != offset)
26722 	    emit_move_insn (offset, tmp);
26723 	  emit_label (label);
26724 	  LABEL_NUSES (label) = 1;
26725 	}
26726       if (max_size > 2)
26727 	{
26728 	  rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
26729 	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
26730 	  src = change_address (srcmem, HImode, tmp);
26731 	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
26732 	  dest = change_address (destmem, HImode, tmp);
26733 	  emit_move_insn (dest, src);
26734 	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
26735 				     true, OPTAB_LIB_WIDEN);
26736 	  if (tmp != offset)
26737 	    emit_move_insn (offset, tmp);
26738 	  emit_label (label);
26739 	  LABEL_NUSES (label) = 1;
26740 	}
26741       if (max_size > 1)
26742 	{
26743 	  rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
26744 	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
26745 	  src = change_address (srcmem, QImode, tmp);
26746 	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
26747 	  dest = change_address (destmem, QImode, tmp);
26748 	  emit_move_insn (dest, src);
26749 	  emit_label (label);
26750 	  LABEL_NUSES (label) = 1;
26751 	}
26752     }
26753 }
26754 
26755 /* This function emits moves to fill SIZE_TO_MOVE bytes starting at DESTMEM
26756    with value PROMOTED_VAL.
26757    The return value is the updated DESTMEM.  */
26759 static rtx
26760 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
26761 	     HOST_WIDE_INT size_to_move)
26762 {
26763   rtx dst = destmem, adjust;
26764   enum insn_code code;
26765   machine_mode move_mode;
26766   int piece_size, i;
26767 
26768   /* Find the widest mode in which we could perform moves.
26769      Start with the mode of PROMOTED_VAL and narrow it if SIZE_TO_MOVE
26770      is smaller than that mode's size.  */
26771   move_mode = GET_MODE (promoted_val);
26772   if (move_mode == VOIDmode)
26773     move_mode = QImode;
26774   if (size_to_move < GET_MODE_SIZE (move_mode))
26775     {
26776       unsigned int move_bits = size_to_move * BITS_PER_UNIT;
26777       move_mode = int_mode_for_size (move_bits, 0).require ();
26778       promoted_val = gen_lowpart (move_mode, promoted_val);
26779     }
26780   piece_size = GET_MODE_SIZE (move_mode);
26781   code = optab_handler (mov_optab, move_mode);
26782   gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
26783 
26784   dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
26785 
26786   /* Emit moves.  We'll need SIZE_TO_MOVE/PIECE_SIZE moves.  */
26787   gcc_assert (size_to_move % piece_size == 0);
26788   adjust = GEN_INT (piece_size);
26789   for (i = 0; i < size_to_move; i += piece_size)
26790     {
26791       if (piece_size <= GET_MODE_SIZE (word_mode))
26792 	{
26793 	  emit_insn (gen_strset (destptr, dst, promoted_val));
26794 	  dst = adjust_automodify_address_nv (dst, move_mode, destptr,
26795 					      piece_size);
26796 	  continue;
26797 	}
26798 
26799       emit_insn (GEN_FCN (code) (dst, promoted_val));
26800 
26801       emit_move_insn (destptr,
26802 		      gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
26803 
26804       dst = adjust_automodify_address_nv (dst, move_mode, destptr,
26805 					  piece_size);
26806     }
26807 
26808   /* Update DST rtx.  */
26809   return dst;
26810 }
26811 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
26812 static void
26813 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
26814 				 rtx count, int max_size)
26815 {
26816   count =
26817     expand_simple_binop (counter_mode (count), AND, count,
26818 			 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
26819   expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
26820 				 gen_lowpart (QImode, value), count, QImode,
26821 				 1, max_size / 2, true);
26822 }
26823 
26824 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
26825 static void
26826 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
26827 			rtx count, int max_size)
26828 {
26829   rtx dest;
26830 
26831   if (CONST_INT_P (count))
26832     {
26833       HOST_WIDE_INT countval = INTVAL (count);
26834       HOST_WIDE_INT epilogue_size = countval % max_size;
26835       int i;
26836 
26837       /* For now MAX_SIZE should be a power of 2.  This assert could be
26838 	 relaxed, but it'll require a bit more complicated epilogue
26839 	 expanding.  */
26840       gcc_assert ((max_size & (max_size - 1)) == 0);
26841       for (i = max_size; i >= 1; i >>= 1)
26842 	{
26843 	  if (epilogue_size & i)
26844 	    {
26845 	      if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
26846 		destmem = emit_memset (destmem, destptr, vec_value, i);
26847 	      else
26848 		destmem = emit_memset (destmem, destptr, value, i);
26849 	    }
26850 	}
26851       return;
26852     }
26853   if (max_size > 32)
26854     {
26855       expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
26856       return;
26857     }
26858   if (max_size > 16)
26859     {
26860       rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
26861       if (TARGET_64BIT)
26862 	{
26863 	  dest = change_address (destmem, DImode, destptr);
26864 	  emit_insn (gen_strset (destptr, dest, value));
26865 	  dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
26866 	  emit_insn (gen_strset (destptr, dest, value));
26867 	}
26868       else
26869 	{
26870 	  dest = change_address (destmem, SImode, destptr);
26871 	  emit_insn (gen_strset (destptr, dest, value));
26872 	  dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
26873 	  emit_insn (gen_strset (destptr, dest, value));
26874 	  dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
26875 	  emit_insn (gen_strset (destptr, dest, value));
26876 	  dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
26877 	  emit_insn (gen_strset (destptr, dest, value));
26878 	}
26879       emit_label (label);
26880       LABEL_NUSES (label) = 1;
26881     }
26882   if (max_size > 8)
26883     {
26884       rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
26885       if (TARGET_64BIT)
26886 	{
26887 	  dest = change_address (destmem, DImode, destptr);
26888 	  emit_insn (gen_strset (destptr, dest, value));
26889 	}
26890       else
26891 	{
26892 	  dest = change_address (destmem, SImode, destptr);
26893 	  emit_insn (gen_strset (destptr, dest, value));
26894 	  dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
26895 	  emit_insn (gen_strset (destptr, dest, value));
26896 	}
26897       emit_label (label);
26898       LABEL_NUSES (label) = 1;
26899     }
26900   if (max_size > 4)
26901     {
26902       rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
26903       dest = change_address (destmem, SImode, destptr);
26904       emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
26905       emit_label (label);
26906       LABEL_NUSES (label) = 1;
26907     }
26908   if (max_size > 2)
26909     {
26910       rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
26911       dest = change_address (destmem, HImode, destptr);
26912       emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
26913       emit_label (label);
26914       LABEL_NUSES (label) = 1;
26915     }
26916   if (max_size > 1)
26917     {
26918       rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
26919       dest = change_address (destmem, QImode, destptr);
26920       emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
26921       emit_label (label);
26922       LABEL_NUSES (label) = 1;
26923     }
26924 }
26925 
26926 /* Depending on ISSETMEM, either copy enough bytes from SRCMEM to DESTMEM, or
26927    store enough bytes into DESTMEM, to align it to DESIRED_ALIGNMENT.  The
26928    original alignment is ALIGN.  Depending on ISSETMEM, either arguments
26929    SRCMEM/SRCPTR or VALUE/VEC_VALUE are ignored.
26930    The return value is the updated DESTMEM.  */
26931 static rtx
26932 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
26933 				  rtx destptr, rtx srcptr, rtx value,
26934 				  rtx vec_value, rtx count, int align,
26935 				  int desired_alignment, bool issetmem)
26936 {
26937   int i;
26938   for (i = 1; i < desired_alignment; i <<= 1)
26939     {
26940       if (align <= i)
26941 	{
26942 	  rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
26943 	  if (issetmem)
26944 	    {
26945 	      if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
26946 		destmem = emit_memset (destmem, destptr, vec_value, i);
26947 	      else
26948 		destmem = emit_memset (destmem, destptr, value, i);
26949 	    }
26950 	  else
26951 	    destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
26952 	  ix86_adjust_counter (count, i);
26953 	  emit_label (label);
26954 	  LABEL_NUSES (label) = 1;
26955 	  set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
26956 	}
26957     }
26958   return destmem;
26959 }
26960 
26961 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
26962    or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
26963    and jump to DONE_LABEL.  */
26964 static void
26965 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
26966 			       rtx destptr, rtx srcptr,
26967 			       rtx value, rtx vec_value,
26968 			       rtx count, int size,
26969 			       rtx done_label, bool issetmem)
26970 {
26971   rtx_code_label *label = ix86_expand_aligntest (count, size, false);
26972   machine_mode mode = int_mode_for_size (size * BITS_PER_UNIT, 1).else_blk ();
26973   rtx modesize;
26974   int n;
26975 
26976   /* If we do not have a vector value to copy, we must reduce the size.  */
26977   if (issetmem)
26978     {
26979       if (!vec_value)
26980 	{
26981 	  if (GET_MODE (value) == VOIDmode && size > 8)
26982 	    mode = Pmode;
26983 	  else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
26984 	    mode = GET_MODE (value);
26985 	}
26986       else
26987 	mode = GET_MODE (vec_value), value = vec_value;
26988     }
26989   else
26990     {
26991       /* Choose appropriate vector mode.  */
26992       if (size >= 32)
26993 	mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
26994       else if (size >= 16)
26995 	mode = TARGET_SSE ? V16QImode : DImode;
26996       srcmem = change_address (srcmem, mode, srcptr);
26997     }
26998   destmem = change_address (destmem, mode, destptr);
26999   modesize = GEN_INT (GET_MODE_SIZE (mode));
27000   gcc_assert (GET_MODE_SIZE (mode) <= size);
27001   for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
27002     {
27003       if (issetmem)
27004 	emit_move_insn (destmem, gen_lowpart (mode, value));
27005       else
27006 	{
27007           emit_move_insn (destmem, srcmem);
27008           srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
27009 	}
27010       destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
27011     }
27012 
27013   destmem = offset_address (destmem, count, 1);
27014   destmem = offset_address (destmem, GEN_INT (-2 * size),
27015 			    GET_MODE_SIZE (mode));
27016   if (!issetmem)
27017     {
27018       srcmem = offset_address (srcmem, count, 1);
27019       srcmem = offset_address (srcmem, GEN_INT (-2 * size),
27020 			       GET_MODE_SIZE (mode));
27021     }
27022   for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
27023     {
27024       if (issetmem)
27025 	emit_move_insn (destmem, gen_lowpart (mode, value));
27026       else
27027 	{
27028 	  emit_move_insn (destmem, srcmem);
27029 	  srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
27030 	}
27031       destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
27032     }
27033   emit_jump_insn (gen_jump (done_label));
27034   emit_barrier ();
27035 
27036   emit_label (label);
27037   LABEL_NUSES (label) = 1;
27038 }
27039 
27040 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
27041    and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
27042    bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT so that we can
27043    proceed with a loop copying SIZE bytes at once.  Do moves in MODE.
27044    DONE_LABEL is a label after the whole copying sequence.  The label is created
27045    on demand if *DONE_LABEL is NULL.
27046    MIN_SIZE is the minimal size of the block copied.  This value gets adjusted
27047    for the new bounds after the initial copies.
27048 
27049    DESTMEM/SRCMEM are memory expressions pointing to the copied block,
27050    DESTPTR/SRCPTR are pointers to the block.  DYNAMIC_CHECK indicates whether
27051    we will dispatch to a library call for large blocks.
27052 
27053    In pseudocode we do:
27054 
27055    if (COUNT < SIZE)
27056      {
27057        Assume that SIZE is 4. Bigger sizes are handled analogously
27058        if (COUNT & 4)
27059 	 {
27060 	    copy 4 bytes from SRCPTR to DESTPTR
27061 	    copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
27062 	    goto done_label
27063 	 }
27064        if (!COUNT)
27065 	 goto done_label;
27066        copy 1 byte from SRCPTR to DESTPTR
27067        if (COUNT & 2)
27068 	 {
27069 	    copy 2 bytes from SRCPTR to DESTPTR
27070 	    copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
27071 	 }
27072      }
27073    else
27074      {
27075        copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
27076        copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
27077 
27078        OLD_DESTPTR = DESTPTR;
27079        Align DESTPTR up to DESIRED_ALIGN
27080        SRCPTR += DESTPTR - OLD_DESTPTR
27081        COUNT -= DESTPTR - OLD_DESTPTR
27082        if (DYNAMIC_CHECK)
27083 	 Round COUNT down to multiple of SIZE
27084        << optional caller supplied zero size guard is here >>
27085        << optional caller supplied dynamic check is here >>
27086        << caller supplied main copy loop is here >>
27087      }
27088    done_label:
27089   */
27090 static void
27091 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
27092 							    rtx *destptr, rtx *srcptr,
27093 							    machine_mode mode,
27094 							    rtx value, rtx vec_value,
27095 							    rtx *count,
27096 							    rtx_code_label **done_label,
27097 							    int size,
27098 							    int desired_align,
27099 							    int align,
27100 							    unsigned HOST_WIDE_INT *min_size,
27101 							    bool dynamic_check,
27102 							    bool issetmem)
27103 {
27104   rtx_code_label *loop_label = NULL, *label;
27105   int n;
27106   rtx modesize;
27107   int prolog_size = 0;
27108   rtx mode_value;
27109 
27110   /* Choose the proper value to copy.  */
27111   if (issetmem && VECTOR_MODE_P (mode))
27112     mode_value = vec_value;
27113   else
27114     mode_value = value;
27115   gcc_assert (GET_MODE_SIZE (mode) <= size);
27116 
27117   /* See if block is big or small, handle small blocks.  */
27118   if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
27119     {
27120       int size2 = size;
27121       loop_label = gen_label_rtx ();
27122 
27123       if (!*done_label)
27124 	*done_label = gen_label_rtx ();
27125 
27126       emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
27127 			       1, loop_label);
27128       size2 >>= 1;
27129 
27130       /* Handle sizes > 3.  */
27131       for (;size2 > 2; size2 >>= 1)
27132 	expand_small_movmem_or_setmem (destmem, srcmem,
27133 				       *destptr, *srcptr,
27134 				       value, vec_value,
27135 				       *count,
27136 				       size2, *done_label, issetmem);
27137       /* Nothing to copy?  Jump to DONE_LABEL if so.  */
27138       emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
27139 			       1, *done_label);
27140 
27141       /* Do a byte copy.  */
27142       destmem = change_address (destmem, QImode, *destptr);
27143       if (issetmem)
27144 	emit_move_insn (destmem, gen_lowpart (QImode, value));
27145       else
27146 	{
27147           srcmem = change_address (srcmem, QImode, *srcptr);
27148           emit_move_insn (destmem, srcmem);
27149 	}
27150 
27151       /* Handle sizes 2 and 3.  */
27152       label = ix86_expand_aligntest (*count, 2, false);
27153       destmem = change_address (destmem, HImode, *destptr);
27154       destmem = offset_address (destmem, *count, 1);
27155       destmem = offset_address (destmem, GEN_INT (-2), 2);
27156       if (issetmem)
27157         emit_move_insn (destmem, gen_lowpart (HImode, value));
27158       else
27159 	{
27160 	  srcmem = change_address (srcmem, HImode, *srcptr);
27161 	  srcmem = offset_address (srcmem, *count, 1);
27162 	  srcmem = offset_address (srcmem, GEN_INT (-2), 2);
27163 	  emit_move_insn (destmem, srcmem);
27164 	}
27165 
27166       emit_label (label);
27167       LABEL_NUSES (label) = 1;
27168       emit_jump_insn (gen_jump (*done_label));
27169       emit_barrier ();
27170     }
27171   else
27172     gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
27173 		|| UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
27174 
27175   /* Start memcpy for COUNT >= SIZE.  */
27176   if (loop_label)
27177     {
27178        emit_label (loop_label);
27179        LABEL_NUSES (loop_label) = 1;
27180     }
27181 
27182   /* Copy first desired_align bytes.  */
27183   if (!issetmem)
27184     srcmem = change_address (srcmem, mode, *srcptr);
27185   destmem = change_address (destmem, mode, *destptr);
27186   modesize = GEN_INT (GET_MODE_SIZE (mode));
27187   for (n = 0; prolog_size < desired_align - align; n++)
27188     {
27189       if (issetmem)
27190         emit_move_insn (destmem, mode_value);
27191       else
27192 	{
27193           emit_move_insn (destmem, srcmem);
27194           srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
27195 	}
27196       destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
27197       prolog_size += GET_MODE_SIZE (mode);
27198     }
27199 
27200 
27201   /* Copy last SIZE bytes.  */
27202   destmem = offset_address (destmem, *count, 1);
27203   destmem = offset_address (destmem,
27204 			    GEN_INT (-size - prolog_size),
27205 			    1);
27206   if (issetmem)
27207     emit_move_insn (destmem, mode_value);
27208   else
27209     {
27210       srcmem = offset_address (srcmem, *count, 1);
27211       srcmem = offset_address (srcmem,
27212 			       GEN_INT (-size - prolog_size),
27213 			       1);
27214       emit_move_insn (destmem, srcmem);
27215     }
27216   for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
27217     {
27218       destmem = offset_address (destmem, modesize, 1);
27219       if (issetmem)
27220 	emit_move_insn (destmem, mode_value);
27221       else
27222 	{
27223           srcmem = offset_address (srcmem, modesize, 1);
27224           emit_move_insn (destmem, srcmem);
27225 	}
27226     }
27227 
27228   /* Align destination.  */
27229   if (desired_align > 1 && desired_align > align)
27230     {
27231       rtx saveddest = *destptr;
27232 
27233       gcc_assert (desired_align <= size);
27234       /* Align destptr up, placing it in a new register.  */
27235       *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
27236 				      GEN_INT (prolog_size),
27237 				      NULL_RTX, 1, OPTAB_DIRECT);
27238       if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
27239 	REG_POINTER (*destptr) = 1;
27240       *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
27241 				      GEN_INT (-desired_align),
27242 				      *destptr, 1, OPTAB_DIRECT);
27243       /* See how many bytes we skipped.  */
27244       saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
27245 				       *destptr,
27246 				       saveddest, 1, OPTAB_DIRECT);
27247       /* Adjust srcptr and count.  */
27248       if (!issetmem)
27249 	*srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
27250 				       saveddest, *srcptr, 1, OPTAB_DIRECT);
27251       *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
27252 				    saveddest, *count, 1, OPTAB_DIRECT);
27253       /* We copied at most size + prolog_size.  */
27254       if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
27255 	*min_size
27256 	  = ROUND_DOWN (*min_size - size, (unsigned HOST_WIDE_INT)size);
27257       else
27258 	*min_size = 0;
27259 
27260       /* Our loops always round down the block size, but for dispatch to a
27261          library call we need the precise value.  */
27262       if (dynamic_check)
27263 	*count = expand_simple_binop (GET_MODE (*count), AND, *count,
27264 				      GEN_INT (-size), *count, 1, OPTAB_DIRECT);
27265     }
27266   else
27267     {
27268       gcc_assert (prolog_size == 0);
27269       /* Decrease count, so we won't end up copying the last word twice.  */
27270       if (!CONST_INT_P (*count))
27271 	*count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
27272 				      constm1_rtx, *count, 1, OPTAB_DIRECT);
27273       else
27274 	*count = GEN_INT (ROUND_DOWN (UINTVAL (*count) - 1,
27275 				      (unsigned HOST_WIDE_INT)size));
27276       if (*min_size)
27277 	*min_size = ROUND_DOWN (*min_size - 1, (unsigned HOST_WIDE_INT)size);
27278     }
27279 }
27280 
27281 
27282 /* This function is like the previous one, except here we know how many bytes
27283    need to be copied.  That allows us to update alignment not only of DST, which
27284    is returned, but also of SRC, which is passed as a pointer for that
27285    reason.  */
27286 static rtx
27287 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
27288 					   rtx srcreg, rtx value, rtx vec_value,
27289 					   int desired_align, int align_bytes,
27290 					   bool issetmem)
27291 {
27292   rtx src = NULL;
27293   rtx orig_dst = dst;
27294   rtx orig_src = NULL;
27295   int piece_size = 1;
27296   int copied_bytes = 0;
27297 
27298   if (!issetmem)
27299     {
27300       gcc_assert (srcp != NULL);
27301       src = *srcp;
27302       orig_src = src;
27303     }
27304 
27305   for (piece_size = 1;
27306        piece_size <= desired_align && copied_bytes < align_bytes;
27307        piece_size <<= 1)
27308     {
27309       if (align_bytes & piece_size)
27310 	{
27311 	  if (issetmem)
27312 	    {
27313 	      if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
27314 		dst = emit_memset (dst, destreg, vec_value, piece_size);
27315 	      else
27316 		dst = emit_memset (dst, destreg, value, piece_size);
27317 	    }
27318 	  else
27319 	    dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
27320 	  copied_bytes += piece_size;
27321 	}
27322     }
27323   if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
27324     set_mem_align (dst, desired_align * BITS_PER_UNIT);
27325   if (MEM_SIZE_KNOWN_P (orig_dst))
27326     set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
27327 
27328   if (!issetmem)
27329     {
27330       int src_align_bytes = get_mem_align_offset (src, desired_align
27331 						       * BITS_PER_UNIT);
27332       if (src_align_bytes >= 0)
27333 	src_align_bytes = desired_align - src_align_bytes;
27334       if (src_align_bytes >= 0)
27335 	{
27336 	  unsigned int src_align;
27337 	  for (src_align = desired_align; src_align >= 2; src_align >>= 1)
27338 	    {
27339 	      if ((src_align_bytes & (src_align - 1))
27340 		   == (align_bytes & (src_align - 1)))
27341 		break;
27342 	    }
27343 	  if (src_align > (unsigned int) desired_align)
27344 	    src_align = desired_align;
27345 	  if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
27346 	    set_mem_align (src, src_align * BITS_PER_UNIT);
27347 	}
27348       if (MEM_SIZE_KNOWN_P (orig_src))
27349 	set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
27350       *srcp = src;
27351     }
27352 
27353   return dst;
27354 }
27355 
27356 /* Return true if ALG can be used in the current context.
27357    Assume we expand memset if MEMSET is true.  */
27358 static bool
27359 alg_usable_p (enum stringop_alg alg, bool memset, bool have_as)
27360 {
27361   if (alg == no_stringop)
27362     return false;
27363   if (alg == vector_loop)
27364     return TARGET_SSE || TARGET_AVX;
27365   /* Algorithms using the rep prefix want at least edi and ecx;
27366      additionally, memset wants eax and memcpy wants esi.  Don't
27367      consider such algorithms if the user has appropriated those
27368      registers for their own purposes, or if we have a non-default
27369      address space, since some string insns cannot override the segment.  */
27370   if (alg == rep_prefix_1_byte
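  /* For example, compiling with -ffixed-ecx marks CX_REG as fixed, so every
     rep-prefixed variant is rejected here and the caller typically falls back
     to a plain loop or a library call.  */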
27371       || alg == rep_prefix_4_byte
27372       || alg == rep_prefix_8_byte)
27373     {
27374       if (have_as)
27375 	return false;
27376       if (fixed_regs[CX_REG]
27377 	  || fixed_regs[DI_REG]
27378 	  || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]))
27379 	return false;
27380     }
27381   return true;
27382 }
27383 
27384 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
27385 static enum stringop_alg
27386 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
27387 	    unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
27388 	    bool memset, bool zero_memset, bool have_as,
27389 	    int *dynamic_check, bool *noalign, bool recur)
27390 {
27391   const struct stringop_algs *algs;
27392   bool optimize_for_speed;
27393   int max = 0;
27394   const struct processor_costs *cost;
27395   int i;
27396   bool any_alg_usable_p = false;
27397 
27398   *noalign = false;
27399   *dynamic_check = -1;
27400 
27401   /* Even if the string operation call is cold, we still might spend a lot
27402      of time processing large blocks.  */
27403   if (optimize_function_for_size_p (cfun)
27404       || (optimize_insn_for_size_p ()
27405  	  && (max_size < 256
27406               || (expected_size != -1 && expected_size < 256))))
27407     optimize_for_speed = false;
27408   else
27409     optimize_for_speed = true;
27410 
27411   cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
27412   if (memset)
27413     algs = &cost->memset[TARGET_64BIT != 0];
27414   else
27415     algs = &cost->memcpy[TARGET_64BIT != 0];
27416 
27417   /* See maximal size for user defined algorithm.  */
27418   for (i = 0; i < MAX_STRINGOP_ALGS; i++)
27419     {
27420       enum stringop_alg candidate = algs->size[i].alg;
27421       bool usable = alg_usable_p (candidate, memset, have_as);
27422       any_alg_usable_p |= usable;
27423 
27424       if (candidate != libcall && candidate && usable)
27425 	max = algs->size[i].max;
27426     }
27427 
27428   /* If the expected size is not known but the max size is small enough
27429      that the inline version is a win, set the expected size into
27430      the range.  */
27431   if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
27432       && expected_size == -1)
27433     expected_size = min_size / 2 + max_size / 2;
27434 
27435   /* If user specified the algorithm, honor it if possible.  */
27436   if (ix86_stringop_alg != no_stringop
27437       && alg_usable_p (ix86_stringop_alg, memset, have_as))
27438     return ix86_stringop_alg;
27439   /* rep; movq or rep; movl is the smallest variant.  */
27440   else if (!optimize_for_speed)
27441     {
27442       *noalign = true;
27443       if (!count || (count & 3) || (memset && !zero_memset))
27444 	return alg_usable_p (rep_prefix_1_byte, memset, have_as)
27445 	       ? rep_prefix_1_byte : loop_1_byte;
27446       else
27447 	return alg_usable_p (rep_prefix_4_byte, memset, have_as)
27448 	       ? rep_prefix_4_byte : loop;
27449     }
27450   /* Very tiny blocks are best handled via the loop; REP is expensive to
27451      set up.  */
27452   else if (expected_size != -1 && expected_size < 4)
27453     return loop_1_byte;
27454   else if (expected_size != -1)
27455     {
27456       enum stringop_alg alg = libcall;
27457       bool alg_noalign = false;
27458       for (i = 0; i < MAX_STRINGOP_ALGS; i++)
27459 	{
27460 	  /* We get here if the algorithms that were not libcall-based
27461 	     were rep-prefix based and we are unable to use rep prefixes
27462 	     based on global register usage.  Break out of the loop and
27463 	     use the heuristic below.  */
27464 	  if (algs->size[i].max == 0)
27465 	    break;
27466 	  if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
27467 	    {
27468 	      enum stringop_alg candidate = algs->size[i].alg;
27469 
27470 	      if (candidate != libcall
27471 		  && alg_usable_p (candidate, memset, have_as))
27472 		{
27473 		  alg = candidate;
27474 		  alg_noalign = algs->size[i].noalign;
27475 		}
27476 	      /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
27477 		 last non-libcall inline algorithm.  */
27478 	      if (TARGET_INLINE_ALL_STRINGOPS)
27479 		{
27480 		  /* When the current size is best to be copied by a libcall,
27481 		     but we are still forced to inline, run the heuristic below
27482 		     that will pick code for medium sized blocks.  */
27483 		  if (alg != libcall)
27484 		    {
27485 		      *noalign = alg_noalign;
27486 		      return alg;
27487 		    }
27488 		  else if (!any_alg_usable_p)
27489 		    break;
27490 		}
27491 	      else if (alg_usable_p (candidate, memset, have_as))
27492 		{
27493 		  *noalign = algs->size[i].noalign;
27494 		  return candidate;
27495 		}
27496 	    }
27497 	}
27498     }
27499   /* When asked to inline the call anyway, try to pick a meaningful choice.
27500      We look for the maximal size of a block that is faster to copy by hand
27501      and take blocks of at most that size, guessing that the average size
27502      will be roughly half of the block.
27503 
27504      If this turns out to be bad, we might simply specify the preferred
27505      choice in ix86_costs.  */
27506   if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
27507       && (algs->unknown_size == libcall
27508 	  || !alg_usable_p (algs->unknown_size, memset, have_as)))
27509     {
27510       enum stringop_alg alg;
27511       HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2;
27512 
27513       /* If there aren't any usable algorithms or if recursing already,
27514 	 then recursing on smaller sizes or same size isn't going to
27515 	 find anything.  Just return the simple byte-at-a-time copy loop.  */
27516       if (!any_alg_usable_p || recur)
27517 	{
27518 	  /* Pick something reasonable.  */
27519 	  if (TARGET_INLINE_STRINGOPS_DYNAMICALLY && !recur)
27520 	    *dynamic_check = 128;
27521 	  return loop_1_byte;
27522 	}
27523       alg = decide_alg (count, new_expected_size, min_size, max_size, memset,
27524 			zero_memset, have_as, dynamic_check, noalign, true);
27525       gcc_assert (*dynamic_check == -1);
27526       if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
27527 	*dynamic_check = max;
27528       else
27529 	gcc_assert (alg != libcall);
27530       return alg;
27531     }
27532   return (alg_usable_p (algs->unknown_size, memset, have_as)
27533 	  ? algs->unknown_size : libcall);
27534 }
27535 
27536 /* Decide on alignment.  We know that the operand is already aligned to ALIGN
27537    (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).  */
27538 static int
27539 decide_alignment (int align,
27540 		  enum stringop_alg alg,
27541 		  int expected_size,
27542 		  machine_mode move_mode)
27543 {
27544   int desired_align = 0;
27545 
27546   gcc_assert (alg != no_stringop);
27547 
27548   if (alg == libcall)
27549     return 0;
27550   if (move_mode == VOIDmode)
27551     return 0;
27552 
27553   desired_align = GET_MODE_SIZE (move_mode);
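  /* For example, a vector_loop using V16QImode asks for 16-byte alignment
     here, while rep_prefix_8_byte (DImode) asks for 8 bytes.  */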
27554   /* PentiumPro has special logic triggering for 8 byte aligned blocks,
27555      copying a whole cache line at once.  */
27556   if (TARGET_PENTIUMPRO
27557       && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
27558     desired_align = 8;
27559 
27560   if (optimize_size)
27561     desired_align = 1;
27562   if (desired_align < align)
27563     desired_align = align;
27564   if (expected_size != -1 && expected_size < 4)
27565     desired_align = align;
27566 
27567   return desired_align;
27568 }
27569 
27570 
27571 /* Helper function for memset.  For a QImode value 0xXY produce
27572    0xXYXY...XY of the width specified by MODE.  This is essentially
27573    a * 0x0101...01, but we can do slightly better than
27574    synth_mult by unwinding the sequence by hand on CPUs with
27575    slow multiply.  */
27576 static rtx
27577 promote_duplicated_reg (machine_mode mode, rtx val)
27578 {
27579   machine_mode valmode = GET_MODE (val);
27580   rtx tmp;
27581   int nops = mode == DImode ? 3 : 2;
27582 
27583   gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
27584   if (val == const0_rtx)
27585     return copy_to_mode_reg (mode, CONST0_RTX (mode));
27586   if (CONST_INT_P (val))
27587     {
27588       HOST_WIDE_INT v = INTVAL (val) & 255;
27589 
27590       v |= v << 8;
27591       v |= v << 16;
27592       if (mode == DImode)
27593         v |= (v << 16) << 16;
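      /* E.g. VAL == 0xab yields v == 0xabababab for SImode and
	 v == 0xabababababababab for DImode.  */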
27594       return copy_to_mode_reg (mode, gen_int_mode (v, mode));
27595     }
27596 
27597   if (valmode == VOIDmode)
27598     valmode = QImode;
27599   if (valmode != QImode)
27600     val = gen_lowpart (QImode, val);
27601   if (mode == QImode)
27602     return val;
27603   if (!TARGET_PARTIAL_REG_STALL)
27604     nops--;
27605   if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
27606       + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
27607       <= (ix86_cost->shift_const + ix86_cost->add) * nops
27608           + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
27609     {
27610       rtx reg = convert_modes (mode, QImode, val, true);
27611       tmp = promote_duplicated_reg (mode, const1_rtx);
27612       return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
27613 				  OPTAB_DIRECT);
27614     }
27615   else
27616     {
27617       rtx reg = convert_modes (mode, QImode, val, true);
27618 
27619       if (!TARGET_PARTIAL_REG_STALL)
27620 	if (mode == SImode)
27621 	  emit_insn (gen_insvsi_1 (reg, reg));
27622 	else
27623 	  emit_insn (gen_insvdi_1 (reg, reg));
27624       else
27625 	{
27626 	  tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
27627 				     NULL, 1, OPTAB_DIRECT);
27628 	  reg =
27629 	    expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
27630 	}
27631       tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
27632 			         NULL, 1, OPTAB_DIRECT);
27633       reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
27634       if (mode == SImode)
27635 	return reg;
27636       tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
27637 				 NULL, 1, OPTAB_DIRECT);
27638       reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
27639       return reg;
27640     }
27641 }
27642 
27643 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
27644    will be needed by the main loop copying SIZE_NEEDED chunks and by the
27645    prologue getting alignment from ALIGN to DESIRED_ALIGN.  */
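/* For instance, a 64-bit memset whose main loop stores 8-byte chunks
   (SIZE_NEEDED == 8) gets a DImode promoted value, while a block that only
   needs 2-byte stores gets an HImode one.  */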
27646 static rtx
27647 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
27648 				int align)
27649 {
27650   rtx promoted_val;
27651 
27652   if (TARGET_64BIT
27653       && (size_needed > 4 || (desired_align > align && desired_align > 4)))
27654     promoted_val = promote_duplicated_reg (DImode, val);
27655   else if (size_needed > 2 || (desired_align > align && desired_align > 2))
27656     promoted_val = promote_duplicated_reg (SImode, val);
27657   else if (size_needed > 1 || (desired_align > align && desired_align > 1))
27658     promoted_val = promote_duplicated_reg (HImode, val);
27659   else
27660     promoted_val = val;
27661 
27662   return promoted_val;
27663 }
27664 
27665 /* Expand string move (memcpy) or store (memset) operation.  Use i386 string
27666    operations when profitable.  The code depends upon architecture, block size
27667    and alignment, but always has one of the following overall structures:
27668 
27669    Aligned move sequence:
27670 
27671      1) Prologue guard: Conditional that jumps up to epilogues for small
27672 	blocks that can be handled by epilogue alone.  This is faster
27673 	but also needed for correctness, since the prologue assumes the block
27674 	is larger than the desired alignment.
27675 
27676 	Optional dynamic check for size and libcall for large
27677 	blocks is emitted here too, with -minline-stringops-dynamically.
27678 
27679      2) Prologue: copy first few bytes in order to get destination
27680 	aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less
27681 	than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
27682 	copied.  We emit either a jump tree on power of two sized
27683 	blocks, or a byte loop.
27684 
27685      3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
27686 	with specified algorithm.
27687 
27688      4) Epilogue: code copying tail of the block that is too small to be
27689 	handled by main body (or up to size guarded by prologue guard).
27690 
27691   Misaligned move sequence
27692 
27693      1) Misaligned move prologue/epilogue containing:
27694         a) Prologue handling small memory blocks and jumping to done_label
27695 	   (skipped if blocks are known to be large enough)
27696 	b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment
27697            is needed, done by a single possibly misaligned move
27698 	   (skipped if alignment is not needed)
27699         c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
27700 
27701      2) Zero size guard dispatching to done_label, if needed
27702 
27703      3) Dispatch to a library call, if needed,
27704 
27705      4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
27706 	with the specified algorithm.  */
27707 bool
27708 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
27709 			   rtx align_exp, rtx expected_align_exp,
27710 			   rtx expected_size_exp, rtx min_size_exp,
27711 			   rtx max_size_exp, rtx probable_max_size_exp,
27712 			   bool issetmem)
27713 {
27714   rtx destreg;
27715   rtx srcreg = NULL;
27716   rtx_code_label *label = NULL;
27717   rtx tmp;
27718   rtx_code_label *jump_around_label = NULL;
27719   HOST_WIDE_INT align = 1;
27720   unsigned HOST_WIDE_INT count = 0;
27721   HOST_WIDE_INT expected_size = -1;
27722   int size_needed = 0, epilogue_size_needed;
27723   int desired_align = 0, align_bytes = 0;
27724   enum stringop_alg alg;
27725   rtx promoted_val = NULL;
27726   rtx vec_promoted_val = NULL;
27727   bool force_loopy_epilogue = false;
27728   int dynamic_check;
27729   bool need_zero_guard = false;
27730   bool noalign;
27731   machine_mode move_mode = VOIDmode;
27732   machine_mode wider_mode;
27733   int unroll_factor = 1;
27734   /* TODO: Once value ranges are available, fill in proper data.  */
27735   unsigned HOST_WIDE_INT min_size = 0;
27736   unsigned HOST_WIDE_INT max_size = -1;
27737   unsigned HOST_WIDE_INT probable_max_size = -1;
27738   bool misaligned_prologue_used = false;
27739   bool have_as;
27740 
27741   if (CONST_INT_P (align_exp))
27742     align = INTVAL (align_exp);
27743   /* i386 can do misaligned access at a reasonably increased cost.  */
27744   if (CONST_INT_P (expected_align_exp)
27745       && INTVAL (expected_align_exp) > align)
27746     align = INTVAL (expected_align_exp);
27747   /* ALIGN is the minimum of destination and source alignment, but we care here
27748      just about destination alignment.  */
27749   else if (!issetmem
27750 	   && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
27751     align = MEM_ALIGN (dst) / BITS_PER_UNIT;
27752 
27753   if (CONST_INT_P (count_exp))
27754     {
27755       min_size = max_size = probable_max_size = count = expected_size
27756 	= INTVAL (count_exp);
27757       /* When COUNT is 0, there is nothing to do.  */
27758       if (!count)
27759 	return true;
27760     }
27761   else
27762     {
27763       if (min_size_exp)
27764 	min_size = INTVAL (min_size_exp);
27765       if (max_size_exp)
27766 	max_size = INTVAL (max_size_exp);
27767       if (probable_max_size_exp)
27768 	probable_max_size = INTVAL (probable_max_size_exp);
27769       if (CONST_INT_P (expected_size_exp))
27770 	expected_size = INTVAL (expected_size_exp);
27771      }
27772 
27773   /* Make sure we don't need to care about overflow later on.  */
27774   if (count > (HOST_WIDE_INT_1U << 30))
27775     return false;
27776 
27777   have_as = !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst));
27778   if (!issetmem)
27779     have_as |= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src));
27780 
27781   /* Step 0: Decide on preferred algorithm, desired alignment and
27782      size of chunks to be copied by main loop.  */
27783   alg = decide_alg (count, expected_size, min_size, probable_max_size,
27784 		    issetmem,
27785 		    issetmem && val_exp == const0_rtx, have_as,
27786 		    &dynamic_check, &noalign, false);
27787   if (alg == libcall)
27788     return false;
27789   gcc_assert (alg != no_stringop);
27790 
27791   /* For now the vector version of memset is generated only for memory zeroing,
27792      as creating the promoted vector value is very cheap in this case.  */
27793   if (issetmem && alg == vector_loop && val_exp != const0_rtx)
27794     alg = unrolled_loop;
27795 
27796   if (!count)
27797     count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
27798   destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
27799   if (!issetmem)
27800     srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
27801 
27802   unroll_factor = 1;
27803   move_mode = word_mode;
27804   switch (alg)
27805     {
27806     case libcall:
27807     case no_stringop:
27808     case last_alg:
27809       gcc_unreachable ();
27810     case loop_1_byte:
27811       need_zero_guard = true;
27812       move_mode = QImode;
27813       break;
27814     case loop:
27815       need_zero_guard = true;
27816       break;
27817     case unrolled_loop:
27818       need_zero_guard = true;
27819       unroll_factor = (TARGET_64BIT ? 4 : 2);
27820       break;
27821     case vector_loop:
27822       need_zero_guard = true;
27823       unroll_factor = 4;
27824       /* Find the widest supported mode.  */
27825       move_mode = word_mode;
27826       while (GET_MODE_WIDER_MODE (move_mode).exists (&wider_mode)
27827 	     && optab_handler (mov_optab, wider_mode) != CODE_FOR_nothing)
27828 	move_mode = wider_mode;
27829 
27830       if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (move_mode) > 128)
27831 	move_mode = TImode;
27832 
27833       /* Find the corresponding vector mode with the same size as MOVE_MODE.
27834 	 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.).  */
27835       if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
27836 	{
27837 	  int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
27838 	  if (!mode_for_vector (word_mode, nunits).exists (&move_mode)
27839 	      || optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
27840 	    move_mode = word_mode;
27841 	}
27842       gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
27843       break;
27844     case rep_prefix_8_byte:
27845       move_mode = DImode;
27846       break;
27847     case rep_prefix_4_byte:
27848       move_mode = SImode;
27849       break;
27850     case rep_prefix_1_byte:
27851       move_mode = QImode;
27852       break;
27853     }
27854   size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
27855   epilogue_size_needed = size_needed;
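  /* For example, the unrolled_loop algorithm on a 64-bit target moves
     word_mode (DImode) chunks with UNROLL_FACTOR == 4, so SIZE_NEEDED is 32
     bytes per loop iteration.  */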
27856 
27857   /* If we are going to call any library calls conditionally, make sure any
27858      pending stack adjustment happens before the first conditional branch,
27859      otherwise it will be emitted before the library call only and won't
27860      happen on the other branches.  */
27861   if (dynamic_check != -1)
27862     do_pending_stack_adjust ();
27863 
27864   desired_align = decide_alignment (align, alg, expected_size, move_mode);
27865   if (!TARGET_ALIGN_STRINGOPS || noalign)
27866     align = desired_align;
27867 
27868   /* Step 1: Prologue guard.  */
27869 
27870   /* Alignment code needs count to be in a register.  */
27871   if (CONST_INT_P (count_exp) && desired_align > align)
27872     {
27873       if (INTVAL (count_exp) > desired_align
27874 	  && INTVAL (count_exp) > size_needed)
27875 	{
27876 	  align_bytes
27877 	    = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
27878 	  if (align_bytes <= 0)
27879 	    align_bytes = 0;
27880 	  else
27881 	    align_bytes = desired_align - align_bytes;
27882 	}
27883       if (align_bytes == 0)
27884 	count_exp = force_reg (counter_mode (count_exp), count_exp);
27885     }
27886   gcc_assert (desired_align >= 1 && align >= 1);
27887 
27888   /* Misaligned move sequences handle both the prologue and epilogue at once.
27889      Default code generation results in smaller code for large alignments
27890      and also avoids redundant work when sizes are known precisely.  */
27891   misaligned_prologue_used
27892     = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
27893        && MAX (desired_align, epilogue_size_needed) <= 32
27894        && desired_align <= epilogue_size_needed
27895        && ((desired_align > align && !align_bytes)
27896 	   || (!count && epilogue_size_needed > 1)));
27897 
27898   /* Do the cheap promotion to allow better CSE across the
27899      main loop and epilogue (i.e. one load of the big constant in
27900      front of all the code).
27901      For now the misaligned move sequences do not have a fast path
27902      without broadcasting.  */
27903   if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
27904     {
27905       if (alg == vector_loop)
27906 	{
27907 	  gcc_assert (val_exp == const0_rtx);
27908 	  vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
27909 	  promoted_val = promote_duplicated_reg_to_size (val_exp,
27910 							 GET_MODE_SIZE (word_mode),
27911 							 desired_align, align);
27912 	}
27913       else
27914 	{
27915 	  promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
27916 							 desired_align, align);
27917 	}
27918     }
27919   /* Misaligned move sequences handle both prologues and epilogues at once.
27920      Default code generation results in smaller code for large alignments and
27921      also avoids redundant work when sizes are known precisely.  */
27922   if (misaligned_prologue_used)
27923     {
27924       /* The misaligned move prologue handles small blocks by itself.  */
27925       expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
27926 	   (dst, src, &destreg, &srcreg,
27927 	    move_mode, promoted_val, vec_promoted_val,
27928 	    &count_exp,
27929 	    &jump_around_label,
27930             desired_align < align
27931 	    ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
27932 	    desired_align, align, &min_size, dynamic_check, issetmem);
27933       if (!issetmem)
27934         src = change_address (src, BLKmode, srcreg);
27935       dst = change_address (dst, BLKmode, destreg);
27936       set_mem_align (dst, desired_align * BITS_PER_UNIT);
27937       epilogue_size_needed = 0;
27938       if (need_zero_guard
27939 	  && min_size < (unsigned HOST_WIDE_INT) size_needed)
27940 	{
27941 	  /* It is possible that we copied enough so the main loop will not
27942 	     execute.  */
27943 	  gcc_assert (size_needed > 1);
27944 	  if (jump_around_label == NULL_RTX)
27945 	    jump_around_label = gen_label_rtx ();
27946 	  emit_cmp_and_jump_insns (count_exp,
27947 				   GEN_INT (size_needed),
27948 				   LTU, 0, counter_mode (count_exp), 1, jump_around_label);
27949 	  if (expected_size == -1
27950 	      || expected_size < (desired_align - align) / 2 + size_needed)
27951 	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
27952 	  else
27953 	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
27954 	}
27955     }
27956   /* Ensure that the alignment prologue won't copy past the end of the block.  */
27957   else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
27958     {
27959       epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
27960       /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
27961 	 Make sure EPILOGUE_SIZE_NEEDED is a power of 2.  */
27962       epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
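      /* E.g. with size_needed == 32 and desired_align == align this is
	 MAX (31, 0) == 31 above, which rounds up to 32 here.  */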
27963 
27964       /* To improve performance of small blocks, we jump around the VAL
27965 	 promotion code.  This means that if the promoted VAL is not constant,
27966 	 we might not use it in the epilogue and have to use the byte
27967 	 loop variant.  */
27968       if (issetmem && epilogue_size_needed > 2 && !promoted_val)
27969 	force_loopy_epilogue = true;
27970       if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
27971 	  || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
27972 	{
27973 	  /* If main algorithm works on QImode, no epilogue is needed.
27974 	     For small sizes just don't align anything.  */
27975 	  if (size_needed == 1)
27976 	    desired_align = align;
27977 	  else
27978 	    goto epilogue;
27979 	}
27980       else if (!count
27981 	       && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
27982 	{
27983 	  label = gen_label_rtx ();
27984 	  emit_cmp_and_jump_insns (count_exp,
27985 				   GEN_INT (epilogue_size_needed),
27986 				   LTU, 0, counter_mode (count_exp), 1, label);
27987 	  if (expected_size == -1 || expected_size < epilogue_size_needed)
27988 	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
27989 	  else
27990 	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
27991 	}
27992     }
27993 
27994   /* Emit code to decide on runtime whether library call or inline should be
27995      used.  */
27996   if (dynamic_check != -1)
27997     {
27998       if (!issetmem && CONST_INT_P (count_exp))
27999 	{
28000 	  if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
28001 	    {
28002 	      emit_block_copy_via_libcall (dst, src, count_exp);
28003 	      count_exp = const0_rtx;
28004 	      goto epilogue;
28005 	    }
28006 	}
28007       else
28008 	{
28009 	  rtx_code_label *hot_label = gen_label_rtx ();
28010 	  if (jump_around_label == NULL_RTX)
28011 	    jump_around_label = gen_label_rtx ();
28012 	  emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
28013 				   LEU, 0, counter_mode (count_exp),
28014 				   1, hot_label);
28015 	  predict_jump (REG_BR_PROB_BASE * 90 / 100);
28016 	  if (issetmem)
28017 	    set_storage_via_libcall (dst, count_exp, val_exp);
28018 	  else
28019 	    emit_block_copy_via_libcall (dst, src, count_exp);
28020 	  emit_jump (jump_around_label);
28021 	  emit_label (hot_label);
28022 	}
28023     }
28024 
28025   /* Step 2: Alignment prologue.  */
28026   /* Do the expensive promotion once we branched off the small blocks.  */
28027   if (issetmem && !promoted_val)
28028     promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
28029 						   desired_align, align);
28030 
28031   if (desired_align > align && !misaligned_prologue_used)
28032     {
28033       if (align_bytes == 0)
28034 	{
28035 	  /* Except for the first move in the prologue, we no longer know
28036 	     the constant offset in the aliasing info.  It does not seem worth
28037 	     the pain to maintain it for the first move, so throw away
28038 	     the info early.  */
28039 	  dst = change_address (dst, BLKmode, destreg);
28040 	  if (!issetmem)
28041 	    src = change_address (src, BLKmode, srcreg);
28042 	  dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
28043 					    promoted_val, vec_promoted_val,
28044 					    count_exp, align, desired_align,
28045 					    issetmem);
28046 	  /* At most desired_align - align bytes are copied.  */
28047 	  if (min_size < (unsigned)(desired_align - align))
28048 	    min_size = 0;
28049 	  else
28050 	    min_size -= desired_align - align;
28051 	}
28052       else
28053 	{
28054 	  /* If we know how many bytes need to be stored before dst is
28055 	     sufficiently aligned, maintain aliasing info accurately.  */
28056 	  dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
28057 							   srcreg,
28058 							   promoted_val,
28059 							   vec_promoted_val,
28060 							   desired_align,
28061 							   align_bytes,
28062 							   issetmem);
28063 
28064 	  count_exp = plus_constant (counter_mode (count_exp),
28065 				     count_exp, -align_bytes);
28066 	  count -= align_bytes;
28067 	  min_size -= align_bytes;
28068 	  max_size -= align_bytes;
28069 	}
28070       if (need_zero_guard
28071 	  && min_size < (unsigned HOST_WIDE_INT) size_needed
28072 	  && (count < (unsigned HOST_WIDE_INT) size_needed
28073 	      || (align_bytes == 0
28074 		  && count < ((unsigned HOST_WIDE_INT) size_needed
28075 			      + desired_align - align))))
28076 	{
28077 	  /* It is possible that we copied enough so the main loop will not
28078 	     execute.  */
28079 	  gcc_assert (size_needed > 1);
28080 	  if (label == NULL_RTX)
28081 	    label = gen_label_rtx ();
28082 	  emit_cmp_and_jump_insns (count_exp,
28083 				   GEN_INT (size_needed),
28084 				   LTU, 0, counter_mode (count_exp), 1, label);
28085 	  if (expected_size == -1
28086 	      || expected_size < (desired_align - align) / 2 + size_needed)
28087 	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
28088 	  else
28089 	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
28090 	}
28091     }
28092   if (label && size_needed == 1)
28093     {
28094       emit_label (label);
28095       LABEL_NUSES (label) = 1;
28096       label = NULL;
28097       epilogue_size_needed = 1;
28098       if (issetmem)
28099 	promoted_val = val_exp;
28100     }
28101   else if (label == NULL_RTX && !misaligned_prologue_used)
28102     epilogue_size_needed = size_needed;
28103 
28104   /* Step 3: Main loop.  */
28105 
28106   switch (alg)
28107     {
28108     case libcall:
28109     case no_stringop:
28110     case last_alg:
28111       gcc_unreachable ();
28112     case loop_1_byte:
28113     case loop:
28114     case unrolled_loop:
28115       expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
28116 				     count_exp, move_mode, unroll_factor,
28117 				     expected_size, issetmem);
28118       break;
28119     case vector_loop:
28120       expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
28121 				     vec_promoted_val, count_exp, move_mode,
28122 				     unroll_factor, expected_size, issetmem);
28123       break;
28124     case rep_prefix_8_byte:
28125     case rep_prefix_4_byte:
28126     case rep_prefix_1_byte:
28127       expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
28128 				       val_exp, count_exp, move_mode, issetmem);
28129       break;
28130     }
28131   /* Properly adjust the offsets of the src and dest memory for aliasing.  */
28132   if (CONST_INT_P (count_exp))
28133     {
28134       if (!issetmem)
28135 	src = adjust_automodify_address_nv (src, BLKmode, srcreg,
28136 					    (count / size_needed) * size_needed);
28137       dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
28138 					  (count / size_needed) * size_needed);
28139     }
28140   else
28141     {
28142       if (!issetmem)
28143 	src = change_address (src, BLKmode, srcreg);
28144       dst = change_address (dst, BLKmode, destreg);
28145     }
28146 
28147   /* Step 4: Epilogue to copy the remaining bytes.  */
28148  epilogue:
28149   if (label)
28150     {
28151       /* When the main loop is done, COUNT_EXP might hold the original count,
28152 	 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
28153 	 The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
28154 	 bytes.  Compensate if needed.  */
28155 
28156       if (size_needed < epilogue_size_needed)
28157 	{
28158 	  tmp =
28159 	    expand_simple_binop (counter_mode (count_exp), AND, count_exp,
28160 				 GEN_INT (size_needed - 1), count_exp, 1,
28161 				 OPTAB_DIRECT);
28162 	  if (tmp != count_exp)
28163 	    emit_move_insn (count_exp, tmp);
28164 	}
28165       emit_label (label);
28166       LABEL_NUSES (label) = 1;
28167     }
28168 
28169   if (count_exp != const0_rtx && epilogue_size_needed > 1)
28170     {
28171       if (force_loopy_epilogue)
28172 	expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
28173 					 epilogue_size_needed);
28174       else
28175 	{
28176 	  if (issetmem)
28177 	    expand_setmem_epilogue (dst, destreg, promoted_val,
28178 				    vec_promoted_val, count_exp,
28179 				    epilogue_size_needed);
28180 	  else
28181 	    expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
28182 				    epilogue_size_needed);
28183 	}
28184     }
28185   if (jump_around_label)
28186     emit_label (jump_around_label);
28187   return true;
28188 }
28189 
28190 
28191 /* Expand the appropriate insns for doing strlen if not just doing
28192    repnz; scasb
28193 
28194    out = result, initialized with the start address
28195    align_rtx = alignment of the address.
28196    scratch = scratch register, initialized with the start address when
28197 	not aligned, otherwise undefined
28198 
28199    This is just the body. It needs the initializations mentioned above and
28200    some address computing at the end.  These things are done in i386.md.  */
28201 
28202 static void
28203 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
28204 {
28205   int align;
28206   rtx tmp;
28207   rtx_code_label *align_2_label = NULL;
28208   rtx_code_label *align_3_label = NULL;
28209   rtx_code_label *align_4_label = gen_label_rtx ();
28210   rtx_code_label *end_0_label = gen_label_rtx ();
28211   rtx mem;
28212   rtx tmpreg = gen_reg_rtx (SImode);
28213   rtx scratch = gen_reg_rtx (SImode);
28214   rtx cmp;
28215 
28216   align = 0;
28217   if (CONST_INT_P (align_rtx))
28218     align = INTVAL (align_rtx);
28219 
28220   /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
28221 
28222   /* Is there a known alignment and is it less than 4?  */
28223   if (align < 4)
28224     {
28225       rtx scratch1 = gen_reg_rtx (Pmode);
28226       emit_move_insn (scratch1, out);
28227       /* Is there a known alignment and is it not 2? */
28228       if (align != 2)
28229 	{
28230 	  align_3_label = gen_label_rtx (); /* Label when (addr & 3) == 3.  */
28231 	  align_2_label = gen_label_rtx (); /* Label when (addr & 3) == 2.  */
28232 
28233 	  /* Leave just the 3 lower bits.  */
28234 	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
28235 				    NULL_RTX, 0, OPTAB_WIDEN);
28236 
28237 	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
28238 				   Pmode, 1, align_4_label);
28239 	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
28240 				   Pmode, 1, align_2_label);
28241 	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
28242 				   Pmode, 1, align_3_label);
28243 	}
28244       else
28245         {
28246 	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
28247 	     check whether the pointer is 4-byte aligned.  */
28248 
28249 	  align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
28250 				    NULL_RTX, 0, OPTAB_WIDEN);
28251 
28252 	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
28253 				   Pmode, 1, align_4_label);
28254         }
28255 
28256       mem = change_address (src, QImode, out);
28257 
28258       /* Now compare the bytes.  */
28259 
28260       /* Compare the first 1..3 unaligned bytes on a byte-by-byte basis.  */
28261       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
28262 			       QImode, 1, end_0_label);
28263 
28264       /* Increment the address.  */
28265       emit_insn (ix86_gen_add3 (out, out, const1_rtx));
28266 
28267       /* Not needed with an alignment of 2.  */
28268       if (align != 2)
28269 	{
28270 	  emit_label (align_2_label);
28271 
28272 	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
28273 				   end_0_label);
28274 
28275 	  emit_insn (ix86_gen_add3 (out, out, const1_rtx));
28276 
28277 	  emit_label (align_3_label);
28278 	}
28279 
28280       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
28281 			       end_0_label);
28282 
28283       emit_insn (ix86_gen_add3 (out, out, const1_rtx));
28284     }
28285 
28286   /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
28287      align this loop; that only enlarges the program and does not help
28288      speed.  */
28289   emit_label (align_4_label);
28290 
28291   mem = change_address (src, SImode, out);
28292   emit_move_insn (scratch, mem);
28293   emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
28294 
28295   /* This formula yields a nonzero result iff one of the bytes is zero.
28296      This saves three branches inside the loop and many cycles.  */
28297 
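  /* A worked example of the formula below: for scratch == 0x00616263
     (top byte zero),
	scratch - 0x01010101  == 0xff606162
	~scratch              == 0xff9e9d9c
     and ANDing those with 0x80808080 leaves 0x80000000, i.e. only the
     high bit of the zero byte survives; when no byte is zero, no borrow
     propagates and every high bit is masked off, giving 0.  */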
28298   emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
28299   emit_insn (gen_one_cmplsi2 (scratch, scratch));
28300   emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
28301   emit_insn (gen_andsi3 (tmpreg, tmpreg,
28302 			 gen_int_mode (0x80808080, SImode)));
28303   emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
28304 			   align_4_label);
28305 
28306   if (TARGET_CMOVE)
28307     {
28308        rtx reg = gen_reg_rtx (SImode);
28309        rtx reg2 = gen_reg_rtx (Pmode);
28310        emit_move_insn (reg, tmpreg);
28311        emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
28312 
28313        /* If zero is not in the first two bytes, move two bytes forward.  */
28314        emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
28315        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28316        tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
28317        emit_insn (gen_rtx_SET (tmpreg,
28318 			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
28319 						     reg,
28320 						     tmpreg)));
28321        /* Emit lea manually to avoid clobbering of flags.  */
28322        emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx)));
28323 
28324        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28325        tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
28326        emit_insn (gen_rtx_SET (out,
28327 			       gen_rtx_IF_THEN_ELSE (Pmode, tmp,
28328 						     reg2,
28329 						     out)));
28330     }
28331   else
28332     {
28333        rtx_code_label *end_2_label = gen_label_rtx ();
28334        /* Is zero in the first two bytes? */
28335 
28336        emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
28337        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28338        tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
28339        tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
28340                             gen_rtx_LABEL_REF (VOIDmode, end_2_label),
28341                             pc_rtx);
28342        tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
28343        JUMP_LABEL (tmp) = end_2_label;
28344 
28345        /* Not in the first two.  Move two bytes forward.  */
28346        emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
28347        emit_insn (ix86_gen_add3 (out, out, const2_rtx));
28348 
28349        emit_label (end_2_label);
28350 
28351     }
28352 
28353   /* Avoid branch in fixing the byte.  */
28354   tmpreg = gen_lowpart (QImode, tmpreg);
28355   emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg));
28356   tmp = gen_rtx_REG (CCmode, FLAGS_REG);
28357   cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
28358   emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
28359 
28360   emit_label (end_0_label);
28361 }
28362 
28363 /* Expand strlen.  */
28364 
28365 bool
28366 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
28367 {
28368   rtx addr, scratch1, scratch2, scratch3, scratch4;
28369 
28370   /* The generic case of the strlen expander is long.  Avoid expanding
28371      it unless TARGET_INLINE_ALL_STRINGOPS.  */
28372 
28373   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
28374       && !TARGET_INLINE_ALL_STRINGOPS
28375       && !optimize_insn_for_size_p ()
28376       && (!CONST_INT_P (align) || INTVAL (align) < 4))
28377     return false;
28378 
28379   addr = force_reg (Pmode, XEXP (src, 0));
28380   scratch1 = gen_reg_rtx (Pmode);
28381 
28382   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
28383       && !optimize_insn_for_size_p ())
28384     {
28385       /* Well, it seems that some optimizer does not combine a call like
28386          foo(strlen(bar), strlen(bar));
28387          when the move and the subtraction are done here.  It does calculate
28388          the length just once when these instructions are done inside of
28389          output_strlen_unroll().  But I think since &bar[strlen(bar)] is
28390          often used and I use one fewer register for the lifetime of
28391          output_strlen_unroll() this is better.  */
28392 
28393       emit_move_insn (out, addr);
28394 
28395       ix86_expand_strlensi_unroll_1 (out, src, align);
28396 
28397       /* strlensi_unroll_1 returns the address of the zero at the end of
28398          the string, like memchr(), so compute the length by subtracting
28399          the start address.  */
28400       emit_insn (ix86_gen_sub3 (out, out, addr));
28401     }
28402   else
28403     {
28404       rtx unspec;
28405 
28406       /* Can't use this if the user has appropriated eax, ecx, or edi.  */
28407       if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
28408         return false;
28409       /* Can't use this for non-default address spaces.  */
28410       if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src)))
28411 	return false;
28412 
28413       scratch2 = gen_reg_rtx (Pmode);
28414       scratch3 = gen_reg_rtx (Pmode);
28415       scratch4 = force_reg (Pmode, constm1_rtx);
28416 
28417       emit_move_insn (scratch3, addr);
28418       eoschar = force_reg (QImode, eoschar);
28419 
28420       src = replace_equiv_address_nv (src, scratch3);
28421 
28422       /* If .md starts supporting :P, this can be done in .md.  */
28423       unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
28424 						 scratch4), UNSPEC_SCAS);
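      /* SCRATCH1 receives the residual count: it starts at -1 (SCRATCH4)
	 and repnz scasb decrements it once per byte scanned, including
	 the terminating zero, so the length is ~SCRATCH1 - 1, computed
	 by the one's complement and add below.  */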
28425       emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
28426       emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
28427       emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
28428     }
28429   return true;
28430 }
28431 
28432 /* For a given symbol (function), construct code to compute the address of
28433    its PLT entry in the large x86-64 PIC model.  */
28434 static rtx
28435 construct_plt_address (rtx symbol)
28436 {
28437   rtx tmp, unspec;
28438 
28439   gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
28440   gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
28441   gcc_assert (Pmode == DImode);
28442 
28443   tmp = gen_reg_rtx (Pmode);
28444   unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
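  /* I.e. TMP = symbol@PLTOFF (a 64-bit immediate in the large model),
     then TMP += the PIC base register, giving the address of the
     symbol's PLT entry.  */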
28445 
28446   emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
28447   emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
28448   return tmp;
28449 }
28450 
28451 rtx
28452 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
28453 		  rtx callarg2,
28454 		  rtx pop, bool sibcall)
28455 {
28456   rtx vec[3];
28457   rtx use = NULL, call;
28458   unsigned int vec_len = 0;
28459   tree fndecl;
28460 
28461   if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
28462     {
28463       fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
28464       if (fndecl
28465 	  && (lookup_attribute ("interrupt",
28466 				TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))))
28467 	error ("interrupt service routine can't be called directly");
28468     }
28469   else
28470     fndecl = NULL_TREE;
28471 
28472   if (pop == const0_rtx)
28473     pop = NULL;
28474   gcc_assert (!TARGET_64BIT || !pop);
28475 
28476   if (TARGET_MACHO && !TARGET_64BIT)
28477     {
28478 #if TARGET_MACHO
28479       if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
28480 	fnaddr = machopic_indirect_call_target (fnaddr);
28481 #endif
28482     }
28483   else
28484     {
28485       /* Static functions and indirect calls don't need the PIC register.  Also,
28486 	 check if the PLT was explicitly avoided via -fno-plt or the "noplt"
28487 	 attribute, making it an indirect call.  */
28488       rtx addr = XEXP (fnaddr, 0);
28489       if (flag_pic
28490 	  && GET_CODE (addr) == SYMBOL_REF
28491 	  && !SYMBOL_REF_LOCAL_P (addr))
28492 	{
28493 	  if (flag_plt
28494 	      && (SYMBOL_REF_DECL (addr) == NULL_TREE
28495 		  || !lookup_attribute ("noplt",
28496 					DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr)))))
28497 	    {
28498 	      if (!TARGET_64BIT
28499 		  || (ix86_cmodel == CM_LARGE_PIC
28500 		      && DEFAULT_ABI != MS_ABI))
28501 		{
28502 		  use_reg (&use, gen_rtx_REG (Pmode,
28503 					      REAL_PIC_OFFSET_TABLE_REGNUM));
28504 		  if (ix86_use_pseudo_pic_reg ())
28505 		    emit_move_insn (gen_rtx_REG (Pmode,
28506 						 REAL_PIC_OFFSET_TABLE_REGNUM),
28507 				    pic_offset_table_rtx);
28508 		}
28509 	    }
28510 	  else if (!TARGET_PECOFF && !TARGET_MACHO)
28511 	    {
28512 	      if (TARGET_64BIT)
28513 		{
28514 		  fnaddr = gen_rtx_UNSPEC (Pmode,
28515 					   gen_rtvec (1, addr),
28516 					   UNSPEC_GOTPCREL);
28517 		  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
28518 		}
28519 	      else
28520 		{
28521 		  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
28522 					   UNSPEC_GOT);
28523 		  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
28524 		  fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
28525 					 fnaddr);
28526 		}
28527 	      fnaddr = gen_const_mem (Pmode, fnaddr);
28528 	      /* Pmode may not be the same as word_mode for x32, which
28529 		 doesn't support indirect branches via a 32-bit memory slot.
28530 		 Since the x32 GOT slot is 64 bits with the upper 32 bits zero,
28531 		 an indirect branch via the x32 GOT slot is OK.  */
28532 	      if (GET_MODE (fnaddr) != word_mode)
28533 		fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
28534 	      fnaddr = gen_rtx_MEM (QImode, fnaddr);
28535 	    }
28536 	}
28537     }
28538 
28539   /* Skip setting up RAX register for -mskip-rax-setup when there are no
28540      parameters passed in vector registers.  */
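  /* CALLARG2 is that register count; the SysV AMD64 ABI passes it to
     variadic functions in %al, which is what the move below sets up.  */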
28541   if (TARGET_64BIT
28542       && (INTVAL (callarg2) > 0
28543 	  || (INTVAL (callarg2) == 0
28544 	      && (TARGET_SSE || !flag_skip_rax_setup))))
28545     {
28546       rtx al = gen_rtx_REG (QImode, AX_REG);
28547       emit_move_insn (al, callarg2);
28548       use_reg (&use, al);
28549     }
28550 
28551   if (ix86_cmodel == CM_LARGE_PIC
28552       && !TARGET_PECOFF
28553       && MEM_P (fnaddr)
28554       && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
28555       && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
28556     fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
28557   /* Since the x32 GOT slot is 64 bits with the upper 32 bits zero, an
28558      indirect branch via the x32 GOT slot is OK.  */
28559   else if (!(TARGET_X32
28560 	     && MEM_P (fnaddr)
28561 	     && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND
28562 	     && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode))
28563 	   && (sibcall
28564 	       ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
28565 	       : !call_insn_operand (XEXP (fnaddr, 0), word_mode)))
28566     {
28567       fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
28568       fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
28569     }
28570 
28571   call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
28572 
28573   if (retval)
28574     {
28575       /* We should add the bound registers as destinations in case a
28576 	 pointer with bounds is returned.  */
28577       if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
28578 	{
28579 	  rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
28580 	  rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
28581 	  if (GET_CODE (retval) == PARALLEL)
28582 	    {
28583 	      b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
28584 	      b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
28585 	      rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
28586 	      retval = chkp_join_splitted_slot (retval, par);
28587 	    }
28588 	  else
28589 	    {
28590 	      retval = gen_rtx_PARALLEL (VOIDmode,
28591 					 gen_rtvec (3, retval, b0, b1));
28592 	      chkp_put_regs_to_expr_list (retval);
28593 	    }
28594 	}
28595 
28596       call = gen_rtx_SET (retval, call);
28597     }
28598   vec[vec_len++] = call;
28599 
28600   if (pop)
28601     {
28602       pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
28603       pop = gen_rtx_SET (stack_pointer_rtx, pop);
28604       vec[vec_len++] = pop;
28605     }
28606 
28607   if (cfun->machine->no_caller_saved_registers
28608       && (!fndecl
28609 	  || (!TREE_THIS_VOLATILE (fndecl)
28610 	      && !lookup_attribute ("no_caller_saved_registers",
28611 				    TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))))
28612     {
28613       static const char ix86_call_used_regs[] = CALL_USED_REGISTERS;
28614       bool is_64bit_ms_abi = (TARGET_64BIT
28615 			      && ix86_function_abi (fndecl) == MS_ABI);
28616       char c_mask = CALL_USED_REGISTERS_MASK (is_64bit_ms_abi);
28617 
28618       /* If there are no caller-saved registers, add all registers
28619 	 that are clobbered by the call which returns.  */
28620       for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
28621 	if (!fixed_regs[i]
28622 	    && (ix86_call_used_regs[i] == 1
28623 		|| (ix86_call_used_regs[i] & c_mask))
28624 	    && !STACK_REGNO_P (i)
28625 	    && !MMX_REGNO_P (i))
28626 	  clobber_reg (&use,
28627 		       gen_rtx_REG (GET_MODE (regno_reg_rtx[i]), i));
28628     }
28629   else if (TARGET_64BIT_MS_ABI
28630 	   && (!callarg2 || INTVAL (callarg2) != -2))
28631     {
28632       unsigned i;
28633 
28634       for (i = 0; i < NUM_X86_64_MS_CLOBBERED_REGS; i++)
28635 	{
28636 	  int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
28637 	  machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
28638 
28639 	  clobber_reg (&use, gen_rtx_REG (mode, regno));
28640 	}
28641 
28642       /* Set here, but it may get cleared later.  */
28643       if (TARGET_CALL_MS2SYSV_XLOGUES)
28644 	{
28645 	  if (!TARGET_SSE)
28646 	    ;
28647 
28648 	  /* Don't break hot-patched functions.  */
28649 	  else if (ix86_function_ms_hook_prologue (current_function_decl))
28650 	    ;
28651 
28652 	  /* TODO: Cases not yet examined.  */
28653 	  else if (flag_split_stack)
28654 	    warn_once_call_ms2sysv_xlogues ("-fsplit-stack");
28655 
28656 	  else
28657 	    {
28658 	      gcc_assert (!reload_completed);
28659 	      cfun->machine->call_ms2sysv = true;
28660 	    }
28661 	}
28662     }
28663 
28664   if (vec_len > 1)
28665     call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
28666   call = emit_call_insn (call);
28667   if (use)
28668     CALL_INSN_FUNCTION_USAGE (call) = use;
28669 
28670   return call;
28671 }
28672 
28673 /* Return true if the function being called was marked with attribute
28674    "noplt" or using -fno-plt and we are compiling for non-PIC.  We need
28675    to handle the non-PIC case in the backend because there is no easy
28676    interface for the front-end to force non-PLT calls to use the GOT.
28677    This is currently used only with 64-bit or 32-bit GOT32X ELF targets
28678    to call the function marked "noplt" indirectly.  */
28679 
28680 static bool
28681 ix86_nopic_noplt_attribute_p (rtx call_op)
28682 {
28683   if (flag_pic || ix86_cmodel == CM_LARGE
28684       || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
28685       || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
28686       || SYMBOL_REF_LOCAL_P (call_op))
28687     return false;
28688 
28689   tree symbol_decl = SYMBOL_REF_DECL (call_op);
28690 
28691   if (!flag_plt
28692       || (symbol_decl != NULL_TREE
28693           && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
28694     return true;
28695 
28696   return false;
28697 }
28698 
28699 /* Output an indirect branch via a call and return thunk.  CALL_OP is a
28700    register which contains the branch target.  The branch is a tail
28701    call if SIBCALL_P is true.
28702    A normal call is converted to:
28703 
28704 	call __x86_indirect_thunk_reg
28705 
28706    and a tail call is converted to:
28707 
28708 	jmp __x86_indirect_thunk_reg
28709  */
28710 
28711 static void
28712 ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
28713 {
28714   char thunk_name_buf[32];
28715   char *thunk_name;
28716   enum indirect_thunk_prefix need_prefix
28717     = indirect_thunk_need_prefix (current_output_insn);
28718   int regno = REGNO (call_op);
28719 
28720   if (cfun->machine->indirect_branch_type
28721       != indirect_branch_thunk_inline)
28722     {
28723       if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
28724 	{
28725 	  int i = regno;
28726 	  if (i >= FIRST_REX_INT_REG)
28727 	    i -= (FIRST_REX_INT_REG - LAST_INT_REG - 1);
28728 	  if (need_prefix == indirect_thunk_prefix_bnd)
28729 	    indirect_thunks_bnd_used |= 1 << i;
28730 	  else
28731 	    indirect_thunks_used |= 1 << i;
28732 	}
28733       indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
28734       thunk_name = thunk_name_buf;
28735     }
28736   else
28737     thunk_name = NULL;
28738 
28739   if (sibcall_p)
28740     {
28741       if (thunk_name != NULL)
28742 	{
28743 	  if (need_prefix == indirect_thunk_prefix_bnd)
28744 	    fprintf (asm_out_file, "\tbnd jmp\t%s\n", thunk_name);
28745 	  else
28746 	    fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name);
28747 	}
28748       else
28749 	output_indirect_thunk (need_prefix, regno);
28750     }
28751   else
28752     {
28753       if (thunk_name != NULL)
28754 	{
28755 	  if (need_prefix == indirect_thunk_prefix_bnd)
28756 	    fprintf (asm_out_file, "\tbnd call\t%s\n", thunk_name);
28757 	  else
28758 	    fprintf (asm_out_file, "\tcall\t%s\n", thunk_name);
28759 	  return;
28760 	}
28761 
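      /* Here the thunk body is emitted inline, so a normal call is laid
	 out as

		jmp	L2
	L1:
		<inline thunk body>
	L2:
		call	L1
       */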
28762       char indirectlabel1[32];
28763       char indirectlabel2[32];
28764 
28765       ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
28766 				   INDIRECT_LABEL,
28767 				   indirectlabelno++);
28768       ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
28769 				   INDIRECT_LABEL,
28770 				   indirectlabelno++);
28771 
28772       /* Jump.  */
28773       if (need_prefix == indirect_thunk_prefix_bnd)
28774 	fputs ("\tbnd jmp\t", asm_out_file);
28775       else
28776 	fputs ("\tjmp\t", asm_out_file);
28777       assemble_name_raw (asm_out_file, indirectlabel2);
28778       fputc ('\n', asm_out_file);
28779 
28780       ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
28781 
28782       if (thunk_name != NULL)
28783 	{
28784 	  if (need_prefix == indirect_thunk_prefix_bnd)
28785 	    fprintf (asm_out_file, "\tbnd jmp\t%s\n", thunk_name);
28786 	  else
28787 	    fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name);
28788 	}
28789       else
28790 	output_indirect_thunk (need_prefix, regno);
28791 
28792       ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
28793 
28794       /* Call.  */
28795       if (need_prefix == indirect_thunk_prefix_bnd)
28796 	fputs ("\tbnd call\t", asm_out_file);
28797       else
28798 	fputs ("\tcall\t", asm_out_file);
28799       assemble_name_raw (asm_out_file, indirectlabel1);
28800       fputc ('\n', asm_out_file);
28801     }
28802 }
28803 
28804 /* Output indirect branch via a call and return thunk.  CALL_OP is
28805    the branch target.  XASM is the assembly template for CALL_OP.
28806    Branch is a tail call if SIBCALL_P is true.  A normal call is
28807    converted to:
28808 
28809 	jmp L2
28810    L1:
28811 	push CALL_OP
28812 	jmp __x86_indirect_thunk
28813    L2:
28814 	call L1
28815 
28816    and a tail call is converted to:
28817 
28818 	push CALL_OP
28819 	jmp __x86_indirect_thunk
28820  */
28821 
28822 static void
28823 ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
28824 				      bool sibcall_p)
28825 {
28826   char thunk_name_buf[32];
28827   char *thunk_name;
28828   char push_buf[64];
28829   enum indirect_thunk_prefix need_prefix
28830     = indirect_thunk_need_prefix (current_output_insn);
28831   int regno = -1;
28832 
28833   if (cfun->machine->indirect_branch_type
28834       != indirect_branch_thunk_inline)
28835     {
28836       if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
28837 	{
28838 	  if (need_prefix == indirect_thunk_prefix_bnd)
28839 	    indirect_thunk_bnd_needed = true;
28840 	  else
28841 	    indirect_thunk_needed = true;
28842 	}
28843       indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
28844       thunk_name = thunk_name_buf;
28845     }
28846   else
28847     thunk_name = NULL;
28848 
28849   snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
28850 	    TARGET_64BIT ? 'q' : 'l', xasm);
28851 
28852   if (sibcall_p)
28853     {
28854       output_asm_insn (push_buf, &call_op);
28855       if (thunk_name != NULL)
28856 	{
28857 	  if (need_prefix == indirect_thunk_prefix_bnd)
28858 	    fprintf (asm_out_file, "\tbnd jmp\t%s\n", thunk_name);
28859 	  else
28860 	    fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name);
28861 	}
28862       else
28863 	output_indirect_thunk (need_prefix, regno);
28864     }
28865   else
28866     {
28867       char indirectlabel1[32];
28868       char indirectlabel2[32];
28869 
28870       ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
28871 				   INDIRECT_LABEL,
28872 				   indirectlabelno++);
28873       ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
28874 				   INDIRECT_LABEL,
28875 				   indirectlabelno++);
28876 
28877       /* Jump.  */
28878       if (need_prefix == indirect_thunk_prefix_bnd)
28879 	fputs ("\tbnd jmp\t", asm_out_file);
28880       else
28881 	fputs ("\tjmp\t", asm_out_file);
28882       assemble_name_raw (asm_out_file, indirectlabel2);
28883       fputc ('\n', asm_out_file);
28884 
28885       ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
28886 
28887       /* An external function may be called via GOT, instead of PLT.  */
28888       if (MEM_P (call_op))
28889 	{
28890 	  struct ix86_address parts;
28891 	  rtx addr = XEXP (call_op, 0);
28892 	  if (ix86_decompose_address (addr, &parts)
28893 	      && parts.base == stack_pointer_rtx)
28894 	    {
28895 	      /* Since call will adjust stack by -UNITS_PER_WORD,
28896 		 we must convert "disp(stack, index, scale)" to
28897 		 "disp+UNITS_PER_WORD(stack, index, scale)".  */
28898 	      if (parts.index)
28899 		{
28900 		  addr = gen_rtx_MULT (Pmode, parts.index,
28901 				       GEN_INT (parts.scale));
28902 		  addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
28903 				       addr);
28904 		}
28905 	      else
28906 		addr = stack_pointer_rtx;
28907 
28908 	      rtx disp;
28909 	      if (parts.disp != NULL_RTX)
28910 		disp = plus_constant (Pmode, parts.disp,
28911 				      UNITS_PER_WORD);
28912 	      else
28913 		disp = GEN_INT (UNITS_PER_WORD);
28914 
28915 	      addr = gen_rtx_PLUS (Pmode, addr, disp);
28916 	      call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
28917 	    }
28918 	}
28919 
28920       output_asm_insn (push_buf, &call_op);
28921 
28922       if (thunk_name != NULL)
28923 	{
28924 	  if (need_prefix == indirect_thunk_prefix_bnd)
28925 	    fprintf (asm_out_file, "\tbnd jmp\t%s\n", thunk_name);
28926 	  else
28927 	    fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name);
28928 	}
28929       else
28930 	output_indirect_thunk (need_prefix, regno);
28931 
28932       ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
28933 
28934       /* Call.  */
28935       if (need_prefix == indirect_thunk_prefix_bnd)
28936 	fputs ("\tbnd call\t", asm_out_file);
28937       else
28938 	fputs ("\tcall\t", asm_out_file);
28939       assemble_name_raw (asm_out_file, indirectlabel1);
28940       fputc ('\n', asm_out_file);
28941     }
28942 }
28943 
28944 /* Output indirect branch via a call and return thunk.  CALL_OP is
28945    the branch target.  XASM is the assembly template for CALL_OP.
28946    Branch is a tail call if SIBCALL_P is true.   */
28947 
28948 static void
28949 ix86_output_indirect_branch (rtx call_op, const char *xasm,
28950 			     bool sibcall_p)
28951 {
28952   if (REG_P (call_op))
28953     ix86_output_indirect_branch_via_reg (call_op, sibcall_p);
28954   else
28955     ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p);
28956 }
28957 
28958 /* Output indirect jump.  CALL_OP is the jump target.  */
28959 
28960 const char *
28961 ix86_output_indirect_jmp (rtx call_op)
28962 {
28963   if (cfun->machine->indirect_branch_type != indirect_branch_keep)
28964     {
28965       /* We can't have a red zone since the "call" in the indirect thunk
28966          pushes the return address onto the stack, destroying the red zone.  */
28967       if (ix86_red_zone_size != 0)
28968 	gcc_unreachable ();
28969 
28970       ix86_output_indirect_branch (call_op, "%0", true);
28971       return "";
28972     }
28973   else
28974     return "%!jmp\t%A0";
28975 }
28976 
28977 /* Output a function return.  Add a REP prefix to the RET if LONG_P is
28978    true and the function return is kept.  */
28979 
28980 const char *
28981 ix86_output_function_return (bool long_p)
28982 {
28983   if (cfun->machine->function_return_type != indirect_branch_keep)
28984     {
28985       char thunk_name[32];
28986       enum indirect_thunk_prefix need_prefix
28987 	= indirect_thunk_need_prefix (current_output_insn);
28988 
28989       if (cfun->machine->function_return_type
28990 	  != indirect_branch_thunk_inline)
28991 	{
28992 	  bool need_thunk = (cfun->machine->function_return_type
28993 			     == indirect_branch_thunk);
28994 	  indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
28995 			       true);
28996 	  if (need_prefix == indirect_thunk_prefix_bnd)
28997 	    {
28998 	      indirect_return_bnd_needed |= need_thunk;
28999 	      fprintf (asm_out_file, "\tbnd jmp\t%s\n", thunk_name);
29000 	    }
29001 	  else
29002 	    {
29003 	      indirect_return_needed |= need_thunk;
29004 	      fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name);
29005 	    }
29006 	}
29007       else
29008 	output_indirect_thunk (need_prefix, INVALID_REGNUM);
29009 
29010       return "";
29011     }
29012 
29013   if (!long_p || ix86_bnd_prefixed_insn_p (current_output_insn))
29014     return "%!ret";
29015 
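  /* A two-byte "rep ret" rather than a plain one-byte "ret"; this has
     historically avoided a branch-prediction penalty on some AMD cores
     when the return is itself a branch target.  */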
29016   return "rep%; ret";
29017 }
29018 
29019 /* Output indirect function return.  RET_OP is the function return
29020    target.  */
29021 
29022 const char *
29023 ix86_output_indirect_function_return (rtx ret_op)
29024 {
29025   if (cfun->machine->function_return_type != indirect_branch_keep)
29026     {
29027       char thunk_name[32];
29028       enum indirect_thunk_prefix need_prefix
29029 	= indirect_thunk_need_prefix (current_output_insn);
29030       unsigned int regno = REGNO (ret_op);
29031       gcc_assert (regno == CX_REG);
29032 
29033       if (cfun->machine->function_return_type
29034 	  != indirect_branch_thunk_inline)
29035 	{
29036 	  bool need_thunk = (cfun->machine->function_return_type
29037 			     == indirect_branch_thunk);
29038 	  indirect_thunk_name (thunk_name, regno, need_prefix, true);
29039 	  if (need_prefix == indirect_thunk_prefix_bnd)
29040 	    {
29041 	      if (need_thunk)
29042 		{
29043 		  indirect_return_via_cx_bnd = true;
29044 		  indirect_thunks_bnd_used |= 1 << CX_REG;
29045 		}
29046 	      fprintf (asm_out_file, "\tbnd jmp\t%s\n", thunk_name);
29047 	    }
29048 	  else
29049 	    {
29050 	      if (need_thunk)
29051 		{
29052 		  indirect_return_via_cx = true;
29053 		  indirect_thunks_used |= 1 << CX_REG;
29054 		}
29055 	      fprintf (asm_out_file, "\tjmp\t%s\n", thunk_name);
29056 	    }
29057 	}
29058       else
29059 	output_indirect_thunk (need_prefix, regno);
29060 
29061       return "";
29062     }
29063   else
29064     return "%!jmp\t%A0";
29065 }
29066 
29067 /* Split a simple return that pops POPC bytes from the stack into an
29068    indirect branch with a stack adjustment.  */
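/* The emitted sequence is roughly

	popl	%ecx		# return address -> %ecx
	addl	$POPC, %esp	# drop the POPC bytes of arguments
	jmp	*%ecx		# return via an indirect branch

   with CFA notes so the unwinder can still find the return address.  */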
29069 
29070 void
29071 ix86_split_simple_return_pop_internal (rtx popc)
29072 {
29073   struct machine_function *m = cfun->machine;
29074   rtx ecx = gen_rtx_REG (SImode, CX_REG);
29075   rtx_insn *insn;
29076 
29077   /* There is no "pascal" calling convention in any 64bit ABI.  */
29078   gcc_assert (!TARGET_64BIT);
29079 
29080   insn = emit_insn (gen_pop (ecx));
29081   m->fs.cfa_offset -= UNITS_PER_WORD;
29082   m->fs.sp_offset -= UNITS_PER_WORD;
29083 
29084   rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
29085   x = gen_rtx_SET (stack_pointer_rtx, x);
29086   add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
29087   add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
29088   RTX_FRAME_RELATED_P (insn) = 1;
29089 
29090   x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, popc);
29091   x = gen_rtx_SET (stack_pointer_rtx, x);
29092   insn = emit_insn (x);
29093   add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
29094   RTX_FRAME_RELATED_P (insn) = 1;
29095 
29096   /* Now return address is in ECX.  */
29097   emit_jump_insn (gen_simple_return_indirect_internal (ecx));
29098 }
29099 
29100 /* Output the assembly for a call instruction.  */
29101 
29102 const char *
29103 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
29104 {
29105   bool direct_p = constant_call_address_operand (call_op, VOIDmode);
29106   bool output_indirect_p
29107     = (!TARGET_SEH
29108        && cfun->machine->indirect_branch_type != indirect_branch_keep);
29109   bool seh_nop_p = false;
29110   const char *xasm;
29111 
29112   if (SIBLING_CALL_P (insn))
29113     {
29114       if (direct_p)
29115 	{
29116 	  if (ix86_nopic_noplt_attribute_p (call_op))
29117 	    {
29118 	      direct_p = false;
29119 	      if (TARGET_64BIT)
29120 		{
29121 		  if (output_indirect_p)
29122 		    xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
29123 		  else
29124 		    xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
29125 		}
29126 	      else
29127 		{
29128 		  if (output_indirect_p)
29129 		    xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
29130 		  else
29131 		    xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
29132 		}
29133 	    }
29134 	  else
29135 	    xasm = "%!jmp\t%P0";
29136 	}
29137       /* SEH epilogue detection requires the indirect branch case
29138 	 to include REX.W.  */
29139       else if (TARGET_SEH)
29140 	xasm = "%!rex.W jmp\t%A0";
29141       else
29142 	{
29143 	  if (output_indirect_p)
29144 	    xasm = "%0";
29145 	  else
29146 	    xasm = "%!jmp\t%A0";
29147 	}
29148 
29149       if (output_indirect_p && !direct_p)
29150 	ix86_output_indirect_branch (call_op, xasm, true);
29151       else
29152 	output_asm_insn (xasm, &call_op);
29153       return "";
29154     }
29155 
29156   /* SEH unwinding can require an extra nop to be emitted in several
29157      circumstances.  Determine if we have one of those.  */
29158   if (TARGET_SEH)
29159     {
29160       rtx_insn *i;
29161 
29162       for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
29163 	{
29164 	  /* Prevent a catch region from being adjacent to a jump that would
29165 	     be interpreted as an epilogue sequence by the unwinder.  */
29166 	  if (JUMP_P(i) && CROSSING_JUMP_P (i))
29167 	    {
29168 	      seh_nop_p = true;
29169 	      break;
29170 	    }
29171 
29172 	  /* If we get to another real insn, we don't need the nop.  */
29173 	  if (INSN_P (i))
29174 	    break;
29175 
29176 	  /* If we get to the epilogue note, prevent a catch region from
29177 	     being adjacent to the standard epilogue sequence.  With
29178 	     -fnon-call-exceptions, we'll have done this during epilogue emission.  */
29179 	  if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
29180 	      && !flag_non_call_exceptions
29181 	      && !can_throw_internal (insn))
29182 	    {
29183 	      seh_nop_p = true;
29184 	      break;
29185 	    }
29186 	}
29187 
29188       /* If we didn't find a real insn following the call, prevent the
29189 	 unwinder from looking into the next function.  */
29190       if (i == NULL)
29191 	seh_nop_p = true;
29192     }
29193 
29194   if (direct_p)
29195     {
29196       if (ix86_nopic_noplt_attribute_p (call_op))
29197 	{
29198 	  direct_p = false;
29199 	  if (TARGET_64BIT)
29200 	    {
29201 	      if (output_indirect_p)
29202 		xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
29203 	      else
29204 		xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
29205 	    }
29206 	  else
29207 	    {
29208 	      if (output_indirect_p)
29209 		xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
29210 	      else
29211 		xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
29212 	    }
29213 	}
29214       else
29215 	xasm = "%!call\t%P0";
29216     }
29217   else
29218     {
29219       if (output_indirect_p)
29220 	xasm = "%0";
29221       else
29222 	xasm = "%!call\t%A0";
29223     }
29224 
29225   if (output_indirect_p && !direct_p)
29226     ix86_output_indirect_branch (call_op, xasm, false);
29227   else
29228     output_asm_insn (xasm, &call_op);
29229 
29230   if (seh_nop_p)
29231     return "nop";
29232 
29233   return "";
29234 }
29235 
29236 /* Clear stack slot assignments remembered from previous functions.
29237    This is called from INIT_EXPANDERS once before RTL is emitted for each
29238    function.  */
29239 
29240 static struct machine_function *
29241 ix86_init_machine_status (void)
29242 {
29243   struct machine_function *f;
29244 
29245   f = ggc_cleared_alloc<machine_function> ();
29246   f->call_abi = ix86_abi;
29247 
29248   return f;
29249 }
29250 
29251 /* Return a MEM corresponding to a stack slot with mode MODE.
29252    Allocate a new slot if necessary.
29253 
29254    The RTL for a function can have several slots available: N is
29255    which slot to use.  */
29256 
29257 rtx
29258 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
29259 {
29260   struct stack_local_entry *s;
29261 
29262   gcc_assert (n < MAX_386_STACK_LOCALS);
29263 
29264   for (s = ix86_stack_locals; s; s = s->next)
29265     if (s->mode == mode && s->n == n)
29266       return validize_mem (copy_rtx (s->rtl));
29267 
29268   s = ggc_alloc<stack_local_entry> ();
29269   s->n = n;
29270   s->mode = mode;
29271   s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
29272 
29273   s->next = ix86_stack_locals;
29274   ix86_stack_locals = s;
29275   return validize_mem (copy_rtx (s->rtl));
29276 }
29277 
29278 static void
29279 ix86_instantiate_decls (void)
29280 {
29281   struct stack_local_entry *s;
29282 
29283   for (s = ix86_stack_locals; s; s = s->next)
29284     if (s->rtl != NULL_RTX)
29285       instantiate_decl_rtl (s->rtl);
29286 }
29287 
29288 /* Return the number used for encoding REG, in the range 0..7.  */
29289 
29290 static int
29291 reg_encoded_number (rtx reg)
29292 {
29293   unsigned regno = REGNO (reg);
29294   switch (regno)
29295     {
29296     case AX_REG:
29297       return 0;
29298     case CX_REG:
29299       return 1;
29300     case DX_REG:
29301       return 2;
29302     case BX_REG:
29303       return 3;
29304     case SP_REG:
29305       return 4;
29306     case BP_REG:
29307       return 5;
29308     case SI_REG:
29309       return 6;
29310     case DI_REG:
29311       return 7;
29312     default:
29313       break;
29314     }
29315   if (IN_RANGE (regno, FIRST_STACK_REG, LAST_STACK_REG))
29316     return regno - FIRST_STACK_REG;
29317   if (IN_RANGE (regno, FIRST_SSE_REG, LAST_SSE_REG))
29318     return regno - FIRST_SSE_REG;
29319   if (IN_RANGE (regno, FIRST_MMX_REG, LAST_MMX_REG))
29320     return regno - FIRST_MMX_REG;
29321   if (IN_RANGE (regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG))
29322     return regno - FIRST_REX_SSE_REG;
29323   if (IN_RANGE (regno, FIRST_REX_INT_REG, LAST_REX_INT_REG))
29324     return regno - FIRST_REX_INT_REG;
29325   if (IN_RANGE (regno, FIRST_MASK_REG, LAST_MASK_REG))
29326     return regno - FIRST_MASK_REG;
29327   if (IN_RANGE (regno, FIRST_BND_REG, LAST_BND_REG))
29328     return regno - FIRST_BND_REG;
29329   return -1;
29330 }
29331 
29332 /* Given an insn INSN whose NOPERANDS operands are stored in OPERANDS,
29333    return the modr/m byte used in its encoding if it could be relevant
29334    for ROP mitigation, otherwise return -1.  If POPNO0 and POPNO1 are
29335    nonnull, store the operand numbers used for calculating it into them.  */
29336 
29337 static int
29338 ix86_get_modrm_for_rop (rtx_insn *insn, rtx *operands, int noperands,
29339 			int *popno0 = 0, int *popno1 = 0)
29340 {
29341   if (asm_noperands (PATTERN (insn)) >= 0)
29342     return -1;
29343   int has_modrm = get_attr_modrm (insn);
29344   if (!has_modrm)
29345     return -1;
29346   enum attr_modrm_class cls = get_attr_modrm_class (insn);
29347   rtx op0, op1;
29348   switch (cls)
29349     {
29350     case MODRM_CLASS_OP02:
29351       gcc_assert (noperands >= 3);
29352       if (popno0)
29353 	{
29354 	  *popno0 = 0;
29355 	  *popno1 = 2;
29356 	}
29357       op0 = operands[0];
29358       op1 = operands[2];
29359       break;
29360     case MODRM_CLASS_OP01:
29361       gcc_assert (noperands >= 2);
29362       if (popno0)
29363 	{
29364 	  *popno0 = 0;
29365 	  *popno1 = 1;
29366 	}
29367       op0 = operands[0];
29368       op1 = operands[1];
29369       break;
29370     default:
29371       return -1;
29372     }
29373   if (REG_P (op0) && REG_P (op1))
29374     {
29375       int enc0 = reg_encoded_number (op0);
29376       int enc1 = reg_encoded_number (op1);
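      /* Register-direct form: mod = 11, reg = ENC1, r/m = ENC0; e.g.
	 op1 == %edx (2) and op0 == %ecx (1) give 0xc0 | (2 << 3) | 1 == 0xd1.  */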
29377       return 0xc0 + (enc1 << 3) + enc0;
29378     }
29379   return -1;
29380 }
29381 
29382 /* Check whether x86 address PARTS is a pc-relative address.  */
29383 
29384 bool
29385 ix86_rip_relative_addr_p (struct ix86_address *parts)
29386 {
29387   rtx base, index, disp;
29388 
29389   base = parts->base;
29390   index = parts->index;
29391   disp = parts->disp;
29392 
29393   if (disp && !base && !index)
29394     {
29395       if (TARGET_64BIT)
29396 	{
29397 	  rtx symbol = disp;
29398 
29399 	  if (GET_CODE (disp) == CONST)
29400 	    symbol = XEXP (disp, 0);
29401 	  if (GET_CODE (symbol) == PLUS
29402 	      && CONST_INT_P (XEXP (symbol, 1)))
29403 	    symbol = XEXP (symbol, 0);
29404 
29405 	  if (GET_CODE (symbol) == LABEL_REF
29406 	      || (GET_CODE (symbol) == SYMBOL_REF
29407 		  && SYMBOL_REF_TLS_MODEL (symbol) == 0)
29408 	      || (GET_CODE (symbol) == UNSPEC
29409 		  && (XINT (symbol, 1) == UNSPEC_GOTPCREL
29410 		      || XINT (symbol, 1) == UNSPEC_PCREL
29411 		      || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
29412 	    return true;
29413 	}
29414     }
29415   return false;
29416 }
29417 
29418 /* Calculate the length of the memory address in the instruction encoding.
29419    This includes the addr32 prefix but not the one-byte modrm, opcode,
29420    or other prefixes.  We never generate an addr32 prefix for an LEA insn.  */
29421 
29422 int
29423 memory_address_length (rtx addr, bool lea)
29424 {
29425   struct ix86_address parts;
29426   rtx base, index, disp;
29427   int len;
29428   int ok;
29429 
29430   if (GET_CODE (addr) == PRE_DEC
29431       || GET_CODE (addr) == POST_INC
29432       || GET_CODE (addr) == PRE_MODIFY
29433       || GET_CODE (addr) == POST_MODIFY)
29434     return 0;
29435 
29436   ok = ix86_decompose_address (addr, &parts);
29437   gcc_assert (ok);
29438 
29439   len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
29440 
29441   /* If this is not an LEA instruction, add the length of the addr32 prefix.  */
29442   if (TARGET_64BIT && !lea
29443       && (SImode_address_operand (addr, VOIDmode)
29444 	  || (parts.base && GET_MODE (parts.base) == SImode)
29445 	  || (parts.index && GET_MODE (parts.index) == SImode)))
29446     len++;
29447 
29448   base = parts.base;
29449   index = parts.index;
29450   disp = parts.disp;
29451 
29452   if (base && SUBREG_P (base))
29453     base = SUBREG_REG (base);
29454   if (index && SUBREG_P (index))
29455     index = SUBREG_REG (index);
29456 
29457   gcc_assert (base == NULL_RTX || REG_P (base));
29458   gcc_assert (index == NULL_RTX || REG_P (index));
29459 
29460   /* Rule of thumb:
29461        - esp as the base always wants an index,
29462        - ebp as the base always wants a displacement,
29463        - r12 as the base always wants an index,
29464        - r13 as the base always wants a displacement.  */
29465 
29466   /* Register Indirect.  */
29467   if (base && !index && !disp)
29468     {
29469       /* esp (for its index) and ebp (for its displacement) need
29470 	 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
29471 	 code.  */
29472       if (base == arg_pointer_rtx
29473 	  || base == frame_pointer_rtx
29474 	  || REGNO (base) == SP_REG
29475 	  || REGNO (base) == BP_REG
29476 	  || REGNO (base) == R12_REG
29477 	  || REGNO (base) == R13_REG)
29478 	len++;
29479     }
29480 
29481   /* Direct Addressing.  In 64-bit mode, mod 00 r/m 5
29482      is not disp32 but disp32(%rip), so for disp32 a
29483      SIB byte is needed, unless print_operand_address
29484      optimizes it into disp32(%rip) or (%rip) is implied
29485      by an UNSPEC.  */
29486   else if (disp && !base && !index)
29487     {
29488       len += 4;
29489       if (!ix86_rip_relative_addr_p (&parts))
29490 	len++;
29491     }
29492   else
29493     {
29494       /* Find the length of the displacement constant.  */
29495       if (disp)
29496 	{
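	  /* Constraint "K" accepts signed 8-bit constants, i.e. a
	     displacement that fits in a one-byte disp8 field.  */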
29497 	  if (base && satisfies_constraint_K (disp))
29498 	    len += 1;
29499 	  else
29500 	    len += 4;
29501 	}
29502       /* ebp always wants a displacement.  Similarly r13.  */
29503       else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
29504 	len++;
29505 
29506       /* An index requires the two-byte modrm form....  */
29507       if (index
29508 	  /* ...like esp (or r12), which always wants an index.  */
29509 	  || base == arg_pointer_rtx
29510 	  || base == frame_pointer_rtx
29511 	  || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
29512 	len++;
29513     }
29514 
29515   return len;
29516 }
29517 
29518 /* Compute the default value for the "length_immediate" attribute.  When SHORTFORM
29519    is set, expect that the insn has an 8-bit immediate alternative.  */
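/* For example, with SHORTFORM set an SImode immediate of 100 fits the signed
   8-bit range [-128, 127] and gives length 1, while an immediate of 300 needs
   the full 4-byte encoding (illustrative values only).  */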
29520 int
29521 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
29522 {
29523   int len = 0;
29524   int i;
29525   extract_insn_cached (insn);
29526   for (i = recog_data.n_operands - 1; i >= 0; --i)
29527     if (CONSTANT_P (recog_data.operand[i]))
29528       {
29529         enum attr_mode mode = get_attr_mode (insn);
29530 
29531 	gcc_assert (!len);
29532 	if (shortform && CONST_INT_P (recog_data.operand[i]))
29533 	  {
29534 	    HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
29535 	    switch (mode)
29536 	      {
29537 	      case MODE_QI:
29538 		len = 1;
29539 		continue;
29540 	      case MODE_HI:
29541 		ival = trunc_int_for_mode (ival, HImode);
29542 		break;
29543 	      case MODE_SI:
29544 		ival = trunc_int_for_mode (ival, SImode);
29545 		break;
29546 	      default:
29547 		break;
29548 	      }
29549 	    if (IN_RANGE (ival, -128, 127))
29550 	      {
29551 		len = 1;
29552 		continue;
29553 	      }
29554 	  }
29555 	switch (mode)
29556 	  {
29557 	  case MODE_QI:
29558 	    len = 1;
29559 	    break;
29560 	  case MODE_HI:
29561 	    len = 2;
29562 	    break;
29563 	  case MODE_SI:
29564 	    len = 4;
29565 	    break;
29566 	  /* Immediates for DImode instructions are encoded
29567 	     as 32bit sign extended values.  */
29568 	  case MODE_DI:
29569 	    len = 4;
29570 	    break;
29571 	  default:
29572 	    fatal_insn ("unknown insn mode", insn);
29573 	}
29574       }
29575   return len;
29576 }
29577 
29578 /* Compute default value for "length_address" attribute.  */
29579 int
29580 ix86_attr_length_address_default (rtx_insn *insn)
29581 {
29582   int i;
29583 
29584   if (get_attr_type (insn) == TYPE_LEA)
29585     {
29586       rtx set = PATTERN (insn), addr;
29587 
29588       if (GET_CODE (set) == PARALLEL)
29589 	set = XVECEXP (set, 0, 0);
29590 
29591       gcc_assert (GET_CODE (set) == SET);
29592 
29593       addr = SET_SRC (set);
29594 
29595       return memory_address_length (addr, true);
29596     }
29597 
29598   extract_insn_cached (insn);
29599   for (i = recog_data.n_operands - 1; i >= 0; --i)
29600     {
29601       rtx op = recog_data.operand[i];
29602       if (MEM_P (op))
29603 	{
29604 	  constrain_operands_cached (insn, reload_completed);
29605 	  if (which_alternative != -1)
29606 	    {
29607 	      const char *constraints = recog_data.constraints[i];
29608 	      int alt = which_alternative;
29609 
29610 	      while (*constraints == '=' || *constraints == '+')
29611 		constraints++;
29612 	      while (alt-- > 0)
29613 	        while (*constraints++ != ',')
29614 		  ;
29615 	      /* Skip ignored operands.  */
29616 	      if (*constraints == 'X')
29617 		continue;
29618 	    }
29619 
29620 	  int len = memory_address_length (XEXP (op, 0), false);
29621 
29622 	  /* Account for segment prefix for non-default addr spaces.  */
29623 	  if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
29624 	    len++;
29625 
29626 	  return len;
29627 	}
29628     }
29629   return 0;
29630 }
29631 
29632 /* Compute the default value for the "length_vex" attribute.  It includes
29633    the 2- or 3-byte VEX prefix and 1 opcode byte.  */
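/* For reference, the 2-byte form is encoded as C5 xx and the 3-byte form as
   C4 xx xx, so together with the opcode byte the value is 3 or 4.  */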
29634 
29635 int
29636 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
29637 			      bool has_vex_w)
29638 {
29639   int i;
29640 
29641   /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX W bit
29642      requires the 3-byte VEX prefix.  */
29643   if (!has_0f_opcode || has_vex_w)
29644     return 3 + 1;
29645 
29646   /* We can always use the 2-byte VEX prefix in 32-bit mode.  */
29647   if (!TARGET_64BIT)
29648     return 2 + 1;
29649 
29650   extract_insn_cached (insn);
29651 
29652   for (i = recog_data.n_operands - 1; i >= 0; --i)
29653     if (REG_P (recog_data.operand[i]))
29654       {
29655 	/* REX.W bit uses 3 byte VEX prefix.  */
29656 	if (GET_MODE (recog_data.operand[i]) == DImode
29657 	    && GENERAL_REG_P (recog_data.operand[i]))
29658 	  return 3 + 1;
29659       }
29660     else
29661       {
29662 	/* REX.X or REX.B bits use 3 byte VEX prefix.  */
29663 	if (MEM_P (recog_data.operand[i])
29664 	    && x86_extended_reg_mentioned_p (recog_data.operand[i]))
29665 	  return 3 + 1;
29666       }
29667 
29668   return 2 + 1;
29669 }
29670 
29671 
29672 static bool
29673 ix86_class_likely_spilled_p (reg_class_t);
29674 
29675 /* Return true if the LHS of INSN is a HW function argument register, and set
29676    *IS_SPILLED to true if it is a likely spilled HW register.  */
29677 static bool
29678 insn_is_function_arg (rtx insn, bool* is_spilled)
29679 {
29680   rtx dst;
29681 
29682   if (!NONDEBUG_INSN_P (insn))
29683     return false;
29684   /* Call instructions are not movable; ignore them.  */
29685   if (CALL_P (insn))
29686     return false;
29687   insn = PATTERN (insn);
29688   if (GET_CODE (insn) == PARALLEL)
29689     insn = XVECEXP (insn, 0, 0);
29690   if (GET_CODE (insn) != SET)
29691     return false;
29692   dst = SET_DEST (insn);
29693   if (REG_P (dst) && HARD_REGISTER_P (dst)
29694       && ix86_function_arg_regno_p (REGNO (dst)))
29695     {
29696       /* Is it likely spilled HW register?  */
29697       if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
29698 	  && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
29699 	*is_spilled = true;
29700       return true;
29701     }
29702   return false;
29703 }
29704 
29705 /* Add output dependencies for a chain of adjacent function arguments, but
29706    only if there is a move to a likely spilled HW register.  Return the first
29707    argument if at least one dependence was added, or NULL otherwise.  */
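/* As a rough illustration (hypothetical insn sequence): for
     (set (reg:DI di) ...)  (set (reg:DI si) ...)  (call ...)
   we walk back from the call collecting the adjacent argument moves and, once
   one of them targets a likely spilled argument register, record output
   dependencies between them so the chain stays together.  */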
29708 static rtx_insn *
29709 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
29710 {
29711   rtx_insn *insn;
29712   rtx_insn *last = call;
29713   rtx_insn *first_arg = NULL;
29714   bool is_spilled = false;
29715 
29716   head = PREV_INSN (head);
29717 
29718   /* Find the argument-passing instruction nearest to the call.  */
29719   while (true)
29720     {
29721       last = PREV_INSN (last);
29722       if (last == head)
29723 	return NULL;
29724       if (!NONDEBUG_INSN_P (last))
29725 	continue;
29726       if (insn_is_function_arg (last, &is_spilled))
29727 	break;
29728       return NULL;
29729     }
29730 
29731   first_arg = last;
29732   while (true)
29733     {
29734       insn = PREV_INSN (last);
29735       if (!INSN_P (insn))
29736 	break;
29737       if (insn == head)
29738 	break;
29739       if (!NONDEBUG_INSN_P (insn))
29740 	{
29741 	  last = insn;
29742 	  continue;
29743 	}
29744       if (insn_is_function_arg (insn, &is_spilled))
29745 	{
29746 	  /* Add an output dependence between two function arguments if the chain
29747 	     of output arguments contains likely spilled HW registers.  */
29748 	  if (is_spilled)
29749 	    add_dependence (first_arg, insn, REG_DEP_OUTPUT);
29750 	  first_arg = last = insn;
29751 	}
29752       else
29753 	break;
29754     }
29755   if (!is_spilled)
29756     return NULL;
29757   return first_arg;
29758 }
29759 
29760 /* Add output or anti dependency from insn to first_arg to restrict its code
29761    motion.  */
29762 static void
29763 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
29764 {
29765   rtx set;
29766   rtx tmp;
29767 
29768   /* Add anti dependencies for bounds stores.  */
29769   if (INSN_P (insn)
29770       && GET_CODE (PATTERN (insn)) == PARALLEL
29771       && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
29772       && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
29773     {
29774       add_dependence (first_arg, insn, REG_DEP_ANTI);
29775       return;
29776     }
29777 
29778   set = single_set (insn);
29779   if (!set)
29780     return;
29781   tmp = SET_DEST (set);
29782   if (REG_P (tmp))
29783     {
29784       /* Add output dependency to the first function argument.  */
29785       add_dependence (first_arg, insn, REG_DEP_OUTPUT);
29786       return;
29787     }
29788   /* Add anti dependency.  */
29789   add_dependence (first_arg, insn, REG_DEP_ANTI);
29790 }
29791 
29792 /* Avoid cross-block motion of a function argument by adding a dependency
29793    from the first non-jump instruction in bb.  */
29794 static void
29795 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
29796 {
29797   rtx_insn *insn = BB_END (bb);
29798 
29799   while (insn)
29800     {
29801       if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
29802 	{
29803 	  rtx set = single_set (insn);
29804 	  if (set)
29805 	    {
29806 	      avoid_func_arg_motion (arg, insn);
29807 	      return;
29808 	    }
29809 	}
29810       if (insn == BB_HEAD (bb))
29811 	return;
29812       insn = PREV_INSN (insn);
29813     }
29814 }
29815 
29816 /* Hook for pre-reload schedule - avoid motion of function arguments
29817    passed in likely spilled HW registers.  */
29818 static void
29819 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
29820 {
29821   rtx_insn *insn;
29822   rtx_insn *first_arg = NULL;
29823   if (reload_completed)
29824     return;
29825   while (head != tail && DEBUG_INSN_P (head))
29826     head = NEXT_INSN (head);
29827   for (insn = tail; insn != head; insn = PREV_INSN (insn))
29828     if (INSN_P (insn) && CALL_P (insn))
29829       {
29830 	first_arg = add_parameter_dependencies (insn, head);
29831 	if (first_arg)
29832 	  {
29833 	    /* Add a dependee for the first argument to predecessors, but only
29834 	       if the region contains more than one block.  */
29835 	    basic_block bb =  BLOCK_FOR_INSN (insn);
29836 	    basic_block bb = BLOCK_FOR_INSN (insn);
29837 	    int nr_blks = RGN_NR_BLOCKS (rgn);
29838 	    /* Skip trivial regions and region head blocks that can have
29839 	       predecessors outside of region.  */
29840 	    if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
29841 	      {
29842 		edge e;
29843 		edge_iterator ei;
29844 
29845 		/* Regions are SCCs with the exception of selective
29846 		   scheduling with pipelining of outer blocks enabled.
29847 		   So also check that immediate predecessors of a non-head
29848 		   block are in the same region.  */
29849 		FOR_EACH_EDGE (e, ei, bb->preds)
29850 		  {
29851 		    /* Avoid creating loop-carried dependencies by using the
29852 		       topological ordering in the region.  */
29853 		    if (rgn == CONTAINING_RGN (e->src->index)
29854 			&& BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
29855 		      add_dependee_for_func_arg (first_arg, e->src);
29856 		  }
29857 	      }
29858 	    insn = first_arg;
29859 	    if (insn == head)
29860 	      break;
29861 	  }
29862       }
29863     else if (first_arg)
29864       avoid_func_arg_motion (first_arg, insn);
29865 }
29866 
29867 /* Hook for pre-reload schedule - set the priority of moves from likely spilled
29868    HW registers to the maximum, to schedule them as soon as possible.  These are
29869    moves from function argument registers at the top of the function entry
29870    and moves from function return value registers after a call.  */
29871 static int
29872 ix86_adjust_priority (rtx_insn *insn, int priority)
29873 {
29874   rtx set;
29875 
29876   if (reload_completed)
29877     return priority;
29878 
29879   if (!NONDEBUG_INSN_P (insn))
29880     return priority;
29881 
29882   set = single_set (insn);
29883   if (set)
29884     {
29885       rtx tmp = SET_SRC (set);
29886       if (REG_P (tmp)
29887           && HARD_REGISTER_P (tmp)
29888           && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
29889           && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
29890 	return current_sched_info->sched_max_insns_priority;
29891     }
29892 
29893   return priority;
29894 }
29895 
29896 /* Prepare for scheduling pass.  */
29897 static void
29898 ix86_sched_init_global (FILE *, int, int)
29899 {
29900   /* Install scheduling hooks for current CPU.  Some of these hooks are used
29901      in time-critical parts of the scheduler, so we only set them up when
29902      they are actually used.  */
29903   switch (ix86_tune)
29904     {
29905     case PROCESSOR_CORE2:
29906     case PROCESSOR_NEHALEM:
29907     case PROCESSOR_SANDYBRIDGE:
29908     case PROCESSOR_HASWELL:
29909     case PROCESSOR_GENERIC:
29910       /* Do not perform multipass scheduling for pre-reload schedule
29911          to save compile time.  */
29912       if (reload_completed)
29913 	{
29914 	  ix86_core2i7_init_hooks ();
29915 	  break;
29916 	}
29917       /* Fall through.  */
29918     default:
29919       targetm.sched.dfa_post_advance_cycle = NULL;
29920       targetm.sched.first_cycle_multipass_init = NULL;
29921       targetm.sched.first_cycle_multipass_begin = NULL;
29922       targetm.sched.first_cycle_multipass_issue = NULL;
29923       targetm.sched.first_cycle_multipass_backtrack = NULL;
29924       targetm.sched.first_cycle_multipass_end = NULL;
29925       targetm.sched.first_cycle_multipass_fini = NULL;
29926       break;
29927     }
29928 }
29929 
29930 
29931 /* Implement TARGET_STATIC_RTX_ALIGNMENT.  */
29932 
29933 static HOST_WIDE_INT
29934 ix86_static_rtx_alignment (machine_mode mode)
29935 {
29936   if (mode == DFmode)
29937     return 64;
29938   if (ALIGN_MODE_128 (mode))
29939     return MAX (128, GET_MODE_ALIGNMENT (mode));
29940   return GET_MODE_ALIGNMENT (mode);
29941 }
29942 
29943 /* Implement TARGET_CONSTANT_ALIGNMENT.  */
29944 
29945 static HOST_WIDE_INT
29946 ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
29947 {
29948   if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
29949       || TREE_CODE (exp) == INTEGER_CST)
29950     {
29951       machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
29952       HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
29953       return MAX (mode_align, align);
29954     }
29955   else if (!optimize_size && TREE_CODE (exp) == STRING_CST
29956 	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
29957     return BITS_PER_WORD;
29958 
29959   return align;
29960 }
29961 
29962 /* Implement TARGET_EMPTY_RECORD_P.  */
29963 
29964 static bool
29965 ix86_is_empty_record (const_tree type)
29966 {
29967   if (!TARGET_64BIT)
29968     return false;
29969   return default_is_empty_record (type);
29970 }
29971 
29972 /* Implement TARGET_WARN_PARAMETER_PASSING_ABI.  */
29973 
29974 static void
29975 ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
29976 {
29977   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
29978 
29979   if (!cum->warn_empty)
29980     return;
29981 
29982   if (!TYPE_EMPTY_P (type))
29983     return;
29984 
29985   const_tree ctx = get_ultimate_context (cum->decl);
29986   if (ctx != NULL_TREE
29987       && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
29988     return;
29989 
29990   /* If the actual size of the type is zero, then there is no change
29991      in how objects of this size are passed.  */
29992   if (int_size_in_bytes (type) == 0)
29993     return;
29994 
29995   warning (OPT_Wabi, "empty class %qT parameter passing ABI "
29996 	   "changes in -fabi-version=12 (GCC 8)", type);
29997 
29998   /* Only warn once.  */
29999   cum->warn_empty = false;
30000 }
30001 
30002 /* Compute the alignment for a variable under the Intel MCU psABI.  TYPE is
30003    the data type, and ALIGN is the alignment that the object would
30004    ordinarily have.  */
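/* For example, if the caller passes in a 64-bit alignment for a long long or
   double object, it is capped at 32 bits here, matching the psABI rule cited
   below; user-specified alignments are left untouched.  */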
30005 
30006 static int
30007 iamcu_alignment (tree type, int align)
30008 {
30009   machine_mode mode;
30010 
30011   if (align < 32 || TYPE_USER_ALIGN (type))
30012     return align;
30013 
30014   /* The Intel MCU psABI specifies that scalar types larger than 4 bytes
30015      are aligned to 4 bytes.  */
30016   mode = TYPE_MODE (strip_array_types (type));
30017   switch (GET_MODE_CLASS (mode))
30018     {
30019     case MODE_INT:
30020     case MODE_COMPLEX_INT:
30021     case MODE_COMPLEX_FLOAT:
30022     case MODE_FLOAT:
30023     case MODE_DECIMAL_FLOAT:
30024       return 32;
30025     default:
30026       return align;
30027     }
30028 }
30029 
30030 /* Compute the alignment for a static variable.
30031    TYPE is the data type, and ALIGN is the alignment that
30032    the object would ordinarily have.  The value of this function is used
30033    instead of that alignment to align the object.  */
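/* As an illustration (the exact values depend on the tuning's prefetch block
   size, assumed here to be 64 bytes): with optimization enabled, a 4096-byte
   static char array is aligned to 512 bits, a 32-byte array to 256 bits, and
   on x86-64 any array of at least 16 bytes gets at least 128 bits.  */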
30034 
30035 int
30036 ix86_data_alignment (tree type, int align, bool opt)
30037 {
30038   /* GCC 4.8 and earlier used to incorrectly assume this alignment even
30039      for symbols from other compilation units or symbols that don't need
30040      to bind locally.  In order to preserve some ABI compatibility with
30041      those compilers, ensure we don't decrease alignment from what we
30042      used to assume.  */
30043 
30044   int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
30045 
30046   /* A data structure equal to or greater than the size of a cache line
30047      (64 bytes in the Pentium 4 and other recent Intel processors, including
30048      processors based on the Intel Core microarchitecture) should be aligned
30049      so that its base address is a multiple of the cache line size.  */
30050 
30051   int max_align
30052     = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
30053 
30054   if (max_align < BITS_PER_WORD)
30055     max_align = BITS_PER_WORD;
30056 
30057   switch (ix86_align_data_type)
30058     {
30059     case ix86_align_data_type_abi: opt = false; break;
30060     case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
30061     case ix86_align_data_type_cacheline: break;
30062     }
30063 
30064   if (TARGET_IAMCU)
30065     align = iamcu_alignment (type, align);
30066 
30067   if (opt
30068       && AGGREGATE_TYPE_P (type)
30069       && TYPE_SIZE (type)
30070       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
30071     {
30072       if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
30073 	  && align < max_align_compat)
30074 	align = max_align_compat;
30075       if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
30076 	  && align < max_align)
30077 	align = max_align;
30078     }
30079 
30080   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
30081   /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
30082      to a 16-byte boundary.  */
30083     {
30084       if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
30085 	  && TYPE_SIZE (type)
30086 	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
30087 	  && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
30088 	  && align < 128)
30089 	return 128;
30090     }
30091 
30092   if (!opt)
30093     return align;
30094 
30095   if (TREE_CODE (type) == ARRAY_TYPE)
30096     {
30097       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
30098 	return 64;
30099       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
30100 	return 128;
30101     }
30102   else if (TREE_CODE (type) == COMPLEX_TYPE)
30103     {
30104 
30105       if (TYPE_MODE (type) == DCmode && align < 64)
30106 	return 64;
30107       if ((TYPE_MODE (type) == XCmode
30108 	   || TYPE_MODE (type) == TCmode) && align < 128)
30109 	return 128;
30110     }
30111   else if ((TREE_CODE (type) == RECORD_TYPE
30112 	    || TREE_CODE (type) == UNION_TYPE
30113 	    || TREE_CODE (type) == QUAL_UNION_TYPE)
30114 	   && TYPE_FIELDS (type))
30115     {
30116       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
30117 	return 64;
30118       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
30119 	return 128;
30120     }
30121   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
30122 	   || TREE_CODE (type) == INTEGER_TYPE)
30123     {
30124       if (TYPE_MODE (type) == DFmode && align < 64)
30125 	return 64;
30126       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
30127 	return 128;
30128     }
30129 
30130   return align;
30131 }
30132 
30133 /* Compute the alignment for a local variable or a stack slot.  EXP is
30134    the data type or decl itself, MODE is the widest mode available and
30135    ALIGN is the alignment that the object would ordinarily have.  The
30136    value of this macro is used instead of that alignment to align the
30137    object.  */
30138 
30139 unsigned int
30140 ix86_local_alignment (tree exp, machine_mode mode,
30141 		      unsigned int align)
30142 {
30143   tree type, decl;
30144 
30145   if (exp && DECL_P (exp))
30146     {
30147       type = TREE_TYPE (exp);
30148       decl = exp;
30149     }
30150   else
30151     {
30152       type = exp;
30153       decl = NULL;
30154     }
30155 
30156   /* Don't do dynamic stack realignment for long long objects with
30157      -mpreferred-stack-boundary=2.  */
30158   if (!TARGET_64BIT
30159       && align == 64
30160       && ix86_preferred_stack_boundary < 64
30161       && (mode == DImode || (type && TYPE_MODE (type) == DImode))
30162       && (!type || !TYPE_USER_ALIGN (type))
30163       && (!decl || !DECL_USER_ALIGN (decl)))
30164     align = 32;
30165 
30166   /* If TYPE is NULL, we are allocating a stack slot for caller-save
30167   /* If TYPE is NULL, we are allocating a stack slot for a caller-save
30168      register in MODE.  We will return the larger of the XF and DF
30169      alignments.  */
30170     {
30171       if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
30172 	align = GET_MODE_ALIGNMENT (DFmode);
30173       return align;
30174     }
30175 
30176   /* Don't increase alignment for Intel MCU psABI.  */
30177   if (TARGET_IAMCU)
30178     return align;
30179 
30180   /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
30181      to a 16-byte boundary.  The exact wording is:
30182 
30183      An array uses the same alignment as its elements, except that a local or
30184      global array variable of length at least 16 bytes or
30185      a C99 variable-length array variable always has alignment of at least 16 bytes.
30186 
30187      This was added to allow the use of aligned SSE instructions on arrays.  The
30188      rule is meant for static storage (where the compiler cannot do the analysis
30189      by itself).  We follow it for automatic variables only when convenient.
30190      We fully control everything in the function being compiled, and functions
30191      from other units cannot rely on the alignment.
30192 
30193      Exclude the va_list type.  It is the common case of a local array where
30194      we cannot benefit from the alignment.
30195 
30196      TODO: Probably one should optimize for size only when the var is not escaping.  */
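  /* As an illustration, with -O2 and SSE enabled on x86-64 a local
     char buf[32] is given 128-bit alignment by the check below, while a
     local va_list or an 8-byte array keeps its ordinary alignment.  */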
30197   if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
30198       && TARGET_SSE)
30199     {
30200       if (AGGREGATE_TYPE_P (type)
30201 	  && (va_list_type_node == NULL_TREE
30202 	      || (TYPE_MAIN_VARIANT (type)
30203 		  != TYPE_MAIN_VARIANT (va_list_type_node)))
30204 	  && TYPE_SIZE (type)
30205 	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
30206 	  && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
30207 	  && align < 128)
30208 	return 128;
30209     }
30210   if (TREE_CODE (type) == ARRAY_TYPE)
30211     {
30212       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
30213 	return 64;
30214       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
30215 	return 128;
30216     }
30217   else if (TREE_CODE (type) == COMPLEX_TYPE)
30218     {
30219       if (TYPE_MODE (type) == DCmode && align < 64)
30220 	return 64;
30221       if ((TYPE_MODE (type) == XCmode
30222 	   || TYPE_MODE (type) == TCmode) && align < 128)
30223 	return 128;
30224     }
30225   else if ((TREE_CODE (type) == RECORD_TYPE
30226 	    || TREE_CODE (type) == UNION_TYPE
30227 	    || TREE_CODE (type) == QUAL_UNION_TYPE)
30228 	   && TYPE_FIELDS (type))
30229     {
30230       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
30231 	return 64;
30232       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
30233 	return 128;
30234     }
30235   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
30236 	   || TREE_CODE (type) == INTEGER_TYPE)
30237     {
30238 
30239       if (TYPE_MODE (type) == DFmode && align < 64)
30240 	return 64;
30241       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
30242 	return 128;
30243     }
30244   return align;
30245 }
30246 
30247 /* Compute the minimum required alignment for dynamic stack realignment
30248    purposes for a local variable, parameter or a stack slot.  EXP is
30249    the data type or decl itself, MODE is its mode and ALIGN is the
30250    alignment that the object would ordinarily have.  */
30251 
30252 unsigned int
30253 ix86_minimum_alignment (tree exp, machine_mode mode,
30254 			unsigned int align)
30255 {
30256   tree type, decl;
30257 
30258   if (exp && DECL_P (exp))
30259     {
30260       type = TREE_TYPE (exp);
30261       decl = exp;
30262     }
30263   else
30264     {
30265       type = exp;
30266       decl = NULL;
30267     }
30268 
30269   if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
30270     return align;
30271 
30272   /* Don't do dynamic stack realignment for long long objects with
30273      -mpreferred-stack-boundary=2.  */
30274   if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
30275       && (!type || !TYPE_USER_ALIGN (type))
30276       && (!decl || !DECL_USER_ALIGN (decl)))
30277     {
30278       gcc_checking_assert (!TARGET_STV);
30279       return 32;
30280     }
30281 
30282   return align;
30283 }
30284 
30285 /* Find a location for the static chain incoming to a nested function.
30286    This is a register, unless all free registers are used by arguments.  */
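/* In brief: 64-bit code always uses R10; 32-bit code uses ECX by default,
   EAX for fastcall and thiscall functions, and for regparm(3) functions the
   chain is passed on the stack, with ESI used at the alternate entry point,
   as detailed below.  */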
30287 
30288 static rtx
30289 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
30290 {
30291   unsigned regno;
30292 
30293   if (TARGET_64BIT)
30294     {
30295       /* We always use R10 in 64-bit mode.  */
30296       regno = R10_REG;
30297     }
30298   else
30299     {
30300       const_tree fntype, fndecl;
30301       unsigned int ccvt;
30302 
30303       /* By default in 32-bit mode we use ECX to pass the static chain.  */
30304       regno = CX_REG;
30305 
30306       if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
30307 	{
30308           fntype = TREE_TYPE (fndecl_or_type);
30309 	  fndecl = fndecl_or_type;
30310 	}
30311       else
30312 	{
30313 	  fntype = fndecl_or_type;
30314 	  fndecl = NULL;
30315 	}
30316 
30317       ccvt = ix86_get_callcvt (fntype);
30318       if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
30319 	{
30320 	  /* Fastcall functions use ecx/edx for arguments, which leaves
30321 	     us with EAX for the static chain.
30322 	     Thiscall functions use ecx for arguments, which also
30323 	     leaves us with EAX for the static chain.  */
30324 	  regno = AX_REG;
30325 	}
30326       else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
30327 	{
30328 	  /* Thiscall functions use ecx for arguments, which leaves
30329 	     us with EAX and EDX for the static chain.
30330 	     For ABI compatibility we use EAX.  */
30331 	  regno = AX_REG;
30332 	}
30333       else if (ix86_function_regparm (fntype, fndecl) == 3)
30334 	{
30335 	  /* For regparm 3, we have no free call-clobbered registers in
30336 	     which to store the static chain.  In order to implement this,
30337 	     we have the trampoline push the static chain to the stack.
30338 	     However, we can't push a value below the return address when
30339 	     we call the nested function directly, so we have to use an
30340 	     alternate entry point.  For this we use ESI, and have the
30341 	     alternate entry point push ESI, so that things appear the
30342 	     same once we're executing the nested function.  */
30343 	  if (incoming_p)
30344 	    {
30345 	      if (fndecl == current_function_decl
30346 		  && !ix86_static_chain_on_stack)
30347 		{
30348 		  gcc_assert (!reload_completed);
30349 		  ix86_static_chain_on_stack = true;
30350 		}
30351 	      return gen_frame_mem (SImode,
30352 				    plus_constant (Pmode,
30353 						   arg_pointer_rtx, -8));
30354 	    }
30355 	  regno = SI_REG;
30356 	}
30357     }
30358 
30359   return gen_rtx_REG (Pmode, regno);
30360 }
30361 
30362 /* Emit RTL insns to initialize the variable parts of a trampoline.
30363    FNDECL is the decl of the target address; M_TRAMP is a MEM for
30364    the trampoline, and CHAIN_VALUE is an RTX for the static chain
30365    to be passed to the target function.  */
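/* For reference, the usual 64-bit trampoline emitted below looks like this
   (byte values in hex, immediates little-endian):
     f3 0f 1e fa            endbr64  (only when branch CF protection is enabled)
     41 bb <fnaddr32>       movl  $fnaddr, %r11d   (or 49 bb <fnaddr64> movabs)
     49 ba <chain>          movabs $chain, %r10
     49 ff e3 90            jmp *%r11; the nop pads the final 32-bit store  */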
30366 
30367 static void
30368 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
30369 {
30370   rtx mem, fnaddr;
30371   int opcode;
30372   int offset = 0;
30373   bool need_endbr = (flag_cf_protection & CF_BRANCH);
30374 
30375   fnaddr = XEXP (DECL_RTL (fndecl), 0);
30376 
30377   if (TARGET_64BIT)
30378     {
30379       int size;
30380 
30381       if (need_endbr)
30382 	{
30383 	  /* Insert ENDBR64.  */
30384 	  mem = adjust_address (m_tramp, SImode, offset);
30385 	  emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
30386 	  offset += 4;
30387 	}
30388 
30389       /* Load the function address into r11.  Try to load the address using
30390 	 the shorter movl instead of movabs.  We may want to support
30391 	 movq for kernel mode, but the kernel does not use trampolines at
30392 	 the moment.  FNADDR is a 32-bit address and may not be in
30393 	 DImode when ptr_mode == SImode.  Always use movl in this
30394 	 case.  */
30395       if (ptr_mode == SImode
30396 	  || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
30397 	{
30398 	  fnaddr = copy_addr_to_reg (fnaddr);
30399 
30400 	  mem = adjust_address (m_tramp, HImode, offset);
30401 	  emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
30402 
30403 	  mem = adjust_address (m_tramp, SImode, offset + 2);
30404 	  emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
30405 	  offset += 6;
30406 	}
30407       else
30408 	{
30409 	  mem = adjust_address (m_tramp, HImode, offset);
30410 	  emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
30411 
30412 	  mem = adjust_address (m_tramp, DImode, offset + 2);
30413 	  emit_move_insn (mem, fnaddr);
30414 	  offset += 10;
30415 	}
30416 
30417       /* Load the static chain into r10 using movabs.  Use the shorter movl
30418          instead of movabs when ptr_mode == SImode.  */
30419       if (ptr_mode == SImode)
30420 	{
30421 	  opcode = 0xba41;
30422 	  size = 6;
30423 	}
30424       else
30425 	{
30426 	  opcode = 0xba49;
30427 	  size = 10;
30428 	}
30429 
30430       mem = adjust_address (m_tramp, HImode, offset);
30431       emit_move_insn (mem, gen_int_mode (opcode, HImode));
30432 
30433       mem = adjust_address (m_tramp, ptr_mode, offset + 2);
30434       emit_move_insn (mem, chain_value);
30435       offset += size;
30436 
30437       /* Jump to r11; the last (unused) byte is a nop, only there to
30438 	 pad the write out to a single 32-bit store.  */
30439       mem = adjust_address (m_tramp, SImode, offset);
30440       emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
30441       offset += 4;
30442     }
30443   else
30444     {
30445       rtx disp, chain;
30446 
30447       /* Depending on the static chain location, either load a register
30448 	 with a constant, or push the constant to the stack.  All of the
30449 	 instructions are the same size.  */
30450       chain = ix86_static_chain (fndecl, true);
30451       if (REG_P (chain))
30452 	{
30453 	  switch (REGNO (chain))
30454 	    {
30455 	    case AX_REG:
30456 	      opcode = 0xb8; break;
30457 	    case CX_REG:
30458 	      opcode = 0xb9; break;
30459 	    default:
30460 	      gcc_unreachable ();
30461 	    }
30462 	}
30463       else
30464 	opcode = 0x68;
30465 
30466       if (need_endbr)
30467 	{
30468 	  /* Insert ENDBR32.  */
30469 	  mem = adjust_address (m_tramp, SImode, offset);
30470 	  emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
30471 	  offset += 4;
30472 	}
30473 
30474       mem = adjust_address (m_tramp, QImode, offset);
30475       emit_move_insn (mem, gen_int_mode (opcode, QImode));
30476 
30477       mem = adjust_address (m_tramp, SImode, offset + 1);
30478       emit_move_insn (mem, chain_value);
30479       offset += 5;
30480 
30481       mem = adjust_address (m_tramp, QImode, offset);
30482       emit_move_insn (mem, gen_int_mode (0xe9, QImode));
30483 
30484       mem = adjust_address (m_tramp, SImode, offset + 1);
30485 
30486       /* Compute offset from the end of the jmp to the target function.
30487 	 In the case in which the trampoline stores the static chain on
30488 	 the stack, we need to skip the first insn which pushes the
30489 	 (call-saved) register static chain; this push is 1 byte.  */
30490       offset += 5;
30491       disp = expand_binop (SImode, sub_optab, fnaddr,
30492 			   plus_constant (Pmode, XEXP (m_tramp, 0),
30493 					  offset - (MEM_P (chain) ? 1 : 0)),
30494 			   NULL_RTX, 1, OPTAB_DIRECT);
30495       emit_move_insn (mem, disp);
30496     }
30497 
30498   gcc_assert (offset <= TRAMPOLINE_SIZE);
30499 
30500 #ifdef HAVE_ENABLE_EXECUTE_STACK
30501 #ifdef CHECK_EXECUTE_STACK_ENABLED
30502   if (CHECK_EXECUTE_STACK_ENABLED)
30503 #endif
30504   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
30505 		     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
30506 #endif
30507 }
30508 
30509 static bool
30510 ix86_allocate_stack_slots_for_args (void)
30511 {
30512   /* Naked functions should not allocate stack slots for arguments.  */
30513   return !ix86_function_naked (current_function_decl);
30514 }
30515 
30516 static bool
30517 ix86_warn_func_return (tree decl)
30518 {
30519   /* Naked functions are implemented entirely in assembly, including the
30520      return sequence, so suppress warnings about this.  */
30521   return !ix86_function_naked (decl);
30522 }
30523 
30524 /* The following file contains several enumerations and data structures
30525    built from the definitions in i386-builtin-types.def.  */
30526 
30527 #include "i386-builtin-types.inc"
30528 
30529 /* Table for the ix86 builtin non-function types.  */
30530 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
30531 
30532 /* Retrieve an element from the above table, building some of
30533    the types lazily.  */
30534 
30535 static tree
30536 ix86_get_builtin_type (enum ix86_builtin_type tcode)
30537 {
30538   unsigned int index;
30539   tree type, itype;
30540 
30541   gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
30542 
30543   type = ix86_builtin_type_tab[(int) tcode];
30544   if (type != NULL)
30545     return type;
30546 
30547   gcc_assert (tcode > IX86_BT_LAST_PRIM);
30548   if (tcode <= IX86_BT_LAST_VECT)
30549     {
30550       machine_mode mode;
30551 
30552       index = tcode - IX86_BT_LAST_PRIM - 1;
30553       itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
30554       mode = ix86_builtin_type_vect_mode[index];
30555 
30556       type = build_vector_type_for_mode (itype, mode);
30557     }
30558   else
30559     {
30560       int quals;
30561 
30562       index = tcode - IX86_BT_LAST_VECT - 1;
30563       if (tcode <= IX86_BT_LAST_PTR)
30564 	quals = TYPE_UNQUALIFIED;
30565       else
30566 	quals = TYPE_QUAL_CONST;
30567 
30568       itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
30569       if (quals != TYPE_UNQUALIFIED)
30570 	itype = build_qualified_type (itype, quals);
30571 
30572       type = build_pointer_type (itype);
30573     }
30574 
30575   ix86_builtin_type_tab[(int) tcode] = type;
30576   return type;
30577 }
30578 
30579 /* Table for the ix86 builtin function types.  */
30580 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
30581 
30582 /* Retrieve an element from the above table, building some of
30583    the types lazily.  */
30584 
30585 static tree
30586 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
30587 {
30588   tree type;
30589 
30590   gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
30591 
30592   type = ix86_builtin_func_type_tab[(int) tcode];
30593   if (type != NULL)
30594     return type;
30595 
30596   if (tcode <= IX86_BT_LAST_FUNC)
30597     {
30598       unsigned start = ix86_builtin_func_start[(int) tcode];
30599       unsigned after = ix86_builtin_func_start[(int) tcode + 1];
30600       tree rtype, atype, args = void_list_node;
30601       unsigned i;
30602 
30603       rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
30604       for (i = after - 1; i > start; --i)
30605 	{
30606 	  atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
30607 	  args = tree_cons (NULL, atype, args);
30608 	}
30609 
30610       type = build_function_type (rtype, args);
30611     }
30612   else
30613     {
30614       unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
30615       enum ix86_builtin_func_type icode;
30616 
30617       icode = ix86_builtin_func_alias_base[index];
30618       type = ix86_get_builtin_func_type (icode);
30619     }
30620 
30621   ix86_builtin_func_type_tab[(int) tcode] = type;
30622   return type;
30623 }
30624 
30625 
30626 /* Codes for all the SSE/MMX builtins.  Builtins not mentioned in any
30627    bdesc_* arrays below should come first, then builtins for each bdesc_*
30628    array in ascending order, so that we can use direct array accesses.  */
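/* For example, this ordering is what lets a builtin code belonging to one of
   the bdesc_* tables be mapped back to its descriptor by plain subtraction,
   conceptually  bdesc_args[code - IX86_BUILTIN__BDESC_ARGS_FIRST]  (the ARGS
   names stand for one of the kinds; see the aliases defined further below).  */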
30629 enum ix86_builtins
30630 {
30631   IX86_BUILTIN_MASKMOVQ,
30632   IX86_BUILTIN_LDMXCSR,
30633   IX86_BUILTIN_STMXCSR,
30634   IX86_BUILTIN_MASKMOVDQU,
30635   IX86_BUILTIN_PSLLDQ128,
30636   IX86_BUILTIN_CLFLUSH,
30637   IX86_BUILTIN_MONITOR,
30638   IX86_BUILTIN_MWAIT,
30639   IX86_BUILTIN_CLZERO,
30640   IX86_BUILTIN_VEC_INIT_V2SI,
30641   IX86_BUILTIN_VEC_INIT_V4HI,
30642   IX86_BUILTIN_VEC_INIT_V8QI,
30643   IX86_BUILTIN_VEC_EXT_V2DF,
30644   IX86_BUILTIN_VEC_EXT_V2DI,
30645   IX86_BUILTIN_VEC_EXT_V4SF,
30646   IX86_BUILTIN_VEC_EXT_V4SI,
30647   IX86_BUILTIN_VEC_EXT_V8HI,
30648   IX86_BUILTIN_VEC_EXT_V2SI,
30649   IX86_BUILTIN_VEC_EXT_V4HI,
30650   IX86_BUILTIN_VEC_EXT_V16QI,
30651   IX86_BUILTIN_VEC_SET_V2DI,
30652   IX86_BUILTIN_VEC_SET_V4SF,
30653   IX86_BUILTIN_VEC_SET_V4SI,
30654   IX86_BUILTIN_VEC_SET_V8HI,
30655   IX86_BUILTIN_VEC_SET_V4HI,
30656   IX86_BUILTIN_VEC_SET_V16QI,
30657   IX86_BUILTIN_GATHERSIV2DF,
30658   IX86_BUILTIN_GATHERSIV4DF,
30659   IX86_BUILTIN_GATHERDIV2DF,
30660   IX86_BUILTIN_GATHERDIV4DF,
30661   IX86_BUILTIN_GATHERSIV4SF,
30662   IX86_BUILTIN_GATHERSIV8SF,
30663   IX86_BUILTIN_GATHERDIV4SF,
30664   IX86_BUILTIN_GATHERDIV8SF,
30665   IX86_BUILTIN_GATHERSIV2DI,
30666   IX86_BUILTIN_GATHERSIV4DI,
30667   IX86_BUILTIN_GATHERDIV2DI,
30668   IX86_BUILTIN_GATHERDIV4DI,
30669   IX86_BUILTIN_GATHERSIV4SI,
30670   IX86_BUILTIN_GATHERSIV8SI,
30671   IX86_BUILTIN_GATHERDIV4SI,
30672   IX86_BUILTIN_GATHERDIV8SI,
30673   IX86_BUILTIN_VFMSUBSD3_MASK3,
30674   IX86_BUILTIN_VFMSUBSS3_MASK3,
30675   IX86_BUILTIN_GATHER3SIV8SF,
30676   IX86_BUILTIN_GATHER3SIV4SF,
30677   IX86_BUILTIN_GATHER3SIV4DF,
30678   IX86_BUILTIN_GATHER3SIV2DF,
30679   IX86_BUILTIN_GATHER3DIV8SF,
30680   IX86_BUILTIN_GATHER3DIV4SF,
30681   IX86_BUILTIN_GATHER3DIV4DF,
30682   IX86_BUILTIN_GATHER3DIV2DF,
30683   IX86_BUILTIN_GATHER3SIV8SI,
30684   IX86_BUILTIN_GATHER3SIV4SI,
30685   IX86_BUILTIN_GATHER3SIV4DI,
30686   IX86_BUILTIN_GATHER3SIV2DI,
30687   IX86_BUILTIN_GATHER3DIV8SI,
30688   IX86_BUILTIN_GATHER3DIV4SI,
30689   IX86_BUILTIN_GATHER3DIV4DI,
30690   IX86_BUILTIN_GATHER3DIV2DI,
30691   IX86_BUILTIN_SCATTERSIV8SF,
30692   IX86_BUILTIN_SCATTERSIV4SF,
30693   IX86_BUILTIN_SCATTERSIV4DF,
30694   IX86_BUILTIN_SCATTERSIV2DF,
30695   IX86_BUILTIN_SCATTERDIV8SF,
30696   IX86_BUILTIN_SCATTERDIV4SF,
30697   IX86_BUILTIN_SCATTERDIV4DF,
30698   IX86_BUILTIN_SCATTERDIV2DF,
30699   IX86_BUILTIN_SCATTERSIV8SI,
30700   IX86_BUILTIN_SCATTERSIV4SI,
30701   IX86_BUILTIN_SCATTERSIV4DI,
30702   IX86_BUILTIN_SCATTERSIV2DI,
30703   IX86_BUILTIN_SCATTERDIV8SI,
30704   IX86_BUILTIN_SCATTERDIV4SI,
30705   IX86_BUILTIN_SCATTERDIV4DI,
30706   IX86_BUILTIN_SCATTERDIV2DI,
30707   /* Alternate 4 and 8 element gather/scatter for the vectorizer
30708      where all operands are 32-byte or 64-byte wide respectively.  */
30709   IX86_BUILTIN_GATHERALTSIV4DF,
30710   IX86_BUILTIN_GATHERALTDIV8SF,
30711   IX86_BUILTIN_GATHERALTSIV4DI,
30712   IX86_BUILTIN_GATHERALTDIV8SI,
30713   IX86_BUILTIN_GATHER3ALTDIV16SF,
30714   IX86_BUILTIN_GATHER3ALTDIV16SI,
30715   IX86_BUILTIN_GATHER3ALTSIV4DF,
30716   IX86_BUILTIN_GATHER3ALTDIV8SF,
30717   IX86_BUILTIN_GATHER3ALTSIV4DI,
30718   IX86_BUILTIN_GATHER3ALTDIV8SI,
30719   IX86_BUILTIN_GATHER3ALTSIV8DF,
30720   IX86_BUILTIN_GATHER3ALTSIV8DI,
30721   IX86_BUILTIN_GATHER3DIV16SF,
30722   IX86_BUILTIN_GATHER3DIV16SI,
30723   IX86_BUILTIN_GATHER3DIV8DF,
30724   IX86_BUILTIN_GATHER3DIV8DI,
30725   IX86_BUILTIN_GATHER3SIV16SF,
30726   IX86_BUILTIN_GATHER3SIV16SI,
30727   IX86_BUILTIN_GATHER3SIV8DF,
30728   IX86_BUILTIN_GATHER3SIV8DI,
30729   IX86_BUILTIN_SCATTERALTSIV8DF,
30730   IX86_BUILTIN_SCATTERALTDIV16SF,
30731   IX86_BUILTIN_SCATTERALTSIV8DI,
30732   IX86_BUILTIN_SCATTERALTDIV16SI,
30733   IX86_BUILTIN_SCATTERDIV16SF,
30734   IX86_BUILTIN_SCATTERDIV16SI,
30735   IX86_BUILTIN_SCATTERDIV8DF,
30736   IX86_BUILTIN_SCATTERDIV8DI,
30737   IX86_BUILTIN_SCATTERSIV16SF,
30738   IX86_BUILTIN_SCATTERSIV16SI,
30739   IX86_BUILTIN_SCATTERSIV8DF,
30740   IX86_BUILTIN_SCATTERSIV8DI,
30741   IX86_BUILTIN_GATHERPFQPD,
30742   IX86_BUILTIN_GATHERPFDPS,
30743   IX86_BUILTIN_GATHERPFDPD,
30744   IX86_BUILTIN_GATHERPFQPS,
30745   IX86_BUILTIN_SCATTERPFDPD,
30746   IX86_BUILTIN_SCATTERPFDPS,
30747   IX86_BUILTIN_SCATTERPFQPD,
30748   IX86_BUILTIN_SCATTERPFQPS,
30749   IX86_BUILTIN_CLWB,
30750   IX86_BUILTIN_CLFLUSHOPT,
30751   IX86_BUILTIN_INFQ,
30752   IX86_BUILTIN_HUGE_VALQ,
30753   IX86_BUILTIN_NANQ,
30754   IX86_BUILTIN_NANSQ,
30755   IX86_BUILTIN_XABORT,
30756   IX86_BUILTIN_ADDCARRYX32,
30757   IX86_BUILTIN_ADDCARRYX64,
30758   IX86_BUILTIN_SBB32,
30759   IX86_BUILTIN_SBB64,
30760   IX86_BUILTIN_RDRAND16_STEP,
30761   IX86_BUILTIN_RDRAND32_STEP,
30762   IX86_BUILTIN_RDRAND64_STEP,
30763   IX86_BUILTIN_RDSEED16_STEP,
30764   IX86_BUILTIN_RDSEED32_STEP,
30765   IX86_BUILTIN_RDSEED64_STEP,
30766   IX86_BUILTIN_MONITORX,
30767   IX86_BUILTIN_MWAITX,
30768   IX86_BUILTIN_CFSTRING,
30769   IX86_BUILTIN_CPU_INIT,
30770   IX86_BUILTIN_CPU_IS,
30771   IX86_BUILTIN_CPU_SUPPORTS,
30772   IX86_BUILTIN_READ_FLAGS,
30773   IX86_BUILTIN_WRITE_FLAGS,
30774 
30775   /* All the remaining builtins are tracked in bdesc_* arrays in
30776      i386-builtin.def.  Don't add any IX86_BUILTIN_* enumerators after
30777      this point.  */
30778 #define BDESC(mask, icode, name, code, comparison, flag) \
30779   code,
30780 #define BDESC_FIRST(kind, kindu, mask, icode, name, code, comparison, flag) \
30781   code,									    \
30782   IX86_BUILTIN__BDESC_##kindu##_FIRST = code,
30783 #define BDESC_END(kind, next_kind)
30784 
30785 #include "i386-builtin.def"
30786 
30787 #undef BDESC
30788 #undef BDESC_FIRST
30789 #undef BDESC_END
30790 
30791   IX86_BUILTIN_MAX,
30792 
30793   IX86_BUILTIN__BDESC_MAX_FIRST = IX86_BUILTIN_MAX,
30794 
30795   /* Now just the aliases for bdesc_* start/end.  */
30796 #define BDESC(mask, icode, name, code, comparison, flag)
30797 #define BDESC_FIRST(kind, kindu, mask, icode, name, code, comparison, flag)
30798 #define BDESC_END(kind, next_kind) \
30799   IX86_BUILTIN__BDESC_##kind##_LAST					    \
30800     = IX86_BUILTIN__BDESC_##next_kind##_FIRST - 1,
30801 
30802 #include "i386-builtin.def"
30803 
30804 #undef BDESC
30805 #undef BDESC_FIRST
30806 #undef BDESC_END
30807 
30808   /* Just to make sure there is no comma after the last enumerator.  */
30809   IX86_BUILTIN__BDESC_MAX_LAST = IX86_BUILTIN__BDESC_MAX_FIRST
30810 };
30811 
30812 /* Table for the ix86 builtin decls.  */
30813 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30814 
30815 /* Table of all of the builtin functions that are possible with different ISA's
30816 /* Table of all of the builtin functions that are possible with different ISAs
30817    ISA.  */
30818 struct builtin_isa {
30819   HOST_WIDE_INT isa;		/* isa_flags this builtin is defined for */
30820   HOST_WIDE_INT isa2;		/* additional isa_flags this builtin is defined for */
30821   const char *name;		/* function name */
30822   enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30823   unsigned char const_p:1;	/* true if the declaration is constant */
30824   unsigned char pure_p:1;	/* true if the declaration has pure attribute */
30825   bool leaf_p;			/* true if the declaration has leaf attribute */
30826   bool nothrow_p;		/* true if the declaration has nothrow attribute */
30827   bool set_and_not_built_p;
30828 };
30829 
30830 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
30831 
30832 /* Bits that can still enable any inclusion of a builtin.  */
30833 static HOST_WIDE_INT deferred_isa_values = 0;
30834 static HOST_WIDE_INT deferred_isa_values2 = 0;
30835 
30836 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
30837    of which isa_flags to use in the ix86_builtins_isa array.  Store the
30838    function decl in the ix86_builtins array.  Return the function decl, or
30839    NULL_TREE if the builtin was not added.
30840 
30841    If the front end has a special hook for builtin functions, delay adding
30842    builtin functions that aren't in the current ISA until the ISA is changed
30843    with function specific optimization.  Doing so can save about 300K for the
30844    default compiler.  When the builtin is expanded, check at that time whether
30845    it is valid.
30846 
30847    If the front end doesn't have a special hook, record all builtins, even
30848    those not in the current ISA, in case the user uses function specific
30849    options for a different ISA, so that we don't get scope errors if a
30850    builtin is added in the middle of a function scope.  */
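/* Illustrative scenario: an AVX-512 builtin registered while compiling with
   only -msse2 has none of its mask bits set in ix86_isa_flags, so with a
   deferring front end the mask is merely recorded in deferred_isa_values and
   the decl is created later by ix86_add_new_builtins once a target attribute
   or pragma enables that ISA.  */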
30851 
30852 static inline tree
30853 def_builtin (HOST_WIDE_INT mask, const char *name,
30854 	     enum ix86_builtin_func_type tcode,
30855 	     enum ix86_builtins code)
30856 {
30857   tree decl = NULL_TREE;
30858 
30859   if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30860     {
30861       ix86_builtins_isa[(int) code].isa = mask;
30862 
30863       mask &= ~OPTION_MASK_ISA_64BIT;
30864 
30865       /* Filter out the masks most often ORed together with others.  */
30866       if ((mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
30867 	  && mask != OPTION_MASK_ISA_AVX512VL)
30868 	mask &= ~OPTION_MASK_ISA_AVX512VL;
30869       if ((mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512BW)
30870 	  && mask != OPTION_MASK_ISA_AVX512BW)
30871 	mask &= ~OPTION_MASK_ISA_AVX512BW;
30872 
30873       if (mask == 0
30874 	  || (mask & ix86_isa_flags) != 0
30875 	  || (lang_hooks.builtin_function
30876 	      == lang_hooks.builtin_function_ext_scope))
30877 	{
30878 	  tree type = ix86_get_builtin_func_type (tcode);
30879 	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30880 				       NULL, NULL_TREE);
30881 	  ix86_builtins[(int) code] = decl;
30882 	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30883 	}
30884       else
30885 	{
30886 	  /* Just a MASK where set_and_not_built_p == true can potentially
30887 	  /* Only a MASK where set_and_not_built_p == true can potentially
30888 	  deferred_isa_values |= mask;
30889 	  ix86_builtins[(int) code] = NULL_TREE;
30890 	  ix86_builtins_isa[(int) code].tcode = tcode;
30891 	  ix86_builtins_isa[(int) code].name = name;
30892 	  ix86_builtins_isa[(int) code].leaf_p = false;
30893 	  ix86_builtins_isa[(int) code].nothrow_p = false;
30894 	  ix86_builtins_isa[(int) code].const_p = false;
30895 	  ix86_builtins_isa[(int) code].pure_p = false;
30896 	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30897 	}
30898     }
30899 
30900   return decl;
30901 }
30902 
30903 /* Like def_builtin, but also marks the function decl "const".  */
30904 
30905 static inline tree
30906 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30907 		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30908 {
30909   tree decl = def_builtin (mask, name, tcode, code);
30910   if (decl)
30911     TREE_READONLY (decl) = 1;
30912   else
30913     ix86_builtins_isa[(int) code].const_p = true;
30914 
30915   return decl;
30916 }
30917 
30918 /* Like def_builtin, but also marks the function decl "pure".  */
30919 
30920 static inline tree
30921 def_builtin_pure (HOST_WIDE_INT mask, const char *name,
30922 		  enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30923 {
30924   tree decl = def_builtin (mask, name, tcode, code);
30925   if (decl)
30926     DECL_PURE_P (decl) = 1;
30927   else
30928     ix86_builtins_isa[(int) code].pure_p = true;
30929 
30930   return decl;
30931 }
30932 
30933 /* Like def_builtin, but for additional isa2 flags.  */
30934 
30935 static inline tree
30936 def_builtin2 (HOST_WIDE_INT mask, const char *name,
30937 	      enum ix86_builtin_func_type tcode,
30938 	      enum ix86_builtins code)
30939 {
30940   tree decl = NULL_TREE;
30941 
30942   ix86_builtins_isa[(int) code].isa2 = mask;
30943 
30944   if (mask == 0
30945       || (mask & ix86_isa_flags2) != 0
30946       || (lang_hooks.builtin_function
30947 	  == lang_hooks.builtin_function_ext_scope))
30948 
30949     {
30950       tree type = ix86_get_builtin_func_type (tcode);
30951       decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30952 				   NULL, NULL_TREE);
30953       ix86_builtins[(int) code] = decl;
30954       ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30955     }
30956   else
30957     {
30958       /* Just a MASK where set_and_not_built_p == true can potentially
30959       /* Only a MASK where set_and_not_built_p == true can potentially
30960       deferred_isa_values2 |= mask;
30961       ix86_builtins[(int) code] = NULL_TREE;
30962       ix86_builtins_isa[(int) code].tcode = tcode;
30963       ix86_builtins_isa[(int) code].name = name;
30964       ix86_builtins_isa[(int) code].leaf_p = false;
30965       ix86_builtins_isa[(int) code].nothrow_p = false;
30966       ix86_builtins_isa[(int) code].const_p = false;
30967       ix86_builtins_isa[(int) code].pure_p = false;
30968       ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30969     }
30970 
30971   return decl;
30972 }
30973 
30974 /* Like def_builtin, but also marks the function decl "const".  */
30975 
30976 static inline tree
30977 def_builtin_const2 (HOST_WIDE_INT mask, const char *name,
30978 		    enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30979 {
30980   tree decl = def_builtin2 (mask, name, tcode, code);
30981   if (decl)
30982     TREE_READONLY (decl) = 1;
30983   else
30984     ix86_builtins_isa[(int) code].const_p = true;
30985 
30986   return decl;
30987 }
30988 
30989 /* Like def_builtin, but also marks the function decl "pure".  */
30990 
30991 static inline tree
30992 def_builtin_pure2 (HOST_WIDE_INT mask, const char *name,
30993 		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30994 {
30995   tree decl = def_builtin2 (mask, name, tcode, code);
30996   if (decl)
30997     DECL_PURE_P (decl) = 1;
30998   else
30999     ix86_builtins_isa[(int) code].pure_p = true;
31000 
31001   return decl;
31002 }
31003 
31004 /* Add any new builtin functions for a given ISA that may not have been
31005    declared.  This saves a bit of space compared to adding all of the
31006    declarations to the tree, even if we didn't use them.  */
31007 
31008 static void
31009 ix86_add_new_builtins (HOST_WIDE_INT isa, HOST_WIDE_INT isa2)
31010 {
31011   isa &= ~OPTION_MASK_ISA_64BIT;
31012 
31013   if ((isa & deferred_isa_values) == 0
31014       && (isa2 & deferred_isa_values2) == 0)
31015     return;
31016 
31017   /* Bits in ISA value can be removed from potential isa values.  */
31018   deferred_isa_values &= ~isa;
31019   deferred_isa_values2 &= ~isa2;
31020 
31021   int i;
31022   tree saved_current_target_pragma = current_target_pragma;
31023   current_target_pragma = NULL_TREE;
31024 
31025   for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
31026     {
31027       if (((ix86_builtins_isa[i].isa & isa) != 0
31028 	   || (ix86_builtins_isa[i].isa2 & isa2) != 0)
31029 	  && ix86_builtins_isa[i].set_and_not_built_p)
31030 	{
31031 	  tree decl, type;
31032 
31033 	  /* Don't define the builtin again.  */
31034 	  ix86_builtins_isa[i].set_and_not_built_p = false;
31035 
31036 	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
31037 	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
31038 						 type, i, BUILT_IN_MD, NULL,
31039 						 NULL_TREE);
31040 
31041 	  ix86_builtins[i] = decl;
31042 	  if (ix86_builtins_isa[i].const_p)
31043 	    TREE_READONLY (decl) = 1;
31044 	  if (ix86_builtins_isa[i].pure_p)
31045 	    DECL_PURE_P (decl) = 1;
31046 	  if (ix86_builtins_isa[i].leaf_p)
31047 	    DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
31048 						      NULL_TREE);
31049 	  if (ix86_builtins_isa[i].nothrow_p)
31050 	    TREE_NOTHROW (decl) = 1;
31051 	}
31052     }
31053 
31054   current_target_pragma = saved_current_target_pragma;
31055 }
31056 
31057 /* Bits for builtin_description.flag.  */
31058 
31059 /* Set when we don't support the comparison natively, and should
31060    swap_comparison in order to support it.  */
31061 #define BUILTIN_DESC_SWAP_OPERANDS	1
31062 
31063 struct builtin_description
31064 {
31065   const HOST_WIDE_INT mask;
31066   const enum insn_code icode;
31067   const char *const name;
31068   const enum ix86_builtins code;
31069   const enum rtx_code comparison;
31070   const int flag;
31071 };
31072 
31073 #define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
31074 #define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
31075 #define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
31076 #define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
31077 #define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
31078 #define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
31079 #define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
31080 #define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
31081 #define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
31082 #define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
31083 #define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
31084 #define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
31085 #define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
31086 #define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
31087 #define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
31088 #define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
31089 #define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
31090 #define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
31091 #define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
31092 #define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
31093 #define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
31094 #define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
31095 #define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
31096 #define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
31097 #define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
31098 #define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
31099 #define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
31100 #define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
31101 #define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
31102 #define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
31103 #define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
31104 #define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
31105 #define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
31106 #define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
31107 #define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
31108 #define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
31109 #define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
31110 #define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
31111 #define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
31112 #define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
31113 #define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
31114 #define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
31115 #define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
31116 #define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
31117 #define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
31118 #define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
31119 #define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
31120 #define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
31121 #define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
31122 #define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
31123 #define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
31124 #define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
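/* As the #defines above show, the MULTI_ARG_* names are simply shorthand
   for values of enum ix86_builtin_func_type; e.g. MULTI_ARG_3_SF stands
   for V4SF_FTYPE_V4SF_V4SF_V4SF, i.e. a builtin taking three V4SF
   arguments and returning V4SF.  They keep the multi-arg rows of
   i386-builtin.def (the bdesc_multi_arg table used below) readable.  */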
31125 
31126 #define BDESC(mask, icode, name, code, comparison, flag) \
31127   { mask, icode, name, code, comparison, flag },
31128 #define BDESC_FIRST(kind, kindu, mask, icode, name, code, comparison, flag) \
31129 static const struct builtin_description bdesc_##kind[] =		    \
31130 {									    \
31131   BDESC (mask, icode, name, code, comparison, flag)
31132 #define BDESC_END(kind, next_kind) \
31133 };
31134 
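/* A schematic illustration (a hypothetical line, not an actual entry from
   i386-builtin.def) of how the macros above are used: a line such as

     BDESC (OPTION_MASK_ISA_SSE2, CODE_FOR_some_insn, "__builtin_ia32_foo",
	    IX86_BUILTIN_FOO, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF)

   expands to the aggregate initializer

     { OPTION_MASK_ISA_SSE2, CODE_FOR_some_insn, "__builtin_ia32_foo",
       IX86_BUILTIN_FOO, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

   inside the bdesc_<kind> array that BDESC_FIRST opens and BDESC_END
   closes.  */
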
31135 #include "i386-builtin.def"
31136 
31137 #undef BDESC
31138 #undef BDESC_FIRST
31139 #undef BDESC_END
31140 
31141 /* TM vector builtins.  */
31142 
/* Reuse the existing x86-specific `struct builtin_description' because
   we're lazy.  Add casts to make the TM builtin codes and types fit.  */
31145 static const struct builtin_description bdesc_tm[] =
31146 {
31147   { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
31148   { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
31149   { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
31150   { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
31151   { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
31152   { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
31153   { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
31154 
31155   { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
31156   { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
31157   { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
31158   { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
31159   { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
31160   { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
31161   { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
31162 
31163   { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
31164   { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
31165   { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
31166   { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
31167   { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
31168   { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
31169   { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
31170 
31171   { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
31172   { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
31173   { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
31174 };
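/* As the loop in ix86_init_tm_builtins below shows, the flag field of
   each entry above really holds an ix86_builtin_func_type, and each name
   is registered together with the same string minus the "__builtin_"
   prefix (e.g. "_ITM_WM64" for "__builtin__ITM_WM64"), so the TM runtime
   entry points can also be called without the prefix.  */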
31175 
31176 /* Initialize the transactional memory vector load/store builtins.  */
31177 
31178 static void
31179 ix86_init_tm_builtins (void)
31180 {
31181   enum ix86_builtin_func_type ftype;
31182   const struct builtin_description *d;
31183   size_t i;
31184   tree decl;
31185   tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
31186   tree attrs_log, attrs_type_log;
31187 
31188   if (!flag_tm)
31189     return;
31190 
31191   /* If there are no builtins defined, we must be compiling in a
31192      language without trans-mem support.  */
31193   if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
31194     return;
31195 
31196   /* Use whatever attributes a normal TM load has.  */
31197   decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
31198   attrs_load = DECL_ATTRIBUTES (decl);
31199   attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
31200   /* Use whatever attributes a normal TM store has.  */
31201   decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
31202   attrs_store = DECL_ATTRIBUTES (decl);
31203   attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
31204   /* Use whatever attributes a normal TM log has.  */
31205   decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
31206   attrs_log = DECL_ATTRIBUTES (decl);
31207   attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
31208 
31209   for (i = 0, d = bdesc_tm;
31210        i < ARRAY_SIZE (bdesc_tm);
31211        i++, d++)
31212     {
31213       if ((d->mask & ix86_isa_flags) != 0
31214 	  || (lang_hooks.builtin_function
31215 	      == lang_hooks.builtin_function_ext_scope))
31216 	{
31217 	  tree type, attrs, attrs_type;
31218 	  enum built_in_function code = (enum built_in_function) d->code;
31219 
31220 	  ftype = (enum ix86_builtin_func_type) d->flag;
31221 	  type = ix86_get_builtin_func_type (ftype);
31222 
31223 	  if (BUILTIN_TM_LOAD_P (code))
31224 	    {
31225 	      attrs = attrs_load;
31226 	      attrs_type = attrs_type_load;
31227 	    }
31228 	  else if (BUILTIN_TM_STORE_P (code))
31229 	    {
31230 	      attrs = attrs_store;
31231 	      attrs_type = attrs_type_store;
31232 	    }
31233 	  else
31234 	    {
31235 	      attrs = attrs_log;
31236 	      attrs_type = attrs_type_log;
31237 	    }
31238 	  decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
31239 				       /* The builtin without the prefix for
31240 					  calling it directly.  */
31241 				       d->name + strlen ("__builtin_"),
31242 				       attrs);
31243 	  /* add_builtin_function() will set the DECL_ATTRIBUTES, now
31244 	     set the TYPE_ATTRIBUTES.  */
31245 	  decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
31246 
31247 	  set_builtin_decl (code, decl, false);
31248 	}
31249     }
31250 }
31251 
31252 /* Macros for verification of enum ix86_builtins order.  */
31253 #define BDESC_VERIFY(x, y, z) \
31254   gcc_checking_assert ((x) == (enum ix86_builtins) ((y) + (z)))
31255 #define BDESC_VERIFYS(x, y, z) \
31256   STATIC_ASSERT ((x) == (enum ix86_builtins) ((y) + (z)))
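/* BDESC_VERIFYS statically asserts, between the tables below, that one
   range of codes in enum ix86_builtins begins right after the previous
   range ends; BDESC_VERIFY is the (checking-only) runtime counterpart
   used inside the registration loops to check that table index I
   corresponds to builtin code FIRST + I.  */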
31257 
31258 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_FIRST,
31259 	       IX86_BUILTIN__BDESC_COMI_LAST, 1);
31260 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_FIRST,
31261 	       IX86_BUILTIN__BDESC_PCMPESTR_LAST, 1);
31262 BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST,
31263 	       IX86_BUILTIN__BDESC_PCMPISTR_LAST, 1);
31264 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_FIRST,
31265 	       IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST, 1);
31266 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST,
31267 	       IX86_BUILTIN__BDESC_ARGS_LAST, 1);
31268 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS2_FIRST,
31269 	       IX86_BUILTIN__BDESC_ROUND_ARGS_LAST, 1);
31270 BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST,
31271 	       IX86_BUILTIN__BDESC_ARGS2_LAST, 1);
31272 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_FIRST,
31273 	       IX86_BUILTIN__BDESC_SPECIAL_ARGS2_LAST, 1);
31274 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_CONST_FIRST,
31275 	       IX86_BUILTIN__BDESC_MPX_LAST, 1);
31276 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_FIRST,
31277 	       IX86_BUILTIN__BDESC_MPX_CONST_LAST, 1);
31278 BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_FIRST,
31279 	       IX86_BUILTIN__BDESC_MULTI_ARG_LAST, 1);
31280 BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_NORMAL_FIRST,
31281 	       IX86_BUILTIN__BDESC_CET_LAST, 1);
31282 BDESC_VERIFYS (IX86_BUILTIN_MAX,
31283 	       IX86_BUILTIN__BDESC_CET_NORMAL_LAST, 1);
31284 
/* Set up all the MMX/SSE builtins, even builtins for instructions that are
   not in the current target ISA, to allow the user to compile particular
   modules with target-specific options that differ from the command line
   options.  */
31289 static void
31290 ix86_init_mmx_sse_builtins (void)
31291 {
31292   const struct builtin_description * d;
31293   enum ix86_builtin_func_type ftype;
31294   size_t i;
31295 
31296   /* Add all special builtins with variable number of operands.  */
31297   for (i = 0, d = bdesc_special_args;
31298        i < ARRAY_SIZE (bdesc_special_args);
31299        i++, d++)
31300     {
31301       BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST, i);
31302       if (d->name == 0)
31303 	continue;
31304 
31305       ftype = (enum ix86_builtin_func_type) d->flag;
31306       def_builtin (d->mask, d->name, ftype, d->code);
31307     }
31308   BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST,
31309 		 IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST,
31310 		 ARRAY_SIZE (bdesc_special_args) - 1);
31311 
  /* Likewise for the second table of special builtins.  */
31313   for (i = 0, d = bdesc_special_args2;
31314        i < ARRAY_SIZE (bdesc_special_args2);
31315        i++, d++)
31316     {
31317       BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST, i);
31318       if (d->name == 0)
31319 	continue;
31320 
31321       ftype = (enum ix86_builtin_func_type) d->flag;
31322       def_builtin2 (d->mask, d->name, ftype, d->code);
31323     }
31324   BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS2_LAST,
31325 		 IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST,
31326 		 ARRAY_SIZE (bdesc_special_args2) - 1);
31327 
31328   /* Add all builtins with variable number of operands.  */
31329   for (i = 0, d = bdesc_args;
31330        i < ARRAY_SIZE (bdesc_args);
31331        i++, d++)
31332     {
31333       BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ARGS_FIRST, i);
31334       if (d->name == 0)
31335 	continue;
31336 
31337       ftype = (enum ix86_builtin_func_type) d->flag;
31338       def_builtin_const (d->mask, d->name, ftype, d->code);
31339     }
31340   BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_LAST,
31341 		 IX86_BUILTIN__BDESC_ARGS_FIRST,
31342 		 ARRAY_SIZE (bdesc_args) - 1);
31343 
  /* Likewise for the second table of builtins (bdesc_args2).  */
31345   for (i = 0, d = bdesc_args2;
31346        i < ARRAY_SIZE (bdesc_args2);
31347        i++, d++)
31348     {
31349       BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ARGS2_FIRST, i);
31350       if (d->name == 0)
31351 	continue;
31352 
31353       ftype = (enum ix86_builtin_func_type) d->flag;
31354       def_builtin_const2 (d->mask, d->name, ftype, d->code);
31355     }
31356   BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS2_LAST,
31357 		 IX86_BUILTIN__BDESC_ARGS2_FIRST,
31358 		 ARRAY_SIZE (bdesc_args2) - 1);
31359 
31360   /* Add all builtins with rounding.  */
31361   for (i = 0, d = bdesc_round_args;
31362        i < ARRAY_SIZE (bdesc_round_args);
31363        i++, d++)
31364     {
31365       BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST, i);
31366       if (d->name == 0)
31367 	continue;
31368 
31369       ftype = (enum ix86_builtin_func_type) d->flag;
31370       def_builtin_const (d->mask, d->name, ftype, d->code);
31371     }
31372   BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_LAST,
31373 		 IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST,
31374 		 ARRAY_SIZE (bdesc_round_args) - 1);
31375 
31376   /* pcmpestr[im] insns.  */
31377   for (i = 0, d = bdesc_pcmpestr;
31378        i < ARRAY_SIZE (bdesc_pcmpestr);
31379        i++, d++)
31380     {
31381       BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPESTR_FIRST, i);
31382       if (d->code == IX86_BUILTIN_PCMPESTRM128)
31383 	ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
31384       else
31385 	ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
31386       def_builtin_const (d->mask, d->name, ftype, d->code);
31387     }
31388   BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_LAST,
31389 		 IX86_BUILTIN__BDESC_PCMPESTR_FIRST,
31390 		 ARRAY_SIZE (bdesc_pcmpestr) - 1);
31391 
31392   /* pcmpistr[im] insns.  */
31393   for (i = 0, d = bdesc_pcmpistr;
31394        i < ARRAY_SIZE (bdesc_pcmpistr);
31395        i++, d++)
31396     {
31397       BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPISTR_FIRST, i);
31398       if (d->code == IX86_BUILTIN_PCMPISTRM128)
31399 	ftype = V16QI_FTYPE_V16QI_V16QI_INT;
31400       else
31401 	ftype = INT_FTYPE_V16QI_V16QI_INT;
31402       def_builtin_const (d->mask, d->name, ftype, d->code);
31403     }
31404   BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_LAST,
31405 		 IX86_BUILTIN__BDESC_PCMPISTR_FIRST,
31406 		 ARRAY_SIZE (bdesc_pcmpistr) - 1);
31407 
31408   /* comi/ucomi insns.  */
31409   for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
31410     {
31411       BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_COMI_FIRST, i);
31412       if (d->mask == OPTION_MASK_ISA_SSE2)
31413 	ftype = INT_FTYPE_V2DF_V2DF;
31414       else
31415 	ftype = INT_FTYPE_V4SF_V4SF;
31416       def_builtin_const (d->mask, d->name, ftype, d->code);
31417     }
31418   BDESC_VERIFYS (IX86_BUILTIN__BDESC_COMI_LAST,
31419 		 IX86_BUILTIN__BDESC_COMI_FIRST,
31420 		 ARRAY_SIZE (bdesc_comi) - 1);
31421 
31422   /* SSE */
31423   def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
31424 	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
31425   def_builtin_pure (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
31426 		    UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
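
  /* A note on the ix86_builtin_func_type names used throughout: the part
     before _FTYPE_ is the return type and the parts after it are the
     argument types, so VOID_FTYPE_UNSIGNED above means "void (unsigned)"
     and UNSIGNED_FTYPE_VOID means "unsigned (void)".  */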
31427 
31428   /* SSE or 3DNow!A */
31429   def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
31430 	       /* As it uses V4HImode, we have to require -mmmx too.  */
31431 	       | OPTION_MASK_ISA_MMX,
31432 	       "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
31433 	       IX86_BUILTIN_MASKMOVQ);
31434 
31435   /* SSE2 */
31436   def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
31437 	       VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
31438 
31439   def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
31440 	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
31441   x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
31442 			    VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
31443 
31444   /* SSE3.  */
31445   def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
31446 	       VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
31447   def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
31448 	       VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
31449 
31450   /* AES */
31451   def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
31452 		     "__builtin_ia32_aesenc128",
31453 		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
31454   def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
31455 		     "__builtin_ia32_aesenclast128",
31456 		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
31457   def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
31458 		     "__builtin_ia32_aesdec128",
31459 		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
31460   def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
31461 		     "__builtin_ia32_aesdeclast128",
31462 		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
31463   def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
31464 		     "__builtin_ia32_aesimc128",
31465 		     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
31466   def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
31467 		     "__builtin_ia32_aeskeygenassist128",
31468 		     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
31469 
31470   /* PCLMUL */
31471   def_builtin_const (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2,
31472 		     "__builtin_ia32_pclmulqdq128",
31473 		     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
31474 
31475   /* RDRND */
31476   def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
31477 	       INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
31478   def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
31479 	       INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
31480   def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
31481 	       "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
31482 	       IX86_BUILTIN_RDRAND64_STEP);
31483 
31484   /* AVX2 */
31485   def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
31486 		    V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
31487 		    IX86_BUILTIN_GATHERSIV2DF);
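
  /* For the gather builtins the operand order encoded in the FTYPE is:
     pass-through source vector, base pointer, index vector, mask vector
     and immediate scale (informational only; the expander is the
     authoritative reference).  */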
31488 
31489   def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
31490 		    V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
31491 		    IX86_BUILTIN_GATHERSIV4DF);
31492 
31493   def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
31494 		    V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
31495 		    IX86_BUILTIN_GATHERDIV2DF);
31496 
31497   def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
31498 		    V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
31499 		    IX86_BUILTIN_GATHERDIV4DF);
31500 
31501   def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
31502 		    V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
31503 		    IX86_BUILTIN_GATHERSIV4SF);
31504 
31505   def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
31506 		    V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
31507 		    IX86_BUILTIN_GATHERSIV8SF);
31508 
31509   def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
31510 		    V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
31511 		    IX86_BUILTIN_GATHERDIV4SF);
31512 
31513   def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
31514 		    V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
31515 		    IX86_BUILTIN_GATHERDIV8SF);
31516 
31517   def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
31518 		    V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
31519 		    IX86_BUILTIN_GATHERSIV2DI);
31520 
31521   def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
31522 		    V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
31523 		    IX86_BUILTIN_GATHERSIV4DI);
31524 
31525   def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
31526 		    V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
31527 		    IX86_BUILTIN_GATHERDIV2DI);
31528 
31529   def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
31530 		    V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
31531 		    IX86_BUILTIN_GATHERDIV4DI);
31532 
31533   def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
31534 		    V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
31535 		    IX86_BUILTIN_GATHERSIV4SI);
31536 
31537   def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
31538 		    V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
31539 		    IX86_BUILTIN_GATHERSIV8SI);
31540 
31541   def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
31542 		    V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
31543 		    IX86_BUILTIN_GATHERDIV4SI);
31544 
31545   def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
31546 		    V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
31547 		    IX86_BUILTIN_GATHERDIV8SI);
31548 
31549   def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
31550 		    V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
31551 		    IX86_BUILTIN_GATHERALTSIV4DF);
31552 
31553   def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
31554 		    V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
31555 		    IX86_BUILTIN_GATHERALTDIV8SF);
31556 
31557   def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
31558 		    V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
31559 		    IX86_BUILTIN_GATHERALTSIV4DI);
31560 
31561   def_builtin_pure (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
31562 		    V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
31563 		    IX86_BUILTIN_GATHERALTDIV8SI);
31564 
31565   /* AVX512F */
31566   def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
31567 		    V16SF_FTYPE_V16SF_PCVOID_V16SI_HI_INT,
31568 		    IX86_BUILTIN_GATHER3SIV16SF);
31569 
31570   def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
31571 		    V8DF_FTYPE_V8DF_PCVOID_V8SI_QI_INT,
31572 		    IX86_BUILTIN_GATHER3SIV8DF);
31573 
31574   def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
31575 		    V8SF_FTYPE_V8SF_PCVOID_V8DI_QI_INT,
31576 		    IX86_BUILTIN_GATHER3DIV16SF);
31577 
31578   def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
31579 		    V8DF_FTYPE_V8DF_PCVOID_V8DI_QI_INT,
31580 		    IX86_BUILTIN_GATHER3DIV8DF);
31581 
31582   def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
31583 		    V16SI_FTYPE_V16SI_PCVOID_V16SI_HI_INT,
31584 		    IX86_BUILTIN_GATHER3SIV16SI);
31585 
31586   def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
31587 		    V8DI_FTYPE_V8DI_PCVOID_V8SI_QI_INT,
31588 		    IX86_BUILTIN_GATHER3SIV8DI);
31589 
31590   def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
31591 		    V8SI_FTYPE_V8SI_PCVOID_V8DI_QI_INT,
31592 		    IX86_BUILTIN_GATHER3DIV16SI);
31593 
31594   def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
31595 		    V8DI_FTYPE_V8DI_PCVOID_V8DI_QI_INT,
31596 		    IX86_BUILTIN_GATHER3DIV8DI);
31597 
31598   def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
31599 		    V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
31600 		    IX86_BUILTIN_GATHER3ALTSIV8DF);
31601 
31602   def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
31603 		    V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
31604 		    IX86_BUILTIN_GATHER3ALTDIV16SF);
31605 
31606   def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
31607 		    V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
31608 		    IX86_BUILTIN_GATHER3ALTSIV8DI);
31609 
31610   def_builtin_pure (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
31611 		    V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
31612 		    IX86_BUILTIN_GATHER3ALTDIV16SI);
31613 
31614   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
31615 	       VOID_FTYPE_PVOID_HI_V16SI_V16SF_INT,
31616 	       IX86_BUILTIN_SCATTERSIV16SF);
31617 
31618   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
31619 	       VOID_FTYPE_PVOID_QI_V8SI_V8DF_INT,
31620 	       IX86_BUILTIN_SCATTERSIV8DF);
31621 
31622   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
31623 	       VOID_FTYPE_PVOID_QI_V8DI_V8SF_INT,
31624 	       IX86_BUILTIN_SCATTERDIV16SF);
31625 
31626   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
31627 	       VOID_FTYPE_PVOID_QI_V8DI_V8DF_INT,
31628 	       IX86_BUILTIN_SCATTERDIV8DF);
31629 
31630   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
31631 	       VOID_FTYPE_PVOID_HI_V16SI_V16SI_INT,
31632 	       IX86_BUILTIN_SCATTERSIV16SI);
31633 
31634   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
31635 	       VOID_FTYPE_PVOID_QI_V8SI_V8DI_INT,
31636 	       IX86_BUILTIN_SCATTERSIV8DI);
31637 
31638   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
31639 	       VOID_FTYPE_PVOID_QI_V8DI_V8SI_INT,
31640 	       IX86_BUILTIN_SCATTERDIV16SI);
31641 
31642   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
31643 	       VOID_FTYPE_PVOID_QI_V8DI_V8DI_INT,
31644 	       IX86_BUILTIN_SCATTERDIV8DI);
31645 
31646   /* AVX512VL */
31647   def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
31648 		    V2DF_FTYPE_V2DF_PCVOID_V4SI_QI_INT,
31649 		    IX86_BUILTIN_GATHER3SIV2DF);
31650 
31651   def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
31652 		    V4DF_FTYPE_V4DF_PCVOID_V4SI_QI_INT,
31653 		    IX86_BUILTIN_GATHER3SIV4DF);
31654 
31655   def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
31656 		    V2DF_FTYPE_V2DF_PCVOID_V2DI_QI_INT,
31657 		    IX86_BUILTIN_GATHER3DIV2DF);
31658 
31659   def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
31660 		    V4DF_FTYPE_V4DF_PCVOID_V4DI_QI_INT,
31661 		    IX86_BUILTIN_GATHER3DIV4DF);
31662 
31663   def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
31664 		    V4SF_FTYPE_V4SF_PCVOID_V4SI_QI_INT,
31665 		    IX86_BUILTIN_GATHER3SIV4SF);
31666 
31667   def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
31668 		    V8SF_FTYPE_V8SF_PCVOID_V8SI_QI_INT,
31669 		    IX86_BUILTIN_GATHER3SIV8SF);
31670 
31671   def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
31672 		    V4SF_FTYPE_V4SF_PCVOID_V2DI_QI_INT,
31673 		    IX86_BUILTIN_GATHER3DIV4SF);
31674 
31675   def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
31676 		    V4SF_FTYPE_V4SF_PCVOID_V4DI_QI_INT,
31677 		    IX86_BUILTIN_GATHER3DIV8SF);
31678 
31679   def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
31680 		    V2DI_FTYPE_V2DI_PCVOID_V4SI_QI_INT,
31681 		    IX86_BUILTIN_GATHER3SIV2DI);
31682 
31683   def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
31684 		    V4DI_FTYPE_V4DI_PCVOID_V4SI_QI_INT,
31685 		    IX86_BUILTIN_GATHER3SIV4DI);
31686 
31687   def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
31688 		    V2DI_FTYPE_V2DI_PCVOID_V2DI_QI_INT,
31689 		    IX86_BUILTIN_GATHER3DIV2DI);
31690 
31691   def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
31692 		    V4DI_FTYPE_V4DI_PCVOID_V4DI_QI_INT,
31693 		    IX86_BUILTIN_GATHER3DIV4DI);
31694 
31695   def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
31696 		    V4SI_FTYPE_V4SI_PCVOID_V4SI_QI_INT,
31697 		    IX86_BUILTIN_GATHER3SIV4SI);
31698 
31699   def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
31700 		    V8SI_FTYPE_V8SI_PCVOID_V8SI_QI_INT,
31701 		    IX86_BUILTIN_GATHER3SIV8SI);
31702 
31703   def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
31704 		    V4SI_FTYPE_V4SI_PCVOID_V2DI_QI_INT,
31705 		    IX86_BUILTIN_GATHER3DIV4SI);
31706 
31707   def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
31708 		    V4SI_FTYPE_V4SI_PCVOID_V4DI_QI_INT,
31709 		    IX86_BUILTIN_GATHER3DIV8SI);
31710 
31711   def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
31712 		    V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
31713 		    IX86_BUILTIN_GATHER3ALTSIV4DF);
31714 
31715   def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
31716 		    V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
31717 		    IX86_BUILTIN_GATHER3ALTDIV8SF);
31718 
31719   def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
31720 		    V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
31721 		    IX86_BUILTIN_GATHER3ALTSIV4DI);
31722 
31723   def_builtin_pure (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
31724 		    V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
31725 		    IX86_BUILTIN_GATHER3ALTDIV8SI);
31726 
31727   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
31728 	       VOID_FTYPE_PVOID_QI_V8SI_V8SF_INT,
31729 	       IX86_BUILTIN_SCATTERSIV8SF);
31730 
31731   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
31732 	       VOID_FTYPE_PVOID_QI_V4SI_V4SF_INT,
31733 	       IX86_BUILTIN_SCATTERSIV4SF);
31734 
31735   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
31736 	       VOID_FTYPE_PVOID_QI_V4SI_V4DF_INT,
31737 	       IX86_BUILTIN_SCATTERSIV4DF);
31738 
31739   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
31740 	       VOID_FTYPE_PVOID_QI_V4SI_V2DF_INT,
31741 	       IX86_BUILTIN_SCATTERSIV2DF);
31742 
31743   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
31744 	       VOID_FTYPE_PVOID_QI_V4DI_V4SF_INT,
31745 	       IX86_BUILTIN_SCATTERDIV8SF);
31746 
31747   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
31748 	       VOID_FTYPE_PVOID_QI_V2DI_V4SF_INT,
31749 	       IX86_BUILTIN_SCATTERDIV4SF);
31750 
31751   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
31752 	       VOID_FTYPE_PVOID_QI_V4DI_V4DF_INT,
31753 	       IX86_BUILTIN_SCATTERDIV4DF);
31754 
31755   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
31756 	       VOID_FTYPE_PVOID_QI_V2DI_V2DF_INT,
31757 	       IX86_BUILTIN_SCATTERDIV2DF);
31758 
31759   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
31760 	       VOID_FTYPE_PVOID_QI_V8SI_V8SI_INT,
31761 	       IX86_BUILTIN_SCATTERSIV8SI);
31762 
31763   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
31764 	       VOID_FTYPE_PVOID_QI_V4SI_V4SI_INT,
31765 	       IX86_BUILTIN_SCATTERSIV4SI);
31766 
31767   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
31768 	       VOID_FTYPE_PVOID_QI_V4SI_V4DI_INT,
31769 	       IX86_BUILTIN_SCATTERSIV4DI);
31770 
31771   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
31772 	       VOID_FTYPE_PVOID_QI_V4SI_V2DI_INT,
31773 	       IX86_BUILTIN_SCATTERSIV2DI);
31774 
31775   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
31776 	       VOID_FTYPE_PVOID_QI_V4DI_V4SI_INT,
31777 	       IX86_BUILTIN_SCATTERDIV8SI);
31778 
31779   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
31780 	       VOID_FTYPE_PVOID_QI_V2DI_V4SI_INT,
31781 	       IX86_BUILTIN_SCATTERDIV4SI);
31782 
31783   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
31784 	       VOID_FTYPE_PVOID_QI_V4DI_V4DI_INT,
31785 	       IX86_BUILTIN_SCATTERDIV4DI);
31786 
31787   def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
31788 	       VOID_FTYPE_PVOID_QI_V2DI_V2DI_INT,
31789 	       IX86_BUILTIN_SCATTERDIV2DI);
31790   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8df ",
31791 	       VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT,
31792 	       IX86_BUILTIN_SCATTERALTSIV8DF);
31793 
31794   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8sf ",
31795 	       VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT,
31796 	       IX86_BUILTIN_SCATTERALTDIV16SF);
31797 
31798   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8di ",
31799 	       VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT,
31800 	       IX86_BUILTIN_SCATTERALTSIV8DI);
31801 
31802   def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8si ",
31803 	       VOID_FTYPE_PINT_HI_V8DI_V16SI_INT,
31804 	       IX86_BUILTIN_SCATTERALTDIV16SI);
31805 
31806   /* AVX512PF */
31807   def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
31808 	       VOID_FTYPE_QI_V8SI_PCVOID_INT_INT,
31809 	       IX86_BUILTIN_GATHERPFDPD);
31810   def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
31811 	       VOID_FTYPE_HI_V16SI_PCVOID_INT_INT,
31812 	       IX86_BUILTIN_GATHERPFDPS);
31813   def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31814 	       VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
31815 	       IX86_BUILTIN_GATHERPFQPD);
31816   def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31817 	       VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
31818 	       IX86_BUILTIN_GATHERPFQPS);
31819   def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31820 	       VOID_FTYPE_QI_V8SI_PCVOID_INT_INT,
31821 	       IX86_BUILTIN_SCATTERPFDPD);
31822   def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31823 	       VOID_FTYPE_HI_V16SI_PCVOID_INT_INT,
31824 	       IX86_BUILTIN_SCATTERPFDPS);
31825   def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31826 	       VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
31827 	       IX86_BUILTIN_SCATTERPFQPD);
31828   def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31829 	       VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
31830 	       IX86_BUILTIN_SCATTERPFQPS);
31831 
31832   /* SHA */
31833   def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31834 		     V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31835   def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31836 		     V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31837   def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31838 		     V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31839   def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31840 		     V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31841   def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31842 		     V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31843   def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31844 		     V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31845   def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31846 		     V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31847 
31848   /* RTM.  */
31849   def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31850 	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31851 
31852   /* MMX access to the vec_init patterns.  */
31853   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31854 		     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31855 
31856   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31857 		     V4HI_FTYPE_HI_HI_HI_HI,
31858 		     IX86_BUILTIN_VEC_INIT_V4HI);
31859 
31860   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31861 		     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31862 		     IX86_BUILTIN_VEC_INIT_V8QI);
31863 
31864   /* Access to the vec_extract patterns.  */
31865   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31866 		     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31867   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31868 		     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31869   def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31870 		     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31871   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31872 		     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31873   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31874 		     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31875 
31876   def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
31877 		     /* As it uses V4HImode, we have to require -mmmx too.  */
31878 		     | OPTION_MASK_ISA_MMX,
31879 		     "__builtin_ia32_vec_ext_v4hi",
31880 		     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31881 
31882   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31883 		     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31884 
31885   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31886 		     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31887 
31888   /* Access to the vec_set patterns.  */
31889   def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31890 		     "__builtin_ia32_vec_set_v2di",
31891 		     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31892 
31893   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31894 		     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31895 
31896   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31897 		     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31898 
31899   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31900 		     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31901 
31902   def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
31903 		     /* As it uses V4HImode, we have to require -mmmx too.  */
31904 		     | OPTION_MASK_ISA_MMX,
31905 		     "__builtin_ia32_vec_set_v4hi",
31906 		     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31907 
31908   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31909 		     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31910 
31911   /* RDSEED */
31912   def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31913 	       INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31914   def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31915 	       INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31916   def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31917 	       "__builtin_ia32_rdseed_di_step",
31918 	       INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31919 
31920   /* ADCX */
31921   def_builtin (0, "__builtin_ia32_addcarryx_u32",
31922 	       UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31923   def_builtin (OPTION_MASK_ISA_64BIT,
31924 	       "__builtin_ia32_addcarryx_u64",
31925 	       UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31926 	       IX86_BUILTIN_ADDCARRYX64);
31927 
31928   /* SBB */
31929   def_builtin (0, "__builtin_ia32_sbb_u32",
31930 	       UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
31931   def_builtin (OPTION_MASK_ISA_64BIT,
31932 	       "__builtin_ia32_sbb_u64",
31933 	       UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31934 	       IX86_BUILTIN_SBB64);
31935 
31936   /* Read/write FLAGS.  */
31937   if (TARGET_64BIT)
31938     {
31939       def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31940 		   UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31941       def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31942 		   VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
31943     }
31944   else
31945     {
31946       def_builtin (0, "__builtin_ia32_readeflags_u32",
31947 		   UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31948       def_builtin (0, "__builtin_ia32_writeeflags_u32",
31949 		   VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31950     }
31951 
31952   /* CLFLUSHOPT.  */
31953   def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
31954 	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
31955 
31956   /* CLWB.  */
31957   def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
31958 	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
31959 
31960   /* MONITORX and MWAITX.  */
31961   def_builtin2 (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx",
31962 		VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
31963   def_builtin2 (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
31964 		VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
31965 
31966   /* CLZERO.  */
31967   def_builtin2 (OPTION_MASK_ISA_CLZERO, "__builtin_ia32_clzero",
31968 		VOID_FTYPE_PCVOID, IX86_BUILTIN_CLZERO);
31969 
  /* Add FMA4 multi-arg instructions.  */
31971   for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31972     {
31973       BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MULTI_ARG_FIRST, i);
31974       if (d->name == 0)
31975 	continue;
31976 
31977       ftype = (enum ix86_builtin_func_type) d->flag;
31978       def_builtin_const (d->mask, d->name, ftype, d->code);
31979     }
31980   BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_LAST,
31981 		 IX86_BUILTIN__BDESC_MULTI_ARG_FIRST,
31982 		 ARRAY_SIZE (bdesc_multi_arg) - 1);
31983 
  /* Add CET intrinsics.  */
31985   for (i = 0, d = bdesc_cet; i < ARRAY_SIZE (bdesc_cet); i++, d++)
31986     {
31987       BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_CET_FIRST, i);
31988       if (d->name == 0)
31989 	continue;
31990 
31991       ftype = (enum ix86_builtin_func_type) d->flag;
31992       def_builtin (d->mask, d->name, ftype, d->code);
31993     }
31994   BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_LAST,
31995 		 IX86_BUILTIN__BDESC_CET_FIRST,
31996 		 ARRAY_SIZE (bdesc_cet) - 1);
31997 
31998   for (i = 0, d = bdesc_cet_rdssp;
31999        i < ARRAY_SIZE (bdesc_cet_rdssp);
32000        i++, d++)
32001     {
32002       BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_CET_NORMAL_FIRST, i);
32003       if (d->name == 0)
32004 	continue;
32005 
32006       ftype = (enum ix86_builtin_func_type) d->flag;
32007       def_builtin (d->mask, d->name, ftype, d->code);
32008     }
32009   BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_NORMAL_LAST,
32010 		 IX86_BUILTIN__BDESC_CET_NORMAL_FIRST,
32011 		 ARRAY_SIZE (bdesc_cet_rdssp) - 1);
32012 }
32013 
32014 static void
32015 ix86_init_mpx_builtins ()
32016 {
32017   const struct builtin_description * d;
32018   enum ix86_builtin_func_type ftype;
32019   tree decl;
32020   size_t i;
32021 
32022   for (i = 0, d = bdesc_mpx;
32023        i < ARRAY_SIZE (bdesc_mpx);
32024        i++, d++)
32025     {
32026       BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MPX_FIRST, i);
32027       if (d->name == 0)
32028 	continue;
32029 
32030       ftype = (enum ix86_builtin_func_type) d->flag;
32031       decl = def_builtin2 (d->mask, d->name, ftype, d->code);
32032 
      /* Without the leaf and nothrow flags, abnormal edges may follow
	 calls to MPX builtins when setjmp is present in the function.
	 Since there may be many MPX builtin calls, this creates lots of
	 useless edges and enormous PHI nodes.  To avoid this, mark MPX
	 builtins as leaf and nothrow.  */
32039       if (decl)
32040 	{
32041 	  DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
32042 						    NULL_TREE);
32043 	  TREE_NOTHROW (decl) = 1;
32044 	}
32045       else
32046 	{
32047 	  ix86_builtins_isa[(int)d->code].leaf_p = true;
32048 	  ix86_builtins_isa[(int)d->code].nothrow_p = true;
32049 	}
32050     }
32051   BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_LAST,
32052 		 IX86_BUILTIN__BDESC_MPX_FIRST,
32053 		 ARRAY_SIZE (bdesc_mpx) - 1);
32054 
32055   for (i = 0, d = bdesc_mpx_const;
32056        i < ARRAY_SIZE (bdesc_mpx_const);
32057        i++, d++)
32058     {
32059       BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MPX_CONST_FIRST, i);
32060       if (d->name == 0)
32061 	continue;
32062 
32063       ftype = (enum ix86_builtin_func_type) d->flag;
32064       decl = def_builtin_const2 (d->mask, d->name, ftype, d->code);
32065 
32066       if (decl)
32067 	{
32068 	  DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
32069 						    NULL_TREE);
32070 	  TREE_NOTHROW (decl) = 1;
32071 	}
32072       else
32073 	{
32074 	  ix86_builtins_isa[(int)d->code].leaf_p = true;
32075 	  ix86_builtins_isa[(int)d->code].nothrow_p = true;
32076 	}
32077     }
32078   BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_CONST_LAST,
32079 		 IX86_BUILTIN__BDESC_MPX_CONST_FIRST,
32080 		 ARRAY_SIZE (bdesc_mpx_const) - 1);
32081 }
32082 #undef BDESC_VERIFY
32083 #undef BDESC_VERIFYS
32084 
32085 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
32086    to return a pointer to VERSION_DECL if the outcome of the expression
32087    formed by PREDICATE_CHAIN is true.  This function will be called during
32088    version dispatch to decide which function version to execute.  It returns
32089    the basic block at the end, to which more conditions can be added.  */
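
/* Schematically (an illustrative sketch, not the exact generated IL),
   for a single predicate the statements appended to NEW_BB amount to:

     cond_var = PREDICATE_DECL (PREDICATE_ARG);
     if (cond_var > 0)
       return (pointer to) VERSION_DECL;

   with control falling through to the returned basic block otherwise.
   When PREDICATE_CHAIN has several entries, their results are combined
   with MIN_EXPR, so the version is returned only if every predicate
   evaluates to nonzero.  */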
32090 
32091 static basic_block
32092 add_condition_to_bb (tree function_decl, tree version_decl,
32093 		     tree predicate_chain, basic_block new_bb)
32094 {
32095   gimple *return_stmt;
32096   tree convert_expr, result_var;
32097   gimple *convert_stmt;
32098   gimple *call_cond_stmt;
32099   gimple *if_else_stmt;
32100 
32101   basic_block bb1, bb2, bb3;
32102   edge e12, e23;
32103 
32104   tree cond_var, and_expr_var = NULL_TREE;
32105   gimple_seq gseq;
32106 
32107   tree predicate_decl, predicate_arg;
32108 
32109   push_cfun (DECL_STRUCT_FUNCTION (function_decl));
32110 
32111   gcc_assert (new_bb != NULL);
32112   gseq = bb_seq (new_bb);
32113 
32114 
32115   convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
32116 	     		 build_fold_addr_expr (version_decl));
32117   result_var = create_tmp_var (ptr_type_node);
32118   convert_stmt = gimple_build_assign (result_var, convert_expr);
32119   return_stmt = gimple_build_return (result_var);
32120 
32121   if (predicate_chain == NULL_TREE)
32122     {
32123       gimple_seq_add_stmt (&gseq, convert_stmt);
32124       gimple_seq_add_stmt (&gseq, return_stmt);
32125       set_bb_seq (new_bb, gseq);
32126       gimple_set_bb (convert_stmt, new_bb);
32127       gimple_set_bb (return_stmt, new_bb);
32128       pop_cfun ();
32129       return new_bb;
32130     }
32131 
32132   while (predicate_chain != NULL)
32133     {
32134       cond_var = create_tmp_var (integer_type_node);
32135       predicate_decl = TREE_PURPOSE (predicate_chain);
32136       predicate_arg = TREE_VALUE (predicate_chain);
32137       call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
32138       gimple_call_set_lhs (call_cond_stmt, cond_var);
32139 
32140       gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
32141       gimple_set_bb (call_cond_stmt, new_bb);
32142       gimple_seq_add_stmt (&gseq, call_cond_stmt);
32143 
32144       predicate_chain = TREE_CHAIN (predicate_chain);
32145 
32146       if (and_expr_var == NULL)
32147         and_expr_var = cond_var;
32148       else
32149 	{
32150 	  gimple *assign_stmt;
	  /* Use MIN_EXPR to check whether any of the conditions is zero:
	     and_expr_var = min_expr <cond_var, and_expr_var>.  */
32153 	  assign_stmt = gimple_build_assign (and_expr_var,
32154 			  build2 (MIN_EXPR, integer_type_node,
32155 				  cond_var, and_expr_var));
32156 
32157 	  gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
32158 	  gimple_set_bb (assign_stmt, new_bb);
32159 	  gimple_seq_add_stmt (&gseq, assign_stmt);
32160 	}
32161     }
32162 
32163   if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
32164 	  		            integer_zero_node,
32165 				    NULL_TREE, NULL_TREE);
32166   gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
32167   gimple_set_bb (if_else_stmt, new_bb);
32168   gimple_seq_add_stmt (&gseq, if_else_stmt);
32169 
32170   gimple_seq_add_stmt (&gseq, convert_stmt);
32171   gimple_seq_add_stmt (&gseq, return_stmt);
32172   set_bb_seq (new_bb, gseq);
32173 
32174   bb1 = new_bb;
32175   e12 = split_block (bb1, if_else_stmt);
32176   bb2 = e12->dest;
32177   e12->flags &= ~EDGE_FALLTHRU;
32178   e12->flags |= EDGE_TRUE_VALUE;
32179 
32180   e23 = split_block (bb2, return_stmt);
32181 
32182   gimple_set_bb (convert_stmt, bb2);
32183   gimple_set_bb (return_stmt, bb2);
32184 
32185   bb3 = e23->dest;
32186   make_edge (bb1, bb3, EDGE_FALSE_VALUE);
32187 
32188   remove_edge (e23);
32189   make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
32190 
32191   pop_cfun ();
32192 
32193   return bb3;
32194 }
32195 
32196 /* This parses the attribute arguments to target in DECL and determines
32197    the right builtin to use to match the platform specification.
32198    It returns the priority value for this version decl.  If PREDICATE_LIST
32199    is not NULL, it stores the list of cpu features that need to be checked
32200    before dispatching this function.  */
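
/* For instance (illustrative only), a version declared with
   __attribute__ ((target ("arch=haswell,avx2"))) gets a predicate chain
   that checks __builtin_cpu_is ("haswell") and __builtin_cpu_supports
   ("avx2"), and its priority is the larger of P_PROC_AVX2 (from arch=)
   and P_AVX2 (from the feature), i.e. P_PROC_AVX2.  */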
32201 
32202 static unsigned int
32203 get_builtin_code_for_version (tree decl, tree *predicate_list)
32204 {
32205   tree attrs;
32206   struct cl_target_option cur_target;
32207   tree target_node;
32208   struct cl_target_option *new_target;
32209   const char *arg_str = NULL;
32210   const char *attrs_str = NULL;
32211   char *tok_str = NULL;
32212   char *token;
32213 
  /* Priority of i386 features; a greater value means higher priority.  This
     is used to decide the order in which function dispatch must happen.  For
32216      instance, a version specialized for SSE4.2 should be checked for dispatch
32217      before a version for SSE3, as SSE4.2 implies SSE3.  */
32218   enum feature_priority
32219   {
32220     P_ZERO = 0,
32221     P_MMX,
32222     P_SSE,
32223     P_SSE2,
32224     P_SSE3,
32225     P_SSSE3,
32226     P_PROC_SSSE3,
32227     P_SSE4_A,
32228     P_PROC_SSE4_A,
32229     P_SSE4_1,
32230     P_SSE4_2,
32231     P_PROC_SSE4_2,
32232     P_POPCNT,
32233     P_AES,
32234     P_PCLMUL,
32235     P_AVX,
32236     P_PROC_AVX,
32237     P_BMI,
32238     P_PROC_BMI,
32239     P_FMA4,
32240     P_XOP,
32241     P_PROC_XOP,
32242     P_FMA,
32243     P_PROC_FMA,
32244     P_BMI2,
32245     P_AVX2,
32246     P_PROC_AVX2,
32247     P_AVX512F,
32248     P_PROC_AVX512F
32249   };
32250 
32251   enum feature_priority priority = P_ZERO;
32252 
32253   /* These are the target attribute strings for which a dispatcher is
32254      available, from fold_builtin_cpu.  */
32255 
32256   static struct _feature_list
32257     {
32258       const char *const name;
32259       const enum feature_priority priority;
32260     }
32261   const feature_list[] =
32262     {
32263       {"mmx", P_MMX},
32264       {"sse", P_SSE},
32265       {"sse2", P_SSE2},
32266       {"sse3", P_SSE3},
32267       {"sse4a", P_SSE4_A},
32268       {"ssse3", P_SSSE3},
32269       {"sse4.1", P_SSE4_1},
32270       {"sse4.2", P_SSE4_2},
32271       {"popcnt", P_POPCNT},
32272       {"aes", P_AES},
32273       {"pclmul", P_PCLMUL},
32274       {"avx", P_AVX},
32275       {"bmi", P_BMI},
32276       {"fma4", P_FMA4},
32277       {"xop", P_XOP},
32278       {"fma", P_FMA},
32279       {"bmi2", P_BMI2},
32280       {"avx2", P_AVX2},
32281       {"avx512f", P_AVX512F}
32282     };
32283 
32284 
32285   static unsigned int NUM_FEATURES
32286     = sizeof (feature_list) / sizeof (struct _feature_list);
32287 
32288   unsigned int i;
32289 
32290   tree predicate_chain = NULL_TREE;
32291   tree predicate_decl, predicate_arg;
32292 
32293   attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32294   gcc_assert (attrs != NULL);
32295 
32296   attrs = TREE_VALUE (TREE_VALUE (attrs));
32297 
32298   gcc_assert (TREE_CODE (attrs) == STRING_CST);
32299   attrs_str = TREE_STRING_POINTER (attrs);
32300 
32301   /* Return priority zero for default function.  */
32302   if (strcmp (attrs_str, "default") == 0)
32303     return 0;
32304 
32305   /* Handle arch= if specified.  For priority, set it to be 1 more than
32306      the best instruction set the processor can handle.  For instance, if
32307      there is a version for atom and a version for ssse3 (the highest ISA
32308      priority for atom), the atom version must be checked for dispatch
32309      before the ssse3 version. */
32310   if (strstr (attrs_str, "arch=") != NULL)
32311     {
32312       cl_target_option_save (&cur_target, &global_options);
32313       target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
32314 						      &global_options_set);
32315 
32316       gcc_assert (target_node);
32317       if (target_node == error_mark_node)
32318 	return 0;
32319       new_target = TREE_TARGET_OPTION (target_node);
32320       gcc_assert (new_target);
32321 
32322       if (new_target->arch_specified && new_target->arch > 0)
32323 	{
32324 	  switch (new_target->arch)
32325 	    {
32326 	    case PROCESSOR_CORE2:
32327 	      arg_str = "core2";
32328 	      priority = P_PROC_SSSE3;
32329 	      break;
32330 	    case PROCESSOR_NEHALEM:
32331 	      if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
32332 		{
32333 		  arg_str = "westmere";
32334 		  priority = P_AES;
32335 		}
32336 	      else
32337 		{
32338 		  /* We translate "arch=corei7" and "arch=nehalem" to
32339 		     "corei7" so that it will be mapped to M_INTEL_COREI7
32340 		     as cpu type to cover all M_INTEL_COREI7_XXXs.  */
32341 		  arg_str = "corei7";
32342 		  priority = P_PROC_SSE4_2;
32343 		}
32344 	      break;
32345 	    case PROCESSOR_SANDYBRIDGE:
32346 	      if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
32347 		arg_str = "ivybridge";
32348 	      else
32349 		arg_str = "sandybridge";
32350 	      priority = P_PROC_AVX;
32351 	      break;
32352 	    case PROCESSOR_HASWELL:
32353 	      if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
32354 		arg_str = "broadwell";
32355 	      else
32356 		arg_str = "haswell";
32357 	      priority = P_PROC_AVX2;
32358 	      break;
32359 	    case PROCESSOR_SKYLAKE:
32360 	      arg_str = "skylake";
32361 	      priority = P_PROC_AVX2;
32362 	      break;
32363 	    case PROCESSOR_SKYLAKE_AVX512:
32364 	      arg_str = "skylake-avx512";
32365 	      priority = P_PROC_AVX512F;
32366 	      break;
32367 	    case PROCESSOR_CANNONLAKE:
32368 	      arg_str = "cannonlake";
32369 	      priority = P_PROC_AVX512F;
32370 	      break;
32371 	    case PROCESSOR_ICELAKE_CLIENT:
32372 	      arg_str = "icelake-client";
32373 	      priority = P_PROC_AVX512F;
32374 	      break;
32375 	    case PROCESSOR_ICELAKE_SERVER:
32376 	      arg_str = "icelake-server";
32377 	      priority = P_PROC_AVX512F;
32378 	      break;
32379 	    case PROCESSOR_BONNELL:
32380 	      arg_str = "bonnell";
32381 	      priority = P_PROC_SSSE3;
32382 	      break;
32383 	    case PROCESSOR_KNL:
32384 	      arg_str = "knl";
32385 	      priority = P_PROC_AVX512F;
32386 	      break;
32387 	    case PROCESSOR_KNM:
32388 	      arg_str = "knm";
32389 	      priority = P_PROC_AVX512F;
32390 	      break;
32391 	    case PROCESSOR_SILVERMONT:
32392 	      arg_str = "silvermont";
32393 	      priority = P_PROC_SSE4_2;
32394 	      break;
32395 	    case PROCESSOR_AMDFAM10:
32396 	      arg_str = "amdfam10h";
32397 	      priority = P_PROC_SSE4_A;
32398 	      break;
32399 	    case PROCESSOR_BTVER1:
32400 	      arg_str = "btver1";
32401 	      priority = P_PROC_SSE4_A;
32402 	      break;
32403 	    case PROCESSOR_BTVER2:
32404 	      arg_str = "btver2";
32405 	      priority = P_PROC_BMI;
32406 	      break;
32407 	    case PROCESSOR_BDVER1:
32408 	      arg_str = "bdver1";
32409 	      priority = P_PROC_XOP;
32410 	      break;
32411 	    case PROCESSOR_BDVER2:
32412 	      arg_str = "bdver2";
32413 	      priority = P_PROC_FMA;
32414 	      break;
32415 	    case PROCESSOR_BDVER3:
32416 	      arg_str = "bdver3";
32417 	      priority = P_PROC_FMA;
32418 	      break;
32419 	    case PROCESSOR_BDVER4:
32420 	      arg_str = "bdver4";
32421 	      priority = P_PROC_AVX2;
32422 	      break;
32423 	    case PROCESSOR_ZNVER1:
32424 	      arg_str = "znver1";
32425 	      priority = P_PROC_AVX2;
32426 	      break;
32427 	    }
32428 	}
32429 
32430       cl_target_option_restore (&global_options, &cur_target);
32431 
32432       if (predicate_list && arg_str == NULL)
32433 	{
32434 	  error_at (DECL_SOURCE_LOCATION (decl),
32435 	    	"No dispatcher found for the versioning attributes");
32436 	  return 0;
32437 	}
32438 
32439       if (predicate_list)
32440 	{
32441           predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
32442           /* For a C string literal the length includes the trailing NULL.  */
32443           predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
32444           predicate_chain = tree_cons (predicate_decl, predicate_arg,
32445 				       predicate_chain);
32446 	}
32447     }
32448 
32449   /* Process feature name.  */
32450   tok_str =  (char *) xmalloc (strlen (attrs_str) + 1);
32451   strcpy (tok_str, attrs_str);
32452   token = strtok (tok_str, ",");
32453   predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
32454 
32455   while (token != NULL)
32456     {
      /* Do not process "arch=".  */
32458       if (strncmp (token, "arch=", 5) == 0)
32459 	{
32460 	  token = strtok (NULL, ",");
32461 	  continue;
32462 	}
32463       for (i = 0; i < NUM_FEATURES; ++i)
32464 	{
32465 	  if (strcmp (token, feature_list[i].name) == 0)
32466 	    {
32467 	      if (predicate_list)
32468 		{
32469 		  predicate_arg = build_string_literal (
32470 				  strlen (feature_list[i].name) + 1,
32471 				  feature_list[i].name);
32472 		  predicate_chain = tree_cons (predicate_decl, predicate_arg,
32473 					       predicate_chain);
32474 		}
32475 	      /* Find the maximum priority feature.  */
32476 	      if (feature_list[i].priority > priority)
32477 		priority = feature_list[i].priority;
32478 
32479 	      break;
32480 	    }
32481 	}
32482       if (predicate_list && i == NUM_FEATURES)
32483 	{
32484 	  error_at (DECL_SOURCE_LOCATION (decl),
32485 		    "No dispatcher found for %s", token);
32486 	  return 0;
32487 	}
32488       token = strtok (NULL, ",");
32489     }
32490   free (tok_str);
32491 
32492   if (predicate_list && predicate_chain == NULL_TREE)
32493     {
32494       error_at (DECL_SOURCE_LOCATION (decl),
32495 	        "No dispatcher found for the versioning attributes : %s",
32496 	        attrs_str);
32497       return 0;
32498     }
32499   else if (predicate_list)
32500     {
32501       predicate_chain = nreverse (predicate_chain);
32502       *predicate_list = predicate_chain;
32503     }
32504 
32505   return priority;
32506 }
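
/* As an illustration of the parsing above (a sketch, not compiler
   output): for a function version declared with

     __attribute__ ((target ("arch=skylake,avx2")))
     int foo (void);

   the "arch=" token selects arg_str "skylake" with priority P_PROC_AVX2
   and records a __builtin_cpu_is ("skylake") predicate, while the
   "avx2" token records a __builtin_cpu_supports ("avx2") predicate from
   feature_list.  The value returned is the maximum priority seen, so
   this version is tested before lower-priority (e.g. plain SSE)
   versions by the dispatcher.  */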
32507 
32508 /* This compares the priority of target features in function DECL1
32509    and DECL2.  It returns positive value if DECL1 is higher priority,
32510    negative value if DECL2 is higher priority and 0 if they are the
32511    same.  */
32512 
32513 static int
32514 ix86_compare_version_priority (tree decl1, tree decl2)
32515 {
32516   unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
32517   unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
32518 
32519   return (int)priority1 - (int)priority2;
32520 }
32521 
32522 /* V1 and V2 point to function versions with different priorities
32523    based on the target ISA.  This function compares their priorities.  */
32524 
32525 static int
32526 feature_compare (const void *v1, const void *v2)
32527 {
32528   typedef struct _function_version_info
32529     {
32530       tree version_decl;
32531       tree predicate_chain;
32532       unsigned int dispatch_priority;
32533     } function_version_info;
32534 
32535   const function_version_info c1 = *(const function_version_info *)v1;
32536   const function_version_info c2 = *(const function_version_info *)v2;
32537   return (c2.dispatch_priority - c1.dispatch_priority);
32538 }
32539 
32540 /* This function generates the dispatch function for
32541    multi-versioned functions.  DISPATCH_DECL is the function which will
32542    contain the dispatch logic.  FNDECLS are the function choices for
32543    dispatch, and is a tree chain.  EMPTY_BB is the basic block pointer
32544    in DISPATCH_DECL in which the dispatch code is generated.  */
32545 
32546 static int
32547 dispatch_function_versions (tree dispatch_decl,
32548 			    void *fndecls_p,
32549 			    basic_block *empty_bb)
32550 {
32551   tree default_decl;
32552   gimple *ifunc_cpu_init_stmt;
32553   gimple_seq gseq;
32554   int ix;
32555   tree ele;
32556   vec<tree> *fndecls;
32557   unsigned int num_versions = 0;
32558   unsigned int actual_versions = 0;
32559   unsigned int i;
32560 
32561   struct _function_version_info
32562     {
32563       tree version_decl;
32564       tree predicate_chain;
32565       unsigned int dispatch_priority;
32566     }*function_version_info;
32567 
32568   gcc_assert (dispatch_decl != NULL
32569 	      && fndecls_p != NULL
32570 	      && empty_bb != NULL);
32571 
32572   /* fndecls_p is actually a vector.  */
32573   fndecls = static_cast<vec<tree> *> (fndecls_p);
32574 
32575   /* At least one more version other than the default.  */
32576   num_versions = fndecls->length ();
32577   gcc_assert (num_versions >= 2);
32578 
32579   function_version_info = (struct _function_version_info *)
32580     XNEWVEC (struct _function_version_info, (num_versions - 1));
32581 
32582   /* The first version in the vector is the default decl.  */
32583   default_decl = (*fndecls)[0];
32584 
32585   push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
32586 
32587   gseq = bb_seq (*empty_bb);
32588   /* Function version dispatch is via IFUNC.  IFUNC resolvers fire before
32589      constructors, so explicitly call __builtin_cpu_init here.  */
32590   ifunc_cpu_init_stmt = gimple_build_call_vec (
32591                      ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
32592   gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
32593   gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
32594   set_bb_seq (*empty_bb, gseq);
32595 
32596   pop_cfun ();
32597 
32598 
32599   for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
32600     {
32601       tree version_decl = ele;
32602       tree predicate_chain = NULL_TREE;
32603       unsigned int priority;
32604       /* Get attribute string, parse it and find the right predicate decl.
32605          The predicate function could be a lengthy combination of many
32606 	 features, like arch-type and various isa-variants.  */
32607       priority = get_builtin_code_for_version (version_decl,
32608 	 			               &predicate_chain);
32609 
32610       if (predicate_chain == NULL_TREE)
32611 	continue;
32612 
32613       function_version_info [actual_versions].version_decl = version_decl;
32614       function_version_info [actual_versions].predicate_chain
32615 	 = predicate_chain;
32616       function_version_info [actual_versions].dispatch_priority = priority;
32617       actual_versions++;
32618     }
32619 
32620   /* Sort the versions according to descending order of dispatch priority.  The
32621      priority is based on the ISA.  This is not a perfect solution.  There
32622      could still be ambiguity.  If more than one function version is suitable
32623      to execute, which one should be dispatched?  In the future, allow the user
32624      to specify a dispatch priority next to the version.  */
32625   qsort (function_version_info, actual_versions,
32626          sizeof (struct _function_version_info), feature_compare);
32627 
32628   for  (i = 0; i < actual_versions; ++i)
32629     *empty_bb = add_condition_to_bb (dispatch_decl,
32630 				     function_version_info[i].version_decl,
32631 				     function_version_info[i].predicate_chain,
32632 				     *empty_bb);
32633 
32634   /* Dispatch default version at the end.  */
32635   *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
32636 				   NULL, *empty_bb);
32637 
32638   free (function_version_info);
32639   return 0;
32640 }
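
/* Conceptually the dispatch body assembled above behaves like the
   following pseudo-code (an illustrative sketch, not literal output):

     __builtin_cpu_init ();
     if (<predicates of the highest-priority version hold>)
       return <that version>;
     else if (<predicates of the next version hold>)
       return <that version>;
     ...
     return <default version>;

   where each predicate is the __builtin_cpu_is or __builtin_cpu_supports
   check recorded in the version's predicate chain, and the ordering
   comes from the qsort on dispatch_priority.  */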
32641 
32642 /* This function changes the assembler name for functions that are
32643    versions.  If DECL is a function version and has a "target"
32644    attribute, it appends the attribute string to its assembler name.  */
32645 
32646 static tree
32647 ix86_mangle_function_version_assembler_name (tree decl, tree id)
32648 {
32649   tree version_attr;
32650   const char *orig_name, *version_string;
32651   char *attr_str, *assembler_name;
32652 
32653   if (DECL_DECLARED_INLINE_P (decl)
32654       && lookup_attribute ("gnu_inline",
32655 			   DECL_ATTRIBUTES (decl)))
32656     error_at (DECL_SOURCE_LOCATION (decl),
32657 	      "Function versions cannot be marked as gnu_inline,"
32658 	      " bodies have to be generated");
32659 
32660   if (DECL_VIRTUAL_P (decl)
32661       || DECL_VINDEX (decl))
32662     sorry ("Virtual function multiversioning not supported");
32663 
32664   version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32665 
32666   /* target attribute string cannot be NULL.  */
32667   gcc_assert (version_attr != NULL_TREE);
32668 
32669   orig_name = IDENTIFIER_POINTER (id);
32670   version_string
32671     = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
32672 
32673   if (strcmp (version_string, "default") == 0)
32674     return id;
32675 
32676   attr_str = sorted_attr_string (TREE_VALUE (version_attr));
32677   assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
32678 
32679   sprintf (assembler_name, "%s.%s", orig_name, attr_str);
32680 
32681   /* Allow assembler name to be modified if already set.  */
32682   if (DECL_ASSEMBLER_NAME_SET_P (decl))
32683     SET_DECL_RTL (decl, NULL);
32684 
32685   tree ret = get_identifier (assembler_name);
32686   XDELETEVEC (attr_str);
32687   XDELETEVEC (assembler_name);
32688   return ret;
32689 }
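
/* For example (illustrative; the exact suffix is whatever
   sorted_attr_string produces): a version declared as

     __attribute__ ((target ("avx2"))) int foo (void);

   ends up with an assembler name along the lines of "foo.avx2", while
   the version carrying target ("default") keeps its original assembler
   name.  */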
32690 
32691 
32692 static tree
32693 ix86_mangle_decl_assembler_name (tree decl, tree id)
32694 {
32695   /* For function version, add the target suffix to the assembler name.  */
32696   if (TREE_CODE (decl) == FUNCTION_DECL
32697       && DECL_FUNCTION_VERSIONED (decl))
32698     id = ix86_mangle_function_version_assembler_name (decl, id);
32699 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
32700   id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
32701 #endif
32702 
32703   return id;
32704 }
32705 
32706 /* Make a dispatcher declaration for the multi-versioned function DECL.
32707    Calls to DECL function will be replaced with calls to the dispatcher
32708    by the front-end.  Returns the decl of the dispatcher function.  */
32709 
32710 static tree
32711 ix86_get_function_versions_dispatcher (void *decl)
32712 {
32713   tree fn = (tree) decl;
32714   struct cgraph_node *node = NULL;
32715   struct cgraph_node *default_node = NULL;
32716   struct cgraph_function_version_info *node_v = NULL;
32717   struct cgraph_function_version_info *first_v = NULL;
32718 
32719   tree dispatch_decl = NULL;
32720 
32721   struct cgraph_function_version_info *default_version_info = NULL;
32722 
32723   gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
32724 
32725   node = cgraph_node::get (fn);
32726   gcc_assert (node != NULL);
32727 
32728   node_v = node->function_version ();
32729   gcc_assert (node_v != NULL);
32730 
32731   if (node_v->dispatcher_resolver != NULL)
32732     return node_v->dispatcher_resolver;
32733 
32734   /* Find the default version and make it the first node.  */
32735   first_v = node_v;
32736   /* Go to the beginning of the chain.  */
32737   while (first_v->prev != NULL)
32738     first_v = first_v->prev;
32739   default_version_info = first_v;
32740   while (default_version_info != NULL)
32741     {
32742       if (is_function_default_version
32743 	    (default_version_info->this_node->decl))
32744         break;
32745       default_version_info = default_version_info->next;
32746     }
32747 
32748   /* If there is no default node, just return NULL.  */
32749   if (default_version_info == NULL)
32750     return NULL;
32751 
32752   /* Make default info the first node.  */
32753   if (first_v != default_version_info)
32754     {
32755       default_version_info->prev->next = default_version_info->next;
32756       if (default_version_info->next)
32757         default_version_info->next->prev = default_version_info->prev;
32758       first_v->prev = default_version_info;
32759       default_version_info->next = first_v;
32760       default_version_info->prev = NULL;
32761     }
32762 
32763   default_node = default_version_info->this_node;
32764 
32765 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32766   if (targetm.has_ifunc_p ())
32767     {
32768       struct cgraph_function_version_info *it_v = NULL;
32769       struct cgraph_node *dispatcher_node = NULL;
32770       struct cgraph_function_version_info *dispatcher_version_info = NULL;
32771 
32772       /* Right now, the dispatching is done via ifunc.  */
32773       dispatch_decl = make_dispatcher_decl (default_node->decl);
32774 
32775       dispatcher_node = cgraph_node::get_create (dispatch_decl);
32776       gcc_assert (dispatcher_node != NULL);
32777       dispatcher_node->dispatcher_function = 1;
32778       dispatcher_version_info
32779 	= dispatcher_node->insert_new_function_version ();
32780       dispatcher_version_info->next = default_version_info;
32781       dispatcher_node->definition = 1;
32782 
32783       /* Set the dispatcher for all the versions.  */
32784       it_v = default_version_info;
32785       while (it_v != NULL)
32786 	{
32787 	  it_v->dispatcher_resolver = dispatch_decl;
32788 	  it_v = it_v->next;
32789 	}
32790     }
32791   else
32792 #endif
32793     {
32794       error_at (DECL_SOURCE_LOCATION (default_node->decl),
32795 		"multiversioning needs ifunc which is not supported "
32796 		"on this target");
32797     }
32798 
32799   return dispatch_decl;
32800 }
32801 
32802 /* Make the resolver function decl to dispatch the versions of
32803    a multi-versioned function, DEFAULT_DECL.  IFUNC_ALIAS_DECL is
32804    the ifunc alias that will point to the created resolver.  Create an
32805    empty basic block in the resolver and store the pointer in
32806    EMPTY_BB.  Return the decl of the resolver function.  */
32807 
32808 static tree
32809 make_resolver_func (const tree default_decl,
32810 		    const tree ifunc_alias_decl,
32811 		    basic_block *empty_bb)
32812 {
32813   char *resolver_name;
32814   tree decl, type, decl_name, t;
32815 
32816   /* IFUNCs have to be globally visible.  So, if the default_decl is
32817      not, then the name of the IFUNC should be made unique.  */
32818   if (TREE_PUBLIC (default_decl) == 0)
32819     {
32820       char *ifunc_name = make_unique_name (default_decl, "ifunc", true);
32821       symtab->change_decl_assembler_name (ifunc_alias_decl,
32822 					  get_identifier (ifunc_name));
32823       XDELETEVEC (ifunc_name);
32824     }
32825 
32826   resolver_name = make_unique_name (default_decl, "resolver", false);
32827 
32828   /* The resolver function should return a (void *). */
32829   type = build_function_type_list (ptr_type_node, NULL_TREE);
32830 
32831   decl = build_fn_decl (resolver_name, type);
32832   decl_name = get_identifier (resolver_name);
32833   SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32834 
32835   DECL_NAME (decl) = decl_name;
32836   TREE_USED (decl) = 1;
32837   DECL_ARTIFICIAL (decl) = 1;
32838   DECL_IGNORED_P (decl) = 1;
32839   TREE_PUBLIC (decl) = 0;
32840   DECL_UNINLINABLE (decl) = 1;
32841 
32842   /* Resolver is not external, body is generated.  */
32843   DECL_EXTERNAL (decl) = 0;
32844   DECL_EXTERNAL (ifunc_alias_decl) = 0;
32845 
32846   DECL_CONTEXT (decl) = NULL_TREE;
32847   DECL_INITIAL (decl) = make_node (BLOCK);
32848   DECL_STATIC_CONSTRUCTOR (decl) = 0;
32849 
32850   if (DECL_COMDAT_GROUP (default_decl)
32851       || TREE_PUBLIC (default_decl))
32852     {
32853       /* In this case, each translation unit with a call to this
32854 	 versioned function will put out a resolver.  Ensure it
32855 	 is comdat to keep just one copy.  */
32856       DECL_COMDAT (decl) = 1;
32857       make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32858     }
32859   /* Build result decl and add to function_decl. */
32860   t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32861   DECL_ARTIFICIAL (t) = 1;
32862   DECL_IGNORED_P (t) = 1;
32863   DECL_RESULT (decl) = t;
32864 
32865   gimplify_function_tree (decl);
32866   push_cfun (DECL_STRUCT_FUNCTION (decl));
32867   *empty_bb = init_lowered_empty_function (decl, false,
32868 					   profile_count::uninitialized ());
32869 
32870   cgraph_node::add_new_function (decl, true);
32871   symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
32872 
32873   pop_cfun ();
32874 
32875   gcc_assert (ifunc_alias_decl != NULL);
32876   /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name.  */
32877   DECL_ATTRIBUTES (ifunc_alias_decl)
32878     = make_attribute ("ifunc", resolver_name,
32879 		      DECL_ATTRIBUTES (ifunc_alias_decl));
32880 
32881   /* Create the alias for dispatch to resolver here.  */
32882   cgraph_node::create_same_body_alias (ifunc_alias_decl, decl);
32883   XDELETEVEC (resolver_name);
32884   return decl;
32885 }
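
/* At the source level the arrangement created here corresponds roughly
   to (an illustrative sketch; the real names come from make_unique_name,
   and the body is filled in later):

     static void *foo_resolver (void);
     int foo (void) __attribute__ ((ifunc ("foo_resolver")));

   i.e. the ifunc alias gets an "ifunc" attribute naming the resolver,
   and the resolver returns a pointer to the version selected at load
   time.  */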
32886 
32887 /* Generate the dispatching code body to dispatch multi-versioned function
32888    DECL.  The target hook is called to process the "target" attributes and
32889    provide the code to dispatch the right function at run-time.  NODE points
32890    to the dispatcher decl whose body will be created.  */
32891 
32892 static tree
32893 ix86_generate_version_dispatcher_body (void *node_p)
32894 {
32895   tree resolver_decl;
32896   basic_block empty_bb;
32897   tree default_ver_decl;
32898   struct cgraph_node *versn;
32899   struct cgraph_node *node;
32900 
32901   struct cgraph_function_version_info *node_version_info = NULL;
32902   struct cgraph_function_version_info *versn_info = NULL;
32903 
32904   node = (cgraph_node *)node_p;
32905 
32906   node_version_info = node->function_version ();
32907   gcc_assert (node->dispatcher_function
32908 	      && node_version_info != NULL);
32909 
32910   if (node_version_info->dispatcher_resolver)
32911     return node_version_info->dispatcher_resolver;
32912 
32913   /* The first version in the chain corresponds to the default version.  */
32914   default_ver_decl = node_version_info->next->this_node->decl;
32915 
32916   /* node is going to be an alias, so remove the finalized bit.  */
32917   node->definition = false;
32918 
32919   resolver_decl = make_resolver_func (default_ver_decl,
32920 				      node->decl, &empty_bb);
32921 
32922   node_version_info->dispatcher_resolver = resolver_decl;
32923 
32924   push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32925 
32926   auto_vec<tree, 2> fn_ver_vec;
32927 
32928   for (versn_info = node_version_info->next; versn_info;
32929        versn_info = versn_info->next)
32930     {
32931       versn = versn_info->this_node;
32932       /* Check for virtual functions here again, as by this time it should
32933 	 have been determined if this function needs a vtable index or
32934 	 not.  This happens for methods in derived classes that override
32935 	 virtual methods in base classes but are not explicitly marked as
32936 	 virtual.  */
32937       if (DECL_VINDEX (versn->decl))
32938 	sorry ("Virtual function multiversioning not supported");
32939 
32940       fn_ver_vec.safe_push (versn->decl);
32941     }
32942 
32943   dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32944   cgraph_edge::rebuild_edges ();
32945   pop_cfun ();
32946   return resolver_decl;
32947 }
32948 /* This builds the processor_model struct type defined in
32949    libgcc/config/i386/cpuinfo.c.  */
32950 
32951 static tree
32952 build_processor_model_struct (void)
32953 {
32954   const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32955 			      "__cpu_features"};
32956   tree field = NULL_TREE, field_chain = NULL_TREE;
32957   int i;
32958   tree type = make_node (RECORD_TYPE);
32959 
32960   /* The first 3 fields are unsigned int.  */
32961   for (i = 0; i < 3; ++i)
32962     {
32963       field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32964 			  get_identifier (field_name[i]), unsigned_type_node);
32965       if (field_chain != NULL_TREE)
32966 	DECL_CHAIN (field) = field_chain;
32967       field_chain = field;
32968     }
32969 
32970   /* The last field is an array of unsigned integers of size one.  */
32971   field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32972 		      get_identifier (field_name[3]),
32973 		      build_array_type (unsigned_type_node,
32974 					build_index_type (size_one_node)));
32975   if (field_chain != NULL_TREE)
32976     DECL_CHAIN (field) = field_chain;
32977   field_chain = field;
32978 
32979   finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32980   return type;
32981 }
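
/* For reference, the record built above is meant to match the
   structure libgcc defines (the authoritative definition is in
   libgcc/config/i386/cpuinfo.c):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };  */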
32982 
32983 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME.  */
32984 
32985 static tree
32986 make_var_decl (tree type, const char *name)
32987 {
32988   tree new_decl;
32989 
32990   new_decl = build_decl (UNKNOWN_LOCATION,
32991 	                 VAR_DECL,
32992 	  	         get_identifier(name),
32993 		         type);
32994 
32995   DECL_EXTERNAL (new_decl) = 1;
32996   TREE_STATIC (new_decl) = 1;
32997   TREE_PUBLIC (new_decl) = 1;
32998   DECL_INITIAL (new_decl) = 0;
32999   DECL_ARTIFICIAL (new_decl) = 0;
33000   DECL_PRESERVE_P (new_decl) = 1;
33001 
33002   make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
33003   assemble_variable (new_decl, 0, 0, 0);
33004 
33005   return new_decl;
33006 }
33007 
33008 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
33009    into an integer defined in libgcc/config/i386/cpuinfo.c */
33010 
33011 static tree
33012 fold_builtin_cpu (tree fndecl, tree *args)
33013 {
33014   unsigned int i;
33015   enum ix86_builtins fn_code = (enum ix86_builtins)
33016 				DECL_FUNCTION_CODE (fndecl);
33017   tree param_string_cst = NULL;
33018 
33019   /* This is the order of bit-fields in __processor_features in cpuinfo.c.  */
33020   enum processor_features
33021   {
33022     F_CMOV = 0,
33023     F_MMX,
33024     F_POPCNT,
33025     F_SSE,
33026     F_SSE2,
33027     F_SSE3,
33028     F_SSSE3,
33029     F_SSE4_1,
33030     F_SSE4_2,
33031     F_AVX,
33032     F_AVX2,
33033     F_SSE4_A,
33034     F_FMA4,
33035     F_XOP,
33036     F_FMA,
33037     F_AVX512F,
33038     F_BMI,
33039     F_BMI2,
33040     F_AES,
33041     F_PCLMUL,
33042     F_AVX512VL,
33043     F_AVX512BW,
33044     F_AVX512DQ,
33045     F_AVX512CD,
33046     F_AVX512ER,
33047     F_AVX512PF,
33048     F_AVX512VBMI,
33049     F_AVX512IFMA,
33050     F_AVX5124VNNIW,
33051     F_AVX5124FMAPS,
33052     F_AVX512VPOPCNTDQ,
33053     F_AVX512VBMI2,
33054     F_GFNI,
33055     F_VPCLMULQDQ,
33056     F_AVX512VNNI,
33057     F_AVX512BITALG,
33058     F_MAX
33059   };
33060 
33061   /* These are the values for vendor types and cpu types and subtypes
33062      in cpuinfo.c.  Cpu types and subtypes should have the corresponding
33063      start value subtracted.  */
33064   enum processor_model
33065   {
33066     M_INTEL = 1,
33067     M_AMD,
33068     M_CPU_TYPE_START,
33069     M_INTEL_BONNELL,
33070     M_INTEL_CORE2,
33071     M_INTEL_COREI7,
33072     M_AMDFAM10H,
33073     M_AMDFAM15H,
33074     M_INTEL_SILVERMONT,
33075     M_INTEL_KNL,
33076     M_AMD_BTVER1,
33077     M_AMD_BTVER2,
33078     M_AMDFAM17H,
33079     M_INTEL_KNM,
33080     M_CPU_SUBTYPE_START,
33081     M_INTEL_COREI7_NEHALEM,
33082     M_INTEL_COREI7_WESTMERE,
33083     M_INTEL_COREI7_SANDYBRIDGE,
33084     M_AMDFAM10H_BARCELONA,
33085     M_AMDFAM10H_SHANGHAI,
33086     M_AMDFAM10H_ISTANBUL,
33087     M_AMDFAM15H_BDVER1,
33088     M_AMDFAM15H_BDVER2,
33089     M_AMDFAM15H_BDVER3,
33090     M_AMDFAM15H_BDVER4,
33091     M_AMDFAM17H_ZNVER1,
33092     M_INTEL_COREI7_IVYBRIDGE,
33093     M_INTEL_COREI7_HASWELL,
33094     M_INTEL_COREI7_BROADWELL,
33095     M_INTEL_COREI7_SKYLAKE,
33096     M_INTEL_COREI7_SKYLAKE_AVX512,
33097     M_INTEL_COREI7_CANNONLAKE,
33098     M_INTEL_COREI7_ICELAKE_CLIENT,
33099     M_INTEL_COREI7_ICELAKE_SERVER
33100   };
33101 
33102   static struct _arch_names_table
33103     {
33104       const char *const name;
33105       const enum processor_model model;
33106     }
33107   const arch_names_table[] =
33108     {
33109       {"amd", M_AMD},
33110       {"intel", M_INTEL},
33111       {"atom", M_INTEL_BONNELL},
33112       {"slm", M_INTEL_SILVERMONT},
33113       {"core2", M_INTEL_CORE2},
33114       {"corei7", M_INTEL_COREI7},
33115       {"nehalem", M_INTEL_COREI7_NEHALEM},
33116       {"westmere", M_INTEL_COREI7_WESTMERE},
33117       {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
33118       {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
33119       {"haswell", M_INTEL_COREI7_HASWELL},
33120       {"broadwell", M_INTEL_COREI7_BROADWELL},
33121       {"skylake", M_INTEL_COREI7_SKYLAKE},
33122       {"skylake-avx512", M_INTEL_COREI7_SKYLAKE_AVX512},
33123       {"cannonlake", M_INTEL_COREI7_CANNONLAKE},
33124       {"icelake-client", M_INTEL_COREI7_ICELAKE_CLIENT},
33125       {"icelake-server", M_INTEL_COREI7_ICELAKE_SERVER},
33126       {"bonnell", M_INTEL_BONNELL},
33127       {"silvermont", M_INTEL_SILVERMONT},
33128       {"knl", M_INTEL_KNL},
33129       {"knm", M_INTEL_KNM},
33130       {"amdfam10h", M_AMDFAM10H},
33131       {"barcelona", M_AMDFAM10H_BARCELONA},
33132       {"shanghai", M_AMDFAM10H_SHANGHAI},
33133       {"istanbul", M_AMDFAM10H_ISTANBUL},
33134       {"btver1", M_AMD_BTVER1},
33135       {"amdfam15h", M_AMDFAM15H},
33136       {"bdver1", M_AMDFAM15H_BDVER1},
33137       {"bdver2", M_AMDFAM15H_BDVER2},
33138       {"bdver3", M_AMDFAM15H_BDVER3},
33139       {"bdver4", M_AMDFAM15H_BDVER4},
33140       {"btver2", M_AMD_BTVER2},
33141       {"amdfam17h", M_AMDFAM17H},
33142       {"znver1", M_AMDFAM17H_ZNVER1},
33143     };
33144 
33145   static struct _isa_names_table
33146     {
33147       const char *const name;
33148       const enum processor_features feature;
33149     }
33150   const isa_names_table[] =
33151     {
33152       {"cmov",    F_CMOV},
33153       {"mmx",     F_MMX},
33154       {"popcnt",  F_POPCNT},
33155       {"sse",     F_SSE},
33156       {"sse2",    F_SSE2},
33157       {"sse3",    F_SSE3},
33158       {"ssse3",   F_SSSE3},
33159       {"sse4a",   F_SSE4_A},
33160       {"sse4.1",  F_SSE4_1},
33161       {"sse4.2",  F_SSE4_2},
33162       {"avx",     F_AVX},
33163       {"fma4",    F_FMA4},
33164       {"xop",     F_XOP},
33165       {"fma",     F_FMA},
33166       {"avx2",    F_AVX2},
33167       {"avx512f", F_AVX512F},
33168       {"bmi",     F_BMI},
33169       {"bmi2",    F_BMI2},
33170       {"aes",     F_AES},
33171       {"pclmul",  F_PCLMUL},
33172       {"avx512vl",F_AVX512VL},
33173       {"avx512bw",F_AVX512BW},
33174       {"avx512dq",F_AVX512DQ},
33175       {"avx512cd",F_AVX512CD},
33176       {"avx512er",F_AVX512ER},
33177       {"avx512pf",F_AVX512PF},
33178       {"avx512vbmi",F_AVX512VBMI},
33179       {"avx512ifma",F_AVX512IFMA},
33180       {"avx5124vnniw",F_AVX5124VNNIW},
33181       {"avx5124fmaps",F_AVX5124FMAPS},
33182       {"avx512vpopcntdq",F_AVX512VPOPCNTDQ},
33183       {"avx512vbmi2", F_AVX512VBMI2},
33184       {"gfni", F_GFNI},
33185       {"vpclmulqdq", F_VPCLMULQDQ},
33186       {"avx512vnni", F_AVX512VNNI},
33187       {"avx512bitalg", F_AVX512BITALG}
33188     };
33189 
33190   tree __processor_model_type = build_processor_model_struct ();
33191   tree __cpu_model_var = make_var_decl (__processor_model_type,
33192 					"__cpu_model");
33193 
33194 
33195   varpool_node::add (__cpu_model_var);
33196 
33197   gcc_assert ((args != NULL) && (*args != NULL));
33198 
33199   param_string_cst = *args;
33200   while (param_string_cst
33201 	 && TREE_CODE (param_string_cst) !=  STRING_CST)
33202     {
33203       /* *args must be an expr that can contain other EXPRs leading to a
33204 	 STRING_CST.  */
33205       if (!EXPR_P (param_string_cst))
33206  	{
33207 	  error ("Parameter to builtin must be a string constant or literal");
33208 	  return integer_zero_node;
33209 	}
33210       param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
33211     }
33212 
33213   gcc_assert (param_string_cst);
33214 
33215   if (fn_code == IX86_BUILTIN_CPU_IS)
33216     {
33217       tree ref;
33218       tree field;
33219       tree final;
33220 
33221       unsigned int field_val = 0;
33222       unsigned int NUM_ARCH_NAMES
33223 	= sizeof (arch_names_table) / sizeof (struct _arch_names_table);
33224 
33225       for (i = 0; i < NUM_ARCH_NAMES; i++)
33226 	if (strcmp (arch_names_table[i].name,
33227 	    TREE_STRING_POINTER (param_string_cst)) == 0)
33228 	  break;
33229 
33230       if (i == NUM_ARCH_NAMES)
33231 	{
33232 	  error ("Parameter to builtin not valid: %s",
33233 	         TREE_STRING_POINTER (param_string_cst));
33234 	  return integer_zero_node;
33235 	}
33236 
33237       field = TYPE_FIELDS (__processor_model_type);
33238       field_val = arch_names_table[i].model;
33239 
33240       /* CPU types are stored in the next field.  */
33241       if (field_val > M_CPU_TYPE_START
33242 	  && field_val < M_CPU_SUBTYPE_START)
33243 	{
33244 	  field = DECL_CHAIN (field);
33245 	  field_val -= M_CPU_TYPE_START;
33246 	}
33247 
33248       /* CPU subtypes are stored in the next field.  */
33249       if (field_val > M_CPU_SUBTYPE_START)
33250 	{
33251 	  field = DECL_CHAIN ( DECL_CHAIN (field));
33252 	  field_val -= M_CPU_SUBTYPE_START;
33253 	}
33254 
33255       /* Get the appropriate field in __cpu_model.  */
33256       ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
33257 		    field, NULL_TREE);
33258 
33259       /* Check the value.  */
33260       final = build2 (EQ_EXPR, unsigned_type_node, ref,
33261 		      build_int_cstu (unsigned_type_node, field_val));
33262       return build1 (CONVERT_EXPR, integer_type_node, final);
33263     }
33264   else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
33265     {
33266       tree ref;
33267       tree array_elt;
33268       tree field;
33269       tree final;
33270 
33271       unsigned int field_val = 0;
33272       unsigned int NUM_ISA_NAMES
33273 	= sizeof (isa_names_table) / sizeof (struct _isa_names_table);
33274 
33275       for (i = 0; i < NUM_ISA_NAMES; i++)
33276 	if (strcmp (isa_names_table[i].name,
33277 	    TREE_STRING_POINTER (param_string_cst)) == 0)
33278 	  break;
33279 
33280       if (i == NUM_ISA_NAMES)
33281 	{
33282 	  error ("Parameter to builtin not valid: %s",
33283 	       	 TREE_STRING_POINTER (param_string_cst));
33284 	  return integer_zero_node;
33285 	}
33286 
33287       if (isa_names_table[i].feature >= 32)
33288 	{
33289 	  tree __cpu_features2_var = make_var_decl (unsigned_type_node,
33290 						    "__cpu_features2");
33291 
33292 	  varpool_node::add (__cpu_features2_var);
33293 	  field_val = (1U << (isa_names_table[i].feature - 32));
33294 	  /* Return __cpu_features2 & field_val  */
33295 	  final = build2 (BIT_AND_EXPR, unsigned_type_node,
33296 			  __cpu_features2_var,
33297 			  build_int_cstu (unsigned_type_node, field_val));
33298 	  return build1 (CONVERT_EXPR, integer_type_node, final);
33299 	}
33300 
33301       field = TYPE_FIELDS (__processor_model_type);
33302       /* Get the last field, which is __cpu_features.  */
33303       while (DECL_CHAIN (field))
33304         field = DECL_CHAIN (field);
33305 
33306       /* Get the appropriate field: __cpu_model.__cpu_features  */
33307       ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
33308 		    field, NULL_TREE);
33309 
33310       /* Access the 0th element of __cpu_features array.  */
33311       array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
33312 			  integer_zero_node, NULL_TREE, NULL_TREE);
33313 
33314       field_val = (1U << isa_names_table[i].feature);
33315       /* Return __cpu_model.__cpu_features[0] & field_val  */
33316       final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
33317 		      build_int_cstu (unsigned_type_node, field_val));
33318       return build1 (CONVERT_EXPR, integer_type_node, final);
33319     }
33320   gcc_unreachable ();
33321 }
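
/* Illustrative examples of the folding above (a sketch using the enum
   values defined in this function):

     __builtin_cpu_is ("haswell")
       ==> (int) (__cpu_model.__cpu_subtype
		  == M_INTEL_COREI7_HASWELL - M_CPU_SUBTYPE_START)

     __builtin_cpu_supports ("avx2")
       ==> (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX2))

   Features numbered 32 or higher are instead tested against the
   separate __cpu_features2 variable.  */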
33322 
33323 static tree
33324 ix86_fold_builtin (tree fndecl, int n_args,
33325 		   tree *args, bool ignore ATTRIBUTE_UNUSED)
33326 {
33327   if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
33328     {
33329       enum ix86_builtins fn_code = (enum ix86_builtins)
33330 				   DECL_FUNCTION_CODE (fndecl);
33331       switch (fn_code)
33332 	{
33333 	case IX86_BUILTIN_CPU_IS:
33334 	case IX86_BUILTIN_CPU_SUPPORTS:
33335 	  gcc_assert (n_args == 1);
33336 	  return fold_builtin_cpu (fndecl, args);
33337 
33338 	case IX86_BUILTIN_NANQ:
33339 	case IX86_BUILTIN_NANSQ:
33340 	  {
33341 	    tree type = TREE_TYPE (TREE_TYPE (fndecl));
33342 	    const char *str = c_getstr (*args);
33343 	    int quiet = fn_code == IX86_BUILTIN_NANQ;
33344 	    REAL_VALUE_TYPE real;
33345 
33346 	    if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
33347 	      return build_real (type, real);
33348 	    return NULL_TREE;
33349 	  }
33350 
33351 	case IX86_BUILTIN_INFQ:
33352 	case IX86_BUILTIN_HUGE_VALQ:
33353 	  {
33354 	    tree type = TREE_TYPE (TREE_TYPE (fndecl));
33355 	    REAL_VALUE_TYPE inf;
33356 	    real_inf (&inf);
33357 	    return build_real (type, inf);
33358 	  }
33359 
33360 	case IX86_BUILTIN_TZCNT16:
33361 	case IX86_BUILTIN_CTZS:
33362 	case IX86_BUILTIN_TZCNT32:
33363 	case IX86_BUILTIN_TZCNT64:
33364 	  gcc_assert (n_args == 1);
33365 	  if (TREE_CODE (args[0]) == INTEGER_CST)
33366 	    {
33367 	      tree type = TREE_TYPE (TREE_TYPE (fndecl));
33368 	      tree arg = args[0];
33369 	      if (fn_code == IX86_BUILTIN_TZCNT16
33370 		  || fn_code == IX86_BUILTIN_CTZS)
33371 		arg = fold_convert (short_unsigned_type_node, arg);
33372 	      if (integer_zerop (arg))
33373 		return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
33374 	      else
33375 		return fold_const_call (CFN_CTZ, type, arg);
33376 	    }
33377 	  break;
33378 
33379 	case IX86_BUILTIN_LZCNT16:
33380 	case IX86_BUILTIN_CLZS:
33381 	case IX86_BUILTIN_LZCNT32:
33382 	case IX86_BUILTIN_LZCNT64:
33383 	  gcc_assert (n_args == 1);
33384 	  if (TREE_CODE (args[0]) == INTEGER_CST)
33385 	    {
33386 	      tree type = TREE_TYPE (TREE_TYPE (fndecl));
33387 	      tree arg = args[0];
33388 	      if (fn_code == IX86_BUILTIN_LZCNT16
33389 		  || fn_code == IX86_BUILTIN_CLZS)
33390 		arg = fold_convert (short_unsigned_type_node, arg);
33391 	      if (integer_zerop (arg))
33392 		return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
33393 	      else
33394 		return fold_const_call (CFN_CLZ, type, arg);
33395 	    }
33396 	  break;
33397 
33398 	case IX86_BUILTIN_BEXTR32:
33399 	case IX86_BUILTIN_BEXTR64:
33400 	case IX86_BUILTIN_BEXTRI32:
33401 	case IX86_BUILTIN_BEXTRI64:
33402 	  gcc_assert (n_args == 2);
33403 	  if (tree_fits_uhwi_p (args[1]))
33404 	    {
33405 	      unsigned HOST_WIDE_INT res = 0;
33406 	      unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
33407 	      unsigned int start = tree_to_uhwi (args[1]);
33408 	      unsigned int len = (start & 0xff00) >> 8;
33409 	      start &= 0xff;
33410 	      if (start >= prec || len == 0)
33411 		res = 0;
33412 	      else if (!tree_fits_uhwi_p (args[0]))
33413 		break;
33414 	      else
33415 		res = tree_to_uhwi (args[0]) >> start;
33416 	      if (len > prec)
33417 		len = prec;
33418 	      if (len < HOST_BITS_PER_WIDE_INT)
33419 		res &= (HOST_WIDE_INT_1U << len) - 1;
33420 	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
33421 	    }
33422 	  break;
33423 
33424 	case IX86_BUILTIN_BZHI32:
33425 	case IX86_BUILTIN_BZHI64:
33426 	  gcc_assert (n_args == 2);
33427 	  if (tree_fits_uhwi_p (args[1]))
33428 	    {
33429 	      unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
33430 	      if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
33431 		return args[0];
33432 	      if (!tree_fits_uhwi_p (args[0]))
33433 		break;
33434 	      unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
33435 	      res &= ~(HOST_WIDE_INT_M1U << idx);
33436 	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
33437 	    }
33438 	  break;
33439 
33440 	case IX86_BUILTIN_PDEP32:
33441 	case IX86_BUILTIN_PDEP64:
33442 	  gcc_assert (n_args == 2);
33443 	  if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
33444 	    {
33445 	      unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
33446 	      unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
33447 	      unsigned HOST_WIDE_INT res = 0;
33448 	      unsigned HOST_WIDE_INT m, k = 1;
33449 	      for (m = 1; m; m <<= 1)
33450 		if ((mask & m) != 0)
33451 		  {
33452 		    if ((src & k) != 0)
33453 		      res |= m;
33454 		    k <<= 1;
33455 		  }
33456 	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
33457 	    }
33458 	  break;
33459 
33460 	case IX86_BUILTIN_PEXT32:
33461 	case IX86_BUILTIN_PEXT64:
33462 	  gcc_assert (n_args == 2);
33463 	  if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
33464 	    {
33465 	      unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
33466 	      unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
33467 	      unsigned HOST_WIDE_INT res = 0;
33468 	      unsigned HOST_WIDE_INT m, k = 1;
33469 	      for (m = 1; m; m <<= 1)
33470 		if ((mask & m) != 0)
33471 		  {
33472 		    if ((src & m) != 0)
33473 		      res |= k;
33474 		    k <<= 1;
33475 		  }
33476 	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
33477 	    }
33478 	  break;
33479 
33480 	default:
33481 	  break;
33482 	}
33483     }
33484 
33485 #ifdef SUBTARGET_FOLD_BUILTIN
33486   return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
33487 #endif
33488 
33489   return NULL_TREE;
33490 }
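
/* Illustrative examples of the constant folding above (shown with the
   usual <x86intrin.h> wrapper names rather than the internal builtin
   spellings):

     _tzcnt_u32 (0)         ==> 32    (precision of the operand type)
     _bzhi_u32 (x, 37)      ==> x     (index >= precision keeps the input)
     _pdep_u32 (0xf, 0xf0)  ==> 0xf0
     _pext_u32 (0xf0, 0xf0) ==> 0xf

   Each case only fires when the relevant arguments are INTEGER_CSTs
   (or, for BZHI, when the index alone already decides the result).  */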
33491 
33492 /* Fold a MD builtin (use ix86_fold_builtin for folding into
33493    constant) in GIMPLE.  */
33494 
33495 bool
33496 ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
33497 {
33498   gimple *stmt = gsi_stmt (*gsi);
33499   tree fndecl = gimple_call_fndecl (stmt);
33500   gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
33501   int n_args = gimple_call_num_args (stmt);
33502   enum ix86_builtins fn_code = (enum ix86_builtins) DECL_FUNCTION_CODE (fndecl);
33503   tree decl = NULL_TREE;
33504   tree arg0, arg1;
33505 
33506   switch (fn_code)
33507     {
33508     case IX86_BUILTIN_TZCNT32:
33509       decl = builtin_decl_implicit (BUILT_IN_CTZ);
33510       goto fold_tzcnt_lzcnt;
33511 
33512     case IX86_BUILTIN_TZCNT64:
33513       decl = builtin_decl_implicit (BUILT_IN_CTZLL);
33514       goto fold_tzcnt_lzcnt;
33515 
33516     case IX86_BUILTIN_LZCNT32:
33517       decl = builtin_decl_implicit (BUILT_IN_CLZ);
33518       goto fold_tzcnt_lzcnt;
33519 
33520     case IX86_BUILTIN_LZCNT64:
33521       decl = builtin_decl_implicit (BUILT_IN_CLZLL);
33522       goto fold_tzcnt_lzcnt;
33523 
33524     fold_tzcnt_lzcnt:
33525       gcc_assert (n_args == 1);
33526       arg0 = gimple_call_arg (stmt, 0);
33527       if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
33528 	{
33529 	  int prec = TYPE_PRECISION (TREE_TYPE (arg0));
33530 	  /* If arg0 is provably non-zero, optimize into generic
33531 	     __builtin_c[tl]z{,ll} functions, which the middle-end handles
33532 	     better.  */
33533 	  if (!expr_not_equal_to (arg0, wi::zero (prec)))
33534 	    return false;
33535 
33536 	  location_t loc = gimple_location (stmt);
33537 	  gimple *g = gimple_build_call (decl, 1, arg0);
33538 	  gimple_set_location (g, loc);
33539 	  tree lhs = make_ssa_name (integer_type_node);
33540 	  gimple_call_set_lhs (g, lhs);
33541 	  gsi_insert_before (gsi, g, GSI_SAME_STMT);
33542 	  g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
33543 	  gimple_set_location (g, loc);
33544 	  gsi_replace (gsi, g, false);
33545 	  return true;
33546 	}
33547       break;
33548 
33549     case IX86_BUILTIN_BZHI32:
33550     case IX86_BUILTIN_BZHI64:
33551       gcc_assert (n_args == 2);
33552       arg1 = gimple_call_arg (stmt, 1);
33553       if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
33554 	{
33555 	  unsigned int idx = tree_to_uhwi (arg1) & 0xff;
33556 	  arg0 = gimple_call_arg (stmt, 0);
33557 	  if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
33558 	    break;
33559 	  location_t loc = gimple_location (stmt);
33560 	  gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
33561 	  gimple_set_location (g, loc);
33562 	  gsi_replace (gsi, g, false);
33563 	  return true;
33564 	}
33565       break;
33566 
33567     case IX86_BUILTIN_PDEP32:
33568     case IX86_BUILTIN_PDEP64:
33569     case IX86_BUILTIN_PEXT32:
33570     case IX86_BUILTIN_PEXT64:
33571       gcc_assert (n_args == 2);
33572       arg1 = gimple_call_arg (stmt, 1);
33573       if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
33574 	{
33575 	  location_t loc = gimple_location (stmt);
33576 	  arg0 = gimple_call_arg (stmt, 0);
33577 	  gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
33578 	  gimple_set_location (g, loc);
33579 	  gsi_replace (gsi, g, false);
33580 	  return true;
33581 	}
33582       break;
33583 
33584     default:
33585       break;
33586     }
33587 
33588   return false;
33589 }
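
/* Sketch of the TZCNT/LZCNT rewrite above (illustrative GIMPLE, with
   made-up SSA names): when x_2 is known to be non-zero,

     lhs_1 = __builtin_ia32_tzcnt_u32 (x_2);

   is replaced by

     tmp_3 = __builtin_ctz (x_2);
     lhs_1 = (unsigned int) tmp_3;

   so that the middle-end can apply its generic CTZ/CLZ
   simplifications.  */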
33590 
33591 /* Make builtins to detect cpu type and features supported.  NAME is
33592    the builtin name, CODE is the builtin code, FTYPE is the function
33593    type of the builtin, and IS_CONST says whether the builtin should be
33594    marked TREE_READONLY.  */
33594 
33595 static void
33596 make_cpu_type_builtin (const char* name, int code,
33597 		       enum ix86_builtin_func_type ftype, bool is_const)
33598 {
33599   tree decl;
33600   tree type;
33601 
33602   type = ix86_get_builtin_func_type (ftype);
33603   decl = add_builtin_function (name, type, code, BUILT_IN_MD,
33604 			       NULL, NULL_TREE);
33605   gcc_assert (decl != NULL_TREE);
33606   ix86_builtins[(int) code] = decl;
33607   TREE_READONLY (decl) = is_const;
33608 }
33609 
33610 /* Make builtins to get CPU type and features supported.  The created
33611    builtins are:
33612 
33613    __builtin_cpu_init (), to detect cpu type and features,
33614    __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
33615    __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
33616    */
33617 
33618 static void
33619 ix86_init_platform_type_builtins (void)
33620 {
33621   make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
33622 			 INT_FTYPE_VOID, false);
33623   make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
33624 			 INT_FTYPE_PCCHAR, true);
33625   make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
33626 			 INT_FTYPE_PCCHAR, true);
33627 }
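
/* These are the user-visible builtins documented in the GCC manual;
   typical use (illustrative, with hypothetical helper functions):

     if (__builtin_cpu_supports ("avx2"))
       use_avx2_kernel ();
     else if (__builtin_cpu_is ("bonnell"))
       use_atom_kernel ();
     else
       use_generic_kernel ();

   __builtin_cpu_init only needs to be called explicitly from code that
   runs before the libgcc constructors, e.g. from an IFUNC resolver as
   dispatch_function_versions does above.  */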
33628 
33629 /* Internal method for ix86_init_builtins.  */
33630 
33631 static void
33632 ix86_init_builtins_va_builtins_abi (void)
33633 {
33634   tree ms_va_ref, sysv_va_ref;
33635   tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
33636   tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
33637   tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
33638   tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
33639 
33640   if (!TARGET_64BIT)
33641     return;
33642   fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
33643   fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
33644   ms_va_ref = build_reference_type (ms_va_list_type_node);
33645   sysv_va_ref =
33646     build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
33647 
33648   fnvoid_va_end_ms =
33649     build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
33650   fnvoid_va_start_ms =
33651     build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
33652   fnvoid_va_end_sysv =
33653     build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
33654   fnvoid_va_start_sysv =
33655     build_varargs_function_type_list (void_type_node, sysv_va_ref,
33656     				       NULL_TREE);
33657   fnvoid_va_copy_ms =
33658     build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
33659     			      NULL_TREE);
33660   fnvoid_va_copy_sysv =
33661     build_function_type_list (void_type_node, sysv_va_ref,
33662     			      sysv_va_ref, NULL_TREE);
33663 
33664   add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
33665   			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
33666   add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
33667   			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
33668   add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
33669 			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
33670   add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
33671   			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
33672   add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
33673   			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
33674   add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
33675 			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
33676 }
33677 
33678 static void
33679 ix86_init_builtin_types (void)
33680 {
33681   tree float80_type_node, const_string_type_node;
33682 
33683   /* The __float80 type.  */
33684   float80_type_node = long_double_type_node;
33685   if (TYPE_MODE (float80_type_node) != XFmode)
33686     {
33687       if (float64x_type_node != NULL_TREE
33688 	  && TYPE_MODE (float64x_type_node) == XFmode)
33689 	float80_type_node = float64x_type_node;
33690       else
33691 	{
33692 	  /* The __float80 type.  */
33693 	  float80_type_node = make_node (REAL_TYPE);
33694 
33695 	  TYPE_PRECISION (float80_type_node) = 80;
33696 	  layout_type (float80_type_node);
33697 	}
33698     }
33699   lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
33700 
33701   /* The __float128 type.  The node has already been created as
33702      _Float128, so we only need to register the __float128 name for
33703      it.  */
33704   lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
33705 
33706   const_string_type_node
33707     = build_pointer_type (build_qualified_type
33708 			  (char_type_node, TYPE_QUAL_CONST));
33709 
33710   /* This macro is built by i386-builtin-types.awk.  */
33711   DEFINE_BUILTIN_PRIMITIVE_TYPES;
33712 }
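
/* Once registered, these types can be used directly in user code
   (illustrative):

     __float80  e = 1.0w;     80-bit x87 extended precision (XFmode)
     __float128 q = 1.0q;     128-bit IEEE binary128 (TFmode)

   The 'w' and 'q' constant suffixes are the GCC extensions that
   correspond to these types on x86.  */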
33713 
33714 static void
33715 ix86_init_builtins (void)
33716 {
33717   tree ftype, decl;
33718 
33719   ix86_init_builtin_types ();
33720 
33721   /* Builtins to get CPU type and features. */
33722   ix86_init_platform_type_builtins ();
33723 
33724   /* TFmode support builtins.  */
33725   def_builtin_const (0, "__builtin_infq",
33726 		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
33727   def_builtin_const (0, "__builtin_huge_valq",
33728 		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
33729 
33730   ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_CONST_STRING);
33731   decl = add_builtin_function ("__builtin_nanq", ftype, IX86_BUILTIN_NANQ,
33732 			       BUILT_IN_MD, "nanq", NULL_TREE);
33733   TREE_READONLY (decl) = 1;
33734   ix86_builtins[(int) IX86_BUILTIN_NANQ] = decl;
33735 
33736   decl = add_builtin_function ("__builtin_nansq", ftype, IX86_BUILTIN_NANSQ,
33737 			       BUILT_IN_MD, "nansq", NULL_TREE);
33738   TREE_READONLY (decl) = 1;
33739   ix86_builtins[(int) IX86_BUILTIN_NANSQ] = decl;
33740 
33741   /* We will expand them to a normal call if SSE isn't available since
33742      they are used by libgcc.  */
33743   ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
33744   decl = add_builtin_function ("__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ,
33745 			       BUILT_IN_MD, "__fabstf2", NULL_TREE);
33746   TREE_READONLY (decl) = 1;
33747   ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
33748 
33749   ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
33750   decl = add_builtin_function ("__builtin_copysignq", ftype,
33751 			       IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
33752 			       "__copysigntf3", NULL_TREE);
33753   TREE_READONLY (decl) = 1;
33754   ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
33755 
33756   ix86_init_tm_builtins ();
33757   ix86_init_mmx_sse_builtins ();
33758   ix86_init_mpx_builtins ();
33759 
33760   if (TARGET_LP64)
33761     ix86_init_builtins_va_builtins_abi ();
33762 
33763 #ifdef SUBTARGET_INIT_BUILTINS
33764   SUBTARGET_INIT_BUILTINS;
33765 #endif
33766 }
33767 
33768 /* Return the ix86 builtin for CODE.  */
33769 
33770 static tree
33771 ix86_builtin_decl (unsigned code, bool)
33772 {
33773   if (code >= IX86_BUILTIN_MAX)
33774     return error_mark_node;
33775 
33776   return ix86_builtins[code];
33777 }
33778 
33779 /* Errors in the source file can cause expand_expr to return const0_rtx
33780    where we expect a vector.  To avoid crashing, use one of the vector
33781    clear instructions.  */
33782 static rtx
33783 safe_vector_operand (rtx x, machine_mode mode)
33784 {
33785   if (x == const0_rtx)
33786     x = CONST0_RTX (mode);
33787   return x;
33788 }
33789 
33790 /* Fixup modeless constants to fit required mode.  */
33791 static rtx
33792 fixup_modeless_constant (rtx x, machine_mode mode)
33793 {
33794   if (GET_MODE (x) == VOIDmode)
33795     x = convert_to_mode (mode, x, 1);
33796   return x;
33797 }
33798 
33799 /* Subroutine of ix86_expand_builtin to take care of binop insns.  */
33800 
33801 static rtx
33802 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
33803 {
33804   rtx pat;
33805   tree arg0 = CALL_EXPR_ARG (exp, 0);
33806   tree arg1 = CALL_EXPR_ARG (exp, 1);
33807   rtx op0 = expand_normal (arg0);
33808   rtx op1 = expand_normal (arg1);
33809   machine_mode tmode = insn_data[icode].operand[0].mode;
33810   machine_mode mode0 = insn_data[icode].operand[1].mode;
33811   machine_mode mode1 = insn_data[icode].operand[2].mode;
33812 
33813   if (VECTOR_MODE_P (mode0))
33814     op0 = safe_vector_operand (op0, mode0);
33815   if (VECTOR_MODE_P (mode1))
33816     op1 = safe_vector_operand (op1, mode1);
33817 
33818   if (optimize || !target
33819       || GET_MODE (target) != tmode
33820       || !insn_data[icode].operand[0].predicate (target, tmode))
33821     target = gen_reg_rtx (tmode);
33822 
33823   if (GET_MODE (op1) == SImode && mode1 == TImode)
33824     {
33825       rtx x = gen_reg_rtx (V4SImode);
33826       emit_insn (gen_sse2_loadd (x, op1));
33827       op1 = gen_lowpart (TImode, x);
33828     }
33829 
33830   if (!insn_data[icode].operand[1].predicate (op0, mode0))
33831     op0 = copy_to_mode_reg (mode0, op0);
33832   if (!insn_data[icode].operand[2].predicate (op1, mode1))
33833     op1 = copy_to_mode_reg (mode1, op1);
33834 
33835   pat = GEN_FCN (icode) (target, op0, op1);
33836   if (! pat)
33837     return 0;
33838 
33839   emit_insn (pat);
33840 
33841   return target;
33842 }
33843 
33844 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */
33845 
33846 static rtx
33847 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
33848 			       enum ix86_builtin_func_type m_type,
33849 			       enum rtx_code sub_code)
33850 {
33851   rtx pat;
33852   int i;
33853   int nargs;
33854   bool comparison_p = false;
33855   bool tf_p = false;
33856   bool last_arg_constant = false;
33857   int num_memory = 0;
33858   struct {
33859     rtx op;
33860     machine_mode mode;
33861   } args[4];
33862 
33863   machine_mode tmode = insn_data[icode].operand[0].mode;
33864 
33865   switch (m_type)
33866     {
33867     case MULTI_ARG_4_DF2_DI_I:
33868     case MULTI_ARG_4_DF2_DI_I1:
33869     case MULTI_ARG_4_SF2_SI_I:
33870     case MULTI_ARG_4_SF2_SI_I1:
33871       nargs = 4;
33872       last_arg_constant = true;
33873       break;
33874 
33875     case MULTI_ARG_3_SF:
33876     case MULTI_ARG_3_DF:
33877     case MULTI_ARG_3_SF2:
33878     case MULTI_ARG_3_DF2:
33879     case MULTI_ARG_3_DI:
33880     case MULTI_ARG_3_SI:
33881     case MULTI_ARG_3_SI_DI:
33882     case MULTI_ARG_3_HI:
33883     case MULTI_ARG_3_HI_SI:
33884     case MULTI_ARG_3_QI:
33885     case MULTI_ARG_3_DI2:
33886     case MULTI_ARG_3_SI2:
33887     case MULTI_ARG_3_HI2:
33888     case MULTI_ARG_3_QI2:
33889       nargs = 3;
33890       break;
33891 
33892     case MULTI_ARG_2_SF:
33893     case MULTI_ARG_2_DF:
33894     case MULTI_ARG_2_DI:
33895     case MULTI_ARG_2_SI:
33896     case MULTI_ARG_2_HI:
33897     case MULTI_ARG_2_QI:
33898       nargs = 2;
33899       break;
33900 
33901     case MULTI_ARG_2_DI_IMM:
33902     case MULTI_ARG_2_SI_IMM:
33903     case MULTI_ARG_2_HI_IMM:
33904     case MULTI_ARG_2_QI_IMM:
33905       nargs = 2;
33906       last_arg_constant = true;
33907       break;
33908 
33909     case MULTI_ARG_1_SF:
33910     case MULTI_ARG_1_DF:
33911     case MULTI_ARG_1_SF2:
33912     case MULTI_ARG_1_DF2:
33913     case MULTI_ARG_1_DI:
33914     case MULTI_ARG_1_SI:
33915     case MULTI_ARG_1_HI:
33916     case MULTI_ARG_1_QI:
33917     case MULTI_ARG_1_SI_DI:
33918     case MULTI_ARG_1_HI_DI:
33919     case MULTI_ARG_1_HI_SI:
33920     case MULTI_ARG_1_QI_DI:
33921     case MULTI_ARG_1_QI_SI:
33922     case MULTI_ARG_1_QI_HI:
33923       nargs = 1;
33924       break;
33925 
33926     case MULTI_ARG_2_DI_CMP:
33927     case MULTI_ARG_2_SI_CMP:
33928     case MULTI_ARG_2_HI_CMP:
33929     case MULTI_ARG_2_QI_CMP:
33930       nargs = 2;
33931       comparison_p = true;
33932       break;
33933 
33934     case MULTI_ARG_2_SF_TF:
33935     case MULTI_ARG_2_DF_TF:
33936     case MULTI_ARG_2_DI_TF:
33937     case MULTI_ARG_2_SI_TF:
33938     case MULTI_ARG_2_HI_TF:
33939     case MULTI_ARG_2_QI_TF:
33940       nargs = 2;
33941       tf_p = true;
33942       break;
33943 
33944     default:
33945       gcc_unreachable ();
33946     }
33947 
33948   if (optimize || !target
33949       || GET_MODE (target) != tmode
33950       || !insn_data[icode].operand[0].predicate (target, tmode))
33951     target = gen_reg_rtx (tmode);
33952   else if (memory_operand (target, tmode))
33953     num_memory++;
33954 
33955   gcc_assert (nargs <= 4);
33956 
33957   for (i = 0; i < nargs; i++)
33958     {
33959       tree arg = CALL_EXPR_ARG (exp, i);
33960       rtx op = expand_normal (arg);
33961       int adjust = (comparison_p) ? 1 : 0;
33962       machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
33963 
33964       if (last_arg_constant && i == nargs - 1)
33965 	{
33966 	  if (!insn_data[icode].operand[i + 1].predicate (op, mode))
33967 	    {
33968 	      enum insn_code new_icode = icode;
33969 	      switch (icode)
33970 		{
33971 		case CODE_FOR_xop_vpermil2v2df3:
33972 		case CODE_FOR_xop_vpermil2v4sf3:
33973 		case CODE_FOR_xop_vpermil2v4df3:
33974 		case CODE_FOR_xop_vpermil2v8sf3:
33975 		  error ("the last argument must be a 2-bit immediate");
33976 		  return gen_reg_rtx (tmode);
33977 		case CODE_FOR_xop_rotlv2di3:
33978 		  new_icode = CODE_FOR_rotlv2di3;
33979 		  goto xop_rotl;
33980 		case CODE_FOR_xop_rotlv4si3:
33981 		  new_icode = CODE_FOR_rotlv4si3;
33982 		  goto xop_rotl;
33983 		case CODE_FOR_xop_rotlv8hi3:
33984 		  new_icode = CODE_FOR_rotlv8hi3;
33985 		  goto xop_rotl;
33986 		case CODE_FOR_xop_rotlv16qi3:
33987 		  new_icode = CODE_FOR_rotlv16qi3;
33988 		xop_rotl:
33989 		  if (CONST_INT_P (op))
33990 		    {
33991 		      int mask = GET_MODE_UNIT_BITSIZE (tmode) - 1;
33992 		      op = GEN_INT (INTVAL (op) & mask);
33993 		      gcc_checking_assert
33994 			(insn_data[icode].operand[i + 1].predicate (op, mode));
33995 		    }
33996 		  else
33997 		    {
33998 		      gcc_checking_assert
33999 			(nargs == 2
34000 			 && insn_data[new_icode].operand[0].mode == tmode
34001 			 && insn_data[new_icode].operand[1].mode == tmode
34002 			 && insn_data[new_icode].operand[2].mode == mode
34003 			 && insn_data[new_icode].operand[0].predicate
34004 			    == insn_data[icode].operand[0].predicate
34005 			 && insn_data[new_icode].operand[1].predicate
34006 			    == insn_data[icode].operand[1].predicate);
34007 		      icode = new_icode;
34008 		      goto non_constant;
34009 		    }
34010 		  break;
34011 		default:
34012 		  gcc_unreachable ();
34013 		}
34014 	    }
34015 	}
34016       else
34017 	{
34018 	non_constant:
34019 	  if (VECTOR_MODE_P (mode))
34020 	    op = safe_vector_operand (op, mode);
34021 
34022 	  /* If we aren't optimizing, only allow one memory operand to be
34023 	     generated.  */
34024 	  if (memory_operand (op, mode))
34025 	    num_memory++;
34026 
34027 	  gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
34028 
34029 	  if (optimize
34030 	      || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
34031 	      || num_memory > 1)
34032 	    op = force_reg (mode, op);
34033 	}
34034 
34035       args[i].op = op;
34036       args[i].mode = mode;
34037     }
34038 
34039   switch (nargs)
34040     {
34041     case 1:
34042       pat = GEN_FCN (icode) (target, args[0].op);
34043       break;
34044 
34045     case 2:
34046       if (tf_p)
34047 	pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34048 			       GEN_INT ((int)sub_code));
34049       else if (! comparison_p)
34050 	pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34051       else
34052 	{
34053 	  rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
34054 				       args[0].op,
34055 				       args[1].op);
34056 
34057 	  pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
34058 	}
34059       break;
34060 
34061     case 3:
34062       pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
34063       break;
34064 
34065     case 4:
34066       pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
34067       break;
34068 
34069     default:
34070       gcc_unreachable ();
34071     }
34072 
34073   if (! pat)
34074     return 0;
34075 
34076   emit_insn (pat);
34077   return target;
34078 }
34079 
34080 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
34081    insns with vec_merge.  */
34082 
34083 static rtx
34084 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
34085 				    rtx target)
34086 {
34087   rtx pat;
34088   tree arg0 = CALL_EXPR_ARG (exp, 0);
34089   rtx op1, op0 = expand_normal (arg0);
34090   machine_mode tmode = insn_data[icode].operand[0].mode;
34091   machine_mode mode0 = insn_data[icode].operand[1].mode;
34092 
34093   if (optimize || !target
34094       || GET_MODE (target) != tmode
34095       || !insn_data[icode].operand[0].predicate (target, tmode))
34096     target = gen_reg_rtx (tmode);
34097 
34098   if (VECTOR_MODE_P (mode0))
34099     op0 = safe_vector_operand (op0, mode0);
34100 
34101   if ((optimize && !register_operand (op0, mode0))
34102       || !insn_data[icode].operand[1].predicate (op0, mode0))
34103     op0 = copy_to_mode_reg (mode0, op0);
34104 
34105   op1 = op0;
34106   if (!insn_data[icode].operand[2].predicate (op1, mode0))
34107     op1 = copy_to_mode_reg (mode0, op1);
34108 
34109   pat = GEN_FCN (icode) (target, op0, op1);
34110   if (! pat)
34111     return 0;
34112   emit_insn (pat);
34113   return target;
34114 }
34115 
34116 /* Subroutine of ix86_expand_builtin to take care of comparison insns.  */
34117 
34118 static rtx
34119 ix86_expand_sse_compare (const struct builtin_description *d,
34120 			 tree exp, rtx target, bool swap)
34121 {
34122   rtx pat;
34123   tree arg0 = CALL_EXPR_ARG (exp, 0);
34124   tree arg1 = CALL_EXPR_ARG (exp, 1);
34125   rtx op0 = expand_normal (arg0);
34126   rtx op1 = expand_normal (arg1);
34127   rtx op2;
34128   machine_mode tmode = insn_data[d->icode].operand[0].mode;
34129   machine_mode mode0 = insn_data[d->icode].operand[1].mode;
34130   machine_mode mode1 = insn_data[d->icode].operand[2].mode;
34131   enum rtx_code comparison = d->comparison;
34132 
34133   if (VECTOR_MODE_P (mode0))
34134     op0 = safe_vector_operand (op0, mode0);
34135   if (VECTOR_MODE_P (mode1))
34136     op1 = safe_vector_operand (op1, mode1);
34137 
34138   /* Swap operands if we have a comparison that isn't available in
34139      hardware.  */
34140   if (swap)
34141     std::swap (op0, op1);
34142 
34143   if (optimize || !target
34144       || GET_MODE (target) != tmode
34145       || !insn_data[d->icode].operand[0].predicate (target, tmode))
34146     target = gen_reg_rtx (tmode);
34147 
34148   if ((optimize && !register_operand (op0, mode0))
34149       || !insn_data[d->icode].operand[1].predicate (op0, mode0))
34150     op0 = copy_to_mode_reg (mode0, op0);
34151   if ((optimize && !register_operand (op1, mode1))
34152       || !insn_data[d->icode].operand[2].predicate (op1, mode1))
34153     op1 = copy_to_mode_reg (mode1, op1);
34154 
34155   op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
34156   pat = GEN_FCN (d->icode) (target, op0, op1, op2);
34157   if (! pat)
34158     return 0;
34159   emit_insn (pat);
34160   return target;
34161 }
34162 
34163 /* Subroutine of ix86_expand_builtin to take care of comi insns.  */
34164 
34165 static rtx
34166 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
34167 		      rtx target)
34168 {
34169   rtx pat;
34170   tree arg0 = CALL_EXPR_ARG (exp, 0);
34171   tree arg1 = CALL_EXPR_ARG (exp, 1);
34172   rtx op0 = expand_normal (arg0);
34173   rtx op1 = expand_normal (arg1);
34174   machine_mode mode0 = insn_data[d->icode].operand[0].mode;
34175   machine_mode mode1 = insn_data[d->icode].operand[1].mode;
34176   enum rtx_code comparison = d->comparison;
34177 
34178   if (VECTOR_MODE_P (mode0))
34179     op0 = safe_vector_operand (op0, mode0);
34180   if (VECTOR_MODE_P (mode1))
34181     op1 = safe_vector_operand (op1, mode1);
34182 
34183   /* Swap operands if we have a comparison that isn't available in
34184      hardware.  */
34185   if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
34186     std::swap (op0, op1);
34187 
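  /* Build the result as an SImode pseudo cleared to zero and written
     through a QImode strict_low_part, so the comparison result is
     returned already zero-extended.  */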
34188   target = gen_reg_rtx (SImode);
34189   emit_move_insn (target, const0_rtx);
34190   target = gen_rtx_SUBREG (QImode, target, 0);
34191 
34192   if ((optimize && !register_operand (op0, mode0))
34193       || !insn_data[d->icode].operand[0].predicate (op0, mode0))
34194     op0 = copy_to_mode_reg (mode0, op0);
34195   if ((optimize && !register_operand (op1, mode1))
34196       || !insn_data[d->icode].operand[1].predicate (op1, mode1))
34197     op1 = copy_to_mode_reg (mode1, op1);
34198 
34199   pat = GEN_FCN (d->icode) (op0, op1);
34200   if (! pat)
34201     return 0;
34202   emit_insn (pat);
34203   emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34204 			  gen_rtx_fmt_ee (comparison, QImode,
34205 					  SET_DEST (pat),
34206 					  const0_rtx)));
34207 
34208   return SUBREG_REG (target);
34209 }
34210 
34211 /* Subroutines of ix86_expand_args_builtin to take care of round insns.  */
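/* These expand builtins whose rounding-mode immediate is fixed by the
   descriptor (d->comparison holds the rounding constant) rather than
   passed by the caller, e.g. the floor/ceil/trunc variants.  */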
34212 
34213 static rtx
34214 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
34215 		       rtx target)
34216 {
34217   rtx pat;
34218   tree arg0 = CALL_EXPR_ARG (exp, 0);
34219   rtx op1, op0 = expand_normal (arg0);
34220   machine_mode tmode = insn_data[d->icode].operand[0].mode;
34221   machine_mode mode0 = insn_data[d->icode].operand[1].mode;
34222 
34223   if (optimize || target == 0
34224       || GET_MODE (target) != tmode
34225       || !insn_data[d->icode].operand[0].predicate (target, tmode))
34226     target = gen_reg_rtx (tmode);
34227 
34228   if (VECTOR_MODE_P (mode0))
34229     op0 = safe_vector_operand (op0, mode0);
34230 
34231   if ((optimize && !register_operand (op0, mode0))
34232       || !insn_data[d->icode].operand[0].predicate (op0, mode0))
34233     op0 = copy_to_mode_reg (mode0, op0);
34234 
34235   op1 = GEN_INT (d->comparison);
34236 
34237   pat = GEN_FCN (d->icode) (target, op0, op1);
34238   if (! pat)
34239     return 0;
34240   emit_insn (pat);
34241   return target;
34242 }
34243 
34244 static rtx
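/* Subroutine of ix86_expand_args_builtin to take care of the round
   vec_pack_sfix insns: two vector inputs, with the rounding-mode immediate
   supplied by d->comparison.  */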
34245 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
34246 				     tree exp, rtx target)
34247 {
34248   rtx pat;
34249   tree arg0 = CALL_EXPR_ARG (exp, 0);
34250   tree arg1 = CALL_EXPR_ARG (exp, 1);
34251   rtx op0 = expand_normal (arg0);
34252   rtx op1 = expand_normal (arg1);
34253   rtx op2;
34254   machine_mode tmode = insn_data[d->icode].operand[0].mode;
34255   machine_mode mode0 = insn_data[d->icode].operand[1].mode;
34256   machine_mode mode1 = insn_data[d->icode].operand[2].mode;
34257 
34258   if (optimize || target == 0
34259       || GET_MODE (target) != tmode
34260       || !insn_data[d->icode].operand[0].predicate (target, tmode))
34261     target = gen_reg_rtx (tmode);
34262 
34263   op0 = safe_vector_operand (op0, mode0);
34264   op1 = safe_vector_operand (op1, mode1);
34265 
34266   if ((optimize && !register_operand (op0, mode0))
34267       || !insn_data[d->icode].operand[0].predicate (op0, mode0))
34268     op0 = copy_to_mode_reg (mode0, op0);
34269   if ((optimize && !register_operand (op1, mode1))
34270       || !insn_data[d->icode].operand[1].predicate (op1, mode1))
34271     op1 = copy_to_mode_reg (mode1, op1);
34272 
34273   op2 = GEN_INT (d->comparison);
34274 
34275   pat = GEN_FCN (d->icode) (target, op0, op1, op2);
34276   if (! pat)
34277     return 0;
34278   emit_insn (pat);
34279   return target;
34280 }
34281 
34282 /* Subroutine of ix86_expand_builtin to take care of ptest insns.  */
34283 
34284 static rtx
34285 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
34286 		       rtx target)
34287 {
34288   rtx pat;
34289   tree arg0 = CALL_EXPR_ARG (exp, 0);
34290   tree arg1 = CALL_EXPR_ARG (exp, 1);
34291   rtx op0 = expand_normal (arg0);
34292   rtx op1 = expand_normal (arg1);
34293   machine_mode mode0 = insn_data[d->icode].operand[0].mode;
34294   machine_mode mode1 = insn_data[d->icode].operand[1].mode;
34295   enum rtx_code comparison = d->comparison;
34296 
34297   if (VECTOR_MODE_P (mode0))
34298     op0 = safe_vector_operand (op0, mode0);
34299   if (VECTOR_MODE_P (mode1))
34300     op1 = safe_vector_operand (op1, mode1);
34301 
34302   target = gen_reg_rtx (SImode);
34303   emit_move_insn (target, const0_rtx);
34304   target = gen_rtx_SUBREG (QImode, target, 0);
34305 
34306   if ((optimize && !register_operand (op0, mode0))
34307       || !insn_data[d->icode].operand[0].predicate (op0, mode0))
34308     op0 = copy_to_mode_reg (mode0, op0);
34309   if ((optimize && !register_operand (op1, mode1))
34310       || !insn_data[d->icode].operand[1].predicate (op1, mode1))
34311     op1 = copy_to_mode_reg (mode1, op1);
34312 
34313   pat = GEN_FCN (d->icode) (op0, op1);
34314   if (! pat)
34315     return 0;
34316   emit_insn (pat);
34317   emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34318 			  gen_rtx_fmt_ee (comparison, QImode,
34319 					  SET_DEST (pat),
34320 					  const0_rtx)));
34321 
34322   return SUBREG_REG (target);
34323 }
34324 
34325 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */
34326 
34327 static rtx
34328 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
34329 			  tree exp, rtx target)
34330 {
34331   rtx pat;
34332   tree arg0 = CALL_EXPR_ARG (exp, 0);
34333   tree arg1 = CALL_EXPR_ARG (exp, 1);
34334   tree arg2 = CALL_EXPR_ARG (exp, 2);
34335   tree arg3 = CALL_EXPR_ARG (exp, 3);
34336   tree arg4 = CALL_EXPR_ARG (exp, 4);
34337   rtx scratch0, scratch1;
34338   rtx op0 = expand_normal (arg0);
34339   rtx op1 = expand_normal (arg1);
34340   rtx op2 = expand_normal (arg2);
34341   rtx op3 = expand_normal (arg3);
34342   rtx op4 = expand_normal (arg4);
34343   machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
34344 
34345   tmode0 = insn_data[d->icode].operand[0].mode;
34346   tmode1 = insn_data[d->icode].operand[1].mode;
34347   modev2 = insn_data[d->icode].operand[2].mode;
34348   modei3 = insn_data[d->icode].operand[3].mode;
34349   modev4 = insn_data[d->icode].operand[4].mode;
34350   modei5 = insn_data[d->icode].operand[5].mode;
34351   modeimm = insn_data[d->icode].operand[6].mode;
34352 
34353   if (VECTOR_MODE_P (modev2))
34354     op0 = safe_vector_operand (op0, modev2);
34355   if (VECTOR_MODE_P (modev4))
34356     op2 = safe_vector_operand (op2, modev4);
34357 
34358   if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
34359     op0 = copy_to_mode_reg (modev2, op0);
34360   if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
34361     op1 = copy_to_mode_reg (modei3, op1);
34362   if ((optimize && !register_operand (op2, modev4))
34363       || !insn_data[d->icode].operand[4].predicate (op2, modev4))
34364     op2 = copy_to_mode_reg (modev4, op2);
34365   if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
34366     op3 = copy_to_mode_reg (modei5, op3);
34367 
34368   if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
34369     {
34370       error ("the fifth argument must be an 8-bit immediate");
34371       return const0_rtx;
34372     }
34373 
34374   if (d->code == IX86_BUILTIN_PCMPESTRI128)
34375     {
34376       if (optimize || !target
34377 	  || GET_MODE (target) != tmode0
34378 	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
34379 	target = gen_reg_rtx (tmode0);
34380 
34381       scratch1 = gen_reg_rtx (tmode1);
34382 
34383       pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
34384     }
34385   else if (d->code == IX86_BUILTIN_PCMPESTRM128)
34386     {
34387       if (optimize || !target
34388 	  || GET_MODE (target) != tmode1
34389 	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
34390 	target = gen_reg_rtx (tmode1);
34391 
34392       scratch0 = gen_reg_rtx (tmode0);
34393 
34394       pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
34395     }
34396   else
34397     {
34398       gcc_assert (d->flag);
34399 
34400       scratch0 = gen_reg_rtx (tmode0);
34401       scratch1 = gen_reg_rtx (tmode1);
34402 
34403       pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
34404     }
34405 
34406   if (! pat)
34407     return 0;
34408 
34409   emit_insn (pat);
34410 
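  /* For the flag-returning variants, d->flag gives the mode in which
     FLAGS_REG is read; the comparison result is returned as a
     zero-extended 0/1 value.  */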
34411   if (d->flag)
34412     {
34413       target = gen_reg_rtx (SImode);
34414       emit_move_insn (target, const0_rtx);
34415       target = gen_rtx_SUBREG (QImode, target, 0);
34416 
34417       emit_insn
34418 	(gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34419 		      gen_rtx_fmt_ee (EQ, QImode,
34420 				      gen_rtx_REG ((machine_mode) d->flag,
34421 						   FLAGS_REG),
34422 				      const0_rtx)));
34423       return SUBREG_REG (target);
34424     }
34425   else
34426     return target;
34427 }
34428 
34429 
34430 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */
34431 
34432 static rtx
34433 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
34434 			  tree exp, rtx target)
34435 {
34436   rtx pat;
34437   tree arg0 = CALL_EXPR_ARG (exp, 0);
34438   tree arg1 = CALL_EXPR_ARG (exp, 1);
34439   tree arg2 = CALL_EXPR_ARG (exp, 2);
34440   rtx scratch0, scratch1;
34441   rtx op0 = expand_normal (arg0);
34442   rtx op1 = expand_normal (arg1);
34443   rtx op2 = expand_normal (arg2);
34444   machine_mode tmode0, tmode1, modev2, modev3, modeimm;
34445 
34446   tmode0 = insn_data[d->icode].operand[0].mode;
34447   tmode1 = insn_data[d->icode].operand[1].mode;
34448   modev2 = insn_data[d->icode].operand[2].mode;
34449   modev3 = insn_data[d->icode].operand[3].mode;
34450   modeimm = insn_data[d->icode].operand[4].mode;
34451 
34452   if (VECTOR_MODE_P (modev2))
34453     op0 = safe_vector_operand (op0, modev2);
34454   if (VECTOR_MODE_P (modev3))
34455     op1 = safe_vector_operand (op1, modev3);
34456 
34457   if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
34458     op0 = copy_to_mode_reg (modev2, op0);
34459   if ((optimize && !register_operand (op1, modev3))
34460       || !insn_data[d->icode].operand[3].predicate (op1, modev3))
34461     op1 = copy_to_mode_reg (modev3, op1);
34462 
34463   if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
34464     {
34465       error ("the third argument must be an 8-bit immediate");
34466       return const0_rtx;
34467     }
34468 
34469   if (d->code == IX86_BUILTIN_PCMPISTRI128)
34470     {
34471       if (optimize || !target
34472 	  || GET_MODE (target) != tmode0
34473 	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
34474 	target = gen_reg_rtx (tmode0);
34475 
34476       scratch1 = gen_reg_rtx (tmode1);
34477 
34478       pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
34479     }
34480   else if (d->code == IX86_BUILTIN_PCMPISTRM128)
34481     {
34482       if (optimize || !target
34483 	  || GET_MODE (target) != tmode1
34484 	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
34485 	target = gen_reg_rtx (tmode1);
34486 
34487       scratch0 = gen_reg_rtx (tmode0);
34488 
34489       pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
34490     }
34491   else
34492     {
34493       gcc_assert (d->flag);
34494 
34495       scratch0 = gen_reg_rtx (tmode0);
34496       scratch1 = gen_reg_rtx (tmode1);
34497 
34498       pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
34499     }
34500 
34501   if (! pat)
34502     return 0;
34503 
34504   emit_insn (pat);
34505 
34506   if (d->flag)
34507     {
34508       target = gen_reg_rtx (SImode);
34509       emit_move_insn (target, const0_rtx);
34510       target = gen_rtx_SUBREG (QImode, target, 0);
34511 
34512       emit_insn
34513 	(gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34514 		      gen_rtx_fmt_ee (EQ, QImode,
34515 				      gen_rtx_REG ((machine_mode) d->flag,
34516 						   FLAGS_REG),
34517 				      const0_rtx)));
34518       return SUBREG_REG (target);
34519     }
34520   else
34521     return target;
34522 }
34523 
34524 /* Subroutine of ix86_expand_builtin to take care of insns with
34525    variable number of operands.  */
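/* For example, a builtin of type V8SF_FTYPE_V8SF_V8SF_V8SF_UQI is expanded
   with nargs = 4, the trailing UQI operand being the AVX-512 write mask.  */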
34526 
34527 static rtx
34528 ix86_expand_args_builtin (const struct builtin_description *d,
34529 			  tree exp, rtx target)
34530 {
34531   rtx pat, real_target;
34532   unsigned int i, nargs;
34533   unsigned int nargs_constant = 0;
34534   unsigned int mask_pos = 0;
34535   int num_memory = 0;
34536   struct
34537     {
34538       rtx op;
34539       machine_mode mode;
34540     } args[6];
34541   bool second_arg_count = false;
34542   enum insn_code icode = d->icode;
34543   const struct insn_data_d *insn_p = &insn_data[icode];
34544   machine_mode tmode = insn_p->operand[0].mode;
34545   machine_mode rmode = VOIDmode;
34546   bool swap = false;
34547   enum rtx_code comparison = d->comparison;
34548 
34549   switch ((enum ix86_builtin_func_type) d->flag)
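  /* Classify the builtin by its function type to determine the number of
     arguments, how many trailing arguments must be immediates, whether a
     mask operand is present, and any special expansion path.  */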
34550     {
34551     case V2DF_FTYPE_V2DF_ROUND:
34552     case V4DF_FTYPE_V4DF_ROUND:
34553     case V8DF_FTYPE_V8DF_ROUND:
34554     case V4SF_FTYPE_V4SF_ROUND:
34555     case V8SF_FTYPE_V8SF_ROUND:
34556     case V16SF_FTYPE_V16SF_ROUND:
34557     case V4SI_FTYPE_V4SF_ROUND:
34558     case V8SI_FTYPE_V8SF_ROUND:
34559     case V16SI_FTYPE_V16SF_ROUND:
34560       return ix86_expand_sse_round (d, exp, target);
34561     case V4SI_FTYPE_V2DF_V2DF_ROUND:
34562     case V8SI_FTYPE_V4DF_V4DF_ROUND:
34563     case V16SI_FTYPE_V8DF_V8DF_ROUND:
34564       return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
34565     case INT_FTYPE_V8SF_V8SF_PTEST:
34566     case INT_FTYPE_V4DI_V4DI_PTEST:
34567     case INT_FTYPE_V4DF_V4DF_PTEST:
34568     case INT_FTYPE_V4SF_V4SF_PTEST:
34569     case INT_FTYPE_V2DI_V2DI_PTEST:
34570     case INT_FTYPE_V2DF_V2DF_PTEST:
34571       return ix86_expand_sse_ptest (d, exp, target);
34572     case FLOAT128_FTYPE_FLOAT128:
34573     case FLOAT_FTYPE_FLOAT:
34574     case INT_FTYPE_INT:
34575     case UINT_FTYPE_UINT:
34576     case UINT16_FTYPE_UINT16:
34577     case UINT64_FTYPE_INT:
34578     case UINT64_FTYPE_UINT64:
34579     case INT64_FTYPE_INT64:
34580     case INT64_FTYPE_V4SF:
34581     case INT64_FTYPE_V2DF:
34582     case INT_FTYPE_V16QI:
34583     case INT_FTYPE_V8QI:
34584     case INT_FTYPE_V8SF:
34585     case INT_FTYPE_V4DF:
34586     case INT_FTYPE_V4SF:
34587     case INT_FTYPE_V2DF:
34588     case INT_FTYPE_V32QI:
34589     case V16QI_FTYPE_V16QI:
34590     case V8SI_FTYPE_V8SF:
34591     case V8SI_FTYPE_V4SI:
34592     case V8HI_FTYPE_V8HI:
34593     case V8HI_FTYPE_V16QI:
34594     case V8QI_FTYPE_V8QI:
34595     case V8SF_FTYPE_V8SF:
34596     case V8SF_FTYPE_V8SI:
34597     case V8SF_FTYPE_V4SF:
34598     case V8SF_FTYPE_V8HI:
34599     case V4SI_FTYPE_V4SI:
34600     case V4SI_FTYPE_V16QI:
34601     case V4SI_FTYPE_V4SF:
34602     case V4SI_FTYPE_V8SI:
34603     case V4SI_FTYPE_V8HI:
34604     case V4SI_FTYPE_V4DF:
34605     case V4SI_FTYPE_V2DF:
34606     case V4HI_FTYPE_V4HI:
34607     case V4DF_FTYPE_V4DF:
34608     case V4DF_FTYPE_V4SI:
34609     case V4DF_FTYPE_V4SF:
34610     case V4DF_FTYPE_V2DF:
34611     case V4SF_FTYPE_V4SF:
34612     case V4SF_FTYPE_V4SI:
34613     case V4SF_FTYPE_V8SF:
34614     case V4SF_FTYPE_V4DF:
34615     case V4SF_FTYPE_V8HI:
34616     case V4SF_FTYPE_V2DF:
34617     case V2DI_FTYPE_V2DI:
34618     case V2DI_FTYPE_V16QI:
34619     case V2DI_FTYPE_V8HI:
34620     case V2DI_FTYPE_V4SI:
34621     case V2DF_FTYPE_V2DF:
34622     case V2DF_FTYPE_V4SI:
34623     case V2DF_FTYPE_V4DF:
34624     case V2DF_FTYPE_V4SF:
34625     case V2DF_FTYPE_V2SI:
34626     case V2SI_FTYPE_V2SI:
34627     case V2SI_FTYPE_V4SF:
34628     case V2SI_FTYPE_V2SF:
34629     case V2SI_FTYPE_V2DF:
34630     case V2SF_FTYPE_V2SF:
34631     case V2SF_FTYPE_V2SI:
34632     case V32QI_FTYPE_V32QI:
34633     case V32QI_FTYPE_V16QI:
34634     case V16HI_FTYPE_V16HI:
34635     case V16HI_FTYPE_V8HI:
34636     case V8SI_FTYPE_V8SI:
34637     case V16HI_FTYPE_V16QI:
34638     case V8SI_FTYPE_V16QI:
34639     case V4DI_FTYPE_V16QI:
34640     case V8SI_FTYPE_V8HI:
34641     case V4DI_FTYPE_V8HI:
34642     case V4DI_FTYPE_V4SI:
34643     case V4DI_FTYPE_V2DI:
34644     case UQI_FTYPE_UQI:
34645     case UHI_FTYPE_UHI:
34646     case USI_FTYPE_USI:
34647     case USI_FTYPE_UQI:
34648     case USI_FTYPE_UHI:
34649     case UDI_FTYPE_UDI:
34650     case UHI_FTYPE_V16QI:
34651     case USI_FTYPE_V32QI:
34652     case UDI_FTYPE_V64QI:
34653     case V16QI_FTYPE_UHI:
34654     case V32QI_FTYPE_USI:
34655     case V64QI_FTYPE_UDI:
34656     case V8HI_FTYPE_UQI:
34657     case V16HI_FTYPE_UHI:
34658     case V32HI_FTYPE_USI:
34659     case V4SI_FTYPE_UQI:
34660     case V8SI_FTYPE_UQI:
34661     case V4SI_FTYPE_UHI:
34662     case V8SI_FTYPE_UHI:
34663     case UQI_FTYPE_V8HI:
34664     case UHI_FTYPE_V16HI:
34665     case USI_FTYPE_V32HI:
34666     case UQI_FTYPE_V4SI:
34667     case UQI_FTYPE_V8SI:
34668     case UHI_FTYPE_V16SI:
34669     case UQI_FTYPE_V2DI:
34670     case UQI_FTYPE_V4DI:
34671     case UQI_FTYPE_V8DI:
34672     case V16SI_FTYPE_UHI:
34673     case V2DI_FTYPE_UQI:
34674     case V4DI_FTYPE_UQI:
34675     case V16SI_FTYPE_INT:
34676     case V16SF_FTYPE_V8SF:
34677     case V16SI_FTYPE_V8SI:
34678     case V16SF_FTYPE_V4SF:
34679     case V16SI_FTYPE_V4SI:
34680     case V16SI_FTYPE_V16SF:
34681     case V16SI_FTYPE_V16SI:
34682     case V64QI_FTYPE_V64QI:
34683     case V32HI_FTYPE_V32HI:
34684     case V16SF_FTYPE_V16SF:
34685     case V8DI_FTYPE_UQI:
34686     case V8DI_FTYPE_V8DI:
34687     case V8DF_FTYPE_V4DF:
34688     case V8DF_FTYPE_V2DF:
34689     case V8DF_FTYPE_V8DF:
34690     case V4DI_FTYPE_V4DI:
34691       nargs = 1;
34692       break;
34693     case V4SF_FTYPE_V4SF_VEC_MERGE:
34694     case V2DF_FTYPE_V2DF_VEC_MERGE:
34695       return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
34696     case FLOAT128_FTYPE_FLOAT128_FLOAT128:
34697     case V16QI_FTYPE_V16QI_V16QI:
34698     case V16QI_FTYPE_V8HI_V8HI:
34699     case V16SF_FTYPE_V16SF_V16SF:
34700     case V8QI_FTYPE_V8QI_V8QI:
34701     case V8QI_FTYPE_V4HI_V4HI:
34702     case V8HI_FTYPE_V8HI_V8HI:
34703     case V8HI_FTYPE_V16QI_V16QI:
34704     case V8HI_FTYPE_V4SI_V4SI:
34705     case V8SF_FTYPE_V8SF_V8SF:
34706     case V8SF_FTYPE_V8SF_V8SI:
34707     case V8DF_FTYPE_V8DF_V8DF:
34708     case V4SI_FTYPE_V4SI_V4SI:
34709     case V4SI_FTYPE_V8HI_V8HI:
34710     case V4SI_FTYPE_V2DF_V2DF:
34711     case V4HI_FTYPE_V4HI_V4HI:
34712     case V4HI_FTYPE_V8QI_V8QI:
34713     case V4HI_FTYPE_V2SI_V2SI:
34714     case V4DF_FTYPE_V4DF_V4DF:
34715     case V4DF_FTYPE_V4DF_V4DI:
34716     case V4SF_FTYPE_V4SF_V4SF:
34717     case V4SF_FTYPE_V4SF_V4SI:
34718     case V4SF_FTYPE_V4SF_V2SI:
34719     case V4SF_FTYPE_V4SF_V2DF:
34720     case V4SF_FTYPE_V4SF_UINT:
34721     case V4SF_FTYPE_V4SF_DI:
34722     case V4SF_FTYPE_V4SF_SI:
34723     case V2DI_FTYPE_V2DI_V2DI:
34724     case V2DI_FTYPE_V16QI_V16QI:
34725     case V2DI_FTYPE_V4SI_V4SI:
34726     case V2DI_FTYPE_V2DI_V16QI:
34727     case V2SI_FTYPE_V2SI_V2SI:
34728     case V2SI_FTYPE_V4HI_V4HI:
34729     case V2SI_FTYPE_V2SF_V2SF:
34730     case V2DF_FTYPE_V2DF_V2DF:
34731     case V2DF_FTYPE_V2DF_V4SF:
34732     case V2DF_FTYPE_V2DF_V2DI:
34733     case V2DF_FTYPE_V2DF_DI:
34734     case V2DF_FTYPE_V2DF_SI:
34735     case V2DF_FTYPE_V2DF_UINT:
34736     case V2SF_FTYPE_V2SF_V2SF:
34737     case V1DI_FTYPE_V1DI_V1DI:
34738     case V1DI_FTYPE_V8QI_V8QI:
34739     case V1DI_FTYPE_V2SI_V2SI:
34740     case V32QI_FTYPE_V16HI_V16HI:
34741     case V16HI_FTYPE_V8SI_V8SI:
34742     case V64QI_FTYPE_V64QI_V64QI:
34743     case V32QI_FTYPE_V32QI_V32QI:
34744     case V16HI_FTYPE_V32QI_V32QI:
34745     case V16HI_FTYPE_V16HI_V16HI:
34746     case V8SI_FTYPE_V4DF_V4DF:
34747     case V8SI_FTYPE_V8SI_V8SI:
34748     case V8SI_FTYPE_V16HI_V16HI:
34749     case V4DI_FTYPE_V4DI_V4DI:
34750     case V4DI_FTYPE_V8SI_V8SI:
34751     case V8DI_FTYPE_V64QI_V64QI:
34752       if (comparison == UNKNOWN)
34753 	return ix86_expand_binop_builtin (icode, exp, target);
34754       nargs = 2;
34755       break;
34756     case V4SF_FTYPE_V4SF_V4SF_SWAP:
34757     case V2DF_FTYPE_V2DF_V2DF_SWAP:
34758       gcc_assert (comparison != UNKNOWN);
34759       nargs = 2;
34760       swap = true;
34761       break;
34762     case V16HI_FTYPE_V16HI_V8HI_COUNT:
34763     case V16HI_FTYPE_V16HI_SI_COUNT:
34764     case V8SI_FTYPE_V8SI_V4SI_COUNT:
34765     case V8SI_FTYPE_V8SI_SI_COUNT:
34766     case V4DI_FTYPE_V4DI_V2DI_COUNT:
34767     case V4DI_FTYPE_V4DI_INT_COUNT:
34768     case V8HI_FTYPE_V8HI_V8HI_COUNT:
34769     case V8HI_FTYPE_V8HI_SI_COUNT:
34770     case V4SI_FTYPE_V4SI_V4SI_COUNT:
34771     case V4SI_FTYPE_V4SI_SI_COUNT:
34772     case V4HI_FTYPE_V4HI_V4HI_COUNT:
34773     case V4HI_FTYPE_V4HI_SI_COUNT:
34774     case V2DI_FTYPE_V2DI_V2DI_COUNT:
34775     case V2DI_FTYPE_V2DI_SI_COUNT:
34776     case V2SI_FTYPE_V2SI_V2SI_COUNT:
34777     case V2SI_FTYPE_V2SI_SI_COUNT:
34778     case V1DI_FTYPE_V1DI_V1DI_COUNT:
34779     case V1DI_FTYPE_V1DI_SI_COUNT:
34780       nargs = 2;
34781       second_arg_count = true;
34782       break;
34783     case V16HI_FTYPE_V16HI_INT_V16HI_UHI_COUNT:
34784     case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI_COUNT:
34785     case V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT:
34786     case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT:
34787     case V2DI_FTYPE_V2DI_INT_V2DI_UQI_COUNT:
34788     case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI_COUNT:
34789     case V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT:
34790     case V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT:
34791     case V4DI_FTYPE_V4DI_INT_V4DI_UQI_COUNT:
34792     case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI_COUNT:
34793     case V4SI_FTYPE_V4SI_INT_V4SI_UQI_COUNT:
34794     case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI_COUNT:
34795     case V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT:
34796     case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT:
34797     case V8HI_FTYPE_V8HI_INT_V8HI_UQI_COUNT:
34798     case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI_COUNT:
34799     case V8SI_FTYPE_V8SI_INT_V8SI_UQI_COUNT:
34800     case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI_COUNT:
34801       nargs = 4;
34802       second_arg_count = true;
34803       break;
34804     case UINT64_FTYPE_UINT64_UINT64:
34805     case UINT_FTYPE_UINT_UINT:
34806     case UINT_FTYPE_UINT_USHORT:
34807     case UINT_FTYPE_UINT_UCHAR:
34808     case UINT16_FTYPE_UINT16_INT:
34809     case UINT8_FTYPE_UINT8_INT:
34810     case UQI_FTYPE_UQI_UQI:
34811     case UHI_FTYPE_UHI_UHI:
34812     case USI_FTYPE_USI_USI:
34813     case UDI_FTYPE_UDI_UDI:
34814     case V16SI_FTYPE_V8DF_V8DF:
34815       nargs = 2;
34816       break;
34817     case V2DI_FTYPE_V2DI_INT_CONVERT:
34818       nargs = 2;
34819       rmode = V1TImode;
34820       nargs_constant = 1;
34821       break;
34822     case V4DI_FTYPE_V4DI_INT_CONVERT:
34823       nargs = 2;
34824       rmode = V2TImode;
34825       nargs_constant = 1;
34826       break;
34827     case V8DI_FTYPE_V8DI_INT_CONVERT:
34828       nargs = 2;
34829       rmode = V4TImode;
34830       nargs_constant = 1;
34831       break;
34832     case V8HI_FTYPE_V8HI_INT:
34833     case V8HI_FTYPE_V8SF_INT:
34834     case V16HI_FTYPE_V16SF_INT:
34835     case V8HI_FTYPE_V4SF_INT:
34836     case V8SF_FTYPE_V8SF_INT:
34837     case V4SF_FTYPE_V16SF_INT:
34838     case V16SF_FTYPE_V16SF_INT:
34839     case V4SI_FTYPE_V4SI_INT:
34840     case V4SI_FTYPE_V8SI_INT:
34841     case V4HI_FTYPE_V4HI_INT:
34842     case V4DF_FTYPE_V4DF_INT:
34843     case V4DF_FTYPE_V8DF_INT:
34844     case V4SF_FTYPE_V4SF_INT:
34845     case V4SF_FTYPE_V8SF_INT:
34846     case V2DI_FTYPE_V2DI_INT:
34847     case V2DF_FTYPE_V2DF_INT:
34848     case V2DF_FTYPE_V4DF_INT:
34849     case V16HI_FTYPE_V16HI_INT:
34850     case V8SI_FTYPE_V8SI_INT:
34851     case V16SI_FTYPE_V16SI_INT:
34852     case V4SI_FTYPE_V16SI_INT:
34853     case V4DI_FTYPE_V4DI_INT:
34854     case V2DI_FTYPE_V4DI_INT:
34855     case V4DI_FTYPE_V8DI_INT:
34856     case QI_FTYPE_V4SF_INT:
34857     case QI_FTYPE_V2DF_INT:
34858     case UQI_FTYPE_UQI_UQI_CONST:
34859     case UHI_FTYPE_UHI_UQI:
34860     case USI_FTYPE_USI_UQI:
34861     case UDI_FTYPE_UDI_UQI:
34862       nargs = 2;
34863       nargs_constant = 1;
34864       break;
34865     case V16QI_FTYPE_V16QI_V16QI_V16QI:
34866     case V8SF_FTYPE_V8SF_V8SF_V8SF:
34867     case V4DF_FTYPE_V4DF_V4DF_V4DF:
34868     case V4SF_FTYPE_V4SF_V4SF_V4SF:
34869     case V2DF_FTYPE_V2DF_V2DF_V2DF:
34870     case V32QI_FTYPE_V32QI_V32QI_V32QI:
34871     case UHI_FTYPE_V16SI_V16SI_UHI:
34872     case UQI_FTYPE_V8DI_V8DI_UQI:
34873     case V16HI_FTYPE_V16SI_V16HI_UHI:
34874     case V16QI_FTYPE_V16SI_V16QI_UHI:
34875     case V16QI_FTYPE_V8DI_V16QI_UQI:
34876     case V16SF_FTYPE_V16SF_V16SF_UHI:
34877     case V16SF_FTYPE_V4SF_V16SF_UHI:
34878     case V16SI_FTYPE_SI_V16SI_UHI:
34879     case V16SI_FTYPE_V16HI_V16SI_UHI:
34880     case V16SI_FTYPE_V16QI_V16SI_UHI:
34881     case V8SF_FTYPE_V4SF_V8SF_UQI:
34882     case V4DF_FTYPE_V2DF_V4DF_UQI:
34883     case V8SI_FTYPE_V4SI_V8SI_UQI:
34884     case V8SI_FTYPE_SI_V8SI_UQI:
34885     case V4SI_FTYPE_V4SI_V4SI_UQI:
34886     case V4SI_FTYPE_SI_V4SI_UQI:
34887     case V4DI_FTYPE_V2DI_V4DI_UQI:
34888     case V4DI_FTYPE_DI_V4DI_UQI:
34889     case V2DI_FTYPE_V2DI_V2DI_UQI:
34890     case V2DI_FTYPE_DI_V2DI_UQI:
34891     case V64QI_FTYPE_V64QI_V64QI_UDI:
34892     case V64QI_FTYPE_V16QI_V64QI_UDI:
34893     case V64QI_FTYPE_QI_V64QI_UDI:
34894     case V32QI_FTYPE_V32QI_V32QI_USI:
34895     case V32QI_FTYPE_V16QI_V32QI_USI:
34896     case V32QI_FTYPE_QI_V32QI_USI:
34897     case V16QI_FTYPE_V16QI_V16QI_UHI:
34898     case V16QI_FTYPE_QI_V16QI_UHI:
34899     case V32HI_FTYPE_V8HI_V32HI_USI:
34900     case V32HI_FTYPE_HI_V32HI_USI:
34901     case V16HI_FTYPE_V8HI_V16HI_UHI:
34902     case V16HI_FTYPE_HI_V16HI_UHI:
34903     case V8HI_FTYPE_V8HI_V8HI_UQI:
34904     case V8HI_FTYPE_HI_V8HI_UQI:
34905     case V8SF_FTYPE_V8HI_V8SF_UQI:
34906     case V4SF_FTYPE_V8HI_V4SF_UQI:
34907     case V8SI_FTYPE_V8SF_V8SI_UQI:
34908     case V4SI_FTYPE_V4SF_V4SI_UQI:
34909     case V4DI_FTYPE_V4SF_V4DI_UQI:
34910     case V2DI_FTYPE_V4SF_V2DI_UQI:
34911     case V4SF_FTYPE_V4DI_V4SF_UQI:
34912     case V4SF_FTYPE_V2DI_V4SF_UQI:
34913     case V4DF_FTYPE_V4DI_V4DF_UQI:
34914     case V2DF_FTYPE_V2DI_V2DF_UQI:
34915     case V16QI_FTYPE_V8HI_V16QI_UQI:
34916     case V16QI_FTYPE_V16HI_V16QI_UHI:
34917     case V16QI_FTYPE_V4SI_V16QI_UQI:
34918     case V16QI_FTYPE_V8SI_V16QI_UQI:
34919     case V8HI_FTYPE_V4SI_V8HI_UQI:
34920     case V8HI_FTYPE_V8SI_V8HI_UQI:
34921     case V16QI_FTYPE_V2DI_V16QI_UQI:
34922     case V16QI_FTYPE_V4DI_V16QI_UQI:
34923     case V8HI_FTYPE_V2DI_V8HI_UQI:
34924     case V8HI_FTYPE_V4DI_V8HI_UQI:
34925     case V4SI_FTYPE_V2DI_V4SI_UQI:
34926     case V4SI_FTYPE_V4DI_V4SI_UQI:
34927     case V32QI_FTYPE_V32HI_V32QI_USI:
34928     case UHI_FTYPE_V16QI_V16QI_UHI:
34929     case USI_FTYPE_V32QI_V32QI_USI:
34930     case UDI_FTYPE_V64QI_V64QI_UDI:
34931     case UQI_FTYPE_V8HI_V8HI_UQI:
34932     case UHI_FTYPE_V16HI_V16HI_UHI:
34933     case USI_FTYPE_V32HI_V32HI_USI:
34934     case UQI_FTYPE_V4SI_V4SI_UQI:
34935     case UQI_FTYPE_V8SI_V8SI_UQI:
34936     case UQI_FTYPE_V2DI_V2DI_UQI:
34937     case UQI_FTYPE_V4DI_V4DI_UQI:
34938     case V4SF_FTYPE_V2DF_V4SF_UQI:
34939     case V4SF_FTYPE_V4DF_V4SF_UQI:
34940     case V16SI_FTYPE_V16SI_V16SI_UHI:
34941     case V16SI_FTYPE_V4SI_V16SI_UHI:
34942     case V2DI_FTYPE_V4SI_V2DI_UQI:
34943     case V2DI_FTYPE_V8HI_V2DI_UQI:
34944     case V2DI_FTYPE_V16QI_V2DI_UQI:
34945     case V4DI_FTYPE_V4DI_V4DI_UQI:
34946     case V4DI_FTYPE_V4SI_V4DI_UQI:
34947     case V4DI_FTYPE_V8HI_V4DI_UQI:
34948     case V4DI_FTYPE_V16QI_V4DI_UQI:
34949     case V4DI_FTYPE_V4DF_V4DI_UQI:
34950     case V2DI_FTYPE_V2DF_V2DI_UQI:
34951     case V4SI_FTYPE_V4DF_V4SI_UQI:
34952     case V4SI_FTYPE_V2DF_V4SI_UQI:
34953     case V4SI_FTYPE_V8HI_V4SI_UQI:
34954     case V4SI_FTYPE_V16QI_V4SI_UQI:
34955     case V4DI_FTYPE_V4DI_V4DI_V4DI:
34956     case V8DF_FTYPE_V2DF_V8DF_UQI:
34957     case V8DF_FTYPE_V4DF_V8DF_UQI:
34958     case V8DF_FTYPE_V8DF_V8DF_UQI:
34959     case V8SF_FTYPE_V8SF_V8SF_UQI:
34960     case V8SF_FTYPE_V8SI_V8SF_UQI:
34961     case V4DF_FTYPE_V4DF_V4DF_UQI:
34962     case V4SF_FTYPE_V4SF_V4SF_UQI:
34963     case V2DF_FTYPE_V2DF_V2DF_UQI:
34964     case V2DF_FTYPE_V4SF_V2DF_UQI:
34965     case V2DF_FTYPE_V4SI_V2DF_UQI:
34966     case V4SF_FTYPE_V4SI_V4SF_UQI:
34967     case V4DF_FTYPE_V4SF_V4DF_UQI:
34968     case V4DF_FTYPE_V4SI_V4DF_UQI:
34969     case V8SI_FTYPE_V8SI_V8SI_UQI:
34970     case V8SI_FTYPE_V8HI_V8SI_UQI:
34971     case V8SI_FTYPE_V16QI_V8SI_UQI:
34972     case V8DF_FTYPE_V8SI_V8DF_UQI:
34973     case V8DI_FTYPE_DI_V8DI_UQI:
34974     case V16SF_FTYPE_V8SF_V16SF_UHI:
34975     case V16SI_FTYPE_V8SI_V16SI_UHI:
34976     case V16HI_FTYPE_V16HI_V16HI_UHI:
34977     case V8HI_FTYPE_V16QI_V8HI_UQI:
34978     case V16HI_FTYPE_V16QI_V16HI_UHI:
34979     case V32HI_FTYPE_V32HI_V32HI_USI:
34980     case V32HI_FTYPE_V32QI_V32HI_USI:
34981     case V8DI_FTYPE_V16QI_V8DI_UQI:
34982     case V8DI_FTYPE_V2DI_V8DI_UQI:
34983     case V8DI_FTYPE_V4DI_V8DI_UQI:
34984     case V8DI_FTYPE_V8DI_V8DI_UQI:
34985     case V8DI_FTYPE_V8HI_V8DI_UQI:
34986     case V8DI_FTYPE_V8SI_V8DI_UQI:
34987     case V8HI_FTYPE_V8DI_V8HI_UQI:
34988     case V8SI_FTYPE_V8DI_V8SI_UQI:
34989     case V4SI_FTYPE_V4SI_V4SI_V4SI:
34990     case V16SI_FTYPE_V16SI_V16SI_V16SI:
34991     case V8DI_FTYPE_V8DI_V8DI_V8DI:
34992     case V32HI_FTYPE_V32HI_V32HI_V32HI:
34993     case V2DI_FTYPE_V2DI_V2DI_V2DI:
34994     case V16HI_FTYPE_V16HI_V16HI_V16HI:
34995     case V8SI_FTYPE_V8SI_V8SI_V8SI:
34996     case V8HI_FTYPE_V8HI_V8HI_V8HI:
34997       nargs = 3;
34998       break;
34999     case V32QI_FTYPE_V32QI_V32QI_INT:
35000     case V16HI_FTYPE_V16HI_V16HI_INT:
35001     case V16QI_FTYPE_V16QI_V16QI_INT:
35002     case V4DI_FTYPE_V4DI_V4DI_INT:
35003     case V8HI_FTYPE_V8HI_V8HI_INT:
35004     case V8SI_FTYPE_V8SI_V8SI_INT:
35005     case V8SI_FTYPE_V8SI_V4SI_INT:
35006     case V8SF_FTYPE_V8SF_V8SF_INT:
35007     case V8SF_FTYPE_V8SF_V4SF_INT:
35008     case V4SI_FTYPE_V4SI_V4SI_INT:
35009     case V4DF_FTYPE_V4DF_V4DF_INT:
35010     case V16SF_FTYPE_V16SF_V16SF_INT:
35011     case V16SF_FTYPE_V16SF_V4SF_INT:
35012     case V16SI_FTYPE_V16SI_V4SI_INT:
35013     case V4DF_FTYPE_V4DF_V2DF_INT:
35014     case V4SF_FTYPE_V4SF_V4SF_INT:
35015     case V2DI_FTYPE_V2DI_V2DI_INT:
35016     case V4DI_FTYPE_V4DI_V2DI_INT:
35017     case V2DF_FTYPE_V2DF_V2DF_INT:
35018     case UQI_FTYPE_V8DI_V8UDI_INT:
35019     case UQI_FTYPE_V8DF_V8DF_INT:
35020     case UQI_FTYPE_V2DF_V2DF_INT:
35021     case UQI_FTYPE_V4SF_V4SF_INT:
35022     case UHI_FTYPE_V16SI_V16SI_INT:
35023     case UHI_FTYPE_V16SF_V16SF_INT:
35024     case V64QI_FTYPE_V64QI_V64QI_INT:
35025     case V32HI_FTYPE_V32HI_V32HI_INT:
35026     case V16SI_FTYPE_V16SI_V16SI_INT:
35027     case V8DI_FTYPE_V8DI_V8DI_INT:
35028       nargs = 3;
35029       nargs_constant = 1;
35030       break;
35031     case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
35032       nargs = 3;
35033       rmode = V4DImode;
35034       nargs_constant = 1;
35035       break;
35036     case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
35037       nargs = 3;
35038       rmode = V2DImode;
35039       nargs_constant = 1;
35040       break;
35041     case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
35042       nargs = 3;
35043       rmode = DImode;
35044       nargs_constant = 1;
35045       break;
35046     case V2DI_FTYPE_V2DI_UINT_UINT:
35047       nargs = 3;
35048       nargs_constant = 2;
35049       break;
35050     case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
35051       nargs = 3;
35052       rmode = V8DImode;
35053       nargs_constant = 1;
35054       break;
35055     case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT:
35056       nargs = 5;
35057       rmode = V8DImode;
35058       mask_pos = 2;
35059       nargs_constant = 1;
35060       break;
35061     case QI_FTYPE_V8DF_INT_UQI:
35062     case QI_FTYPE_V4DF_INT_UQI:
35063     case QI_FTYPE_V2DF_INT_UQI:
35064     case HI_FTYPE_V16SF_INT_UHI:
35065     case QI_FTYPE_V8SF_INT_UQI:
35066     case QI_FTYPE_V4SF_INT_UQI:
35067     case V4SI_FTYPE_V4SI_V4SI_UHI:
35068     case V8SI_FTYPE_V8SI_V8SI_UHI:
35069       nargs = 3;
35070       mask_pos = 1;
35071       nargs_constant = 1;
35072       break;
35073     case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT:
35074       nargs = 5;
35075       rmode = V4DImode;
35076       mask_pos = 2;
35077       nargs_constant = 1;
35078       break;
35079     case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT:
35080       nargs = 5;
35081       rmode = V2DImode;
35082       mask_pos = 2;
35083       nargs_constant = 1;
35084       break;
35085     case V32QI_FTYPE_V32QI_V32QI_V32QI_USI:
35086     case V32HI_FTYPE_V32HI_V32HI_V32HI_USI:
35087     case V32HI_FTYPE_V64QI_V64QI_V32HI_USI:
35088     case V16SI_FTYPE_V32HI_V32HI_V16SI_UHI:
35089     case V64QI_FTYPE_V64QI_V64QI_V64QI_UDI:
35090     case V32HI_FTYPE_V32HI_V8HI_V32HI_USI:
35091     case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI:
35092     case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI:
35093     case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI:
35094     case V64QI_FTYPE_V32HI_V32HI_V64QI_UDI:
35095     case V32QI_FTYPE_V16HI_V16HI_V32QI_USI:
35096     case V16QI_FTYPE_V8HI_V8HI_V16QI_UHI:
35097     case V32HI_FTYPE_V16SI_V16SI_V32HI_USI:
35098     case V16HI_FTYPE_V8SI_V8SI_V16HI_UHI:
35099     case V8HI_FTYPE_V4SI_V4SI_V8HI_UQI:
35100     case V4DF_FTYPE_V4DF_V4DI_V4DF_UQI:
35101     case V8SF_FTYPE_V8SF_V8SI_V8SF_UQI:
35102     case V4SF_FTYPE_V4SF_V4SI_V4SF_UQI:
35103     case V2DF_FTYPE_V2DF_V2DI_V2DF_UQI:
35104     case V2DI_FTYPE_V4SI_V4SI_V2DI_UQI:
35105     case V4DI_FTYPE_V8SI_V8SI_V4DI_UQI:
35106     case V4DF_FTYPE_V4DI_V4DF_V4DF_UQI:
35107     case V8SF_FTYPE_V8SI_V8SF_V8SF_UQI:
35108     case V2DF_FTYPE_V2DI_V2DF_V2DF_UQI:
35109     case V4SF_FTYPE_V4SI_V4SF_V4SF_UQI:
35110     case V16SF_FTYPE_V16SF_V16SF_V16SF_UHI:
35111     case V16SF_FTYPE_V16SF_V16SI_V16SF_UHI:
35112     case V16SF_FTYPE_V16SI_V16SF_V16SF_UHI:
35113     case V16SI_FTYPE_V16SI_V16SI_V16SI_UHI:
35114     case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI:
35115     case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI:
35116     case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI:
35117     case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI:
35118     case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI:
35119     case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI:
35120     case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI:
35121     case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI:
35122     case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI:
35123     case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI:
35124     case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI:
35125     case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI:
35126     case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI:
35127     case V8DF_FTYPE_V8DF_V8DI_V8DF_UQI:
35128     case V8DF_FTYPE_V8DI_V8DF_V8DF_UQI:
35129     case V8DI_FTYPE_V16SI_V16SI_V8DI_UQI:
35130     case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI:
35131     case V8DI_FTYPE_V8DI_V8DI_V8DI_UQI:
35132     case V8HI_FTYPE_V16QI_V16QI_V8HI_UQI:
35133     case V16HI_FTYPE_V32QI_V32QI_V16HI_UHI:
35134     case V8SI_FTYPE_V16HI_V16HI_V8SI_UQI:
35135     case V4SI_FTYPE_V8HI_V8HI_V4SI_UQI:
35136       nargs = 4;
35137       break;
35138     case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
35139     case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
35140     case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
35141     case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
35142     case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
35143       nargs = 4;
35144       nargs_constant = 1;
35145       break;
35146     case UQI_FTYPE_V4DI_V4DI_INT_UQI:
35147     case UQI_FTYPE_V8SI_V8SI_INT_UQI:
35148     case QI_FTYPE_V4DF_V4DF_INT_UQI:
35149     case QI_FTYPE_V8SF_V8SF_INT_UQI:
35150     case UQI_FTYPE_V2DI_V2DI_INT_UQI:
35151     case UQI_FTYPE_V4SI_V4SI_INT_UQI:
35152     case UQI_FTYPE_V2DF_V2DF_INT_UQI:
35153     case UQI_FTYPE_V4SF_V4SF_INT_UQI:
35154     case UDI_FTYPE_V64QI_V64QI_INT_UDI:
35155     case USI_FTYPE_V32QI_V32QI_INT_USI:
35156     case UHI_FTYPE_V16QI_V16QI_INT_UHI:
35157     case USI_FTYPE_V32HI_V32HI_INT_USI:
35158     case UHI_FTYPE_V16HI_V16HI_INT_UHI:
35159     case UQI_FTYPE_V8HI_V8HI_INT_UQI:
35160     case V32HI_FTYPE_V32HI_V32HI_V32HI_INT:
35161     case V16HI_FTYPE_V16HI_V16HI_V16HI_INT:
35162     case V8HI_FTYPE_V8HI_V8HI_V8HI_INT:
35163     case V8SI_FTYPE_V8SI_V8SI_V8SI_INT:
35164     case V4DI_FTYPE_V4DI_V4DI_V4DI_INT:
35165     case V8DI_FTYPE_V8DI_V8DI_V8DI_INT:
35166     case V16SI_FTYPE_V16SI_V16SI_V16SI_INT:
35167     case V2DI_FTYPE_V2DI_V2DI_V2DI_INT:
35168     case V4SI_FTYPE_V4SI_V4SI_V4SI_INT:
35169       nargs = 4;
35170       mask_pos = 1;
35171       nargs_constant = 1;
35172       break;
35173     case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
35174       nargs = 4;
35175       nargs_constant = 2;
35176       break;
35177     case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
35178     case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
35179       nargs = 4;
35180       break;
35181     case UQI_FTYPE_V8DI_V8DI_INT_UQI:
35182     case UHI_FTYPE_V16SI_V16SI_INT_UHI:
35183       mask_pos = 1;
35184       nargs = 4;
35185       nargs_constant = 1;
35186       break;
35187     case V8SF_FTYPE_V8SF_INT_V8SF_UQI:
35188     case V4SF_FTYPE_V4SF_INT_V4SF_UQI:
35189     case V2DF_FTYPE_V4DF_INT_V2DF_UQI:
35190     case V2DI_FTYPE_V4DI_INT_V2DI_UQI:
35191     case V8SF_FTYPE_V16SF_INT_V8SF_UQI:
35192     case V8SI_FTYPE_V16SI_INT_V8SI_UQI:
35193     case V2DF_FTYPE_V8DF_INT_V2DF_UQI:
35194     case V2DI_FTYPE_V8DI_INT_V2DI_UQI:
35195     case V4SF_FTYPE_V8SF_INT_V4SF_UQI:
35196     case V4SI_FTYPE_V8SI_INT_V4SI_UQI:
35197     case V8HI_FTYPE_V8SF_INT_V8HI_UQI:
35198     case V8HI_FTYPE_V4SF_INT_V8HI_UQI:
35199     case V32HI_FTYPE_V32HI_INT_V32HI_USI:
35200     case V16HI_FTYPE_V16HI_INT_V16HI_UHI:
35201     case V8HI_FTYPE_V8HI_INT_V8HI_UQI:
35202     case V4DI_FTYPE_V4DI_INT_V4DI_UQI:
35203     case V2DI_FTYPE_V2DI_INT_V2DI_UQI:
35204     case V8SI_FTYPE_V8SI_INT_V8SI_UQI:
35205     case V4SI_FTYPE_V4SI_INT_V4SI_UQI:
35206     case V4DF_FTYPE_V4DF_INT_V4DF_UQI:
35207     case V2DF_FTYPE_V2DF_INT_V2DF_UQI:
35208     case V8DF_FTYPE_V8DF_INT_V8DF_UQI:
35209     case V16SF_FTYPE_V16SF_INT_V16SF_UHI:
35210     case V16HI_FTYPE_V16SF_INT_V16HI_UHI:
35211     case V16SI_FTYPE_V16SI_INT_V16SI_UHI:
35212     case V4SI_FTYPE_V16SI_INT_V4SI_UQI:
35213     case V4DI_FTYPE_V8DI_INT_V4DI_UQI:
35214     case V4DF_FTYPE_V8DF_INT_V4DF_UQI:
35215     case V4SF_FTYPE_V16SF_INT_V4SF_UQI:
35216     case V8DI_FTYPE_V8DI_INT_V8DI_UQI:
35217       nargs = 4;
35218       mask_pos = 2;
35219       nargs_constant = 1;
35220       break;
35221     case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI:
35222     case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI:
35223     case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI:
35224     case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI:
35225     case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI:
35226     case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI:
35227     case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI:
35228     case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI:
35229     case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI:
35230     case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI:
35231     case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI:
35232     case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI:
35233     case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI:
35234     case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI:
35235     case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI:
35236     case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI:
35237     case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI:
35238     case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI:
35239     case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI:
35240     case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI:
35241     case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI:
35242     case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI:
35243     case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI:
35244     case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI:
35245     case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI:
35246     case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI:
35247     case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI:
35248       nargs = 5;
35249       mask_pos = 2;
35250       nargs_constant = 1;
35251       break;
35252     case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI:
35253     case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI:
35254     case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI:
35255     case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI:
35256     case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI:
35257     case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI:
35258     case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI:
35259     case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI:
35260     case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI:
35261     case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI:
35262       nargs = 5;
35263       mask_pos = 1;
35264       nargs_constant = 1;
35265       break;
35266     case V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI:
35267     case V32QI_FTYPE_V32QI_V32QI_INT_V32QI_USI:
35268     case V16QI_FTYPE_V16QI_V16QI_INT_V16QI_UHI:
35269     case V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT:
35270     case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT:
35271     case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT:
35272     case V16HI_FTYPE_V16HI_V16HI_INT_V16HI_INT:
35273     case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_INT:
35274     case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_INT:
35275     case V8HI_FTYPE_V8HI_V8HI_INT_V8HI_INT:
35276     case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_INT:
35277     case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT:
35278       nargs = 5;
35279       mask_pos = 1;
35280       nargs_constant = 2;
35281       break;
35282 
35283     default:
35284       gcc_unreachable ();
35285     }
35286 
35287   gcc_assert (nargs <= ARRAY_SIZE (args));
35288 
35289   if (comparison != UNKNOWN)
35290     {
35291       gcc_assert (nargs == 2);
35292       return ix86_expand_sse_compare (d, exp, target, swap);
35293     }
35294 
35295   if (rmode == VOIDmode || rmode == tmode)
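  /* When rmode (forced by the _CONVERT function types above) differs from
     the pattern's result mode tmode, emit the pattern into a fresh tmode
     register and return that value through an rmode lowpart subreg.  */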
35296     {
35297       if (optimize
35298 	  || target == 0
35299 	  || GET_MODE (target) != tmode
35300 	  || !insn_p->operand[0].predicate (target, tmode))
35301 	target = gen_reg_rtx (tmode);
35302       else if (memory_operand (target, tmode))
35303 	num_memory++;
35304       real_target = target;
35305     }
35306   else
35307     {
35308       real_target = gen_reg_rtx (tmode);
35309       target = lowpart_subreg (rmode, real_target, tmode);
35310     }
35311 
35312   for (i = 0; i < nargs; i++)
35313     {
35314       tree arg = CALL_EXPR_ARG (exp, i);
35315       rtx op = expand_normal (arg);
35316       machine_mode mode = insn_p->operand[i + 1].mode;
35317       bool match = insn_p->operand[i + 1].predicate (op, mode);
35318 
35319       if (second_arg_count && i == 1)
35320 	{
35321 	  /* SIMD shift insns take either an 8-bit immediate or a
35322 	     register as the count, but the builtin functions take an
35323 	     int.  If the count does not match, put it in a register.
35324 	     The instructions use a 64-bit count; if op is only
35325 	     32 bits, zero-extend it, since negative shift counts are
35326 	     undefined behavior and zero extension is more
35327 	     efficient.  */
35328 	  if (!match)
35329 	    {
35330 	      if (SCALAR_INT_MODE_P (GET_MODE (op)))
35331 		op = convert_modes (mode, GET_MODE (op), op, 1);
35332 	      else
35333 		op = lowpart_subreg (mode, op, GET_MODE (op));
35334 	      if (!insn_p->operand[i + 1].predicate (op, mode))
35335 		op = copy_to_reg (op);
35336 	    }
35337 	}
35338       else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
35339 	       || (!mask_pos && (nargs - i) <= nargs_constant))
35340 	{
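	  /* This position holds an immediate argument; if the predicate
	     rejected it, report the exact immediate width the instruction
	     expects.  */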
35341 	  if (!match)
35342 	    switch (icode)
35343 	      {
35344 	      case CODE_FOR_avx_vinsertf128v4di:
35345 	      case CODE_FOR_avx_vextractf128v4di:
35346 		error ("the last argument must be a 1-bit immediate");
35347 		return const0_rtx;
35348 
35349 	      case CODE_FOR_avx512f_cmpv8di3_mask:
35350 	      case CODE_FOR_avx512f_cmpv16si3_mask:
35351 	      case CODE_FOR_avx512f_ucmpv8di3_mask:
35352 	      case CODE_FOR_avx512f_ucmpv16si3_mask:
35353 	      case CODE_FOR_avx512vl_cmpv4di3_mask:
35354 	      case CODE_FOR_avx512vl_cmpv8si3_mask:
35355 	      case CODE_FOR_avx512vl_ucmpv4di3_mask:
35356 	      case CODE_FOR_avx512vl_ucmpv8si3_mask:
35357 	      case CODE_FOR_avx512vl_cmpv2di3_mask:
35358 	      case CODE_FOR_avx512vl_cmpv4si3_mask:
35359 	      case CODE_FOR_avx512vl_ucmpv2di3_mask:
35360 	      case CODE_FOR_avx512vl_ucmpv4si3_mask:
35361 		error ("the last argument must be a 3-bit immediate");
35362 		return const0_rtx;
35363 
35364 	      case CODE_FOR_sse4_1_roundsd:
35365 	      case CODE_FOR_sse4_1_roundss:
35366 
35367 	      case CODE_FOR_sse4_1_roundpd:
35368 	      case CODE_FOR_sse4_1_roundps:
35369 	      case CODE_FOR_avx_roundpd256:
35370 	      case CODE_FOR_avx_roundps256:
35371 
35372 	      case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
35373 	      case CODE_FOR_sse4_1_roundps_sfix:
35374 	      case CODE_FOR_avx_roundpd_vec_pack_sfix256:
35375 	      case CODE_FOR_avx_roundps_sfix256:
35376 
35377 	      case CODE_FOR_sse4_1_blendps:
35378 	      case CODE_FOR_avx_blendpd256:
35379 	      case CODE_FOR_avx_vpermilv4df:
35380 	      case CODE_FOR_avx_vpermilv4df_mask:
35381 	      case CODE_FOR_avx512f_getmantv8df_mask:
35382 	      case CODE_FOR_avx512f_getmantv16sf_mask:
35383 	      case CODE_FOR_avx512vl_getmantv8sf_mask:
35384 	      case CODE_FOR_avx512vl_getmantv4df_mask:
35385 	      case CODE_FOR_avx512vl_getmantv4sf_mask:
35386 	      case CODE_FOR_avx512vl_getmantv2df_mask:
35387 	      case CODE_FOR_avx512dq_rangepv8df_mask_round:
35388 	      case CODE_FOR_avx512dq_rangepv16sf_mask_round:
35389 	      case CODE_FOR_avx512dq_rangepv4df_mask:
35390 	      case CODE_FOR_avx512dq_rangepv8sf_mask:
35391 	      case CODE_FOR_avx512dq_rangepv2df_mask:
35392 	      case CODE_FOR_avx512dq_rangepv4sf_mask:
35393 	      case CODE_FOR_avx_shufpd256_mask:
35394 		error ("the last argument must be a 4-bit immediate");
35395 		return const0_rtx;
35396 
35397 	      case CODE_FOR_sha1rnds4:
35398 	      case CODE_FOR_sse4_1_blendpd:
35399 	      case CODE_FOR_avx_vpermilv2df:
35400 	      case CODE_FOR_avx_vpermilv2df_mask:
35401 	      case CODE_FOR_xop_vpermil2v2df3:
35402 	      case CODE_FOR_xop_vpermil2v4sf3:
35403 	      case CODE_FOR_xop_vpermil2v4df3:
35404 	      case CODE_FOR_xop_vpermil2v8sf3:
35405 	      case CODE_FOR_avx512f_vinsertf32x4_mask:
35406 	      case CODE_FOR_avx512f_vinserti32x4_mask:
35407 	      case CODE_FOR_avx512f_vextractf32x4_mask:
35408 	      case CODE_FOR_avx512f_vextracti32x4_mask:
35409 	      case CODE_FOR_sse2_shufpd:
35410 	      case CODE_FOR_sse2_shufpd_mask:
35411 	      case CODE_FOR_avx512dq_shuf_f64x2_mask:
35412 	      case CODE_FOR_avx512dq_shuf_i64x2_mask:
35413 	      case CODE_FOR_avx512vl_shuf_i32x4_mask:
35414 	      case CODE_FOR_avx512vl_shuf_f32x4_mask:
35415 		error ("the last argument must be a 2-bit immediate");
35416 		return const0_rtx;
35417 
35418 	      case CODE_FOR_avx_vextractf128v4df:
35419 	      case CODE_FOR_avx_vextractf128v8sf:
35420 	      case CODE_FOR_avx_vextractf128v8si:
35421 	      case CODE_FOR_avx_vinsertf128v4df:
35422 	      case CODE_FOR_avx_vinsertf128v8sf:
35423 	      case CODE_FOR_avx_vinsertf128v8si:
35424 	      case CODE_FOR_avx512f_vinsertf64x4_mask:
35425 	      case CODE_FOR_avx512f_vinserti64x4_mask:
35426 	      case CODE_FOR_avx512f_vextractf64x4_mask:
35427 	      case CODE_FOR_avx512f_vextracti64x4_mask:
35428 	      case CODE_FOR_avx512dq_vinsertf32x8_mask:
35429 	      case CODE_FOR_avx512dq_vinserti32x8_mask:
35430 	      case CODE_FOR_avx512vl_vinsertv4df:
35431 	      case CODE_FOR_avx512vl_vinsertv4di:
35432 	      case CODE_FOR_avx512vl_vinsertv8sf:
35433 	      case CODE_FOR_avx512vl_vinsertv8si:
35434 		error ("the last argument must be a 1-bit immediate");
35435 		return const0_rtx;
35436 
35437 	      case CODE_FOR_avx_vmcmpv2df3:
35438 	      case CODE_FOR_avx_vmcmpv4sf3:
35439 	      case CODE_FOR_avx_cmpv2df3:
35440 	      case CODE_FOR_avx_cmpv4sf3:
35441 	      case CODE_FOR_avx_cmpv4df3:
35442 	      case CODE_FOR_avx_cmpv8sf3:
35443 	      case CODE_FOR_avx512f_cmpv8df3_mask:
35444 	      case CODE_FOR_avx512f_cmpv16sf3_mask:
35445 	      case CODE_FOR_avx512f_vmcmpv2df3_mask:
35446 	      case CODE_FOR_avx512f_vmcmpv4sf3_mask:
35447 		error ("the last argument must be a 5-bit immediate");
35448 		return const0_rtx;
35449 
35450 	      default:
35451 		switch (nargs_constant)
35452 		  {
35453 		  case 2:
35454 		    if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
35455 			|| (!mask_pos && (nargs - i) == nargs_constant))
35456 		      {
35457 			error ("the next to last argument must be an 8-bit immediate");
35458 			break;
35459 		      }
35460 		    /* FALLTHRU */
35461 		  case 1:
35462 		    error ("the last argument must be an 8-bit immediate");
35463 		    break;
35464 		  default:
35465 		    gcc_unreachable ();
35466 		  }
35467 		return const0_rtx;
35468 	      }
35469 	}
35470       else
35471 	{
35472 	  if (VECTOR_MODE_P (mode))
35473 	    op = safe_vector_operand (op, mode);
35474 
35475 	  /* If we aren't optimizing, only allow one memory operand to
35476 	     be generated.  */
35477 	  if (memory_operand (op, mode))
35478 	    num_memory++;
35479 
35480 	  op = fixup_modeless_constant (op, mode);
35481 
35482 	  if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
35483 	    {
35484 	      if (optimize || !match || num_memory > 1)
35485 		op = copy_to_mode_reg (mode, op);
35486 	    }
35487 	  else
35488 	    {
35489 	      op = copy_to_reg (op);
35490 	      op = lowpart_subreg (mode, op, GET_MODE (op));
35491 	    }
35492 	}
35493 
35494       args[i].op = op;
35495       args[i].mode = mode;
35496     }
35497 
35498   switch (nargs)
35499     {
35500     case 1:
35501       pat = GEN_FCN (icode) (real_target, args[0].op);
35502       break;
35503     case 2:
35504       pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
35505       break;
35506     case 3:
35507       pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
35508 			     args[2].op);
35509       break;
35510     case 4:
35511       pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
35512 			     args[2].op, args[3].op);
35513       break;
35514     case 5:
35515       pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
35516 			     args[2].op, args[3].op, args[4].op);
35517       break;
35518     case 6:
35519       pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
35520 			     args[2].op, args[3].op, args[4].op,
35521 			     args[5].op);
35522       break;
35523     default:
35524       gcc_unreachable ();
35525     }
35526 
35527   if (! pat)
35528     return 0;
35529 
35530   emit_insn (pat);
35531   return target;
35532 }
35533 
35534 /* Transform a pattern of the following layout:
35535      (set A
35536        (unspec [B C] UNSPEC_EMBEDDED_ROUNDING))
35538    into:
35539      (set A B)  */
35540 
35541 static rtx
35542 ix86_erase_embedded_rounding (rtx pat)
35543 {
35544   if (GET_CODE (pat) == INSN)
35545     pat = PATTERN (pat);
35546 
35547   gcc_assert (GET_CODE (pat) == SET);
35548   rtx src = SET_SRC (pat);
35549   gcc_assert (XVECLEN (src, 0) == 2);
35550   rtx p0 = XVECEXP (src, 0, 0);
35551   gcc_assert (GET_CODE (src) == UNSPEC
35552 	      && XINT (src, 1) == UNSPEC_EMBEDDED_ROUNDING);
35553   rtx res = gen_rtx_SET (SET_DEST (pat), p0);
35554   return res;
35555 }
35556 
35557 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
35558    with rounding.  */
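/* The third builtin argument selects one of the 32 comparison predicates
   (the _CMP_* values from avxintrin.h) and the fourth supplies the
   rounding/SAE control.  */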
35559 static rtx
35560 ix86_expand_sse_comi_round (const struct builtin_description *d,
35561 			    tree exp, rtx target)
35562 {
35563   rtx pat, set_dst;
35564   tree arg0 = CALL_EXPR_ARG (exp, 0);
35565   tree arg1 = CALL_EXPR_ARG (exp, 1);
35566   tree arg2 = CALL_EXPR_ARG (exp, 2);
35567   tree arg3 = CALL_EXPR_ARG (exp, 3);
35568   rtx op0 = expand_normal (arg0);
35569   rtx op1 = expand_normal (arg1);
35570   rtx op2 = expand_normal (arg2);
35571   rtx op3 = expand_normal (arg3);
35572   enum insn_code icode = d->icode;
35573   const struct insn_data_d *insn_p = &insn_data[icode];
35574   machine_mode mode0 = insn_p->operand[0].mode;
35575   machine_mode mode1 = insn_p->operand[1].mode;
35576   enum rtx_code comparison = UNEQ;
35577   bool need_ucomi = false;
35578 
35579   /* See avxintrin.h for values.  */
35580   enum rtx_code comi_comparisons[32] =
35581     {
35582       UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
35583       UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
35584       UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
35585     };
35586   bool need_ucomi_values[32] =
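  /* True entries use the non-signaling UCOMI form of the comparison
     instruction; the rest use COMI.  */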
35587     {
35588       true,  false, false, true,  true,  false, false, true,
35589       true,  false, false, true,  true,  false, false, true,
35590       false, true,  true,  false, false, true,  true,  false,
35591       false, true,  true,  false, false, true,  true,  false
35592     };
35593 
35594   if (!CONST_INT_P (op2))
35595     {
35596 	      error ("the third argument must be a comparison constant");
35597       return const0_rtx;
35598     }
35599   if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
35600     {
35601       error ("incorrect comparison mode");
35602       return const0_rtx;
35603     }
35604 
35605   if (!insn_p->operand[2].predicate (op3, SImode))
35606     {
35607       error ("incorrect rounding operand");
35608       return const0_rtx;
35609     }
35610 
35611   comparison = comi_comparisons[INTVAL (op2)];
35612   need_ucomi = need_ucomi_values[INTVAL (op2)];
35613 
35614   if (VECTOR_MODE_P (mode0))
35615     op0 = safe_vector_operand (op0, mode0);
35616   if (VECTOR_MODE_P (mode1))
35617     op1 = safe_vector_operand (op1, mode1);
35618 
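  /* Build the result in an SImode register zeroed up front; only its low
     QImode part is written below via STRICT_LOW_PART from the flags
     comparison, and the containing SImode register is returned.  */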
35619   target = gen_reg_rtx (SImode);
35620   emit_move_insn (target, const0_rtx);
35621   target = gen_rtx_SUBREG (QImode, target, 0);
35622 
35623   if ((optimize && !register_operand (op0, mode0))
35624       || !insn_p->operand[0].predicate (op0, mode0))
35625     op0 = copy_to_mode_reg (mode0, op0);
35626   if ((optimize && !register_operand (op1, mode1))
35627       || !insn_p->operand[1].predicate (op1, mode1))
35628     op1 = copy_to_mode_reg (mode1, op1);
35629 
35630   if (need_ucomi)
35631     icode = icode == CODE_FOR_sse_comi_round
35632 		     ? CODE_FOR_sse_ucomi_round
35633 		     : CODE_FOR_sse2_ucomi_round;
35634 
35635   pat = GEN_FCN (icode) (op0, op1, op3);
35636   if (! pat)
35637     return 0;
35638 
35639   /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point.  */
35640   if (INTVAL (op3) == NO_ROUND)
35641     {
35642       pat = ix86_erase_embedded_rounding (pat);
35643       if (! pat)
35644 	return 0;
35645 
35646       set_dst = SET_DEST (pat);
35647     }
35648   else
35649     {
35650       gcc_assert (GET_CODE (pat) == SET);
35651       set_dst = SET_DEST (pat);
35652     }
35653 
35654   emit_insn (pat);
35655   emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
35656 			  gen_rtx_fmt_ee (comparison, QImode,
35657 					  set_dst,
35658 					  const0_rtx)));
35659 
35660   return SUBREG_REG (target);
35661 }
35662 
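/* Subroutine of ix86_expand_builtin to take care of insns with embedded
   rounding, i.e. builtins whose last argument is the rounding immediate.  */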
35663 static rtx
35664 ix86_expand_round_builtin (const struct builtin_description *d,
35665 			   tree exp, rtx target)
35666 {
35667   rtx pat;
35668   unsigned int i, nargs;
35669   struct
35670     {
35671       rtx op;
35672       machine_mode mode;
35673     } args[6];
35674   enum insn_code icode = d->icode;
35675   const struct insn_data_d *insn_p = &insn_data[icode];
35676   machine_mode tmode = insn_p->operand[0].mode;
35677   unsigned int nargs_constant = 0;
35678   unsigned int redundant_embed_rnd = 0;
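  /* NARGS_CONSTANT, when nonzero, is the distance from the end of the
     argument list to the immediate operand that must satisfy its insn
     predicate; the last argument is always the rounding immediate.
     REDUNDANT_EMBED_RND is set when that immediate is NO_ROUND, in which
     case the embedded-rounding UNSPEC is erased again at the end.  */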
35679 
35680   switch ((enum ix86_builtin_func_type) d->flag)
35681     {
35682     case UINT64_FTYPE_V2DF_INT:
35683     case UINT64_FTYPE_V4SF_INT:
35684     case UINT_FTYPE_V2DF_INT:
35685     case UINT_FTYPE_V4SF_INT:
35686     case INT64_FTYPE_V2DF_INT:
35687     case INT64_FTYPE_V4SF_INT:
35688     case INT_FTYPE_V2DF_INT:
35689     case INT_FTYPE_V4SF_INT:
35690       nargs = 2;
35691       break;
35692     case V4SF_FTYPE_V4SF_UINT_INT:
35693     case V4SF_FTYPE_V4SF_UINT64_INT:
35694     case V2DF_FTYPE_V2DF_UINT64_INT:
35695     case V4SF_FTYPE_V4SF_INT_INT:
35696     case V4SF_FTYPE_V4SF_INT64_INT:
35697     case V2DF_FTYPE_V2DF_INT64_INT:
35698     case V4SF_FTYPE_V4SF_V4SF_INT:
35699     case V2DF_FTYPE_V2DF_V2DF_INT:
35700     case V4SF_FTYPE_V4SF_V2DF_INT:
35701     case V2DF_FTYPE_V2DF_V4SF_INT:
35702       nargs = 3;
35703       break;
35704     case V8SF_FTYPE_V8DF_V8SF_QI_INT:
35705     case V8DF_FTYPE_V8DF_V8DF_QI_INT:
35706     case V8SI_FTYPE_V8DF_V8SI_QI_INT:
35707     case V8DI_FTYPE_V8DF_V8DI_QI_INT:
35708     case V8SF_FTYPE_V8DI_V8SF_QI_INT:
35709     case V8DF_FTYPE_V8DI_V8DF_QI_INT:
35710     case V16SF_FTYPE_V16SF_V16SF_HI_INT:
35711     case V8DI_FTYPE_V8SF_V8DI_QI_INT:
35712     case V16SF_FTYPE_V16SI_V16SF_HI_INT:
35713     case V16SI_FTYPE_V16SF_V16SI_HI_INT:
35714     case V8DF_FTYPE_V8SF_V8DF_QI_INT:
35715     case V16SF_FTYPE_V16HI_V16SF_HI_INT:
35716     case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
35717     case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
35718       nargs = 4;
35719       break;
35720     case V4SF_FTYPE_V4SF_V4SF_INT_INT:
35721     case V2DF_FTYPE_V2DF_V2DF_INT_INT:
35722       nargs_constant = 2;
35723       nargs = 4;
35724       break;
35725     case INT_FTYPE_V4SF_V4SF_INT_INT:
35726     case INT_FTYPE_V2DF_V2DF_INT_INT:
35727       return ix86_expand_sse_comi_round (d, exp, target);
35728     case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT:
35729     case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT:
35730     case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT:
35731     case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
35732     case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
35733     case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
35734     case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
35735     case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
35736       nargs = 5;
35737       break;
35738     case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
35739     case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
35740       nargs_constant = 4;
35741       nargs = 5;
35742       break;
35743     case UQI_FTYPE_V8DF_V8DF_INT_UQI_INT:
35744     case UQI_FTYPE_V2DF_V2DF_INT_UQI_INT:
35745     case UHI_FTYPE_V16SF_V16SF_INT_UHI_INT:
35746     case UQI_FTYPE_V4SF_V4SF_INT_UQI_INT:
35747       nargs_constant = 3;
35748       nargs = 5;
35749       break;
35750     case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
35751     case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
35752     case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
35753     case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
35754     case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT:
35755     case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT:
35756       nargs = 6;
35757       nargs_constant = 4;
35758       break;
35759     case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
35760     case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
35761     case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
35762     case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
35763       nargs = 6;
35764       nargs_constant = 3;
35765       break;
35766     default:
35767       gcc_unreachable ();
35768     }
35769   gcc_assert (nargs <= ARRAY_SIZE (args));
35770 
35771   if (optimize
35772       || target == 0
35773       || GET_MODE (target) != tmode
35774       || !insn_p->operand[0].predicate (target, tmode))
35775     target = gen_reg_rtx (tmode);
35776 
35777   for (i = 0; i < nargs; i++)
35778     {
35779       tree arg = CALL_EXPR_ARG (exp, i);
35780       rtx op = expand_normal (arg);
35781       machine_mode mode = insn_p->operand[i + 1].mode;
35782       bool match = insn_p->operand[i + 1].predicate (op, mode);
35783 
35784       if (i == nargs - nargs_constant)
35785 	{
35786 	  if (!match)
35787 	    {
35788 	      switch (icode)
35789 		{
35790 		case CODE_FOR_avx512f_getmantv8df_mask_round:
35791 		case CODE_FOR_avx512f_getmantv16sf_mask_round:
35792 		case CODE_FOR_avx512f_vgetmantv2df_round:
35793 		case CODE_FOR_avx512f_vgetmantv2df_mask_round:
35794 		case CODE_FOR_avx512f_vgetmantv4sf_round:
35795 		case CODE_FOR_avx512f_vgetmantv4sf_mask_round:
35796 		  error ("the immediate argument must be a 4-bit immediate");
35797 		  return const0_rtx;
35798 		case CODE_FOR_avx512f_cmpv8df3_mask_round:
35799 		case CODE_FOR_avx512f_cmpv16sf3_mask_round:
35800 		case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
35801 		case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
35802 		  error ("the immediate argument must be a 5-bit immediate");
35803 		  return const0_rtx;
35804 		default:
35805 		  error ("the immediate argument must be an 8-bit immediate");
35806 		  return const0_rtx;
35807 		}
35808 	    }
35809 	}
35810       else if (i == nargs - 1)
35811 	{
35812 	  if (!insn_p->operand[nargs].predicate (op, SImode))
35813 	    {
35814 	      error ("incorrect rounding operand");
35815 	      return const0_rtx;
35816 	    }
35817 
35818 	  /* If there is no rounding, use the normal version of the pattern.  */
35819 	  if (INTVAL (op) == NO_ROUND)
35820 	    redundant_embed_rnd = 1;
35821 	}
35822       else
35823 	{
35824 	  if (VECTOR_MODE_P (mode))
35825 	    op = safe_vector_operand (op, mode);
35826 
35827 	  op = fixup_modeless_constant (op, mode);
35828 
35829 	  if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
35830 	    {
35831 	      if (optimize || !match)
35832 		op = copy_to_mode_reg (mode, op);
35833 	    }
35834 	  else
35835 	    {
35836 	      op = copy_to_reg (op);
35837 	      op = lowpart_subreg (mode, op, GET_MODE (op));
35838 	    }
35839 	}
35840 
35841       args[i].op = op;
35842       args[i].mode = mode;
35843     }
35844 
35845   switch (nargs)
35846     {
35847     case 1:
35848       pat = GEN_FCN (icode) (target, args[0].op);
35849       break;
35850     case 2:
35851       pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
35852       break;
35853     case 3:
35854       pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
35855 			     args[2].op);
35856       break;
35857     case 4:
35858       pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
35859 			     args[2].op, args[3].op);
35860       break;
35861     case 5:
35862       pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
35863 			     args[2].op, args[3].op, args[4].op);
35864       break;
35865     case 6:
35866       pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
35867 			     args[2].op, args[3].op, args[4].op,
35868 			     args[5].op);
35869       break;
35870     default:
35871       gcc_unreachable ();
35872     }
35873 
35874   if (!pat)
35875     return 0;
35876 
35877   if (redundant_embed_rnd)
35878     pat = ix86_erase_embedded_rounding (pat);
35879 
35880   emit_insn (pat);
35881   return target;
35882 }
35883 
35884 /* Subroutine of ix86_expand_builtin to take care of special insns
35885    with variable number of operands.  */
35886 
35887 static rtx
35888 ix86_expand_special_args_builtin (const struct builtin_description *d,
35889 				  tree exp, rtx target)
35890 {
35891   tree arg;
35892   rtx pat, op;
35893   unsigned int i, nargs, arg_adjust, memory;
35894   bool aligned_mem = false;
35895   struct
35896     {
35897       rtx op;
35898       machine_mode mode;
35899     } args[3];
35900   enum insn_code icode = d->icode;
35901   bool last_arg_constant = false;
35902   const struct insn_data_d *insn_p = &insn_data[icode];
35903   machine_mode tmode = insn_p->operand[0].mode;
35904   enum { load, store } klass;
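  /* For KLASS == store the first CALL_EXPR argument supplies the insn's
     destination operand and nothing is returned; for KLASS == load the
     result goes to TARGET.  MEMORY is the index of the looped-over operand
     that must be wrapped in a MEM; ARRAY_SIZE (args), which no loop index
     reaches, means none of them is.  */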
35905 
35906   switch ((enum ix86_builtin_func_type) d->flag)
35907     {
35908     case VOID_FTYPE_VOID:
35909       emit_insn (GEN_FCN (icode) (target));
35910       return 0;
35911     case VOID_FTYPE_UINT64:
35912     case VOID_FTYPE_UNSIGNED:
35913       nargs = 0;
35914       klass = store;
35915       memory = 0;
35916       break;
35917 
35918     case INT_FTYPE_VOID:
35919     case USHORT_FTYPE_VOID:
35920     case UINT64_FTYPE_VOID:
35921     case UINT_FTYPE_VOID:
35922     case UNSIGNED_FTYPE_VOID:
35923       nargs = 0;
35924       klass = load;
35925       memory = 0;
35926       break;
35927     case UINT64_FTYPE_PUNSIGNED:
35928     case V2DI_FTYPE_PV2DI:
35929     case V4DI_FTYPE_PV4DI:
35930     case V32QI_FTYPE_PCCHAR:
35931     case V16QI_FTYPE_PCCHAR:
35932     case V8SF_FTYPE_PCV4SF:
35933     case V8SF_FTYPE_PCFLOAT:
35934     case V4SF_FTYPE_PCFLOAT:
35935     case V4DF_FTYPE_PCV2DF:
35936     case V4DF_FTYPE_PCDOUBLE:
35937     case V2DF_FTYPE_PCDOUBLE:
35938     case VOID_FTYPE_PVOID:
35939     case V8DI_FTYPE_PV8DI:
35940       nargs = 1;
35941       klass = load;
35942       memory = 0;
35943       switch (icode)
35944 	{
35945 	case CODE_FOR_sse4_1_movntdqa:
35946 	case CODE_FOR_avx2_movntdqa:
35947 	case CODE_FOR_avx512f_movntdqa:
35948 	  aligned_mem = true;
35949 	  break;
35950 	default:
35951 	  break;
35952 	}
35953       break;
35954     case VOID_FTYPE_PV2SF_V4SF:
35955     case VOID_FTYPE_PV8DI_V8DI:
35956     case VOID_FTYPE_PV4DI_V4DI:
35957     case VOID_FTYPE_PV2DI_V2DI:
35958     case VOID_FTYPE_PCHAR_V32QI:
35959     case VOID_FTYPE_PCHAR_V16QI:
35960     case VOID_FTYPE_PFLOAT_V16SF:
35961     case VOID_FTYPE_PFLOAT_V8SF:
35962     case VOID_FTYPE_PFLOAT_V4SF:
35963     case VOID_FTYPE_PDOUBLE_V8DF:
35964     case VOID_FTYPE_PDOUBLE_V4DF:
35965     case VOID_FTYPE_PDOUBLE_V2DF:
35966     case VOID_FTYPE_PLONGLONG_LONGLONG:
35967     case VOID_FTYPE_PULONGLONG_ULONGLONG:
35968     case VOID_FTYPE_PUNSIGNED_UNSIGNED:
35969     case VOID_FTYPE_PINT_INT:
35970       nargs = 1;
35971       klass = store;
35972       /* Reserve memory operand for target.  */
35973       memory = ARRAY_SIZE (args);
35974       switch (icode)
35975 	{
35976 	/* These builtins and instructions require the memory
35977 	   to be properly aligned.  */
35978 	case CODE_FOR_avx_movntv4di:
35979 	case CODE_FOR_sse2_movntv2di:
35980 	case CODE_FOR_avx_movntv8sf:
35981 	case CODE_FOR_sse_movntv4sf:
35982 	case CODE_FOR_sse4a_vmmovntv4sf:
35983 	case CODE_FOR_avx_movntv4df:
35984 	case CODE_FOR_sse2_movntv2df:
35985 	case CODE_FOR_sse4a_vmmovntv2df:
35986 	case CODE_FOR_sse2_movntidi:
35987 	case CODE_FOR_sse_movntq:
35988 	case CODE_FOR_sse2_movntisi:
35989 	case CODE_FOR_avx512f_movntv16sf:
35990 	case CODE_FOR_avx512f_movntv8df:
35991 	case CODE_FOR_avx512f_movntv8di:
35992 	  aligned_mem = true;
35993 	  break;
35994 	default:
35995 	  break;
35996 	}
35997       break;
35998     case VOID_FTYPE_PVOID_PCVOID:
35999       nargs = 1;
36000       klass = store;
36001       memory = 0;
36002       break;
36004     case V4SF_FTYPE_V4SF_PCV2SF:
36005     case V2DF_FTYPE_V2DF_PCDOUBLE:
36006       nargs = 2;
36007       klass = load;
36008       memory = 1;
36009       break;
36010     case V8SF_FTYPE_PCV8SF_V8SI:
36011     case V4DF_FTYPE_PCV4DF_V4DI:
36012     case V4SF_FTYPE_PCV4SF_V4SI:
36013     case V2DF_FTYPE_PCV2DF_V2DI:
36014     case V8SI_FTYPE_PCV8SI_V8SI:
36015     case V4DI_FTYPE_PCV4DI_V4DI:
36016     case V4SI_FTYPE_PCV4SI_V4SI:
36017     case V2DI_FTYPE_PCV2DI_V2DI:
36018     case VOID_FTYPE_INT_INT64:
36019       nargs = 2;
36020       klass = load;
36021       memory = 0;
36022       break;
36023     case VOID_FTYPE_PV8DF_V8DF_UQI:
36024     case VOID_FTYPE_PV4DF_V4DF_UQI:
36025     case VOID_FTYPE_PV2DF_V2DF_UQI:
36026     case VOID_FTYPE_PV16SF_V16SF_UHI:
36027     case VOID_FTYPE_PV8SF_V8SF_UQI:
36028     case VOID_FTYPE_PV4SF_V4SF_UQI:
36029     case VOID_FTYPE_PV8DI_V8DI_UQI:
36030     case VOID_FTYPE_PV4DI_V4DI_UQI:
36031     case VOID_FTYPE_PV2DI_V2DI_UQI:
36032     case VOID_FTYPE_PV16SI_V16SI_UHI:
36033     case VOID_FTYPE_PV8SI_V8SI_UQI:
36034     case VOID_FTYPE_PV4SI_V4SI_UQI:
36035     case VOID_FTYPE_PV64QI_V64QI_UDI:
36036     case VOID_FTYPE_PV32HI_V32HI_USI:
36037     case VOID_FTYPE_PV32QI_V32QI_USI:
36038     case VOID_FTYPE_PV16QI_V16QI_UHI:
36039     case VOID_FTYPE_PV16HI_V16HI_UHI:
36040     case VOID_FTYPE_PV8HI_V8HI_UQI:
36041       switch (icode)
36042 	{
36043 	/* These builtins and instructions require the memory
36044 	   to be properly aligned.  */
36045 	case CODE_FOR_avx512f_storev16sf_mask:
36046 	case CODE_FOR_avx512f_storev16si_mask:
36047 	case CODE_FOR_avx512f_storev8df_mask:
36048 	case CODE_FOR_avx512f_storev8di_mask:
36049 	case CODE_FOR_avx512vl_storev8sf_mask:
36050 	case CODE_FOR_avx512vl_storev8si_mask:
36051 	case CODE_FOR_avx512vl_storev4df_mask:
36052 	case CODE_FOR_avx512vl_storev4di_mask:
36053 	case CODE_FOR_avx512vl_storev4sf_mask:
36054 	case CODE_FOR_avx512vl_storev4si_mask:
36055 	case CODE_FOR_avx512vl_storev2df_mask:
36056 	case CODE_FOR_avx512vl_storev2di_mask:
36057 	  aligned_mem = true;
36058 	  break;
36059 	default:
36060 	  break;
36061 	}
36062       /* FALLTHRU */
36063     case VOID_FTYPE_PV8SF_V8SI_V8SF:
36064     case VOID_FTYPE_PV4DF_V4DI_V4DF:
36065     case VOID_FTYPE_PV4SF_V4SI_V4SF:
36066     case VOID_FTYPE_PV2DF_V2DI_V2DF:
36067     case VOID_FTYPE_PV8SI_V8SI_V8SI:
36068     case VOID_FTYPE_PV4DI_V4DI_V4DI:
36069     case VOID_FTYPE_PV4SI_V4SI_V4SI:
36070     case VOID_FTYPE_PV2DI_V2DI_V2DI:
36071     case VOID_FTYPE_PV8SI_V8DI_UQI:
36072     case VOID_FTYPE_PV8HI_V8DI_UQI:
36073     case VOID_FTYPE_PV16HI_V16SI_UHI:
36074     case VOID_FTYPE_PV16QI_V8DI_UQI:
36075     case VOID_FTYPE_PV16QI_V16SI_UHI:
36076     case VOID_FTYPE_PV4SI_V4DI_UQI:
36077     case VOID_FTYPE_PV4SI_V2DI_UQI:
36078     case VOID_FTYPE_PV8HI_V4DI_UQI:
36079     case VOID_FTYPE_PV8HI_V2DI_UQI:
36080     case VOID_FTYPE_PV8HI_V8SI_UQI:
36081     case VOID_FTYPE_PV8HI_V4SI_UQI:
36082     case VOID_FTYPE_PV16QI_V4DI_UQI:
36083     case VOID_FTYPE_PV16QI_V2DI_UQI:
36084     case VOID_FTYPE_PV16QI_V8SI_UQI:
36085     case VOID_FTYPE_PV16QI_V4SI_UQI:
36086     case VOID_FTYPE_PCHAR_V64QI_UDI:
36087     case VOID_FTYPE_PCHAR_V32QI_USI:
36088     case VOID_FTYPE_PCHAR_V16QI_UHI:
36089     case VOID_FTYPE_PSHORT_V32HI_USI:
36090     case VOID_FTYPE_PSHORT_V16HI_UHI:
36091     case VOID_FTYPE_PSHORT_V8HI_UQI:
36092     case VOID_FTYPE_PINT_V16SI_UHI:
36093     case VOID_FTYPE_PINT_V8SI_UQI:
36094     case VOID_FTYPE_PINT_V4SI_UQI:
36095     case VOID_FTYPE_PINT64_V8DI_UQI:
36096     case VOID_FTYPE_PINT64_V4DI_UQI:
36097     case VOID_FTYPE_PINT64_V2DI_UQI:
36098     case VOID_FTYPE_PDOUBLE_V8DF_UQI:
36099     case VOID_FTYPE_PDOUBLE_V4DF_UQI:
36100     case VOID_FTYPE_PDOUBLE_V2DF_UQI:
36101     case VOID_FTYPE_PFLOAT_V16SF_UHI:
36102     case VOID_FTYPE_PFLOAT_V8SF_UQI:
36103     case VOID_FTYPE_PFLOAT_V4SF_UQI:
36104     case VOID_FTYPE_PV32QI_V32HI_USI:
36105     case VOID_FTYPE_PV16QI_V16HI_UHI:
36106     case VOID_FTYPE_PV8QI_V8HI_UQI:
36107       nargs = 2;
36108       klass = store;
36109       /* Reserve memory operand for target.  */
36110       memory = ARRAY_SIZE (args);
36111       break;
36112     case V4SF_FTYPE_PCV4SF_V4SF_UQI:
36113     case V8SF_FTYPE_PCV8SF_V8SF_UQI:
36114     case V16SF_FTYPE_PCV16SF_V16SF_UHI:
36115     case V4SI_FTYPE_PCV4SI_V4SI_UQI:
36116     case V8SI_FTYPE_PCV8SI_V8SI_UQI:
36117     case V16SI_FTYPE_PCV16SI_V16SI_UHI:
36118     case V2DF_FTYPE_PCV2DF_V2DF_UQI:
36119     case V4DF_FTYPE_PCV4DF_V4DF_UQI:
36120     case V8DF_FTYPE_PCV8DF_V8DF_UQI:
36121     case V2DI_FTYPE_PCV2DI_V2DI_UQI:
36122     case V4DI_FTYPE_PCV4DI_V4DI_UQI:
36123     case V8DI_FTYPE_PCV8DI_V8DI_UQI:
36124     case V64QI_FTYPE_PCV64QI_V64QI_UDI:
36125     case V32HI_FTYPE_PCV32HI_V32HI_USI:
36126     case V32QI_FTYPE_PCV32QI_V32QI_USI:
36127     case V16QI_FTYPE_PCV16QI_V16QI_UHI:
36128     case V16HI_FTYPE_PCV16HI_V16HI_UHI:
36129     case V8HI_FTYPE_PCV8HI_V8HI_UQI:
36130       switch (icode)
36131 	{
36132 	/* These builtins and instructions require the memory
36133 	   to be properly aligned.  */
36134 	case CODE_FOR_avx512f_loadv16sf_mask:
36135 	case CODE_FOR_avx512f_loadv16si_mask:
36136 	case CODE_FOR_avx512f_loadv8df_mask:
36137 	case CODE_FOR_avx512f_loadv8di_mask:
36138 	case CODE_FOR_avx512vl_loadv8sf_mask:
36139 	case CODE_FOR_avx512vl_loadv8si_mask:
36140 	case CODE_FOR_avx512vl_loadv4df_mask:
36141 	case CODE_FOR_avx512vl_loadv4di_mask:
36142 	case CODE_FOR_avx512vl_loadv4sf_mask:
36143 	case CODE_FOR_avx512vl_loadv4si_mask:
36144 	case CODE_FOR_avx512vl_loadv2df_mask:
36145 	case CODE_FOR_avx512vl_loadv2di_mask:
36146 	case CODE_FOR_avx512bw_loadv64qi_mask:
36147 	case CODE_FOR_avx512vl_loadv32qi_mask:
36148 	case CODE_FOR_avx512vl_loadv16qi_mask:
36149 	case CODE_FOR_avx512bw_loadv32hi_mask:
36150 	case CODE_FOR_avx512vl_loadv16hi_mask:
36151 	case CODE_FOR_avx512vl_loadv8hi_mask:
36152 	  aligned_mem = true;
36153 	  break;
36154 	default:
36155 	  break;
36156 	}
36157       /* FALLTHRU */
36158     case V64QI_FTYPE_PCCHAR_V64QI_UDI:
36159     case V32QI_FTYPE_PCCHAR_V32QI_USI:
36160     case V16QI_FTYPE_PCCHAR_V16QI_UHI:
36161     case V32HI_FTYPE_PCSHORT_V32HI_USI:
36162     case V16HI_FTYPE_PCSHORT_V16HI_UHI:
36163     case V8HI_FTYPE_PCSHORT_V8HI_UQI:
36164     case V16SI_FTYPE_PCINT_V16SI_UHI:
36165     case V8SI_FTYPE_PCINT_V8SI_UQI:
36166     case V4SI_FTYPE_PCINT_V4SI_UQI:
36167     case V8DI_FTYPE_PCINT64_V8DI_UQI:
36168     case V4DI_FTYPE_PCINT64_V4DI_UQI:
36169     case V2DI_FTYPE_PCINT64_V2DI_UQI:
36170     case V8DF_FTYPE_PCDOUBLE_V8DF_UQI:
36171     case V4DF_FTYPE_PCDOUBLE_V4DF_UQI:
36172     case V2DF_FTYPE_PCDOUBLE_V2DF_UQI:
36173     case V16SF_FTYPE_PCFLOAT_V16SF_UHI:
36174     case V8SF_FTYPE_PCFLOAT_V8SF_UQI:
36175     case V4SF_FTYPE_PCFLOAT_V4SF_UQI:
36176       nargs = 3;
36177       klass = load;
36178       memory = 0;
36179       break;
36180     case VOID_FTYPE_UINT_UINT_UINT:
36181     case VOID_FTYPE_UINT64_UINT_UINT:
36182     case UCHAR_FTYPE_UINT_UINT_UINT:
36183     case UCHAR_FTYPE_UINT64_UINT_UINT:
36184       nargs = 3;
36185       klass = load;
36186       memory = ARRAY_SIZE (args);
36187       last_arg_constant = true;
36188       break;
36189     default:
36190       gcc_unreachable ();
36191     }
36192 
36193   gcc_assert (nargs <= ARRAY_SIZE (args));
36194 
36195   if (klass == store)
36196     {
36197       arg = CALL_EXPR_ARG (exp, 0);
36198       op = expand_normal (arg);
36199       gcc_assert (target == 0);
36200       if (memory)
36201 	{
36202 	  op = ix86_zero_extend_to_Pmode (op);
36203 	  target = gen_rtx_MEM (tmode, op);
36204 	  /* target at this point has just BITS_PER_UNIT MEM_ALIGN
36205 	     on it.  Try to improve it using get_pointer_alignment,
36206 	     and if the special builtin is one that requires strict
36207 	     mode alignment, also from its GET_MODE_ALIGNMENT.
36208 	     Failure to do so could lead to ix86_legitimate_combined_insn
36209 	     rejecting all changes to such insns.  */
36210 	  unsigned int align = get_pointer_alignment (arg);
36211 	  if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
36212 	    align = GET_MODE_ALIGNMENT (tmode);
36213 	  if (MEM_ALIGN (target) < align)
36214 	    set_mem_align (target, align);
36215 	}
36216       else
36217 	target = force_reg (tmode, op);
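      /* The first CALL_EXPR argument has been consumed as the destination,
	 so operand I below maps to CALL_EXPR argument I + 1.  */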
36218       arg_adjust = 1;
36219     }
36220   else
36221     {
36222       arg_adjust = 0;
36223       if (optimize
36224 	  || target == 0
36225 	  || !register_operand (target, tmode)
36226 	  || GET_MODE (target) != tmode)
36227 	target = gen_reg_rtx (tmode);
36228     }
36229 
36230   for (i = 0; i < nargs; i++)
36231     {
36232       machine_mode mode = insn_p->operand[i + 1].mode;
36233       bool match;
36234 
36235       arg = CALL_EXPR_ARG (exp, i + arg_adjust);
36236       op = expand_normal (arg);
36237       match = insn_p->operand[i + 1].predicate (op, mode);
36238 
36239       if (last_arg_constant && (i + 1) == nargs)
36240 	{
36241 	  if (!match)
36242 	    {
36243 	      if (icode == CODE_FOR_lwp_lwpvalsi3
36244 		  || icode == CODE_FOR_lwp_lwpinssi3
36245 		  || icode == CODE_FOR_lwp_lwpvaldi3
36246 		  || icode == CODE_FOR_lwp_lwpinsdi3)
36247 		error ("the last argument must be a 32-bit immediate");
36248 	      else
36249 		error ("the last argument must be an 8-bit immediate");
36250 	      return const0_rtx;
36251 	    }
36252 	}
36253       else
36254 	{
36255 	  if (i == memory)
36256 	    {
36257 	      /* This must be the memory operand.  */
36258 	      op = ix86_zero_extend_to_Pmode (op);
36259 	      op = gen_rtx_MEM (mode, op);
36260 	      /* op at this point has just BITS_PER_UNIT MEM_ALIGN
36261 		 on it.  Try to improve it using get_pointer_alignment,
36262 		 and if the special builtin is one that requires strict
36263 		 mode alignment, also from its GET_MODE_ALIGNMENT.
36264 		 Failure to do so could lead to ix86_legitimate_combined_insn
36265 		 rejecting all changes to such insns.  */
36266 	      unsigned int align = get_pointer_alignment (arg);
36267 	      if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
36268 		align = GET_MODE_ALIGNMENT (mode);
36269 	      if (MEM_ALIGN (op) < align)
36270 		set_mem_align (op, align);
36271 	    }
36272 	  else
36273 	    {
36274 	      /* This must be a register.  */
36275 	      if (VECTOR_MODE_P (mode))
36276 		op = safe_vector_operand (op, mode);
36277 
36278 	      op = fixup_modeless_constant (op, mode);
36279 
36280 	      if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
36281 		op = copy_to_mode_reg (mode, op);
36282 	      else
36283 	        {
36284 	          op = copy_to_reg (op);
36285 	          op = lowpart_subreg (mode, op, GET_MODE (op));
36286 	        }
36287 	    }
36288 	}
36289 
36290       args[i].op = op;
36291       args[i].mode = mode;
36292     }
36293 
36294   switch (nargs)
36295     {
36296     case 0:
36297       pat = GEN_FCN (icode) (target);
36298       break;
36299     case 1:
36300       pat = GEN_FCN (icode) (target, args[0].op);
36301       break;
36302     case 2:
36303       pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
36304       break;
36305     case 3:
36306       pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
36307       break;
36308     default:
36309       gcc_unreachable ();
36310     }
36311 
36312   if (! pat)
36313     return 0;
36314   emit_insn (pat);
36315   return klass == store ? 0 : target;
36316 }
36317 
36318 /* Return the integer constant in ARG.  Constrain it to be in the range
36319    of the subparts of VEC_TYPE; issue an error if not.  */
36320 
36321 static int
36322 get_element_number (tree vec_type, tree arg)
36323 {
36324   unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
36325 
36326   if (!tree_fits_uhwi_p (arg)
36327       || (elt = tree_to_uhwi (arg), elt > max))
36328     {
36329       error ("selector must be an integer constant in the range 0..%wi", max);
36330       return 0;
36331     }
36332 
36333   return elt;
36334 }
36335 
36336 /* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
36337    ix86_expand_vector_init.  We DO have language-level syntax for this, in
36338    the form of  (type){ init-list }.  Except that since we can't place emms
36339    instructions from inside the compiler, we can't allow the use of MMX
36340    registers unless the user explicitly asks for it.  So we do *not* define
36341    vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
36342    we have builtins invoked by mmintrin.h that give us license to emit
36343    these sorts of instructions.  */
36344 
36345 static rtx
36346 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
36347 {
36348   machine_mode tmode = TYPE_MODE (type);
36349   machine_mode inner_mode = GET_MODE_INNER (tmode);
36350   int i, n_elt = GET_MODE_NUNITS (tmode);
36351   rtvec v = rtvec_alloc (n_elt);
36352 
36353   gcc_assert (VECTOR_MODE_P (tmode));
36354   gcc_assert (call_expr_nargs (exp) == n_elt);
36355 
36356   for (i = 0; i < n_elt; ++i)
36357     {
36358       rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
36359       RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
36360     }
36361 
36362   if (!target || !register_operand (target, tmode))
36363     target = gen_reg_rtx (tmode);
36364 
36365   ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
36366   return target;
36367 }
36368 
36369 /* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
36370    ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
36371    had a language-level syntax for referencing vector elements.  */
36372 
36373 static rtx
36374 ix86_expand_vec_ext_builtin (tree exp, rtx target)
36375 {
36376   machine_mode tmode, mode0;
36377   tree arg0, arg1;
36378   int elt;
36379   rtx op0;
36380 
36381   arg0 = CALL_EXPR_ARG (exp, 0);
36382   arg1 = CALL_EXPR_ARG (exp, 1);
36383 
36384   op0 = expand_normal (arg0);
36385   elt = get_element_number (TREE_TYPE (arg0), arg1);
36386 
36387   tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
36388   mode0 = TYPE_MODE (TREE_TYPE (arg0));
36389   gcc_assert (VECTOR_MODE_P (mode0));
36390 
36391   op0 = force_reg (mode0, op0);
36392 
36393   if (optimize || !target || !register_operand (target, tmode))
36394     target = gen_reg_rtx (tmode);
36395 
36396   ix86_expand_vector_extract (true, target, op0, elt);
36397 
36398   return target;
36399 }
36400 
36401 /* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
36402    ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
36403    a language-level syntax for referencing vector elements.  */
36404 
36405 static rtx
36406 ix86_expand_vec_set_builtin (tree exp)
36407 {
36408   machine_mode tmode, mode1;
36409   tree arg0, arg1, arg2;
36410   int elt;
36411   rtx op0, op1, target;
36412 
36413   arg0 = CALL_EXPR_ARG (exp, 0);
36414   arg1 = CALL_EXPR_ARG (exp, 1);
36415   arg2 = CALL_EXPR_ARG (exp, 2);
36416 
36417   tmode = TYPE_MODE (TREE_TYPE (arg0));
36418   mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
36419   gcc_assert (VECTOR_MODE_P (tmode));
36420 
36421   op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
36422   op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
36423   elt = get_element_number (TREE_TYPE (arg0), arg2);
36424 
36425   if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
36426     op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
36427 
36428   op0 = force_reg (tmode, op0);
36429   op1 = force_reg (mode1, op1);
36430 
36431   /* OP0 is the source of these builtin functions and shouldn't be
36432      modified.  Create a copy, use it, and return it as the target.  */
36433   target = gen_reg_rtx (tmode);
36434   emit_move_insn (target, op0);
36435   ix86_expand_vector_set (true, target, op1, elt);
36436 
36437   return target;
36438 }
36439 
36440 /* Emit conditional move of SRC to DST with condition
36441    OP1 CODE OP2.  */
36442 static void
36443 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
36444 {
36445   rtx t;
36446 
36447   if (TARGET_CMOVE)
36448     {
36449       t = ix86_expand_compare (code, op1, op2);
36450       emit_insn (gen_rtx_SET (dst, gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
36451 							 src, dst)));
36452     }
36453   else
36454     {
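      /* No CMOV available: branch around the move when the condition
	 does not hold.  */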
36455       rtx_code_label *nomove = gen_label_rtx ();
36456       emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
36457 			       const0_rtx, GET_MODE (op1), 1, nomove);
36458       emit_move_insn (dst, src);
36459       emit_label (nomove);
36460     }
36461 }
36462 
36463 /* Choose max of DST and SRC and put it to DST.  */
36464 static void
36465 ix86_emit_move_max (rtx dst, rtx src)
36466 {
36467   ix86_emit_cmove (dst, src, LTU, dst, src);
36468 }
36469 
36470 /* Expand an expression EXP that calls a built-in function,
36471    with result going to TARGET if that's convenient
36472    (and in mode MODE if that's convenient).
36473    SUBTARGET may be used as the target for computing one of EXP's operands.
36474    IGNORE is nonzero if the value is to be ignored.  */
36475 
36476 static rtx
36477 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
36478 		     machine_mode mode, int ignore)
36479 {
36480   size_t i;
36481   enum insn_code icode, icode2;
36482   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
36483   tree arg0, arg1, arg2, arg3, arg4;
36484   rtx op0, op1, op2, op3, op4, pat, pat2, insn;
36485   machine_mode mode0, mode1, mode2, mode3, mode4;
36486   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
36487 
36488   /* For CPU builtins that can be folded, fold first and expand the fold.  */
36489   switch (fcode)
36490     {
36491     case IX86_BUILTIN_CPU_INIT:
36492       {
36493 	/* Make it call __cpu_indicator_init in libgcc. */
36494 	tree call_expr, fndecl, type;
36495         type = build_function_type_list (integer_type_node, NULL_TREE);
36496 	fndecl = build_fn_decl ("__cpu_indicator_init", type);
36497 	call_expr = build_call_expr (fndecl, 0);
36498 	return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
36499       }
36500     case IX86_BUILTIN_CPU_IS:
36501     case IX86_BUILTIN_CPU_SUPPORTS:
36502       {
36503 	tree arg0 = CALL_EXPR_ARG (exp, 0);
36504 	tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
36505 	gcc_assert (fold_expr != NULL_TREE);
36506 	return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
36507       }
36508     }
36509 
36510   HOST_WIDE_INT isa = ix86_isa_flags;
36511   HOST_WIDE_INT isa2 = ix86_isa_flags2;
36512   HOST_WIDE_INT bisa = ix86_builtins_isa[fcode].isa;
36513   HOST_WIDE_INT bisa2 = ix86_builtins_isa[fcode].isa2;
36514   /* In the general case we require all the ISAs specified in bisa{,2}
36515      to be enabled.
36516      The exceptions are:
36517      OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
36518      OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32
36519      OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4
36520      where for each such pair it is sufficient if either of the ISAs is
36521      enabled; any other ISA bits ORed in with the pair must be enabled too.  */
36522   if (((bisa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A))
36523        == (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A))
36524       && (isa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A)) != 0)
36525     isa |= (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A);
36526   if (((bisa & (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32))
36527        == (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32))
36528       && (isa & (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32)) != 0)
36529     isa |= (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32);
36530   if (((bisa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4))
36531        == (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4))
36532       && (isa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) != 0)
36533     isa |= (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4);
36534   if ((bisa & isa) != bisa || (bisa2 & isa2) != bisa2)
36535     {
36536       char *opts = ix86_target_string (bisa, bisa2, 0, 0, NULL, NULL,
36537 				       (enum fpmath_unit) 0, false);
36538       if (!opts)
36539 	error ("%qE needs unknown isa option", fndecl);
36540       else
36541 	{
36542 	  gcc_assert (opts != NULL);
36543 	  error ("%qE needs isa option %s", fndecl, opts);
36544 	  free (opts);
36545 	}
36546       return expand_call (exp, target, ignore);
36547     }
36548 
36549   switch (fcode)
36550     {
36551     case IX86_BUILTIN_BNDMK:
36552       if (!target
36553 	  || GET_MODE (target) != BNDmode
36554 	  || !register_operand (target, BNDmode))
36555 	target = gen_reg_rtx (BNDmode);
36556 
36557       arg0 = CALL_EXPR_ARG (exp, 0);
36558       arg1 = CALL_EXPR_ARG (exp, 1);
36559 
36560       op0 = expand_normal (arg0);
36561       op1 = expand_normal (arg1);
36562 
36563       if (!register_operand (op0, Pmode))
36564 	op0 = ix86_zero_extend_to_Pmode (op0);
36565       if (!register_operand (op1, Pmode))
36566 	op1 = ix86_zero_extend_to_Pmode (op1);
36567 
36568       /* Builtin arg1 is the size of the block, but instruction op1 should
36569 	 be (size - 1).  */
36570       op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
36571 				 NULL_RTX, 1, OPTAB_DIRECT);
36572 
36573       emit_insn (BNDmode == BND64mode
36574                  ? gen_bnd64_mk (target, op0, op1)
36575                  : gen_bnd32_mk (target, op0, op1));
36576       return target;
36577 
36578     case IX86_BUILTIN_BNDSTX:
36579       arg0 = CALL_EXPR_ARG (exp, 0);
36580       arg1 = CALL_EXPR_ARG (exp, 1);
36581       arg2 = CALL_EXPR_ARG (exp, 2);
36582 
36583       op0 = expand_normal (arg0);
36584       op1 = expand_normal (arg1);
36585       op2 = expand_normal (arg2);
36586 
36587       if (!register_operand (op0, Pmode))
36588 	op0 = ix86_zero_extend_to_Pmode (op0);
36589       if (!register_operand (op1, BNDmode))
36590 	op1 = copy_to_mode_reg (BNDmode, op1);
36591       if (!register_operand (op2, Pmode))
36592 	op2 = ix86_zero_extend_to_Pmode (op2);
36593 
36594       emit_insn (BNDmode == BND64mode
36595                  ? gen_bnd64_stx (op2, op0, op1)
36596                  : gen_bnd32_stx (op2, op0, op1));
36597       return 0;
36598 
36599     case IX86_BUILTIN_BNDLDX:
36600       if (!target
36601 	  || GET_MODE (target) != BNDmode
36602 	  || !register_operand (target, BNDmode))
36603 	target = gen_reg_rtx (BNDmode);
36604 
36605       arg0 = CALL_EXPR_ARG (exp, 0);
36606       arg1 = CALL_EXPR_ARG (exp, 1);
36607 
36608       op0 = expand_normal (arg0);
36609       op1 = expand_normal (arg1);
36610 
36611       if (!register_operand (op0, Pmode))
36612 	op0 = ix86_zero_extend_to_Pmode (op0);
36613       if (!register_operand (op1, Pmode))
36614 	op1 = ix86_zero_extend_to_Pmode (op1);
36615 
36616       emit_insn (BNDmode == BND64mode
36617 		 ? gen_bnd64_ldx (target, op0, op1)
36618 		 : gen_bnd32_ldx (target, op0, op1));
36619       return target;
36620 
36621     case IX86_BUILTIN_BNDCL:
36622       arg0 = CALL_EXPR_ARG (exp, 0);
36623       arg1 = CALL_EXPR_ARG (exp, 1);
36624 
36625       op0 = expand_normal (arg0);
36626       op1 = expand_normal (arg1);
36627 
36628       if (!register_operand (op0, Pmode))
36629 	op0 = ix86_zero_extend_to_Pmode (op0);
36630       if (!register_operand (op1, BNDmode))
36631 	op1 = copy_to_mode_reg (BNDmode, op1);
36632 
36633       emit_insn (BNDmode == BND64mode
36634                  ? gen_bnd64_cl (op1, op0)
36635                  : gen_bnd32_cl (op1, op0));
36636       return 0;
36637 
36638     case IX86_BUILTIN_BNDCU:
36639       arg0 = CALL_EXPR_ARG (exp, 0);
36640       arg1 = CALL_EXPR_ARG (exp, 1);
36641 
36642       op0 = expand_normal (arg0);
36643       op1 = expand_normal (arg1);
36644 
36645       if (!register_operand (op0, Pmode))
36646 	op0 = ix86_zero_extend_to_Pmode (op0);
36647       if (!register_operand (op1, BNDmode))
36648 	op1 = copy_to_mode_reg (BNDmode, op1);
36649 
36650       emit_insn (BNDmode == BND64mode
36651                  ? gen_bnd64_cu (op1, op0)
36652                  : gen_bnd32_cu (op1, op0));
36653       return 0;
36654 
36655     case IX86_BUILTIN_BNDRET:
36656       arg0 = CALL_EXPR_ARG (exp, 0);
36657       target = chkp_get_rtl_bounds (arg0);
36658 
36659       /* If no bounds were specified for the returned value,
36660 	 then use INIT bounds.  This usually happens when
36661 	 some built-in function is expanded.  */
36662       if (!target)
36663 	{
36664 	  rtx t1 = gen_reg_rtx (Pmode);
36665 	  rtx t2 = gen_reg_rtx (Pmode);
36666 	  target = gen_reg_rtx (BNDmode);
36667 	  emit_move_insn (t1, const0_rtx);
36668 	  emit_move_insn (t2, constm1_rtx);
36669 	  emit_insn (BNDmode == BND64mode
36670 		     ? gen_bnd64_mk (target, t1, t2)
36671 		     : gen_bnd32_mk (target, t1, t2));
36672 	}
36673 
36674       gcc_assert (target && REG_P (target));
36675       return target;
36676 
36677     case IX86_BUILTIN_BNDNARROW:
36678       {
36679 	rtx m1, m1h1, m1h2, lb, ub, t1;
36680 
36681 	/* Return value and lb.  */
36682 	arg0 = CALL_EXPR_ARG (exp, 0);
36683 	/* Bounds.  */
36684 	arg1 = CALL_EXPR_ARG (exp, 1);
36685 	/* Size.  */
36686 	arg2 = CALL_EXPR_ARG (exp, 2);
36687 
36688 	lb = expand_normal (arg0);
36689 	op1 = expand_normal (arg1);
36690 	op2 = expand_normal (arg2);
36691 
36692 	/* Size was passed but we need to use (size - 1) as for bndmk.  */
36693 	op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
36694 				   NULL_RTX, 1, OPTAB_DIRECT);
36695 
36696 	/* Add LB to the size and invert the result to get UB.  */
36697 	op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
36698 				   op2, 1, OPTAB_DIRECT);
36699 	ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
36700 
36701 	if (!register_operand (lb, Pmode))
36702 	  lb = ix86_zero_extend_to_Pmode (lb);
36703 	if (!register_operand (ub, Pmode))
36704 	  ub = ix86_zero_extend_to_Pmode (ub);
36705 
36706 	/* We need to move bounds to memory before any computations.  */
36707 	if (MEM_P (op1))
36708 	  m1 = op1;
36709 	else
36710 	  {
36711 	    m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
36712 	    emit_move_insn (m1, op1);
36713 	  }
36714 
36715 	/* Generate mem expression to be used for access to LB and UB.  */
36716 	m1h1 = adjust_address (m1, Pmode, 0);
36717 	m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
36718 
36719 	t1 = gen_reg_rtx (Pmode);
36720 
36721 	/* Compute LB.  */
36722 	emit_move_insn (t1, m1h1);
36723 	ix86_emit_move_max (t1, lb);
36724 	emit_move_insn (m1h1, t1);
36725 
36726 	/* Compute UB.  UB is stored in 1's complement form.  Therefore
36727 	   we also use max here.  */
36728 	emit_move_insn (t1, m1h2);
36729 	ix86_emit_move_max (t1, ub);
36730 	emit_move_insn (m1h2, t1);
36731 
36732 	op2 = gen_reg_rtx (BNDmode);
36733 	emit_move_insn (op2, m1);
36734 
36735 	return chkp_join_splitted_slot (lb, op2);
36736       }
36737 
36738     case IX86_BUILTIN_BNDINT:
36739       {
36740 	rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
36741 
36742 	if (!target
36743 	    || GET_MODE (target) != BNDmode
36744 	    || !register_operand (target, BNDmode))
36745 	  target = gen_reg_rtx (BNDmode);
36746 
36747 	arg0 = CALL_EXPR_ARG (exp, 0);
36748 	arg1 = CALL_EXPR_ARG (exp, 1);
36749 
36750 	op0 = expand_normal (arg0);
36751 	op1 = expand_normal (arg1);
36752 
36753 	res = assign_386_stack_local (BNDmode, SLOT_TEMP);
36754 	rh1 = adjust_address (res, Pmode, 0);
36755 	rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
36756 
36757 	/* Put the first bounds into temporaries.  */
36758 	lb1 = gen_reg_rtx (Pmode);
36759 	ub1 = gen_reg_rtx (Pmode);
36760 	if (MEM_P (op0))
36761 	  {
36762 	    emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
36763 	    emit_move_insn (ub1, adjust_address (op0, Pmode,
36764 						 GET_MODE_SIZE (Pmode)));
36765 	  }
36766 	else
36767 	  {
36768 	    emit_move_insn (res, op0);
36769 	    emit_move_insn (lb1, rh1);
36770 	    emit_move_insn (ub1, rh2);
36771 	  }
36772 
36773 	/* Put the second bounds into temporaries.  */
36774 	lb2 = gen_reg_rtx (Pmode);
36775 	ub2 = gen_reg_rtx (Pmode);
36776 	if (MEM_P (op1))
36777 	  {
36778 	    emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
36779 	    emit_move_insn (ub2, adjust_address (op1, Pmode,
36780 						 GET_MODE_SIZE (Pmode)));
36781 	  }
36782 	else
36783 	  {
36784 	    emit_move_insn (res, op1);
36785 	    emit_move_insn (lb2, rh1);
36786 	    emit_move_insn (ub2, rh2);
36787 	  }
36788 
36789 	/* Compute LB.  */
36790 	ix86_emit_move_max (lb1, lb2);
36791 	emit_move_insn (rh1, lb1);
36792 
36793 	/* Compute UB.  UB is stored in 1's complement form.  Therefore
36794 	   we also use max here.  */
36795 	ix86_emit_move_max (ub1, ub2);
36796 	emit_move_insn (rh2, ub1);
36797 
36798 	emit_move_insn (target, res);
36799 
36800 	return target;
36801       }
36802 
36803     case IX86_BUILTIN_SIZEOF:
36804       {
36805 	tree name;
36806 	rtx symbol;
36807 
36808 	if (!target
36809 	    || GET_MODE (target) != Pmode
36810 	    || !register_operand (target, Pmode))
36811 	  target = gen_reg_rtx (Pmode);
36812 
36813 	arg0 = CALL_EXPR_ARG (exp, 0);
36814 	gcc_assert (VAR_P (arg0));
36815 
36816 	name = DECL_ASSEMBLER_NAME (arg0);
36817 	symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
36818 
36819 	emit_insn (Pmode == SImode
36820 		   ? gen_move_size_reloc_si (target, symbol)
36821 		   : gen_move_size_reloc_di (target, symbol));
36822 
36823 	return target;
36824       }
36825 
36826     case IX86_BUILTIN_BNDLOWER:
36827       {
36828 	rtx mem, hmem;
36829 
36830 	if (!target
36831 	    || GET_MODE (target) != Pmode
36832 	    || !register_operand (target, Pmode))
36833 	  target = gen_reg_rtx (Pmode);
36834 
36835 	arg0 = CALL_EXPR_ARG (exp, 0);
36836 	op0 = expand_normal (arg0);
36837 
36838 	/* We need to move bounds to memory first.  */
36839 	if (MEM_P (op0))
36840 	  mem = op0;
36841 	else
36842 	  {
36843 	    mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
36844 	    emit_move_insn (mem, op0);
36845 	  }
36846 
36847 	/* Generate mem expression to access LB and load it.  */
36848 	hmem = adjust_address (mem, Pmode, 0);
36849 	emit_move_insn (target, hmem);
36850 
36851 	return target;
36852       }
36853 
36854     case IX86_BUILTIN_BNDUPPER:
36855       {
36856 	rtx mem, hmem, res;
36857 
36858 	if (!target
36859 	    || GET_MODE (target) != Pmode
36860 	    || !register_operand (target, Pmode))
36861 	  target = gen_reg_rtx (Pmode);
36862 
36863 	arg0 = CALL_EXPR_ARG (exp, 0);
36864 	op0 = expand_normal (arg0);
36865 
36866 	/* We need to move bounds to memory first.  */
36867 	if (MEM_P (op0))
36868 	  mem = op0;
36869 	else
36870 	  {
36871 	    mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
36872 	    emit_move_insn (mem, op0);
36873 	  }
36874 
36875 	/* Generate mem expression to access UB.  */
36876 	hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
36877 
36878 	/* We need to invert all bits of UB.  */
36879 	res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
36880 
36881 	if (res != target)
36882 	  emit_move_insn (target, res);
36883 
36884 	return target;
36885       }
36886 
36887     case IX86_BUILTIN_MASKMOVQ:
36888     case IX86_BUILTIN_MASKMOVDQU:
36889       icode = (fcode == IX86_BUILTIN_MASKMOVQ
36890 	       ? CODE_FOR_mmx_maskmovq
36891 	       : CODE_FOR_sse2_maskmovdqu);
36892       /* Note the arg order is different from the operand order.  */
36893       arg1 = CALL_EXPR_ARG (exp, 0);
36894       arg2 = CALL_EXPR_ARG (exp, 1);
36895       arg0 = CALL_EXPR_ARG (exp, 2);
36896       op0 = expand_normal (arg0);
36897       op1 = expand_normal (arg1);
36898       op2 = expand_normal (arg2);
36899       mode0 = insn_data[icode].operand[0].mode;
36900       mode1 = insn_data[icode].operand[1].mode;
36901       mode2 = insn_data[icode].operand[2].mode;
36902 
36903       op0 = ix86_zero_extend_to_Pmode (op0);
36904       op0 = gen_rtx_MEM (mode1, op0);
36905 
36906       if (!insn_data[icode].operand[0].predicate (op0, mode0))
36907 	op0 = copy_to_mode_reg (mode0, op0);
36908       if (!insn_data[icode].operand[1].predicate (op1, mode1))
36909 	op1 = copy_to_mode_reg (mode1, op1);
36910       if (!insn_data[icode].operand[2].predicate (op2, mode2))
36911 	op2 = copy_to_mode_reg (mode2, op2);
36912       pat = GEN_FCN (icode) (op0, op1, op2);
36913       if (! pat)
36914 	return 0;
36915       emit_insn (pat);
36916       return 0;
36917 
36918     case IX86_BUILTIN_LDMXCSR:
36919       op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
36920       target = assign_386_stack_local (SImode, SLOT_TEMP);
36921       emit_move_insn (target, op0);
36922       emit_insn (gen_sse_ldmxcsr (target));
36923       return 0;
36924 
36925     case IX86_BUILTIN_STMXCSR:
36926       target = assign_386_stack_local (SImode, SLOT_TEMP);
36927       emit_insn (gen_sse_stmxcsr (target));
36928       return copy_to_mode_reg (SImode, target);
36929 
36930     case IX86_BUILTIN_CLFLUSH:
36931 	arg0 = CALL_EXPR_ARG (exp, 0);
36932 	op0 = expand_normal (arg0);
36933 	icode = CODE_FOR_sse2_clflush;
36934 	if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36935 	  op0 = ix86_zero_extend_to_Pmode (op0);
36936 
36937 	emit_insn (gen_sse2_clflush (op0));
36938 	return 0;
36939 
36940     case IX86_BUILTIN_CLWB:
36941 	arg0 = CALL_EXPR_ARG (exp, 0);
36942 	op0 = expand_normal (arg0);
36943 	icode = CODE_FOR_clwb;
36944 	if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36945 	  op0 = ix86_zero_extend_to_Pmode (op0);
36946 
36947 	emit_insn (gen_clwb (op0));
36948 	return 0;
36949 
36950     case IX86_BUILTIN_CLFLUSHOPT:
36951 	arg0 = CALL_EXPR_ARG (exp, 0);
36952 	op0 = expand_normal (arg0);
36953 	icode = CODE_FOR_clflushopt;
36954 	if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36955 	  op0 = ix86_zero_extend_to_Pmode (op0);
36956 
36957 	emit_insn (gen_clflushopt (op0));
36958 	return 0;
36959 
36960     case IX86_BUILTIN_MONITOR:
36961     case IX86_BUILTIN_MONITORX:
36962       arg0 = CALL_EXPR_ARG (exp, 0);
36963       arg1 = CALL_EXPR_ARG (exp, 1);
36964       arg2 = CALL_EXPR_ARG (exp, 2);
36965       op0 = expand_normal (arg0);
36966       op1 = expand_normal (arg1);
36967       op2 = expand_normal (arg2);
36968       if (!REG_P (op0))
36969 	op0 = ix86_zero_extend_to_Pmode (op0);
36970       if (!REG_P (op1))
36971 	op1 = copy_to_mode_reg (SImode, op1);
36972       if (!REG_P (op2))
36973 	op2 = copy_to_mode_reg (SImode, op2);
36974 
36975       emit_insn (fcode == IX86_BUILTIN_MONITOR
36976 		 ? ix86_gen_monitor (op0, op1, op2)
36977 		 : ix86_gen_monitorx (op0, op1, op2));
36978       return 0;
36979 
36980     case IX86_BUILTIN_MWAIT:
36981       arg0 = CALL_EXPR_ARG (exp, 0);
36982       arg1 = CALL_EXPR_ARG (exp, 1);
36983       op0 = expand_normal (arg0);
36984       op1 = expand_normal (arg1);
36985       if (!REG_P (op0))
36986 	op0 = copy_to_mode_reg (SImode, op0);
36987       if (!REG_P (op1))
36988 	op1 = copy_to_mode_reg (SImode, op1);
36989       emit_insn (gen_sse3_mwait (op0, op1));
36990       return 0;
36991 
36992     case IX86_BUILTIN_MWAITX:
36993       arg0 = CALL_EXPR_ARG (exp, 0);
36994       arg1 = CALL_EXPR_ARG (exp, 1);
36995       arg2 = CALL_EXPR_ARG (exp, 2);
36996       op0 = expand_normal (arg0);
36997       op1 = expand_normal (arg1);
36998       op2 = expand_normal (arg2);
36999       if (!REG_P (op0))
37000 	op0 = copy_to_mode_reg (SImode, op0);
37001       if (!REG_P (op1))
37002 	op1 = copy_to_mode_reg (SImode, op1);
37003       if (!REG_P (op2))
37004 	op2 = copy_to_mode_reg (SImode, op2);
37005       emit_insn (gen_mwaitx (op0, op1, op2));
37006       return 0;
37007 
37008     case IX86_BUILTIN_CLZERO:
37009       arg0 = CALL_EXPR_ARG (exp, 0);
37010       op0 = expand_normal (arg0);
37011       if (!REG_P (op0))
37012 	op0 = ix86_zero_extend_to_Pmode (op0);
37013       emit_insn (ix86_gen_clzero (op0));
37014       return 0;
37015 
37016     case IX86_BUILTIN_VEC_INIT_V2SI:
37017     case IX86_BUILTIN_VEC_INIT_V4HI:
37018     case IX86_BUILTIN_VEC_INIT_V8QI:
37019       return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
37020 
37021     case IX86_BUILTIN_VEC_EXT_V2DF:
37022     case IX86_BUILTIN_VEC_EXT_V2DI:
37023     case IX86_BUILTIN_VEC_EXT_V4SF:
37024     case IX86_BUILTIN_VEC_EXT_V4SI:
37025     case IX86_BUILTIN_VEC_EXT_V8HI:
37026     case IX86_BUILTIN_VEC_EXT_V2SI:
37027     case IX86_BUILTIN_VEC_EXT_V4HI:
37028     case IX86_BUILTIN_VEC_EXT_V16QI:
37029       return ix86_expand_vec_ext_builtin (exp, target);
37030 
37031     case IX86_BUILTIN_VEC_SET_V2DI:
37032     case IX86_BUILTIN_VEC_SET_V4SF:
37033     case IX86_BUILTIN_VEC_SET_V4SI:
37034     case IX86_BUILTIN_VEC_SET_V8HI:
37035     case IX86_BUILTIN_VEC_SET_V4HI:
37036     case IX86_BUILTIN_VEC_SET_V16QI:
37037       return ix86_expand_vec_set_builtin (exp);
37038 
37039     case IX86_BUILTIN_NANQ:
37040     case IX86_BUILTIN_NANSQ:
37041       return expand_call (exp, target, ignore);
37042 
37043     case IX86_BUILTIN_RDPID:
37044 
37045       op0 = gen_reg_rtx (TARGET_64BIT ? DImode : SImode);
37046 
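      /* The builtin returns an unsigned int, so on 64-bit targets narrow
	 the DImode result of the pattern to SImode.  */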
37047       if (TARGET_64BIT)
37048 	{
37049 	  insn = gen_rdpid_rex64 (op0);
37050 	  op0 = convert_to_mode (SImode, op0, 1);
37051 	}
37052       else
37053 	insn = gen_rdpid (op0);
37054       emit_insn (insn);
37055 
37056       if (target == 0)
37057 	{
37058 	  /* mode is VOIDmode if __builtin_rdpid has been called
37059 	     without lhs.  */
37060 	  if (mode == VOIDmode)
37061 	    return target;
37062 	  target = gen_reg_rtx (mode);
37063 	}
37064       emit_move_insn (target, op0);
37065       return target;
37066     case IX86_BUILTIN_RDPMC:
37067     case IX86_BUILTIN_RDTSC:
37068     case IX86_BUILTIN_RDTSCP:
37069     case IX86_BUILTIN_XGETBV:
37070 
37071       op0 = gen_reg_rtx (DImode);
37072       op1 = gen_reg_rtx (DImode);
37073 
37074       if (fcode == IX86_BUILTIN_RDPMC)
37075 	{
37076 	  arg0 = CALL_EXPR_ARG (exp, 0);
37077 	  op2 = expand_normal (arg0);
37078 	  if (!register_operand (op2, SImode))
37079 	    op2 = copy_to_mode_reg (SImode, op2);
37080 
37081 	  insn = (TARGET_64BIT
37082 		  ? gen_rdpmc_rex64 (op0, op1, op2)
37083 		  : gen_rdpmc (op0, op2));
37084 	  emit_insn (insn);
37085 	}
37086       else if (fcode == IX86_BUILTIN_XGETBV)
37087 	{
37088 	  arg0 = CALL_EXPR_ARG (exp, 0);
37089 	  op2 = expand_normal (arg0);
37090 	  if (!register_operand (op2, SImode))
37091 	    op2 = copy_to_mode_reg (SImode, op2);
37092 
37093 	  insn = (TARGET_64BIT
37094 		  ? gen_xgetbv_rex64 (op0, op1, op2)
37095 		  : gen_xgetbv (op0, op2));
37096 	  emit_insn (insn);
37097 	}
37098       else if (fcode == IX86_BUILTIN_RDTSC)
37099 	{
37100 	  insn = (TARGET_64BIT
37101 		  ? gen_rdtsc_rex64 (op0, op1)
37102 		  : gen_rdtsc (op0));
37103 	  emit_insn (insn);
37104 	}
37105       else
37106 	{
37107 	  op2 = gen_reg_rtx (SImode);
37108 
37109 	  insn = (TARGET_64BIT
37110 		  ? gen_rdtscp_rex64 (op0, op1, op2)
37111 		  : gen_rdtscp (op0, op2));
37112 	  emit_insn (insn);
37113 
37114 	  arg0 = CALL_EXPR_ARG (exp, 0);
37115 	  op4 = expand_normal (arg0);
37116 	  if (!address_operand (op4, VOIDmode))
37117 	    {
37118 	      op4 = convert_memory_address (Pmode, op4);
37119 	      op4 = copy_addr_to_reg (op4);
37120 	    }
37121 	  emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
37122 	}
37123 
37124       if (target == 0)
37125 	{
37126 	  /* mode is VOIDmode if __builtin_rd* has been called
37127 	     without lhs.  */
37128 	  if (mode == VOIDmode)
37129 	    return target;
37130 	  target = gen_reg_rtx (mode);
37131 	}
37132 
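      /* On 64-bit targets the result comes back as two DImode halves
	 (EDX:EAX); shift the high half into bits 32-63 and merge.  */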
37133       if (TARGET_64BIT)
37134 	{
37135 	  op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
37136 				     op1, 1, OPTAB_DIRECT);
37137 	  op0 = expand_simple_binop (DImode, IOR, op0, op1,
37138 				     op0, 1, OPTAB_DIRECT);
37139 	}
37140 
37141       emit_move_insn (target, op0);
37142       return target;
37143 
37144     case IX86_BUILTIN_MOVDIR64B:
37145 
37146       arg0 = CALL_EXPR_ARG (exp, 0);
37147       arg1 = CALL_EXPR_ARG (exp, 1);
37148       op0 = expand_normal (arg0);
37149       op1 = expand_normal (arg1);
37150 
37151       op0 = ix86_zero_extend_to_Pmode (op0);
37152       if (!address_operand (op1, VOIDmode))
37153 	{
37154 	  op1 = convert_memory_address (Pmode, op1);
37155 	  op1 = copy_addr_to_reg (op1);
37156 	}
37157       op1 = gen_rtx_MEM (XImode, op1);
37158 
37159       insn = (TARGET_64BIT
37160 		? gen_movdir64b_di (op0, op1)
37161 		: gen_movdir64b_si (op0, op1));
37162       emit_insn (insn);
37163       return 0;
37164 
37165     case IX86_BUILTIN_FXSAVE:
37166     case IX86_BUILTIN_FXRSTOR:
37167     case IX86_BUILTIN_FXSAVE64:
37168     case IX86_BUILTIN_FXRSTOR64:
37169     case IX86_BUILTIN_FNSTENV:
37170     case IX86_BUILTIN_FLDENV:
37171       mode0 = BLKmode;
37172       switch (fcode)
37173 	{
37174 	case IX86_BUILTIN_FXSAVE:
37175 	  icode = CODE_FOR_fxsave;
37176 	  break;
37177 	case IX86_BUILTIN_FXRSTOR:
37178 	  icode = CODE_FOR_fxrstor;
37179 	  break;
37180 	case IX86_BUILTIN_FXSAVE64:
37181 	  icode = CODE_FOR_fxsave64;
37182 	  break;
37183 	case IX86_BUILTIN_FXRSTOR64:
37184 	  icode = CODE_FOR_fxrstor64;
37185 	  break;
37186 	case IX86_BUILTIN_FNSTENV:
37187 	  icode = CODE_FOR_fnstenv;
37188 	  break;
37189 	case IX86_BUILTIN_FLDENV:
37190 	  icode = CODE_FOR_fldenv;
37191 	  break;
37192 	default:
37193 	  gcc_unreachable ();
37194 	}
37195 
37196       arg0 = CALL_EXPR_ARG (exp, 0);
37197       op0 = expand_normal (arg0);
37198 
37199       if (!address_operand (op0, VOIDmode))
37200 	{
37201 	  op0 = convert_memory_address (Pmode, op0);
37202 	  op0 = copy_addr_to_reg (op0);
37203 	}
37204       op0 = gen_rtx_MEM (mode0, op0);
37205 
37206       pat = GEN_FCN (icode) (op0);
37207       if (pat)
37208 	emit_insn (pat);
37209       return 0;
37210 
37211     case IX86_BUILTIN_XSETBV:
37212       arg0 = CALL_EXPR_ARG (exp, 0);
37213       arg1 = CALL_EXPR_ARG (exp, 1);
37214       op0 = expand_normal (arg0);
37215       op1 = expand_normal (arg1);
37216 
37217       if (!REG_P (op0))
37218 	op0 = copy_to_mode_reg (SImode, op0);
37219 
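      /* xsetbv consumes the new XCR value in EDX:EAX; on 64-bit targets
	 split OP1 into its two SImode halves explicitly.  */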
37220       if (TARGET_64BIT)
37221 	{
37222 	  op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
37223 				     NULL, 1, OPTAB_DIRECT);
37224 
37225 	  op2 = gen_lowpart (SImode, op2);
37226 	  op1 = gen_lowpart (SImode, op1);
37227 	  if (!REG_P (op1))
37228 	    op1 = copy_to_mode_reg (SImode, op1);
37229 	  if (!REG_P (op2))
37230 	    op2 = copy_to_mode_reg (SImode, op2);
37231 	  icode = CODE_FOR_xsetbv_rex64;
37232 	  pat = GEN_FCN (icode) (op0, op1, op2);
37233 	}
37234       else
37235 	{
37236 	  if (!REG_P (op1))
37237 	    op1 = copy_to_mode_reg (DImode, op1);
37238 	  icode = CODE_FOR_xsetbv;
37239 	  pat = GEN_FCN (icode) (op0, op1);
37240 	}
37241       if (pat)
37242 	emit_insn (pat);
37243       return 0;
37244 
37245     case IX86_BUILTIN_XSAVE:
37246     case IX86_BUILTIN_XRSTOR:
37247     case IX86_BUILTIN_XSAVE64:
37248     case IX86_BUILTIN_XRSTOR64:
37249     case IX86_BUILTIN_XSAVEOPT:
37250     case IX86_BUILTIN_XSAVEOPT64:
37251     case IX86_BUILTIN_XSAVES:
37252     case IX86_BUILTIN_XRSTORS:
37253     case IX86_BUILTIN_XSAVES64:
37254     case IX86_BUILTIN_XRSTORS64:
37255     case IX86_BUILTIN_XSAVEC:
37256     case IX86_BUILTIN_XSAVEC64:
37257       arg0 = CALL_EXPR_ARG (exp, 0);
37258       arg1 = CALL_EXPR_ARG (exp, 1);
37259       op0 = expand_normal (arg0);
37260       op1 = expand_normal (arg1);
37261 
37262       if (!address_operand (op0, VOIDmode))
37263 	{
37264 	  op0 = convert_memory_address (Pmode, op0);
37265 	  op0 = copy_addr_to_reg (op0);
37266 	}
37267       op0 = gen_rtx_MEM (BLKmode, op0);
37268 
37269       op1 = force_reg (DImode, op1);
37270 
37271       if (TARGET_64BIT)
37272 	{
37273 	  op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
37274 				     NULL, 1, OPTAB_DIRECT);
37275 	  switch (fcode)
37276 	    {
37277 	    case IX86_BUILTIN_XSAVE:
37278 	      icode = CODE_FOR_xsave_rex64;
37279 	      break;
37280 	    case IX86_BUILTIN_XRSTOR:
37281 	      icode = CODE_FOR_xrstor_rex64;
37282 	      break;
37283 	    case IX86_BUILTIN_XSAVE64:
37284 	      icode = CODE_FOR_xsave64;
37285 	      break;
37286 	    case IX86_BUILTIN_XRSTOR64:
37287 	      icode = CODE_FOR_xrstor64;
37288 	      break;
37289 	    case IX86_BUILTIN_XSAVEOPT:
37290 	      icode = CODE_FOR_xsaveopt_rex64;
37291 	      break;
37292 	    case IX86_BUILTIN_XSAVEOPT64:
37293 	      icode = CODE_FOR_xsaveopt64;
37294 	      break;
37295 	    case IX86_BUILTIN_XSAVES:
37296 	      icode = CODE_FOR_xsaves_rex64;
37297 	      break;
37298 	    case IX86_BUILTIN_XRSTORS:
37299 	      icode = CODE_FOR_xrstors_rex64;
37300 	      break;
37301 	    case IX86_BUILTIN_XSAVES64:
37302 	      icode = CODE_FOR_xsaves64;
37303 	      break;
37304 	    case IX86_BUILTIN_XRSTORS64:
37305 	      icode = CODE_FOR_xrstors64;
37306 	      break;
37307 	    case IX86_BUILTIN_XSAVEC:
37308 	      icode = CODE_FOR_xsavec_rex64;
37309 	      break;
37310 	    case IX86_BUILTIN_XSAVEC64:
37311 	      icode = CODE_FOR_xsavec64;
37312 	      break;
37313 	    default:
37314 	      gcc_unreachable ();
37315 	    }
37316 
37317 	  op2 = gen_lowpart (SImode, op2);
37318 	  op1 = gen_lowpart (SImode, op1);
37319 	  pat = GEN_FCN (icode) (op0, op1, op2);
37320 	}
37321       else
37322 	{
37323 	  switch (fcode)
37324 	    {
37325 	    case IX86_BUILTIN_XSAVE:
37326 	      icode = CODE_FOR_xsave;
37327 	      break;
37328 	    case IX86_BUILTIN_XRSTOR:
37329 	      icode = CODE_FOR_xrstor;
37330 	      break;
37331 	    case IX86_BUILTIN_XSAVEOPT:
37332 	      icode = CODE_FOR_xsaveopt;
37333 	      break;
37334 	    case IX86_BUILTIN_XSAVES:
37335 	      icode = CODE_FOR_xsaves;
37336 	      break;
37337 	    case IX86_BUILTIN_XRSTORS:
37338 	      icode = CODE_FOR_xrstors;
37339 	      break;
37340 	    case IX86_BUILTIN_XSAVEC:
37341 	      icode = CODE_FOR_xsavec;
37342 	      break;
37343 	    default:
37344 	      gcc_unreachable ();
37345 	    }
37346 	  pat = GEN_FCN (icode) (op0, op1);
37347 	}
37348 
37349       if (pat)
37350 	emit_insn (pat);
37351       return 0;
37352 
37353     case IX86_BUILTIN_LLWPCB:
37354       arg0 = CALL_EXPR_ARG (exp, 0);
37355       op0 = expand_normal (arg0);
37356       icode = CODE_FOR_lwp_llwpcb;
37357       if (!insn_data[icode].operand[0].predicate (op0, Pmode))
37358 	op0 = ix86_zero_extend_to_Pmode (op0);
37359       emit_insn (gen_lwp_llwpcb (op0));
37360       return 0;
37361 
37362     case IX86_BUILTIN_SLWPCB:
37363       icode = CODE_FOR_lwp_slwpcb;
37364       if (!target
37365 	  || !insn_data[icode].operand[0].predicate (target, Pmode))
37366 	target = gen_reg_rtx (Pmode);
37367       emit_insn (gen_lwp_slwpcb (target));
37368       return target;
37369 
37370     case IX86_BUILTIN_BEXTRI32:
37371     case IX86_BUILTIN_BEXTRI64:
37372       arg0 = CALL_EXPR_ARG (exp, 0);
37373       arg1 = CALL_EXPR_ARG (exp, 1);
37374       op0 = expand_normal (arg0);
37375       op1 = expand_normal (arg1);
37376       icode = (fcode == IX86_BUILTIN_BEXTRI32
37377 	  ? CODE_FOR_tbm_bextri_si
37378 	  : CODE_FOR_tbm_bextri_di);
37379       if (!CONST_INT_P (op1))
37380         {
37381           error ("last argument must be an immediate");
37382           return const0_rtx;
37383         }
37384       else
37385         {
37386           unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
37387           unsigned char lsb_index = INTVAL (op1) & 0xFF;
37388           op1 = GEN_INT (length);
37389           op2 = GEN_INT (lsb_index);
37390           pat = GEN_FCN (icode) (target, op0, op1, op2);
37391           if (pat)
37392             emit_insn (pat);
37393           return target;
37394         }
37395 
37396     case IX86_BUILTIN_RDRAND16_STEP:
37397       icode = CODE_FOR_rdrandhi_1;
37398       mode0 = HImode;
37399       goto rdrand_step;
37400 
37401     case IX86_BUILTIN_RDRAND32_STEP:
37402       icode = CODE_FOR_rdrandsi_1;
37403       mode0 = SImode;
37404       goto rdrand_step;
37405 
37406     case IX86_BUILTIN_RDRAND64_STEP:
37407       icode = CODE_FOR_rdranddi_1;
37408       mode0 = DImode;
37409 
37410 rdrand_step:
37411       arg0 = CALL_EXPR_ARG (exp, 0);
37412       op1 = expand_normal (arg0);
37413       if (!address_operand (op1, VOIDmode))
37414 	{
37415 	  op1 = convert_memory_address (Pmode, op1);
37416 	  op1 = copy_addr_to_reg (op1);
37417 	}
37418 
37419       op0 = gen_reg_rtx (mode0);
37420       emit_insn (GEN_FCN (icode) (op0));
37421 
37422       emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
37423 
37424       op1 = gen_reg_rtx (SImode);
37425       emit_move_insn (op1, CONST1_RTX (SImode));
37426 
37427       /* Emit SImode conditional move.  */
37428       if (mode0 == HImode)
37429 	{
37430 	  if (TARGET_ZERO_EXTEND_WITH_AND
37431 	      && optimize_function_for_speed_p (cfun))
37432 	    {
37433 	      op2 = force_reg (SImode, const0_rtx);
37434 
37435 	      emit_insn (gen_movstricthi
37436 			 (gen_lowpart (HImode, op2), op0));
37437 	    }
37438 	  else
37439 	    {
37440 	      op2 = gen_reg_rtx (SImode);
37441 
37442 	      emit_insn (gen_zero_extendhisi2 (op2, op0));
37443 	    }
37444 	}
37445       else if (mode0 == SImode)
37446 	op2 = op0;
37447       else
37448 	op2 = gen_rtx_SUBREG (SImode, op0, 0);
37449 
37450       if (target == 0
37451 	  || !register_operand (target, SImode))
37452 	target = gen_reg_rtx (SImode);
37453 
37454       pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
37455 			 const0_rtx);
37456       emit_insn (gen_rtx_SET (target,
37457 			      gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
37458       return target;
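      /* The conditional move above makes the *_STEP builtins return the
	 carry flag produced by rdrand: 1 when a random value was stored
	 through the pointer argument, 0 when no entropy was available
	 (in which case the destination is zero).  A usage sketch
	 (illustrative only; it assumes the usual <immintrin.h> wrapper):

	   unsigned int val;
	   while (!_rdrand32_step (&val))
	     ;  */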
37459 
37460     case IX86_BUILTIN_RDSEED16_STEP:
37461       icode = CODE_FOR_rdseedhi_1;
37462       mode0 = HImode;
37463       goto rdseed_step;
37464 
37465     case IX86_BUILTIN_RDSEED32_STEP:
37466       icode = CODE_FOR_rdseedsi_1;
37467       mode0 = SImode;
37468       goto rdseed_step;
37469 
37470     case IX86_BUILTIN_RDSEED64_STEP:
37471       icode = CODE_FOR_rdseeddi_1;
37472       mode0 = DImode;
37473 
37474 rdseed_step:
37475       arg0 = CALL_EXPR_ARG (exp, 0);
37476       op1 = expand_normal (arg0);
37477       if (!address_operand (op1, VOIDmode))
37478 	{
37479 	  op1 = convert_memory_address (Pmode, op1);
37480 	  op1 = copy_addr_to_reg (op1);
37481 	}
37482 
37483       op0 = gen_reg_rtx (mode0);
37484       emit_insn (GEN_FCN (icode) (op0));
37485 
37486       emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
37487 
37488       op2 = gen_reg_rtx (QImode);
37489 
37490       pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
37491                          const0_rtx);
37492       emit_insn (gen_rtx_SET (op2, pat));
37493 
37494       if (target == 0
37495 	  || !register_operand (target, SImode))
37496         target = gen_reg_rtx (SImode);
37497 
37498       emit_insn (gen_zero_extendqisi2 (target, op2));
37499       return target;
37500 
37501     case IX86_BUILTIN_SBB32:
37502       icode = CODE_FOR_subborrowsi;
37503       icode2 = CODE_FOR_subborrowsi_0;
37504       mode0 = SImode;
37505       mode1 = DImode;
37506       mode2 = CCmode;
37507       goto handlecarry;
37508 
37509     case IX86_BUILTIN_SBB64:
37510       icode = CODE_FOR_subborrowdi;
37511       icode2 = CODE_FOR_subborrowdi_0;
37512       mode0 = DImode;
37513       mode1 = TImode;
37514       mode2 = CCmode;
37515       goto handlecarry;
37516 
37517     case IX86_BUILTIN_ADDCARRYX32:
37518       icode = CODE_FOR_addcarrysi;
37519       icode2 = CODE_FOR_addcarrysi_0;
37520       mode0 = SImode;
37521       mode1 = DImode;
37522       mode2 = CCCmode;
37523       goto handlecarry;
37524 
37525     case IX86_BUILTIN_ADDCARRYX64:
37526       icode = CODE_FOR_addcarrydi;
37527       icode2 = CODE_FOR_addcarrydi_0;
37528       mode0 = DImode;
37529       mode1 = TImode;
37530       mode2 = CCCmode;
37531 
37532     handlecarry:
37533       arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in.  */
37534       arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1.  */
37535       arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2.  */
37536       arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out.  */
37537 
37538       op1 = expand_normal (arg0);
37539       if (!integer_zerop (arg0))
37540 	op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
37541 
37542       op2 = expand_normal (arg1);
37543       if (!register_operand (op2, mode0))
37544 	op2 = copy_to_mode_reg (mode0, op2);
37545 
37546       op3 = expand_normal (arg2);
37547       if (!register_operand (op3, mode0))
37548 	op3 = copy_to_mode_reg (mode0, op3);
37549 
37550       op4 = expand_normal (arg3);
37551       if (!address_operand (op4, VOIDmode))
37552 	{
37553 	  op4 = convert_memory_address (Pmode, op4);
37554 	  op4 = copy_addr_to_reg (op4);
37555 	}
37556 
37557       op0 = gen_reg_rtx (mode0);
37558       if (integer_zerop (arg0))
37559 	{
37560 	  /* If arg0 is 0, optimize right away into an add or sub
37561 	     instruction that sets CCCmode flags.  */
37562 	  op1 = gen_rtx_REG (mode2, FLAGS_REG);
37563 	  emit_insn (GEN_FCN (icode2) (op0, op2, op3));
37564 	}
37565       else
37566 	{
37567 	  /* Generate CF from input operand.  */
37568 	  emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx));
37569 
37570 	  /* Generate instruction that consumes CF.  */
37571 	  op1 = gen_rtx_REG (CCCmode, FLAGS_REG);
37572 	  pat = gen_rtx_LTU (mode1, op1, const0_rtx);
37573 	  pat2 = gen_rtx_LTU (mode0, op1, const0_rtx);
37574 	  emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat, pat2));
37575 	}
37576 
37577       /* Return current CF value.  */
37578       if (target == 0)
37579         target = gen_reg_rtx (QImode);
37580 
37581       pat = gen_rtx_LTU (QImode, op1, const0_rtx);
37582       emit_insn (gen_rtx_SET (target, pat));
37583 
37584       /* Store the result.  */
37585       emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
37586 
37587       return target;
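      /* For the carry-chain builtins the code above first recreates CF
	 from the incoming carry byte (adding -1 to it carries out exactly
	 when the byte is nonzero), then emits the flag-consuming add/sub,
	 stores the wide result and returns the outgoing carry.  A usage
	 sketch (illustrative only; it assumes the usual <immintrin.h>
	 wrapper for the 32-bit variant):

	   unsigned int sum;
	   unsigned char c_out = _addcarry_u32 (c_in, a, b, &sum);  */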
37588 
37589     case IX86_BUILTIN_READ_FLAGS:
37590       emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
37591 
37592       if (optimize
37593 	  || target == NULL_RTX
37594 	  || !nonimmediate_operand (target, word_mode)
37595 	  || GET_MODE (target) != word_mode)
37596 	target = gen_reg_rtx (word_mode);
37597 
37598       emit_insn (gen_pop (target));
37599       return target;
37600 
37601     case IX86_BUILTIN_WRITE_FLAGS:
37602 
37603       arg0 = CALL_EXPR_ARG (exp, 0);
37604       op0 = expand_normal (arg0);
37605       if (!general_no_elim_operand (op0, word_mode))
37606 	op0 = copy_to_mode_reg (word_mode, op0);
37607 
37608       emit_insn (gen_push (op0));
37609       emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
37610       return 0;
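      /* Both EFLAGS builtins above simply go through the stack:
	 READ_FLAGS emits a push of the flags register followed by a pop
	 into the target, WRITE_FLAGS pushes the operand and pops it into
	 the flags register.  A usage sketch (illustrative only; it
	 assumes the __readeflags/__writeeflags wrappers map onto these
	 builtins):

	   __writeeflags (__readeflags ());  */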
37611 
37612     case IX86_BUILTIN_KTESTC8:
37613       icode = CODE_FOR_ktestqi;
37614       mode3 = CCCmode;
37615       goto kortest;
37616 
37617     case IX86_BUILTIN_KTESTZ8:
37618       icode = CODE_FOR_ktestqi;
37619       mode3 = CCZmode;
37620       goto kortest;
37621 
37622     case IX86_BUILTIN_KTESTC16:
37623       icode = CODE_FOR_ktesthi;
37624       mode3 = CCCmode;
37625       goto kortest;
37626 
37627     case IX86_BUILTIN_KTESTZ16:
37628       icode = CODE_FOR_ktesthi;
37629       mode3 = CCZmode;
37630       goto kortest;
37631 
37632     case IX86_BUILTIN_KTESTC32:
37633       icode = CODE_FOR_ktestsi;
37634       mode3 = CCCmode;
37635       goto kortest;
37636 
37637     case IX86_BUILTIN_KTESTZ32:
37638       icode = CODE_FOR_ktestsi;
37639       mode3 = CCZmode;
37640       goto kortest;
37641 
37642     case IX86_BUILTIN_KTESTC64:
37643       icode = CODE_FOR_ktestdi;
37644       mode3 = CCCmode;
37645       goto kortest;
37646 
37647     case IX86_BUILTIN_KTESTZ64:
37648       icode = CODE_FOR_ktestdi;
37649       mode3 = CCZmode;
37650       goto kortest;
37651 
37652     case IX86_BUILTIN_KORTESTC8:
37653       icode = CODE_FOR_kortestqi;
37654       mode3 = CCCmode;
37655       goto kortest;
37656 
37657     case IX86_BUILTIN_KORTESTZ8:
37658       icode = CODE_FOR_kortestqi;
37659       mode3 = CCZmode;
37660       goto kortest;
37661 
37662     case IX86_BUILTIN_KORTESTC16:
37663       icode = CODE_FOR_kortesthi;
37664       mode3 = CCCmode;
37665       goto kortest;
37666 
37667     case IX86_BUILTIN_KORTESTZ16:
37668       icode = CODE_FOR_kortesthi;
37669       mode3 = CCZmode;
37670       goto kortest;
37671 
37672     case IX86_BUILTIN_KORTESTC32:
37673       icode = CODE_FOR_kortestsi;
37674       mode3 = CCCmode;
37675       goto kortest;
37676 
37677     case IX86_BUILTIN_KORTESTZ32:
37678       icode = CODE_FOR_kortestsi;
37679       mode3 = CCZmode;
37680       goto kortest;
37681 
37682     case IX86_BUILTIN_KORTESTC64:
37683       icode = CODE_FOR_kortestdi;
37684       mode3 = CCCmode;
37685       goto kortest;
37686 
37687     case IX86_BUILTIN_KORTESTZ64:
37688       icode = CODE_FOR_kortestdi;
37689       mode3 = CCZmode;
37690 
37691     kortest:
37692       arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1.  */
37693       arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2.  */
37694       op0 = expand_normal (arg0);
37695       op1 = expand_normal (arg1);
37696 
37697       mode0 = insn_data[icode].operand[0].mode;
37698       mode1 = insn_data[icode].operand[1].mode;
37699 
37700       if (GET_MODE (op0) != VOIDmode)
37701 	op0 = force_reg (GET_MODE (op0), op0);
37702 
37703       op0 = gen_lowpart (mode0, op0);
37704 
37705       if (!insn_data[icode].operand[0].predicate (op0, mode0))
37706 	op0 = copy_to_mode_reg (mode0, op0);
37707 
37708       if (GET_MODE (op1) != VOIDmode)
37709 	op1 = force_reg (GET_MODE (op1), op1);
37710 
37711       op1 = gen_lowpart (mode1, op1);
37712 
37713       if (!insn_data[icode].operand[1].predicate (op1, mode1))
37714 	op1 = copy_to_mode_reg (mode1, op1);
37715 
37716       target = gen_reg_rtx (QImode);
37717 
37718       /* Emit kortest.  */
37719       emit_insn (GEN_FCN (icode) (op0, op1));
37720       /* And use setcc to return result from flags.  */
37721       ix86_expand_setcc (target, EQ,
37722 			 gen_rtx_REG (mode3, FLAGS_REG), const0_rtx);
37723       return target;
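      /* kortest ORs the two mask operands (and ktest ANDs them) purely to
	 set flags; the setcc above then turns the zero flag (the *z
	 variants) or the carry flag (the *c variants) into the 0/1
	 result, as selected by MODE3.  A usage sketch (illustrative only;
	 it assumes the usual <immintrin.h> wrapper):

	   int all_zero = _mm512_kortestz (m1, m2);  */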
37724 
37725     case IX86_BUILTIN_GATHERSIV2DF:
37726       icode = CODE_FOR_avx2_gathersiv2df;
37727       goto gather_gen;
37728     case IX86_BUILTIN_GATHERSIV4DF:
37729       icode = CODE_FOR_avx2_gathersiv4df;
37730       goto gather_gen;
37731     case IX86_BUILTIN_GATHERDIV2DF:
37732       icode = CODE_FOR_avx2_gatherdiv2df;
37733       goto gather_gen;
37734     case IX86_BUILTIN_GATHERDIV4DF:
37735       icode = CODE_FOR_avx2_gatherdiv4df;
37736       goto gather_gen;
37737     case IX86_BUILTIN_GATHERSIV4SF:
37738       icode = CODE_FOR_avx2_gathersiv4sf;
37739       goto gather_gen;
37740     case IX86_BUILTIN_GATHERSIV8SF:
37741       icode = CODE_FOR_avx2_gathersiv8sf;
37742       goto gather_gen;
37743     case IX86_BUILTIN_GATHERDIV4SF:
37744       icode = CODE_FOR_avx2_gatherdiv4sf;
37745       goto gather_gen;
37746     case IX86_BUILTIN_GATHERDIV8SF:
37747       icode = CODE_FOR_avx2_gatherdiv8sf;
37748       goto gather_gen;
37749     case IX86_BUILTIN_GATHERSIV2DI:
37750       icode = CODE_FOR_avx2_gathersiv2di;
37751       goto gather_gen;
37752     case IX86_BUILTIN_GATHERSIV4DI:
37753       icode = CODE_FOR_avx2_gathersiv4di;
37754       goto gather_gen;
37755     case IX86_BUILTIN_GATHERDIV2DI:
37756       icode = CODE_FOR_avx2_gatherdiv2di;
37757       goto gather_gen;
37758     case IX86_BUILTIN_GATHERDIV4DI:
37759       icode = CODE_FOR_avx2_gatherdiv4di;
37760       goto gather_gen;
37761     case IX86_BUILTIN_GATHERSIV4SI:
37762       icode = CODE_FOR_avx2_gathersiv4si;
37763       goto gather_gen;
37764     case IX86_BUILTIN_GATHERSIV8SI:
37765       icode = CODE_FOR_avx2_gathersiv8si;
37766       goto gather_gen;
37767     case IX86_BUILTIN_GATHERDIV4SI:
37768       icode = CODE_FOR_avx2_gatherdiv4si;
37769       goto gather_gen;
37770     case IX86_BUILTIN_GATHERDIV8SI:
37771       icode = CODE_FOR_avx2_gatherdiv8si;
37772       goto gather_gen;
37773     case IX86_BUILTIN_GATHERALTSIV4DF:
37774       icode = CODE_FOR_avx2_gathersiv4df;
37775       goto gather_gen;
37776     case IX86_BUILTIN_GATHERALTDIV8SF:
37777       icode = CODE_FOR_avx2_gatherdiv8sf;
37778       goto gather_gen;
37779     case IX86_BUILTIN_GATHERALTSIV4DI:
37780       icode = CODE_FOR_avx2_gathersiv4di;
37781       goto gather_gen;
37782     case IX86_BUILTIN_GATHERALTDIV8SI:
37783       icode = CODE_FOR_avx2_gatherdiv8si;
37784       goto gather_gen;
37785     case IX86_BUILTIN_GATHER3SIV16SF:
37786       icode = CODE_FOR_avx512f_gathersiv16sf;
37787       goto gather_gen;
37788     case IX86_BUILTIN_GATHER3SIV8DF:
37789       icode = CODE_FOR_avx512f_gathersiv8df;
37790       goto gather_gen;
37791     case IX86_BUILTIN_GATHER3DIV16SF:
37792       icode = CODE_FOR_avx512f_gatherdiv16sf;
37793       goto gather_gen;
37794     case IX86_BUILTIN_GATHER3DIV8DF:
37795       icode = CODE_FOR_avx512f_gatherdiv8df;
37796       goto gather_gen;
37797     case IX86_BUILTIN_GATHER3SIV16SI:
37798       icode = CODE_FOR_avx512f_gathersiv16si;
37799       goto gather_gen;
37800     case IX86_BUILTIN_GATHER3SIV8DI:
37801       icode = CODE_FOR_avx512f_gathersiv8di;
37802       goto gather_gen;
37803     case IX86_BUILTIN_GATHER3DIV16SI:
37804       icode = CODE_FOR_avx512f_gatherdiv16si;
37805       goto gather_gen;
37806     case IX86_BUILTIN_GATHER3DIV8DI:
37807       icode = CODE_FOR_avx512f_gatherdiv8di;
37808       goto gather_gen;
37809     case IX86_BUILTIN_GATHER3ALTSIV8DF:
37810       icode = CODE_FOR_avx512f_gathersiv8df;
37811       goto gather_gen;
37812     case IX86_BUILTIN_GATHER3ALTDIV16SF:
37813       icode = CODE_FOR_avx512f_gatherdiv16sf;
37814       goto gather_gen;
37815     case IX86_BUILTIN_GATHER3ALTSIV8DI:
37816       icode = CODE_FOR_avx512f_gathersiv8di;
37817       goto gather_gen;
37818     case IX86_BUILTIN_GATHER3ALTDIV16SI:
37819       icode = CODE_FOR_avx512f_gatherdiv16si;
37820       goto gather_gen;
37821     case IX86_BUILTIN_GATHER3SIV2DF:
37822       icode = CODE_FOR_avx512vl_gathersiv2df;
37823       goto gather_gen;
37824     case IX86_BUILTIN_GATHER3SIV4DF:
37825       icode = CODE_FOR_avx512vl_gathersiv4df;
37826       goto gather_gen;
37827     case IX86_BUILTIN_GATHER3DIV2DF:
37828       icode = CODE_FOR_avx512vl_gatherdiv2df;
37829       goto gather_gen;
37830     case IX86_BUILTIN_GATHER3DIV4DF:
37831       icode = CODE_FOR_avx512vl_gatherdiv4df;
37832       goto gather_gen;
37833     case IX86_BUILTIN_GATHER3SIV4SF:
37834       icode = CODE_FOR_avx512vl_gathersiv4sf;
37835       goto gather_gen;
37836     case IX86_BUILTIN_GATHER3SIV8SF:
37837       icode = CODE_FOR_avx512vl_gathersiv8sf;
37838       goto gather_gen;
37839     case IX86_BUILTIN_GATHER3DIV4SF:
37840       icode = CODE_FOR_avx512vl_gatherdiv4sf;
37841       goto gather_gen;
37842     case IX86_BUILTIN_GATHER3DIV8SF:
37843       icode = CODE_FOR_avx512vl_gatherdiv8sf;
37844       goto gather_gen;
37845     case IX86_BUILTIN_GATHER3SIV2DI:
37846       icode = CODE_FOR_avx512vl_gathersiv2di;
37847       goto gather_gen;
37848     case IX86_BUILTIN_GATHER3SIV4DI:
37849       icode = CODE_FOR_avx512vl_gathersiv4di;
37850       goto gather_gen;
37851     case IX86_BUILTIN_GATHER3DIV2DI:
37852       icode = CODE_FOR_avx512vl_gatherdiv2di;
37853       goto gather_gen;
37854     case IX86_BUILTIN_GATHER3DIV4DI:
37855       icode = CODE_FOR_avx512vl_gatherdiv4di;
37856       goto gather_gen;
37857     case IX86_BUILTIN_GATHER3SIV4SI:
37858       icode = CODE_FOR_avx512vl_gathersiv4si;
37859       goto gather_gen;
37860     case IX86_BUILTIN_GATHER3SIV8SI:
37861       icode = CODE_FOR_avx512vl_gathersiv8si;
37862       goto gather_gen;
37863     case IX86_BUILTIN_GATHER3DIV4SI:
37864       icode = CODE_FOR_avx512vl_gatherdiv4si;
37865       goto gather_gen;
37866     case IX86_BUILTIN_GATHER3DIV8SI:
37867       icode = CODE_FOR_avx512vl_gatherdiv8si;
37868       goto gather_gen;
37869     case IX86_BUILTIN_GATHER3ALTSIV4DF:
37870       icode = CODE_FOR_avx512vl_gathersiv4df;
37871       goto gather_gen;
37872     case IX86_BUILTIN_GATHER3ALTDIV8SF:
37873       icode = CODE_FOR_avx512vl_gatherdiv8sf;
37874       goto gather_gen;
37875     case IX86_BUILTIN_GATHER3ALTSIV4DI:
37876       icode = CODE_FOR_avx512vl_gathersiv4di;
37877       goto gather_gen;
37878     case IX86_BUILTIN_GATHER3ALTDIV8SI:
37879       icode = CODE_FOR_avx512vl_gatherdiv8si;
37880       goto gather_gen;
37881     case IX86_BUILTIN_SCATTERSIV16SF:
37882       icode = CODE_FOR_avx512f_scattersiv16sf;
37883       goto scatter_gen;
37884     case IX86_BUILTIN_SCATTERSIV8DF:
37885       icode = CODE_FOR_avx512f_scattersiv8df;
37886       goto scatter_gen;
37887     case IX86_BUILTIN_SCATTERDIV16SF:
37888       icode = CODE_FOR_avx512f_scatterdiv16sf;
37889       goto scatter_gen;
37890     case IX86_BUILTIN_SCATTERDIV8DF:
37891       icode = CODE_FOR_avx512f_scatterdiv8df;
37892       goto scatter_gen;
37893     case IX86_BUILTIN_SCATTERSIV16SI:
37894       icode = CODE_FOR_avx512f_scattersiv16si;
37895       goto scatter_gen;
37896     case IX86_BUILTIN_SCATTERSIV8DI:
37897       icode = CODE_FOR_avx512f_scattersiv8di;
37898       goto scatter_gen;
37899     case IX86_BUILTIN_SCATTERDIV16SI:
37900       icode = CODE_FOR_avx512f_scatterdiv16si;
37901       goto scatter_gen;
37902     case IX86_BUILTIN_SCATTERDIV8DI:
37903       icode = CODE_FOR_avx512f_scatterdiv8di;
37904       goto scatter_gen;
37905     case IX86_BUILTIN_SCATTERSIV8SF:
37906       icode = CODE_FOR_avx512vl_scattersiv8sf;
37907       goto scatter_gen;
37908     case IX86_BUILTIN_SCATTERSIV4SF:
37909       icode = CODE_FOR_avx512vl_scattersiv4sf;
37910       goto scatter_gen;
37911     case IX86_BUILTIN_SCATTERSIV4DF:
37912       icode = CODE_FOR_avx512vl_scattersiv4df;
37913       goto scatter_gen;
37914     case IX86_BUILTIN_SCATTERSIV2DF:
37915       icode = CODE_FOR_avx512vl_scattersiv2df;
37916       goto scatter_gen;
37917     case IX86_BUILTIN_SCATTERDIV8SF:
37918       icode = CODE_FOR_avx512vl_scatterdiv8sf;
37919       goto scatter_gen;
37920     case IX86_BUILTIN_SCATTERDIV4SF:
37921       icode = CODE_FOR_avx512vl_scatterdiv4sf;
37922       goto scatter_gen;
37923     case IX86_BUILTIN_SCATTERDIV4DF:
37924       icode = CODE_FOR_avx512vl_scatterdiv4df;
37925       goto scatter_gen;
37926     case IX86_BUILTIN_SCATTERDIV2DF:
37927       icode = CODE_FOR_avx512vl_scatterdiv2df;
37928       goto scatter_gen;
37929     case IX86_BUILTIN_SCATTERSIV8SI:
37930       icode = CODE_FOR_avx512vl_scattersiv8si;
37931       goto scatter_gen;
37932     case IX86_BUILTIN_SCATTERSIV4SI:
37933       icode = CODE_FOR_avx512vl_scattersiv4si;
37934       goto scatter_gen;
37935     case IX86_BUILTIN_SCATTERSIV4DI:
37936       icode = CODE_FOR_avx512vl_scattersiv4di;
37937       goto scatter_gen;
37938     case IX86_BUILTIN_SCATTERSIV2DI:
37939       icode = CODE_FOR_avx512vl_scattersiv2di;
37940       goto scatter_gen;
37941     case IX86_BUILTIN_SCATTERDIV8SI:
37942       icode = CODE_FOR_avx512vl_scatterdiv8si;
37943       goto scatter_gen;
37944     case IX86_BUILTIN_SCATTERDIV4SI:
37945       icode = CODE_FOR_avx512vl_scatterdiv4si;
37946       goto scatter_gen;
37947     case IX86_BUILTIN_SCATTERDIV4DI:
37948       icode = CODE_FOR_avx512vl_scatterdiv4di;
37949       goto scatter_gen;
37950     case IX86_BUILTIN_SCATTERDIV2DI:
37951       icode = CODE_FOR_avx512vl_scatterdiv2di;
37952       goto scatter_gen;
37953     case IX86_BUILTIN_GATHERPFDPD:
37954       icode = CODE_FOR_avx512pf_gatherpfv8sidf;
37955       goto vec_prefetch_gen;
37956     case IX86_BUILTIN_SCATTERALTSIV8DF:
37957       icode = CODE_FOR_avx512f_scattersiv8df;
37958       goto scatter_gen;
37959     case IX86_BUILTIN_SCATTERALTDIV16SF:
37960       icode = CODE_FOR_avx512f_scatterdiv16sf;
37961       goto scatter_gen;
37962     case IX86_BUILTIN_SCATTERALTSIV8DI:
37963       icode = CODE_FOR_avx512f_scattersiv8di;
37964       goto scatter_gen;
37965     case IX86_BUILTIN_SCATTERALTDIV16SI:
37966       icode = CODE_FOR_avx512f_scatterdiv16si;
37967       goto scatter_gen;
37968     case IX86_BUILTIN_GATHERPFDPS:
37969       icode = CODE_FOR_avx512pf_gatherpfv16sisf;
37970       goto vec_prefetch_gen;
37971     case IX86_BUILTIN_GATHERPFQPD:
37972       icode = CODE_FOR_avx512pf_gatherpfv8didf;
37973       goto vec_prefetch_gen;
37974     case IX86_BUILTIN_GATHERPFQPS:
37975       icode = CODE_FOR_avx512pf_gatherpfv8disf;
37976       goto vec_prefetch_gen;
37977     case IX86_BUILTIN_SCATTERPFDPD:
37978       icode = CODE_FOR_avx512pf_scatterpfv8sidf;
37979       goto vec_prefetch_gen;
37980     case IX86_BUILTIN_SCATTERPFDPS:
37981       icode = CODE_FOR_avx512pf_scatterpfv16sisf;
37982       goto vec_prefetch_gen;
37983     case IX86_BUILTIN_SCATTERPFQPD:
37984       icode = CODE_FOR_avx512pf_scatterpfv8didf;
37985       goto vec_prefetch_gen;
37986     case IX86_BUILTIN_SCATTERPFQPS:
37987       icode = CODE_FOR_avx512pf_scatterpfv8disf;
37988       goto vec_prefetch_gen;
37989 
37990     gather_gen:
37991       rtx half;
37992       rtx (*gen) (rtx, rtx);
37993 
37994       arg0 = CALL_EXPR_ARG (exp, 0);
37995       arg1 = CALL_EXPR_ARG (exp, 1);
37996       arg2 = CALL_EXPR_ARG (exp, 2);
37997       arg3 = CALL_EXPR_ARG (exp, 3);
37998       arg4 = CALL_EXPR_ARG (exp, 4);
37999       op0 = expand_normal (arg0);
38000       op1 = expand_normal (arg1);
38001       op2 = expand_normal (arg2);
38002       op3 = expand_normal (arg3);
38003       op4 = expand_normal (arg4);
38004       /* Note the arg order is different from the operand order.  */
38005       mode0 = insn_data[icode].operand[1].mode;
38006       mode2 = insn_data[icode].operand[3].mode;
38007       mode3 = insn_data[icode].operand[4].mode;
38008       mode4 = insn_data[icode].operand[5].mode;
38009 
38010       if (target == NULL_RTX
38011 	  || GET_MODE (target) != insn_data[icode].operand[0].mode
38012 	  || !insn_data[icode].operand[0].predicate (target,
38013 						     GET_MODE (target)))
38014 	subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
38015       else
38016 	subtarget = target;
38017 
38018       switch (fcode)
38019 	{
38020 	case IX86_BUILTIN_GATHER3ALTSIV8DF:
38021 	case IX86_BUILTIN_GATHER3ALTSIV8DI:
38022 	  half = gen_reg_rtx (V8SImode);
38023 	  if (!nonimmediate_operand (op2, V16SImode))
38024 	    op2 = copy_to_mode_reg (V16SImode, op2);
38025 	  emit_insn (gen_vec_extract_lo_v16si (half, op2));
38026 	  op2 = half;
38027 	  break;
38028 	case IX86_BUILTIN_GATHER3ALTSIV4DF:
38029 	case IX86_BUILTIN_GATHER3ALTSIV4DI:
38030 	case IX86_BUILTIN_GATHERALTSIV4DF:
38031 	case IX86_BUILTIN_GATHERALTSIV4DI:
38032 	  half = gen_reg_rtx (V4SImode);
38033 	  if (!nonimmediate_operand (op2, V8SImode))
38034 	    op2 = copy_to_mode_reg (V8SImode, op2);
38035 	  emit_insn (gen_vec_extract_lo_v8si (half, op2));
38036 	  op2 = half;
38037 	  break;
38038 	case IX86_BUILTIN_GATHER3ALTDIV16SF:
38039 	case IX86_BUILTIN_GATHER3ALTDIV16SI:
38040 	  half = gen_reg_rtx (mode0);
38041 	  if (mode0 == V8SFmode)
38042 	    gen = gen_vec_extract_lo_v16sf;
38043 	  else
38044 	    gen = gen_vec_extract_lo_v16si;
38045 	  if (!nonimmediate_operand (op0, GET_MODE (op0)))
38046 	    op0 = copy_to_mode_reg (GET_MODE (op0), op0);
38047 	  emit_insn (gen (half, op0));
38048 	  op0 = half;
38049 	  if (GET_MODE (op3) != VOIDmode)
38050 	    {
38051 	      if (!nonimmediate_operand (op3, GET_MODE (op3)))
38052 		op3 = copy_to_mode_reg (GET_MODE (op3), op3);
38053 	      emit_insn (gen (half, op3));
38054 	      op3 = half;
38055 	    }
38056 	  break;
38057 	case IX86_BUILTIN_GATHER3ALTDIV8SF:
38058 	case IX86_BUILTIN_GATHER3ALTDIV8SI:
38059 	case IX86_BUILTIN_GATHERALTDIV8SF:
38060 	case IX86_BUILTIN_GATHERALTDIV8SI:
38061 	  half = gen_reg_rtx (mode0);
38062 	  if (mode0 == V4SFmode)
38063 	    gen = gen_vec_extract_lo_v8sf;
38064 	  else
38065 	    gen = gen_vec_extract_lo_v8si;
38066 	  if (!nonimmediate_operand (op0, GET_MODE (op0)))
38067 	    op0 = copy_to_mode_reg (GET_MODE (op0), op0);
38068 	  emit_insn (gen (half, op0));
38069 	  op0 = half;
38070 	  if (GET_MODE (op3) != VOIDmode)
38071 	    {
38072 	      if (!nonimmediate_operand (op3, GET_MODE (op3)))
38073 		op3 = copy_to_mode_reg (GET_MODE (op3), op3);
38074 	      emit_insn (gen (half, op3));
38075 	      op3 = half;
38076 	    }
38077 	  break;
38078 	default:
38079 	  break;
38080 	}
38081 
38082       /* Force the memory operand to use only a base register here.
38083 	 We don't want to do this on memory operands of other builtin
38084 	 functions.  */
38085       op1 = ix86_zero_extend_to_Pmode (op1);
38086 
38087       if (!insn_data[icode].operand[1].predicate (op0, mode0))
38088 	op0 = copy_to_mode_reg (mode0, op0);
38089       if (!insn_data[icode].operand[2].predicate (op1, Pmode))
38090 	op1 = copy_to_mode_reg (Pmode, op1);
38091       if (!insn_data[icode].operand[3].predicate (op2, mode2))
38092 	op2 = copy_to_mode_reg (mode2, op2);
38093 
38094       op3 = fixup_modeless_constant (op3, mode3);
38095 
38096       if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
38097 	{
38098 	  if (!insn_data[icode].operand[4].predicate (op3, mode3))
38099 	    op3 = copy_to_mode_reg (mode3, op3);
38100 	}
38101       else
38102 	{
38103 	  op3 = copy_to_reg (op3);
38104 	  op3 = lowpart_subreg (mode3, op3, GET_MODE (op3));
38105 	}
38106       if (!insn_data[icode].operand[5].predicate (op4, mode4))
38107 	{
38108           error ("the last argument must be scale 1, 2, 4, 8");
38109           return const0_rtx;
38110 	}
38111 
38112       /* Optimize.  If mask is known to have all high bits set,
38113 	 replace op0 with pc_rtx to signal that the instruction
38114 	 overwrites the whole destination and doesn't use its
38115 	 previous contents.  */
38116       if (optimize)
38117 	{
38118 	  if (TREE_CODE (arg3) == INTEGER_CST)
38119 	    {
38120 	      if (integer_all_onesp (arg3))
38121 		op0 = pc_rtx;
38122 	    }
38123 	  else if (TREE_CODE (arg3) == VECTOR_CST)
38124 	    {
38125 	      unsigned int negative = 0;
38126 	      for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
38127 		{
38128 		  tree cst = VECTOR_CST_ELT (arg3, i);
38129 		  if (TREE_CODE (cst) == INTEGER_CST
38130 		      && tree_int_cst_sign_bit (cst))
38131 		    negative++;
38132 		  else if (TREE_CODE (cst) == REAL_CST
38133 			   && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
38134 		    negative++;
38135 		}
38136 	      if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
38137 		op0 = pc_rtx;
38138 	    }
38139 	  else if (TREE_CODE (arg3) == SSA_NAME
38140 		   && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
38141 	    {
38142 	      /* Recognize also when mask is like:
38143 		 __v2df src = _mm_setzero_pd ();
38144 		 __v2df mask = _mm_cmpeq_pd (src, src);
38145 		 or
38146 		 __v8sf src = _mm256_setzero_ps ();
38147 		 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
38148 		 as that is a cheaper way to load all ones into
38149 		 a register than having to load a constant from
38150 		 memory.  */
38151 	      gimple *def_stmt = SSA_NAME_DEF_STMT (arg3);
38152 	      if (is_gimple_call (def_stmt))
38153 		{
38154 		  tree fndecl = gimple_call_fndecl (def_stmt);
38155 		  if (fndecl
38156 		      && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
38157 		    switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
38158 		      {
38159 		      case IX86_BUILTIN_CMPPD:
38160 		      case IX86_BUILTIN_CMPPS:
38161 		      case IX86_BUILTIN_CMPPD256:
38162 		      case IX86_BUILTIN_CMPPS256:
38163 			if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
38164 			  break;
38165 			/* FALLTHRU */
38166 		      case IX86_BUILTIN_CMPEQPD:
38167 		      case IX86_BUILTIN_CMPEQPS:
38168 			if (initializer_zerop (gimple_call_arg (def_stmt, 0))
38169 			    && initializer_zerop (gimple_call_arg (def_stmt,
38170 								   1)))
38171 			  op0 = pc_rtx;
38172 			break;
38173 		      default:
38174 			break;
38175 		      }
38176 		}
38177 	    }
38178 	}
38179 
38180       pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
38181       if (! pat)
38182 	return const0_rtx;
38183       emit_insn (pat);
38184 
38185       switch (fcode)
38186 	{
38187 	case IX86_BUILTIN_GATHER3DIV16SF:
38188 	  if (target == NULL_RTX)
38189 	    target = gen_reg_rtx (V8SFmode);
38190 	  emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
38191 	  break;
38192 	case IX86_BUILTIN_GATHER3DIV16SI:
38193 	  if (target == NULL_RTX)
38194 	    target = gen_reg_rtx (V8SImode);
38195 	  emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
38196 	  break;
38197 	case IX86_BUILTIN_GATHER3DIV8SF:
38198 	case IX86_BUILTIN_GATHERDIV8SF:
38199 	  if (target == NULL_RTX)
38200 	    target = gen_reg_rtx (V4SFmode);
38201 	  emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
38202 	  break;
38203 	case IX86_BUILTIN_GATHER3DIV8SI:
38204 	case IX86_BUILTIN_GATHERDIV8SI:
38205 	  if (target == NULL_RTX)
38206 	    target = gen_reg_rtx (V4SImode);
38207 	  emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
38208 	  break;
38209 	default:
38210 	  target = subtarget;
38211 	  break;
38212 	}
38213       return target;
38214 
38215     scatter_gen:
38216       arg0 = CALL_EXPR_ARG (exp, 0);
38217       arg1 = CALL_EXPR_ARG (exp, 1);
38218       arg2 = CALL_EXPR_ARG (exp, 2);
38219       arg3 = CALL_EXPR_ARG (exp, 3);
38220       arg4 = CALL_EXPR_ARG (exp, 4);
38221       op0 = expand_normal (arg0);
38222       op1 = expand_normal (arg1);
38223       op2 = expand_normal (arg2);
38224       op3 = expand_normal (arg3);
38225       op4 = expand_normal (arg4);
38226       mode1 = insn_data[icode].operand[1].mode;
38227       mode2 = insn_data[icode].operand[2].mode;
38228       mode3 = insn_data[icode].operand[3].mode;
38229       mode4 = insn_data[icode].operand[4].mode;
38230 
38231       /* The scatter instruction stores operand op3 to memory using
38232 	 indices from op2 and the scale from op4, under writemask op1.
38233 	 If index operand op2 has more elements than source operand
38234 	 op3, only its low half needs to be used, and vice versa.  */
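      /* For instance (illustrative only), SCATTERALTSIV8DF stores a V8DF
	 source with a V16SI index vector, so only the low V8SI half of
	 the index is extracted below; SCATTERALTDIV16SF is the converse
	 case and the low half of the source is used instead.  */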
38235       switch (fcode)
38236 	{
38237 	case IX86_BUILTIN_SCATTERALTSIV8DF:
38238 	case IX86_BUILTIN_SCATTERALTSIV8DI:
38239 	  half = gen_reg_rtx (V8SImode);
38240 	  if (!nonimmediate_operand (op2, V16SImode))
38241 	    op2 = copy_to_mode_reg (V16SImode, op2);
38242 	  emit_insn (gen_vec_extract_lo_v16si (half, op2));
38243 	  op2 = half;
38244 	  break;
38245 	case IX86_BUILTIN_SCATTERALTDIV16SF:
38246 	case IX86_BUILTIN_SCATTERALTDIV16SI:
38247 	  half = gen_reg_rtx (mode3);
38248 	  if (mode3 == V8SFmode)
38249 	    gen = gen_vec_extract_lo_v16sf;
38250 	  else
38251 	    gen = gen_vec_extract_lo_v16si;
38252 	  if (!nonimmediate_operand (op3, GET_MODE (op3)))
38253 	    op3 = copy_to_mode_reg (GET_MODE (op3), op3);
38254 	  emit_insn (gen (half, op3));
38255 	  op3 = half;
38256 	  break;
38257 	default:
38258 	  break;
38259 	}
38260 
38261       /* Force the memory operand to use only a base register here.
38262 	 We don't want to do this on memory operands of other builtin
38263 	 functions.  */
38264       op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
38265 
38266       if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38267 	op0 = copy_to_mode_reg (Pmode, op0);
38268 
38269       op1 = fixup_modeless_constant (op1, mode1);
38270 
38271       if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
38272 	{
38273 	  if (!insn_data[icode].operand[1].predicate (op1, mode1))
38274 	    op1 = copy_to_mode_reg (mode1, op1);
38275 	}
38276       else
38277 	{
38278 	  op1 = copy_to_reg (op1);
38279 	  op1 = lowpart_subreg (mode1, op1, GET_MODE (op1));
38280 	}
38281 
38282       if (!insn_data[icode].operand[2].predicate (op2, mode2))
38283 	op2 = copy_to_mode_reg (mode2, op2);
38284 
38285       if (!insn_data[icode].operand[3].predicate (op3, mode3))
38286 	op3 = copy_to_mode_reg (mode3, op3);
38287 
38288       if (!insn_data[icode].operand[4].predicate (op4, mode4))
38289 	{
38290 	  error ("the last argument must be scale 1, 2, 4, 8");
38291 	  return const0_rtx;
38292 	}
38293 
38294       pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
38295       if (! pat)
38296 	return const0_rtx;
38297 
38298       emit_insn (pat);
38299       return 0;
38300 
38301     vec_prefetch_gen:
38302       arg0 = CALL_EXPR_ARG (exp, 0);
38303       arg1 = CALL_EXPR_ARG (exp, 1);
38304       arg2 = CALL_EXPR_ARG (exp, 2);
38305       arg3 = CALL_EXPR_ARG (exp, 3);
38306       arg4 = CALL_EXPR_ARG (exp, 4);
38307       op0 = expand_normal (arg0);
38308       op1 = expand_normal (arg1);
38309       op2 = expand_normal (arg2);
38310       op3 = expand_normal (arg3);
38311       op4 = expand_normal (arg4);
38312       mode0 = insn_data[icode].operand[0].mode;
38313       mode1 = insn_data[icode].operand[1].mode;
38314       mode3 = insn_data[icode].operand[3].mode;
38315       mode4 = insn_data[icode].operand[4].mode;
38316 
38317       op0 = fixup_modeless_constant (op0, mode0);
38318 
38319       if (GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
38320 	{
38321 	  if (!insn_data[icode].operand[0].predicate (op0, mode0))
38322 	    op0 = copy_to_mode_reg (mode0, op0);
38323 	}
38324       else
38325 	{
38326 	  op0 = copy_to_reg (op0);
38327 	  op0 = lowpart_subreg (mode0, op0, GET_MODE (op0));
38328 	}
38329 
38330       if (!insn_data[icode].operand[1].predicate (op1, mode1))
38331 	op1 = copy_to_mode_reg (mode1, op1);
38332 
38333       /* Force the memory operand to use only a base register here.
38334 	 We don't want to do this on memory operands of other builtin
38335 	 functions.  */
38336       op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
38337 
38338       if (!insn_data[icode].operand[2].predicate (op2, Pmode))
38339 	op2 = copy_to_mode_reg (Pmode, op2);
38340 
38341       if (!insn_data[icode].operand[3].predicate (op3, mode3))
38342 	{
38343 	  error ("the fourth argument must be scale 1, 2, 4, 8");
38344 	  return const0_rtx;
38345 	}
38346 
38347       if (!insn_data[icode].operand[4].predicate (op4, mode4))
38348 	{
38349 	  error ("incorrect hint operand");
38350 	  return const0_rtx;
38351 	}
38352 
38353       pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
38354       if (! pat)
38355 	return const0_rtx;
38356 
38357       emit_insn (pat);
38358 
38359       return 0;
38360 
38361     case IX86_BUILTIN_XABORT:
38362       icode = CODE_FOR_xabort;
38363       arg0 = CALL_EXPR_ARG (exp, 0);
38364       op0 = expand_normal (arg0);
38365       mode0 = insn_data[icode].operand[0].mode;
38366       if (!insn_data[icode].operand[0].predicate (op0, mode0))
38367 	{
38368 	  error ("the argument to xabort must be an 8-bit immediate");
38369 	  return const0_rtx;
38370 	}
38371       emit_insn (gen_xabort (op0));
38372       return 0;
38373 
38374     case IX86_BUILTIN_RSTORSSP:
38375     case IX86_BUILTIN_CLRSSBSY:
38376       arg0 = CALL_EXPR_ARG (exp, 0);
38377       op0 = expand_normal (arg0);
38378       icode = (fcode == IX86_BUILTIN_RSTORSSP
38379 	  ? CODE_FOR_rstorssp
38380 	  : CODE_FOR_clrssbsy);
38381       if (!address_operand (op0, VOIDmode))
38382 	{
38383 	  op1 = convert_memory_address (Pmode, op0);
38384 	  op0 = copy_addr_to_reg (op1);
38385 	}
38386       emit_insn (GEN_FCN (icode) (gen_rtx_MEM (Pmode, op0)));
38387       return 0;
38388 
38389     case IX86_BUILTIN_WRSSD:
38390     case IX86_BUILTIN_WRSSQ:
38391     case IX86_BUILTIN_WRUSSD:
38392     case IX86_BUILTIN_WRUSSQ:
38393       arg0 = CALL_EXPR_ARG (exp, 0);
38394       op0 = expand_normal (arg0);
38395       arg1 = CALL_EXPR_ARG (exp, 1);
38396       op1 = expand_normal (arg1);
38397       switch (fcode)
38398 	{
38399 	case IX86_BUILTIN_WRSSD:
38400 	  icode = CODE_FOR_wrsssi;
38401 	  mode = SImode;
38402 	  break;
38403 	case IX86_BUILTIN_WRSSQ:
38404 	  icode = CODE_FOR_wrssdi;
38405 	  mode = DImode;
38406 	  break;
38407 	case IX86_BUILTIN_WRUSSD:
38408 	  icode = CODE_FOR_wrusssi;
38409 	  mode = SImode;
38410 	  break;
38411 	case IX86_BUILTIN_WRUSSQ:
38412 	  icode = CODE_FOR_wrussdi;
38413 	  mode = DImode;
38414 	  break;
38415 	}
38416       op0 = force_reg (mode, op0);
38417       if (!address_operand (op1, VOIDmode))
38418 	{
38419 	  op2 = convert_memory_address (Pmode, op1);
38420 	  op1 = copy_addr_to_reg (op2);
38421 	}
38422       emit_insn (GEN_FCN (icode) (op0, gen_rtx_MEM (mode, op1)));
38423       return 0;
38424 
38425     default:
38426       break;
38427     }
38428 
38429   if (fcode >= IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST
38430       && fcode <= IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST)
38431     {
38432       i = fcode - IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST;
38433       return ix86_expand_special_args_builtin (bdesc_special_args + i, exp,
38434 					       target);
38435     }
38436 
38437   if (fcode >= IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST
38438       && fcode <= IX86_BUILTIN__BDESC_SPECIAL_ARGS2_LAST)
38439     {
38440       i = fcode - IX86_BUILTIN__BDESC_SPECIAL_ARGS2_FIRST;
38441       return ix86_expand_special_args_builtin (bdesc_special_args2 + i, exp,
38442 					       target);
38443     }
38444 
38445   if (fcode >= IX86_BUILTIN__BDESC_ARGS_FIRST
38446       && fcode <= IX86_BUILTIN__BDESC_ARGS_LAST)
38447     {
38448       i = fcode - IX86_BUILTIN__BDESC_ARGS_FIRST;
38449       switch (fcode)
38450 	{
38451 	case IX86_BUILTIN_FABSQ:
38452 	case IX86_BUILTIN_COPYSIGNQ:
38453 	  if (!TARGET_SSE)
38454 	    /* Emit a normal call if SSE isn't available.  */
38455 	    return expand_call (exp, target, ignore);
38456 	  /* FALLTHRU */
38457 	default:
38458 	  return ix86_expand_args_builtin (bdesc_args + i, exp, target);
38459 	}
38460     }
38461 
38462   if (fcode >= IX86_BUILTIN__BDESC_ARGS2_FIRST
38463       && fcode <= IX86_BUILTIN__BDESC_ARGS2_LAST)
38464     {
38465       i = fcode - IX86_BUILTIN__BDESC_ARGS2_FIRST;
38466       rtx (*fcn) (rtx, rtx, rtx, rtx) = NULL;
38467       rtx (*fcn_mask) (rtx, rtx, rtx, rtx, rtx);
38468       rtx (*fcn_maskz) (rtx, rtx, rtx, rtx, rtx, rtx);
38469       int masked = 1;
38470       machine_mode mode, wide_mode, nar_mode;
38471 
38472       nar_mode  = V4SFmode;
38473       mode      = V16SFmode;
38474       wide_mode = V64SFmode;
38475       fcn_mask  = gen_avx5124fmaddps_4fmaddps_mask;
38476       fcn_maskz = gen_avx5124fmaddps_4fmaddps_maskz;
38477 
38478       switch (fcode)
38479 	{
38480 	case IX86_BUILTIN_4FMAPS:
38481 	  fcn = gen_avx5124fmaddps_4fmaddps;
38482 	  masked = 0;
38483 	  goto v4fma_expand;
38484 
38485 	case IX86_BUILTIN_4DPWSSD:
38486 	  nar_mode  = V4SImode;
38487 	  mode      = V16SImode;
38488 	  wide_mode = V64SImode;
38489 	  fcn = gen_avx5124vnniw_vp4dpwssd;
38490 	  masked = 0;
38491 	  goto v4fma_expand;
38492 
38493 	case IX86_BUILTIN_4DPWSSDS:
38494 	  nar_mode  = V4SImode;
38495 	  mode      = V16SImode;
38496 	  wide_mode = V64SImode;
38497 	  fcn = gen_avx5124vnniw_vp4dpwssds;
38498 	  masked = 0;
38499 	  goto v4fma_expand;
38500 
38501 	case IX86_BUILTIN_4FNMAPS:
38502 	  fcn = gen_avx5124fmaddps_4fnmaddps;
38503 	  masked = 0;
38504 	  goto v4fma_expand;
38505 
38506 	case IX86_BUILTIN_4FNMAPS_MASK:
38507 	  fcn_mask  = gen_avx5124fmaddps_4fnmaddps_mask;
38508 	  fcn_maskz = gen_avx5124fmaddps_4fnmaddps_maskz;
38509 	  goto v4fma_expand;
38510 
38511 	case IX86_BUILTIN_4DPWSSD_MASK:
38512 	  nar_mode  = V4SImode;
38513 	  mode      = V16SImode;
38514 	  wide_mode = V64SImode;
38515 	  fcn_mask  = gen_avx5124vnniw_vp4dpwssd_mask;
38516 	  fcn_maskz = gen_avx5124vnniw_vp4dpwssd_maskz;
38517 	  goto v4fma_expand;
38518 
38519 	case IX86_BUILTIN_4DPWSSDS_MASK:
38520 	  nar_mode  = V4SImode;
38521 	  mode      = V16SImode;
38522 	  wide_mode = V64SImode;
38523 	  fcn_mask  = gen_avx5124vnniw_vp4dpwssds_mask;
38524 	  fcn_maskz = gen_avx5124vnniw_vp4dpwssds_maskz;
38525 	  goto v4fma_expand;
38526 
38527 	case IX86_BUILTIN_4FMAPS_MASK:
38528 	  {
38529 	    tree args[4];
38530 	    rtx ops[4];
38531 	    rtx wide_reg;
38532 	    rtx accum;
38533 	    rtx addr;
38534 	    rtx mem;
38535 
38536 v4fma_expand:
38537 	    wide_reg = gen_reg_rtx (wide_mode);
38538 	    for (i = 0; i < 4; i++)
38539 	      {
38540 		args[i] = CALL_EXPR_ARG (exp, i);
38541 		ops[i] = expand_normal (args[i]);
38542 
38543 		emit_move_insn (gen_rtx_SUBREG (mode, wide_reg, i * 64),
38544 				ops[i]);
38545 	      }
38546 
38547 	    accum = expand_normal (CALL_EXPR_ARG (exp, 4));
38548 	    accum = force_reg (mode, accum);
38549 
38550 	    addr = expand_normal (CALL_EXPR_ARG (exp, 5));
38551 	    addr = force_reg (Pmode, addr);
38552 
38553 	    mem = gen_rtx_MEM (nar_mode, addr);
38554 
38555 	    target = gen_reg_rtx (mode);
38556 
38557 	    emit_move_insn (target, accum);
38558 
38559 	    if (! masked)
38560 	      emit_insn (fcn (target, accum, wide_reg, mem));
38561 	    else
38562 	      {
38563 		rtx merge, mask;
38564 		merge = expand_normal (CALL_EXPR_ARG (exp, 6));
38565 
38566 		mask = expand_normal (CALL_EXPR_ARG (exp, 7));
38567 
38568 		if (CONST_INT_P (mask))
38569 		  mask = fixup_modeless_constant (mask, HImode);
38570 
38571 		mask = force_reg (HImode, mask);
38572 
38573 		if (GET_MODE (mask) != HImode)
38574 		  mask = gen_rtx_SUBREG (HImode, mask, 0);
38575 
38576 		/* If merge is 0 then we're about to emit z-masked variant.  */
38577 		if (const0_operand (merge, mode))
38578 		  emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask));
38579 		/* If merge is the same as accum then emit merge-masked variant.  */
38580 		else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4))
38581 		  {
38582 		    merge = force_reg (mode, merge);
38583 		    emit_insn (fcn_mask (target, wide_reg, mem, merge, mask));
38584 		  }
38585 		/* Merge with something unknown might happen if we z-mask w/ -O0.  */
38586 		else
38587 		  {
38588 		    target = gen_reg_rtx (mode);
38589 		    emit_move_insn (target, merge);
38590 		    emit_insn (fcn_mask (target, wide_reg, mem, target, mask));
38591 		  }
38592 	      }
38593 	    return target;
38594 	  }
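	/* In the masked forms above the merge operand selects the variant:
	   an all-zero merge gives the zero-masking pattern (elements with a
	   clear mask bit become 0), a merge identical to the accumulator
	   gives ordinary merge-masking (those elements keep the accumulator
	   value), and any other merge value is first copied into a fresh
	   destination and then used as the merge source.  */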
38595 
38596 	case IX86_BUILTIN_4FNMASS:
38597 	  fcn = gen_avx5124fmaddps_4fnmaddss;
38598 	  masked = 0;
38599 	  goto s4fma_expand;
38600 
38601 	case IX86_BUILTIN_4FMASS:
38602 	  fcn = gen_avx5124fmaddps_4fmaddss;
38603 	  masked = 0;
38604 	  goto s4fma_expand;
38605 
38606 	case IX86_BUILTIN_4FNMASS_MASK:
38607 	  fcn_mask = gen_avx5124fmaddps_4fnmaddss_mask;
38608 	  fcn_maskz = gen_avx5124fmaddps_4fnmaddss_maskz;
38609 	  goto s4fma_expand;
38610 
38611 	case IX86_BUILTIN_4FMASS_MASK:
38612 	  {
38613 	    tree args[4];
38614 	    rtx ops[4];
38615 	    rtx wide_reg;
38616 	    rtx accum;
38617 	    rtx addr;
38618 	    rtx mem;
38619 
38620 	    fcn_mask = gen_avx5124fmaddps_4fmaddss_mask;
38621 	    fcn_maskz = gen_avx5124fmaddps_4fmaddss_maskz;
38622 
38623 s4fma_expand:
38624 	    mode = V4SFmode;
38625 	    wide_reg = gen_reg_rtx (V64SFmode);
38626 	    for (i = 0; i < 4; i++)
38627 	      {
38628 		rtx tmp;
38629 		args[i] = CALL_EXPR_ARG (exp, i);
38630 		ops[i] = expand_normal (args[i]);
38631 
38632 		tmp = gen_reg_rtx (SFmode);
38633 		emit_move_insn (tmp, gen_rtx_SUBREG (SFmode, ops[i], 0));
38634 
38635 		emit_move_insn (gen_rtx_SUBREG (V16SFmode, wide_reg, i * 64),
38636 				gen_rtx_SUBREG (V16SFmode, tmp, 0));
38637 	      }
38638 
38639 	    accum = expand_normal (CALL_EXPR_ARG (exp, 4));
38640 	    accum = force_reg (V4SFmode, accum);
38641 
38642 	    addr = expand_normal (CALL_EXPR_ARG (exp, 5));
38643 	    addr = force_reg (Pmode, addr);
38644 
38645 	    mem = gen_rtx_MEM (V4SFmode, addr);
38646 
38647 	    target = gen_reg_rtx (V4SFmode);
38648 
38649 	    emit_move_insn (target, accum);
38650 
38651 	    if (! masked)
38652 	      emit_insn (fcn (target, accum, wide_reg, mem));
38653 	    else
38654 	      {
38655 		rtx merge, mask;
38656 		merge = expand_normal (CALL_EXPR_ARG (exp, 6));
38657 
38658 		mask = expand_normal (CALL_EXPR_ARG (exp, 7));
38659 
38660 		if (CONST_INT_P (mask))
38661 		  mask = fixup_modeless_constant (mask, QImode);
38662 
38663 		mask = force_reg (QImode, mask);
38664 
38665 		if (GET_MODE (mask) != QImode)
38666 		  mask = gen_rtx_SUBREG (QImode, mask, 0);
38667 
38668 		/* If merge is 0 then we're about to emit z-masked variant.  */
38669 		if (const0_operand (merge, mode))
38670 		  emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask));
38671 		/* If merge is the same as accum then emit merge-masked
38672 		   variant.  */
38673 		else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4))
38674 		  {
38675 		    merge = force_reg (mode, merge);
38676 		    emit_insn (fcn_mask (target, wide_reg, mem, merge, mask));
38677 		  }
38678 		/* Merge with something unknown might happen if we z-mask
38679 		   w/ -O0.  */
38680 		else
38681 		  {
38682 		    target = gen_reg_rtx (mode);
38683 		    emit_move_insn (target, merge);
38684 		    emit_insn (fcn_mask (target, wide_reg, mem, target, mask));
38685 		  }
38686 	      }
38687 	    return target;
38688 	  }
38689 	case IX86_BUILTIN_RDPID:
38690 	  return ix86_expand_special_args_builtin (bdesc_args2 + i, exp,
38691 						   target);
38692 	default:
38693 	  return ix86_expand_args_builtin (bdesc_args2 + i, exp, target);
38694 	}
38695     }
38696 
38697   if (fcode >= IX86_BUILTIN__BDESC_COMI_FIRST
38698       && fcode <= IX86_BUILTIN__BDESC_COMI_LAST)
38699     {
38700       i = fcode - IX86_BUILTIN__BDESC_COMI_FIRST;
38701       return ix86_expand_sse_comi (bdesc_comi + i, exp, target);
38702     }
38703 
38704   if (fcode >= IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST
38705       && fcode <= IX86_BUILTIN__BDESC_ROUND_ARGS_LAST)
38706     {
38707       i = fcode - IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST;
38708       return ix86_expand_round_builtin (bdesc_round_args + i, exp, target);
38709     }
38710 
38711   if (fcode >= IX86_BUILTIN__BDESC_PCMPESTR_FIRST
38712       && fcode <= IX86_BUILTIN__BDESC_PCMPESTR_LAST)
38713     {
38714       i = fcode - IX86_BUILTIN__BDESC_PCMPESTR_FIRST;
38715       return ix86_expand_sse_pcmpestr (bdesc_pcmpestr + i, exp, target);
38716     }
38717 
38718   if (fcode >= IX86_BUILTIN__BDESC_PCMPISTR_FIRST
38719       && fcode <= IX86_BUILTIN__BDESC_PCMPISTR_LAST)
38720     {
38721       i = fcode - IX86_BUILTIN__BDESC_PCMPISTR_FIRST;
38722       return ix86_expand_sse_pcmpistr (bdesc_pcmpistr + i, exp, target);
38723     }
38724 
38725   if (fcode >= IX86_BUILTIN__BDESC_MULTI_ARG_FIRST
38726       && fcode <= IX86_BUILTIN__BDESC_MULTI_ARG_LAST)
38727     {
38728       i = fcode - IX86_BUILTIN__BDESC_MULTI_ARG_FIRST;
38729       const struct builtin_description *d = bdesc_multi_arg + i;
38730       return ix86_expand_multi_arg_builtin (d->icode, exp, target,
38731 					    (enum ix86_builtin_func_type)
38732 					    d->flag, d->comparison);
38733     }
38734 
38735   if (fcode >= IX86_BUILTIN__BDESC_CET_FIRST
38736       && fcode <= IX86_BUILTIN__BDESC_CET_LAST)
38737     {
38738       i = fcode - IX86_BUILTIN__BDESC_CET_FIRST;
38739       return ix86_expand_special_args_builtin (bdesc_cet + i, exp,
38740 					       target);
38741     }
38742 
38743   if (fcode >= IX86_BUILTIN__BDESC_CET_NORMAL_FIRST
38744       && fcode <= IX86_BUILTIN__BDESC_CET_NORMAL_LAST)
38745     {
38746       i = fcode - IX86_BUILTIN__BDESC_CET_NORMAL_FIRST;
38747       return ix86_expand_special_args_builtin (bdesc_cet_rdssp + i, exp,
38748 				       target);
38749     }
38750 
38751   gcc_unreachable ();
38752 }
38753 
38754 /* This returns the target-specific builtin with code CODE if
38755    current_function_decl has visibility of this builtin, which is checked
38756    using ISA flags.  Returns NULL_TREE otherwise.  */
38757 
38758 static tree ix86_get_builtin (enum ix86_builtins code)
38759 {
38760   struct cl_target_option *opts;
38761   tree target_tree = NULL_TREE;
38762 
38763   /* Determine the isa flags of current_function_decl.  */
38764 
38765   if (current_function_decl)
38766     target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
38767 
38768   if (target_tree == NULL)
38769     target_tree = target_option_default_node;
38770 
38771   opts = TREE_TARGET_OPTION (target_tree);
38772 
38773   if ((ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
38774       || (ix86_builtins_isa[(int) code].isa2 & opts->x_ix86_isa_flags2))
38775     return ix86_builtin_decl (code, true);
38776   else
38777     return NULL_TREE;
38778 }
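/* For instance (illustrative only), a gather builtin that requires AVX2 is
   returned by ix86_get_builtin inside a function compiled with
   __attribute__ ((target ("avx2"))), but yields NULL_TREE in functions
   whose effective ISA lacks AVX2, so callers such as the vectorizer
   builtin hooks simply skip it there.  */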
38779 
38780 /* Return the function decl of the target-specific builtin
38781    for the given MPX builtin passed in FCODE.  */
38782 static tree
38783 ix86_builtin_mpx_function (unsigned fcode)
38784 {
38785   switch (fcode)
38786     {
38787     case BUILT_IN_CHKP_BNDMK:
38788       return ix86_builtins[IX86_BUILTIN_BNDMK];
38789 
38790     case BUILT_IN_CHKP_BNDSTX:
38791       return ix86_builtins[IX86_BUILTIN_BNDSTX];
38792 
38793     case BUILT_IN_CHKP_BNDLDX:
38794       return ix86_builtins[IX86_BUILTIN_BNDLDX];
38795 
38796     case BUILT_IN_CHKP_BNDCL:
38797       return ix86_builtins[IX86_BUILTIN_BNDCL];
38798 
38799     case BUILT_IN_CHKP_BNDCU:
38800       return ix86_builtins[IX86_BUILTIN_BNDCU];
38801 
38802     case BUILT_IN_CHKP_BNDRET:
38803       return ix86_builtins[IX86_BUILTIN_BNDRET];
38804 
38805     case BUILT_IN_CHKP_INTERSECT:
38806       return ix86_builtins[IX86_BUILTIN_BNDINT];
38807 
38808     case BUILT_IN_CHKP_NARROW:
38809       return ix86_builtins[IX86_BUILTIN_BNDNARROW];
38810 
38811     case BUILT_IN_CHKP_SIZEOF:
38812       return ix86_builtins[IX86_BUILTIN_SIZEOF];
38813 
38814     case BUILT_IN_CHKP_EXTRACT_LOWER:
38815       return ix86_builtins[IX86_BUILTIN_BNDLOWER];
38816 
38817     case BUILT_IN_CHKP_EXTRACT_UPPER:
38818       return ix86_builtins[IX86_BUILTIN_BNDUPPER];
38819 
38820     default:
38821       return NULL_TREE;
38822     }
38823 
38824   gcc_unreachable ();
38825 }
38826 
38827 /* Helper function for ix86_load_bounds and ix86_store_bounds.
38828 
38829    Return an address to be used to load/store bounds for the pointer
38830    passed in SLOT.
38831 
38832    SLOT_NO is an integer constant holding the number of a
38833    target-dependent special slot to be used in case SLOT is not a memory.
38834 
38835    SPECIAL_BASE is a pointer to be used as the base of the fake addresses
38836    used to access special slots in the Bounds Table.  SPECIAL_BASE[-1],
38837    SPECIAL_BASE[-2] etc. will be used as fake pointer locations.  */
38838 
38839 static rtx
38840 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
38841 {
38842   rtx addr = NULL;
38843 
38844   /* A NULL slot means we pass bounds for a pointer that is not passed
38845      to the function at all.  A register slot means we pass the pointer
38846      in a register.  In both these cases bounds are passed via the
38847      Bounds Table.  Since we do not have the actual pointer stored in
38848      memory, we have to use fake addresses to access the Bounds Table.
38849      We start with (special_base - sizeof (void *)) and decrease this
38850      address by the pointer size to get addresses for the other slots.  */
38851   if (!slot || REG_P (slot))
38852     {
38853       gcc_assert (CONST_INT_P (slot_no));
38854       addr = plus_constant (Pmode, special_base,
38855 			    -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
38856     }
38857   /* If the pointer is passed in memory then its address is used to
38858      access the Bounds Table.  */
38859   else if (MEM_P (slot))
38860     {
38861       addr = XEXP (slot, 0);
38862       if (!register_operand (addr, Pmode))
38863 	addr = copy_addr_to_reg (addr);
38864     }
38865   else
38866     gcc_unreachable ();
38867 
38868   return addr;
38869 }
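/* For example (illustrative only), with 64-bit pointers the register/NULL
   case above yields special_base - 8 for slot 0, special_base - 16 for
   slot 1, and in general

     addr = special_base - (slot_no + 1) * GET_MODE_SIZE (Pmode);

   which is exactly the plus_constant computation performed above.  */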
38870 
38871 /* The expand pass uses this hook to load bounds for function parameter
38872    PTR passed in SLOT in case its bounds are not passed in a register.
38873 
38874    If SLOT is a memory, then bounds are loaded as for a regular pointer
38875    loaded from memory.  PTR may be NULL in case SLOT is a memory;
38876    in that case the value of PTR (if required) may be loaded from SLOT.
38877 
38878    If SLOT is NULL or a register then SLOT_NO is an integer constant
38879    holding the number of the target dependent special slot which should
38880    be used to obtain bounds.
38881 
38882    Return the loaded bounds.  */
38883 
38884 static rtx
38885 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
38886 {
38887   rtx reg = gen_reg_rtx (BNDmode);
38888   rtx addr;
38889 
38890   /* Get address to be used to access Bounds Table.  Special slots start
38891      at the location of return address of the current function.  */
38892   addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
38893 
38894   /* Load the pointer value from a memory if we don't have it.  */
38895   if (!ptr)
38896     {
38897       gcc_assert (MEM_P (slot));
38898       ptr = copy_addr_to_reg (slot);
38899     }
38900 
38901   if (!register_operand (ptr, Pmode))
38902     ptr = ix86_zero_extend_to_Pmode (ptr);
38903 
38904   emit_insn (BNDmode == BND64mode
38905 	     ? gen_bnd64_ldx (reg, addr, ptr)
38906 	     : gen_bnd32_ldx (reg, addr, ptr));
38907 
38908   return reg;
38909 }
38910 
38911 /* The expand pass uses this hook to store BOUNDS for call argument PTR
38912    passed in SLOT in case BOUNDS are not passed in a register.
38913 
38914    If SLOT is a memory, then BOUNDS are stored as for a regular pointer
38915    stored in memory.  PTR may be NULL in case SLOT is a memory;
38916    in that case the value of PTR (if required) may be loaded from SLOT.
38917 
38918    If SLOT is NULL or a register then SLOT_NO is an integer constant
38919    holding the number of the target dependent special slot which should
38920    be used to store BOUNDS.  */
38921 
38922 static void
38923 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
38924 {
38925   rtx addr;
38926 
38927   /* Get address to be used to access Bounds Table.  Special slots start
38928      at the location of return address of a called function.  */
38929   addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
38930 
38931   /* Load the pointer value from a memory if we don't have it.  */
38932   if (!ptr)
38933     {
38934       gcc_assert (MEM_P (slot));
38935       ptr = copy_addr_to_reg (slot);
38936     }
38937 
38938   if (!register_operand (ptr, Pmode))
38939     ptr = ix86_zero_extend_to_Pmode (ptr);
38940 
38941   gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
38942   if (!register_operand (bounds, BNDmode))
38943     bounds = copy_to_mode_reg (BNDmode, bounds);
38944 
38945   emit_insn (BNDmode == BND64mode
38946 	     ? gen_bnd64_stx (addr, ptr, bounds)
38947 	     : gen_bnd32_stx (addr, ptr, bounds));
38948 }
38949 
38950 /* Load and return bounds returned by function in SLOT.  */
38951 
38952 static rtx
38953 ix86_load_returned_bounds (rtx slot)
38954 {
38955   rtx res;
38956 
38957   gcc_assert (REG_P (slot));
38958   res = gen_reg_rtx (BNDmode);
38959   emit_move_insn (res, slot);
38960 
38961   return res;
38962 }
38963 
38964 /* Store BOUNDS returned by function into SLOT.  */
38965 
38966 static void
38967 ix86_store_returned_bounds (rtx slot, rtx bounds)
38968 {
38969   gcc_assert (REG_P (slot));
38970   emit_move_insn (slot, bounds);
38971 }
38972 
38973 /* Returns a function decl for a vectorized version of the combined function
38974    with combined_fn code FN and the result vector type TYPE, or NULL_TREE
38975    if it is not available.  */
38976 
38977 static tree
38978 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
38979 				  tree type_in)
38980 {
38981   machine_mode in_mode, out_mode;
38982   int in_n, out_n;
38983 
38984   if (TREE_CODE (type_out) != VECTOR_TYPE
38985       || TREE_CODE (type_in) != VECTOR_TYPE)
38986     return NULL_TREE;
38987 
38988   out_mode = TYPE_MODE (TREE_TYPE (type_out));
38989   out_n = TYPE_VECTOR_SUBPARTS (type_out);
38990   in_mode = TYPE_MODE (TREE_TYPE (type_in));
38991   in_n = TYPE_VECTOR_SUBPARTS (type_in);
38992 
38993   switch (fn)
38994     {
38995     CASE_CFN_EXP2:
38996       if (out_mode == SFmode && in_mode == SFmode)
38997 	{
38998 	  if (out_n == 16 && in_n == 16)
38999 	    return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
39000 	}
39001       break;
39002 
39003     CASE_CFN_IFLOOR:
39004     CASE_CFN_LFLOOR:
39005     CASE_CFN_LLFLOOR:
39006       /* The round insn does not trap on denormals.  */
39007       if (flag_trapping_math || !TARGET_SSE4_1)
39008 	break;
39009 
39010       if (out_mode == SImode && in_mode == DFmode)
39011 	{
39012 	  if (out_n == 4 && in_n == 2)
39013 	    return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
39014 	  else if (out_n == 8 && in_n == 4)
39015 	    return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
39016 	  else if (out_n == 16 && in_n == 8)
39017 	    return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
39018 	}
39019       if (out_mode == SImode && in_mode == SFmode)
39020 	{
39021 	  if (out_n == 4 && in_n == 4)
39022 	    return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
39023 	  else if (out_n == 8 && in_n == 8)
39024 	    return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
39025 	  else if (out_n == 16 && in_n == 16)
39026 	    return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX512);
39027 	}
39028       break;
39029 
39030     CASE_CFN_ICEIL:
39031     CASE_CFN_LCEIL:
39032     CASE_CFN_LLCEIL:
39033       /* The round insn does not trap on denormals.  */
39034       if (flag_trapping_math || !TARGET_SSE4_1)
39035 	break;
39036 
39037       if (out_mode == SImode && in_mode == DFmode)
39038 	{
39039 	  if (out_n == 4 && in_n == 2)
39040 	    return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
39041 	  else if (out_n == 8 && in_n == 4)
39042 	    return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
39043 	  else if (out_n == 16 && in_n == 8)
39044 	    return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
39045 	}
39046       if (out_mode == SImode && in_mode == SFmode)
39047 	{
39048 	  if (out_n == 4 && in_n == 4)
39049 	    return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
39050 	  else if (out_n == 8 && in_n == 8)
39051 	    return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
39052 	  else if (out_n == 16 && in_n == 16)
39053 	    return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX512);
39054 	}
39055       break;
39056 
39057     CASE_CFN_IRINT:
39058     CASE_CFN_LRINT:
39059     CASE_CFN_LLRINT:
39060       if (out_mode == SImode && in_mode == DFmode)
39061 	{
39062 	  if (out_n == 4 && in_n == 2)
39063 	    return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
39064 	  else if (out_n == 8 && in_n == 4)
39065 	    return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
39066 	  else if (out_n == 16 && in_n == 8)
39067 	    return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX512);
39068 	}
39069       if (out_mode == SImode && in_mode == SFmode)
39070 	{
39071 	  if (out_n == 4 && in_n == 4)
39072 	    return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
39073 	  else if (out_n == 8 && in_n == 8)
39074 	    return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
39075 	  else if (out_n == 16 && in_n == 16)
39076 	    return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ512);
39077 	}
39078       break;
39079 
39080     CASE_CFN_IROUND:
39081     CASE_CFN_LROUND:
39082     CASE_CFN_LLROUND:
39083       /* The round insn does not trap on denormals.  */
39084       if (flag_trapping_math || !TARGET_SSE4_1)
39085 	break;
39086 
39087       if (out_mode == SImode && in_mode == DFmode)
39088 	{
39089 	  if (out_n == 4 && in_n == 2)
39090 	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
39091 	  else if (out_n == 8 && in_n == 4)
39092 	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
39093 	  else if (out_n == 16 && in_n == 8)
39094 	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
39095 	}
39096       if (out_mode == SImode && in_mode == SFmode)
39097 	{
39098 	  if (out_n == 4 && in_n == 4)
39099 	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
39100 	  else if (out_n == 8 && in_n == 8)
39101 	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
39102 	  else if (out_n == 16 && in_n == 16)
39103 	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX512);
39104 	}
39105       break;
39106 
39107     CASE_CFN_FLOOR:
39108       /* The round insn does not trap on denormals.  */
39109       if (flag_trapping_math || !TARGET_SSE4_1)
39110 	break;
39111 
39112       if (out_mode == DFmode && in_mode == DFmode)
39113 	{
39114 	  if (out_n == 2 && in_n == 2)
39115 	    return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
39116 	  else if (out_n == 4 && in_n == 4)
39117 	    return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
39118 	  else if (out_n == 8 && in_n == 8)
39119 	    return ix86_get_builtin (IX86_BUILTIN_FLOORPD512);
39120 	}
39121       if (out_mode == SFmode && in_mode == SFmode)
39122 	{
39123 	  if (out_n == 4 && in_n == 4)
39124 	    return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
39125 	  else if (out_n == 8 && in_n == 8)
39126 	    return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
39127 	  else if (out_n == 16 && in_n == 16)
39128 	    return ix86_get_builtin (IX86_BUILTIN_FLOORPS512);
39129 	}
39130       break;
39131 
39132     CASE_CFN_CEIL:
39133       /* The round insn does not trap on denormals.  */
39134       if (flag_trapping_math || !TARGET_SSE4_1)
39135 	break;
39136 
39137       if (out_mode == DFmode && in_mode == DFmode)
39138 	{
39139 	  if (out_n == 2 && in_n == 2)
39140 	    return ix86_get_builtin (IX86_BUILTIN_CEILPD);
39141 	  else if (out_n == 4 && in_n == 4)
39142 	    return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
39143 	  else if (out_n == 8 && in_n == 8)
39144 	    return ix86_get_builtin (IX86_BUILTIN_CEILPD512);
39145 	}
39146       if (out_mode == SFmode && in_mode == SFmode)
39147 	{
39148 	  if (out_n == 4 && in_n == 4)
39149 	    return ix86_get_builtin (IX86_BUILTIN_CEILPS);
39150 	  else if (out_n == 8 && in_n == 8)
39151 	    return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
39152 	  else if (out_n == 16 && in_n == 16)
39153 	    return ix86_get_builtin (IX86_BUILTIN_CEILPS512);
39154 	}
39155       break;
39156 
39157     CASE_CFN_TRUNC:
39158       /* The round insn does not trap on denormals.  */
39159       if (flag_trapping_math || !TARGET_SSE4_1)
39160 	break;
39161 
39162       if (out_mode == DFmode && in_mode == DFmode)
39163 	{
39164 	  if (out_n == 2 && in_n == 2)
39165 	    return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
39166 	  else if (out_n == 4 && in_n == 4)
39167 	    return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
39168 	  else if (out_n == 8 && in_n == 8)
39169 	    return ix86_get_builtin (IX86_BUILTIN_TRUNCPD512);
39170 	}
39171       if (out_mode == SFmode && in_mode == SFmode)
39172 	{
39173 	  if (out_n == 4 && in_n == 4)
39174 	    return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
39175 	  else if (out_n == 8 && in_n == 8)
39176 	    return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
39177 	  else if (out_n == 16 && in_n == 16)
39178 	    return ix86_get_builtin (IX86_BUILTIN_TRUNCPS512);
39179 	}
39180       break;
39181 
39182     CASE_CFN_RINT:
39183       /* The round insn does not trap on denormals.  */
39184       if (flag_trapping_math || !TARGET_SSE4_1)
39185 	break;
39186 
39187       if (out_mode == DFmode && in_mode == DFmode)
39188 	{
39189 	  if (out_n == 2 && in_n == 2)
39190 	    return ix86_get_builtin (IX86_BUILTIN_RINTPD);
39191 	  else if (out_n == 4 && in_n == 4)
39192 	    return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
39193 	}
39194       if (out_mode == SFmode && in_mode == SFmode)
39195 	{
39196 	  if (out_n == 4 && in_n == 4)
39197 	    return ix86_get_builtin (IX86_BUILTIN_RINTPS);
39198 	  else if (out_n == 8 && in_n == 8)
39199 	    return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
39200 	}
39201       break;
39202 
39203     CASE_CFN_FMA:
39204       if (out_mode == DFmode && in_mode == DFmode)
39205 	{
39206 	  if (out_n == 2 && in_n == 2)
39207 	    return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
39208 	  if (out_n == 4 && in_n == 4)
39209 	    return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
39210 	}
39211       if (out_mode == SFmode && in_mode == SFmode)
39212 	{
39213 	  if (out_n == 4 && in_n == 4)
39214 	    return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
39215 	  if (out_n == 8 && in_n == 8)
39216 	    return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
39217 	}
39218       break;
39219 
39220     default:
39221       break;
39222     }
39223 
39224   /* Dispatch to a handler for a vectorization library.  */
39225   if (ix86_veclib_handler)
39226     return ix86_veclib_handler (combined_fn (fn), type_out, type_in);
39227 
39228   return NULL_TREE;
39229 }
39230 
39231 /* Handler for an SVML-style interface to
39232    a library with vectorized intrinsics.  */
39233 
39234 static tree
39235 ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
39236 {
39237   char name[20];
39238   tree fntype, new_fndecl, args;
39239   unsigned arity;
39240   const char *bname;
39241   machine_mode el_mode, in_mode;
39242   int n, in_n;
39243 
39244   /* The SVML library is suitable for unsafe math only.  */
39245   if (!flag_unsafe_math_optimizations)
39246     return NULL_TREE;
39247 
39248   el_mode = TYPE_MODE (TREE_TYPE (type_out));
39249   n = TYPE_VECTOR_SUBPARTS (type_out);
39250   in_mode = TYPE_MODE (TREE_TYPE (type_in));
39251   in_n = TYPE_VECTOR_SUBPARTS (type_in);
39252   if (el_mode != in_mode
39253       || n != in_n)
39254     return NULL_TREE;
39255 
39256   switch (fn)
39257     {
39258     CASE_CFN_EXP:
39259     CASE_CFN_LOG:
39260     CASE_CFN_LOG10:
39261     CASE_CFN_POW:
39262     CASE_CFN_TANH:
39263     CASE_CFN_TAN:
39264     CASE_CFN_ATAN:
39265     CASE_CFN_ATAN2:
39266     CASE_CFN_ATANH:
39267     CASE_CFN_CBRT:
39268     CASE_CFN_SINH:
39269     CASE_CFN_SIN:
39270     CASE_CFN_ASINH:
39271     CASE_CFN_ASIN:
39272     CASE_CFN_COSH:
39273     CASE_CFN_COS:
39274     CASE_CFN_ACOSH:
39275     CASE_CFN_ACOS:
39276       if ((el_mode != DFmode || n != 2)
39277 	  && (el_mode != SFmode || n != 4))
39278 	return NULL_TREE;
39279       break;
39280 
39281     default:
39282       return NULL_TREE;
39283     }
39284 
39285   tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
39286   bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
39287 
39288   if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
39289     strcpy (name, "vmlsLn4");
39290   else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
39291     strcpy (name, "vmldLn2");
39292   else if (n == 4)
39293     {
39294       sprintf (name, "vmls%s", bname+10);
39295       name[strlen (name)-1] = '4';
39296     }
39297   else
39298     sprintf (name, "vmld%s2", bname+10);
39299 
39300   /* Convert the first letter of the math function name to uppercase.  */
39301   name[4] &= ~0x20;
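  /* For example, a 4-wide single-precision sin yields "vmlsSin4" and a
     2-wide double-precision pow yields "vmldPow2".  */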
39302 
39303   arity = 0;
39304   for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
39305     arity++;
39306 
39307   if (arity == 1)
39308     fntype = build_function_type_list (type_out, type_in, NULL);
39309   else
39310     fntype = build_function_type_list (type_out, type_in, type_in, NULL);
39311 
39312   /* Build a function declaration for the vectorized function.  */
39313   new_fndecl = build_decl (BUILTINS_LOCATION,
39314 			   FUNCTION_DECL, get_identifier (name), fntype);
39315   TREE_PUBLIC (new_fndecl) = 1;
39316   DECL_EXTERNAL (new_fndecl) = 1;
39317   DECL_IS_NOVOPS (new_fndecl) = 1;
39318   TREE_READONLY (new_fndecl) = 1;
39319 
39320   return new_fndecl;
39321 }
39322 
39323 /* Handler for an ACML-style interface to
39324    a library with vectorized intrinsics.  */
39325 
39326 static tree
39327 ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
39328 {
39329   char name[20] = "__vr.._";
39330   tree fntype, new_fndecl, args;
39331   unsigned arity;
39332   const char *bname;
39333   machine_mode el_mode, in_mode;
39334   int n, in_n;
39335 
39336   /* The ACML library is 64-bit only and suitable for unsafe math only,
39337      as it does not correctly support parts of IEEE arithmetic with the
39338      required precision, such as denormals.  */
39339   if (!TARGET_64BIT
39340       || !flag_unsafe_math_optimizations)
39341     return NULL_TREE;
39342 
39343   el_mode = TYPE_MODE (TREE_TYPE (type_out));
39344   n = TYPE_VECTOR_SUBPARTS (type_out);
39345   in_mode = TYPE_MODE (TREE_TYPE (type_in));
39346   in_n = TYPE_VECTOR_SUBPARTS (type_in);
39347   if (el_mode != in_mode
39348       || n != in_n)
39349     return NULL_TREE;
39350 
39351   switch (fn)
39352     {
39353     CASE_CFN_SIN:
39354     CASE_CFN_COS:
39355     CASE_CFN_EXP:
39356     CASE_CFN_LOG:
39357     CASE_CFN_LOG2:
39358     CASE_CFN_LOG10:
39359       if (el_mode == DFmode && n == 2)
39360 	{
39361 	  name[4] = 'd';
39362 	  name[5] = '2';
39363 	}
39364       else if (el_mode == SFmode && n == 4)
39365 	{
39366 	  name[4] = 's';
39367 	  name[5] = '4';
39368 	}
39369       else
39370 	return NULL_TREE;
39371       break;
39372 
39373     default:
39374       return NULL_TREE;
39375     }
39376 
39377   tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
39378   bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
39379   sprintf (name + 7, "%s", bname+10);
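  /* E.g. a 2-wide double-precision sin produces "__vrd2_sin".  */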
39380 
39381   arity = 0;
39382   for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
39383     arity++;
39384 
39385   if (arity == 1)
39386     fntype = build_function_type_list (type_out, type_in, NULL);
39387   else
39388     fntype = build_function_type_list (type_out, type_in, type_in, NULL);
39389 
39390   /* Build a function declaration for the vectorized function.  */
39391   new_fndecl = build_decl (BUILTINS_LOCATION,
39392 			   FUNCTION_DECL, get_identifier (name), fntype);
39393   TREE_PUBLIC (new_fndecl) = 1;
39394   DECL_EXTERNAL (new_fndecl) = 1;
39395   DECL_IS_NOVOPS (new_fndecl) = 1;
39396   TREE_READONLY (new_fndecl) = 1;
39397 
39398   return new_fndecl;
39399 }
39400 
39401 /* Returns a decl of a function that implements a gather load with
39402    memory type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
39403    Return NULL_TREE if it is not available.  */
39404 
39405 static tree
39406 ix86_vectorize_builtin_gather (const_tree mem_vectype,
39407 			       const_tree index_type, int scale)
39408 {
39409   bool si;
39410   enum ix86_builtins code;
39411 
39412   if (!TARGET_AVX2 || !TARGET_USE_GATHER)
39413     return NULL_TREE;
39414 
39415   if ((TREE_CODE (index_type) != INTEGER_TYPE
39416        && !POINTER_TYPE_P (index_type))
39417       || (TYPE_MODE (index_type) != SImode
39418 	  && TYPE_MODE (index_type) != DImode))
39419     return NULL_TREE;
39420 
39421   if (TYPE_PRECISION (index_type) > POINTER_SIZE)
39422     return NULL_TREE;
39423 
39424   /* The v*gather* insns sign-extend the index to pointer mode.  */
39425   if (TYPE_PRECISION (index_type) < POINTER_SIZE
39426       && TYPE_UNSIGNED (index_type))
39427     return NULL_TREE;
39428 
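  /* Scale can be 1, 2, 4 or 8.  */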
39429   if (scale <= 0
39430       || scale > 8
39431       || (scale & (scale - 1)) != 0)
39432     return NULL_TREE;
39433 
39434   si = TYPE_MODE (index_type) == SImode;
39435   switch (TYPE_MODE (mem_vectype))
39436     {
39437     case E_V2DFmode:
39438       if (TARGET_AVX512VL)
39439 	code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
39440       else
39441 	code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
39442       break;
39443     case E_V4DFmode:
39444       if (TARGET_AVX512VL)
39445 	code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
39446       else
39447 	code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
39448       break;
39449     case E_V2DImode:
39450       if (TARGET_AVX512VL)
39451 	code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
39452       else
39453 	code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
39454       break;
39455     case E_V4DImode:
39456       if (TARGET_AVX512VL)
39457 	code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
39458       else
39459 	code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
39460       break;
39461     case E_V4SFmode:
39462       if (TARGET_AVX512VL)
39463 	code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
39464       else
39465 	code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
39466       break;
39467     case E_V8SFmode:
39468       if (TARGET_AVX512VL)
39469 	code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
39470       else
39471 	code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
39472       break;
39473     case E_V4SImode:
39474       if (TARGET_AVX512VL)
39475 	code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
39476       else
39477 	code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
39478       break;
39479     case E_V8SImode:
39480       if (TARGET_AVX512VL)
39481 	code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
39482       else
39483 	code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
39484       break;
39485     case E_V8DFmode:
39486       if (TARGET_AVX512F)
39487 	code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
39488       else
39489 	return NULL_TREE;
39490       break;
39491     case E_V8DImode:
39492       if (TARGET_AVX512F)
39493 	code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
39494       else
39495 	return NULL_TREE;
39496       break;
39497     case E_V16SFmode:
39498       if (TARGET_AVX512F)
39499 	code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
39500       else
39501 	return NULL_TREE;
39502       break;
39503     case E_V16SImode:
39504       if (TARGET_AVX512F)
39505 	code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
39506       else
39507 	return NULL_TREE;
39508       break;
39509     default:
39510       return NULL_TREE;
39511     }
39512 
39513   return ix86_get_builtin (code);
39514 }
39515 
39516 /* Returns a decl of a function that implements a scatter store with
39517    register type VECTYPE, index type INDEX_TYPE and scale SCALE.
39518    Return NULL_TREE if it is not available.  */
39519 
39520 static tree
39521 ix86_vectorize_builtin_scatter (const_tree vectype,
39522 				const_tree index_type, int scale)
39523 {
39524   bool si;
39525   enum ix86_builtins code;
39526 
39527   if (!TARGET_AVX512F)
39528     return NULL_TREE;
39529 
39530   if ((TREE_CODE (index_type) != INTEGER_TYPE
39531        && !POINTER_TYPE_P (index_type))
39532       || (TYPE_MODE (index_type) != SImode
39533 	  && TYPE_MODE (index_type) != DImode))
39534     return NULL_TREE;
39535 
39536   if (TYPE_PRECISION (index_type) > POINTER_SIZE)
39537     return NULL_TREE;
39538 
39539   /* The v*scatter* insns sign-extend the index to pointer mode.  */
39540   if (TYPE_PRECISION (index_type) < POINTER_SIZE
39541       && TYPE_UNSIGNED (index_type))
39542     return NULL_TREE;
39543 
39544   /* Scale can be 1, 2, 4 or 8.  */
39545   if (scale <= 0
39546       || scale > 8
39547       || (scale & (scale - 1)) != 0)
39548     return NULL_TREE;
39549 
39550   si = TYPE_MODE (index_type) == SImode;
39551   switch (TYPE_MODE (vectype))
39552     {
39553     case E_V8DFmode:
39554       code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
39555       break;
39556     case E_V8DImode:
39557       code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
39558       break;
39559     case E_V16SFmode:
39560       code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
39561       break;
39562     case E_V16SImode:
39563       code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
39564       break;
39565     default:
39566       return NULL_TREE;
39567     }
39568 
39569   return ix86_builtins[code];
39570 }
39571 
39572 /* Return true if it is safe to use the rsqrt optabs to optimize
39573    1.0/sqrt.  */
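/* Note that flag_finite_math_only, !flag_trapping_math and
   flag_unsafe_math_optimizations are typically all enabled by -ffast-math.  */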
39574 
39575 static bool
39576 use_rsqrt_p ()
39577 {
39578   return (TARGET_SSE_MATH
39579 	  && flag_finite_math_only
39580 	  && !flag_trapping_math
39581 	  && flag_unsafe_math_optimizations);
39582 }
39583 
39584 /* Returns a decl for a target-specific builtin that implements the
39585    reciprocal of the function FNDECL, or NULL_TREE if not available.  */
39586 
39587 static tree
39588 ix86_builtin_reciprocal (tree fndecl)
39589 {
39590   switch (DECL_FUNCTION_CODE (fndecl))
39591     {
39592       /* Vectorized version of sqrt to rsqrt conversion.  */
39593     case IX86_BUILTIN_SQRTPS_NR:
39594       return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
39595 
39596     case IX86_BUILTIN_SQRTPS_NR256:
39597       return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
39598 
39599     default:
39600       return NULL_TREE;
39601     }
39602 }
39603 
39604 /* Helper for avx_vpermilps256_operand et al.  This is also used by
39605    the expansion functions to turn the parallel back into a mask.
39606    The return value is 0 for no match and the imm8+1 for a match.  */
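/* For example, for V4SFmode a parallel selecting elements (1 0 3 2) uses
   two bits per element, giving imm8 0xb1 and a return value of 0xb2.  */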
39607 
39608 int
39609 avx_vpermilp_parallel (rtx par, machine_mode mode)
39610 {
39611   unsigned i, nelt = GET_MODE_NUNITS (mode);
39612   unsigned mask = 0;
39613   unsigned char ipar[16] = {};  /* Silence -Wuninitialized warning.  */
39614 
39615   if (XVECLEN (par, 0) != (int) nelt)
39616     return 0;
39617 
39618   /* Validate that all of the elements are constants, and not totally
39619      out of range.  Copy the data into an integral array to make the
39620      subsequent checks easier.  */
39621   for (i = 0; i < nelt; ++i)
39622     {
39623       rtx er = XVECEXP (par, 0, i);
39624       unsigned HOST_WIDE_INT ei;
39625 
39626       if (!CONST_INT_P (er))
39627 	return 0;
39628       ei = INTVAL (er);
39629       if (ei >= nelt)
39630 	return 0;
39631       ipar[i] = ei;
39632     }
39633 
39634   switch (mode)
39635     {
39636     case E_V8DFmode:
39637       /* In the 512-bit DFmode case, we can only move elements within
39638          a 128-bit lane.  First fill the second part of the mask,
39639 	 then fallthru.  */
39640       for (i = 4; i < 6; ++i)
39641 	{
39642 	  if (ipar[i] < 4 || ipar[i] >= 6)
39643 	    return 0;
39644 	  mask |= (ipar[i] - 4) << i;
39645 	}
39646       for (i = 6; i < 8; ++i)
39647 	{
39648 	  if (ipar[i] < 6)
39649 	    return 0;
39650 	  mask |= (ipar[i] - 6) << i;
39651 	}
39652       /* FALLTHRU */
39653 
39654     case E_V4DFmode:
39655       /* In the 256-bit DFmode case, we can only move elements within
39656          a 128-bit lane.  */
39657       for (i = 0; i < 2; ++i)
39658 	{
39659 	  if (ipar[i] >= 2)
39660 	    return 0;
39661 	  mask |= ipar[i] << i;
39662 	}
39663       for (i = 2; i < 4; ++i)
39664 	{
39665 	  if (ipar[i] < 2)
39666 	    return 0;
39667 	  mask |= (ipar[i] - 2) << i;
39668 	}
39669       break;
39670 
39671     case E_V16SFmode:
39672       /* In the 512-bit SFmode case, the permutation in the upper 256 bits
39673	 must mirror the permutation in the lower 256 bits.  */
39674       for (i = 0; i < 8; ++i)
39675 	if (ipar[i] + 8 != ipar[i + 8])
39676 	  return 0;
39677       /* FALLTHRU */
39678 
39679     case E_V8SFmode:
39680       /* In the 256-bit SFmode case, we have full freedom of
39681          movement within the low 128-bit lane, but the high 128-bit
39682          lane must mirror the exact same pattern.  */
39683       for (i = 0; i < 4; ++i)
39684 	if (ipar[i] + 4 != ipar[i + 4])
39685 	  return 0;
39686       nelt = 4;
39687       /* FALLTHRU */
39688 
39689     case E_V2DFmode:
39690     case E_V4SFmode:
39691       /* In the 128-bit case, we have full freedom in the placement of
39692 	 the elements from the source operand.  */
39693       for (i = 0; i < nelt; ++i)
39694 	mask |= ipar[i] << (i * (nelt / 2));
39695       break;
39696 
39697     default:
39698       gcc_unreachable ();
39699     }
39700 
39701   /* Make sure success has a non-zero value by adding one.  */
39702   return mask + 1;
39703 }
39704 
39705 /* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
39706    the expansion functions to turn the parallel back into a mask.
39707    The return value is 0 for no match and the imm8+1 for a match.  */
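/* For example, for V4DFmode the parallel (0 1 4 5) selects the low half of
   each operand, yielding imm8 0x20 and a return value of 0x21.  */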
39708 
39709 int
39710 avx_vperm2f128_parallel (rtx par, machine_mode mode)
39711 {
39712   unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
39713   unsigned mask = 0;
39714   unsigned char ipar[8] = {};  /* Silence -Wuninitialized warning.  */
39715 
39716   if (XVECLEN (par, 0) != (int) nelt)
39717     return 0;
39718 
39719   /* Validate that all of the elements are constants, and not totally
39720      out of range.  Copy the data into an integral array to make the
39721      subsequent checks easier.  */
39722   for (i = 0; i < nelt; ++i)
39723     {
39724       rtx er = XVECEXP (par, 0, i);
39725       unsigned HOST_WIDE_INT ei;
39726 
39727       if (!CONST_INT_P (er))
39728 	return 0;
39729       ei = INTVAL (er);
39730       if (ei >= 2 * nelt)
39731 	return 0;
39732       ipar[i] = ei;
39733     }
39734 
39735   /* Validate that each half of the permute selects consecutive elements.  */
39736   for (i = 0; i < nelt2 - 1; ++i)
39737     if (ipar[i] + 1 != ipar[i + 1])
39738       return 0;
39739   for (i = nelt2; i < nelt - 1; ++i)
39740     if (ipar[i] + 1 != ipar[i + 1])
39741       return 0;
39742 
39743   /* Reconstruct the mask.  */
39744   for (i = 0; i < 2; ++i)
39745     {
39746       unsigned e = ipar[i * nelt2];
39747       if (e % nelt2)
39748 	return 0;
39749       e /= nelt2;
39750       mask |= e << (i * 4);
39751     }
39752 
39753   /* Make sure success has a non-zero value by adding one.  */
39754   return mask + 1;
39755 }
39756 
39757 /* Return a register priority for hard register HARD_REGNO.  */
39758 static int
39759 ix86_register_priority (int hard_regno)
39760 {
39761   /* ebp and r13 as a base always want a displacement, and r12 as a
39762      base always wants an index.  So discourage their usage in an
39763      address.  */
39764   if (hard_regno == R12_REG || hard_regno == R13_REG)
39765     return 0;
39766   if (hard_regno == BP_REG)
39767     return 1;
39768   /* New x86-64 int registers result in bigger code size.  Discourage
39769      them.  */
39770   if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
39771     return 2;
39772   /* New x86-64 SSE registers result in bigger code size.  Discourage
39773      them.  */
39774   if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
39775     return 2;
39776   /* Usage of AX register results in smaller code.  Prefer it.  */
39777   if (hard_regno == AX_REG)
39778     return 4;
39779   return 3;
39780 }
39781 
39782 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
39783 
39784    Put float CONST_DOUBLE in the constant pool instead of fp regs.
39785    QImode must go into class Q_REGS.
39786    Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
39787    movdf to do mem-to-mem moves through integer regs.  */
39788 
39789 static reg_class_t
39790 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
39791 {
39792   machine_mode mode = GET_MODE (x);
39793 
39794   /* We're only allowed to return a subclass of CLASS.  Many of the
39795      following checks fail for NO_REGS, so eliminate that early.  */
39796   if (regclass == NO_REGS)
39797     return NO_REGS;
39798 
39799   /* All classes can load zeros.  */
39800   if (x == CONST0_RTX (mode))
39801     return regclass;
39802 
39803   /* Force constants into memory if we are loading a (nonzero) constant into
39804      an MMX, SSE or MASK register.  This is because there are no MMX/SSE/MASK
39805      instructions to load from a constant.  */
39806   if (CONSTANT_P (x)
39807       && (MAYBE_MMX_CLASS_P (regclass)
39808 	  || MAYBE_SSE_CLASS_P (regclass)
39809 	  || MAYBE_MASK_CLASS_P (regclass)))
39810     return NO_REGS;
39811 
39812   /* Floating-point constants need more complex checks.  */
39813   if (CONST_DOUBLE_P (x))
39814     {
39815       /* General regs can load everything.  */
39816       if (INTEGER_CLASS_P (regclass))
39817         return regclass;
39818 
39819       /* Floats can load 0 and 1 plus some others.  Note that we eliminated
39820 	 zero above.  We only want to wind up preferring 80387 registers if
39821 	 we plan on doing computation with them.  */
39822       if (IS_STACK_MODE (mode)
39823 	  && standard_80387_constant_p (x) > 0)
39824 	{
39825 	  /* Limit class to FP regs.  */
39826 	  if (FLOAT_CLASS_P (regclass))
39827 	    return FLOAT_REGS;
39828 	  else if (regclass == FP_TOP_SSE_REGS)
39829 	    return FP_TOP_REG;
39830 	  else if (regclass == FP_SECOND_SSE_REGS)
39831 	    return FP_SECOND_REG;
39832 	}
39833 
39834       return NO_REGS;
39835     }
39836 
39837   /* Prefer SSE regs only, if we can use them for math.  */
39838   if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39839     return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
39840 
39841   /* Generally when we see PLUS here, it's the function invariant
39842      (plus soft-fp const_int), which can only be computed into general
39843      regs.  */
39844   if (GET_CODE (x) == PLUS)
39845     return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
39846 
39847   /* QImode constants are easy to load, but non-constant QImode data
39848      must go into Q_REGS.  */
39849   if (GET_MODE (x) == QImode && !CONSTANT_P (x))
39850     {
39851       if (Q_CLASS_P (regclass))
39852 	return regclass;
39853       else if (reg_class_subset_p (Q_REGS, regclass))
39854 	return Q_REGS;
39855       else
39856 	return NO_REGS;
39857     }
39858 
39859   return regclass;
39860 }
39861 
39862 /* Discourage putting floating-point values in SSE registers unless
39863    SSE math is being used, and likewise for the 387 registers.  */
39864 static reg_class_t
39865 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
39866 {
39867   machine_mode mode = GET_MODE (x);
39868 
39869   /* Restrict the output reload class to the register bank that we are doing
39870      math on.  If we would like not to return a subset of CLASS, reject this
39871      alternative: if reload cannot do this, it will still use its choice.  */
39872   mode = GET_MODE (x);
39873   if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
39874     return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
39875 
39876   if (IS_STACK_MODE (mode))
39877     {
39878       if (regclass == FP_TOP_SSE_REGS)
39879 	return FP_TOP_REG;
39880       else if (regclass == FP_SECOND_SSE_REGS)
39881 	return FP_SECOND_REG;
39882       else
39883 	return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
39884     }
39885 
39886   return regclass;
39887 }
39888 
39889 static reg_class_t
39890 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
39891 		       machine_mode mode, secondary_reload_info *sri)
39892 {
39893   /* Double-word spills from general registers to non-offsettable memory
39894      references (zero-extended addresses) require special handling.  */
39895   if (TARGET_64BIT
39896       && MEM_P (x)
39897       && GET_MODE_SIZE (mode) > UNITS_PER_WORD
39898       && INTEGER_CLASS_P (rclass)
39899       && !offsettable_memref_p (x))
39900     {
39901       sri->icode = (in_p
39902 		    ? CODE_FOR_reload_noff_load
39903 		    : CODE_FOR_reload_noff_store);
39904       /* Add the cost of moving address to a temporary.  */
39905       sri->extra_cost = 1;
39906 
39907       return NO_REGS;
39908     }
39909 
39910   /* QImode spills from non-QI registers require an
39911      intermediate register on 32-bit targets.  */
39912   if (mode == QImode
39913       && ((!TARGET_64BIT && !in_p
39914 	   && INTEGER_CLASS_P (rclass)
39915 	   && MAYBE_NON_Q_CLASS_P (rclass))
39916 	  || (!TARGET_AVX512DQ
39917 	      && MAYBE_MASK_CLASS_P (rclass))))
39918     {
39919       int regno = true_regnum (x);
39920 
39921       /* Return Q_REGS if the operand is in memory.  */
39922       if (regno == -1)
39923 	return Q_REGS;
39924 
39925       return NO_REGS;
39926     }
39927 
39928   /* This condition handles corner case where an expression involving
39929      pointers gets vectorized.  We're trying to use the address of a
39930      stack slot as a vector initializer.
39931 
39932      (set (reg:V2DI 74 [ vect_cst_.2 ])
39933           (vec_duplicate:V2DI (reg/f:DI 20 frame)))
39934 
39935      Eventually frame gets turned into sp+offset like this:
39936 
39937      (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
39938           (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
39939 	                               (const_int 392 [0x188]))))
39940 
39941      That later gets turned into:
39942 
39943      (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
39944           (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
39945 	    (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
39946 
39947      We'll have the following reload recorded:
39948 
39949      Reload 0: reload_in (DI) =
39950            (plus:DI (reg/f:DI 7 sp)
39951             (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
39952      reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
39953      SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
39954      reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
39955      reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
39956      reload_reg_rtx: (reg:V2DI 22 xmm1)
39957 
39958      Which isn't going to work since SSE instructions can't handle scalar
39959      additions.  Returning GENERAL_REGS forces the addition into integer
39960      register and reload can handle subsequent reloads without problems.  */
39961 
39962   if (in_p && GET_CODE (x) == PLUS
39963       && SSE_CLASS_P (rclass)
39964       && SCALAR_INT_MODE_P (mode))
39965     return GENERAL_REGS;
39966 
39967   return NO_REGS;
39968 }
39969 
39970 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.  */
39971 
39972 static bool
39973 ix86_class_likely_spilled_p (reg_class_t rclass)
39974 {
39975   switch (rclass)
39976     {
39977       case AREG:
39978       case DREG:
39979       case CREG:
39980       case BREG:
39981       case AD_REGS:
39982       case SIREG:
39983       case DIREG:
39984       case SSE_FIRST_REG:
39985       case FP_TOP_REG:
39986       case FP_SECOND_REG:
39987       case BND_REGS:
39988 	return true;
39989 
39990       default:
39991 	break;
39992     }
39993 
39994   return false;
39995 }
39996 
39997 /* If we are copying between registers from different register sets
39998    (e.g. FP and integer), we may need a memory location.
39999 
40000    The function can't work reliably when one of the CLASSES is a class
40001    containing registers from multiple sets.  We avoid this by never combining
40002    different sets in a single alternative in the machine description.
40003    Ensure that this constraint holds to avoid unexpected surprises.
40004 
40005    When STRICT is false, we are being called from REGISTER_MOVE_COST,
40006    so do not enforce these sanity checks.
40007 
40008    To optimize register_move_cost performance, an inline variant is defined.  */
40009 
40010 static inline bool
40011 inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
40012 				reg_class_t class2, int strict)
40013 {
40014   if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
40015     return false;
40016 
40017   if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
40018       || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
40019       || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
40020       || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
40021       || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
40022       || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
40023       || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
40024       || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
40025     {
40026       gcc_assert (!strict || lra_in_progress);
40027       return true;
40028     }
40029 
40030   if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
40031     return true;
40032 
40033   /* Between mask and general, we have moves no larger than word size.  */
40034   if ((MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
40035       && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
40036     return true;
40037 
40038   /* ??? This is a lie.  We do have moves between mmx/general, and between
40039      mmx/sse2.  But by saying we need secondary memory we discourage the
40040      register allocator from using the mmx registers unless needed.  */
40041   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
40042     return true;
40043 
40044   if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
40045     {
40046       /* SSE1 doesn't have any direct moves from other classes.  */
40047       if (!TARGET_SSE2)
40048 	return true;
40049 
40050       /* If the target says that inter-unit moves are more expensive
40051 	 than moving through memory, then don't generate them.  */
40052       if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
40053 	  || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
40054 	return true;
40055 
40056       /* Between SSE and general, we have moves no larger than word size.  */
40057       if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
40058 	return true;
40059     }
40060 
40061   return false;
40062 }
40063 
40064 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.  */
40065 
40066 static bool
40067 ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
40068 			      reg_class_t class2)
40069 {
40070   return inline_secondary_memory_needed (mode, class1, class2, true);
40071 }
40072 
40073 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
40074 
40075    get_secondary_mem widens integral modes to BITS_PER_WORD.
40076    There is no need to emit a full 64-bit move on 64-bit targets
40077    for integral modes that can be moved using a 32-bit move.  */
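/* For example, a QImode or HImode value that needs secondary memory is
   spilled and reloaded as SImode.  */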
40078 
40079 static machine_mode
40080 ix86_secondary_memory_needed_mode (machine_mode mode)
40081 {
40082   if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
40083     return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
40084   return mode;
40085 }
40086 
40087 /* Implement the TARGET_CLASS_MAX_NREGS hook.
40088 
40089    On the 80386, this is the size of MODE in words,
40090    except in the FP regs, where a single reg is always enough.  */
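/* For example, XFmode needs three general registers on a 32-bit target
   (two on a 64-bit target) but only a single x87 register.  */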
40091 
40092 static unsigned char
40093 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
40094 {
40095   if (MAYBE_INTEGER_CLASS_P (rclass))
40096     {
40097       if (mode == XFmode)
40098 	return (TARGET_64BIT ? 2 : 3);
40099       else if (mode == XCmode)
40100 	return (TARGET_64BIT ? 4 : 6);
40101       else
40102 	return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
40103     }
40104   else
40105     {
40106       if (COMPLEX_MODE_P (mode))
40107 	return 2;
40108       else
40109 	return 1;
40110     }
40111 }
40112 
40113 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
40114 
40115 static bool
40116 ix86_can_change_mode_class (machine_mode from, machine_mode to,
40117 			    reg_class_t regclass)
40118 {
40119   if (from == to)
40120     return true;
40121 
40122   /* x87 registers can't do subreg at all, as all values are reformatted
40123      to extended precision.  */
40124   if (MAYBE_FLOAT_CLASS_P (regclass))
40125     return false;
40126 
40127   if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
40128     {
40129       /* Vector registers do not support QI or HImode loads.  If we don't
40130 	 disallow a change to these modes, reload will assume it's ok to
40131 	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
40132 	 the vec_dupv4hi pattern.  */
40133       if (GET_MODE_SIZE (from) < 4)
40134 	return false;
40135     }
40136 
40137   return true;
40138 }
40139 
40140 /* Return the index of MODE in the SSE load/store cost tables.  */
40141 
40142 static inline int
40143 sse_store_index (machine_mode mode)
40144 {
40145   switch (GET_MODE_SIZE (mode))
40146     {
40147     case 4:
40148       return 0;
40149     case 8:
40150       return 1;
40151     case 16:
40152       return 2;
40153     case 32:
40154       return 3;
40155     case 64:
40156       return 4;
40157     default:
40158       return -1;
40159     }
40160 }
40161 
40162 /* Return the cost of moving data of mode MODE between a
40163    register and memory.  A value of 2 is the default; this cost is
40164    relative to those in `REGISTER_MOVE_COST'.
40165 
40166    This function is used extensively by register_move_cost, which is
40167    used to build tables at startup, so it is kept inline.
40168    When IN is 2, return the maximum of the in and out move costs.
40169 
40170    If moving between registers and memory is more expensive than
40171    between two registers, this hook expresses the
40172    relative cost.
40173 
40174    Also model the increased moving costs of QImode registers in non
40175    Q_REGS classes.
40176  */
40177 static inline int
40178 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
40179 			 int in)
40180 {
40181   int cost;
40182   if (FLOAT_CLASS_P (regclass))
40183     {
40184       int index;
40185       switch (mode)
40186 	{
40187 	  case E_SFmode:
40188 	    index = 0;
40189 	    break;
40190 	  case E_DFmode:
40191 	    index = 1;
40192 	    break;
40193 	  case E_XFmode:
40194 	    index = 2;
40195 	    break;
40196 	  default:
40197 	    return 100;
40198 	}
40199       if (in == 2)
40200         return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
40201       return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
40202     }
40203   if (SSE_CLASS_P (regclass))
40204     {
40205       int index = sse_store_index (mode);
40206       if (index == -1)
40207 	return 100;
40208       if (in == 2)
40209         return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
40210       return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
40211     }
40212   if (MMX_CLASS_P (regclass))
40213     {
40214       int index;
40215       switch (GET_MODE_SIZE (mode))
40216 	{
40217 	  case 4:
40218 	    index = 0;
40219 	    break;
40220 	  case 8:
40221 	    index = 1;
40222 	    break;
40223 	  default:
40224 	    return 100;
40225 	}
40226       if (in == 2)
40227         return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
40228       return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
40229     }
40230   switch (GET_MODE_SIZE (mode))
40231     {
40232       case 1:
40233 	if (Q_CLASS_P (regclass) || TARGET_64BIT)
40234 	  {
40235 	    if (!in)
40236 	      return ix86_cost->int_store[0];
40237 	    if (TARGET_PARTIAL_REG_DEPENDENCY
40238 	        && optimize_function_for_speed_p (cfun))
40239 	      cost = ix86_cost->movzbl_load;
40240 	    else
40241 	      cost = ix86_cost->int_load[0];
40242 	    if (in == 2)
40243 	      return MAX (cost, ix86_cost->int_store[0]);
40244 	    return cost;
40245 	  }
40246 	else
40247 	  {
40248 	   if (in == 2)
40249 	     return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
40250 	   if (in)
40251 	     return ix86_cost->movzbl_load;
40252 	   else
40253 	     return ix86_cost->int_store[0] + 4;
40254 	  }
40255 	break;
40256       case 2:
40257 	if (in == 2)
40258 	  return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
40259 	return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
40260       default:
40261 	/* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
40262 	if (mode == TFmode)
40263 	  mode = XFmode;
40264 	if (in == 2)
40265 	  cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
40266 	else if (in)
40267 	  cost = ix86_cost->int_load[2];
40268 	else
40269 	  cost = ix86_cost->int_store[2];
40270 	return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
40271     }
40272 }
40273 
40274 static int
40275 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
40276 		       bool in)
40277 {
40278   return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
40279 }
40280 
40281 
40282 /* Return the cost of moving data from a register in class CLASS1 to
40283    one in class CLASS2.
40284 
40285    It is not required that the cost always equal 2 when FROM is the same as TO;
40286    on some machines it is expensive to move between registers if they are not
40287    general registers.  */
40288 
40289 static int
40290 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
40291 			 reg_class_t class2_i)
40292 {
40293   enum reg_class class1 = (enum reg_class) class1_i;
40294   enum reg_class class2 = (enum reg_class) class2_i;
40295 
40296   /* In case we require secondary memory, compute the cost of the store
40297      followed by the load.  To avoid bad register allocation choices, we
40298      need this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
40299 
40300   if (inline_secondary_memory_needed (mode, class1, class2, false))
40301     {
40302       int cost = 1;
40303 
40304       cost += inline_memory_move_cost (mode, class1, 2);
40305       cost += inline_memory_move_cost (mode, class2, 2);
40306 
40307       /* In case of copying from a general purpose register we may emit
40308          multiple stores followed by a single load, causing a memory size
40309          mismatch stall.  Count this as an arbitrarily high cost of 20.  */
40310       if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
40311 	  && TARGET_MEMORY_MISMATCH_STALL
40312 	  && targetm.class_max_nregs (class1, mode)
40313 	     > targetm.class_max_nregs (class2, mode))
40314 	cost += 20;
40315 
40316       /* In the case of FP/MMX moves, the registers actually overlap, and we
40317 	 have to switch modes in order to treat them differently.  */
40318       if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
40319           || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
40320 	cost += 20;
40321 
40322       return cost;
40323     }
40324 
40325   /* Moves between SSE/MMX and integer unit are expensive.  */
40326   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
40327       || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
40328 
40329     /* ??? By keeping the returned value relatively high, we limit the number
40330        of moves between integer and MMX/SSE registers for all targets.
40331        Additionally, a high value prevents a problem with ix86_modes_tieable_p,
40332        where integer modes in MMX/SSE registers are not tieable
40333        because of missing QImode and HImode moves to, from or between
40334        MMX/SSE registers.  */
40335     return MAX (8, MMX_CLASS_P (class1) || MMX_CLASS_P (class2)
40336 		? ix86_cost->mmxsse_to_integer : ix86_cost->ssemmx_to_integer);
40337 
40338   if (MAYBE_FLOAT_CLASS_P (class1))
40339     return ix86_cost->fp_move;
40340   if (MAYBE_SSE_CLASS_P (class1))
40341     {
40342       if (GET_MODE_BITSIZE (mode) <= 128)
40343 	return ix86_cost->xmm_move;
40344       if (GET_MODE_BITSIZE (mode) <= 256)
40345 	return ix86_cost->ymm_move;
40346       return ix86_cost->zmm_move;
40347     }
40348   if (MAYBE_MMX_CLASS_P (class1))
40349     return ix86_cost->mmx_move;
40350   return 2;
40351 }
40352 
40353 /* Implement TARGET_HARD_REGNO_NREGS.  This is ordinarily the length in
40354    words of a value of mode MODE, but can be less for certain modes in
40355    special long registers.
40356 
40357    Note that there are no two-word move instructions for consecutive
40358    registers, and only registers 0-3 may have mov byte instructions
40359    applied to them.  */
40360 
40361 static unsigned int
40362 ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
40363 {
40364   if (GENERAL_REGNO_P (regno))
40365     {
40366       if (mode == XFmode)
40367 	return TARGET_64BIT ? 2 : 3;
40368       if (mode == XCmode)
40369 	return TARGET_64BIT ? 4 : 6;
40370       return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
40371     }
40372   if (COMPLEX_MODE_P (mode))
40373     return 2;
40374   if (mode == V64SFmode || mode == V64SImode)
40375     return 4;
40376   return 1;
40377 }
40378 
40379 /* Implement TARGET_HARD_REGNO_MODE_OK.  */
40380 
40381 static bool
40382 ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
40383 {
40384   /* The flags register, and only the flags register, can hold CCmode values.  */
40385   if (CC_REGNO_P (regno))
40386     return GET_MODE_CLASS (mode) == MODE_CC;
40387   if (GET_MODE_CLASS (mode) == MODE_CC
40388       || GET_MODE_CLASS (mode) == MODE_RANDOM
40389       || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
40390     return false;
40391   if (STACK_REGNO_P (regno))
40392     return VALID_FP_MODE_P (mode);
40393   if (MASK_REGNO_P (regno))
40394     return (VALID_MASK_REG_MODE (mode)
40395 	    || (TARGET_AVX512BW
40396 		&& VALID_MASK_AVX512BW_MODE (mode)));
40397   if (BND_REGNO_P (regno))
40398     return VALID_BND_REG_MODE (mode);
40399   if (SSE_REGNO_P (regno))
40400     {
40401       /* We implement the move patterns for all vector modes into and
40402 	 out of SSE registers, even when no operation instructions
40403 	 are available.  */
40404 
40405       /* For AVX-512 we allow, regardless of regno:
40406 	  - XI mode
40407 	  - any 512-bit wide vector mode
40408 	  - any scalar mode.  */
40409       if (TARGET_AVX512F
40410 	  && (mode == XImode
40411 	      || VALID_AVX512F_REG_MODE (mode)
40412 	      || VALID_AVX512F_SCALAR_MODE (mode)))
40413 	return true;
40414 
40415       /* For AVX-5124FMAPS allow V64SFmode for special regnos.  */
40416       if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
40417 	  && MOD4_SSE_REGNO_P (regno)
40418 	  && mode == V64SFmode)
40419 	return true;
40420 
40421       /* For AVX-5124VNNIW allow V64SImode for special regnos.  */
40422       if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
40423 	  && MOD4_SSE_REGNO_P (regno)
40424 	  && mode == V64SImode)
40425 	return true;
40426 
40427       /* TODO check for QI/HI scalars.  */
40428       /* AVX512VL allows SSE registers 16+ for 128/256-bit modes.  */
40429       if (TARGET_AVX512VL
40430 	  && (mode == OImode
40431 	      || mode == TImode
40432 	      || VALID_AVX256_REG_MODE (mode)
40433 	      || VALID_AVX512VL_128_REG_MODE (mode)))
40434 	return true;
40435 
40436       /* xmm16-xmm31 are only available for AVX-512.  */
40437       if (EXT_REX_SSE_REGNO_P (regno))
40438 	return false;
40439 
40440       /* OImode and AVX modes are available only when AVX is enabled.  */
40441       return ((TARGET_AVX
40442 	       && VALID_AVX256_REG_OR_OI_MODE (mode))
40443 	      || VALID_SSE_REG_MODE (mode)
40444 	      || VALID_SSE2_REG_MODE (mode)
40445 	      || VALID_MMX_REG_MODE (mode)
40446 	      || VALID_MMX_REG_MODE_3DNOW (mode));
40447     }
40448   if (MMX_REGNO_P (regno))
40449     {
40450       /* We implement the move patterns for 3DNOW modes even in MMX mode,
40451 	 so if the register is available at all, then we can move data of
40452 	 the given mode into or out of it.  */
40453       return (VALID_MMX_REG_MODE (mode)
40454 	      || VALID_MMX_REG_MODE_3DNOW (mode));
40455     }
40456 
40457   if (mode == QImode)
40458     {
40459       /* Take care with QImode values - they can be in non-QI regs,
40460 	 but then they do cause partial register stalls.  */
40461       if (ANY_QI_REGNO_P (regno))
40462 	return true;
40463       if (!TARGET_PARTIAL_REG_STALL)
40464 	return true;
40465       /* LRA checks if the hard register is OK for the given mode.
40466 	 QImode values can live in non-QI regs, so we allow all
40467 	 registers here.  */
40468       if (lra_in_progress)
40469        return true;
40470       return !can_create_pseudo_p ();
40471     }
40472   /* We handle both integer and floats in the general purpose registers.  */
40473   else if (VALID_INT_MODE_P (mode))
40474     return true;
40475   else if (VALID_FP_MODE_P (mode))
40476     return true;
40477   else if (VALID_DFP_MODE_P (mode))
40478     return true;
40479   /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
40480      on to use that value in smaller contexts, this can easily force a
40481      pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
40482      supporting DImode, allow it.  */
40483   else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
40484     return true;
40485 
40486   return false;
40487 }
40488 
40489 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  The only ABI that
40490    saves SSE registers across calls is Win64 (thus no need to check the
40491    current ABI here), and with AVX enabled Win64 only guarantees that
40492    the low 16 bytes are saved.  */
40493 
40494 static bool
40495 ix86_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
40496 {
40497   return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
40498 }
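
/* For illustration: a 32-byte V8SFmode value living in %xmm6 is treated as
   partially clobbered (Win64 preserves only the low 128 bits of xmm6-xmm15),
   while the same register holding DFmode is fully preserved, so the hook
   returns false for it.  */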
40499 
40500 /* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
40501    tieable integer mode.  */
40502 
40503 static bool
40504 ix86_tieable_integer_mode_p (machine_mode mode)
40505 {
40506   switch (mode)
40507     {
40508     case E_HImode:
40509     case E_SImode:
40510       return true;
40511 
40512     case E_QImode:
40513       return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
40514 
40515     case E_DImode:
40516       return TARGET_64BIT;
40517 
40518     default:
40519       return false;
40520     }
40521 }
40522 
40523 /* Implement TARGET_MODES_TIEABLE_P.
40524 
40525    Return true if MODE1 is accessible in a register that can hold MODE2
40526    without copying.  That is, all register classes that can hold MODE2
40527    can also hold MODE1.  */
40528 
40529 static bool
40530 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
40531 {
40532   if (mode1 == mode2)
40533     return true;
40534 
40535   if (ix86_tieable_integer_mode_p (mode1)
40536       && ix86_tieable_integer_mode_p (mode2))
40537     return true;
40538 
40539   /* MODE2 being XFmode implies fp stack or general regs, which means we
40540      can tie any smaller floating point modes to it.  Note that we do not
40541      tie this with TFmode.  */
40542   if (mode2 == XFmode)
40543     return mode1 == SFmode || mode1 == DFmode;
40544 
40545   /* MODE2 being DFmode implies fp stack, general or sse regs, which means
40546      that we can tie it with SFmode.  */
40547   if (mode2 == DFmode)
40548     return mode1 == SFmode;
40549 
40550   /* If MODE2 is only appropriate for an SSE register, then tie with
40551      any other mode acceptable to SSE registers.  */
40552   if (GET_MODE_SIZE (mode2) == 32
40553       && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
40554     return (GET_MODE_SIZE (mode1) == 32
40555 	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
40556   if (GET_MODE_SIZE (mode2) == 16
40557       && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
40558     return (GET_MODE_SIZE (mode1) == 16
40559 	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
40560 
40561   /* If MODE2 is appropriate for an MMX register, then tie
40562      with any other mode acceptable to MMX registers.  */
40563   if (GET_MODE_SIZE (mode2) == 8
40564       && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
40565     return (GET_MODE_SIZE (mode1) == 8
40566 	    && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
40567 
40568   return false;
40569 }
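
/* For illustration: HImode and SImode tie (both are tieable integer modes),
   SFmode ties with DFmode and XFmode, and a 16-byte vector mode such as
   V4SFmode ties only with other 16-byte SSE modes such as V2DImode.  */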
40570 
40571 /* Return the cost of moving between two registers of mode MODE.  */
40572 
40573 static int
40574 ix86_set_reg_reg_cost (machine_mode mode)
40575 {
40576   unsigned int units = UNITS_PER_WORD;
40577 
40578   switch (GET_MODE_CLASS (mode))
40579     {
40580     default:
40581       break;
40582 
40583     case MODE_CC:
40584       units = GET_MODE_SIZE (CCmode);
40585       break;
40586 
40587     case MODE_FLOAT:
40588       if ((TARGET_SSE && mode == TFmode)
40589 	  || (TARGET_80387 && mode == XFmode)
40590 	  || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
40591 	  || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
40592 	units = GET_MODE_SIZE (mode);
40593       break;
40594 
40595     case MODE_COMPLEX_FLOAT:
40596       if ((TARGET_SSE && mode == TCmode)
40597 	  || (TARGET_80387 && mode == XCmode)
40598 	  || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
40599 	  || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
40600 	units = GET_MODE_SIZE (mode);
40601       break;
40602 
40603     case MODE_VECTOR_INT:
40604     case MODE_VECTOR_FLOAT:
40605       if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
40606 	  || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
40607 	  || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
40608 	  || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
40609 	  || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
40610 	units = GET_MODE_SIZE (mode);
40611     }
40612 
40613   /* Return the cost of moving between two registers of mode MODE,
40614      assuming that the move will be in pieces of at most UNITS bytes.  */
40615   return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
40616 }
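
/* Worked example (assuming UNITS_PER_WORD == 8, i.e. a 64-bit target):
   a TImode register-register copy is priced as CEIL (16, 8) = 2 insns,
   while a V4SFmode copy with TARGET_SSE sets UNITS to 16 and therefore
   costs a single insn.  */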
40617 
40618 /* Return cost of vector operation in MODE given that scalar version has
40619    COST.  If PARALLEL is true assume that CPU has more than one unit
40620    performing the operation.  */
40621 
40622 static int
40623 ix86_vec_cost (machine_mode mode, int cost, bool parallel)
40624 {
40625   if (!VECTOR_MODE_P (mode))
40626     return cost;
40627 
40628   if (!parallel)
40629     return cost * GET_MODE_NUNITS (mode);
40630   if (GET_MODE_BITSIZE (mode) == 128
40631       && TARGET_SSE_SPLIT_REGS)
40632     return cost * 2;
40633   if (GET_MODE_BITSIZE (mode) > 128
40634       && TARGET_AVX128_OPTIMAL)
40635     return cost * GET_MODE_BITSIZE (mode) / 128;
40636   return cost;
40637 }
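
/* Worked example: for V8SFmode (256 bits) with PARALLEL set, a CPU flagged
   TARGET_AVX128_OPTIMAL pays COST * 256 / 128 = 2 * COST, whereas with
   PARALLEL clear the estimate falls back to one scalar operation per
   element, i.e. 8 * COST.  */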
40638 
40639 /* Return cost of multiplication in MODE.  */
40640 
40641 static int
40642 ix86_multiplication_cost (const struct processor_costs *cost,
40643 			  enum machine_mode mode)
40644 {
40645   machine_mode inner_mode = mode;
40646   if (VECTOR_MODE_P (mode))
40647     inner_mode = GET_MODE_INNER (mode);
40648 
40649   if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
40650     return inner_mode == DFmode ? cost->mulsd : cost->mulss;
40651   else if (X87_FLOAT_MODE_P (mode))
40652     return cost->fmul;
40653   else if (FLOAT_MODE_P (mode))
40654     return  ix86_vec_cost (mode,
40655 			   inner_mode == DFmode
40656 			   ? cost->mulsd : cost->mulss, true);
40657   else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
40658     {
40659       /* vpmullq is used in this case. No emulation is needed.  */
40660       if (TARGET_AVX512DQ)
40661 	return ix86_vec_cost (mode, cost->mulss, true);
40662 
40663       /* V*QImode is emulated with 7-13 insns.  */
40664       if (mode == V16QImode || mode == V32QImode)
40665 	{
40666 	  int extra = 11;
40667 	  if (TARGET_XOP && mode == V16QImode)
40668 	    extra = 5;
40669 	  else if (TARGET_SSSE3)
40670 	    extra = 6;
40671 	  return ix86_vec_cost (mode,
40672 				cost->mulss * 2 + cost->sse_op * extra,
40673 				true);
40674 	}
40675       /* V*DImode is emulated with 5-8 insns.  */
40676       else if (mode == V2DImode || mode == V4DImode)
40677 	{
40678 	  if (TARGET_XOP && mode == V2DImode)
40679 	    return ix86_vec_cost (mode,
40680 				  cost->mulss * 2 + cost->sse_op * 3,
40681 				  true);
40682 	  else
40683 	    return ix86_vec_cost (mode,
40684 				  cost->mulss * 3 + cost->sse_op * 5,
40685 				  true);
40686 	}
40687       /* Without sse4.1, we don't have PMULLD; it's emulated with 7
40688 	 insns, including two PMULUDQ.  */
40689       else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
40690 	return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5,
40691 				true);
40692       else
40693 	return ix86_vec_cost (mode, cost->mulss, true);
40694     }
40695   else
40696     return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
40697 }
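
/* Worked example: a V16QImode multiply with SSSE3 but without XOP or
   AVX512DQ is emulated, so it is priced at 2 * mulss + 6 * sse_op (scaled
   by ix86_vec_cost); a V2DImode multiply with XOP comes out at
   2 * mulss + 3 * sse_op.  */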
40698 
40699 /* Return cost of division in MODE.  */
40700 
40701 static int
40702 ix86_division_cost (const struct processor_costs *cost,
40703 			  enum machine_mode mode)
40704 {
40705   machine_mode inner_mode = mode;
40706   if (VECTOR_MODE_P (mode))
40707     inner_mode = GET_MODE_INNER (mode);
40708 
40709   if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
40710     return inner_mode == DFmode ? cost->divsd : cost->divss;
40711   else if (X87_FLOAT_MODE_P (mode))
40712     return cost->fdiv;
40713   else if (FLOAT_MODE_P (mode))
40714     return ix86_vec_cost (mode,
40715 			    inner_mode == DFmode ? cost->divsd : cost->divss,
40716 			    true);
40717   else
40718     return cost->divide[MODE_INDEX (mode)];
40719 }
40720 
40721 /* Return cost of shift in MODE.
40722    If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
40723    AND_IN_OP1 specifies whether op1 is the result of an AND, and
40724    SHIFT_AND_TRUNCATE whether op1 is such an AND wrapped in a SUBREG.
40725 
40726    SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored.  */
40727 
40728 static int
40729 ix86_shift_rotate_cost (const struct processor_costs *cost,
40730 			enum machine_mode mode, bool constant_op1,
40731 			HOST_WIDE_INT op1_val,
40732 			bool speed,
40733 			bool and_in_op1,
40734 			bool shift_and_truncate,
40735 			bool *skip_op0, bool *skip_op1)
40736 {
40737   if (skip_op0)
40738     *skip_op0 = *skip_op1 = false;
40739   if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
40740     {
40741       /* V*QImode is emulated with 1-11 insns.  */
40742       if (mode == V16QImode || mode == V32QImode)
40743 	{
40744 	  int count = 11;
40745 	  if (TARGET_XOP && mode == V16QImode)
40746 	    {
40747 	      /* For XOP we use vpshab, which requires a broadcast of the
40748 		 shift value to the variable shift insn.  For constants this
40749 		 means a V16QImode constant in memory; even when we can perform
40750 		 the shift with one insn, set the cost so that paddb is preferred.  */
40751 	      if (constant_op1)
40752 		{
40753 		  if (skip_op1)
40754 		    *skip_op1 = true;
40755 		  return ix86_vec_cost (mode,
40756 			    cost->sse_op
40757 			    + (speed
40758 			       ? 2
40759 			       : COSTS_N_BYTES
40760 				 (GET_MODE_UNIT_SIZE (mode))), true);
40761 		}
40762 	      count = 3;
40763 	    }
40764 	  else if (TARGET_SSSE3)
40765 	    count = 7;
40766 	  return ix86_vec_cost (mode, cost->sse_op * count, true);
40767 	}
40768       else
40769 	return ix86_vec_cost (mode, cost->sse_op, true);
40770     }
40771   if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
40772     {
40773       if (constant_op1)
40774 	{
40775 	  if (op1_val > 32)
40776 	    return cost->shift_const + COSTS_N_INSNS (2);
40777 	  else
40778 	    return cost->shift_const * 2;
40779 	}
40780       else
40781 	{
40782 	  if (and_in_op1)
40783 	    return cost->shift_var * 2;
40784 	  else
40785 	    return cost->shift_var * 6 + COSTS_N_INSNS (2);
40786 	}
40787     }
40788   else
40789     {
40790       if (constant_op1)
40791 	return cost->shift_const;
40792       else if (shift_and_truncate)
40793 	{
40794 	  if (skip_op0)
40795 	    *skip_op0 = *skip_op1 = true;
40796 	  /* Return the cost after shift-and-truncate.  */
40797 	  return cost->shift_var;
40798 	}
40799       else
40800 	return cost->shift_var;
40801     }
40802   return cost->shift_const;
40803 }
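
/* Worked example: on a 32-bit target a DImode shift (wider than
   UNITS_PER_WORD) by a constant larger than 32 costs
   shift_const + COSTS_N_INSNS (2), a shift by a smaller constant costs
   2 * shift_const, and a variable shift whose count is not masked by an
   AND is estimated at 6 * shift_var + COSTS_N_INSNS (2).  */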
40804 
40805 /* Compute a (partial) cost for rtx X.  Return true if the complete
40806    cost has been computed, and false if subexpressions should be
40807    scanned.  In either case, *TOTAL contains the cost result.  */
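
/* For example, a register-to-register SET below computes the complete cost
   via ix86_set_reg_reg_cost and returns true, whereas SIGN_EXTEND only
   stores cost->movsx and returns false so that its operand is still costed
   recursively by the caller.  */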
40808 
40809 static bool
40810 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
40811 		int *total, bool speed)
40812 {
40813   rtx mask;
40814   enum rtx_code code = GET_CODE (x);
40815   enum rtx_code outer_code = (enum rtx_code) outer_code_i;
40816   const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
40817   int src_cost;
40818 
40819   switch (code)
40820     {
40821     case SET:
40822       if (register_operand (SET_DEST (x), VOIDmode)
40823 	  && reg_or_0_operand (SET_SRC (x), VOIDmode))
40824 	{
40825 	  *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
40826 	  return true;
40827 	}
40828 
40829       if (register_operand (SET_SRC (x), VOIDmode))
40830 	/* Avoid potentially incorrect high cost from rtx_costs
40831 	   for non-tieable SUBREGs.  */
40832 	src_cost = 0;
40833       else
40834 	{
40835 	  src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
40836 
40837 	  if (CONSTANT_P (SET_SRC (x)))
40838 	    /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
40839 	       a small value, possibly zero for cheap constants.  */
40840 	    src_cost += COSTS_N_INSNS (1);
40841 	}
40842 
40843       *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
40844       return true;
40845 
40846     case CONST_INT:
40847     case CONST:
40848     case LABEL_REF:
40849     case SYMBOL_REF:
40850       if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
40851 	*total = 3;
40852       else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
40853 	*total = 2;
40854       else if (flag_pic && SYMBOLIC_CONST (x)
40855 	       && !(TARGET_64BIT
40856 		    && (GET_CODE (x) == LABEL_REF
40857 			|| (GET_CODE (x) == SYMBOL_REF
40858 			    && SYMBOL_REF_LOCAL_P (x))))
40859 	       /* Use 0 cost for CONST to improve its propagation.  */
40860 	       && (TARGET_64BIT || GET_CODE (x) != CONST))
40861 	*total = 1;
40862       else
40863 	*total = 0;
40864       return true;
40865 
40866     case CONST_DOUBLE:
40867       if (IS_STACK_MODE (mode))
40868 	switch (standard_80387_constant_p (x))
40869 	  {
40870 	  case -1:
40871 	  case 0:
40872 	    break;
40873 	  case 1: /* 0.0 */
40874 	    *total = 1;
40875 	    return true;
40876 	  default: /* Other constants */
40877 	    *total = 2;
40878 	    return true;
40879 	  }
40880       /* FALLTHRU */
40881 
40882     case CONST_VECTOR:
40883       switch (standard_sse_constant_p (x, mode))
40884 	{
40885 	case 0:
40886 	  break;
40887 	case 1:  /* 0: xor eliminates false dependency */
40888 	  *total = 0;
40889 	  return true;
40890 	default: /* -1: cmp contains false dependency */
40891 	  *total = 1;
40892 	  return true;
40893 	}
40894       /* FALLTHRU */
40895 
40896     case CONST_WIDE_INT:
40897       /* Fall back to (MEM (SYMBOL_REF)), since that's where
40898 	 it'll probably end up.  Add a penalty for size.  */
40899       *total = (COSTS_N_INSNS (1)
40900 		+ (!TARGET_64BIT && flag_pic)
40901 		+ (GET_MODE_SIZE (mode) <= 4
40902 		   ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
40903       return true;
40904 
40905     case ZERO_EXTEND:
40906       /* Zero extension is often completely free on x86_64, so make
40907 	 it as cheap as possible.  */
40908       if (TARGET_64BIT && mode == DImode
40909 	  && GET_MODE (XEXP (x, 0)) == SImode)
40910 	*total = 1;
40911       else if (TARGET_ZERO_EXTEND_WITH_AND)
40912 	*total = cost->add;
40913       else
40914 	*total = cost->movzx;
40915       return false;
40916 
40917     case SIGN_EXTEND:
40918       *total = cost->movsx;
40919       return false;
40920 
40921     case ASHIFT:
40922       if (SCALAR_INT_MODE_P (mode)
40923 	  && GET_MODE_SIZE (mode) < UNITS_PER_WORD
40924 	  && CONST_INT_P (XEXP (x, 1)))
40925 	{
40926 	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
40927 	  if (value == 1)
40928 	    {
40929 	      *total = cost->add;
40930 	      return false;
40931 	    }
40932 	  if ((value == 2 || value == 3)
40933 	      && cost->lea <= cost->shift_const)
40934 	    {
40935 	      *total = cost->lea;
40936 	      return false;
40937 	    }
40938 	}
40939       /* FALLTHRU */
40940 
40941     case ROTATE:
40942     case ASHIFTRT:
40943     case LSHIFTRT:
40944     case ROTATERT:
40945       bool skip_op0, skip_op1;
40946       *total = ix86_shift_rotate_cost (cost, mode, CONSTANT_P (XEXP (x, 1)),
40947 				       CONST_INT_P (XEXP (x, 1))
40948 					 ? INTVAL (XEXP (x, 1)) : -1,
40949 				       speed,
40950 				       GET_CODE (XEXP (x, 1)) == AND,
40951 				       SUBREG_P (XEXP (x, 1))
40952 				       && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND,
40953 				       &skip_op0, &skip_op1);
40954       if (skip_op0 || skip_op1)
40955 	{
40956 	  if (!skip_op0)
40957 	    *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
40958 	  if (!skip_op1)
40959 	    *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
40960 	  return true;
40961 	}
40962       return false;
40963 
40964     case FMA:
40965       {
40966 	rtx sub;
40967 
40968         gcc_assert (FLOAT_MODE_P (mode));
40969         gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
40970 
40971         *total = ix86_vec_cost (mode,
40972 				mode == SFmode ? cost->fmass : cost->fmasd,
40973 				true);
40974 	*total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
40975 
40976         /* Negate in op0 or op2 is free: FMS, FNMA, FNMS.  */
40977 	sub = XEXP (x, 0);
40978 	if (GET_CODE (sub) == NEG)
40979 	  sub = XEXP (sub, 0);
40980 	*total += rtx_cost (sub, mode, FMA, 0, speed);
40981 
40982 	sub = XEXP (x, 2);
40983 	if (GET_CODE (sub) == NEG)
40984 	  sub = XEXP (sub, 0);
40985 	*total += rtx_cost (sub, mode, FMA, 2, speed);
40986 	return true;
40987       }
40988 
40989     case MULT:
40990       if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
40991 	{
40992 	  rtx op0 = XEXP (x, 0);
40993 	  rtx op1 = XEXP (x, 1);
40994 	  int nbits;
40995 	  if (CONST_INT_P (XEXP (x, 1)))
40996 	    {
40997 	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
40998 	      for (nbits = 0; value != 0; value &= value - 1)
40999 	        nbits++;
41000 	    }
41001 	  else
41002 	    /* This is arbitrary.  */
41003 	    nbits = 7;
41004 
41005 	  /* Compute costs correctly for widening multiplication.  */
41006 	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
41007 	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
41008 	         == GET_MODE_SIZE (mode))
41009 	    {
41010 	      int is_mulwiden = 0;
41011 	      machine_mode inner_mode = GET_MODE (op0);
41012 
41013 	      if (GET_CODE (op0) == GET_CODE (op1))
41014 		is_mulwiden = 1, op1 = XEXP (op1, 0);
41015 	      else if (CONST_INT_P (op1))
41016 		{
41017 		  if (GET_CODE (op0) == SIGN_EXTEND)
41018 		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
41019 			          == INTVAL (op1);
41020 		  else
41021 		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
41022 	        }
41023 
41024 	      if (is_mulwiden)
41025 	        op0 = XEXP (op0, 0), mode = GET_MODE (op0);
41026 	    }
41027 
41028   	  *total = (cost->mult_init[MODE_INDEX (mode)]
41029 		    + nbits * cost->mult_bit
41030 	            + rtx_cost (op0, mode, outer_code, opno, speed)
41031 		    + rtx_cost (op1, mode, outer_code, opno, speed));
41032 
41033           return true;
41034 	}
41035       *total = ix86_multiplication_cost (cost, mode);
41036       return false;
41037 
41038     case DIV:
41039     case UDIV:
41040     case MOD:
41041     case UMOD:
41042       *total = ix86_division_cost (cost, mode);
41043       return false;
41044 
41045     case PLUS:
41046       if (GET_MODE_CLASS (mode) == MODE_INT
41047 	  && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
41048 	{
41049 	  if (GET_CODE (XEXP (x, 0)) == PLUS
41050 	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
41051 	      && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
41052 	      && CONSTANT_P (XEXP (x, 1)))
41053 	    {
41054 	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
41055 	      if (val == 2 || val == 4 || val == 8)
41056 		{
41057 		  *total = cost->lea;
41058 		  *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
41059 				      outer_code, opno, speed);
41060 		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
41061 				      outer_code, opno, speed);
41062 		  *total += rtx_cost (XEXP (x, 1), mode,
41063 				      outer_code, opno, speed);
41064 		  return true;
41065 		}
41066 	    }
41067 	  else if (GET_CODE (XEXP (x, 0)) == MULT
41068 		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
41069 	    {
41070 	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
41071 	      if (val == 2 || val == 4 || val == 8)
41072 		{
41073 		  *total = cost->lea;
41074 		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
41075 				      outer_code, opno, speed);
41076 		  *total += rtx_cost (XEXP (x, 1), mode,
41077 				      outer_code, opno, speed);
41078 		  return true;
41079 		}
41080 	    }
41081 	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
41082 	    {
41083 	      /* Add with carry, ignore the cost of adding a carry flag.  */
41084 	      if (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 0), mode))
41085 		*total = cost->add;
41086 	      else
41087 		{
41088 		  *total = cost->lea;
41089 		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
41090 				      outer_code, opno, speed);
41091 		}
41092 
41093 	      *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
41094 				  outer_code, opno, speed);
41095 	      *total += rtx_cost (XEXP (x, 1), mode,
41096 				  outer_code, opno, speed);
41097 	      return true;
41098 	    }
41099 	}
41100       /* FALLTHRU */
41101 
41102     case MINUS:
41103       /* Subtract with borrow, ignore the cost of subtracting a carry flag.  */
41104       if (GET_MODE_CLASS (mode) == MODE_INT
41105 	  && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
41106 	  && GET_CODE (XEXP (x, 0)) == MINUS
41107 	  && ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode))
41108 	{
41109 	  *total = cost->add;
41110 	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
41111 			      outer_code, opno, speed);
41112 	  *total += rtx_cost (XEXP (x, 1), mode,
41113 			      outer_code, opno, speed);
41114 	  return true;
41115 	}
41116 
41117       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
41118 	{
41119 	  *total = cost->addss;
41120 	  return false;
41121 	}
41122       else if (X87_FLOAT_MODE_P (mode))
41123 	{
41124 	  *total = cost->fadd;
41125 	  return false;
41126 	}
41127       else if (FLOAT_MODE_P (mode))
41128 	{
41129 	  *total = ix86_vec_cost (mode, cost->addss, true);
41130 	  return false;
41131 	}
41132       /* FALLTHRU */
41133 
41134     case AND:
41135     case IOR:
41136     case XOR:
41137       if (GET_MODE_CLASS (mode) == MODE_INT
41138 	  && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41139 	{
41140 	  *total = (cost->add * 2
41141 		    + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
41142 		       << (GET_MODE (XEXP (x, 0)) != DImode))
41143 		    + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
41144 	               << (GET_MODE (XEXP (x, 1)) != DImode)));
41145 	  return true;
41146 	}
41147       /* FALLTHRU */
41148 
41149     case NEG:
41150       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
41151 	{
41152 	  *total = cost->sse_op;
41153 	  return false;
41154 	}
41155       else if (X87_FLOAT_MODE_P (mode))
41156 	{
41157 	  *total = cost->fchs;
41158 	  return false;
41159 	}
41160       else if (FLOAT_MODE_P (mode))
41161 	{
41162 	  *total = ix86_vec_cost (mode, cost->sse_op, true);
41163 	  return false;
41164 	}
41165       /* FALLTHRU */
41166 
41167     case NOT:
41168       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
41169 	*total = ix86_vec_cost (mode, cost->sse_op, true);
41170       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41171 	*total = cost->add * 2;
41172       else
41173 	*total = cost->add;
41174       return false;
41175 
41176     case COMPARE:
41177       if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
41178 	  && XEXP (XEXP (x, 0), 1) == const1_rtx
41179 	  && CONST_INT_P (XEXP (XEXP (x, 0), 2))
41180 	  && XEXP (x, 1) == const0_rtx)
41181 	{
41182 	  /* This kind of construct is implemented using test[bwl].
41183 	     Treat it as if we had an AND.  */
41184 	  mode = GET_MODE (XEXP (XEXP (x, 0), 0));
41185 	  *total = (cost->add
41186 		    + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code,
41187 				opno, speed)
41188 		    + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
41189 	  return true;
41190 	}
41191 
41192       /* The embedded comparison operand is completely free.  */
41193       if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
41194 	  && XEXP (x, 1) == const0_rtx)
41195 	*total = 0;
41196 
41197       return false;
41198 
41199     case FLOAT_EXTEND:
41200       if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
41201 	*total = 0;
41202       else
41203         *total = ix86_vec_cost (mode, cost->addss, true);
41204       return false;
41205 
41206     case FLOAT_TRUNCATE:
41207       if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
41208 	*total = cost->fadd;
41209       else
41210         *total = ix86_vec_cost (mode, cost->addss, true);
41211       return false;
41212 
41213     case ABS:
41214       /* SSE requires a memory load for the constant operand.  It may make
41215 	 sense to account for this.  Of course the constant operand may or
41216 	 may not be reused. */
41217       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
41218 	*total = cost->sse_op;
41219       else if (X87_FLOAT_MODE_P (mode))
41220 	*total = cost->fabs;
41221       else if (FLOAT_MODE_P (mode))
41222 	*total = ix86_vec_cost (mode, cost->sse_op, true);
41223       return false;
41224 
41225     case SQRT:
41226       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
41227 	*total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
41228       else if (X87_FLOAT_MODE_P (mode))
41229 	*total = cost->fsqrt;
41230       else if (FLOAT_MODE_P (mode))
41231 	*total = ix86_vec_cost (mode,
41232 				mode == SFmode ? cost->sqrtss : cost->sqrtsd,
41233 				true);
41234       return false;
41235 
41236     case UNSPEC:
41237       if (XINT (x, 1) == UNSPEC_TP)
41238 	*total = 0;
41239       return false;
41240 
41241     case VEC_SELECT:
41242     case VEC_CONCAT:
41243     case VEC_DUPLICATE:
41244       /* ??? Assume all of these vector manipulation patterns are
41245 	 recognizable, in which case they all have pretty much the
41246 	 same cost.  */
41247      *total = cost->sse_op;
41248      return true;
41249     case VEC_MERGE:
41250       mask = XEXP (x, 2);
41251       /* This is a masked instruction; assume the same cost
41252 	 as the non-masked variant.  */
41253       if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
41254 	*total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
41255       else
41256 	*total = cost->sse_op;
41257       return true;
41258 
41259     default:
41260       return false;
41261     }
41262 }
41263 
41264 #if TARGET_MACHO
41265 
41266 static int current_machopic_label_num;
41267 
41268 /* Given a symbol name and its associated stub, write out the
41269    definition of the stub.  */
41270 
41271 void
41272 machopic_output_stub (FILE *file, const char *symb, const char *stub)
41273 {
41274   unsigned int length;
41275   char *binder_name, *symbol_name, lazy_ptr_name[32];
41276   int label = ++current_machopic_label_num;
41277 
41278   /* For 64-bit we shouldn't get here.  */
41279   gcc_assert (!TARGET_64BIT);
41280 
41281   /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
41282   symb = targetm.strip_name_encoding (symb);
41283 
41284   length = strlen (stub);
41285   binder_name = XALLOCAVEC (char, length + 32);
41286   GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
41287 
41288   length = strlen (symb);
41289   symbol_name = XALLOCAVEC (char, length + 32);
41290   GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
41291 
41292   sprintf (lazy_ptr_name, "L%d$lz", label);
41293 
41294   if (MACHOPIC_ATT_STUB)
41295     switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
41296   else if (MACHOPIC_PURE)
41297     switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
41298   else
41299     switch_to_section (darwin_sections[machopic_symbol_stub_section]);
41300 
41301   fprintf (file, "%s:\n", stub);
41302   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
41303 
41304   if (MACHOPIC_ATT_STUB)
41305     {
41306       fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
41307     }
41308   else if (MACHOPIC_PURE)
41309     {
41310       /* PIC stub.  */
41311       /* 25-byte PIC stub using "CALL get_pc_thunk".  */
41312       rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
41313       output_set_got (tmp, NULL_RTX);	/* "CALL ___<cpu>.get_pc_thunk.cx".  */
41314       fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
41315 	       label, lazy_ptr_name, label);
41316       fprintf (file, "\tjmp\t*%%ecx\n");
41317     }
41318   else
41319     fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
41320 
41321   /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
41322      it needs no stub-binding-helper.  */
41323   if (MACHOPIC_ATT_STUB)
41324     return;
41325 
41326   fprintf (file, "%s:\n", binder_name);
41327 
41328   if (MACHOPIC_PURE)
41329     {
41330       fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
41331       fprintf (file, "\tpushl\t%%ecx\n");
41332     }
41333   else
41334     fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
41335 
41336   fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
41337 
41338   /* N.B. Keep the correspondence of these
41339      'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
41340      old-pic/new-pic/non-pic stubs; altering this will break
41341      compatibility with existing dylibs.  */
41342   if (MACHOPIC_PURE)
41343     {
41344       /* 25-byte PIC stub using "CALL get_pc_thunk".  */
41345       switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
41346     }
41347   else
41348     /* 16-byte -mdynamic-no-pic stub.  */
41349     switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
41350 
41351   fprintf (file, "%s:\n", lazy_ptr_name);
41352   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
41353   fprintf (file, ASM_LONG "%s\n", binder_name);
41354 }
41355 #endif /* TARGET_MACHO */
41356 
41357 /* Order the registers for register allocator.  */
41358 
41359 void
41360 x86_order_regs_for_local_alloc (void)
41361 {
41362    int pos = 0;
41363    int i;
41364 
41365    /* First allocate the local general purpose registers.  */
41366    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
41367      if (GENERAL_REGNO_P (i) && call_used_regs[i])
41368 	reg_alloc_order [pos++] = i;
41369 
41370    /* Global general purpose registers.  */
41371    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
41372      if (GENERAL_REGNO_P (i) && !call_used_regs[i])
41373 	reg_alloc_order [pos++] = i;
41374 
41375    /* x87 registers come first in case we are doing FP math
41376       using them.  */
41377    if (!TARGET_SSE_MATH)
41378      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
41379        reg_alloc_order [pos++] = i;
41380 
41381    /* SSE registers.  */
41382    for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
41383      reg_alloc_order [pos++] = i;
41384    for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
41385      reg_alloc_order [pos++] = i;
41386 
41387    /* Extended REX SSE registers.  */
41388    for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
41389      reg_alloc_order [pos++] = i;
41390 
41391    /* Mask registers.  */
41392    for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
41393      reg_alloc_order [pos++] = i;
41394 
41395    /* MPX bound registers.  */
41396    for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
41397      reg_alloc_order [pos++] = i;
41398 
41399    /* x87 registers.  */
41400    if (TARGET_SSE_MATH)
41401      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
41402        reg_alloc_order [pos++] = i;
41403 
41404    for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
41405      reg_alloc_order [pos++] = i;
41406 
41407    /* Initialize the rest of array as we do not allocate some registers
41408       at all.  */
41409    while (pos < FIRST_PSEUDO_REGISTER)
41410      reg_alloc_order [pos++] = 0;
41411 }
41412 
41413 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
41414    in struct attribute_spec handler.  */
41415 static tree
41416 ix86_handle_callee_pop_aggregate_return (tree *node, tree name, tree args, int,
41417 					 bool *no_add_attrs)
41418 {
41419   if (TREE_CODE (*node) != FUNCTION_TYPE
41420       && TREE_CODE (*node) != METHOD_TYPE
41421       && TREE_CODE (*node) != FIELD_DECL
41422       && TREE_CODE (*node) != TYPE_DECL)
41423     {
41424       warning (OPT_Wattributes, "%qE attribute only applies to functions",
41425 	       name);
41426       *no_add_attrs = true;
41427       return NULL_TREE;
41428     }
41429   if (TARGET_64BIT)
41430     {
41431       warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
41432 	       name);
41433       *no_add_attrs = true;
41434       return NULL_TREE;
41435     }
41436   if (is_attribute_p ("callee_pop_aggregate_return", name))
41437     {
41438       tree cst;
41439 
41440       cst = TREE_VALUE (args);
41441       if (TREE_CODE (cst) != INTEGER_CST)
41442 	{
41443 	  warning (OPT_Wattributes,
41444 		   "%qE attribute requires an integer constant argument",
41445 		   name);
41446 	  *no_add_attrs = true;
41447 	}
41448       else if (compare_tree_int (cst, 0) != 0
41449 	       && compare_tree_int (cst, 1) != 0)
41450 	{
41451 	  warning (OPT_Wattributes,
41452 		   "argument to %qE attribute is neither zero, nor one",
41453 		   name);
41454 	  *no_add_attrs = true;
41455 	}
41456 
41457       return NULL_TREE;
41458     }
41459 
41460   return NULL_TREE;
41461 }
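
/* Usage sketch (hypothetical declaration; 32-bit targets only):

     struct big { int v[4]; };
     struct big __attribute__ ((callee_pop_aggregate_return (1)))
       make_big (void);

   The attribute argument must be the integer constant 0 or 1, as checked
   above.  */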
41462 
41463 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
41464    struct attribute_spec.handler.  */
41465 static tree
41466 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
41467 			   bool *no_add_attrs)
41468 {
41469   if (TREE_CODE (*node) != FUNCTION_TYPE
41470       && TREE_CODE (*node) != METHOD_TYPE
41471       && TREE_CODE (*node) != FIELD_DECL
41472       && TREE_CODE (*node) != TYPE_DECL)
41473     {
41474       warning (OPT_Wattributes, "%qE attribute only applies to functions",
41475 	       name);
41476       *no_add_attrs = true;
41477       return NULL_TREE;
41478     }
41479 
41480   /* Can combine regparm with all attributes but fastcall.  */
41481   if (is_attribute_p ("ms_abi", name))
41482     {
41483       if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
41484         {
41485 	  error ("ms_abi and sysv_abi attributes are not compatible");
41486 	}
41487 
41488       return NULL_TREE;
41489     }
41490   else if (is_attribute_p ("sysv_abi", name))
41491     {
41492       if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
41493         {
41494 	  error ("ms_abi and sysv_abi attributes are not compatible");
41495 	}
41496 
41497       return NULL_TREE;
41498     }
41499 
41500   return NULL_TREE;
41501 }
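
/* Usage sketch (hypothetical declaration):

     int __attribute__ ((ms_abi)) win64_callback (void *ctx);

   Combining ms_abi and sysv_abi on the same type is rejected above.  */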
41502 
41503 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
41504    struct attribute_spec.handler.  */
41505 static tree
41506 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
41507 			      bool *no_add_attrs)
41508 {
41509   tree *type = NULL;
41510   if (DECL_P (*node))
41511     {
41512       if (TREE_CODE (*node) == TYPE_DECL)
41513 	type = &TREE_TYPE (*node);
41514     }
41515   else
41516     type = node;
41517 
41518   if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
41519     {
41520       warning (OPT_Wattributes, "%qE attribute ignored",
41521 	       name);
41522       *no_add_attrs = true;
41523     }
41524 
41525   else if ((is_attribute_p ("ms_struct", name)
41526 	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
41527 	   || ((is_attribute_p ("gcc_struct", name)
41528 		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
41529     {
41530       warning (OPT_Wattributes, "%qE incompatible attribute ignored",
41531                name);
41532       *no_add_attrs = true;
41533     }
41534 
41535   return NULL_TREE;
41536 }
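
/* Usage sketch (hypothetical type):

     struct __attribute__ ((ms_struct)) pkt { char tag; int value; };

   The attribute is only meaningful on struct and union types; anything else
   is warned about and the attribute is dropped, as done above.  */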
41537 
41538 static tree
41539 ix86_handle_fndecl_attribute (tree *node, tree name, tree args, int,
41540 			      bool *no_add_attrs)
41541 {
41542   if (TREE_CODE (*node) != FUNCTION_DECL)
41543     {
41544       warning (OPT_Wattributes, "%qE attribute only applies to functions",
41545                name);
41546       *no_add_attrs = true;
41547     }
41548 
41549   if (is_attribute_p ("indirect_branch", name))
41550     {
41551       tree cst = TREE_VALUE (args);
41552       if (TREE_CODE (cst) != STRING_CST)
41553 	{
41554 	  warning (OPT_Wattributes,
41555 		   "%qE attribute requires a string constant argument",
41556 		   name);
41557 	  *no_add_attrs = true;
41558 	}
41559       else if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
41560 	       && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
41561 	       && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
41562 	       && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
41563 	{
41564 	  warning (OPT_Wattributes,
41565 		   "argument to %qE attribute is not "
41566 		   "(keep|thunk|thunk-inline|thunk-extern)", name);
41567 	  *no_add_attrs = true;
41568 	}
41569     }
41570 
41571   if (is_attribute_p ("function_return", name))
41572     {
41573       tree cst = TREE_VALUE (args);
41574       if (TREE_CODE (cst) != STRING_CST)
41575 	{
41576 	  warning (OPT_Wattributes,
41577 		   "%qE attribute requires a string constant argument",
41578 		   name);
41579 	  *no_add_attrs = true;
41580 	}
41581       else if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
41582 	       && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
41583 	       && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
41584 	       && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
41585 	{
41586 	  warning (OPT_Wattributes,
41587 		   "argument to %qE attribute is not "
41588 		   "(keep|thunk|thunk-inline|thunk-extern)", name);
41589 	  *no_add_attrs = true;
41590 	}
41591     }
41592 
41593   return NULL_TREE;
41594 }
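
/* Usage sketch (hypothetical declaration):

     void __attribute__ ((indirect_branch ("thunk"),
			  function_return ("keep")))
       dispatch (void (*fn) (void));

   Both attributes accept one of the string constants "keep", "thunk",
   "thunk-inline" or "thunk-extern".  */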
41595 
41596 static tree
41597 ix86_handle_no_caller_saved_registers_attribute (tree *, tree, tree,
41598 						 int, bool *)
41599 {
41600   return NULL_TREE;
41601 }
41602 
41603 static tree
41604 ix86_handle_interrupt_attribute (tree *node, tree, tree, int, bool *)
41605 {
41606   /* DECL_RESULT and DECL_ARGUMENTS do not exist there yet,
41607      but the function type contains args and return type data.  */
41608   tree func_type = *node;
41609   tree return_type = TREE_TYPE (func_type);
41610 
41611   int nargs = 0;
41612   tree current_arg_type = TYPE_ARG_TYPES (func_type);
41613   while (current_arg_type
41614 	 && ! VOID_TYPE_P (TREE_VALUE (current_arg_type)))
41615     {
41616       if (nargs == 0)
41617 	{
41618 	  if (! POINTER_TYPE_P (TREE_VALUE (current_arg_type)))
41619 	    error ("interrupt service routine should have a pointer "
41620 		   "as the first argument");
41621 	}
41622       else if (nargs == 1)
41623 	{
41624 	  if (TREE_CODE (TREE_VALUE (current_arg_type)) != INTEGER_TYPE
41625 	      || TYPE_MODE (TREE_VALUE (current_arg_type)) != word_mode)
41626 	    error ("interrupt service routine should have unsigned %s"
41627 		   "int as the second argument",
41628 		   TARGET_64BIT
41629 		   ? (TARGET_X32 ? "long long " : "long ")
41630 		   : "");
41631 	}
41632       nargs++;
41633       current_arg_type = TREE_CHAIN (current_arg_type);
41634     }
41635   if (!nargs || nargs > 2)
41636     error ("interrupt service routine can only have a pointer argument "
41637 	   "and an optional integer argument");
41638   if (! VOID_TYPE_P (return_type))
41639     error ("interrupt service routine can't have non-void return value");
41640 
41641   return NULL_TREE;
41642 }
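
/* Usage sketch of conforming handlers (hypothetical names; the layout of
   struct interrupt_frame is supplied by the user):

     struct interrupt_frame;

     void __attribute__ ((interrupt))
     isr_handler (struct interrupt_frame *frame);

     void __attribute__ ((interrupt))
     exc_handler (struct interrupt_frame *frame, unsigned long error);

   i.e. a pointer first, an optional word-mode unsigned integer second
   (unsigned long on LP64), and a void return type, exactly as enforced
   above.  */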
41643 
41644 static bool
41645 ix86_ms_bitfield_layout_p (const_tree record_type)
41646 {
41647   return ((TARGET_MS_BITFIELD_LAYOUT
41648 	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
41649           || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
41650 }
41651 
41652 /* Returns an expression indicating where the this parameter is
41653    located on entry to the FUNCTION.  */
41654 
41655 static rtx
41656 x86_this_parameter (tree function)
41657 {
41658   tree type = TREE_TYPE (function);
41659   bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
41660   int nregs;
41661 
41662   if (TARGET_64BIT)
41663     {
41664       const int *parm_regs;
41665 
41666       if (ix86_function_type_abi (type) == MS_ABI)
41667         parm_regs = x86_64_ms_abi_int_parameter_registers;
41668       else
41669         parm_regs = x86_64_int_parameter_registers;
41670       return gen_rtx_REG (Pmode, parm_regs[aggr]);
41671     }
41672 
41673   nregs = ix86_function_regparm (type, function);
41674 
41675   if (nregs > 0 && !stdarg_p (type))
41676     {
41677       int regno;
41678       unsigned int ccvt = ix86_get_callcvt (type);
41679 
41680       if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
41681 	regno = aggr ? DX_REG : CX_REG;
41682       else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
41683         {
41684 	  regno = CX_REG;
41685 	  if (aggr)
41686 	    return gen_rtx_MEM (SImode,
41687 				plus_constant (Pmode, stack_pointer_rtx, 4));
41688 	}
41689       else
41690         {
41691 	  regno = AX_REG;
41692 	  if (aggr)
41693 	    {
41694 	      regno = DX_REG;
41695 	      if (nregs == 1)
41696 		return gen_rtx_MEM (SImode,
41697 				    plus_constant (Pmode,
41698 						   stack_pointer_rtx, 4));
41699 	    }
41700 	}
41701       return gen_rtx_REG (SImode, regno);
41702     }
41703 
41704   return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
41705 					     aggr ? 8 : 4));
41706 }
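
/* For illustration: for a 64-bit SysV method `this' arrives in %rdi
   (parm_regs[0]), or in %rsi when the function also returns an aggregate in
   memory (aggr == 1).  For a 32-bit fastcall method it is %ecx, or %edx
   when an aggregate is returned in memory.  */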
41707 
41708 /* Determine whether x86_output_mi_thunk can succeed.  */
41709 
41710 static bool
41711 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
41712 			 const_tree function)
41713 {
41714   /* 64-bit can handle anything.  */
41715   if (TARGET_64BIT)
41716     return true;
41717 
41718   /* For 32-bit, everything's fine if we have one free register.  */
41719   if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
41720     return true;
41721 
41722   /* Need a free register for vcall_offset.  */
41723   if (vcall_offset)
41724     return false;
41725 
41726   /* Need a free register for GOT references.  */
41727   if (flag_pic && !targetm.binds_local_p (function))
41728     return false;
41729 
41730   /* Otherwise ok.  */
41731   return true;
41732 }
41733 
41734 /* Output the assembler code for a thunk function.  THUNK_DECL is the
41735    declaration for the thunk function itself, FUNCTION is the decl for
41736    the target function.  DELTA is an immediate constant offset to be
41737    added to THIS.  If VCALL_OFFSET is nonzero, the word at
41738    *(*this + vcall_offset) should be added to THIS.  */
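
/* Roughly, the emitted thunk is equivalent to this pseudo-C sketch
   (illustrative only):

     this += DELTA;
     if (VCALL_OFFSET)
       this += *(ptrdiff_t *) (*(char **) this + VCALL_OFFSET);
     return FUNCTION (this, ...);   (emitted as a sibling call)  */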
41739 
41740 static void
41741 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
41742 		     HOST_WIDE_INT vcall_offset, tree function)
41743 {
41744   rtx this_param = x86_this_parameter (function);
41745   rtx this_reg, tmp, fnaddr;
41746   unsigned int tmp_regno;
41747   rtx_insn *insn;
41748 
41749   if (TARGET_64BIT)
41750     tmp_regno = R10_REG;
41751   else
41752     {
41753       unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
41754       if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
41755 	tmp_regno = AX_REG;
41756       else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
41757 	tmp_regno = DX_REG;
41758       else
41759 	tmp_regno = CX_REG;
41760     }
41761 
41762   emit_note (NOTE_INSN_PROLOGUE_END);
41763 
41764   /* If CET is enabled, insert an ENDBR instruction.  */
41765   if ((flag_cf_protection & CF_BRANCH))
41766     emit_insn (gen_nop_endbr ());
41767 
41768   /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
41769      pull it in now and let DELTA benefit.  */
41770   if (REG_P (this_param))
41771     this_reg = this_param;
41772   else if (vcall_offset)
41773     {
41774       /* Put the this parameter into %eax.  */
41775       this_reg = gen_rtx_REG (Pmode, AX_REG);
41776       emit_move_insn (this_reg, this_param);
41777     }
41778   else
41779     this_reg = NULL_RTX;
41780 
41781   /* Adjust the this parameter by a fixed constant.  */
41782   if (delta)
41783     {
41784       rtx delta_rtx = GEN_INT (delta);
41785       rtx delta_dst = this_reg ? this_reg : this_param;
41786 
41787       if (TARGET_64BIT)
41788 	{
41789 	  if (!x86_64_general_operand (delta_rtx, Pmode))
41790 	    {
41791 	      tmp = gen_rtx_REG (Pmode, tmp_regno);
41792 	      emit_move_insn (tmp, delta_rtx);
41793 	      delta_rtx = tmp;
41794 	    }
41795 	}
41796 
41797       ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
41798     }
41799 
41800   /* Adjust the this parameter by a value stored in the vtable.  */
41801   if (vcall_offset)
41802     {
41803       rtx vcall_addr, vcall_mem, this_mem;
41804 
41805       tmp = gen_rtx_REG (Pmode, tmp_regno);
41806 
41807       this_mem = gen_rtx_MEM (ptr_mode, this_reg);
41808       if (Pmode != ptr_mode)
41809 	this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
41810       emit_move_insn (tmp, this_mem);
41811 
41812       /* Adjust the this parameter.  */
41813       vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
41814       if (TARGET_64BIT
41815 	  && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
41816 	{
41817 	  rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
41818 	  emit_move_insn (tmp2, GEN_INT (vcall_offset));
41819 	  vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
41820 	}
41821 
41822       vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
41823       if (Pmode != ptr_mode)
41824 	emit_insn (gen_addsi_1_zext (this_reg,
41825 				     gen_rtx_REG (ptr_mode,
41826 						  REGNO (this_reg)),
41827 				     vcall_mem));
41828       else
41829 	ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
41830     }
41831 
41832   /* If necessary, drop THIS back to its stack slot.  */
41833   if (this_reg && this_reg != this_param)
41834     emit_move_insn (this_param, this_reg);
41835 
41836   fnaddr = XEXP (DECL_RTL (function), 0);
41837   if (TARGET_64BIT)
41838     {
41839       if (!flag_pic || targetm.binds_local_p (function)
41840 	  || TARGET_PECOFF)
41841 	;
41842       else
41843 	{
41844 	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
41845 	  tmp = gen_rtx_CONST (Pmode, tmp);
41846 	  fnaddr = gen_const_mem (Pmode, tmp);
41847 	}
41848     }
41849   else
41850     {
41851       if (!flag_pic || targetm.binds_local_p (function))
41852 	;
41853 #if TARGET_MACHO
41854       else if (TARGET_MACHO)
41855 	{
41856 	  fnaddr = machopic_indirect_call_target (DECL_RTL (function));
41857 	  fnaddr = XEXP (fnaddr, 0);
41858 	}
41859 #endif /* TARGET_MACHO */
41860       else
41861 	{
41862 	  tmp = gen_rtx_REG (Pmode, CX_REG);
41863 	  output_set_got (tmp, NULL_RTX);
41864 
41865 	  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
41866 	  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
41867 	  fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
41868 	  fnaddr = gen_const_mem (Pmode, fnaddr);
41869 	}
41870     }
41871 
41872   /* Our sibling call patterns do not allow memories, because we have no
41873      predicate that can distinguish between frame and non-frame memory.
41874      For our purposes here, we can get away with (ab)using a jump pattern,
41875      because we're going to do no optimization.  */
41876   if (MEM_P (fnaddr))
41877     {
41878       if (sibcall_insn_operand (fnaddr, word_mode))
41879 	{
41880 	  fnaddr = XEXP (DECL_RTL (function), 0);
41881 	  tmp = gen_rtx_MEM (QImode, fnaddr);
41882 	  tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
41883 	  tmp = emit_call_insn (tmp);
41884 	  SIBLING_CALL_P (tmp) = 1;
41885 	}
41886       else
41887 	emit_jump_insn (gen_indirect_jump (fnaddr));
41888     }
41889   else
41890     {
41891       if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
41892 	{
41893 	  // CM_LARGE_PIC always uses pseudo PIC register which is
41894 	  // uninitialized.  Since FUNCTION is local and calling it
41895 	  // doesn't go through PLT, we use scratch register %r11 as
41896 	  // PIC register and initialize it here.
41897 	  pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
41898 	  ix86_init_large_pic_reg (tmp_regno);
41899 	  fnaddr = legitimize_pic_address (fnaddr,
41900 					   gen_rtx_REG (Pmode, tmp_regno));
41901 	}
41902 
41903       if (!sibcall_insn_operand (fnaddr, word_mode))
41904 	{
41905 	  tmp = gen_rtx_REG (word_mode, tmp_regno);
41906 	  if (GET_MODE (fnaddr) != word_mode)
41907 	    fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
41908 	  emit_move_insn (tmp, fnaddr);
41909 	  fnaddr = tmp;
41910 	}
41911 
41912       tmp = gen_rtx_MEM (QImode, fnaddr);
41913       tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
41914       tmp = emit_call_insn (tmp);
41915       SIBLING_CALL_P (tmp) = 1;
41916     }
41917   emit_barrier ();
41918 
41919   /* Emit just enough of rest_of_compilation to get the insns emitted.
41920      Note that use_thunk calls assemble_start_function et al.  */
41921   insn = get_insns ();
41922   shorten_branches (insn);
41923   final_start_function (insn, file, 1);
41924   final (insn, file, 1);
41925   final_end_function ();
41926 }
41927 
41928 static void
41929 x86_file_start (void)
41930 {
41931   default_file_start ();
41932   if (TARGET_16BIT)
41933     fputs ("\t.code16gcc\n", asm_out_file);
41934 #if TARGET_MACHO
41935   darwin_file_start ();
41936 #endif
41937   if (X86_FILE_START_VERSION_DIRECTIVE)
41938     fputs ("\t.version\t\"01.01\"\n", asm_out_file);
41939   if (X86_FILE_START_FLTUSED)
41940     fputs ("\t.global\t__fltused\n", asm_out_file);
41941   if (ix86_asm_dialect == ASM_INTEL)
41942     fputs ("\t.intel_syntax noprefix\n", asm_out_file);
41943 }
41944 
41945 int
41946 x86_field_alignment (tree type, int computed)
41947 {
41948   machine_mode mode;
41949 
41950   if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
41951     return computed;
41952   if (TARGET_IAMCU)
41953     return iamcu_alignment (type, computed);
41954   mode = TYPE_MODE (strip_array_types (type));
41955   if (mode == DFmode || mode == DCmode
41956       || GET_MODE_CLASS (mode) == MODE_INT
41957       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
41958     return MIN (32, computed);
41959   return computed;
41960 }
41961 
41962 /* Print call to TARGET to FILE.  */
41963 
41964 static void
41965 x86_print_call_or_nop (FILE *file, const char *target)
41966 {
41967   if (flag_nop_mcount)
41968     /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
41969     fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
41970   else
41971     fprintf (file, "1:\tcall\t%s\n", target);
41972 }
41973 
41974 /* Output assembler code to FILE to increment profiler label # LABELNO
41975    for profiling a function entry.  */
41976 void
41977 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
41978 {
41979   const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
41980 					 : MCOUNT_NAME);
41981   if (TARGET_64BIT)
41982     {
41983 #ifndef NO_PROFILE_COUNTERS
41984       fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
41985 #endif
41986 
41987       if (!TARGET_PECOFF && flag_pic)
41988 	fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
41989       else
41990 	x86_print_call_or_nop (file, mcount_name);
41991     }
41992   else if (flag_pic)
41993     {
41994 #ifndef NO_PROFILE_COUNTERS
41995       fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
41996 	       LPREFIX, labelno);
41997 #endif
41998       fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
41999     }
42000   else
42001     {
42002 #ifndef NO_PROFILE_COUNTERS
42003       fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
42004 	       LPREFIX, labelno);
42005 #endif
42006       x86_print_call_or_nop (file, mcount_name);
42007     }
42008 
42009   if (flag_record_mcount)
42010     {
42011       fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
42012       fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
42013       fprintf (file, "\t.previous\n");
42014     }
42015 }
42016 
42017 /* We don't have exact information about the insn sizes, but we may assume
42018    quite safely that we are informed about all 1 byte insns and memory
42019    address sizes.  This is enough to eliminate unnecessary padding in
42020    99% of cases.  */
42021 
42022 int
42023 ix86_min_insn_size (rtx_insn *insn)
42024 {
42025   int l = 0, len;
42026 
42027   if (!INSN_P (insn) || !active_insn_p (insn))
42028     return 0;
42029 
42030   /* Discard alignments we've emitted and jump instructions.  */
42031   if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
42032       && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
42033     return 0;
42034 
42035   /* Important case - calls are always 5 bytes.
42036      It is common to have many calls in a row.  */
42037   if (CALL_P (insn)
42038       && symbolic_reference_mentioned_p (PATTERN (insn))
42039       && !SIBLING_CALL_P (insn))
42040     return 5;
42041   len = get_attr_length (insn);
42042   if (len <= 1)
42043     return 1;
42044 
42045   /* For normal instructions we rely on get_attr_length being exact,
42046      with a few exceptions.  */
42047   if (!JUMP_P (insn))
42048     {
42049       enum attr_type type = get_attr_type (insn);
42050 
42051       switch (type)
42052 	{
42053 	case TYPE_MULTI:
42054 	  if (GET_CODE (PATTERN (insn)) == ASM_INPUT
42055 	      || asm_noperands (PATTERN (insn)) >= 0)
42056 	    return 0;
42057 	  break;
42058 	case TYPE_OTHER:
42059 	case TYPE_FCMP:
42060 	  break;
42061 	default:
42062 	  /* Otherwise trust get_attr_length.  */
42063 	  return len;
42064 	}
42065 
42066       l = get_attr_length_address (insn);
42067       if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
42068 	l = 4;
42069     }
42070   if (l)
42071     return 1+l;
42072   else
42073     return 2;
42074 }
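
/* For illustration: a direct call to a named function is always counted as
   5 bytes, an alignment directive as 0, and any other non-jump insn that
   mentions a symbolic reference is assumed to need at least a 4-byte
   address, so it is counted as no less than 5 bytes.  */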
42075 
42076 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
42077 
42078 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a
42079    16-byte window.  */
42080 
42081 static void
42082 ix86_avoid_jump_mispredicts (void)
42083 {
42084   rtx_insn *insn, *start = get_insns ();
42085   int nbytes = 0, njumps = 0;
42086   bool isjump = false;
42087 
42088   /* Look for all minimal intervals of instructions containing 4 jumps.
42089      The intervals are bounded by START and INSN.  NBYTES is the total
42090      size of instructions in the interval including INSN and not including
42091      START.  When the NBYTES is smaller than 16 bytes, it is possible
42092      START.  When NBYTES is smaller than 16 bytes, it is possible
42093      that the end of START and INSN end up in the same 16-byte page.
42094      The smallest offset in the page INSN can start is the case where START
42095      ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
42096      We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
42097 
42098      Don't consider an asm goto as a jump: while it can contain a jump, it
42099      doesn't have to, its labels can be reached through other means, and we
42100      also estimate the minimum length of all asm stmts as 0.  */
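
  /* Worked example of the padding decision below: if shrinking the window
     just dropped a jump (so ISJUMP is set), three jumps remain, and the
     insns still counted span NBYTES = 12 bytes including the current 2-byte
     jump, then a pad of 15 - 12 + 2 = 5 bytes is emitted before that jump
     so that all four jumps cannot land in one 16-byte window.  */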
42101   for (insn = start; insn; insn = NEXT_INSN (insn))
42102     {
42103       int min_size;
42104 
42105       if (LABEL_P (insn))
42106 	{
42107 	  int align = label_to_alignment (insn);
42108 	  int max_skip = label_to_max_skip (insn);
42109 
42110 	  if (max_skip > 15)
42111 	    max_skip = 15;
42112 	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
42113 	     already in the current 16 byte page, because otherwise
42114 	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
42115 	     bytes to reach 16 byte boundary.  */
42116 	  if (align <= 0
42117 	      || (align <= 3 && max_skip != (1 << align) - 1))
42118 	    max_skip = 0;
42119 	  if (dump_file)
42120 	    fprintf (dump_file, "Label %i with max_skip %i\n",
42121 		     INSN_UID (insn), max_skip);
42122 	  if (max_skip)
42123 	    {
42124 	      while (nbytes + max_skip >= 16)
42125 		{
42126 		  start = NEXT_INSN (start);
42127 		  if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
42128 		      || CALL_P (start))
42129 		    njumps--, isjump = true;
42130 		  else
42131 		    isjump = false;
42132 		  nbytes -= ix86_min_insn_size (start);
42133 		}
42134 	    }
42135 	  continue;
42136 	}
42137 
42138       min_size = ix86_min_insn_size (insn);
42139       nbytes += min_size;
42140       if (dump_file)
42141 	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
42142 		 INSN_UID (insn), min_size);
42143       if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
42144 	  || CALL_P (insn))
42145 	njumps++;
42146       else
42147 	continue;
42148 
42149       while (njumps > 3)
42150 	{
42151 	  start = NEXT_INSN (start);
42152 	  if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
42153 	      || CALL_P (start))
42154 	    njumps--, isjump = true;
42155 	  else
42156 	    isjump = false;
42157 	  nbytes -= ix86_min_insn_size (start);
42158 	}
42159       gcc_assert (njumps >= 0);
42160       if (dump_file)
42161         fprintf (dump_file, "Interval %i to %i has %i bytes\n",
42162 		 INSN_UID (start), INSN_UID (insn), nbytes);
42163 
42164       if (njumps == 3 && isjump && nbytes < 16)
42165 	{
42166 	  int padsize = 15 - nbytes + ix86_min_insn_size (insn);
42167 
42168 	  if (dump_file)
42169 	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
42170 		     INSN_UID (insn), padsize);
42171           emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
42172 	}
42173     }
42174 }
42175 #endif
42176 
42177 /* The AMD Athlon works faster
42178    when RET is not the destination of a conditional jump or directly preceded
42179    by another jump instruction.  We avoid the penalty by inserting a NOP just
42180    before the RET instructions in such cases.  */
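/* (A note on the mechanism, to be taken with a grain of salt: the
   simple_return_internal_long pattern used below emits a longer encoding of
   the return -- a rep-prefixed ret, if memory serves -- which is what
   sidesteps the predictor penalty.)  */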
42181 static void
42182 ix86_pad_returns (void)
42183 {
42184   edge e;
42185   edge_iterator ei;
42186 
42187   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
42188     {
42189       basic_block bb = e->src;
42190       rtx_insn *ret = BB_END (bb);
42191       rtx_insn *prev;
42192       bool replace = false;
42193 
42194       if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
42195 	  || optimize_bb_for_size_p (bb))
42196 	continue;
42197       for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
42198 	if (active_insn_p (prev) || LABEL_P (prev))
42199 	  break;
42200       if (prev && LABEL_P (prev))
42201 	{
42202 	  edge e;
42203 	  edge_iterator ei;
42204 
42205 	  FOR_EACH_EDGE (e, ei, bb->preds)
42206 	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
42207 		&& !(e->flags & EDGE_FALLTHRU))
42208 	      {
42209 		replace = true;
42210 		break;
42211 	      }
42212 	}
42213       if (!replace)
42214 	{
42215 	  prev = prev_active_insn (ret);
42216 	  if (prev
42217 	      && ((JUMP_P (prev) && any_condjump_p (prev))
42218 		  || CALL_P (prev)))
42219 	    replace = true;
42220 	  /* Empty functions get branch mispredict even when
42221 	     the jump destination is not visible to us.  */
42222 	  if (!prev && !optimize_function_for_size_p (cfun))
42223 	    replace = true;
42224 	}
42225       if (replace)
42226 	{
42227 	  emit_jump_insn_before (gen_simple_return_internal_long (), ret);
42228 	  delete_insn (ret);
42229 	}
42230     }
42231 }
42232 
42233 /* Count the minimum number of instructions in BB.  Return 4 if the
42234    number of instructions >= 4.  */
42235 
42236 static int
42237 ix86_count_insn_bb (basic_block bb)
42238 {
42239   rtx_insn *insn;
42240   int insn_count = 0;
42241 
42242   /* Count number of instructions in this block.  Return 4 if the number
42243      of instructions >= 4.  */
42244   FOR_BB_INSNS (bb, insn)
42245     {
42246 	      /* This only happens in exit blocks.  */
42247       if (JUMP_P (insn)
42248 	  && ANY_RETURN_P (PATTERN (insn)))
42249 	break;
42250 
42251       if (NONDEBUG_INSN_P (insn)
42252 	  && GET_CODE (PATTERN (insn)) != USE
42253 	  && GET_CODE (PATTERN (insn)) != CLOBBER)
42254 	{
42255 	  insn_count++;
42256 	  if (insn_count >= 4)
42257 	    return insn_count;
42258 	}
42259     }
42260 
42261   return insn_count;
42262 }
42263 
42264 
42265 /* Count the minimum number of instructions in a code path ending in BB.
42266    Return 4 if the number of instructions >= 4.  */
42267 
42268 static int
42269 ix86_count_insn (basic_block bb)
42270 {
42271   edge e;
42272   edge_iterator ei;
42273   int min_prev_count;
42274 
42275   /* Only bother counting instructions along paths with no
42276      more than 2 basic blocks between entry and exit.  Given
42277      that BB has an edge to exit, determine if a predecessor
42278      of BB has an edge from entry.  If so, compute the number
42279      of instructions in the predecessor block.  If there
42280      happen to be multiple such blocks, compute the minimum.  */
42281   min_prev_count = 4;
42282   FOR_EACH_EDGE (e, ei, bb->preds)
42283     {
42284       edge prev_e;
42285       edge_iterator prev_ei;
42286 
42287       if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
42288 	{
42289 	  min_prev_count = 0;
42290 	  break;
42291 	}
42292       FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
42293 	{
42294 	  if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
42295 	    {
42296 	      int count = ix86_count_insn_bb (e->src);
42297 	      if (count < min_prev_count)
42298 		min_prev_count = count;
42299 	      break;
42300 	    }
42301 	}
42302     }
42303 
42304   if (min_prev_count < 4)
42305     min_prev_count += ix86_count_insn_bb (bb);
42306 
42307   return min_prev_count;
42308 }
42309 
42310 /* Pad short functions to 4 instructions.  */
42311 
42312 static void
42313 ix86_pad_short_function (void)
42314 {
42315   edge e;
42316   edge_iterator ei;
42317 
42318   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
42319     {
42320       rtx_insn *ret = BB_END (e->src);
42321       if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
42322 	{
42323 	  int insn_count = ix86_count_insn (e->src);
42324 
42325 	  /* Pad short function.  */
42326 	  if (insn_count < 4)
42327 	    {
42328 	      rtx_insn *insn = ret;
42329 
42330 	      /* Find epilogue.  */
42331 	      while (insn
42332 		     && (!NOTE_P (insn)
42333 			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
42334 		insn = PREV_INSN (insn);
42335 
42336 	      if (!insn)
42337 		insn = ret;
42338 
42339 	      /* Two NOPs count as one instruction.  */
42340 	      insn_count = 2 * (4 - insn_count);
42341 	      emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
42342 	    }
42343 	}
42344     }
42345 }
42346 
42347 /* Fix up a Windows system unwinder issue.  If an EH region falls through into
42348    the epilogue, the Windows system unwinder will apply epilogue logic and
42349    produce incorrect offsets.  This can be avoided by adding a nop between
42350    the last insn that can throw and the first insn of the epilogue.  */
42351 
42352 static void
42353 ix86_seh_fixup_eh_fallthru (void)
42354 {
42355   edge e;
42356   edge_iterator ei;
42357 
42358   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
42359     {
42360       rtx_insn *insn, *next;
42361 
42362       /* Find the beginning of the epilogue.  */
42363       for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
42364 	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
42365 	  break;
42366       if (insn == NULL)
42367 	continue;
42368 
42369       /* We only care about preceding insns that can throw.  */
42370       insn = prev_active_insn (insn);
42371       if (insn == NULL || !can_throw_internal (insn))
42372 	continue;
42373 
42374       /* Do not separate calls from their debug information.  */
42375       for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
42376 	if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
42377 	  insn = next;
42378 	else
42379 	  break;
42380 
42381       emit_insn_after (gen_nops (const1_rtx), insn);
42382     }
42383 }
42384 
42385 /* Given a register number BASE, the lowest of a group of registers, update
42386    regsets IN and OUT with the registers that should be avoided in input
42387    and output operands respectively when trying to avoid generating a modr/m
42388    byte for -mmitigate-rop.  */
42389 
42390 static void
42391 set_rop_modrm_reg_bits (int base, HARD_REG_SET &in, HARD_REG_SET &out)
42392 {
42393   SET_HARD_REG_BIT (out, base);
42394   SET_HARD_REG_BIT (out, base + 1);
42395   SET_HARD_REG_BIT (in, base + 2);
42396   SET_HARD_REG_BIT (in, base + 3);
42397 }
42398 
42399 /* Called if -mmitigate-rop is in effect.  Try to rewrite instructions so
42400    that certain encodings of modr/m bytes do not occur.  */
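/* In outline (as can be read from the code below): a first pass uses the
   regrename machinery to rename whole def/use chains away from registers
   whose encodings are considered risky, and a second pass patches up any
   remaining insns by copying the offending input operand into a safer hard
   register immediately before the insn.  */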
42401 static void
42402 ix86_mitigate_rop (void)
42403 {
42404   HARD_REG_SET input_risky;
42405   HARD_REG_SET output_risky;
42406   HARD_REG_SET inout_risky;
42407 
42408   CLEAR_HARD_REG_SET (output_risky);
42409   CLEAR_HARD_REG_SET (input_risky);
42410   SET_HARD_REG_BIT (output_risky, AX_REG);
42411   SET_HARD_REG_BIT (output_risky, CX_REG);
42412   SET_HARD_REG_BIT (input_risky, BX_REG);
42413   SET_HARD_REG_BIT (input_risky, DX_REG);
42414   set_rop_modrm_reg_bits (FIRST_SSE_REG, input_risky, output_risky);
42415   set_rop_modrm_reg_bits (FIRST_REX_INT_REG, input_risky, output_risky);
42416   set_rop_modrm_reg_bits (FIRST_REX_SSE_REG, input_risky, output_risky);
42417   set_rop_modrm_reg_bits (FIRST_EXT_REX_SSE_REG, input_risky, output_risky);
42418   set_rop_modrm_reg_bits (FIRST_MASK_REG, input_risky, output_risky);
42419   set_rop_modrm_reg_bits (FIRST_BND_REG, input_risky, output_risky);
42420   COPY_HARD_REG_SET (inout_risky, input_risky);
42421   IOR_HARD_REG_SET (inout_risky, output_risky);
42422 
42423   df_note_add_problem ();
42424   /* Fix up what stack-regs did.  */
42425   df_insn_rescan_all ();
42426   df_analyze ();
42427 
42428   regrename_init (true);
42429   regrename_analyze (NULL);
42430 
42431   auto_vec<du_head_p> cands;
42432 
42433   for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
42434     {
42435       if (!NONDEBUG_INSN_P (insn))
42436 	continue;
42437 
42438       if (GET_CODE (PATTERN (insn)) == USE
42439 	  || GET_CODE (PATTERN (insn)) == CLOBBER)
42440 	continue;
42441 
42442       extract_insn (insn);
42443 
42444       int opno0, opno1;
42445       int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand,
42446 					  recog_data.n_operands, &opno0,
42447 					  &opno1);
42448 
42449       if (!ix86_rop_should_change_byte_p (modrm))
42450 	continue;
42451 
42452       insn_rr_info *info = &insn_rr[INSN_UID (insn)];
42453 
42454       /* This happens when regrename has to fail a block.  */
42455       if (!info->op_info)
42456 	continue;
42457 
42458       if (info->op_info[opno0].n_chains != 0)
42459 	{
42460 	  gcc_assert (info->op_info[opno0].n_chains == 1);
42461 	  du_head_p op0c;
42462 	  op0c = regrename_chain_from_id (info->op_info[opno0].heads[0]->id);
42463 	  if (op0c->target_data_1 + op0c->target_data_2 == 0
42464 	      && !op0c->cannot_rename)
42465 	    cands.safe_push (op0c);
42466 
42467 	  op0c->target_data_1++;
42468 	}
42469       if (info->op_info[opno1].n_chains != 0)
42470 	{
42471 	  gcc_assert (info->op_info[opno1].n_chains == 1);
42472 	  du_head_p op1c;
42473 	  op1c = regrename_chain_from_id (info->op_info[opno1].heads[0]->id);
42474 	  if (op1c->target_data_1 + op1c->target_data_2 == 0
42475 	      && !op1c->cannot_rename)
42476 	    cands.safe_push (op1c);
42477 
42478 	  op1c->target_data_2++;
42479 	}
42480     }
42481 
42482   int i;
42483   du_head_p head;
42484   FOR_EACH_VEC_ELT (cands, i, head)
42485     {
42486       int old_reg, best_reg;
42487       HARD_REG_SET unavailable;
42488 
42489       CLEAR_HARD_REG_SET (unavailable);
42490       if (head->target_data_1)
42491 	IOR_HARD_REG_SET (unavailable, output_risky);
42492       if (head->target_data_2)
42493 	IOR_HARD_REG_SET (unavailable, input_risky);
42494 
42495       int n_uses;
42496       reg_class superclass = regrename_find_superclass (head, &n_uses,
42497 							&unavailable);
42498       old_reg = head->regno;
42499       best_reg = find_rename_reg (head, superclass, &unavailable,
42500 				  old_reg, false);
42501       bool ok = regrename_do_replace (head, best_reg);
42502       gcc_assert (ok);
42503       if (dump_file)
42504 	fprintf (dump_file, "Chain %d renamed as %s in %s\n", head->id,
42505 		 reg_names[best_reg], reg_class_names[superclass]);
42506 
42507     }
42508 
42509   regrename_finish ();
42510 
42511   df_analyze ();
42512 
42513   basic_block bb;
42514   regset_head live;
42515 
42516   INIT_REG_SET (&live);
42517 
42518   FOR_EACH_BB_FN (bb, cfun)
42519     {
42520       rtx_insn *insn;
42521 
42522       COPY_REG_SET (&live, DF_LR_OUT (bb));
42523       df_simulate_initialize_backwards (bb, &live);
42524 
42525       FOR_BB_INSNS_REVERSE (bb, insn)
42526 	{
42527 	  if (!NONDEBUG_INSN_P (insn))
42528 	    continue;
42529 
42530 	  df_simulate_one_insn_backwards (bb, insn, &live);
42531 
42532 	  if (GET_CODE (PATTERN (insn)) == USE
42533 	      || GET_CODE (PATTERN (insn)) == CLOBBER)
42534 	    continue;
42535 
42536 	  extract_insn (insn);
42537 	  constrain_operands_cached (insn, reload_completed);
42538 	  int opno0, opno1;
42539 	  int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand,
42540 					      recog_data.n_operands, &opno0,
42541 					      &opno1);
42542 	  if (modrm < 0
42543 	      || !ix86_rop_should_change_byte_p (modrm)
42544 	      || opno0 == opno1)
42545 	    continue;
42546 
42547 	  rtx oldreg = recog_data.operand[opno1];
42548 	  preprocess_constraints (insn);
42549 	  const operand_alternative *alt = which_op_alt ();
42550 
42551 	  int i;
42552 	  for (i = 0; i < recog_data.n_operands; i++)
42553 	    if (i != opno1
42554 		&& alt[i].earlyclobber
42555 		&& reg_overlap_mentioned_p (recog_data.operand[i],
42556 					    oldreg))
42557 	      break;
42558 
42559 	  if (i < recog_data.n_operands)
42560 	    continue;
42561 
42562 	  if (dump_file)
42563 	    fprintf (dump_file,
42564 		     "attempting to fix modrm byte in insn %d:"
42565 		     " reg %d class %s", INSN_UID (insn), REGNO (oldreg),
42566 		     reg_class_names[alt[opno1].cl]);
42567 
42568 	  HARD_REG_SET unavailable;
42569 	  REG_SET_TO_HARD_REG_SET (unavailable, &live);
42570 	  SET_HARD_REG_BIT (unavailable, REGNO (oldreg));
42571 	  IOR_COMPL_HARD_REG_SET (unavailable, call_used_reg_set);
42572 	  IOR_HARD_REG_SET (unavailable, fixed_reg_set);
42573 	  IOR_HARD_REG_SET (unavailable, output_risky);
42574 	  IOR_COMPL_HARD_REG_SET (unavailable,
42575 				  reg_class_contents[alt[opno1].cl]);
42576 
42577 	  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42578 	      if (!TEST_HARD_REG_BIT (unavailable, i))
42579 		break;
42580 	  if (i == FIRST_PSEUDO_REGISTER)
42581 	    {
42582 	      if (dump_file)
42583 		fprintf (dump_file, ", none available\n");
42584 	      continue;
42585 	    }
42586 	  if (dump_file)
42587 	    fprintf (dump_file, " -> %d\n", i);
42588 	  rtx newreg = gen_rtx_REG (recog_data.operand_mode[opno1], i);
42589 	  validate_change (insn, recog_data.operand_loc[opno1], newreg, false);
42590 	  insn = emit_insn_before (gen_move_insn (newreg, oldreg), insn);
42591 	}
42592     }
42593 }
42594 
42595 /* Implement machine specific optimizations.  We implement padding of returns
42596    for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window.  */
42597 static void
42598 ix86_reorg (void)
42599 {
42600   /* We are freeing block_for_insn in the toplev to keep compatibility
42601      with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
42602   compute_bb_for_insn ();
42603 
42604   if (flag_mitigate_rop)
42605     ix86_mitigate_rop ();
42606 
42607   if (TARGET_SEH && current_function_has_exception_handlers ())
42608     ix86_seh_fixup_eh_fallthru ();
42609 
42610   if (optimize && optimize_function_for_speed_p (cfun))
42611     {
42612       if (TARGET_PAD_SHORT_FUNCTION)
42613 	ix86_pad_short_function ();
42614       else if (TARGET_PAD_RETURNS)
42615 	ix86_pad_returns ();
42616 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
42617       if (TARGET_FOUR_JUMP_LIMIT)
42618 	ix86_avoid_jump_mispredicts ();
42619 #endif
42620     }
42621 }
42622 
42623 /* Return true when a QImode register that must be represented via a REX
42624    prefix is used.  */
42625 bool
42626 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
42627 {
42628   int i;
42629   extract_insn_cached (insn);
42630   for (i = 0; i < recog_data.n_operands; i++)
42631     if (GENERAL_REG_P (recog_data.operand[i])
42632 	&& !QI_REGNO_P (REGNO (recog_data.operand[i])))
42633        return true;
42634   return false;
42635 }
42636 
42637 /* Return true when INSN mentions a register that must be encoded using a REX
42638    prefix.  */
42639 bool
42640 x86_extended_reg_mentioned_p (rtx insn)
42641 {
42642   subrtx_iterator::array_type array;
42643   FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
42644     {
42645       const_rtx x = *iter;
42646       if (REG_P (x)
42647 	  && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
42648 	return true;
42649     }
42650   return false;
42651 }
42652 
42653 /* If profitable, negate (without causing overflow) the integer constant
42654    of mode MODE at location LOC.  Return true if it was negated.  */
42655 bool
42656 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
42657 {
42658   HOST_WIDE_INT val;
42659 
42660   if (!CONST_INT_P (*loc))
42661     return false;
42662 
42663   switch (mode)
42664     {
42665     case E_DImode:
42666       /* DImode x86_64 constants must fit in 32 bits.  */
42667       gcc_assert (x86_64_immediate_operand (*loc, mode));
42668 
42669       mode = SImode;
42670       break;
42671 
42672     case E_SImode:
42673     case E_HImode:
42674     case E_QImode:
42675       break;
42676 
42677     default:
42678       gcc_unreachable ();
42679     }
42680 
42681   /* Avoid overflows.  */
42682   if (mode_signbit_p (mode, *loc))
42683     return false;
42684 
42685   val = INTVAL (*loc);
42686 
42687   /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
42688      Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
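  /* (Presumably this is about the sign-extended 8-bit immediate forms:
     -128 still fits in an imm8 while +128 does not, so keeping -128 as-is
     and turning +128 into -128 yields the shorter encoding.)  */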
42689   if ((val < 0 && val != -128)
42690       || val == 128)
42691     {
42692       *loc = GEN_INT (-val);
42693       return true;
42694     }
42695 
42696   return false;
42697 }
42698 
42699 /* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
42700    optabs would emit if we didn't have TFmode patterns.  */
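/* Sketch of the scheme used below: if the input is non-negative when viewed
   as a signed value, a plain signed conversion suffices.  Otherwise convert
   (in >> 1) | (in & 1) -- the low bit is folded back in, presumably so that
   rounding is unaffected -- and then double the result with an FP add.  */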
42701 
42702 void
42703 x86_emit_floatuns (rtx operands[2])
42704 {
42705   rtx_code_label *neglab, *donelab;
42706   rtx i0, i1, f0, in, out;
42707   machine_mode mode, inmode;
42708 
42709   inmode = GET_MODE (operands[1]);
42710   gcc_assert (inmode == SImode || inmode == DImode);
42711 
42712   out = operands[0];
42713   in = force_reg (inmode, operands[1]);
42714   mode = GET_MODE (out);
42715   neglab = gen_label_rtx ();
42716   donelab = gen_label_rtx ();
42717   f0 = gen_reg_rtx (mode);
42718 
42719   emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
42720 
42721   expand_float (out, in, 0);
42722 
42723   emit_jump_insn (gen_jump (donelab));
42724   emit_barrier ();
42725 
42726   emit_label (neglab);
42727 
42728   i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
42729 			    1, OPTAB_DIRECT);
42730   i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
42731 			    1, OPTAB_DIRECT);
42732   i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
42733 
42734   expand_float (f0, i0, 0);
42735 
42736   emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
42737 
42738   emit_label (donelab);
42739 }
42740 
42741 static bool canonicalize_perm (struct expand_vec_perm_d *d);
42742 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
42743 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
42744 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
42745 
42746 /* Get a vector mode of the same size as the original but with elements
42747    twice as wide.  This is only guaranteed to apply to integral vectors.  */
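/* For example, this maps V16QImode (16 x 8-bit) to V8HImode (8 x 16-bit):
   the same 16-byte size, half as many elements, each twice as wide.  */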
42748 
42749 static inline machine_mode
42750 get_mode_wider_vector (machine_mode o)
42751 {
42752   /* ??? Rely on the ordering that genmodes.c gives to vectors.  */
42753   machine_mode n = GET_MODE_WIDER_MODE (o).require ();
42754   gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
42755   gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
42756   return n;
42757 }
42758 
42759 /* A subroutine of ix86_expand_vector_init_duplicate.  Tries to
42760    fill TARGET with VAL via vec_duplicate.  */
42761 
42762 static bool
42763 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
42764 {
42765   bool ok;
42766   rtx_insn *insn;
42767   rtx dup;
42768 
42769   /* First attempt to recognize VAL as-is.  */
42770   dup = gen_vec_duplicate (mode, val);
42771   insn = emit_insn (gen_rtx_SET (target, dup));
42772   if (recog_memoized (insn) < 0)
42773     {
42774       rtx_insn *seq;
42775       machine_mode innermode = GET_MODE_INNER (mode);
42776       rtx reg;
42777 
42778       /* If that fails, force VAL into a register.  */
42779 
42780       start_sequence ();
42781       reg = force_reg (innermode, val);
42782       if (GET_MODE (reg) != innermode)
42783 	reg = gen_lowpart (innermode, reg);
42784       SET_SRC (PATTERN (insn)) = gen_vec_duplicate (mode, reg);
42785       seq = get_insns ();
42786       end_sequence ();
42787       if (seq)
42788 	emit_insn_before (seq, insn);
42789 
42790       ok = recog_memoized (insn) >= 0;
42791       gcc_assert (ok);
42792     }
42793   return true;
42794 }
42795 
42796 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
42797    with all elements equal to VAR.  Return true if successful.  */
42798 
42799 static bool
42800 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
42801 				   rtx target, rtx val)
42802 {
42803   bool ok;
42804 
42805   switch (mode)
42806     {
42807     case E_V2SImode:
42808     case E_V2SFmode:
42809       if (!mmx_ok)
42810 	return false;
42811       /* FALLTHRU */
42812 
42813     case E_V4DFmode:
42814     case E_V4DImode:
42815     case E_V8SFmode:
42816     case E_V8SImode:
42817     case E_V2DFmode:
42818     case E_V2DImode:
42819     case E_V4SFmode:
42820     case E_V4SImode:
42821     case E_V16SImode:
42822     case E_V8DImode:
42823     case E_V16SFmode:
42824     case E_V8DFmode:
42825       return ix86_vector_duplicate_value (mode, target, val);
42826 
42827     case E_V4HImode:
42828       if (!mmx_ok)
42829 	return false;
42830       if (TARGET_SSE || TARGET_3DNOW_A)
42831 	{
42832 	  rtx x;
42833 
42834 	  val = gen_lowpart (SImode, val);
42835 	  x = gen_rtx_TRUNCATE (HImode, val);
42836 	  x = gen_rtx_VEC_DUPLICATE (mode, x);
42837 	  emit_insn (gen_rtx_SET (target, x));
42838 	  return true;
42839 	}
42840       goto widen;
42841 
42842     case E_V8QImode:
42843       if (!mmx_ok)
42844 	return false;
42845       goto widen;
42846 
42847     case E_V8HImode:
42848       if (TARGET_AVX2)
42849 	return ix86_vector_duplicate_value (mode, target, val);
42850 
42851       if (TARGET_SSE2)
42852 	{
42853 	  struct expand_vec_perm_d dperm;
42854 	  rtx tmp1, tmp2;
42855 
42856 	permute:
42857 	  memset (&dperm, 0, sizeof (dperm));
42858 	  dperm.target = target;
42859 	  dperm.vmode = mode;
42860 	  dperm.nelt = GET_MODE_NUNITS (mode);
42861 	  dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
42862 	  dperm.one_operand_p = true;
42863 
42864 	  /* Extend to SImode using a paradoxical SUBREG.  */
42865 	  tmp1 = gen_reg_rtx (SImode);
42866 	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
42867 
42868 	  /* Insert the SImode value as the low element of a V4SImode vector.  */
42869 	  tmp2 = gen_reg_rtx (V4SImode);
42870 	  emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
42871 	  emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
42872 
42873 	  ok = (expand_vec_perm_1 (&dperm)
42874 		|| expand_vec_perm_broadcast_1 (&dperm));
42875 	  gcc_assert (ok);
42876 	  return ok;
42877 	}
42878       goto widen;
42879 
42880     case E_V16QImode:
42881       if (TARGET_AVX2)
42882 	return ix86_vector_duplicate_value (mode, target, val);
42883 
42884       if (TARGET_SSE2)
42885 	goto permute;
42886       goto widen;
42887 
42888     widen:
42889       /* Replicate the value once into the next wider mode and recurse.  */
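      /* E.g. for V8QImode this first forms the 16-bit value (val << 8) | val
	 and then recurses with V4HImode, so the recursion eventually bottoms
	 out in one of the modes handled above.  */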
42890       {
42891 	machine_mode smode, wsmode, wvmode;
42892 	rtx x;
42893 
42894 	smode = GET_MODE_INNER (mode);
42895 	wvmode = get_mode_wider_vector (mode);
42896 	wsmode = GET_MODE_INNER (wvmode);
42897 
42898 	val = convert_modes (wsmode, smode, val, true);
42899 	x = expand_simple_binop (wsmode, ASHIFT, val,
42900 				 GEN_INT (GET_MODE_BITSIZE (smode)),
42901 				 NULL_RTX, 1, OPTAB_LIB_WIDEN);
42902 	val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
42903 
42904 	x = gen_reg_rtx (wvmode);
42905 	ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
42906 	gcc_assert (ok);
42907 	emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
42908 	return ok;
42909       }
42910 
42911     case E_V16HImode:
42912     case E_V32QImode:
42913       if (TARGET_AVX2)
42914 	return ix86_vector_duplicate_value (mode, target, val);
42915       else
42916 	{
42917 	  machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
42918 	  rtx x = gen_reg_rtx (hvmode);
42919 
42920 	  ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
42921 	  gcc_assert (ok);
42922 
42923 	  x = gen_rtx_VEC_CONCAT (mode, x, x);
42924 	  emit_insn (gen_rtx_SET (target, x));
42925 	}
42926       return true;
42927 
42928     case E_V64QImode:
42929     case E_V32HImode:
42930       if (TARGET_AVX512BW)
42931 	return ix86_vector_duplicate_value (mode, target, val);
42932       else
42933 	{
42934 	  machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
42935 	  rtx x = gen_reg_rtx (hvmode);
42936 
42937 	  ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
42938 	  gcc_assert (ok);
42939 
42940 	  x = gen_rtx_VEC_CONCAT (mode, x, x);
42941 	  emit_insn (gen_rtx_SET (target, x));
42942 	}
42943       return true;
42944 
42945     default:
42946       return false;
42947     }
42948 }
42949 
42950 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
42951    whose ONE_VAR element is VAR, and other elements are zero.  Return true
42952    if successful.  */
42953 
42954 static bool
42955 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
42956 				     rtx target, rtx var, int one_var)
42957 {
42958   machine_mode vsimode;
42959   rtx new_target;
42960   rtx x, tmp;
42961   bool use_vector_set = false;
42962   rtx (*gen_vec_set_0) (rtx, rtx, rtx) = NULL;
42963 
42964   switch (mode)
42965     {
42966     case E_V2DImode:
42967       /* For SSE4.1, we normally use vector set.  But if the second
42968 	 element is zero and inter-unit moves are OK, we use movq
42969 	 instead.  */
42970       use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
42971 			&& !(TARGET_INTER_UNIT_MOVES_TO_VEC
42972 			     && one_var == 0));
42973       break;
42974     case E_V16QImode:
42975     case E_V4SImode:
42976     case E_V4SFmode:
42977       use_vector_set = TARGET_SSE4_1;
42978       break;
42979     case E_V8HImode:
42980       use_vector_set = TARGET_SSE2;
42981       break;
42982     case E_V4HImode:
42983       use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
42984       break;
42985     case E_V32QImode:
42986     case E_V16HImode:
42987       use_vector_set = TARGET_AVX;
42988       break;
42989     case E_V8SImode:
42990       use_vector_set = TARGET_AVX;
42991       gen_vec_set_0 = gen_vec_setv8si_0;
42992       break;
42993     case E_V8SFmode:
42994       use_vector_set = TARGET_AVX;
42995       gen_vec_set_0 = gen_vec_setv8sf_0;
42996       break;
42997     case E_V4DFmode:
42998       use_vector_set = TARGET_AVX;
42999       gen_vec_set_0 = gen_vec_setv4df_0;
43000       break;
43001     case E_V4DImode:
43002       /* Use ix86_expand_vector_set in 64bit mode only.  */
43003       use_vector_set = TARGET_AVX && TARGET_64BIT;
43004       gen_vec_set_0 = gen_vec_setv4di_0;
43005       break;
43006     case E_V16SImode:
43007       use_vector_set = TARGET_AVX512F && one_var == 0;
43008       gen_vec_set_0 = gen_vec_setv16si_0;
43009       break;
43010     case E_V16SFmode:
43011       use_vector_set = TARGET_AVX512F && one_var == 0;
43012       gen_vec_set_0 = gen_vec_setv16sf_0;
43013       break;
43014     case E_V8DFmode:
43015       use_vector_set = TARGET_AVX512F && one_var == 0;
43016       gen_vec_set_0 = gen_vec_setv8df_0;
43017       break;
43018     case E_V8DImode:
43019       /* Use ix86_expand_vector_set in 64bit mode only.  */
43020       use_vector_set = TARGET_AVX512F && TARGET_64BIT && one_var == 0;
43021       gen_vec_set_0 = gen_vec_setv8di_0;
43022       break;
43023     default:
43024       break;
43025     }
43026 
43027   if (use_vector_set)
43028     {
43029       if (gen_vec_set_0 && one_var == 0)
43030 	{
43031 	  var = force_reg (GET_MODE_INNER (mode), var);
43032 	  emit_insn (gen_vec_set_0 (target, CONST0_RTX (mode), var));
43033 	  return true;
43034 	}
43035       emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
43036       var = force_reg (GET_MODE_INNER (mode), var);
43037       ix86_expand_vector_set (mmx_ok, target, var, one_var);
43038       return true;
43039     }
43040 
43041   switch (mode)
43042     {
43043     case E_V2SFmode:
43044     case E_V2SImode:
43045       if (!mmx_ok)
43046 	return false;
43047       /* FALLTHRU */
43048 
43049     case E_V2DFmode:
43050     case E_V2DImode:
43051       if (one_var != 0)
43052 	return false;
43053       var = force_reg (GET_MODE_INNER (mode), var);
43054       x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
43055       emit_insn (gen_rtx_SET (target, x));
43056       return true;
43057 
43058     case E_V4SFmode:
43059     case E_V4SImode:
43060       if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
43061 	new_target = gen_reg_rtx (mode);
43062       else
43063 	new_target = target;
43064       var = force_reg (GET_MODE_INNER (mode), var);
43065       x = gen_rtx_VEC_DUPLICATE (mode, var);
43066       x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
43067       emit_insn (gen_rtx_SET (new_target, x));
43068       if (one_var != 0)
43069 	{
43070 	  /* We need to shuffle the value to the correct position, so
43071 	     create a new pseudo to store the intermediate result.  */
43072 
43073 	  /* With SSE2, we can use the integer shuffle insns.  */
43074 	  if (mode != V4SFmode && TARGET_SSE2)
43075 	    {
43076 	      emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
43077 					    const1_rtx,
43078 					    GEN_INT (one_var == 1 ? 0 : 1),
43079 					    GEN_INT (one_var == 2 ? 0 : 1),
43080 					    GEN_INT (one_var == 3 ? 0 : 1)));
43081 	      if (target != new_target)
43082 		emit_move_insn (target, new_target);
43083 	      return true;
43084 	    }
43085 
43086 	  /* Otherwise convert the intermediate result to V4SFmode and
43087 	     use the SSE1 shuffle instructions.  */
43088 	  if (mode != V4SFmode)
43089 	    {
43090 	      tmp = gen_reg_rtx (V4SFmode);
43091 	      emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
43092 	    }
43093 	  else
43094 	    tmp = new_target;
43095 
43096 	  emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
43097 				       const1_rtx,
43098 				       GEN_INT (one_var == 1 ? 0 : 1),
43099 				       GEN_INT (one_var == 2 ? 0+4 : 1+4),
43100 				       GEN_INT (one_var == 3 ? 0+4 : 1+4)));
43101 
43102 	  if (mode != V4SFmode)
43103 	    emit_move_insn (target, gen_lowpart (V4SImode, tmp));
43104 	  else if (tmp != target)
43105 	    emit_move_insn (target, tmp);
43106 	}
43107       else if (target != new_target)
43108 	emit_move_insn (target, new_target);
43109       return true;
43110 
43111     case E_V8HImode:
43112     case E_V16QImode:
43113       vsimode = V4SImode;
43114       goto widen;
43115     case E_V4HImode:
43116     case E_V8QImode:
43117       if (!mmx_ok)
43118 	return false;
43119       vsimode = V2SImode;
43120       goto widen;
43121     widen:
43122       if (one_var != 0)
43123 	return false;
43124 
43125       /* Zero extend the variable element to SImode and recurse.  */
43126       var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
43127 
43128       x = gen_reg_rtx (vsimode);
43129       if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
43130 						var, one_var))
43131 	gcc_unreachable ();
43132 
43133       emit_move_insn (target, gen_lowpart (mode, x));
43134       return true;
43135 
43136     default:
43137       return false;
43138     }
43139 }
43140 
43141 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
43142    consisting of the values in VALS.  It is known that all elements
43143    except ONE_VAR are constants.  Return true if successful.  */
43144 
43145 static bool
43146 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
43147 				 rtx target, rtx vals, int one_var)
43148 {
43149   rtx var = XVECEXP (vals, 0, one_var);
43150   machine_mode wmode;
43151   rtx const_vec, x;
43152 
43153   const_vec = copy_rtx (vals);
43154   XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
43155   const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
43156 
43157   switch (mode)
43158     {
43159     case E_V2DFmode:
43160     case E_V2DImode:
43161     case E_V2SFmode:
43162     case E_V2SImode:
43163       /* For the two element vectors, it's just as easy to use
43164 	 the general case.  */
43165       return false;
43166 
43167     case E_V4DImode:
43168       /* Use ix86_expand_vector_set in 64bit mode only.  */
43169       if (!TARGET_64BIT)
43170 	return false;
43171       /* FALLTHRU */
43172     case E_V4DFmode:
43173     case E_V8SFmode:
43174     case E_V8SImode:
43175     case E_V16HImode:
43176     case E_V32QImode:
43177     case E_V4SFmode:
43178     case E_V4SImode:
43179     case E_V8HImode:
43180     case E_V4HImode:
43181       break;
43182 
43183     case E_V16QImode:
43184       if (TARGET_SSE4_1)
43185 	break;
43186       wmode = V8HImode;
43187       goto widen;
43188     case E_V8QImode:
43189       wmode = V4HImode;
43190       goto widen;
43191     widen:
43192       /* There's no way to set one QImode entry easily.  Combine
43193 	 the variable value with its adjacent constant value, and
43194 	 promote to an HImode set.  */
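      /* Concretely: when ONE_VAR is odd the variable byte forms the high
	 half of the HImode element (hence the shift by 8 below); when it is
	 even, the neighbouring constant byte is shifted into the high half
	 instead.  */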
43195       x = XVECEXP (vals, 0, one_var ^ 1);
43196       if (one_var & 1)
43197 	{
43198 	  var = convert_modes (HImode, QImode, var, true);
43199 	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
43200 				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
43201 	  x = GEN_INT (INTVAL (x) & 0xff);
43202 	}
43203       else
43204 	{
43205 	  var = convert_modes (HImode, QImode, var, true);
43206 	  x = gen_int_mode (UINTVAL (x) << 8, HImode);
43207 	}
43208       if (x != const0_rtx)
43209 	var = expand_simple_binop (HImode, IOR, var, x, var,
43210 				   1, OPTAB_LIB_WIDEN);
43211 
43212       x = gen_reg_rtx (wmode);
43213       emit_move_insn (x, gen_lowpart (wmode, const_vec));
43214       ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
43215 
43216       emit_move_insn (target, gen_lowpart (mode, x));
43217       return true;
43218 
43219     default:
43220       return false;
43221     }
43222 
43223   emit_move_insn (target, const_vec);
43224   ix86_expand_vector_set (mmx_ok, target, var, one_var);
43225   return true;
43226 }
43227 
43228 /* A subroutine of ix86_expand_vector_init_general.  Use vector
43229    concatenate to handle the most general case: all values variable,
43230    and none identical.  */
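/* In outline: the N input operands are combined pairwise into vectors of
   twice the element count, level by level, until a final two-operand
   VEC_CONCAT fills TARGET.  */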
43231 
43232 static void
43233 ix86_expand_vector_init_concat (machine_mode mode,
43234 				rtx target, rtx *ops, int n)
43235 {
43236   machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
43237   rtx first[16], second[8], third[4];
43238   rtvec v;
43239   int i, j;
43240 
43241   switch (n)
43242     {
43243     case 2:
43244       switch (mode)
43245 	{
43246 	case E_V16SImode:
43247 	  cmode = V8SImode;
43248 	  break;
43249 	case E_V16SFmode:
43250 	  cmode = V8SFmode;
43251 	  break;
43252 	case E_V8DImode:
43253 	  cmode = V4DImode;
43254 	  break;
43255 	case E_V8DFmode:
43256 	  cmode = V4DFmode;
43257 	  break;
43258 	case E_V8SImode:
43259 	  cmode = V4SImode;
43260 	  break;
43261 	case E_V8SFmode:
43262 	  cmode = V4SFmode;
43263 	  break;
43264 	case E_V4DImode:
43265 	  cmode = V2DImode;
43266 	  break;
43267 	case E_V4DFmode:
43268 	  cmode = V2DFmode;
43269 	  break;
43270 	case E_V4SImode:
43271 	  cmode = V2SImode;
43272 	  break;
43273 	case E_V4SFmode:
43274 	  cmode = V2SFmode;
43275 	  break;
43276 	case E_V2DImode:
43277 	  cmode = DImode;
43278 	  break;
43279 	case E_V2SImode:
43280 	  cmode = SImode;
43281 	  break;
43282 	case E_V2DFmode:
43283 	  cmode = DFmode;
43284 	  break;
43285 	case E_V2SFmode:
43286 	  cmode = SFmode;
43287 	  break;
43288 	default:
43289 	  gcc_unreachable ();
43290 	}
43291 
43292       if (!register_operand (ops[1], cmode))
43293 	ops[1] = force_reg (cmode, ops[1]);
43294       if (!register_operand (ops[0], cmode))
43295 	ops[0] = force_reg (cmode, ops[0]);
43296       emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0],
43297 							  ops[1])));
43298       break;
43299 
43300     case 4:
43301       switch (mode)
43302 	{
43303 	case E_V4DImode:
43304 	  cmode = V2DImode;
43305 	  break;
43306 	case E_V4DFmode:
43307 	  cmode = V2DFmode;
43308 	  break;
43309 	case E_V4SImode:
43310 	  cmode = V2SImode;
43311 	  break;
43312 	case E_V4SFmode:
43313 	  cmode = V2SFmode;
43314 	  break;
43315 	default:
43316 	  gcc_unreachable ();
43317 	}
43318       goto half;
43319 
43320     case 8:
43321       switch (mode)
43322 	{
43323 	case E_V8DImode:
43324 	  cmode = V2DImode;
43325 	  hmode = V4DImode;
43326 	  break;
43327 	case E_V8DFmode:
43328 	  cmode = V2DFmode;
43329 	  hmode = V4DFmode;
43330 	  break;
43331 	case E_V8SImode:
43332 	  cmode = V2SImode;
43333 	  hmode = V4SImode;
43334 	  break;
43335 	case E_V8SFmode:
43336 	  cmode = V2SFmode;
43337 	  hmode = V4SFmode;
43338 	  break;
43339 	default:
43340 	  gcc_unreachable ();
43341 	}
43342       goto half;
43343 
43344     case 16:
43345       switch (mode)
43346 	{
43347 	case E_V16SImode:
43348 	  cmode = V2SImode;
43349 	  hmode = V4SImode;
43350 	  gmode = V8SImode;
43351 	  break;
43352 	case E_V16SFmode:
43353 	  cmode = V2SFmode;
43354 	  hmode = V4SFmode;
43355 	  gmode = V8SFmode;
43356 	  break;
43357 	default:
43358 	  gcc_unreachable ();
43359 	}
43360       goto half;
43361 
43362 half:
43363       /* FIXME: We process inputs backward to help RA.  PR 36222.  */
43364       i = n - 1;
43365       j = (n >> 1) - 1;
43366       for (; i > 0; i -= 2, j--)
43367 	{
43368 	  first[j] = gen_reg_rtx (cmode);
43369 	  v = gen_rtvec (2, ops[i - 1], ops[i]);
43370 	  ix86_expand_vector_init (false, first[j],
43371 				   gen_rtx_PARALLEL (cmode, v));
43372 	}
43373 
43374       n >>= 1;
43375       if (n > 4)
43376 	{
43377 	  gcc_assert (hmode != VOIDmode);
43378 	  gcc_assert (gmode != VOIDmode);
43379 	  for (i = j = 0; i < n; i += 2, j++)
43380 	    {
43381 	      second[j] = gen_reg_rtx (hmode);
43382 	      ix86_expand_vector_init_concat (hmode, second [j],
43383 					      &first [i], 2);
43384 	    }
43385 	  n >>= 1;
43386 	  for (i = j = 0; i < n; i += 2, j++)
43387 	    {
43388 	      third[j] = gen_reg_rtx (gmode);
43389 	      ix86_expand_vector_init_concat (gmode, third[j],
43390 					      &second[i], 2);
43391 	    }
43392 	  n >>= 1;
43393 	  ix86_expand_vector_init_concat (mode, target, third, n);
43394 	}
43395       else if (n > 2)
43396 	{
43397 	  gcc_assert (hmode != VOIDmode);
43398 	  for (i = j = 0; i < n; i += 2, j++)
43399 	    {
43400 	      second[j] = gen_reg_rtx (hmode);
43401 	      ix86_expand_vector_init_concat (hmode, second [j],
43402 					      &first [i], 2);
43403 	    }
43404 	  n >>= 1;
43405 	  ix86_expand_vector_init_concat (mode, target, second, n);
43406 	}
43407       else
43408 	ix86_expand_vector_init_concat (mode, target, first, n);
43409       break;
43410 
43411     default:
43412       gcc_unreachable ();
43413     }
43414 }
43415 
43416 /* A subroutine of ix86_expand_vector_init_general.  Use vector
43417    interleave to handle the most general case: all values variable,
43418    and none identical.  */
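/* Sketch of the strategy implemented below: each pair of scalar elements is
   first placed in the two low slots of its own vector (the first element of
   the pair via an SImode move into element 0, the second via a vec_set into
   element 1), and the resulting vectors are then repeatedly interleaved on
   their low halves until the full-width vector is assembled.  */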
43419 
43420 static void
43421 ix86_expand_vector_init_interleave (machine_mode mode,
43422 				    rtx target, rtx *ops, int n)
43423 {
43424   machine_mode first_imode, second_imode, third_imode, inner_mode;
43425   int i, j;
43426   rtx op0, op1;
43427   rtx (*gen_load_even) (rtx, rtx, rtx);
43428   rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
43429   rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
43430 
43431   switch (mode)
43432     {
43433     case E_V8HImode:
43434       gen_load_even = gen_vec_setv8hi;
43435       gen_interleave_first_low = gen_vec_interleave_lowv4si;
43436       gen_interleave_second_low = gen_vec_interleave_lowv2di;
43437       inner_mode = HImode;
43438       first_imode = V4SImode;
43439       second_imode = V2DImode;
43440       third_imode = VOIDmode;
43441       break;
43442     case E_V16QImode:
43443       gen_load_even = gen_vec_setv16qi;
43444       gen_interleave_first_low = gen_vec_interleave_lowv8hi;
43445       gen_interleave_second_low = gen_vec_interleave_lowv4si;
43446       inner_mode = QImode;
43447       first_imode = V8HImode;
43448       second_imode = V4SImode;
43449       third_imode = V2DImode;
43450       break;
43451     default:
43452       gcc_unreachable ();
43453     }
43454 
43455   for (i = 0; i < n; i++)
43456     {
43457 	      /* Extend the odd element to SImode using a paradoxical SUBREG.  */
43458       op0 = gen_reg_rtx (SImode);
43459       emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
43460 
43461 	      /* Insert the SImode value as the low element of a V4SImode vector.  */
43462       op1 = gen_reg_rtx (V4SImode);
43463       op0 = gen_rtx_VEC_MERGE (V4SImode,
43464 			       gen_rtx_VEC_DUPLICATE (V4SImode,
43465 						      op0),
43466 			       CONST0_RTX (V4SImode),
43467 			       const1_rtx);
43468       emit_insn (gen_rtx_SET (op1, op0));
43469 
43470 	      /* Cast the V4SImode vector back to a vector in the original mode.  */
43471       op0 = gen_reg_rtx (mode);
43472       emit_move_insn (op0, gen_lowpart (mode, op1));
43473 
43474       /* Load even elements into the second position.  */
43475       emit_insn (gen_load_even (op0,
43476 				force_reg (inner_mode,
43477 					   ops [i + i + 1]),
43478 				const1_rtx));
43479 
43480       /* Cast vector to FIRST_IMODE vector.  */
43481       ops[i] = gen_reg_rtx (first_imode);
43482       emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
43483     }
43484 
43485   /* Interleave low FIRST_IMODE vectors.  */
43486   for (i = j = 0; i < n; i += 2, j++)
43487     {
43488       op0 = gen_reg_rtx (first_imode);
43489       emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
43490 
43491       /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
43492       ops[j] = gen_reg_rtx (second_imode);
43493       emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
43494     }
43495 
43496   /* Interleave low SECOND_IMODE vectors.  */
43497   switch (second_imode)
43498     {
43499     case E_V4SImode:
43500       for (i = j = 0; i < n / 2; i += 2, j++)
43501 	{
43502 	  op0 = gen_reg_rtx (second_imode);
43503 	  emit_insn (gen_interleave_second_low (op0, ops[i],
43504 						ops[i + 1]));
43505 
43506 	  /* Cast the SECOND_IMODE vector to the THIRD_IMODE
43507 	     vector.  */
43508 	  ops[j] = gen_reg_rtx (third_imode);
43509 	  emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
43510 	}
43511       second_imode = V2DImode;
43512       gen_interleave_second_low = gen_vec_interleave_lowv2di;
43513       /* FALLTHRU */
43514 
43515     case E_V2DImode:
43516       op0 = gen_reg_rtx (second_imode);
43517       emit_insn (gen_interleave_second_low (op0, ops[0],
43518 					    ops[1]));
43519 
43520 	      /* Cast the SECOND_IMODE vector back to a vector in the original
43521 		 mode.  */
43522       emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0)));
43523       break;
43524 
43525     default:
43526       gcc_unreachable ();
43527     }
43528 }
43529 
43530 /* A subroutine of ix86_expand_vector_init.  Handle the most general case:
43531    all values variable, and none identical.  */
43532 
43533 static void
43534 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
43535 				 rtx target, rtx vals)
43536 {
43537   rtx ops[64], op0, op1, op2, op3, op4, op5;
43538   machine_mode half_mode = VOIDmode;
43539   machine_mode quarter_mode = VOIDmode;
43540   int n, i;
43541 
43542   switch (mode)
43543     {
43544     case E_V2SFmode:
43545     case E_V2SImode:
43546       if (!mmx_ok && !TARGET_SSE)
43547 	break;
43548       /* FALLTHRU */
43549 
43550     case E_V16SImode:
43551     case E_V16SFmode:
43552     case E_V8DFmode:
43553     case E_V8DImode:
43554     case E_V8SFmode:
43555     case E_V8SImode:
43556     case E_V4DFmode:
43557     case E_V4DImode:
43558     case E_V4SFmode:
43559     case E_V4SImode:
43560     case E_V2DFmode:
43561     case E_V2DImode:
43562       n = GET_MODE_NUNITS (mode);
43563       for (i = 0; i < n; i++)
43564 	ops[i] = XVECEXP (vals, 0, i);
43565       ix86_expand_vector_init_concat (mode, target, ops, n);
43566       return;
43567 
43568     case E_V2TImode:
43569       for (i = 0; i < 2; i++)
43570 	ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i));
43571       op0 = gen_reg_rtx (V4DImode);
43572       ix86_expand_vector_init_concat (V4DImode, op0, ops, 2);
43573       emit_move_insn (target, gen_lowpart (GET_MODE (target), op0));
43574       return;
43575 
43576     case E_V4TImode:
43577       for (i = 0; i < 4; i++)
43578 	ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i));
43579       ops[4] = gen_reg_rtx (V4DImode);
43580       ix86_expand_vector_init_concat (V4DImode, ops[4], ops, 2);
43581       ops[5] = gen_reg_rtx (V4DImode);
43582       ix86_expand_vector_init_concat (V4DImode, ops[5], ops + 2, 2);
43583       op0 = gen_reg_rtx (V8DImode);
43584       ix86_expand_vector_init_concat (V8DImode, op0, ops + 4, 2);
43585       emit_move_insn (target, gen_lowpart (GET_MODE (target), op0));
43586       return;
43587 
43588     case E_V32QImode:
43589       half_mode = V16QImode;
43590       goto half;
43591 
43592     case E_V16HImode:
43593       half_mode = V8HImode;
43594       goto half;
43595 
43596 half:
43597       n = GET_MODE_NUNITS (mode);
43598       for (i = 0; i < n; i++)
43599 	ops[i] = XVECEXP (vals, 0, i);
43600       op0 = gen_reg_rtx (half_mode);
43601       op1 = gen_reg_rtx (half_mode);
43602       ix86_expand_vector_init_interleave (half_mode, op0, ops,
43603 					  n >> 2);
43604       ix86_expand_vector_init_interleave (half_mode, op1,
43605 					  &ops [n >> 1], n >> 2);
43606       emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1)));
43607       return;
43608 
43609     case E_V64QImode:
43610       quarter_mode = V16QImode;
43611       half_mode = V32QImode;
43612       goto quarter;
43613 
43614     case E_V32HImode:
43615       quarter_mode = V8HImode;
43616       half_mode = V16HImode;
43617       goto quarter;
43618 
43619 quarter:
43620       n = GET_MODE_NUNITS (mode);
43621       for (i = 0; i < n; i++)
43622 	ops[i] = XVECEXP (vals, 0, i);
43623       op0 = gen_reg_rtx (quarter_mode);
43624       op1 = gen_reg_rtx (quarter_mode);
43625       op2 = gen_reg_rtx (quarter_mode);
43626       op3 = gen_reg_rtx (quarter_mode);
43627       op4 = gen_reg_rtx (half_mode);
43628       op5 = gen_reg_rtx (half_mode);
43629       ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
43630 					  n >> 3);
43631       ix86_expand_vector_init_interleave (quarter_mode, op1,
43632 					  &ops [n >> 2], n >> 3);
43633       ix86_expand_vector_init_interleave (quarter_mode, op2,
43634 					  &ops [n >> 1], n >> 3);
43635       ix86_expand_vector_init_interleave (quarter_mode, op3,
43636 					  &ops [(n >> 1) | (n >> 2)], n >> 3);
43637       emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
43638       emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
43639       emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5)));
43640       return;
43641 
43642     case E_V16QImode:
43643       if (!TARGET_SSE4_1)
43644 	break;
43645       /* FALLTHRU */
43646 
43647     case E_V8HImode:
43648       if (!TARGET_SSE2)
43649 	break;
43650 
43651       /* Don't use ix86_expand_vector_init_interleave if we can't
43652 	 move from GPR to SSE register directly.  */
43653       if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
43654 	break;
43655 
43656       n = GET_MODE_NUNITS (mode);
43657       for (i = 0; i < n; i++)
43658 	ops[i] = XVECEXP (vals, 0, i);
43659       ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
43660       return;
43661 
43662     case E_V4HImode:
43663     case E_V8QImode:
43664       break;
43665 
43666     default:
43667       gcc_unreachable ();
43668     }
43669 
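    /* Fallback, summarizing the block below: pack the elements into
       word_mode integers with shifts and IORs, then assemble the vector
       from those words -- directly for one word, via its low/high parts for
       two, or by recursing as a V4SImode init for four.  */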
43670     {
43671       int i, j, n_elts, n_words, n_elt_per_word;
43672       machine_mode inner_mode;
43673       rtx words[4], shift;
43674 
43675       inner_mode = GET_MODE_INNER (mode);
43676       n_elts = GET_MODE_NUNITS (mode);
43677       n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
43678       n_elt_per_word = n_elts / n_words;
43679       shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
43680 
43681       for (i = 0; i < n_words; ++i)
43682 	{
43683 	  rtx word = NULL_RTX;
43684 
43685 	  for (j = 0; j < n_elt_per_word; ++j)
43686 	    {
43687 	      rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
43688 	      elt = convert_modes (word_mode, inner_mode, elt, true);
43689 
43690 	      if (j == 0)
43691 		word = elt;
43692 	      else
43693 		{
43694 		  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
43695 					      word, 1, OPTAB_LIB_WIDEN);
43696 		  word = expand_simple_binop (word_mode, IOR, word, elt,
43697 					      word, 1, OPTAB_LIB_WIDEN);
43698 		}
43699 	    }
43700 
43701 	  words[i] = word;
43702 	}
43703 
43704       if (n_words == 1)
43705 	emit_move_insn (target, gen_lowpart (mode, words[0]));
43706       else if (n_words == 2)
43707 	{
43708 	  rtx tmp = gen_reg_rtx (mode);
43709 	  emit_clobber (tmp);
43710 	  emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
43711 	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
43712 	  emit_move_insn (target, tmp);
43713 	}
43714       else if (n_words == 4)
43715 	{
43716 	  rtx tmp = gen_reg_rtx (V4SImode);
43717 	  gcc_assert (word_mode == SImode);
43718 	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
43719 	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
43720 	  emit_move_insn (target, gen_lowpart (mode, tmp));
43721 	}
43722       else
43723 	gcc_unreachable ();
43724     }
43725 }
43726 
43727 /* Initialize vector TARGET via VALS.  Suppress the use of MMX
43728    instructions unless MMX_OK is true.  */
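/* Apart from the special case of initialization from two half-width vectors,
   which is handled first, the dispatch below is: all-constant vectors are
   loaded from the constant pool, all-identical values are broadcast, a single
   variable element is handled by loading the constant part and then
   overwriting one element, and everything else goes to the general
   expander.  */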
43729 
43730 void
43731 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
43732 {
43733   machine_mode mode = GET_MODE (target);
43734   machine_mode inner_mode = GET_MODE_INNER (mode);
43735   int n_elts = GET_MODE_NUNITS (mode);
43736   int n_var = 0, one_var = -1;
43737   bool all_same = true, all_const_zero = true;
43738   int i;
43739   rtx x;
43740 
43741 	  /* First, handle initialization from vector elements.  */
43742   if (n_elts != XVECLEN (vals, 0))
43743     {
43744       rtx subtarget = target;
43745       x = XVECEXP (vals, 0, 0);
43746       gcc_assert (GET_MODE_INNER (GET_MODE (x)) == inner_mode);
43747       if (GET_MODE_NUNITS (GET_MODE (x)) * 2 == n_elts)
43748 	{
43749 	  rtx ops[2] = { XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1) };
43750 	  if (inner_mode == QImode || inner_mode == HImode)
43751 	    {
43752 	      unsigned int n_bits = n_elts * GET_MODE_SIZE (inner_mode);
43753 	      mode = mode_for_vector (SImode, n_bits / 4).require ();
43754 	      inner_mode = mode_for_vector (SImode, n_bits / 8).require ();
43755 	      ops[0] = gen_lowpart (inner_mode, ops[0]);
43756 	      ops[1] = gen_lowpart (inner_mode, ops[1]);
43757 	      subtarget = gen_reg_rtx (mode);
43758 	    }
43759 	  ix86_expand_vector_init_concat (mode, subtarget, ops, 2);
43760 	  if (subtarget != target)
43761 	    emit_move_insn (target, gen_lowpart (GET_MODE (target), subtarget));
43762 	  return;
43763 	}
43764       gcc_unreachable ();
43765     }
43766 
43767   for (i = 0; i < n_elts; ++i)
43768     {
43769       x = XVECEXP (vals, 0, i);
43770       if (!(CONST_SCALAR_INT_P (x)
43771 	    || CONST_DOUBLE_P (x)
43772 	    || CONST_FIXED_P (x)))
43773 	n_var++, one_var = i;
43774       else if (x != CONST0_RTX (inner_mode))
43775 	all_const_zero = false;
43776       if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
43777 	all_same = false;
43778     }
43779 
43780   /* Constants are best loaded from the constant pool.  */
43781   if (n_var == 0)
43782     {
43783       emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
43784       return;
43785     }
43786 
43787   /* If all values are identical, broadcast the value.  */
43788   if (all_same
43789       && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
43790 					    XVECEXP (vals, 0, 0)))
43791     return;
43792 
43793   /* Values where only one field is non-constant are best loaded from
43794      the pool and overwritten via move later.  */
43795   if (n_var == 1)
43796     {
43797       if (all_const_zero
43798 	  && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
43799 						  XVECEXP (vals, 0, one_var),
43800 						  one_var))
43801 	return;
43802 
43803       if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
43804 	return;
43805     }
43806 
43807   ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
43808 }
43809 
43810 void
43811 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
43812 {
43813   machine_mode mode = GET_MODE (target);
43814   machine_mode inner_mode = GET_MODE_INNER (mode);
43815   machine_mode half_mode;
43816   bool use_vec_merge = false;
43817   rtx tmp;
43818   static rtx (*gen_extract[6][2]) (rtx, rtx)
43819     = {
43820 	{ gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
43821 	{ gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
43822 	{ gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
43823 	{ gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
43824 	{ gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
43825 	{ gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
43826       };
43827   static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
43828     = {
43829 	{ gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
43830 	{ gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
43831 	{ gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
43832 	{ gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
43833 	{ gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
43834 	{ gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
43835       };
43836   int i, j, n;
43837   machine_mode mmode = VOIDmode;
43838   rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
43839 
43840   switch (mode)
43841     {
43842     case E_V2SFmode:
43843     case E_V2SImode:
43844       if (mmx_ok)
43845 	{
43846 	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
43847 	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
43848 	  if (elt == 0)
43849 	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
43850 	  else
43851 	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
43852 	  emit_insn (gen_rtx_SET (target, tmp));
43853 	  return;
43854 	}
43855       break;
43856 
43857     case E_V2DImode:
43858       use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
43859       if (use_vec_merge)
43860 	break;
43861 
43862       tmp = gen_reg_rtx (GET_MODE_INNER (mode));
43863       ix86_expand_vector_extract (false, tmp, target, 1 - elt);
43864       if (elt == 0)
43865 	tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
43866       else
43867 	tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
43868       emit_insn (gen_rtx_SET (target, tmp));
43869       return;
43870 
43871     case E_V2DFmode:
43872       {
43873 	rtx op0, op1;
43874 
43875 	/* For the two element vectors, we implement a VEC_CONCAT with
43876 	   the extraction of the other element.  */
43877 
43878 	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
43879 	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
43880 
43881 	if (elt == 0)
43882 	  op0 = val, op1 = tmp;
43883 	else
43884 	  op0 = tmp, op1 = val;
43885 
43886 	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
43887 	emit_insn (gen_rtx_SET (target, tmp));
43888       }
43889       return;
43890 
43891     case E_V4SFmode:
43892       use_vec_merge = TARGET_SSE4_1;
43893       if (use_vec_merge)
43894 	break;
43895 
43896       switch (elt)
43897 	{
43898 	case 0:
43899 	  use_vec_merge = true;
43900 	  break;
43901 
43902 	case 1:
43903 	  /* tmp = target = A B C D */
43904 	  tmp = copy_to_reg (target);
43905 	  /* target = A A B B */
43906 	  emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
43907 	  /* target = X A B B */
43908 	  ix86_expand_vector_set (false, target, val, 0);
43909 	  /* target = A X C D  */
43910 	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
43911 					  const1_rtx, const0_rtx,
43912 					  GEN_INT (2+4), GEN_INT (3+4)));
43913 	  return;
43914 
43915 	case 2:
43916 	  /* tmp = target = A B C D */
43917 	  tmp = copy_to_reg (target);
43918 	  /* tmp = X B C D */
43919 	  ix86_expand_vector_set (false, tmp, val, 0);
43920 	  /* target = A B X D */
43921 	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
43922 					  const0_rtx, const1_rtx,
43923 					  GEN_INT (0+4), GEN_INT (3+4)));
43924 	  return;
43925 
43926 	case 3:
43927 	  /* tmp = target = A B C D */
43928 	  tmp = copy_to_reg (target);
43929 	  /* tmp = X B C D */
43930 	  ix86_expand_vector_set (false, tmp, val, 0);
43931 	  /* target = A B C X */
43932 	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
43933 					  const0_rtx, const1_rtx,
43934 					  GEN_INT (2+4), GEN_INT (0+4)));
43935 	  return;
43936 
43937 	default:
43938 	  gcc_unreachable ();
43939 	}
43940       break;
43941 
43942     case E_V4SImode:
43943       use_vec_merge = TARGET_SSE4_1;
43944       if (use_vec_merge)
43945 	break;
43946 
43947       /* Element 0 handled by vec_merge below.  */
43948       if (elt == 0)
43949 	{
43950 	  use_vec_merge = true;
43951 	  break;
43952 	}
43953 
43954       if (TARGET_SSE2)
43955 	{
43956 	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
43957 	     store into element 0, then shuffle them back.  */
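	  /* The same shuffle control word is used before and after the
	     element 0 store: it merely exchanges lanes 0 and ELT, so
	     applying it twice restores the original order.  */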
43958 
43959 	  rtx order[4];
43960 
43961 	  order[0] = GEN_INT (elt);
43962 	  order[1] = const1_rtx;
43963 	  order[2] = const2_rtx;
43964 	  order[3] = GEN_INT (3);
43965 	  order[elt] = const0_rtx;
43966 
43967 	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
43968 					order[1], order[2], order[3]));
43969 
43970 	  ix86_expand_vector_set (false, target, val, 0);
43971 
43972 	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
43973 					order[1], order[2], order[3]));
43974 	}
43975       else
43976 	{
43977 	  /* For SSE1, we have to reuse the V4SF code.  */
43978 	  rtx t = gen_reg_rtx (V4SFmode);
43979 	  emit_move_insn (t, gen_lowpart (V4SFmode, target));
43980 	  ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
43981 	  emit_move_insn (target, gen_lowpart (mode, t));
43982 	}
43983       return;
43984 
43985     case E_V8HImode:
43986       use_vec_merge = TARGET_SSE2;
43987       break;
43988     case E_V4HImode:
43989       use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
43990       break;
43991 
43992     case E_V16QImode:
43993       use_vec_merge = TARGET_SSE4_1;
43994       break;
43995 
43996     case E_V8QImode:
43997       break;
43998 
43999     case E_V32QImode:
44000       half_mode = V16QImode;
44001       j = 0;
44002       n = 16;
44003       goto half;
44004 
44005     case E_V16HImode:
44006       half_mode = V8HImode;
44007       j = 1;
44008       n = 8;
44009       goto half;
44010 
44011     case E_V8SImode:
44012       half_mode = V4SImode;
44013       j = 2;
44014       n = 4;
44015       goto half;
44016 
44017     case E_V4DImode:
44018       half_mode = V2DImode;
44019       j = 3;
44020       n = 2;
44021       goto half;
44022 
44023     case E_V8SFmode:
44024       half_mode = V4SFmode;
44025       j = 4;
44026       n = 4;
44027       goto half;
44028 
44029     case E_V4DFmode:
44030       half_mode = V2DFmode;
44031       j = 5;
44032       n = 2;
44033       goto half;
44034 
44035 half:
44036       /* Compute offset.  */
44037       i = elt / n;
44038       elt %= n;
44039 
44040       gcc_assert (i <= 1);
44041 
44042       /* Extract the half.  */
44043       tmp = gen_reg_rtx (half_mode);
44044       emit_insn (gen_extract[j][i] (tmp, target));
44045 
44046       /* Put val in tmp at elt.  */
44047       ix86_expand_vector_set (false, tmp, val, elt);
44048 
44049       /* Put it back.  */
44050       emit_insn (gen_insert[j][i] (target, target, tmp));
44051       return;
44052 
44053     case E_V8DFmode:
44054       if (TARGET_AVX512F)
44055 	{
44056 	  mmode = QImode;
44057 	  gen_blendm = gen_avx512f_blendmv8df;
44058 	}
44059       break;
44060 
44061     case E_V8DImode:
44062       if (TARGET_AVX512F)
44063 	{
44064 	  mmode = QImode;
44065 	  gen_blendm = gen_avx512f_blendmv8di;
44066 	}
44067       break;
44068 
44069     case E_V16SFmode:
44070       if (TARGET_AVX512F)
44071 	{
44072 	  mmode = HImode;
44073 	  gen_blendm = gen_avx512f_blendmv16sf;
44074 	}
44075       break;
44076 
44077     case E_V16SImode:
44078       if (TARGET_AVX512F)
44079 	{
44080 	  mmode = HImode;
44081 	  gen_blendm = gen_avx512f_blendmv16si;
44082 	}
44083       break;
44084 
44085     case E_V32HImode:
44086       if (TARGET_AVX512BW)
44087 	{
44088 	  mmode = SImode;
44089 	  gen_blendm = gen_avx512bw_blendmv32hi;
44090 	}
44091       else if (TARGET_AVX512F)
44092 	{
44093 	  half_mode = E_V8HImode;
44094 	  n = 8;
44095 	  goto quarter;
44096 	}
44097       break;
44098 
44099     case E_V64QImode:
44100       if (TARGET_AVX512BW)
44101 	{
44102 	  mmode = DImode;
44103 	  gen_blendm = gen_avx512bw_blendmv64qi;
44104 	}
44105       else if (TARGET_AVX512F)
44106 	{
44107 	  half_mode = E_V16QImode;
44108 	  n = 16;
44109 	  goto quarter;
44110 	}
44111       break;
44112 
44113 quarter:
44114       /* Compute offset.  */
44115       i = elt / n;
44116       elt %= n;
44117 
44118       gcc_assert (i <= 3);
44119 
44120       {
44121 	/* Extract the quarter.  */
44122 	tmp = gen_reg_rtx (V4SImode);
44123 	rtx tmp2 = gen_lowpart (V16SImode, target);
44124 	rtx mask = gen_reg_rtx (QImode);
44125 
44126 	emit_move_insn (mask, constm1_rtx);
44127 	emit_insn (gen_avx512f_vextracti32x4_mask (tmp, tmp2, GEN_INT (i),
44128 						   tmp, mask));
44129 
44130 	tmp2 = gen_reg_rtx (half_mode);
44131 	emit_move_insn (tmp2, gen_lowpart (half_mode, tmp));
44132 	tmp = tmp2;
44133 
44134 	/* Put val in tmp at elt.  */
44135 	ix86_expand_vector_set (false, tmp, val, elt);
44136 
44137 	/* Put it back.  */
44138 	tmp2 = gen_reg_rtx (V16SImode);
44139 	rtx tmp3 = gen_lowpart (V16SImode, target);
44140 	mask = gen_reg_rtx (HImode);
44141 	emit_move_insn (mask, constm1_rtx);
44142 	tmp = gen_lowpart (V4SImode, tmp);
44143 	emit_insn (gen_avx512f_vinserti32x4_mask (tmp2, tmp3, tmp, GEN_INT (i),
44144 						  tmp3, mask));
44145 	emit_move_insn (target, gen_lowpart (mode, tmp2));
44146       }
44147       return;
44148 
44149     default:
44150       break;
44151     }
44152 
44153   if (mmode != VOIDmode)
44154     {
44155       tmp = gen_reg_rtx (mode);
44156       emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
44157       /* The avx512*_blendm<mode> expanders have different operand order
44158 	 from VEC_MERGE.  In VEC_MERGE, the first input operand is used for
44159 	 elements where the mask is set and the second input operand otherwise;
44160 	 in {sse,avx}*_*blend* the first input operand is used for elements
44161 	 where the mask is clear and the second input operand otherwise.  */
44162       emit_insn (gen_blendm (target, target, tmp,
44163 			     force_reg (mmode,
44164 					gen_int_mode (HOST_WIDE_INT_1U << elt,
44165 						      mmode))));
44166     }
44167   else if (use_vec_merge)
44168     {
44169       tmp = gen_rtx_VEC_DUPLICATE (mode, val);
44170       tmp = gen_rtx_VEC_MERGE (mode, tmp, target,
44171 			       GEN_INT (HOST_WIDE_INT_1U << elt));
44172       emit_insn (gen_rtx_SET (target, tmp));
44173     }
44174   else
44175     {
44176       rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
44177 
44178       emit_move_insn (mem, target);
44179 
44180       tmp = adjust_address (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode));
44181       emit_move_insn (tmp, val);
44182 
44183       emit_move_insn (target, mem);
44184     }
44185 }
44186 
44187 void
44188 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
44189 {
44190   machine_mode mode = GET_MODE (vec);
44191   machine_mode inner_mode = GET_MODE_INNER (mode);
44192   bool use_vec_extr = false;
44193   rtx tmp;
44194 
44195   switch (mode)
44196     {
44197     case E_V2SImode:
44198     case E_V2SFmode:
44199       if (!mmx_ok)
44200 	break;
44201       /* FALLTHRU */
44202 
44203     case E_V2DFmode:
44204     case E_V2DImode:
44205     case E_V2TImode:
44206     case E_V4TImode:
44207       use_vec_extr = true;
44208       break;
44209 
44210     case E_V4SFmode:
44211       use_vec_extr = TARGET_SSE4_1;
44212       if (use_vec_extr)
44213 	break;
44214 
44215       switch (elt)
44216 	{
44217 	case 0:
44218 	  tmp = vec;
44219 	  break;
44220 
44221 	case 1:
44222 	case 3:
44223 	  tmp = gen_reg_rtx (mode);
44224 	  emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
44225 				       GEN_INT (elt), GEN_INT (elt),
44226 				       GEN_INT (elt+4), GEN_INT (elt+4)));
44227 	  break;
44228 
44229 	case 2:
44230 	  tmp = gen_reg_rtx (mode);
44231 	  emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
44232 	  break;
44233 
44234 	default:
44235 	  gcc_unreachable ();
44236 	}
44237       vec = tmp;
44238       use_vec_extr = true;
44239       elt = 0;
44240       break;
44241 
44242     case E_V4SImode:
44243       use_vec_extr = TARGET_SSE4_1;
44244       if (use_vec_extr)
44245 	break;
44246 
44247       if (TARGET_SSE2)
44248 	{
44249 	  switch (elt)
44250 	    {
44251 	    case 0:
44252 	      tmp = vec;
44253 	      break;
44254 
44255 	    case 1:
44256 	    case 3:
44257 	      tmp = gen_reg_rtx (mode);
44258 	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
44259 					    GEN_INT (elt), GEN_INT (elt),
44260 					    GEN_INT (elt), GEN_INT (elt)));
44261 	      break;
44262 
44263 	    case 2:
44264 	      tmp = gen_reg_rtx (mode);
44265 	      emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
44266 	      break;
44267 
44268 	    default:
44269 	      gcc_unreachable ();
44270 	    }
44271 	  vec = tmp;
44272 	  use_vec_extr = true;
44273 	  elt = 0;
44274 	}
44275       else
44276 	{
44277 	  /* For SSE1, we have to reuse the V4SF code.  */
44278 	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
44279 				      gen_lowpart (V4SFmode, vec), elt);
44280 	  return;
44281 	}
44282       break;
44283 
44284     case E_V8HImode:
44285       use_vec_extr = TARGET_SSE2;
44286       break;
44287     case E_V4HImode:
44288       use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44289       break;
44290 
44291     case E_V16QImode:
44292       use_vec_extr = TARGET_SSE4_1;
44293       break;
44294 
44295     case E_V8SFmode:
44296       if (TARGET_AVX)
44297 	{
44298 	  tmp = gen_reg_rtx (V4SFmode);
44299 	  if (elt < 4)
44300 	    emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
44301 	  else
44302 	    emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
44303 	  ix86_expand_vector_extract (false, target, tmp, elt & 3);
44304 	  return;
44305 	}
44306       break;
44307 
44308     case E_V4DFmode:
44309       if (TARGET_AVX)
44310 	{
44311 	  tmp = gen_reg_rtx (V2DFmode);
44312 	  if (elt < 2)
44313 	    emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
44314 	  else
44315 	    emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
44316 	  ix86_expand_vector_extract (false, target, tmp, elt & 1);
44317 	  return;
44318 	}
44319       break;
44320 
44321     case E_V32QImode:
44322       if (TARGET_AVX)
44323 	{
44324 	  tmp = gen_reg_rtx (V16QImode);
44325 	  if (elt < 16)
44326 	    emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
44327 	  else
44328 	    emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
44329 	  ix86_expand_vector_extract (false, target, tmp, elt & 15);
44330 	  return;
44331 	}
44332       break;
44333 
44334     case E_V16HImode:
44335       if (TARGET_AVX)
44336 	{
44337 	  tmp = gen_reg_rtx (V8HImode);
44338 	  if (elt < 8)
44339 	    emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
44340 	  else
44341 	    emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
44342 	  ix86_expand_vector_extract (false, target, tmp, elt & 7);
44343 	  return;
44344 	}
44345       break;
44346 
44347     case E_V8SImode:
44348       if (TARGET_AVX)
44349 	{
44350 	  tmp = gen_reg_rtx (V4SImode);
44351 	  if (elt < 4)
44352 	    emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
44353 	  else
44354 	    emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
44355 	  ix86_expand_vector_extract (false, target, tmp, elt & 3);
44356 	  return;
44357 	}
44358       break;
44359 
44360     case E_V4DImode:
44361       if (TARGET_AVX)
44362 	{
44363 	  tmp = gen_reg_rtx (V2DImode);
44364 	  if (elt < 2)
44365 	    emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
44366 	  else
44367 	    emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
44368 	  ix86_expand_vector_extract (false, target, tmp, elt & 1);
44369 	  return;
44370 	}
44371       break;
44372 
44373     case E_V32HImode:
44374       if (TARGET_AVX512BW)
44375 	{
44376 	  tmp = gen_reg_rtx (V16HImode);
44377 	  if (elt < 16)
44378 	    emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
44379 	  else
44380 	    emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
44381 	  ix86_expand_vector_extract (false, target, tmp, elt & 15);
44382 	  return;
44383 	}
44384       break;
44385 
44386     case E_V64QImode:
44387       if (TARGET_AVX512BW)
44388 	{
44389 	  tmp = gen_reg_rtx (V32QImode);
44390 	  if (elt < 32)
44391 	    emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
44392 	  else
44393 	    emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
44394 	  ix86_expand_vector_extract (false, target, tmp, elt & 31);
44395 	  return;
44396 	}
44397       break;
44398 
44399     case E_V16SFmode:
44400       tmp = gen_reg_rtx (V8SFmode);
44401       if (elt < 8)
44402 	emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
44403       else
44404 	emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
44405       ix86_expand_vector_extract (false, target, tmp, elt & 7);
44406       return;
44407 
44408     case E_V8DFmode:
44409       tmp = gen_reg_rtx (V4DFmode);
44410       if (elt < 4)
44411 	emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
44412       else
44413 	emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
44414       ix86_expand_vector_extract (false, target, tmp, elt & 3);
44415       return;
44416 
44417     case E_V16SImode:
44418       tmp = gen_reg_rtx (V8SImode);
44419       if (elt < 8)
44420 	emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
44421       else
44422 	emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
44423       ix86_expand_vector_extract (false, target, tmp, elt & 7);
44424       return;
44425 
44426     case E_V8DImode:
44427       tmp = gen_reg_rtx (V4DImode);
44428       if (elt < 4)
44429 	emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
44430       else
44431 	emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
44432       ix86_expand_vector_extract (false, target, tmp, elt & 3);
44433       return;
44434 
44435     case E_V8QImode:
44436       /* ??? Could extract the appropriate HImode element and shift.  */
44437     default:
44438       break;
44439     }
44440 
44441   if (use_vec_extr)
44442     {
44443       tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
44444       tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
44445 
44446       /* Let the rtl optimizers know about the zero extension performed.  */
44447       if (inner_mode == QImode || inner_mode == HImode)
44448 	{
44449 	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
44450 	  target = gen_lowpart (SImode, target);
44451 	}
44452 
44453       emit_insn (gen_rtx_SET (target, tmp));
44454     }
44455   else
44456     {
44457       rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
44458 
44459       emit_move_insn (mem, vec);
44460 
44461       tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
44462       emit_move_insn (target, tmp);
44463     }
44464 }
44465 
44466 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
44467    to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
44468    The upper bits of DEST are undefined, though they shouldn't cause
44469    exceptions (some bits from src or all zeros are ok).  */
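/* For example, for a V4SImode SRC { a, b, c, d } and I == 128 the low
   64 bits of DEST become { c, d }, so a following element-wise operation
   on DEST and SRC combines a with c and b with d.  */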
44470 
44471 static void
44472 emit_reduc_half (rtx dest, rtx src, int i)
44473 {
44474   rtx tem, d = dest;
44475   switch (GET_MODE (src))
44476     {
44477     case E_V4SFmode:
44478       if (i == 128)
44479 	tem = gen_sse_movhlps (dest, src, src);
44480       else
44481 	tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
44482 				   GEN_INT (1 + 4), GEN_INT (1 + 4));
44483       break;
44484     case E_V2DFmode:
44485       tem = gen_vec_interleave_highv2df (dest, src, src);
44486       break;
44487     case E_V16QImode:
44488     case E_V8HImode:
44489     case E_V4SImode:
44490     case E_V2DImode:
44491       d = gen_reg_rtx (V1TImode);
44492       tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
44493 				GEN_INT (i / 2));
44494       break;
44495     case E_V8SFmode:
44496       if (i == 256)
44497 	tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
44498       else
44499 	tem = gen_avx_shufps256 (dest, src, src,
44500 				 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
44501       break;
44502     case E_V4DFmode:
44503       if (i == 256)
44504 	tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
44505       else
44506 	tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
44507       break;
44508     case E_V32QImode:
44509     case E_V16HImode:
44510     case E_V8SImode:
44511     case E_V4DImode:
44512       if (i == 256)
44513 	{
44514 	  if (GET_MODE (dest) != V4DImode)
44515 	    d = gen_reg_rtx (V4DImode);
44516 	  tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
44517 				   gen_lowpart (V4DImode, src),
44518 				   const1_rtx);
44519 	}
44520       else
44521 	{
44522 	  d = gen_reg_rtx (V2TImode);
44523 	  tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
44524 				    GEN_INT (i / 2));
44525 	}
44526       break;
44527     case E_V64QImode:
44528     case E_V32HImode:
44529     case E_V16SImode:
44530     case E_V16SFmode:
44531     case E_V8DImode:
44532     case E_V8DFmode:
44533       if (i > 128)
44534 	tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
44535 				      gen_lowpart (V16SImode, src),
44536 				      gen_lowpart (V16SImode, src),
44537 				      GEN_INT (0x4 + (i == 512 ? 4 : 0)),
44538 				      GEN_INT (0x5 + (i == 512 ? 4 : 0)),
44539 				      GEN_INT (0x6 + (i == 512 ? 4 : 0)),
44540 				      GEN_INT (0x7 + (i == 512 ? 4 : 0)),
44541 				      GEN_INT (0xC), GEN_INT (0xD),
44542 				      GEN_INT (0xE), GEN_INT (0xF),
44543 				      GEN_INT (0x10), GEN_INT (0x11),
44544 				      GEN_INT (0x12), GEN_INT (0x13),
44545 				      GEN_INT (0x14), GEN_INT (0x15),
44546 				      GEN_INT (0x16), GEN_INT (0x17));
44547       else
44548 	tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
44549 				   gen_lowpart (V16SImode, src),
44550 				   GEN_INT (i == 128 ? 0x2 : 0x1),
44551 				   GEN_INT (0x3),
44552 				   GEN_INT (0x3),
44553 				   GEN_INT (0x3),
44554 				   GEN_INT (i == 128 ? 0x6 : 0x5),
44555 				   GEN_INT (0x7),
44556 				   GEN_INT (0x7),
44557 				   GEN_INT (0x7),
44558 				   GEN_INT (i == 128 ? 0xA : 0x9),
44559 				   GEN_INT (0xB),
44560 				   GEN_INT (0xB),
44561 				   GEN_INT (0xB),
44562 				   GEN_INT (i == 128 ? 0xE : 0xD),
44563 				   GEN_INT (0xF),
44564 				   GEN_INT (0xF),
44565 				   GEN_INT (0xF));
44566       break;
44567     default:
44568       gcc_unreachable ();
44569     }
44570   emit_insn (tem);
44571   if (d != dest)
44572     emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
44573 }
44574 
44575 /* Expand a vector reduction.  FN is the binary pattern to reduce;
44576    DEST is the destination; IN is the input vector.  */
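/* The loop below halves the reduction width at each step, so an N-element
   vector needs log2 (N) applications of FN; the fully reduced value ends
   up in element 0 of DEST (the remaining elements are not meaningful).  */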
44577 
44578 void
44579 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
44580 {
44581   rtx half, dst, vec = in;
44582   machine_mode mode = GET_MODE (in);
44583   int i;
44584 
44585   /* SSE4 has a special instruction for V8HImode UMIN reduction.  */
44586   if (TARGET_SSE4_1
44587       && mode == V8HImode
44588       && fn == gen_uminv8hi3)
44589     {
44590       emit_insn (gen_sse4_1_phminposuw (dest, in));
44591       return;
44592     }
44593 
44594   for (i = GET_MODE_BITSIZE (mode);
44595        i > GET_MODE_UNIT_BITSIZE (mode);
44596        i >>= 1)
44597     {
44598       half = gen_reg_rtx (mode);
44599       emit_reduc_half (half, vec, i);
44600       if (i == GET_MODE_UNIT_BITSIZE (mode) * 2)
44601 	dst = dest;
44602       else
44603 	dst = gen_reg_rtx (mode);
44604       emit_insn (fn (dst, half, vec));
44605       vec = dst;
44606     }
44607 }
44608 
44609 /* Target hook for scalar_mode_supported_p.  */
44610 static bool
44611 ix86_scalar_mode_supported_p (scalar_mode mode)
44612 {
44613   if (DECIMAL_FLOAT_MODE_P (mode))
44614     return default_decimal_float_supported_p ();
44615   else if (mode == TFmode)
44616     return true;
44617   else
44618     return default_scalar_mode_supported_p (mode);
44619 }
44620 
44621 /* Implements target hook vector_mode_supported_p.  */
44622 static bool
44623 ix86_vector_mode_supported_p (machine_mode mode)
44624 {
44625   if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
44626     return true;
44627   if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
44628     return true;
44629   if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
44630     return true;
44631   if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
44632     return true;
44633   if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
44634     return true;
44635   if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
44636     return true;
44637   return false;
44638 }
44639 
44640 /* Target hook for c_mode_for_suffix.  */
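/* This is what makes a 'q' literal suffix (e.g. 1.0q) produce a __float128
   (TFmode) constant and a 'w' suffix (e.g. 1.0w) a __float80 (XFmode)
   constant.  */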
44641 static machine_mode
44642 ix86_c_mode_for_suffix (char suffix)
44643 {
44644   if (suffix == 'q')
44645     return TFmode;
44646   if (suffix == 'w')
44647     return XFmode;
44648 
44649   return VOIDmode;
44650 }
44651 
44652 /* Worker function for TARGET_MD_ASM_ADJUST.
44653 
44654    We implement asm flag outputs, and maintain source compatibility
44655    with the old cc0-based compiler.  */
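/* An illustrative use of the flag output constraints handled below:

     int eq;
     asm ("cmpl %2, %1" : "=@ccz" (eq) : "r" (a), "r" (b));

   stores the zero flag of the comparison directly into EQ, without a
   setcc instruction in the asm template (a and b stand for arbitrary
   int variables).  */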
44656 
44657 static rtx_insn *
44658 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
44659 		    vec<const char *> &constraints,
44660 		    vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
44661 {
44662   clobbers.safe_push (gen_rtx_REG (CCFPmode, FPSR_REG));
44663   SET_HARD_REG_BIT (clobbered_regs, FPSR_REG);
44664 
44665   bool saw_asm_flag = false;
44666 
44667   start_sequence ();
44668   for (unsigned i = 0, n = outputs.length (); i < n; ++i)
44669     {
44670       const char *con = constraints[i];
44671       if (strncmp (con, "=@cc", 4) != 0)
44672 	continue;
44673       con += 4;
44674       if (strchr (con, ',') != NULL)
44675 	{
44676 	  error ("alternatives not allowed in asm flag output");
44677 	  continue;
44678 	}
44679 
44680       bool invert = false;
44681       if (con[0] == 'n')
44682 	invert = true, con++;
44683 
44684       machine_mode mode = CCmode;
44685       rtx_code code = UNKNOWN;
44686 
44687       switch (con[0])
44688 	{
44689 	case 'a':
44690 	  if (con[1] == 0)
44691 	    mode = CCAmode, code = EQ;
44692 	  else if (con[1] == 'e' && con[2] == 0)
44693 	    mode = CCCmode, code = NE;
44694 	  break;
44695 	case 'b':
44696 	  if (con[1] == 0)
44697 	    mode = CCCmode, code = EQ;
44698 	  else if (con[1] == 'e' && con[2] == 0)
44699 	    mode = CCAmode, code = NE;
44700 	  break;
44701 	case 'c':
44702 	  if (con[1] == 0)
44703 	    mode = CCCmode, code = EQ;
44704 	  break;
44705 	case 'e':
44706 	  if (con[1] == 0)
44707 	    mode = CCZmode, code = EQ;
44708 	  break;
44709 	case 'g':
44710 	  if (con[1] == 0)
44711 	    mode = CCGCmode, code = GT;
44712 	  else if (con[1] == 'e' && con[2] == 0)
44713 	    mode = CCGCmode, code = GE;
44714 	  break;
44715 	case 'l':
44716 	  if (con[1] == 0)
44717 	    mode = CCGCmode, code = LT;
44718 	  else if (con[1] == 'e' && con[2] == 0)
44719 	    mode = CCGCmode, code = LE;
44720 	  break;
44721 	case 'o':
44722 	  if (con[1] == 0)
44723 	    mode = CCOmode, code = EQ;
44724 	  break;
44725 	case 'p':
44726 	  if (con[1] == 0)
44727 	    mode = CCPmode, code = EQ;
44728 	  break;
44729 	case 's':
44730 	  if (con[1] == 0)
44731 	    mode = CCSmode, code = EQ;
44732 	  break;
44733 	case 'z':
44734 	  if (con[1] == 0)
44735 	    mode = CCZmode, code = EQ;
44736 	  break;
44737 	}
44738       if (code == UNKNOWN)
44739 	{
44740 	  error ("unknown asm flag output %qs", constraints[i]);
44741 	  continue;
44742 	}
44743       if (invert)
44744 	code = reverse_condition (code);
44745 
44746       rtx dest = outputs[i];
44747       if (!saw_asm_flag)
44748 	{
44749 	  /* This is the first asm flag output.  Here we put the flags
44750 	     register in as the real output and adjust the condition to
44751 	     allow it.  */
44752 	  constraints[i] = "=Bf";
44753 	  outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
44754 	  saw_asm_flag = true;
44755 	}
44756       else
44757 	{
44758 	  /* We don't need the flags register as output twice.  */
44759 	  constraints[i] = "=X";
44760 	  outputs[i] = gen_rtx_SCRATCH (SImode);
44761 	}
44762 
44763       rtx x = gen_rtx_REG (mode, FLAGS_REG);
44764       x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
44765 
44766       machine_mode dest_mode = GET_MODE (dest);
44767       if (!SCALAR_INT_MODE_P (dest_mode))
44768 	{
44769 	  error ("invalid type for asm flag output");
44770 	  continue;
44771 	}
44772 
44773       if (dest_mode == DImode && !TARGET_64BIT)
44774 	dest_mode = SImode;
44775 
44776       if (dest_mode != QImode)
44777 	{
44778 	  rtx destqi = gen_reg_rtx (QImode);
44779 	  emit_insn (gen_rtx_SET (destqi, x));
44780 
44781 	  if (TARGET_ZERO_EXTEND_WITH_AND
44782 	      && optimize_function_for_speed_p (cfun))
44783 	    {
44784 	      x = force_reg (dest_mode, const0_rtx);
44785 
44786 	      emit_insn (gen_movstrictqi
44787 			 (gen_lowpart (QImode, x), destqi));
44788 	    }
44789 	  else
44790 	    x = gen_rtx_ZERO_EXTEND (dest_mode, destqi);
44791 	}
44792 
44793       if (dest_mode != GET_MODE (dest))
44794 	{
44795 	  rtx tmp = gen_reg_rtx (SImode);
44796 
44797 	  emit_insn (gen_rtx_SET (tmp, x));
44798 	  emit_insn (gen_zero_extendsidi2 (dest, tmp));
44799 	}
44800       else
44801 	emit_insn (gen_rtx_SET (dest, x));
44802     }
44803   rtx_insn *seq = get_insns ();
44804   end_sequence ();
44805 
44806   if (saw_asm_flag)
44807     return seq;
44808   else
44809     {
44810       /* If we had no asm flag outputs, clobber the flags.  */
44811       clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
44812       SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
44813       return NULL;
44814     }
44815 }
44816 
44817 /* Implements the target hook targetm.asm.encode_section_info.  */
44818 
44819 static void ATTRIBUTE_UNUSED
44820 ix86_encode_section_info (tree decl, rtx rtl, int first)
44821 {
44822   default_encode_section_info (decl, rtl, first);
44823 
44824   if (ix86_in_large_data_p (decl))
44825     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
44826 }
44827 
44828 /* Worker function for REVERSE_CONDITION.  */
44829 
44830 enum rtx_code
44831 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
44832 {
44833   return (mode == CCFPmode
44834 	  ? reverse_condition_maybe_unordered (code)
44835 	  : reverse_condition (code));
44836 }
44837 
44838 /* Output code to perform an x87 FP register move, from OPERANDS[1]
44839    to OPERANDS[0].  */
44840 
44841 const char *
44842 output_387_reg_move (rtx_insn *insn, rtx *operands)
44843 {
44844   if (REG_P (operands[0]))
44845     {
44846       if (REG_P (operands[1])
44847 	  && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
44848 	{
44849 	  if (REGNO (operands[0]) == FIRST_STACK_REG)
44850 	    return output_387_ffreep (operands, 0);
44851 	  return "fstp\t%y0";
44852 	}
44853       if (STACK_TOP_P (operands[0]))
44854 	return "fld%Z1\t%y1";
44855       return "fst\t%y0";
44856     }
44857   else if (MEM_P (operands[0]))
44858     {
44859       gcc_assert (REG_P (operands[1]));
44860       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
44861 	return "fstp%Z0\t%y0";
44862       else
44863 	{
44864 	  /* There is no non-popping store to memory for XFmode.
44865 	     So if we need one, follow the store with a load.  */
44866 	  if (GET_MODE (operands[0]) == XFmode)
44867 	    return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
44868 	  else
44869 	    return "fst%Z0\t%y0";
44870 	}
44871     }
44872   else
44873     gcc_unreachable();
44874 }
44875 
44876 /* Output code to perform a conditional jump to LABEL if the C2 flag in
44877    the FP status register is set.  */
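/* Two sequences are possible: when SAHF is used, the stored status word
   is copied into the flags register, where C2 appears as PF and can be
   tested with an UNORDERED comparison; otherwise the 0x04 bit of the
   high status-word byte (C2, bit 10 of the full status word) is tested
   directly.  */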
44878 
44879 void
44880 ix86_emit_fp_unordered_jump (rtx label)
44881 {
44882   rtx reg = gen_reg_rtx (HImode);
44883   rtx temp;
44884 
44885   emit_insn (gen_x86_fnstsw_1 (reg));
44886 
44887   if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
44888     {
44889       emit_insn (gen_x86_sahf_1 (reg));
44890 
44891       temp = gen_rtx_REG (CCmode, FLAGS_REG);
44892       temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
44893     }
44894   else
44895     {
44896       emit_insn (gen_testqi_ext_1_ccno (reg, GEN_INT (0x04)));
44897 
44898       temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
44899       temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
44900     }
44901 
44902   temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
44903 			      gen_rtx_LABEL_REF (VOIDmode, label),
44904 			      pc_rtx);
44905   temp = gen_rtx_SET (pc_rtx, temp);
44906 
44907   emit_jump_insn (temp);
44908   predict_jump (REG_BR_PROB_BASE * 10 / 100);
44909 }
44910 
44911 /* Output code to perform a log1p XFmode calculation.  */
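/* fyl2xp1 computes y * log2 (x + 1) but is only specified for
   |x| < 1 - sqrt(2)/2 ~ 0.2929, the constant compared against below.
   Small arguments therefore use fyl2xp1 directly; larger ones fall back
   to fyl2x on 1 + x.  Both paths load the fldln2 constant as y, scaling
   the base-2 logarithm into a natural logarithm.  */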
44912 
44913 void ix86_emit_i387_log1p (rtx op0, rtx op1)
44914 {
44915   rtx_code_label *label1 = gen_label_rtx ();
44916   rtx_code_label *label2 = gen_label_rtx ();
44917 
44918   rtx tmp = gen_reg_rtx (XFmode);
44919   rtx tmp2 = gen_reg_rtx (XFmode);
44920   rtx test;
44921 
44922   emit_insn (gen_absxf2 (tmp, op1));
44923   test = gen_rtx_GE (VOIDmode, tmp,
44924     const_double_from_real_value (
44925        REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
44926        XFmode));
44927   emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
44928 
44929   emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
44930   emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
44931   emit_jump (label2);
44932 
44933   emit_label (label1);
44934   emit_move_insn (tmp, CONST1_RTX (XFmode));
44935   emit_insn (gen_addxf3 (tmp, op1, tmp));
44936   emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
44937   emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
44938 
44939   emit_label (label2);
44940 }
44941 
44942 /* Emit code for round calculation.  */
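/* The expansion computes round (a) as sgn (a) * floor (fabs (a) + 0.5):
   fxam records the sign of the input in C1 (the 0x02 bit of the high
   status-word byte tested near the end), the magnitude plus 0.5 is
   rounded down, and the result is negated again if the input was
   negative.  E.g. for a == -2.3: e1 = 2.3, e2 = 2.8, the floor gives 2,
   and the final negation yields -2.  */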
44943 void ix86_emit_i387_round (rtx op0, rtx op1)
44944 {
44945   machine_mode inmode = GET_MODE (op1);
44946   machine_mode outmode = GET_MODE (op0);
44947   rtx e1, e2, res, tmp, tmp1, half;
44948   rtx scratch = gen_reg_rtx (HImode);
44949   rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
44950   rtx_code_label *jump_label = gen_label_rtx ();
44951   rtx insn;
44952   rtx (*gen_abs) (rtx, rtx);
44953   rtx (*gen_neg) (rtx, rtx);
44954 
44955   switch (inmode)
44956     {
44957     case E_SFmode:
44958       gen_abs = gen_abssf2;
44959       break;
44960     case E_DFmode:
44961       gen_abs = gen_absdf2;
44962       break;
44963     case E_XFmode:
44964       gen_abs = gen_absxf2;
44965       break;
44966     default:
44967       gcc_unreachable ();
44968     }
44969 
44970   switch (outmode)
44971     {
44972     case E_SFmode:
44973       gen_neg = gen_negsf2;
44974       break;
44975     case E_DFmode:
44976       gen_neg = gen_negdf2;
44977       break;
44978     case E_XFmode:
44979       gen_neg = gen_negxf2;
44980       break;
44981     case E_HImode:
44982       gen_neg = gen_neghi2;
44983       break;
44984     case E_SImode:
44985       gen_neg = gen_negsi2;
44986       break;
44987     case E_DImode:
44988       gen_neg = gen_negdi2;
44989       break;
44990     default:
44991       gcc_unreachable ();
44992     }
44993 
44994   e1 = gen_reg_rtx (inmode);
44995   e2 = gen_reg_rtx (inmode);
44996   res = gen_reg_rtx (outmode);
44997 
44998   half = const_double_from_real_value (dconsthalf, inmode);
44999 
45000   /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
45001 
45002   /* scratch = fxam(op1) */
45003   emit_insn (gen_rtx_SET (scratch,
45004 			  gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
45005 					  UNSPEC_FXAM)));
45006   /* e1 = fabs(op1) */
45007   emit_insn (gen_abs (e1, op1));
45008 
45009   /* e2 = e1 + 0.5 */
45010   half = force_reg (inmode, half);
45011   emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (inmode, e1, half)));
45012 
45013   /* res = floor(e2) */
45014   if (inmode != XFmode)
45015     {
45016       tmp1 = gen_reg_rtx (XFmode);
45017 
45018       emit_insn (gen_rtx_SET (tmp1, gen_rtx_FLOAT_EXTEND (XFmode, e2)));
45019     }
45020   else
45021     tmp1 = e2;
45022 
45023   switch (outmode)
45024     {
45025     case E_SFmode:
45026     case E_DFmode:
45027       {
45028 	rtx tmp0 = gen_reg_rtx (XFmode);
45029 
45030 	emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
45031 
45032 	emit_insn (gen_rtx_SET (res,
45033 				gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
45034 						UNSPEC_TRUNC_NOOP)));
45035       }
45036       break;
45037     case E_XFmode:
45038       emit_insn (gen_frndintxf2_floor (res, tmp1));
45039       break;
45040     case E_HImode:
45041       emit_insn (gen_lfloorxfhi2 (res, tmp1));
45042       break;
45043     case E_SImode:
45044       emit_insn (gen_lfloorxfsi2 (res, tmp1));
45045       break;
45046     case E_DImode:
45047       emit_insn (gen_lfloorxfdi2 (res, tmp1));
45048       break;
45049     default:
45050       gcc_unreachable ();
45051     }
45052 
45053   /* flags = signbit(a) */
45054   emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02)));
45055 
45056   /* if (flags) then res = -res */
45057   tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
45058 			      gen_rtx_EQ (VOIDmode, flags, const0_rtx),
45059 			      gen_rtx_LABEL_REF (VOIDmode, jump_label),
45060 			      pc_rtx);
45061   insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
45062   predict_jump (REG_BR_PROB_BASE * 50 / 100);
45063   JUMP_LABEL (insn) = jump_label;
45064 
45065   emit_insn (gen_neg (res, res));
45066 
45067   emit_label (jump_label);
45068   LABEL_NUSES (jump_label) = 1;
45069 
45070   emit_move_insn (op0, res);
45071 }
45072 
45073 /* Output code to perform a Newton-Raphson approximation of a single precision
45074    floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */
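/* A single Newton-Raphson step refines an estimate x0 of 1/b as
   x1 = x0 * (2 - b * x0) = (x0 + x0) - (b * x0 * x0), which is exactly
   the e1 - e0 sequence emitted below; the quotient is then approximated
   as a * x1.  */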
45075 
45076 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
45077 {
45078   rtx x0, x1, e0, e1;
45079 
45080   x0 = gen_reg_rtx (mode);
45081   e0 = gen_reg_rtx (mode);
45082   e1 = gen_reg_rtx (mode);
45083   x1 = gen_reg_rtx (mode);
45084 
45085   /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
45086 
45087   b = force_reg (mode, b);
45088 
45089   /* x0 = rcp(b) estimate */
45090   if (mode == V16SFmode || mode == V8DFmode)
45091     {
45092       if (TARGET_AVX512ER)
45093 	{
45094 	  emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45095 						      UNSPEC_RCP28)));
45096 	  /* res = a * x0 */
45097 	  emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x0)));
45098 	  return;
45099 	}
45100       else
45101 	emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45102 						    UNSPEC_RCP14)));
45103     }
45104   else
45105     emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45106 						UNSPEC_RCP)));
45107 
45108   /* e0 = x0 * b */
45109   emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
45110 
45111   /* e0 = x0 * e0 */
45112   emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
45113 
45114   /* e1 = x0 + x0 */
45115   emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
45116 
45117   /* x1 = e1 - e0 */
45118   emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
45119 
45120   /* res = a * x1 */
45121   emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
45122 }
45123 
45124 /* Output code to perform a Newton-Rhapson approximation of a
45125    single precision floating point [reciprocal] square root.  */
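/* A single Newton-Raphson step refines an estimate x0 of 1/sqrt(a) as
   x1 = 0.5 * x0 * (3 - a * x0 * x0), rewritten below as
   -0.5 * x0 * (a * x0 * x0 - 3); multiplying the refined reciprocal
   square root by a yields the square root itself.  */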
45126 
45127 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
45128 {
45129   rtx x0, e0, e1, e2, e3, mthree, mhalf;
45130   REAL_VALUE_TYPE r;
45131   int unspec;
45132 
45133   x0 = gen_reg_rtx (mode);
45134   e0 = gen_reg_rtx (mode);
45135   e1 = gen_reg_rtx (mode);
45136   e2 = gen_reg_rtx (mode);
45137   e3 = gen_reg_rtx (mode);
45138 
45139   if (TARGET_AVX512ER && mode == V16SFmode)
45140     {
45141       if (recip)
45142 	/* res = rsqrt28(a) estimate */
45143 	emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
45144 						     UNSPEC_RSQRT28)));
45145       else
45146 	{
45147 	  /* x0 = rsqrt28(a) estimate */
45148 	  emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
45149 						      UNSPEC_RSQRT28)));
45150 	  /* res = rcp28(x0) estimate */
45151 	  emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, x0),
45152 						       UNSPEC_RCP28)));
45153 	}
45154       return;
45155     }
45156 
45157   real_from_integer (&r, VOIDmode, -3, SIGNED);
45158   mthree = const_double_from_real_value (r, SFmode);
45159 
45160   real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
45161   mhalf = const_double_from_real_value (r, SFmode);
45162   unspec = UNSPEC_RSQRT;
45163 
45164   if (VECTOR_MODE_P (mode))
45165     {
45166       mthree = ix86_build_const_vector (mode, true, mthree);
45167       mhalf = ix86_build_const_vector (mode, true, mhalf);
45168       /* There is no 512-bit rsqrt.  There is however rsqrt14.  */
45169       if (GET_MODE_SIZE (mode) == 64)
45170 	unspec = UNSPEC_RSQRT14;
45171     }
45172 
45173   /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
45174      rsqrt(a) = -0.5     * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
45175 
45176   a = force_reg (mode, a);
45177 
45178   /* x0 = rsqrt(a) estimate */
45179   emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
45180 					      unspec)));
45181 
45182   /* If a == 0.0, mask out the Inf from rsqrt (a) to avoid a NaN for sqrt (0.0).  */
45183   if (!recip)
45184     {
45185       rtx zero = force_reg (mode, CONST0_RTX(mode));
45186       rtx mask;
45187 
45188       /* Handle masked compare.  */
45189       if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
45190 	{
45191 	  mask = gen_reg_rtx (HImode);
45192 	  /* Imm value 0x4 corresponds to not-equal comparison.  */
45193 	  emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
45194 	  emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
45195 	}
45196       else
45197 	{
45198 	  mask = gen_reg_rtx (mode);
45199 	  emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a)));
45200 	  emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask)));
45201 	}
45202     }
45203 
45204   /* e0 = x0 * a */
45205   emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
45206   /* e1 = e0 * x0 */
45207   emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
45208 
45209   /* e2 = e1 - 3.0; mthree holds -3.0, so this is emitted as a PLUS.  */
45210   mthree = force_reg (mode, mthree);
45211   emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
45212 
45213   mhalf = force_reg (mode, mhalf);
45214   if (recip)
45215     /* e3 = -.5 * x0 */
45216     emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf)));
45217   else
45218     /* e3 = -.5 * e0 */
45219     emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf)));
45220   /* ret = e2 * e3 */
45221   emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3)));
45222 }
45223 
45224 #ifdef TARGET_SOLARIS
45225 /* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */
45226 
45227 static void
45228 i386_solaris_elf_named_section (const char *name, unsigned int flags,
45229 				tree decl)
45230 {
45231   /* With Binutils 2.15, the "@unwind" marker must be specified on
45232      every occurrence of the ".eh_frame" section, not just the first
45233      one.  */
45234   if (TARGET_64BIT
45235       && strcmp (name, ".eh_frame") == 0)
45236     {
45237       fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
45238 	       flags & SECTION_WRITE ? "aw" : "a");
45239       return;
45240     }
45241 
45242 #ifndef USE_GAS
45243   if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
45244     {
45245       solaris_elf_asm_comdat_section (name, flags, decl);
45246       return;
45247     }
45248 #endif
45249 
45250   default_elf_asm_named_section (name, flags, decl);
45251 }
45252 #endif /* TARGET_SOLARIS */
45253 
45254 /* Return the mangling of TYPE if it is an extended fundamental type.  */
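/* E.g. under the Itanium C++ ABI used here, void f (__float128) mangles
   as _Z1fg and void f (long double) as _Z1fe.  */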
45255 
45256 static const char *
45257 ix86_mangle_type (const_tree type)
45258 {
45259   type = TYPE_MAIN_VARIANT (type);
45260 
45261   if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
45262       && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
45263     return NULL;
45264 
45265   switch (TYPE_MODE (type))
45266     {
45267     case E_TFmode:
45268       /* __float128 is "g".  */
45269       return "g";
45270     case E_XFmode:
45271       /* "long double" or __float80 is "e".  */
45272       return "e";
45273     default:
45274       return NULL;
45275     }
45276 }
45277 
45278 static GTY(()) tree ix86_tls_stack_chk_guard_decl;
45279 
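/* Implement TARGET_STACK_PROTECT_GUARD: return the expression the stack
   protector compares the saved canary against.  With a TLS guard this is
   a load at a fixed offset from the TLS segment register (typically
   %gs:20 for 32-bit and %fs:40 for 64-bit glibc targets); the
   -mstack-protector-guard-reg=, -offset= and -symbol= options reflected
   in the variables used below override the register, offset or symbol.  */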
45280 static tree
45281 ix86_stack_protect_guard (void)
45282 {
45283   if (TARGET_SSP_TLS_GUARD)
45284     {
45285       tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
45286       int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
45287       tree type = build_qualified_type (type_node, qual);
45288       tree t;
45289 
45290       if (global_options_set.x_ix86_stack_protector_guard_symbol_str)
45291 	{
45292 	  t = ix86_tls_stack_chk_guard_decl;
45293 
45294 	  if (t == NULL)
45295 	    {
45296 	      rtx x;
45297 
45298 	      t = build_decl
45299 		(UNKNOWN_LOCATION, VAR_DECL,
45300 		 get_identifier (ix86_stack_protector_guard_symbol_str),
45301 		 type);
45302 	      TREE_STATIC (t) = 1;
45303 	      TREE_PUBLIC (t) = 1;
45304 	      DECL_EXTERNAL (t) = 1;
45305 	      TREE_USED (t) = 1;
45306 	      TREE_THIS_VOLATILE (t) = 1;
45307 	      DECL_ARTIFICIAL (t) = 1;
45308 	      DECL_IGNORED_P (t) = 1;
45309 
45310 	      /* Do not share RTL as the declaration is visible outside of
45311 		 current function.  */
45312 	      x = DECL_RTL (t);
45313 	      RTX_FLAG (x, used) = 1;
45314 
45315 	      ix86_tls_stack_chk_guard_decl = t;
45316 	    }
45317 	}
45318       else
45319 	{
45320 	  tree asptrtype = build_pointer_type (type);
45321 
45322 	  t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
45323 	  t = build2 (MEM_REF, asptrtype, t,
45324 		      build_int_cst (asptrtype, 0));
45325 	}
45326 
45327       return t;
45328     }
45329 
45330   return default_stack_protect_guard ();
45331 }
45332 
45333 /* For 32-bit code we can save PIC register setup by using
45334    the hidden __stack_chk_fail_local function instead of calling
45335    __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
45336    register, so it is better to call __stack_chk_fail directly.  */
45337 
45338 static tree ATTRIBUTE_UNUSED
45339 ix86_stack_protect_fail (void)
45340 {
45341   return TARGET_64BIT
45342 	 ? default_external_stack_protect_fail ()
45343 	 : default_hidden_stack_protect_fail ();
45344 }
45345 
45346 /* Select a format to encode pointers in exception handling data.  CODE
45347    is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
45348    true if the symbol may be affected by dynamic relocations.
45349 
45350    ??? All x86 object file formats are capable of representing this.
45351    After all, the relocation needed is the same as for the call insn.
45352    Whether or not a particular assembler allows us to enter such, I
45353    guess we'll have to see.  */
45354 int
45355 asm_preferred_eh_data_format (int code, int global)
45356 {
45357   if (flag_pic)
45358     {
45359       int type = DW_EH_PE_sdata8;
45360       if (!TARGET_64BIT
45361 	  || ix86_cmodel == CM_SMALL_PIC
45362 	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
45363 	type = DW_EH_PE_sdata4;
45364       return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
45365     }
45366   if (ix86_cmodel == CM_SMALL
45367       || (ix86_cmodel == CM_MEDIUM && code))
45368     return DW_EH_PE_udata4;
45369   return DW_EH_PE_absptr;
45370 }
45371 
45372 /* Expand a copysign operation: copy the sign of SIGN onto the positive
45373    value ABS_VALUE, storing the result in RESULT.  If MASK is non-null, it
45374    shall be a mask that masks out the sign bit.  */
45375 static void
45376 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
45377 {
45378   machine_mode mode = GET_MODE (sign);
45379   rtx sgn = gen_reg_rtx (mode);
45380   if (mask == NULL_RTX)
45381     {
45382       machine_mode vmode;
45383 
45384       if (mode == SFmode)
45385 	vmode = V4SFmode;
45386       else if (mode == DFmode)
45387 	vmode = V2DFmode;
45388       else
45389 	vmode = mode;
45390 
45391       mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
45392       if (!VECTOR_MODE_P (mode))
45393 	{
45394 	  /* We need to generate a scalar mode mask in this case.  */
45395 	  rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45396 	  tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45397 	  mask = gen_reg_rtx (mode);
45398 	  emit_insn (gen_rtx_SET (mask, tmp));
45399 	}
45400     }
45401   else
45402     mask = gen_rtx_NOT (mode, mask);
45403   emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign)));
45404   emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn)));
45405 }
45406 
45407 /* Expand fabs (OP0) and return a new rtx that holds the result.  The
45408    mask for masking out the sign-bit is stored in *SMASK, if that is
45409    non-null.  */
45410 static rtx
45411 ix86_expand_sse_fabs (rtx op0, rtx *smask)
45412 {
45413   machine_mode vmode, mode = GET_MODE (op0);
45414   rtx xa, mask;
45415 
45416   xa = gen_reg_rtx (mode);
45417   if (mode == SFmode)
45418     vmode = V4SFmode;
45419   else if (mode == DFmode)
45420     vmode = V2DFmode;
45421   else
45422     vmode = mode;
45423   mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
45424   if (!VECTOR_MODE_P (mode))
45425     {
45426       /* We need to generate a scalar mode mask in this case.  */
45427       rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45428       tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45429       mask = gen_reg_rtx (mode);
45430       emit_insn (gen_rtx_SET (mask, tmp));
45431     }
45432   emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask)));
45433 
45434   if (smask)
45435     *smask = mask;
45436 
45437   return xa;
45438 }
45439 
45440 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
45441    swapping the operands if SWAP_OPERANDS is true.  The expanded
45442    code is a forward jump to a newly created label in case the
45443    comparison is true.  The generated label rtx is returned.  */
45444 static rtx_code_label *
45445 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
45446                                   bool swap_operands)
45447 {
45448   bool unordered_compare = ix86_unordered_fp_compare (code);
45449   rtx_code_label *label;
45450   rtx tmp, reg;
45451 
45452   if (swap_operands)
45453     std::swap (op0, op1);
45454 
45455   label = gen_label_rtx ();
45456   tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
45457   if (unordered_compare)
45458     tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
45459   reg = gen_rtx_REG (CCFPmode, FLAGS_REG);
45460   emit_insn (gen_rtx_SET (reg, tmp));
45461   tmp = gen_rtx_fmt_ee (code, VOIDmode, reg, const0_rtx);
45462   tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
45463 			      gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
45464   tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
45465   JUMP_LABEL (tmp) = label;
45466 
45467   return label;
45468 }
45469 
45470 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
45471    using comparison code CODE.  Operands are swapped for the comparison if
45472    SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */
45473 static rtx
45474 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
45475 			      bool swap_operands)
45476 {
45477   rtx (*insn)(rtx, rtx, rtx, rtx);
45478   machine_mode mode = GET_MODE (op0);
45479   rtx mask = gen_reg_rtx (mode);
45480 
45481   if (swap_operands)
45482     std::swap (op0, op1);
45483 
45484   insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
45485 
45486   emit_insn (insn (mask, op0, op1,
45487 		   gen_rtx_fmt_ee (code, mode, op0, op1)));
45488   return mask;
45489 }
45490 
45491 /* Generate and return a rtx of mode MODE for 2**n where n is the number
45492    of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
45493 static rtx
45494 ix86_gen_TWO52 (machine_mode mode)
45495 {
45496   REAL_VALUE_TYPE TWO52r;
45497   rtx TWO52;
45498 
45499   real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
45500   TWO52 = const_double_from_real_value (TWO52r, mode);
45501   TWO52 = force_reg (mode, TWO52);
45502 
45503   return TWO52;
45504 }
45505 
45506 /* Expand SSE sequence for computing lround from OP1 storing
45507    into OP0.  */
45508 void
45509 ix86_expand_lround (rtx op0, rtx op1)
45510 {
45511   /* C code for the stuff we're doing below:
45512        tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
45513        return (long)tmp;
45514    */
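  /* nextafter (0.5, 0.0) -- the largest value below 0.5 -- is used rather
     than plain 0.5 so that arguments just below an exact .5 boundary are
     not pushed over it by rounding in the addition, while exact halfway
     cases still round away from zero as lround requires.  */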
45515   machine_mode mode = GET_MODE (op1);
45516   const struct real_format *fmt;
45517   REAL_VALUE_TYPE pred_half, half_minus_pred_half;
45518   rtx adj;
45519 
45520   /* load nextafter (0.5, 0.0) */
45521   fmt = REAL_MODE_FORMAT (mode);
45522   real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
45523   real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
45524 
45525   /* adj = copysign (0.5, op1) */
45526   adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
45527   ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
45528 
45529   /* adj = op1 + adj */
45530   adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
45531 
45532   /* op0 = (imode)adj */
45533   expand_fix (op0, adj, 0);
45534 }
45535 
45536 /* Expand SSE2 sequence for computing lfloor or lceil (depending on
45537    DO_FLOOR) from OP1 storing into OP0.  */
45538 void
45539 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
45540 {
45541   /* C code for the stuff we're doing below (for do_floor):
45542 	xi = (long)op1;
45543         xi -= (double)xi > op1 ? 1 : 0;
45544         return xi;
45545    */
45546   machine_mode fmode = GET_MODE (op1);
45547   machine_mode imode = GET_MODE (op0);
45548   rtx ireg, freg, tmp;
45549   rtx_code_label *label;
45550 
45551   /* reg = (long)op1 */
45552   ireg = gen_reg_rtx (imode);
45553   expand_fix (ireg, op1, 0);
45554 
45555   /* freg = (double)reg */
45556   freg = gen_reg_rtx (fmode);
45557   expand_float (freg, ireg, 0);
45558 
45559   /* ireg = (freg > op1) ? ireg - 1 : ireg */
45560   label = ix86_expand_sse_compare_and_jump (UNLE,
45561 					    freg, op1, !do_floor);
45562   tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
45563 			     ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
45564   emit_move_insn (ireg, tmp);
45565 
45566   emit_label (label);
45567   LABEL_NUSES (label) = 1;
45568 
45569   emit_move_insn (op0, ireg);
45570 }
45571 
45572 /* Expand rint rounding OPERAND1 and storing the result in OPERAND0.  */
45573 void
45574 ix86_expand_rint (rtx operand0, rtx operand1)
45575 {
45576   /* C code for the stuff we're doing below:
45577 	xa = fabs (operand1);
45578         if (!isless (xa, 2**52))
45579 	  return operand1;
45580         two52 = 2**52;
45581         if (flag_rounding_math)
45582 	  {
45583 	    two52 = copysign (two52, operand1);
45584 	    xa = operand1;
45585 	  }
45586         xa = xa + two52 - two52;
45587         return copysign (xa, operand1);
45588    */
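  /* Adding and then subtracting 2**52 (2**23 for SFmode) pushes the
     fractional bits out of the significand, so the addition itself rounds
     xa to an integer in the current rounding mode; the !isless (xa, 2**52)
     guard is needed because larger magnitudes are already integral and the
     trick could otherwise perturb them.  */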
45589   machine_mode mode = GET_MODE (operand0);
45590   rtx res, xa, TWO52, two52, mask;
45591   rtx_code_label *label;
45592 
45593   res = gen_reg_rtx (mode);
45594   emit_move_insn (res, operand1);
45595 
45596   /* xa = abs (operand1) */
45597   xa = ix86_expand_sse_fabs (res, &mask);
45598 
45599   /* if (!isless (xa, TWO52)) goto label; */
45600   TWO52 = ix86_gen_TWO52 (mode);
45601   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45602 
45603   two52 = TWO52;
45604   if (flag_rounding_math)
45605     {
45606       two52 = gen_reg_rtx (mode);
45607       ix86_sse_copysign_to_positive (two52, TWO52, res, mask);
45608       xa = res;
45609     }
45610 
45611   xa = expand_simple_binop (mode, PLUS, xa, two52, NULL_RTX, 0, OPTAB_DIRECT);
45612   xa = expand_simple_binop (mode, MINUS, xa, two52, xa, 0, OPTAB_DIRECT);
45613 
45614   ix86_sse_copysign_to_positive (res, xa, res, mask);
45615 
45616   emit_label (label);
45617   LABEL_NUSES (label) = 1;
45618 
45619   emit_move_insn (operand0, res);
45620 }
45621 
45622 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
45623    into OPERAND0.  */
45624 void
45625 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
45626 {
45627   /* C code for the stuff we expand below.
45628         double xa = fabs (x), x2;
45629         if (!isless (xa, TWO52))
45630           return x;
45631         xa = xa + TWO52 - TWO52;
45632         x2 = copysign (xa, x);
45633      Compensate.  Floor:
45634         if (x2 > x)
45635           x2 -= 1;
45636      Compensate.  Ceil:
45637         if (x2 < x)
45638           x2 -= -1;
45639         return x2;
45640    */
45641   machine_mode mode = GET_MODE (operand0);
45642   rtx xa, TWO52, tmp, one, res, mask;
45643   rtx_code_label *label;
45644 
45645   TWO52 = ix86_gen_TWO52 (mode);
45646 
45647   /* Temporary for holding the result, initialized to the input
45648      operand to ease control flow.  */
45649   res = gen_reg_rtx (mode);
45650   emit_move_insn (res, operand1);
45651 
45652   /* xa = abs (operand1) */
45653   xa = ix86_expand_sse_fabs (res, &mask);
45654 
45655   /* if (!isless (xa, TWO52)) goto label; */
45656   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45657 
45658   /* xa = xa + TWO52 - TWO52; */
45659   xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
45660   xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
45661 
45662   /* xa = copysign (xa, operand1) */
45663   ix86_sse_copysign_to_positive (xa, xa, res, mask);
45664 
45665   /* generate 1.0 or -1.0 */
45666   one = force_reg (mode,
45667 	           const_double_from_real_value (do_floor
45668 						 ? dconst1 : dconstm1, mode));
45669 
45670   /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
45671   tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
45672   emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
45673   /* We always need to subtract here to preserve signed zero.  */
45674   tmp = expand_simple_binop (mode, MINUS,
45675 			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
45676   emit_move_insn (res, tmp);
45677 
45678   emit_label (label);
45679   LABEL_NUSES (label) = 1;
45680 
45681   emit_move_insn (operand0, res);
45682 }
45683 
45684 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
45685    into OPERAND0.  */
45686 void
45687 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
45688 {
45689   /* C code for the stuff we expand below.
45690 	double xa = fabs (x), x2;
45691         if (!isless (xa, TWO52))
45692           return x;
45693 	x2 = (double)(long)x;
45694      Compensate.  Floor:
45695 	if (x2 > x)
45696 	  x2 -= 1;
45697      Compensate.  Ceil:
45698 	if (x2 < x)
45699 	  x2 += 1;
45700 	if (HONOR_SIGNED_ZEROS (mode))
45701 	  return copysign (x2, x);
45702 	return x2;
45703    */
45704   machine_mode mode = GET_MODE (operand0);
45705   rtx xa, xi, TWO52, tmp, one, res, mask;
45706   rtx_code_label *label;
45707 
45708   TWO52 = ix86_gen_TWO52 (mode);
45709 
45710   /* Temporary for holding the result, initialized to the input
45711      operand to ease control flow.  */
45712   res = gen_reg_rtx (mode);
45713   emit_move_insn (res, operand1);
45714 
45715   /* xa = abs (operand1) */
45716   xa = ix86_expand_sse_fabs (res, &mask);
45717 
45718   /* if (!isless (xa, TWO52)) goto label; */
45719   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45720 
45721   /* xa = (double)(long)x */
45722   xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
45723   expand_fix (xi, res, 0);
45724   expand_float (xa, xi, 0);
45725 
45726   /* generate 1.0 */
45727   one = force_reg (mode, const_double_from_real_value (dconst1, mode));
45728 
45729   /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
45730   tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
45731   emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
45732   tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
45733 			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
45734   emit_move_insn (res, tmp);
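  /* Worked example of the compensation, assuming floor: for x = -1.5 the
     truncating conversion above gives x2 = -1.0; since -1.0 > -1.5 the
     compare mask is all ones, 1.0 is subtracted and res becomes -2.0, the
     correct floor.  For ceil the comparison is reversed and 1.0 is added
     instead, e.g. ceil (1.5): x2 = 1.0 < 1.5, so res becomes 2.0.  */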
45735 
45736   if (HONOR_SIGNED_ZEROS (mode))
45737     ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
45738 
45739   emit_label (label);
45740   LABEL_NUSES (label) = 1;
45741 
45742   emit_move_insn (operand0, res);
45743 }
45744 
45745 /* Expand SSE sequence for computing round from OPERAND1 storing
45746    into OPERAND0.  Sequence that works without relying on DImode truncation
45747    via cvttsd2siq that is only available on 64bit targets.  */
45748 void
45749 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
45750 {
45751   /* C code for the stuff we expand below.
45752         double xa = fabs (x), xa2, x2;
45753         if (!isless (xa, TWO52))
45754           return x;
45755      Using the absolute value and copying back sign makes
45756      -0.0 -> -0.0 correct.
45757         xa2 = xa + TWO52 - TWO52;
45758      Compensate.
45759 	dxa = xa2 - xa;
45760         if (dxa <= -0.5)
45761           xa2 += 1;
45762         else if (dxa > 0.5)
45763           xa2 -= 1;
45764         x2 = copysign (xa2, x);
45765         return x2;
45766    */
45767   machine_mode mode = GET_MODE (operand0);
45768   rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
45769   rtx_code_label *label;
45770 
45771   TWO52 = ix86_gen_TWO52 (mode);
45772 
45773   /* Temporary for holding the result, initialized to the input
45774      operand to ease control flow.  */
45775   res = gen_reg_rtx (mode);
45776   emit_move_insn (res, operand1);
45777 
45778   /* xa = abs (operand1) */
45779   xa = ix86_expand_sse_fabs (res, &mask);
45780 
45781   /* if (!isless (xa, TWO52)) goto label; */
45782   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45783 
45784   /* xa2 = xa + TWO52 - TWO52; */
45785   xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
45786   xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
45787 
45788   /* dxa = xa2 - xa; */
45789   dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
45790 
45791   /* generate 0.5, 1.0 and -0.5 */
45792   half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
45793   one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
45794   mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
45795 			       0, OPTAB_DIRECT);
45796 
45797   /* Compensate.  */
45798   tmp = gen_reg_rtx (mode);
45799   /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
45800   tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
45801   emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
45802   xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
45803   /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
45804   tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
45805   emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
45806   xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
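  /* Worked example, assuming the default round-to-nearest-even mode: for
     x = 2.5 the TWO52 step gives xa2 = 2.0 (the tie goes to even), so
     dxa = -0.5, the second test fires and xa2 becomes 3.0; the
     compensation thus turns round-to-even into the intended
     round-half-away-from-zero.  For x = 1.5, xa2 = 2.0 and dxa = 0.5,
     neither test fires and the result stays 2.0.  */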
45807 
45808   /* res = copysign (xa2, operand1) */
45809   ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
45810 
45811   emit_label (label);
45812   LABEL_NUSES (label) = 1;
45813 
45814   emit_move_insn (operand0, res);
45815 }
45816 
45817 /* Expand SSE sequence for computing trunc from OPERAND1 storing
45818    into OPERAND0.  */
45819 void
45820 ix86_expand_trunc (rtx operand0, rtx operand1)
45821 {
45822   /* C code for SSE variant we expand below.
45823         double xa = fabs (x), x2;
45824         if (!isless (xa, TWO52))
45825           return x;
45826         x2 = (double)(long)x;
45827 	if (HONOR_SIGNED_ZEROS (mode))
45828 	  return copysign (x2, x);
45829 	return x2;
45830    */
45831   machine_mode mode = GET_MODE (operand0);
45832   rtx xa, xi, TWO52, res, mask;
45833   rtx_code_label *label;
45834 
45835   TWO52 = ix86_gen_TWO52 (mode);
45836 
45837   /* Temporary for holding the result, initialized to the input
45838      operand to ease control flow.  */
45839   res = gen_reg_rtx (mode);
45840   emit_move_insn (res, operand1);
45841 
45842   /* xa = abs (operand1) */
45843   xa = ix86_expand_sse_fabs (res, &mask);
45844 
45845   /* if (!isless (xa, TWO52)) goto label; */
45846   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45847 
45848   /* x = (double)(long)x */
45849   xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
45850   expand_fix (xi, res, 0);
45851   expand_float (res, xi, 0);
45852 
45853   if (HONOR_SIGNED_ZEROS (mode))
45854     ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
45855 
45856   emit_label (label);
45857   LABEL_NUSES (label) = 1;
45858 
45859   emit_move_insn (operand0, res);
45860 }
45861 
45862 /* Expand SSE sequence for computing trunc from OPERAND1 storing
45863    into OPERAND0.  */
45864 void
45865 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
45866 {
45867   machine_mode mode = GET_MODE (operand0);
45868   rtx xa, mask, TWO52, one, res, smask, tmp;
45869   rtx_code_label *label;
45870 
45871   /* C code for SSE variant we expand below.
45872         double xa = fabs (x), x2;
45873         if (!isless (xa, TWO52))
45874           return x;
45875         xa2 = xa + TWO52 - TWO52;
45876      Compensate:
45877         if (xa2 > xa)
45878           xa2 -= 1.0;
45879         x2 = copysign (xa2, x);
45880         return x2;
45881    */
45882 
45883   TWO52 = ix86_gen_TWO52 (mode);
45884 
45885   /* Temporary for holding the result, initialized to the input
45886      operand to ease control flow.  */
45887   res = gen_reg_rtx (mode);
45888   emit_move_insn (res, operand1);
45889 
45890   /* xa = abs (operand1) */
45891   xa = ix86_expand_sse_fabs (res, &smask);
45892 
45893   /* if (!isless (xa, TWO52)) goto label; */
45894   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45895 
45896   /* res = xa + TWO52 - TWO52; */
45897   tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
45898   tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
45899   emit_move_insn (res, tmp);
45900 
45901   /* generate 1.0 */
45902   one = force_reg (mode, const_double_from_real_value (dconst1, mode));
45903 
45904   /* Compensate: res = xa2 - (res > xa ? 1 : 0)  */
45905   mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
45906   emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one)));
45907   tmp = expand_simple_binop (mode, MINUS,
45908 			     res, mask, NULL_RTX, 0, OPTAB_DIRECT);
45909   emit_move_insn (res, tmp);
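  /* Example, assuming round-to-nearest: for x = 2.7 the TWO52 step above
     leaves res = 3.0, which is greater than xa = 2.7, so 1.0 is
     subtracted and res becomes 2.0, the expected truncation; for x = 2.3
     the TWO52 step already leaves 2.0 and nothing is subtracted.  */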
45910 
45911   /* res = copysign (res, operand1) */
45912   ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
45913 
45914   emit_label (label);
45915   LABEL_NUSES (label) = 1;
45916 
45917   emit_move_insn (operand0, res);
45918 }
45919 
45920 /* Expand SSE sequence for computing round from OPERAND1 storing
45921    into OPERAND0.  */
45922 void
45923 ix86_expand_round (rtx operand0, rtx operand1)
45924 {
45925   /* C code for the stuff we're doing below:
45926         double xa = fabs (x);
45927         if (!isless (xa, TWO52))
45928           return x;
45929         xa = (double)(long)(xa + nextafter (0.5, 0.0));
45930         return copysign (xa, x);
45931    */
45932   machine_mode mode = GET_MODE (operand0);
45933   rtx res, TWO52, xa, xi, half, mask;
45934   rtx_code_label *label;
45935   const struct real_format *fmt;
45936   REAL_VALUE_TYPE pred_half, half_minus_pred_half;
45937 
45938   /* Temporary for holding the result, initialized to the input
45939      operand to ease control flow.  */
45940   res = gen_reg_rtx (mode);
45941   emit_move_insn (res, operand1);
45942 
45943   TWO52 = ix86_gen_TWO52 (mode);
45944   xa = ix86_expand_sse_fabs (res, &mask);
45945   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45946 
45947   /* load nextafter (0.5, 0.0) */
45948   fmt = REAL_MODE_FORMAT (mode);
45949   real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
45950   real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
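  /* pred_half is the largest representable value below 0.5, i.e.
     0.5 - 2**(-p-1) with p the precision of MODE (0.49999999999999994
     for DFmode).  Using it instead of 0.5 keeps xa + half from rounding
     up across an integer boundary: for the largest double below 0.5,
     x + 0.5 would round to 1.0 and truncate to 1, while x + pred_half
     stays below 1.0 and truncates to 0.  Ties such as x = 2.5 still
     round away from zero, because 2.5 + pred_half rounds up to 3.0
     before the truncation.  */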
45951 
45952   /* xa = xa + 0.5 */
45953   half = force_reg (mode, const_double_from_real_value (pred_half, mode));
45954   xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
45955 
45956   /* xa = (double)(int64_t)xa */
45957   xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
45958   expand_fix (xi, xa, 0);
45959   expand_float (xa, xi, 0);
45960 
45961   /* res = copysign (xa, operand1) */
45962   ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
45963 
45964   emit_label (label);
45965   LABEL_NUSES (label) = 1;
45966 
45967   emit_move_insn (operand0, res);
45968 }
45969 
45970 /* Expand SSE sequence for computing round
45971    from OP1 storing into OP0 using sse4 round insn.  */
45972 void
45973 ix86_expand_round_sse4 (rtx op0, rtx op1)
45974 {
45975   machine_mode mode = GET_MODE (op0);
45976   rtx e1, e2, res, half;
45977   const struct real_format *fmt;
45978   REAL_VALUE_TYPE pred_half, half_minus_pred_half;
45979   rtx (*gen_copysign) (rtx, rtx, rtx);
45980   rtx (*gen_round) (rtx, rtx, rtx);
45981 
45982   switch (mode)
45983     {
45984     case E_SFmode:
45985       gen_copysign = gen_copysignsf3;
45986       gen_round = gen_sse4_1_roundsf2;
45987       break;
45988     case E_DFmode:
45989       gen_copysign = gen_copysigndf3;
45990       gen_round = gen_sse4_1_rounddf2;
45991       break;
45992     default:
45993       gcc_unreachable ();
45994     }
45995 
45996   /* round (a) = trunc (a + copysign (0.5, a)) */
45997 
45998   /* load nextafter (0.5, 0.0) */
45999   fmt = REAL_MODE_FORMAT (mode);
46000   real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46001   real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
46002   half = const_double_from_real_value (pred_half, mode);
46003 
46004   /* e1 = copysign (0.5, op1) */
46005   e1 = gen_reg_rtx (mode);
46006   emit_insn (gen_copysign (e1, half, op1));
46007 
46008   /* e2 = op1 + e1 */
46009   e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
46010 
46011   /* res = trunc (e2) */
46012   res = gen_reg_rtx (mode);
46013   emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
46014 
46015   emit_move_insn (op0, res);
46016 }
46017 
46018 
46019 /* Table of valid machine attributes.  */
46020 static const struct attribute_spec ix86_attribute_table[] =
46021 {
46022   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
46023        affects_type_identity, handler, exclude } */
46024   /* Stdcall attribute says callee is responsible for popping arguments
46025      if they are not variable.  */
46026   { "stdcall",   0, 0, false, true,  true,  true, ix86_handle_cconv_attribute,
46027     NULL },
46028   /* Fastcall attribute says callee is responsible for popping arguments
46029      if they are not variable.  */
46030   { "fastcall",  0, 0, false, true,  true,  true, ix86_handle_cconv_attribute,
46031     NULL },
46032   /* Thiscall attribute says callee is responsible for popping arguments
46033      if they are not variable.  */
46034   { "thiscall",  0, 0, false, true,  true,  true, ix86_handle_cconv_attribute,
46035     NULL },
46036   /* Cdecl attribute says the callee is a normal C declaration */
46037   { "cdecl",     0, 0, false, true,  true,  true, ix86_handle_cconv_attribute,
46038     NULL },
46039   /* Regparm attribute specifies how many integer arguments are to be
46040      passed in registers.  */
46041   { "regparm",   1, 1, false, true,  true,  true, ix86_handle_cconv_attribute,
46042     NULL },
46043   /* Sseregparm attribute says we are using x86_64 calling conventions
46044      for FP arguments.  */
46045   { "sseregparm", 0, 0, false, true, true,  true, ix86_handle_cconv_attribute,
46046     NULL },
46047   /* The transactional memory builtins are implicitly regparm or fastcall
46048      depending on the ABI.  Override the generic do-nothing attribute that
46049      these builtins were declared with.  */
46050   { "*tm regparm", 0, 0, false, true, true, true,
46051     ix86_handle_tm_regparm_attribute, NULL },
46052   /* force_align_arg_pointer says this function realigns the stack at entry.  */
46053   { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
46054     false, true,  true, false, ix86_handle_force_align_arg_pointer_attribute,
46055     NULL },
46056 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46057   { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
46058     NULL },
46059   { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
46060     NULL },
46061   { "shared",    0, 0, true,  false, false, false,
46062     ix86_handle_shared_attribute, NULL },
46063 #endif
46064   { "ms_struct", 0, 0, false, false,  false, false,
46065     ix86_handle_struct_attribute, NULL },
46066   { "gcc_struct", 0, 0, false, false,  false, false,
46067     ix86_handle_struct_attribute, NULL },
46068 #ifdef SUBTARGET_ATTRIBUTE_TABLE
46069   SUBTARGET_ATTRIBUTE_TABLE,
46070 #endif
46071   /* ms_abi and sysv_abi calling convention function attributes.  */
46072   { "ms_abi", 0, 0, false, true, true, true, ix86_handle_abi_attribute, NULL },
46073   { "sysv_abi", 0, 0, false, true, true, true, ix86_handle_abi_attribute,
46074     NULL },
46075   { "ms_abi va_list", 0, 0, false, false, false, false, NULL, NULL },
46076   { "sysv_abi va_list", 0, 0, false, false, false, false, NULL, NULL },
46077   { "ms_hook_prologue", 0, 0, true, false, false, false,
46078     ix86_handle_fndecl_attribute, NULL },
46079   { "callee_pop_aggregate_return", 1, 1, false, true, true, true,
46080     ix86_handle_callee_pop_aggregate_return, NULL },
46081   { "interrupt", 0, 0, false, true, true, false,
46082     ix86_handle_interrupt_attribute, NULL },
46083   { "no_caller_saved_registers", 0, 0, false, true, true, false,
46084     ix86_handle_no_caller_saved_registers_attribute, NULL },
46085   { "naked", 0, 0, true, false, false, false,
46086     ix86_handle_fndecl_attribute, NULL },
46087   { "indirect_branch", 1, 1, true, false, false, false,
46088     ix86_handle_fndecl_attribute, NULL },
46089   { "function_return", 1, 1, true, false, false, false,
46090     ix86_handle_fndecl_attribute, NULL },
46091 
46092   /* End element.  */
46093   { NULL, 0, 0, false, false, false, false, NULL, NULL }
46094 };
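
/* Illustrative uses of some of the attributes above, assuming an x86
   target (these declarations are examples only and appear nowhere in
   this file):
     int  __attribute__((regparm (3))) sum3 (int a, int b, int c);
     void __attribute__((ms_abi)) ms_convention_callee (void);
     void __attribute__((naked)) bare_asm_stub (void);  */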
46095 
46096 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
46097 static int
46098 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
46099                                  tree vectype, int)
46100 {
46101   bool fp = false;
46102   machine_mode mode = TImode;
46103   int index;
46104   if (vectype != NULL)
46105     {
46106       fp = FLOAT_TYPE_P (vectype);
46107       mode = TYPE_MODE (vectype);
46108     }
46109 
46110   switch (type_of_cost)
46111     {
46112       case scalar_stmt:
46113         return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
46114 
46115       case scalar_load:
46116 	/* Load/store costs are relative to a register move, which costs 2.
46117 	   Recompute them as COSTS_N_INSNS so everything has the same base.  */
46118         return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
46119 			      : ix86_cost->int_load [2]) / 2;
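	/* For instance, a table value of 6 (three times the cost of a
	   register move, which costs 2) becomes COSTS_N_INSNS (6) / 2,
	   the equivalent of three single instructions.  */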
46120 
46121       case scalar_store:
46122         return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
46123 			      : ix86_cost->int_store [2]) / 2;
46124 
46125       case vector_stmt:
46126         return ix86_vec_cost (mode,
46127 			      fp ? ix86_cost->addss : ix86_cost->sse_op,
46128 			      true);
46129 
46130       case vector_load:
46131 	index = sse_store_index (mode);
46132 	/* See PR82713 - we may end up being called on non-vector type.  */
46133 	if (index < 0)
46134 	  index = 2;
46135         return ix86_vec_cost (mode,
46136 			      COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2,
46137 			      true);
46138 
46139       case vector_store:
46140 	index = sse_store_index (mode);
46141 	/* See PR82713 - we may end up being called on non-vector type.  */
46142 	if (index < 0)
46143 	  index = 2;
46144         return ix86_vec_cost (mode,
46145 			      COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2,
46146 			      true);
46147 
46148       case vec_to_scalar:
46149       case scalar_to_vec:
46150         return ix86_vec_cost (mode, ix86_cost->sse_op, true);
46151 
46152       /* We should have separate costs for unaligned loads and gather/scatter.
46153 	 Do that incrementally.  */
46154       case unaligned_load:
46155 	index = sse_store_index (mode);
46156 	/* See PR82713 - we may end up being called on non-vector type.  */
46157 	if (index < 0)
46158 	  index = 2;
46159         return ix86_vec_cost (mode,
46160 			      COSTS_N_INSNS
46161 				 (ix86_cost->sse_unaligned_load[index]) / 2,
46162 			      true);
46163 
46164       case unaligned_store:
46165 	index = sse_store_index (mode);
46166 	/* See PR82713 - we may end up being called on non-vector type.  */
46167 	if (index < 0)
46168 	  index = 2;
46169         return ix86_vec_cost (mode,
46170 			      COSTS_N_INSNS
46171 				 (ix86_cost->sse_unaligned_store[index]) / 2,
46172 			      true);
46173 
46174       case vector_gather_load:
46175         return ix86_vec_cost (mode,
46176 			      COSTS_N_INSNS
46177 				 (ix86_cost->gather_static
46178 				  + ix86_cost->gather_per_elt
46179 				    * TYPE_VECTOR_SUBPARTS (vectype)) / 2,
46180 			      true);
46181 
46182       case vector_scatter_store:
46183         return ix86_vec_cost (mode,
46184 			      COSTS_N_INSNS
46185 				 (ix86_cost->scatter_static
46186 				  + ix86_cost->scatter_per_elt
46187 				    * TYPE_VECTOR_SUBPARTS (vectype)) / 2,
46188 			      true);
46189 
46190       case cond_branch_taken:
46191         return ix86_cost->cond_taken_branch_cost;
46192 
46193       case cond_branch_not_taken:
46194         return ix86_cost->cond_not_taken_branch_cost;
46195 
46196       case vec_perm:
46197       case vec_promote_demote:
46198         return ix86_vec_cost (mode,
46199 			      ix86_cost->sse_op, true);
46200 
46201       case vec_construct:
46202 	{
46203 	  /* N element inserts.  */
46204 	  int cost = ix86_vec_cost (mode, ix86_cost->sse_op, false);
46205 	  /* One vinserti128 for combining two SSE vectors for AVX256.  */
46206 	  if (GET_MODE_BITSIZE (mode) == 256)
46207 	    cost += ix86_vec_cost (mode, ix86_cost->addss, true);
46208 	  /* One vinserti64x4 and two vinserti128 for combining SSE
46209 	     and AVX256 vectors to AVX512.  */
46210 	  else if (GET_MODE_BITSIZE (mode) == 512)
46211 	    cost += 3 * ix86_vec_cost (mode, ix86_cost->addss, true);
46212 	  return cost;
46213 	}
46214 
46215       default:
46216         gcc_unreachable ();
46217     }
46218 }
46219 
46220 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
46221    insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
46222    insn every time.  */
46223 
46224 static GTY(()) rtx_insn *vselect_insn;
46225 
46226 /* Initialize vselect_insn.  */
46227 
46228 static void
46229 init_vselect_insn (void)
46230 {
46231   unsigned i;
46232   rtx x;
46233 
46234   x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
46235   for (i = 0; i < MAX_VECT_LEN; ++i)
46236     XVECEXP (x, 0, i) = const0_rtx;
46237   x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
46238 							const0_rtx), x);
46239   x = gen_rtx_SET (const0_rtx, x);
46240   start_sequence ();
46241   vselect_insn = emit_insn (x);
46242   end_sequence ();
46243 }
46244 
46245 /* Construct (set target (vec_select op0 (parallel perm))) and
46246    return true if that's a valid instruction in the active ISA.  */
46247 
46248 static bool
46249 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
46250 		unsigned nelt, bool testing_p)
46251 {
46252   unsigned int i;
46253   rtx x, save_vconcat;
46254   int icode;
46255 
46256   if (vselect_insn == NULL_RTX)
46257     init_vselect_insn ();
46258 
46259   x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
46260   PUT_NUM_ELEM (XVEC (x, 0), nelt);
46261   for (i = 0; i < nelt; ++i)
46262     XVECEXP (x, 0, i) = GEN_INT (perm[i]);
46263   save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46264   XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
46265   PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
46266   SET_DEST (PATTERN (vselect_insn)) = target;
46267   icode = recog_memoized (vselect_insn);
46268 
46269   if (icode >= 0 && !testing_p)
46270     emit_insn (copy_rtx (PATTERN (vselect_insn)));
46271 
46272   SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
46273   XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
46274   INSN_CODE (vselect_insn) = -1;
46275 
46276   return icode >= 0;
46277 }
46278 
46279 /* Similar, but generate a vec_concat from op0 and op1 as well.  */
46280 
46281 static bool
46282 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
46283 			const unsigned char *perm, unsigned nelt,
46284 			bool testing_p)
46285 {
46286   machine_mode v2mode;
46287   rtx x;
46288   bool ok;
46289 
46290   if (vselect_insn == NULL_RTX)
46291     init_vselect_insn ();
46292 
46293   if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode))
46294     return false;
46295   x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46296   PUT_MODE (x, v2mode);
46297   XEXP (x, 0) = op0;
46298   XEXP (x, 1) = op1;
46299   ok = expand_vselect (target, x, perm, nelt, testing_p);
46300   XEXP (x, 0) = const0_rtx;
46301   XEXP (x, 1) = const0_rtx;
46302   return ok;
46303 }
46304 
46305 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
46306    in terms of blendp[sd] / pblendw / pblendvb / vpblendd.  */
46307 
46308 static bool
46309 expand_vec_perm_blend (struct expand_vec_perm_d *d)
46310 {
46311   machine_mode mmode, vmode = d->vmode;
46312   unsigned i, mask, nelt = d->nelt;
46313   rtx target, op0, op1, maskop, x;
46314   rtx rperm[32], vperm;
46315 
46316   if (d->one_operand_p)
46317     return false;
46318   if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
46319       && (TARGET_AVX512BW
46320 	  || GET_MODE_UNIT_SIZE (vmode) >= 4))
46321     ;
46322   else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
46323     ;
46324   else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
46325     ;
46326   else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
46327     ;
46328   else
46329     return false;
46330 
46331   /* This is a blend, not a permute.  Elements must stay in their
46332      respective lanes.  */
46333   for (i = 0; i < nelt; ++i)
46334     {
46335       unsigned e = d->perm[i];
46336       if (!(e == i || e == i + nelt))
46337 	return false;
46338     }
46339 
46340   if (d->testing_p)
46341     return true;
46342 
46343   /* ??? Without SSE4.1, we could implement this with and/andn/or.  This
46344      decision should be extracted elsewhere, so that we only try that
46345      sequence once all budget==3 options have been tried.  */
46346   target = d->target;
46347   op0 = d->op0;
46348   op1 = d->op1;
46349   mask = 0;
46350 
46351   switch (vmode)
46352     {
46353     case E_V8DFmode:
46354     case E_V16SFmode:
46355     case E_V4DFmode:
46356     case E_V8SFmode:
46357     case E_V2DFmode:
46358     case E_V4SFmode:
46359     case E_V8HImode:
46360     case E_V8SImode:
46361     case E_V32HImode:
46362     case E_V64QImode:
46363     case E_V16SImode:
46364     case E_V8DImode:
46365       for (i = 0; i < nelt; ++i)
46366 	mask |= (d->perm[i] >= nelt) << i;
46367       break;
46368 
46369     case E_V2DImode:
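      /* V2DImode has no integer blend with an immediate mask, so the
	 operands are viewed as V8HImode below; each DImode lane maps to
	 four HImode lanes, hence a full 0xf nibble in the pblendw mask
	 for every lane taken from the second operand.  */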
46370       for (i = 0; i < 2; ++i)
46371 	mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
46372       vmode = V8HImode;
46373       goto do_subreg;
46374 
46375     case E_V4SImode:
46376       for (i = 0; i < 4; ++i)
46377 	mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46378       vmode = V8HImode;
46379       goto do_subreg;
46380 
46381     case E_V16QImode:
46382       /* See if bytes move in pairs so we can use pblendw with
46383 	 an immediate argument, rather than pblendvb with a vector
46384 	 argument.  */
46385       for (i = 0; i < 16; i += 2)
46386 	if (d->perm[i] + 1 != d->perm[i + 1])
46387 	  {
46388 	  use_pblendvb:
46389 	    for (i = 0; i < nelt; ++i)
46390 	      rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
46391 
46392 	  finish_pblendvb:
46393 	    vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
46394 	    vperm = force_reg (vmode, vperm);
46395 
46396 	    if (GET_MODE_SIZE (vmode) == 16)
46397 	      emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
46398 	    else
46399 	      emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
46400 	    if (target != d->target)
46401 	      emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46402 	    return true;
46403 	  }
46404 
46405       for (i = 0; i < 8; ++i)
46406 	mask |= (d->perm[i * 2] >= 16) << i;
46407       vmode = V8HImode;
46408       /* FALLTHRU */
46409 
46410     do_subreg:
46411       target = gen_reg_rtx (vmode);
46412       op0 = gen_lowpart (vmode, op0);
46413       op1 = gen_lowpart (vmode, op1);
46414       break;
46415 
46416     case E_V32QImode:
46417       /* See if bytes move in pairs.  If not, vpblendvb must be used.  */
46418       for (i = 0; i < 32; i += 2)
46419 	if (d->perm[i] + 1 != d->perm[i + 1])
46420 	  goto use_pblendvb;
46421       /* See if bytes move in quadruplets.  If yes, vpblendd
46422 	 with immediate can be used.  */
46423       for (i = 0; i < 32; i += 4)
46424 	if (d->perm[i] + 2 != d->perm[i + 2])
46425 	  break;
46426       if (i < 32)
46427 	{
46428 	  /* See if bytes move the same in both lanes.  If yes,
46429 	     vpblendw with immediate can be used.  */
46430 	  for (i = 0; i < 16; i += 2)
46431 	    if (d->perm[i] + 16 != d->perm[i + 16])
46432 	      goto use_pblendvb;
46433 
46434 	  /* Use vpblendw.  */
46435 	  for (i = 0; i < 16; ++i)
46436 	    mask |= (d->perm[i * 2] >= 32) << i;
46437 	  vmode = V16HImode;
46438 	  goto do_subreg;
46439 	}
46440 
46441       /* Use vpblendd.  */
46442       for (i = 0; i < 8; ++i)
46443 	mask |= (d->perm[i * 4] >= 32) << i;
46444       vmode = V8SImode;
46445       goto do_subreg;
46446 
46447     case E_V16HImode:
46448       /* See if words move in pairs.  If yes, vpblendd can be used.  */
46449       for (i = 0; i < 16; i += 2)
46450 	if (d->perm[i] + 1 != d->perm[i + 1])
46451 	  break;
46452       if (i < 16)
46453 	{
46454 	  /* See if words move the same in both lanes.  If not,
46455 	     vpblendvb must be used.  */
46456 	  for (i = 0; i < 8; i++)
46457 	    if (d->perm[i] + 8 != d->perm[i + 8])
46458 	      {
46459 		/* Use vpblendvb.  */
46460 		for (i = 0; i < 32; ++i)
46461 		  rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
46462 
46463 		vmode = V32QImode;
46464 		nelt = 32;
46465 		target = gen_reg_rtx (vmode);
46466 		op0 = gen_lowpart (vmode, op0);
46467 		op1 = gen_lowpart (vmode, op1);
46468 		goto finish_pblendvb;
46469 	      }
46470 
46471 	  /* Use vpblendw.  */
46472 	  for (i = 0; i < 16; ++i)
46473 	    mask |= (d->perm[i] >= 16) << i;
46474 	  break;
46475 	}
46476 
46477       /* Use vpblendd.  */
46478       for (i = 0; i < 8; ++i)
46479 	mask |= (d->perm[i * 2] >= 16) << i;
46480       vmode = V8SImode;
46481       goto do_subreg;
46482 
46483     case E_V4DImode:
46484       /* Use vpblendd.  */
46485       for (i = 0; i < 4; ++i)
46486 	mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46487       vmode = V8SImode;
46488       goto do_subreg;
46489 
46490     default:
46491       gcc_unreachable ();
46492     }
46493 
46494   switch (vmode)
46495     {
46496     case E_V8DFmode:
46497     case E_V8DImode:
46498       mmode = QImode;
46499       break;
46500     case E_V16SFmode:
46501     case E_V16SImode:
46502       mmode = HImode;
46503       break;
46504     case E_V32HImode:
46505       mmode = SImode;
46506       break;
46507     case E_V64QImode:
46508       mmode = DImode;
46509       break;
46510     default:
46511       mmode = VOIDmode;
46512     }
46513 
46514   if (mmode != VOIDmode)
46515     maskop = force_reg (mmode, gen_int_mode (mask, mmode));
46516   else
46517     maskop = GEN_INT (mask);
46518 
46519   /* This matches five different patterns with the different modes.  */
46520   x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
46521   x = gen_rtx_SET (target, x);
46522   emit_insn (x);
46523   if (target != d->target)
46524     emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46525 
46526   return true;
46527 }
46528 
46529 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
46530    in terms of the variable form of vpermilps.
46531 
46532    Note that we will have already failed the immediate input vpermilps,
46533    which requires that the high and low part shuffle be identical; the
46534    variable form doesn't require that.  */
46535 
46536 static bool
46537 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
46538 {
46539   rtx rperm[8], vperm;
46540   unsigned i;
46541 
46542   if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
46543     return false;
46544 
46545   /* We can only permute within each 128-bit lane.  */
46546   for (i = 0; i < 8; ++i)
46547     {
46548       unsigned e = d->perm[i];
46549       if (i < 4 ? e >= 4 : e < 4)
46550 	return false;
46551     }
46552 
46553   if (d->testing_p)
46554     return true;
46555 
46556   for (i = 0; i < 8; ++i)
46557     {
46558       unsigned e = d->perm[i];
46559 
46560       /* Within each 128-bit lane, the elements of op0 are numbered
46561 	 from 0 and the elements of op1 are numbered from 4.  */
46562       if (e >= 8 + 4)
46563 	e -= 8;
46564       else if (e >= 4)
46565 	e -= 4;
46566 
46567       rperm[i] = GEN_INT (e);
46568     }
46569 
46570   vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
46571   vperm = force_reg (V8SImode, vperm);
46572   emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
46573 
46574   return true;
46575 }
46576 
46577 /* Return true if permutation D can be performed as VMODE permutation
46578    instead.  */
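/* For instance, the V16QImode permutation { 2, 3, 0, 1, 6, 7, 4, 5, ... }
   moves bytes in aligned pairs, so it is also expressible as the V8HImode
   permutation { 1, 0, 3, 2, ... }; this predicate accepts V8HImode for
   it.  */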
46579 
46580 static bool
46581 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
46582 {
46583   unsigned int i, j, chunk;
46584 
46585   if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
46586       || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
46587       || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
46588     return false;
46589 
46590   if (GET_MODE_NUNITS (vmode) >= d->nelt)
46591     return true;
46592 
46593   chunk = d->nelt / GET_MODE_NUNITS (vmode);
46594   for (i = 0; i < d->nelt; i += chunk)
46595     if (d->perm[i] & (chunk - 1))
46596       return false;
46597     else
46598       for (j = 1; j < chunk; ++j)
46599 	if (d->perm[i] + j != d->perm[i + j])
46600 	  return false;
46601 
46602   return true;
46603 }
46604 
46605 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
46606    in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128.  */
46607 
46608 static bool
46609 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
46610 {
46611   unsigned i, nelt, eltsz, mask;
46612   unsigned char perm[64];
46613   machine_mode vmode = V16QImode;
46614   rtx rperm[64], vperm, target, op0, op1;
46615 
46616   nelt = d->nelt;
46617 
46618   if (!d->one_operand_p)
46619     {
46620       if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
46621 	{
46622 	  if (TARGET_AVX2
46623 	      && valid_perm_using_mode_p (V2TImode, d))
46624 	    {
46625 	      if (d->testing_p)
46626 		return true;
46627 
46628 	      /* Use vperm2i128 insn.  The pattern uses
46629 		 V4DImode instead of V2TImode.  */
46630 	      target = d->target;
46631 	      if (d->vmode != V4DImode)
46632 		target = gen_reg_rtx (V4DImode);
46633 	      op0 = gen_lowpart (V4DImode, d->op0);
46634 	      op1 = gen_lowpart (V4DImode, d->op1);
46635 	      rperm[0]
46636 		= GEN_INT ((d->perm[0] / (nelt / 2))
46637 			   | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
46638 	      emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
46639 	      if (target != d->target)
46640 		emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46641 	      return true;
46642 	    }
46643 	  return false;
46644 	}
46645     }
46646   else
46647     {
46648       if (GET_MODE_SIZE (d->vmode) == 16)
46649 	{
46650 	  if (!TARGET_SSSE3)
46651 	    return false;
46652 	}
46653       else if (GET_MODE_SIZE (d->vmode) == 32)
46654 	{
46655 	  if (!TARGET_AVX2)
46656 	    return false;
46657 
46658 	  /* V4DImode should already have been handled through
46659 	     expand_vselect by the vpermq instruction.  */
46660 	  gcc_assert (d->vmode != V4DImode);
46661 
46662 	  vmode = V32QImode;
46663 	  if (d->vmode == V8SImode
46664 	      || d->vmode == V16HImode
46665 	      || d->vmode == V32QImode)
46666 	    {
46667 	      /* First see if vpermq can be used for
46668 		 V8SImode/V16HImode/V32QImode.  */
46669 	      if (valid_perm_using_mode_p (V4DImode, d))
46670 		{
46671 		  for (i = 0; i < 4; i++)
46672 		    perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
46673 		  if (d->testing_p)
46674 		    return true;
46675 		  target = gen_reg_rtx (V4DImode);
46676 		  if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
46677 				      perm, 4, false))
46678 		    {
46679 		      emit_move_insn (d->target,
46680 				      gen_lowpart (d->vmode, target));
46681 		      return true;
46682 		    }
46683 		  return false;
46684 		}
46685 
46686 	      /* Next see if vpermd can be used.  */
46687 	      if (valid_perm_using_mode_p (V8SImode, d))
46688 		vmode = V8SImode;
46689 	    }
46690 	  /* Or if vpermps can be used.  */
46691 	  else if (d->vmode == V8SFmode)
46692 	    vmode = V8SImode;
46693 
46694 	  if (vmode == V32QImode)
46695 	    {
46696 	      /* vpshufb only works within 128-bit lanes; it cannot
46697 		 shuffle bytes across lanes.  */
46698 	      for (i = 0; i < nelt; ++i)
46699 		if ((d->perm[i] ^ i) & (nelt / 2))
46700 		  return false;
46701 	    }
46702 	}
46703       else if (GET_MODE_SIZE (d->vmode) == 64)
46704 	{
46705 	  if (!TARGET_AVX512BW)
46706 	    return false;
46707 
46708 	  /* If vpermq didn't work, vpshufb won't work either.  */
46709 	  if (d->vmode == V8DFmode || d->vmode == V8DImode)
46710 	    return false;
46711 
46712 	  vmode = V64QImode;
46713 	  if (d->vmode == V16SImode
46714 	      || d->vmode == V32HImode
46715 	      || d->vmode == V64QImode)
46716 	    {
46717 	      /* First see if vpermq can be used for
46718 		 V16SImode/V32HImode/V64QImode.  */
46719 	      if (valid_perm_using_mode_p (V8DImode, d))
46720 		{
46721 		  for (i = 0; i < 8; i++)
46722 		    perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
46723 		  if (d->testing_p)
46724 		    return true;
46725 		  target = gen_reg_rtx (V8DImode);
46726 		  if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
46727 				      perm, 8, false))
46728 		    {
46729 		      emit_move_insn (d->target,
46730 				      gen_lowpart (d->vmode, target));
46731 		      return true;
46732 		    }
46733 		  return false;
46734 		}
46735 
46736 	      /* Next see if vpermd can be used.  */
46737 	      if (valid_perm_using_mode_p (V16SImode, d))
46738 		vmode = V16SImode;
46739 	    }
46740 	  /* Or if vpermps can be used.  */
46741 	  else if (d->vmode == V16SFmode)
46742 	    vmode = V16SImode;
46743 	  if (vmode == V64QImode)
46744 	    {
46745 	      /* vpshufb only works within 128-bit lanes; it cannot
46746 		 shuffle bytes across lanes.  */
46747 	      for (i = 0; i < nelt; ++i)
46748 		if ((d->perm[i] ^ i) & (nelt / 4))
46749 		  return false;
46750 	    }
46751 	}
46752       else
46753 	return false;
46754     }
46755 
46756   if (d->testing_p)
46757     return true;
46758 
46759   if (vmode == V8SImode)
46760     for (i = 0; i < 8; ++i)
46761       rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
46762   else if (vmode == V16SImode)
46763     for (i = 0; i < 16; ++i)
46764       rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
46765   else
46766     {
46767       eltsz = GET_MODE_UNIT_SIZE (d->vmode);
46768       if (!d->one_operand_p)
46769 	mask = 2 * nelt - 1;
46770       else if (vmode == V16QImode)
46771 	mask = nelt - 1;
46772       else if (vmode == V64QImode)
46773 	mask = nelt / 4 - 1;
46774       else
46775 	mask = nelt / 2 - 1;
46776 
46777       for (i = 0; i < nelt; ++i)
46778 	{
46779 	  unsigned j, e = d->perm[i] & mask;
46780 	  for (j = 0; j < eltsz; ++j)
46781 	    rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
46782 	}
46783     }
46784 
46785   vperm = gen_rtx_CONST_VECTOR (vmode,
46786 				gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
46787   vperm = force_reg (vmode, vperm);
46788 
46789   target = d->target;
46790   if (d->vmode != vmode)
46791     target = gen_reg_rtx (vmode);
46792   op0 = gen_lowpart (vmode, d->op0);
46793   if (d->one_operand_p)
46794     {
46795       if (vmode == V16QImode)
46796 	emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
46797       else if (vmode == V32QImode)
46798 	emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
46799       else if (vmode == V64QImode)
46800 	emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
46801       else if (vmode == V8SFmode)
46802 	emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
46803       else if (vmode == V8SImode)
46804 	emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
46805       else if (vmode == V16SFmode)
46806 	emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
46807       else if (vmode == V16SImode)
46808 	emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
46809       else
46810 	gcc_unreachable ();
46811     }
46812   else
46813     {
46814       op1 = gen_lowpart (vmode, d->op1);
46815       emit_insn (gen_xop_pperm (target, op0, op1, vperm));
46816     }
46817   if (target != d->target)
46818     emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46819 
46820   return true;
46821 }
46822 
46823 /* For V*[QHS]Imode permutations, check whether the same permutation
46824    can be performed in a 2x, 4x or 8x wider inner mode; if so, describe it in ND.  */
46825 
46826 static bool
46827 canonicalize_vector_int_perm (const struct expand_vec_perm_d *d,
46828 			      struct expand_vec_perm_d *nd)
46829 {
46830   int i;
46831   machine_mode mode = VOIDmode;
46832 
46833   switch (d->vmode)
46834     {
46835     case E_V16QImode: mode = V8HImode; break;
46836     case E_V32QImode: mode = V16HImode; break;
46837     case E_V64QImode: mode = V32HImode; break;
46838     case E_V8HImode: mode = V4SImode; break;
46839     case E_V16HImode: mode = V8SImode; break;
46840     case E_V32HImode: mode = V16SImode; break;
46841     case E_V4SImode: mode = V2DImode; break;
46842     case E_V8SImode: mode = V4DImode; break;
46843     case E_V16SImode: mode = V8DImode; break;
46844     default: return false;
46845     }
46846   for (i = 0; i < d->nelt; i += 2)
46847     if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1)
46848       return false;
46849   nd->vmode = mode;
46850   nd->nelt = d->nelt / 2;
46851   for (i = 0; i < nd->nelt; i++)
46852     nd->perm[i] = d->perm[2 * i] / 2;
46853   if (GET_MODE_INNER (mode) != DImode)
46854     canonicalize_vector_int_perm (nd, nd);
46855   if (nd != d)
46856     {
46857       nd->one_operand_p = d->one_operand_p;
46858       nd->testing_p = d->testing_p;
46859       if (d->op0 == d->op1)
46860 	nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0);
46861       else
46862 	{
46863 	  nd->op0 = gen_lowpart (nd->vmode, d->op0);
46864 	  nd->op1 = gen_lowpart (nd->vmode, d->op1);
46865 	}
46866       if (d->testing_p)
46867 	nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1);
46868       else
46869 	nd->target = gen_reg_rtx (nd->vmode);
46870     }
46871   return true;
46872 }
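
/* As an example of the canonicalization above, a V16QImode permutation
   that moves bytes in aligned groups of four, such as
   { 4, 5, 6, 7, 0, 1, 2, 3, ... }, is first narrowed to V8HImode and then
   recursively to V4SImode, where it becomes { 1, 0, ... }.  */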
46873 
46874 /* Try to expand one-operand permutation with constant mask.  */
46875 
46876 static bool
46877 ix86_expand_vec_one_operand_perm_avx512 (struct expand_vec_perm_d *d)
46878 {
46879   machine_mode mode = GET_MODE (d->op0);
46880   machine_mode maskmode = mode;
46881   rtx (*gen) (rtx, rtx, rtx) = NULL;
46882   rtx target, op0, mask;
46883   rtx vec[64];
46884 
46885   if (!rtx_equal_p (d->op0, d->op1))
46886     return false;
46887 
46888   if (!TARGET_AVX512F)
46889     return false;
46890 
46891   switch (mode)
46892     {
46893     case E_V16SImode:
46894       gen = gen_avx512f_permvarv16si;
46895       break;
46896     case E_V16SFmode:
46897       gen = gen_avx512f_permvarv16sf;
46898       maskmode = V16SImode;
46899       break;
46900     case E_V8DImode:
46901       gen = gen_avx512f_permvarv8di;
46902       break;
46903     case E_V8DFmode:
46904       gen = gen_avx512f_permvarv8df;
46905       maskmode = V8DImode;
46906       break;
46907     default:
46908       return false;
46909     }
46910 
46911   target = d->target;
46912   op0 = d->op0;
46913   for (int i = 0; i < d->nelt; ++i)
46914     vec[i] = GEN_INT (d->perm[i]);
46915   mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
46916   emit_insn (gen (target, op0, force_reg (maskmode, mask)));
46917   return true;
46918 }
46919 
46920 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
46921    in a single instruction.  */
46922 
46923 static bool
46924 expand_vec_perm_1 (struct expand_vec_perm_d *d)
46925 {
46926   unsigned i, nelt = d->nelt;
46927   struct expand_vec_perm_d nd;
46928 
46929   /* Check plain VEC_SELECT first, because AVX has instructions that could
46930      match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
46931      input where SEL+CONCAT may not.  */
46932   if (d->one_operand_p)
46933     {
46934       int mask = nelt - 1;
46935       bool identity_perm = true;
46936       bool broadcast_perm = true;
46937 
46938       for (i = 0; i < nelt; i++)
46939 	{
46940 	  nd.perm[i] = d->perm[i] & mask;
46941 	  if (nd.perm[i] != i)
46942 	    identity_perm = false;
46943 	  if (nd.perm[i])
46944 	    broadcast_perm = false;
46945 	}
46946 
46947       if (identity_perm)
46948 	{
46949 	  if (!d->testing_p)
46950 	    emit_move_insn (d->target, d->op0);
46951 	  return true;
46952 	}
46953       else if (broadcast_perm && TARGET_AVX2)
46954 	{
46955 	  /* Use vpbroadcast{b,w,d}.  */
46956 	  rtx (*gen) (rtx, rtx) = NULL;
46957 	  switch (d->vmode)
46958 	    {
46959 	    case E_V64QImode:
46960 	      if (TARGET_AVX512BW)
46961 		gen = gen_avx512bw_vec_dupv64qi_1;
46962 	      break;
46963 	    case E_V32QImode:
46964 	      gen = gen_avx2_pbroadcastv32qi_1;
46965 	      break;
46966 	    case E_V32HImode:
46967 	      if (TARGET_AVX512BW)
46968 		gen = gen_avx512bw_vec_dupv32hi_1;
46969 	      break;
46970 	    case E_V16HImode:
46971 	      gen = gen_avx2_pbroadcastv16hi_1;
46972 	      break;
46973 	    case E_V16SImode:
46974 	      if (TARGET_AVX512F)
46975 		gen = gen_avx512f_vec_dupv16si_1;
46976 	      break;
46977 	    case E_V8SImode:
46978 	      gen = gen_avx2_pbroadcastv8si_1;
46979 	      break;
46980 	    case E_V16QImode:
46981 	      gen = gen_avx2_pbroadcastv16qi;
46982 	      break;
46983 	    case E_V8HImode:
46984 	      gen = gen_avx2_pbroadcastv8hi;
46985 	      break;
46986 	    case E_V16SFmode:
46987 	      if (TARGET_AVX512F)
46988 		gen = gen_avx512f_vec_dupv16sf_1;
46989 	      break;
46990 	    case E_V8SFmode:
46991 	      gen = gen_avx2_vec_dupv8sf_1;
46992 	      break;
46993 	    case E_V8DFmode:
46994 	      if (TARGET_AVX512F)
46995 		gen = gen_avx512f_vec_dupv8df_1;
46996 	      break;
46997 	    case E_V8DImode:
46998 	      if (TARGET_AVX512F)
46999 		gen = gen_avx512f_vec_dupv8di_1;
47000 	      break;
47001 	    /* For other modes prefer other shuffles this function creates.  */
47002 	    default: break;
47003 	    }
47004 	  if (gen != NULL)
47005 	    {
47006 	      if (!d->testing_p)
47007 		emit_insn (gen (d->target, d->op0));
47008 	      return true;
47009 	    }
47010 	}
47011 
47012       if (expand_vselect (d->target, d->op0, nd.perm, nelt, d->testing_p))
47013 	return true;
47014 
47015       /* There are plenty of patterns in sse.md that are written for
47016 	 SEL+CONCAT and are not replicated for a single op.  Perhaps
47017 	 that should be changed, to avoid the nastiness here.  */
47018 
47019       /* Recognize interleave style patterns, which means incrementing
47020 	 every other permutation operand.  */
47021       for (i = 0; i < nelt; i += 2)
47022 	{
47023 	  nd.perm[i] = d->perm[i] & mask;
47024 	  nd.perm[i + 1] = (d->perm[i + 1] & mask) + nelt;
47025 	}
47026       if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
47027 				  d->testing_p))
47028 	return true;
47029 
47030       /* Recognize shufps, which means adding {0, 0, nelt, nelt}.  */
47031       if (nelt >= 4)
47032 	{
47033 	  for (i = 0; i < nelt; i += 4)
47034 	    {
47035 	      nd.perm[i + 0] = d->perm[i + 0] & mask;
47036 	      nd.perm[i + 1] = d->perm[i + 1] & mask;
47037 	      nd.perm[i + 2] = (d->perm[i + 2] & mask) + nelt;
47038 	      nd.perm[i + 3] = (d->perm[i + 3] & mask) + nelt;
47039 	    }
47040 
47041 	  if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
47042 				      d->testing_p))
47043 	    return true;
47044 	}
47045     }
47046 
47047   /* Finally, try the fully general two operand permute.  */
47048   if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
47049 			      d->testing_p))
47050     return true;
47051 
47052   /* Recognize interleave style patterns with reversed operands.  */
47053   if (!d->one_operand_p)
47054     {
47055       for (i = 0; i < nelt; ++i)
47056 	{
47057 	  unsigned e = d->perm[i];
47058 	  if (e >= nelt)
47059 	    e -= nelt;
47060 	  else
47061 	    e += nelt;
47062 	  nd.perm[i] = e;
47063 	}
47064 
47065       if (expand_vselect_vconcat (d->target, d->op1, d->op0, nd.perm, nelt,
47066 				  d->testing_p))
47067 	return true;
47068     }
47069 
47070   /* Try the SSE4.1 blend variable merge instructions.  */
47071   if (expand_vec_perm_blend (d))
47072     return true;
47073 
47074   /* Try one of the AVX vpermil variable permutations.  */
47075   if (expand_vec_perm_vpermil (d))
47076     return true;
47077 
47078   /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
47079      vpshufb, vpermd, vpermps or vpermq variable permutation.  */
47080   if (expand_vec_perm_pshufb (d))
47081     return true;
47082 
47083   /* Try the AVX2 vpalignr instruction.  */
47084   if (expand_vec_perm_palignr (d, true))
47085     return true;
47086 
47087   /* Try the AVX512F vperm{s,d} instructions.  */
47088   if (ix86_expand_vec_one_operand_perm_avx512 (d))
47089     return true;
47090 
47091   /* Try the AVX512F vpermt2/vpermi2 instructions.  */
47092   if (ix86_expand_vec_perm_vpermt2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47093     return true;
47094 
47095   /* See if we can get the same permutation in different vector integer
47096      mode.  */
47097   if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
47098     {
47099       if (!d->testing_p)
47100 	emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
47101       return true;
47102     }
47103   return false;
47104 }
47105 
47106 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
47107    in terms of a pair of pshuflw + pshufhw instructions.  */
47108 
47109 static bool
47110 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47111 {
47112   unsigned char perm2[MAX_VECT_LEN];
47113   unsigned i;
47114   bool ok;
47115 
47116   if (d->vmode != V8HImode || !d->one_operand_p)
47117     return false;
47118 
47119   /* The two permutations only operate in 64-bit lanes.  */
47120   for (i = 0; i < 4; ++i)
47121     if (d->perm[i] >= 4)
47122       return false;
47123   for (i = 4; i < 8; ++i)
47124     if (d->perm[i] < 4)
47125       return false;
47126 
47127   if (d->testing_p)
47128     return true;
47129 
47130   /* Emit the pshuflw.  */
47131   memcpy (perm2, d->perm, 4);
47132   for (i = 4; i < 8; ++i)
47133     perm2[i] = i;
47134   ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47135   gcc_assert (ok);
47136 
47137   /* Emit the pshufhw.  */
47138   memcpy (perm2 + 4, d->perm + 4, 4);
47139   for (i = 0; i < 4; ++i)
47140     perm2[i] = i;
47141   ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47142   gcc_assert (ok);
47143 
47144   return true;
47145 }
47146 
47147 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
47148    the permutation using the SSSE3 palignr instruction.  This succeeds
47149    when all of the elements in PERM fit within one vector and we merely
47150    need to shift them down so that a single vector permutation has a
47151    chance to succeed.  If SINGLE_INSN_ONLY_P, succeed if only
47152    the vpalignr instruction itself can perform the requested permutation.  */
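/* For example, if every index of a two-operand V16QImode permutation lies
   in the window [5, 20], a single palignr of the op1:op0 concatenation by
   5 bytes yields a vector holding exactly elements 5..20, and what remains
   is a one-operand permutation with every index reduced by 5.  */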
47153 
47154 static bool
47155 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47156 {
47157   unsigned i, nelt = d->nelt;
47158   unsigned min, max, minswap, maxswap;
47159   bool in_order, ok, swap = false;
47160   rtx shift, target;
47161   struct expand_vec_perm_d dcopy;
47162 
47163   /* Even with AVX, palignr only operates on 128-bit vectors; with
47164      AVX2 it operates on both 128-bit lanes, independently in each.  */
47165   if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47166       && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
47167     return false;
47168 
47169   min = 2 * nelt;
47170   max = 0;
47171   minswap = 2 * nelt;
47172   maxswap = 0;
47173   for (i = 0; i < nelt; ++i)
47174     {
47175       unsigned e = d->perm[i];
47176       unsigned eswap = d->perm[i] ^ nelt;
47177       if (GET_MODE_SIZE (d->vmode) == 32)
47178 	{
47179 	  e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47180 	  eswap = e ^ (nelt / 2);
47181 	}
47182       if (e < min)
47183 	min = e;
47184       if (e > max)
47185 	max = e;
47186       if (eswap < minswap)
47187 	minswap = eswap;
47188       if (eswap > maxswap)
47189 	maxswap = eswap;
47190     }
47191   if (min == 0
47192       || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
47193     {
47194       if (d->one_operand_p
47195 	  || minswap == 0
47196 	  || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47197 				   ? nelt / 2 : nelt))
47198 	return false;
47199       swap = true;
47200       min = minswap;
47201       max = maxswap;
47202     }
47203 
47204   /* Given that we have SSSE3, we know we'll be able to implement the
47205      single operand permutation after the palignr with pshufb for
47206      128-bit vectors.  If SINGLE_INSN_ONLY_P, in_order has to be computed
47207      first.  */
47208   if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
47209     return true;
47210 
47211   dcopy = *d;
47212   if (swap)
47213     {
47214       dcopy.op0 = d->op1;
47215       dcopy.op1 = d->op0;
47216       for (i = 0; i < nelt; ++i)
47217 	dcopy.perm[i] ^= nelt;
47218     }
47219 
47220   in_order = true;
47221   for (i = 0; i < nelt; ++i)
47222     {
47223       unsigned e = dcopy.perm[i];
47224       if (GET_MODE_SIZE (d->vmode) == 32
47225 	  && e >= nelt
47226 	  && (e & (nelt / 2 - 1)) < min)
47227 	e = e - min - (nelt / 2);
47228       else
47229 	e = e - min;
47230       if (e != i)
47231 	in_order = false;
47232       dcopy.perm[i] = e;
47233     }
47234   dcopy.one_operand_p = true;
47235 
47236   if (single_insn_only_p && !in_order)
47237     return false;
47238 
47239   /* For AVX2, test whether we can permute the result in one instruction.  */
47240   if (d->testing_p)
47241     {
47242       if (in_order)
47243 	return true;
47244       dcopy.op1 = dcopy.op0;
47245       return expand_vec_perm_1 (&dcopy);
47246     }
47247 
47248   shift = GEN_INT (min * GET_MODE_UNIT_BITSIZE (d->vmode));
47249   if (GET_MODE_SIZE (d->vmode) == 16)
47250     {
47251       target = gen_reg_rtx (TImode);
47252       emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
47253 				      gen_lowpart (TImode, dcopy.op0), shift));
47254     }
47255   else
47256     {
47257       target = gen_reg_rtx (V2TImode);
47258       emit_insn (gen_avx2_palignrv2ti (target,
47259 				       gen_lowpart (V2TImode, dcopy.op1),
47260 				       gen_lowpart (V2TImode, dcopy.op0),
47261 				       shift));
47262     }
47263 
47264   dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
47265 
47266   /* Test for the degenerate case where the alignment by itself
47267      produces the desired permutation.  */
47268   if (in_order)
47269     {
47270       emit_move_insn (d->target, dcopy.op0);
47271       return true;
47272     }
47273 
47274   ok = expand_vec_perm_1 (&dcopy);
47275   gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
47276 
47277   return ok;
47278 }
47279 
47280 /* A subroutine of ix86_expand_vec_perm_const_1.  Try to simplify
47281    the permutation using the SSE4_1 pblendv instruction.  Potentially
47282    reduces permutation from 2 pshufb and or to 1 pshufb and pblendv.  */
47283 
47284 static bool
47285 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
47286 {
47287   unsigned i, which, nelt = d->nelt;
47288   struct expand_vec_perm_d dcopy, dcopy1;
47289   machine_mode vmode = d->vmode;
47290   bool ok;
47291 
47292   /* Use the same checks as in expand_vec_perm_blend.  */
47293   if (d->one_operand_p)
47294     return false;
47295   if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47296     ;
47297   else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47298     ;
47299   else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47300     ;
47301   else
47302     return false;
47303 
47304   /* Figure out which permutation elements do not stay in their
47305      original positions, and which operand they come from.  */
47306   for (i = 0, which = 0; i < nelt; ++i)
47307     {
47308       unsigned e = d->perm[i];
47309       if (e != i)
47310 	which |= (e < nelt ? 1 : 2);
47311     }
47312   /* We can pblend the part where elements do not stay in their
47313      original positions only when all of those elements come from
47314      the same operand half of the permutation.
47315      {0 1 8 3 4 5 9 7} is ok: the displaced elements are 8 and 9,
47316      and both are >= 8, i.e. both come from the second operand;
47317      {0 1 8 3 4 5 2 7} is not ok: the displaced elements are 8 and 2,
47318      and 8 >= 8 but 2 < 8, so they come from different operands.  */
47319   if (which != 1 && which != 2)
47320     return false;
47321   if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
47322     return true;
47323 
47324   /* First we apply one operand permutation to the part where
47325      elements stay not in their respective lanes.  */
47326   dcopy = *d;
47327   if (which == 2)
47328     dcopy.op0 = dcopy.op1 = d->op1;
47329   else
47330     dcopy.op0 = dcopy.op1 = d->op0;
47331   if (!d->testing_p)
47332     dcopy.target = gen_reg_rtx (vmode);
47333   dcopy.one_operand_p = true;
47334 
47335   for (i = 0; i < nelt; ++i)
47336     dcopy.perm[i] = d->perm[i] & (nelt - 1);
47337 
47338   ok = expand_vec_perm_1 (&dcopy);
47339   if (GET_MODE_SIZE (vmode) != 16 && !ok)
47340     return false;
47341   else
47342     gcc_assert (ok);
47343   if (d->testing_p)
47344     return true;
47345 
47346   /* Next we put permuted elements into their positions.  */
47347   dcopy1 = *d;
47348   if (which == 2)
47349     dcopy1.op1 = dcopy.target;
47350   else
47351     dcopy1.op0 = dcopy.target;
47352 
47353   for (i = 0; i < nelt; ++i)
47354     dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
47355 
47356   ok = expand_vec_perm_blend (&dcopy1);
47357   gcc_assert (ok);
47358 
47359   return true;
47360 }
47361 
47362 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
47363 
47364 /* A subroutine of ix86_expand_vec_perm_const_1.  Try to simplify
47365    a two vector permutation into a single vector permutation by using
47366    an interleave operation to merge the vectors.  */
47367 
47368 static bool
47369 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
47370 {
47371   struct expand_vec_perm_d dremap, dfinal;
47372   unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
47373   unsigned HOST_WIDE_INT contents;
47374   unsigned char remap[2 * MAX_VECT_LEN];
47375   rtx_insn *seq;
47376   bool ok, same_halves = false;
47377 
47378   if (GET_MODE_SIZE (d->vmode) == 16)
47379     {
47380       if (d->one_operand_p)
47381 	return false;
47382     }
47383   else if (GET_MODE_SIZE (d->vmode) == 32)
47384     {
47385       if (!TARGET_AVX)
47386 	return false;
47387       /* For 32-byte modes allow even d->one_operand_p.
47388 	 The lack of cross-lane shuffling in some instructions
47389 	 might prevent a single insn shuffle.  */
47390       dfinal = *d;
47391       dfinal.testing_p = true;
47392       /* If expand_vec_perm_interleave3 can expand this into
47393 	 a 3 insn sequence, give up and let it be expanded as
47394 	 a 3 insn sequence.  While that is one insn longer,
47395 	 it doesn't need a memory operand, and in the common
47396 	 case where the interleave low and interleave high
47397 	 permutations of the same operands are adjacent, both
47398 	 together need only 4 insns after CSE.  */
47399       if (expand_vec_perm_interleave3 (&dfinal))
47400 	return false;
47401     }
47402   else
47403     return false;
47404 
47405   /* Examine from whence the elements come.  */
47406   contents = 0;
47407   for (i = 0; i < nelt; ++i)
47408     contents |= HOST_WIDE_INT_1U << d->perm[i];
47409 
47410   memset (remap, 0xff, sizeof (remap));
47411   dremap = *d;
47412 
47413   if (GET_MODE_SIZE (d->vmode) == 16)
47414     {
47415       unsigned HOST_WIDE_INT h1, h2, h3, h4;
47416 
47417       /* Split the two input vectors into 4 halves.  */
47418       h1 = (HOST_WIDE_INT_1U << nelt2) - 1;
47419       h2 = h1 << nelt2;
47420       h3 = h2 << nelt2;
47421       h4 = h3 << nelt2;
47422 
47423       /* If the elements all come from the low halves, use interleave low;
47424 	 likewise for interleave high.  If the elements come from mismatched
47425 	 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle.  */
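      /* E.g. for V4SImode with d->perm == {1 5 0 4} the interleave-low
	 case applies: dremap.perm becomes {0 4 1 5}, remap maps
	 0->0, 4->1, 1->2, 5->3, and dfinal.perm computed below is
	 {2 3 0 1}, a single pshufd.  */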
47426       if ((contents & (h1 | h3)) == contents)
47427 	{
47428 	  /* punpckl* */
47429 	  for (i = 0; i < nelt2; ++i)
47430 	    {
47431 	      remap[i] = i * 2;
47432 	      remap[i + nelt] = i * 2 + 1;
47433 	      dremap.perm[i * 2] = i;
47434 	      dremap.perm[i * 2 + 1] = i + nelt;
47435 	    }
47436 	  if (!TARGET_SSE2 && d->vmode == V4SImode)
47437 	    dremap.vmode = V4SFmode;
47438 	}
47439       else if ((contents & (h2 | h4)) == contents)
47440 	{
47441 	  /* punpckh* */
47442 	  for (i = 0; i < nelt2; ++i)
47443 	    {
47444 	      remap[i + nelt2] = i * 2;
47445 	      remap[i + nelt + nelt2] = i * 2 + 1;
47446 	      dremap.perm[i * 2] = i + nelt2;
47447 	      dremap.perm[i * 2 + 1] = i + nelt + nelt2;
47448 	    }
47449 	  if (!TARGET_SSE2 && d->vmode == V4SImode)
47450 	    dremap.vmode = V4SFmode;
47451 	}
47452       else if ((contents & (h1 | h4)) == contents)
47453 	{
47454 	  /* shufps */
47455 	  for (i = 0; i < nelt2; ++i)
47456 	    {
47457 	      remap[i] = i;
47458 	      remap[i + nelt + nelt2] = i + nelt2;
47459 	      dremap.perm[i] = i;
47460 	      dremap.perm[i + nelt2] = i + nelt + nelt2;
47461 	    }
47462 	  if (nelt != 4)
47463 	    {
47464 	      /* shufpd */
47465 	      dremap.vmode = V2DImode;
47466 	      dremap.nelt = 2;
47467 	      dremap.perm[0] = 0;
47468 	      dremap.perm[1] = 3;
47469 	    }
47470 	}
47471       else if ((contents & (h2 | h3)) == contents)
47472 	{
47473 	  /* shufps */
47474 	  for (i = 0; i < nelt2; ++i)
47475 	    {
47476 	      remap[i + nelt2] = i;
47477 	      remap[i + nelt] = i + nelt2;
47478 	      dremap.perm[i] = i + nelt2;
47479 	      dremap.perm[i + nelt2] = i + nelt;
47480 	    }
47481 	  if (nelt != 4)
47482 	    {
47483 	      /* shufpd */
47484 	      dremap.vmode = V2DImode;
47485 	      dremap.nelt = 2;
47486 	      dremap.perm[0] = 1;
47487 	      dremap.perm[1] = 2;
47488 	    }
47489 	}
47490       else
47491 	return false;
47492     }
47493   else
47494     {
47495       unsigned int nelt4 = nelt / 4, nzcnt = 0;
47496       unsigned HOST_WIDE_INT q[8];
47497       unsigned int nonzero_halves[4];
47498 
47499       /* Split the two input vectors into 8 quarters.  */
47500       q[0] = (HOST_WIDE_INT_1U << nelt4) - 1;
47501       for (i = 1; i < 8; ++i)
47502 	q[i] = q[0] << (nelt4 * i);
47503       for (i = 0; i < 4; ++i)
47504 	if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
47505 	  {
47506 	    nonzero_halves[nzcnt] = i;
47507 	    ++nzcnt;
47508 	  }
47509 
47510       if (nzcnt == 1)
47511 	{
47512 	  gcc_assert (d->one_operand_p);
47513 	  nonzero_halves[1] = nonzero_halves[0];
47514 	  same_halves = true;
47515 	}
47516       else if (d->one_operand_p)
47517 	{
47518 	  gcc_assert (nonzero_halves[0] == 0);
47519 	  gcc_assert (nonzero_halves[1] == 1);
47520 	}
47521 
47522       if (nzcnt <= 2)
47523 	{
47524 	  if (d->perm[0] / nelt2 == nonzero_halves[1])
47525 	    {
47526 	      /* Attempt to increase the likelihood that dfinal
47527 		 shuffle will be intra-lane.  */
47528 	      std::swap (nonzero_halves[0], nonzero_halves[1]);
47529 	    }
47530 
47531 	  /* vperm2f128 or vperm2i128.  */
47532 	  for (i = 0; i < nelt2; ++i)
47533 	    {
47534 	      remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
47535 	      remap[i + nonzero_halves[0] * nelt2] = i;
47536 	      dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
47537 	      dremap.perm[i] = i + nonzero_halves[0] * nelt2;
47538 	    }
47539 
47540 	  if (d->vmode != V8SFmode
47541 	      && d->vmode != V4DFmode
47542 	      && d->vmode != V8SImode)
47543 	    {
47544 	      dremap.vmode = V8SImode;
47545 	      dremap.nelt = 8;
47546 	      for (i = 0; i < 4; ++i)
47547 		{
47548 		  dremap.perm[i] = i + nonzero_halves[0] * 4;
47549 		  dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
47550 		}
47551 	    }
47552 	}
47553       else if (d->one_operand_p)
47554 	return false;
47555       else if (TARGET_AVX2
47556 	       && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
47557 	{
47558 	  /* vpunpckl* */
47559 	  for (i = 0; i < nelt4; ++i)
47560 	    {
47561 	      remap[i] = i * 2;
47562 	      remap[i + nelt] = i * 2 + 1;
47563 	      remap[i + nelt2] = i * 2 + nelt2;
47564 	      remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
47565 	      dremap.perm[i * 2] = i;
47566 	      dremap.perm[i * 2 + 1] = i + nelt;
47567 	      dremap.perm[i * 2 + nelt2] = i + nelt2;
47568 	      dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
47569 	    }
47570 	}
47571       else if (TARGET_AVX2
47572 	       && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
47573 	{
47574 	  /* vpunpckh* */
47575 	  for (i = 0; i < nelt4; ++i)
47576 	    {
47577 	      remap[i + nelt4] = i * 2;
47578 	      remap[i + nelt + nelt4] = i * 2 + 1;
47579 	      remap[i + nelt2 + nelt4] = i * 2 + nelt2;
47580 	      remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
47581 	      dremap.perm[i * 2] = i + nelt4;
47582 	      dremap.perm[i * 2 + 1] = i + nelt + nelt4;
47583 	      dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
47584 	      dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
47585 	    }
47586 	}
47587       else
47588 	return false;
47589     }
47590 
47591   /* Use the remapping array set up above to move the elements from their
47592      swizzled locations into their final destinations.  */
47593   dfinal = *d;
47594   for (i = 0; i < nelt; ++i)
47595     {
47596       unsigned e = remap[d->perm[i]];
47597       gcc_assert (e < nelt);
47598       /* If same_halves is true, both halves of the remapped vector are the
47599 	 same.  Avoid cross-lane accesses if possible.  */
47600       if (same_halves && i >= nelt2)
47601 	{
47602 	  gcc_assert (e < nelt2);
47603 	  dfinal.perm[i] = e + nelt2;
47604 	}
47605       else
47606 	dfinal.perm[i] = e;
47607     }
47608   if (!d->testing_p)
47609     {
47610       dremap.target = gen_reg_rtx (dremap.vmode);
47611       dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
47612     }
47613   dfinal.op1 = dfinal.op0;
47614   dfinal.one_operand_p = true;
47615 
47616   /* Test if the final remap can be done with a single insn.  For V4SFmode or
47617      V4SImode this *will* succeed.  For V8HImode or V16QImode it may not.  */
47618   start_sequence ();
47619   ok = expand_vec_perm_1 (&dfinal);
47620   seq = get_insns ();
47621   end_sequence ();
47622 
47623   if (!ok)
47624     return false;
47625 
47626   if (d->testing_p)
47627     return true;
47628 
47629   if (dremap.vmode != dfinal.vmode)
47630     {
47631       dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
47632       dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
47633     }
47634 
47635   ok = expand_vec_perm_1 (&dremap);
47636   gcc_assert (ok);
47637 
47638   emit_insn (seq);
47639   return true;
47640 }
47641 
47642 /* A subroutine of ix86_expand_vec_perm_const_1.  Try to simplify
47643    a single vector cross-lane permutation into vpermq followed
47644    by any of the single insn permutations.  */
47645 
47646 static bool
47647 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
47648 {
47649   struct expand_vec_perm_d dremap, dfinal;
47650   unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
47651   unsigned contents[2];
47652   bool ok;
47653 
47654   if (!(TARGET_AVX2
47655 	&& (d->vmode == V32QImode || d->vmode == V16HImode)
47656 	&& d->one_operand_p))
47657     return false;
47658 
47659   contents[0] = 0;
47660   contents[1] = 0;
47661   for (i = 0; i < nelt2; ++i)
47662     {
47663       contents[0] |= 1u << (d->perm[i] / nelt4);
47664       contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
47665     }
47666 
47667   for (i = 0; i < 2; ++i)
47668     {
47669       unsigned int cnt = 0;
47670       for (j = 0; j < 4; ++j)
47671 	if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
47672 	  return false;
47673     }
47674 
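  /* As an illustration, for V16HImode with
     d->perm == {0 8 1 9 2 10 3 11 4 12 5 13 6 14 7 15} the first result
     half reads only quarters 0 and 2 and the second half only 1 and 3,
     so dremap below becomes the vpermq selector {0 2 1 3} and dfinal is
     a purely in-lane shuffle.  */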
47675   if (d->testing_p)
47676     return true;
47677 
47678   dremap = *d;
47679   dremap.vmode = V4DImode;
47680   dremap.nelt = 4;
47681   dremap.target = gen_reg_rtx (V4DImode);
47682   dremap.op0 = gen_lowpart (V4DImode, d->op0);
47683   dremap.op1 = dremap.op0;
47684   dremap.one_operand_p = true;
47685   for (i = 0; i < 2; ++i)
47686     {
47687       unsigned int cnt = 0;
47688       for (j = 0; j < 4; ++j)
47689 	if ((contents[i] & (1u << j)) != 0)
47690 	  dremap.perm[2 * i + cnt++] = j;
47691       for (; cnt < 2; ++cnt)
47692 	dremap.perm[2 * i + cnt] = 0;
47693     }
47694 
47695   dfinal = *d;
47696   dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
47697   dfinal.op1 = dfinal.op0;
47698   dfinal.one_operand_p = true;
47699   for (i = 0, j = 0; i < nelt; ++i)
47700     {
47701       if (i == nelt2)
47702 	j = 2;
47703       dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
47704       if ((d->perm[i] / nelt4) == dremap.perm[j])
47705 	;
47706       else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
47707 	dfinal.perm[i] |= nelt4;
47708       else
47709 	gcc_unreachable ();
47710     }
47711 
47712   ok = expand_vec_perm_1 (&dremap);
47713   gcc_assert (ok);
47714 
47715   ok = expand_vec_perm_1 (&dfinal);
47716   gcc_assert (ok);
47717 
47718   return true;
47719 }
47720 
47721 /* A subroutine of ix86_expand_vec_perm_const_1.  Try to expand
47722    a vector permutation using two instructions, vperm2f128 resp.
47723    vperm2i128 followed by any single in-lane permutation.  */
47724 
47725 static bool
47726 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
47727 {
47728   struct expand_vec_perm_d dfirst, dsecond;
47729   unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
47730   bool ok;
47731 
47732   if (!TARGET_AVX
47733       || GET_MODE_SIZE (d->vmode) != 32
47734       || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
47735     return false;
47736 
47737   dsecond = *d;
47738   dsecond.one_operand_p = false;
47739   dsecond.testing_p = true;
47740 
47741   /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
47742      immediate.  For perm < 16 the second permutation uses
47743      d->op0 as first operand, for perm >= 16 it uses d->op1
47744      as first operand.  The second operand is the result of
47745      vperm2[fi]128.  */
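  /* Sketch of one case: for V4DFmode d->perm == {3 2 5 4} the loop
     settles on perm == 9, i.e. dfirst is a vperm2f128 selecting lanes
     {op0 high, op1 low} (elements {2 3 4 5}), and dsecond becomes the
     in-lane swap {5 4 7 6} of that result.  */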
47746   for (perm = 0; perm < 32; perm++)
47747     {
47748       /* Ignore permutations which do not move anything cross-lane.  */
47749       if (perm < 16)
47750 	{
47751 	  /* The second shuffle for e.g. V4DFmode has
47752 	     0123 and ABCD operands.
47753 	     Ignore AB23, as 23 is already in the second lane
47754 	     of the first operand.  */
47755 	  if ((perm & 0xc) == (1 << 2)) continue;
47756 	  /* And 01CD, as 01 is in the first lane of the first
47757 	     operand.  */
47758 	  if ((perm & 3) == 0) continue;
47759 	  /* And 4567, as then the vperm2[fi]128 doesn't change
47760 	     anything on the original 4567 second operand.  */
47761 	  if ((perm & 0xf) == ((3 << 2) | 2)) continue;
47762 	}
47763       else
47764 	{
47765 	  /* The second shuffle for e.g. V4DFmode has
47766 	     4567 and ABCD operands.
47767 	     Ignore AB67, as 67 is already in the second lane
47768 	     of the first operand.  */
47769 	  if ((perm & 0xc) == (3 << 2)) continue;
47770 	  /* And 45CD, as 45 is in the first lane of the first
47771 	     operand.  */
47772 	  if ((perm & 3) == 2) continue;
47773 	  /* And 0123, as then the vperm2[fi]128 doesn't change
47774 	     anything on the original 0123 first operand.  */
47775 	  if ((perm & 0xf) == (1 << 2)) continue;
47776 	}
47777 
47778       for (i = 0; i < nelt; i++)
47779 	{
47780 	  j = d->perm[i] / nelt2;
47781 	  if (j == ((perm >> (2 * (i >= nelt2))) & 3))
47782 	    dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
47783 	  else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
47784 	    dsecond.perm[i] = d->perm[i] & (nelt - 1);
47785 	  else
47786 	    break;
47787 	}
47788 
47789       if (i == nelt)
47790 	{
47791 	  start_sequence ();
47792 	  ok = expand_vec_perm_1 (&dsecond);
47793 	  end_sequence ();
47794 	}
47795       else
47796 	ok = false;
47797 
47798       if (ok)
47799 	{
47800 	  if (d->testing_p)
47801 	    return true;
47802 
47803 	  /* Found a usable second shuffle.  dfirst will be
47804 	     vperm2f128 on d->op0 and d->op1.  */
47805 	  dsecond.testing_p = false;
47806 	  dfirst = *d;
47807 	  dfirst.target = gen_reg_rtx (d->vmode);
47808 	  for (i = 0; i < nelt; i++)
47809 	    dfirst.perm[i] = (i & (nelt2 - 1))
47810 			     + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
47811 
47812 	  canonicalize_perm (&dfirst);
47813 	  ok = expand_vec_perm_1 (&dfirst);
47814 	  gcc_assert (ok);
47815 
47816 	  /* And dsecond is some single insn shuffle, taking
47817 	     d->op0 and result of vperm2f128 (if perm < 16) or
47818 	     d->op1 and result of vperm2f128 (otherwise).  */
47819 	  if (perm >= 16)
47820 	    dsecond.op0 = dsecond.op1;
47821 	  dsecond.op1 = dfirst.target;
47822 
47823 	  ok = expand_vec_perm_1 (&dsecond);
47824 	  gcc_assert (ok);
47825 
47826 	  return true;
47827 	}
47828 
47829       /* For one operand, the only useful vperm2f128 permutation is 0x01
47830 	 aka lanes swap.  */
47831       if (d->one_operand_p)
47832 	return false;
47833     }
47834 
47835   return false;
47836 }
47837 
47838 /* A subroutine of ix86_expand_vec_perm_const_1.  Try to simplify
47839    a two vector permutation using 2 intra-lane interleave insns
47840    and cross-lane shuffle for 32-byte vectors.  */
47841 
47842 static bool
47843 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
47844 {
47845   unsigned i, nelt;
47846   rtx (*gen) (rtx, rtx, rtx);
47847 
47848   if (d->one_operand_p)
47849     return false;
47850   if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
47851     ;
47852   else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
47853     ;
47854   else
47855     return false;
47856 
47857   nelt = d->nelt;
47858   if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
47859     return false;
47860   for (i = 0; i < nelt; i += 2)
47861     if (d->perm[i] != d->perm[0] + i / 2
47862 	|| d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
47863       return false;
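  /* The permutation is then a full-width interleave, e.g. for V8SImode
     d->perm == {0 8 1 9 2 10 3 11} (low) or {4 12 5 13 6 14 7 15}
     (high), which the vec_interleave_* expanders chosen below know how
     to emit.  */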
47864 
47865   if (d->testing_p)
47866     return true;
47867 
47868   switch (d->vmode)
47869     {
47870     case E_V32QImode:
47871       if (d->perm[0])
47872 	gen = gen_vec_interleave_highv32qi;
47873       else
47874 	gen = gen_vec_interleave_lowv32qi;
47875       break;
47876     case E_V16HImode:
47877       if (d->perm[0])
47878 	gen = gen_vec_interleave_highv16hi;
47879       else
47880 	gen = gen_vec_interleave_lowv16hi;
47881       break;
47882     case E_V8SImode:
47883       if (d->perm[0])
47884 	gen = gen_vec_interleave_highv8si;
47885       else
47886 	gen = gen_vec_interleave_lowv8si;
47887       break;
47888     case E_V4DImode:
47889       if (d->perm[0])
47890 	gen = gen_vec_interleave_highv4di;
47891       else
47892 	gen = gen_vec_interleave_lowv4di;
47893       break;
47894     case E_V8SFmode:
47895       if (d->perm[0])
47896 	gen = gen_vec_interleave_highv8sf;
47897       else
47898 	gen = gen_vec_interleave_lowv8sf;
47899       break;
47900     case E_V4DFmode:
47901       if (d->perm[0])
47902 	gen = gen_vec_interleave_highv4df;
47903       else
47904 	gen = gen_vec_interleave_lowv4df;
47905       break;
47906     default:
47907       gcc_unreachable ();
47908     }
47909 
47910   emit_insn (gen (d->target, d->op0, d->op1));
47911   return true;
47912 }
47913 
47914 /* A subroutine of ix86_expand_vec_perm_const_1.  Try to implement
47915    a single vector permutation using a single intra-lane vector
47916    permutation, vperm2f128 swapping the lanes and vblend* insn blending
47917    the non-swapped and swapped vectors together.  */
47918 
47919 static bool
47920 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
47921 {
47922   struct expand_vec_perm_d dfirst, dsecond;
47923   unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
47924   rtx_insn *seq;
47925   bool ok;
47926   rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
47927 
47928   if (!TARGET_AVX
47929       || TARGET_AVX2
47930       || (d->vmode != V8SFmode && d->vmode != V4DFmode)
47931       || !d->one_operand_p)
47932     return false;
47933 
47934   dfirst = *d;
47935   for (i = 0; i < nelt; i++)
47936     dfirst.perm[i] = 0xff;
47937   for (i = 0, msk = 0; i < nelt; i++)
47938     {
47939       j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
47940       if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
47941 	return false;
47942       dfirst.perm[j] = d->perm[i];
47943       if (j != i)
47944 	msk |= (1 << i);
47945     }
47946   for (i = 0; i < nelt; i++)
47947     if (dfirst.perm[i] == 0xff)
47948       dfirst.perm[i] = i;
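  /* E.g. for the one-operand V4DFmode permutation {2 1 0 3}: dfirst
     stays the identity, dsecond below is the lane swap {2 3 0 1}, and
     msk == 0x5 makes vblendpd take elements 0 and 2 from the swapped
     copy.  */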
47949 
47950   if (!d->testing_p)
47951     dfirst.target = gen_reg_rtx (dfirst.vmode);
47952 
47953   start_sequence ();
47954   ok = expand_vec_perm_1 (&dfirst);
47955   seq = get_insns ();
47956   end_sequence ();
47957 
47958   if (!ok)
47959     return false;
47960 
47961   if (d->testing_p)
47962     return true;
47963 
47964   emit_insn (seq);
47965 
47966   dsecond = *d;
47967   dsecond.op0 = dfirst.target;
47968   dsecond.op1 = dfirst.target;
47969   dsecond.one_operand_p = true;
47970   dsecond.target = gen_reg_rtx (dsecond.vmode);
47971   for (i = 0; i < nelt; i++)
47972     dsecond.perm[i] = i ^ nelt2;
47973 
47974   ok = expand_vec_perm_1 (&dsecond);
47975   gcc_assert (ok);
47976 
47977   blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
47978   emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
47979   return true;
47980 }
47981 
47982 /* A subroutine of ix86_expand_vec_perm_const_1.  Implement a V4DF
47983    permutation using two vperm2f128, followed by a vshufpd insn blending
47984    the two vectors together.  */
47985 
47986 static bool
47987 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
47988 {
47989   struct expand_vec_perm_d dfirst, dsecond, dthird;
47990   bool ok;
47991 
47992   if (!TARGET_AVX || (d->vmode != V4DFmode))
47993     return false;
47994 
47995   if (d->testing_p)
47996     return true;
47997 
47998   dfirst = *d;
47999   dsecond = *d;
48000   dthird = *d;
48001 
48002   dfirst.perm[0] = (d->perm[0] & ~1);
48003   dfirst.perm[1] = (d->perm[0] & ~1) + 1;
48004   dfirst.perm[2] = (d->perm[2] & ~1);
48005   dfirst.perm[3] = (d->perm[2] & ~1) + 1;
48006   dsecond.perm[0] = (d->perm[1] & ~1);
48007   dsecond.perm[1] = (d->perm[1] & ~1) + 1;
48008   dsecond.perm[2] = (d->perm[3] & ~1);
48009   dsecond.perm[3] = (d->perm[3] & ~1) + 1;
48010   dthird.perm[0] = (d->perm[0] % 2);
48011   dthird.perm[1] = (d->perm[1] % 2) + 4;
48012   dthird.perm[2] = (d->perm[2] % 2) + 2;
48013   dthird.perm[3] = (d->perm[3] % 2) + 6;
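  /* E.g. for d->perm == {1 6 3 4} this yields dfirst == {0 1 2 3},
     dsecond == {6 7 4 5} and dthird == {1 4 3 6}, so the final vshufpd
     assembles {1 6 3 4} from the two vperm2f128 results.  */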
48014 
48015   dfirst.target = gen_reg_rtx (dfirst.vmode);
48016   dsecond.target = gen_reg_rtx (dsecond.vmode);
48017   dthird.op0 = dfirst.target;
48018   dthird.op1 = dsecond.target;
48019   dthird.one_operand_p = false;
48020 
48021   canonicalize_perm (&dfirst);
48022   canonicalize_perm (&dsecond);
48023 
48024   ok = expand_vec_perm_1 (&dfirst)
48025        && expand_vec_perm_1 (&dsecond)
48026        && expand_vec_perm_1 (&dthird);
48027 
48028   gcc_assert (ok);
48029 
48030   return true;
48031 }
48032 
48033 /* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
48034    permutation with two pshufb insns and an ior.  We should have already
48035    failed all two instruction sequences.  */
48036 
48037 static bool
48038 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
48039 {
48040   rtx rperm[2][16], vperm, l, h, op, m128;
48041   unsigned int i, nelt, eltsz;
48042 
48043   if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
48044     return false;
48045   gcc_assert (!d->one_operand_p);
48046 
48047   if (d->testing_p)
48048     return true;
48049 
48050   nelt = d->nelt;
48051   eltsz = GET_MODE_UNIT_SIZE (d->vmode);
48052 
48053   /* Generate two permutation masks.  If the required element is within
48054      the given vector it is shuffled into the proper lane.  If the required
48055      element is in the other vector, force a zero into the lane by setting
48056      bit 7 in the permutation mask.  */
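  /* For instance, for V8HImode with d->perm[0] == 9 the op1 mask gets
     bytes {2, 3} at positions 0-1 (halfword 1 of op1) while the op0
     mask gets -128 there, so after the ior the op1 shuffle alone
     supplies the first halfword.  */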
48057   m128 = GEN_INT (-128);
48058   for (i = 0; i < nelt; ++i)
48059     {
48060       unsigned j, e = d->perm[i];
48061       unsigned which = (e >= nelt);
48062       if (e >= nelt)
48063 	e -= nelt;
48064 
48065       for (j = 0; j < eltsz; ++j)
48066 	{
48067 	  rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
48068 	  rperm[1-which][i*eltsz + j] = m128;
48069 	}
48070     }
48071 
48072   vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
48073   vperm = force_reg (V16QImode, vperm);
48074 
48075   l = gen_reg_rtx (V16QImode);
48076   op = gen_lowpart (V16QImode, d->op0);
48077   emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
48078 
48079   vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48080   vperm = force_reg (V16QImode, vperm);
48081 
48082   h = gen_reg_rtx (V16QImode);
48083   op = gen_lowpart (V16QImode, d->op1);
48084   emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
48085 
48086   op = d->target;
48087   if (d->vmode != V16QImode)
48088     op = gen_reg_rtx (V16QImode);
48089   emit_insn (gen_iorv16qi3 (op, l, h));
48090   if (op != d->target)
48091     emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48092 
48093   return true;
48094 }
48095 
48096 /* Implement arbitrary permutation of a single V32QImode or V16HImode operand
48097    with two vpshufb insns, vpermq and vpor.  We should have already failed
48098    all two or three instruction sequences.  */
48099 
48100 static bool
48101 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48102 {
48103   rtx rperm[2][32], vperm, l, h, hp, op, m128;
48104   unsigned int i, nelt, eltsz;
48105 
48106   if (!TARGET_AVX2
48107       || !d->one_operand_p
48108       || (d->vmode != V32QImode && d->vmode != V16HImode))
48109     return false;
48110 
48111   if (d->testing_p)
48112     return true;
48113 
48114   nelt = d->nelt;
48115   eltsz = GET_MODE_UNIT_SIZE (d->vmode);
48116 
48117   /* Generate two permutation masks.  If the required element is within
48118      the same lane, it is shuffled in.  If the required element comes
48119      from the other lane, force a zero by setting bit 7 in the
48120      permutation mask.  The other mask has non-negative entries only
48121      for elements requested from the other lane; those entries are also
48122      placed in the other lane, so that the two V2TImode halves of the
48123      vpshufb result can then be swapped.  */
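  /* E.g. for V32QImode with d->perm[0] == 20 (a cross-lane request):
     the "h" mask gets 4 at position 16, so the vpshufb leaves op0
     byte 20 in lane 1 and the lane swap into hp then brings it to
     position 0, while the "l" mask keeps -128 there.  */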
48124   m128 = GEN_INT (-128);
48125   for (i = 0; i < nelt; ++i)
48126     {
48127       unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48128       unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48129 
48130       for (j = 0; j < eltsz; ++j)
48131 	{
48132 	  rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48133 	  rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
48134 	}
48135     }
48136 
48137   vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48138   vperm = force_reg (V32QImode, vperm);
48139 
48140   h = gen_reg_rtx (V32QImode);
48141   op = gen_lowpart (V32QImode, d->op0);
48142   emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48143 
48144   /* Swap the 128-bit lanes of h into hp.  */
48145   hp = gen_reg_rtx (V4DImode);
48146   op = gen_lowpart (V4DImode, h);
48147   emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
48148 				  const1_rtx));
48149 
48150   vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48151   vperm = force_reg (V32QImode, vperm);
48152 
48153   l = gen_reg_rtx (V32QImode);
48154   op = gen_lowpart (V32QImode, d->op0);
48155   emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48156 
48157   op = d->target;
48158   if (d->vmode != V32QImode)
48159     op = gen_reg_rtx (V32QImode);
48160   emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48161   if (op != d->target)
48162     emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48163 
48164   return true;
48165 }
48166 
48167 /* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
48168    and extract-odd permutations of two V32QImode or V16HImode operands
48169    with two vpshufb insns, vpor and vpermq.  We should have already
48170    failed all two or three instruction sequences.  */
48171 
48172 static bool
48173 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48174 {
48175   rtx rperm[2][32], vperm, l, h, ior, op, m128;
48176   unsigned int i, nelt, eltsz;
48177 
48178   if (!TARGET_AVX2
48179       || d->one_operand_p
48180       || (d->vmode != V32QImode && d->vmode != V16HImode))
48181     return false;
48182 
48183   for (i = 0; i < d->nelt; ++i)
48184     if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48185       return false;
48186 
48187   if (d->testing_p)
48188     return true;
48189 
48190   nelt = d->nelt;
48191   eltsz = GET_MODE_UNIT_SIZE (d->vmode);
48192 
48193   /* Generate two permutation masks.  In the first permutation mask
48194      the first quarter will contain indexes for the first half
48195      of the op0, the second quarter will contain bit 7 set, third quarter
48196      will contain indexes for the second half of the op0 and the
48197      last quarter bit 7 set.  In the second permutation mask
48198      the first quarter will contain bit 7 set, the second quarter
48199      indexes for the first half of the op1, the third quarter bit 7 set
48200      and last quarter indexes for the second half of the op1.
48201      I.e. the first mask e.g. for V32QImode extract even will be:
48202      0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48203      (all values masked with 0xf except for -128) and second mask
48204      for extract even will be
48205      -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe.  */
48206   m128 = GEN_INT (-128);
48207   for (i = 0; i < nelt; ++i)
48208     {
48209       unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48210       unsigned which = d->perm[i] >= nelt;
48211       unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48212 
48213       for (j = 0; j < eltsz; ++j)
48214 	{
48215 	  rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48216 	  rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
48217 	}
48218     }
48219 
48220   vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48221   vperm = force_reg (V32QImode, vperm);
48222 
48223   l = gen_reg_rtx (V32QImode);
48224   op = gen_lowpart (V32QImode, d->op0);
48225   emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48226 
48227   vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48228   vperm = force_reg (V32QImode, vperm);
48229 
48230   h = gen_reg_rtx (V32QImode);
48231   op = gen_lowpart (V32QImode, d->op1);
48232   emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48233 
48234   ior = gen_reg_rtx (V32QImode);
48235   emit_insn (gen_iorv32qi3 (ior, l, h));
48236 
48237   /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation.  */
48238   op = gen_reg_rtx (V4DImode);
48239   ior = gen_lowpart (V4DImode, ior);
48240   emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
48241 				  const1_rtx, GEN_INT (3)));
48242   emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48243 
48244   return true;
48245 }
48246 
48247 /* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
48248    and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
48249    with two "and" and "pack" or two "shift" and "pack" insns.  We should
48250    have already failed all two instruction sequences.  */
48251 
48252 static bool
48253 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
48254 {
48255   rtx op, dop0, dop1, t;
48256   unsigned i, odd, c, s, nelt = d->nelt;
48257   bool end_perm = false;
48258   machine_mode half_mode;
48259   rtx (*gen_and) (rtx, rtx, rtx);
48260   rtx (*gen_pack) (rtx, rtx, rtx);
48261   rtx (*gen_shift) (rtx, rtx, rtx);
48262 
48263   if (d->one_operand_p)
48264     return false;
48265 
48266   switch (d->vmode)
48267     {
48268     case E_V8HImode:
48269       /* Required for "pack".  */
48270       if (!TARGET_SSE4_1)
48271         return false;
48272       c = 0xffff;
48273       s = 16;
48274       half_mode = V4SImode;
48275       gen_and = gen_andv4si3;
48276       gen_pack = gen_sse4_1_packusdw;
48277       gen_shift = gen_lshrv4si3;
48278       break;
48279     case E_V16QImode:
48280       /* No check as all instructions are SSE2.  */
48281       c = 0xff;
48282       s = 8;
48283       half_mode = V8HImode;
48284       gen_and = gen_andv8hi3;
48285       gen_pack = gen_sse2_packuswb;
48286       gen_shift = gen_lshrv8hi3;
48287       break;
48288     case E_V16HImode:
48289       if (!TARGET_AVX2)
48290         return false;
48291       c = 0xffff;
48292       s = 16;
48293       half_mode = V8SImode;
48294       gen_and = gen_andv8si3;
48295       gen_pack = gen_avx2_packusdw;
48296       gen_shift = gen_lshrv8si3;
48297       end_perm = true;
48298       break;
48299     case E_V32QImode:
48300       if (!TARGET_AVX2)
48301         return false;
48302       c = 0xff;
48303       s = 8;
48304       half_mode = V16HImode;
48305       gen_and = gen_andv16hi3;
48306       gen_pack = gen_avx2_packuswb;
48307       gen_shift = gen_lshrv16hi3;
48308       end_perm = true;
48309       break;
48310     default:
48311       /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
48312 	 general shuffles.  */
48313       return false;
48314     }
48315 
48316   /* Check that permutation is even or odd.  */
48317   odd = d->perm[0];
48318   if (odd > 1)
48319     return false;
48320 
48321   for (i = 1; i < nelt; ++i)
48322     if (d->perm[i] != 2 * i + odd)
48323       return false;
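  /* E.g. for V16QImode extract-even we mask every halfword of both
     operands with 0x00ff and packuswb the results; for extract-odd we
     instead shift each halfword right by 8 before packing.  */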
48324 
48325   if (d->testing_p)
48326     return true;
48327 
48328   dop0 = gen_reg_rtx (half_mode);
48329   dop1 = gen_reg_rtx (half_mode);
48330   if (odd == 0)
48331     {
48332       t = gen_const_vec_duplicate (half_mode, GEN_INT (c));
48333       t = force_reg (half_mode, t);
48334       emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
48335       emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
48336     }
48337   else
48338     {
48339       emit_insn (gen_shift (dop0,
48340 			    gen_lowpart (half_mode, d->op0),
48341 			    GEN_INT (s)));
48342       emit_insn (gen_shift (dop1,
48343 			    gen_lowpart (half_mode, d->op1),
48344 			    GEN_INT (s)));
48345     }
48346   /* For the 256-bit AVX2 cases we need to permute the pack result.  */
48347   if (TARGET_AVX2 && end_perm)
48348     {
48349       op = gen_reg_rtx (d->vmode);
48350       t = gen_reg_rtx (V4DImode);
48351       emit_insn (gen_pack (op, dop0, dop1));
48352       emit_insn (gen_avx2_permv4di_1 (t,
48353 				      gen_lowpart (V4DImode, op),
48354 				      const0_rtx,
48355 				      const2_rtx,
48356 				      const1_rtx,
48357 				      GEN_INT (3)));
48358       emit_move_insn (d->target, gen_lowpart (d->vmode, t));
48359     }
48360   else
48361     emit_insn (gen_pack (d->target, dop0, dop1));
48362 
48363   return true;
48364 }
48365 
48366 /* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
48367    and extract-odd permutations of two V64QI operands
48368    with two "shift", two "trunc" and one "concat" insns for "odd"
48369    and two "trunc" and one "concat" insn for "even".
48370    We should have already failed all two instruction sequences.  */
48371 
48372 static bool
48373 expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d)
48374 {
48375   rtx t1, t2, t3, t4;
48376   unsigned i, odd, nelt = d->nelt;
48377 
48378   if (!TARGET_AVX512BW
48379       || d->one_operand_p
48380       || d->vmode != V64QImode)
48381     return false;
48382 
48383   /* Check that permutation is even or odd.  */
48384   odd = d->perm[0];
48385   if (odd > 1)
48386     return false;
48387 
48388   for (i = 1; i < nelt; ++i)
48389     if (d->perm[i] != 2 * i + odd)
48390       return false;
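  /* E.g. for extract-even the two vpmovwb truncations keep the low
     byte of every 16-bit word of op0 and op1, and the concat glues the
     two V32QImode halves back into a V64QImode result; extract-odd
     first shifts each word right by 8.  */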
48391 
48392   if (d->testing_p)
48393     return true;
48394 
48395 
48396   if (odd)
48397     {
48398       t1 = gen_reg_rtx (V32HImode);
48399       t2 = gen_reg_rtx (V32HImode);
48400       emit_insn (gen_lshrv32hi3 (t1,
48401 				 gen_lowpart (V32HImode, d->op0),
48402 				 GEN_INT (8)));
48403       emit_insn (gen_lshrv32hi3 (t2,
48404 				 gen_lowpart (V32HImode, d->op1),
48405 				 GEN_INT (8)));
48406     }
48407   else
48408     {
48409       t1 = gen_lowpart (V32HImode, d->op0);
48410       t2 = gen_lowpart (V32HImode, d->op1);
48411     }
48412 
48413   t3 = gen_reg_rtx (V32QImode);
48414   t4 = gen_reg_rtx (V32QImode);
48415   emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t3, t1));
48416   emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t4, t2));
48417   emit_insn (gen_avx_vec_concatv64qi (d->target, t3, t4));
48418 
48419   return true;
48420 }
48421 
48422 /* A subroutine of ix86_expand_vec_perm_const_1.  Implement extract-even
48423    and extract-odd permutations.  */
48424 
48425 static bool
48426 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
48427 {
48428   rtx t1, t2, t3, t4, t5;
48429 
48430   switch (d->vmode)
48431     {
48432     case E_V4DFmode:
48433       if (d->testing_p)
48434 	break;
48435       t1 = gen_reg_rtx (V4DFmode);
48436       t2 = gen_reg_rtx (V4DFmode);
48437 
48438       /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
48439       emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
48440       emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
48441 
48442       /* Now an unpck[lh]pd will produce the result required.  */
48443       if (odd)
48444 	t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
48445       else
48446 	t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
48447       emit_insn (t3);
48448       break;
48449 
48450     case E_V8SFmode:
48451       {
48452 	int mask = odd ? 0xdd : 0x88;
48453 
48454 	if (d->testing_p)
48455 	  break;
48456 	t1 = gen_reg_rtx (V8SFmode);
48457 	t2 = gen_reg_rtx (V8SFmode);
48458 	t3 = gen_reg_rtx (V8SFmode);
48459 
48460 	/* Shuffle within the 128-bit lanes to produce:
48461 	   { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }.  */
48462 	emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
48463 				      GEN_INT (mask)));
48464 
48465 	/* Shuffle the lanes around to produce:
48466 	   { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }.  */
48467 	emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
48468 					    GEN_INT (0x3)));
48469 
48470 	/* Shuffle within the 128-bit lanes to produce:
48471 	   { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }.  */
48472 	emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
48473 
48474 	/* Shuffle within the 128-bit lanes to produce:
48475 	   { 8 a c e c e 8 a } | { 9 b d f d f 9 b }.  */
48476 	emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
48477 
48478 	/* Shuffle the lanes around to produce:
48479 	   { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }.  */
48480 	emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
48481 					    GEN_INT (0x20)));
48482       }
48483       break;
48484 
48485     case E_V2DFmode:
48486     case E_V4SFmode:
48487     case E_V2DImode:
48488     case E_V4SImode:
48489       /* These are always directly implementable by expand_vec_perm_1.  */
48490       gcc_unreachable ();
48491 
48492     case E_V8HImode:
48493       if (TARGET_SSE4_1)
48494 	return expand_vec_perm_even_odd_pack (d);
48495       else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
48496 	return expand_vec_perm_pshufb2 (d);
48497       else
48498 	{
48499 	  if (d->testing_p)
48500 	    break;
48501 	  /* We need 2*log2(N)-1 operations to achieve odd/even
48502 	     with interleave. */
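	  /* Trace for extract-even: after the first pair of interleaves
	     d->target holds {0 8 1 9 2 10 3 11} and t1 {4 12 5 13 6 14 7 15};
	     after the second pair d->target holds {0 4 8 12 1 5 9 13} and
	     t2 {2 6 10 14 3 7 11 15}; the final interleave-low then gives
	     {0 2 4 6 8 10 12 14}, and interleave-high the odd elements.  */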
48503 	  t1 = gen_reg_rtx (V8HImode);
48504 	  t2 = gen_reg_rtx (V8HImode);
48505 	  emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
48506 	  emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
48507 	  emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
48508 	  emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
48509 	  if (odd)
48510 	    t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
48511 	  else
48512 	    t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
48513 	  emit_insn (t3);
48514 	}
48515       break;
48516 
48517     case E_V16QImode:
48518       return expand_vec_perm_even_odd_pack (d);
48519 
48520     case E_V16HImode:
48521     case E_V32QImode:
48522       return expand_vec_perm_even_odd_pack (d);
48523 
48524     case E_V64QImode:
48525       return expand_vec_perm_even_odd_trunc (d);
48526 
48527     case E_V4DImode:
48528       if (!TARGET_AVX2)
48529 	{
48530 	  struct expand_vec_perm_d d_copy = *d;
48531 	  d_copy.vmode = V4DFmode;
48532 	  if (d->testing_p)
48533 	    d_copy.target = gen_raw_REG (V4DFmode, LAST_VIRTUAL_REGISTER + 1);
48534 	  else
48535 	    d_copy.target = gen_reg_rtx (V4DFmode);
48536 	  d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
48537 	  d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
48538 	  if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48539 	    {
48540 	      if (!d->testing_p)
48541 		emit_move_insn (d->target,
48542 				gen_lowpart (V4DImode, d_copy.target));
48543 	      return true;
48544 	    }
48545 	  return false;
48546 	}
48547 
48548       if (d->testing_p)
48549 	break;
48550 
48551       t1 = gen_reg_rtx (V4DImode);
48552       t2 = gen_reg_rtx (V4DImode);
48553 
48554       /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
48555       emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
48556       emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
48557 
48558       /* Now a vpunpck[lh]qdq will produce the result required.  */
48559       if (odd)
48560 	t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
48561       else
48562 	t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
48563       emit_insn (t3);
48564       break;
48565 
48566     case E_V8SImode:
48567       if (!TARGET_AVX2)
48568 	{
48569 	  struct expand_vec_perm_d d_copy = *d;
48570 	  d_copy.vmode = V8SFmode;
48571 	  if (d->testing_p)
48572 	    d_copy.target = gen_raw_REG (V8SFmode, LAST_VIRTUAL_REGISTER + 1);
48573 	  else
48574 	    d_copy.target = gen_reg_rtx (V8SFmode);
48575 	  d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
48576 	  d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
48577 	  if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48578 	    {
48579 	      if (!d->testing_p)
48580 		emit_move_insn (d->target,
48581 				gen_lowpart (V8SImode, d_copy.target));
48582 	      return true;
48583 	    }
48584 	  return false;
48585 	}
48586 
48587       if (d->testing_p)
48588 	break;
48589 
48590       t1 = gen_reg_rtx (V8SImode);
48591       t2 = gen_reg_rtx (V8SImode);
48592       t3 = gen_reg_rtx (V4DImode);
48593       t4 = gen_reg_rtx (V4DImode);
48594       t5 = gen_reg_rtx (V4DImode);
48595 
48596       /* Shuffle the lanes around into
48597 	 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }.  */
48598       emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
48599 				    gen_lowpart (V4DImode, d->op1),
48600 				    GEN_INT (0x20)));
48601       emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
48602 				    gen_lowpart (V4DImode, d->op1),
48603 				    GEN_INT (0x31)));
48604 
48605       /* Swap the 2nd and 3rd position in each lane into
48606 	 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }.  */
48607       emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
48608 				    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48609       emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
48610 				    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48611 
48612       /* Now a vpunpck[lh]qdq will produce
48613 	 { 0 2 4 6 8 a c e } or { 1 3 5 7 9 b d f } respectively.  */
48614       if (odd)
48615 	t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
48616 					   gen_lowpart (V4DImode, t2));
48617       else
48618 	t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
48619 					  gen_lowpart (V4DImode, t2));
48620       emit_insn (t3);
48621       emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
48622       break;
48623 
48624     default:
48625       gcc_unreachable ();
48626     }
48627 
48628   return true;
48629 }
48630 
48631 /* A subroutine of ix86_expand_vec_perm_const_1.  Pattern match
48632    extract-even and extract-odd permutations.  */
48633 
48634 static bool
48635 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
48636 {
48637   unsigned i, odd, nelt = d->nelt;
48638 
48639   odd = d->perm[0];
48640   if (odd != 0 && odd != 1)
48641     return false;
48642 
48643   for (i = 1; i < nelt; ++i)
48644     if (d->perm[i] != 2 * i + odd)
48645       return false;
48646 
48647   return expand_vec_perm_even_odd_1 (d, odd);
48648 }
48649 
48650 /* A subroutine of ix86_expand_vec_perm_const_1.  Implement broadcast
48651    permutations.  We assume that expand_vec_perm_1 has already failed.  */
48652 
48653 static bool
48654 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
48655 {
48656   unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
48657   machine_mode vmode = d->vmode;
48658   unsigned char perm2[4];
48659   rtx op0 = d->op0, dest;
48660   bool ok;
48661 
48662   switch (vmode)
48663     {
48664     case E_V4DFmode:
48665     case E_V8SFmode:
48666       /* These are special-cased in sse.md so that we can optionally
48667 	 use the vbroadcast instruction.  They expand to two insns
48668 	 if the input happens to be in a register.  */
48669       gcc_unreachable ();
48670 
48671     case E_V2DFmode:
48672     case E_V2DImode:
48673     case E_V4SFmode:
48674     case E_V4SImode:
48675       /* These are always implementable using standard shuffle patterns.  */
48676       gcc_unreachable ();
48677 
48678     case E_V8HImode:
48679     case E_V16QImode:
48680       /* These can be implemented via interleave.  We save one insn by
48681 	 stopping once we have promoted to V4SImode and then use pshufd.  */
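      /* E.g. broadcasting element 5 of a V8HImode vector: one
	 interleave-high gives {4 4 5 5 6 6 7 7}; viewed as V4SImode the
	 wanted pair sits in dword 1, and the final pshufd replicates it
	 into every position.  */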
48682       if (d->testing_p)
48683 	return true;
48684       do
48685 	{
48686 	  rtx dest;
48687 	  rtx (*gen) (rtx, rtx, rtx)
48688 	    = vmode == V16QImode ? gen_vec_interleave_lowv16qi
48689 				 : gen_vec_interleave_lowv8hi;
48690 
48691 	  if (elt >= nelt2)
48692 	    {
48693 	      gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
48694 				       : gen_vec_interleave_highv8hi;
48695 	      elt -= nelt2;
48696 	    }
48697 	  nelt2 /= 2;
48698 
48699 	  dest = gen_reg_rtx (vmode);
48700 	  emit_insn (gen (dest, op0, op0));
48701 	  vmode = get_mode_wider_vector (vmode);
48702 	  op0 = gen_lowpart (vmode, dest);
48703 	}
48704       while (vmode != V4SImode);
48705 
48706       memset (perm2, elt, 4);
48707       dest = gen_reg_rtx (V4SImode);
48708       ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
48709       gcc_assert (ok);
48710       if (!d->testing_p)
48711 	emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
48712       return true;
48713 
48714     case E_V64QImode:
48715     case E_V32QImode:
48716     case E_V16HImode:
48717     case E_V8SImode:
48718     case E_V4DImode:
48719       /* For AVX2 broadcasts of the first element vpbroadcast* or
48720 	 vpermq should be used by expand_vec_perm_1.  */
48721       gcc_assert (!TARGET_AVX2 || d->perm[0]);
48722       return false;
48723 
48724     default:
48725       gcc_unreachable ();
48726     }
48727 }
48728 
48729 /* A subroutine of ix86_expand_vec_perm_const_1.  Pattern match
48730    broadcast permutations.  */
48731 
48732 static bool
48733 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
48734 {
48735   unsigned i, elt, nelt = d->nelt;
48736 
48737   if (!d->one_operand_p)
48738     return false;
48739 
48740   elt = d->perm[0];
48741   for (i = 1; i < nelt; ++i)
48742     if (d->perm[i] != elt)
48743       return false;
48744 
48745   return expand_vec_perm_broadcast_1 (d);
48746 }
48747 
48748 /* Implement arbitrary permutations of two V64QImode operands
48749    with 2 vperm[it]2w, 2 vpshufb and one vpor instruction.  */
48750 static bool
48751 expand_vec_perm_vpermt2_vpshub2 (struct expand_vec_perm_d *d)
48752 {
48753   if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
48754     return false;
48755 
48756   if (d->testing_p)
48757     return true;
48758 
48759   struct expand_vec_perm_d ds[2];
48760   rtx rperm[128], vperm, target0, target1;
48761   unsigned int i, nelt;
48762   machine_mode vmode;
48763 
48764   nelt = d->nelt;
48765   vmode = V64QImode;
48766 
48767   for (i = 0; i < 2; i++)
48768     {
48769       ds[i] = *d;
48770       ds[i].vmode = V32HImode;
48771       ds[i].nelt = 32;
48772       ds[i].target = gen_reg_rtx (V32HImode);
48773       ds[i].op0 = gen_lowpart (V32HImode, d->op0);
48774       ds[i].op1 = gen_lowpart (V32HImode, d->op1);
48775     }
48776 
48777   /* Prepare the two word permutations: ds[0] puts the bytes needed at
48778      even destination positions either exactly in place or one byte
48779      higher, and ds[1] puts the bytes needed at odd destination
48780      positions either exactly in place or one byte lower; the vpshufb
48781      masks built below then pick the correct byte within each word.  */
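  /* For instance, if d->perm[10] == 37 then ds[0].perm[5] == 18: the
     first vperm[it]2w moves word 18 (source bytes 36-37) into word 5
     (bytes 10-11), and rperm[10] == 11 makes the vpshufb pick byte 37
     for destination byte 10, while rperm[10 + 64] stays -1.  */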
48782 
48783   for (i = 0; i < nelt; i++)
48784     {
48785       ds[i & 1].perm[i / 2] = d->perm[i] / 2;
48786       if (i & 1)
48787 	{
48788 	  rperm[i] = constm1_rtx;
48789 	  rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
48790 	}
48791       else
48792 	{
48793 	  rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
48794 	  rperm[i + 64] = constm1_rtx;
48795 	}
48796     }
48797 
48798   bool ok = expand_vec_perm_1 (&ds[0]);
48799   gcc_assert (ok);
48800   ds[0].target = gen_lowpart (V64QImode, ds[0].target);
48801 
48802   ok = expand_vec_perm_1 (&ds[1]);
48803   gcc_assert (ok);
48804   ds[1].target = gen_lowpart (V64QImode, ds[1].target);
48805 
48806   vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
48807   vperm = force_reg (vmode, vperm);
48808   target0 = gen_reg_rtx (V64QImode);
48809   emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
48810 
48811   vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
48812   vperm = force_reg (vmode, vperm);
48813   target1 = gen_reg_rtx (V64QImode);
48814   emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
48815 
48816   emit_insn (gen_iorv64qi3 (d->target, target0, target1));
48817   return true;
48818 }
48819 
48820 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
48821    with 4 vpshufb insns, 2 vpermq and 3 vpor.  We should have already failed
48822    all the shorter instruction sequences.  */
48823 
48824 static bool
48825 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
48826 {
48827   rtx rperm[4][32], vperm, l[2], h[2], op, m128;
48828   unsigned int i, nelt, eltsz;
48829   bool used[4];
48830 
48831   if (!TARGET_AVX2
48832       || d->one_operand_p
48833       || (d->vmode != V32QImode && d->vmode != V16HImode))
48834     return false;
48835 
48836   if (d->testing_p)
48837     return true;
48838 
48839   nelt = d->nelt;
48840   eltsz = GET_MODE_UNIT_SIZE (d->vmode);
48841 
48842   /* Generate 4 permutation masks.  If the required element is within
48843      the same lane, it is shuffled in.  If the required element comes
48844      from the other lane, force a zero by setting bit 7 in the
48845      permutation mask.  The cross-lane masks have non-negative entries
48846      only for elements requested from the other lane; those entries are
48847      also placed in the other lane, so that the two V2TImode halves of
48848      the corresponding vpshufb result can then be swapped.  */
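  /* E.g. for V32QImode with d->perm[0] == 56: the element comes from
     op1's high lane while destination 0 is in the low lane, so mask 3
     (op1, cross-lane) gets 8 at position 16; the vpshufb keeps op1
     byte 24 in lane 1 and the later lane swap of h[1] moves it to
     position 0.  */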
48849   m128 = GEN_INT (-128);
48850   for (i = 0; i < 32; ++i)
48851     {
48852       rperm[0][i] = m128;
48853       rperm[1][i] = m128;
48854       rperm[2][i] = m128;
48855       rperm[3][i] = m128;
48856     }
48857   used[0] = false;
48858   used[1] = false;
48859   used[2] = false;
48860   used[3] = false;
48861   for (i = 0; i < nelt; ++i)
48862     {
48863       unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48864       unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48865       unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
48866 
48867       for (j = 0; j < eltsz; ++j)
48868 	rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
48869       used[which] = true;
48870     }
48871 
48872   for (i = 0; i < 2; ++i)
48873     {
48874       if (!used[2 * i + 1])
48875 	{
48876 	  h[i] = NULL_RTX;
48877 	  continue;
48878 	}
48879       vperm = gen_rtx_CONST_VECTOR (V32QImode,
48880 				    gen_rtvec_v (32, rperm[2 * i + 1]));
48881       vperm = force_reg (V32QImode, vperm);
48882       h[i] = gen_reg_rtx (V32QImode);
48883       op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
48884       emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
48885     }
48886 
48887   /* Swap the 128-bit lanes of h[X].  */
48888   for (i = 0; i < 2; ++i)
48889    {
48890      if (h[i] == NULL_RTX)
48891        continue;
48892      op = gen_reg_rtx (V4DImode);
48893      emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
48894 				     const2_rtx, GEN_INT (3), const0_rtx,
48895 				     const1_rtx));
48896      h[i] = gen_lowpart (V32QImode, op);
48897    }
48898 
48899   for (i = 0; i < 2; ++i)
48900     {
48901       if (!used[2 * i])
48902 	{
48903 	  l[i] = NULL_RTX;
48904 	  continue;
48905 	}
48906       vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
48907       vperm = force_reg (V32QImode, vperm);
48908       l[i] = gen_reg_rtx (V32QImode);
48909       op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
48910       emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
48911     }
48912 
48913   for (i = 0; i < 2; ++i)
48914     {
48915       if (h[i] && l[i])
48916 	{
48917 	  op = gen_reg_rtx (V32QImode);
48918 	  emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
48919 	  l[i] = op;
48920 	}
48921       else if (h[i])
48922 	l[i] = h[i];
48923     }
48924 
48925   gcc_assert (l[0] && l[1]);
48926   op = d->target;
48927   if (d->vmode != V32QImode)
48928     op = gen_reg_rtx (V32QImode);
48929   emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
48930   if (op != d->target)
48931     emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48932   return true;
48933 }
48934 
48935 /* The guts of ix86_vectorize_vec_perm_const.  With all of the interface bits
48936    taken care of, perform the expansion in D and return true on success.  */
48937 
48938 static bool
48939 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
48940 {
48941   /* Try a single instruction expansion.  */
48942   if (expand_vec_perm_1 (d))
48943     return true;
48944 
48945   /* Try sequences of two instructions.  */
48946 
48947   if (expand_vec_perm_pshuflw_pshufhw (d))
48948     return true;
48949 
48950   if (expand_vec_perm_palignr (d, false))
48951     return true;
48952 
48953   if (expand_vec_perm_interleave2 (d))
48954     return true;
48955 
48956   if (expand_vec_perm_broadcast (d))
48957     return true;
48958 
48959   if (expand_vec_perm_vpermq_perm_1 (d))
48960     return true;
48961 
48962   if (expand_vec_perm_vperm2f128 (d))
48963     return true;
48964 
48965   if (expand_vec_perm_pblendv (d))
48966     return true;
48967 
48968   /* Try sequences of three instructions.  */
48969 
48970   if (expand_vec_perm_even_odd_pack (d))
48971     return true;
48972 
48973   if (expand_vec_perm_2vperm2f128_vshuf (d))
48974     return true;
48975 
48976   if (expand_vec_perm_pshufb2 (d))
48977     return true;
48978 
48979   if (expand_vec_perm_interleave3 (d))
48980     return true;
48981 
48982   if (expand_vec_perm_vperm2f128_vblend (d))
48983     return true;
48984 
48985   /* Try sequences of four instructions.  */
48986 
48987   if (expand_vec_perm_even_odd_trunc (d))
48988     return true;
48989   if (expand_vec_perm_vpshufb2_vpermq (d))
48990     return true;
48991 
48992   if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
48993     return true;
48994 
48995   if (expand_vec_perm_vpermt2_vpshub2 (d))
48996     return true;
48997 
48998   /* ??? Look for narrow permutations whose element orderings would
48999      allow the promotion to a wider mode.  */
49000 
49001   /* ??? Look for sequences of interleave or a wider permute that place
49002      the data into the correct lanes for a half-vector shuffle like
49003      pshuf[lh]w or vpermilps.  */
49004 
49005   /* ??? Look for sequences of interleave that produce the desired results.
49006      The combinatorics of punpck[lh] get pretty ugly... */
49007 
49008   if (expand_vec_perm_even_odd (d))
49009     return true;
49010 
49011   /* Even longer sequences.  */
49012   if (expand_vec_perm_vpshufb4_vpermq2 (d))
49013     return true;
49014 
49015   /* See if we can get the same permutation in different vector integer
49016      mode.  */
49017   struct expand_vec_perm_d nd;
49018   if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
49019     {
49020       if (!d->testing_p)
49021 	emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
49022       return true;
49023     }
49024 
49025   return false;
49026 }
49027 
49028 /* If a permutation only uses one operand, make it clear. Returns true
49029    if the permutation references both operands.  */
49030 
49031 static bool
49032 canonicalize_perm (struct expand_vec_perm_d *d)
49033 {
49034   int i, which, nelt = d->nelt;
49035 
49036   for (i = which = 0; i < nelt; ++i)
49037       which |= (d->perm[i] < nelt ? 1 : 2);
49038 
49039   d->one_operand_p = true;
49040   switch (which)
49041     {
49042     default:
49043       gcc_unreachable();
49044 
49045     case 3:
49046       if (!rtx_equal_p (d->op0, d->op1))
49047         {
49048 	  d->one_operand_p = false;
49049 	  break;
49050         }
49051       /* The elements of PERM do not suggest that only the first operand
49052 	 is used, but both operands are identical.  Allow easier matching
49053 	 of the permutation by folding the permutation into the single
49054 	 input vector.  */
49055       /* FALLTHRU */
49056 
49057     case 2:
49058       for (i = 0; i < nelt; ++i)
49059         d->perm[i] &= nelt - 1;
49060       d->op0 = d->op1;
49061       break;
49062 
49063     case 1:
49064       d->op1 = d->op0;
49065       break;
49066     }
49067 
49068   return (which == 3);
49069 }
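/* Illustrative example: with nelt == 4 and a selector { 4, 5, 0, 1 },
   both halves of the operand concatenation are referenced (which == 3).
   If op0 and op1 happen to be the same register, the indices are folded
   to { 0, 1, 0, 1 } and one_operand_p remains true, so the single-input
   matchers still apply; the function nevertheless returns true because
   the selector itself named both operands.  */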
49070 
49071 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  */
49072 
49073 static bool
49074 ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
49075 			       rtx op1, const vec_perm_indices &sel)
49076 {
49077   struct expand_vec_perm_d d;
49078   unsigned char perm[MAX_VECT_LEN];
49079   unsigned int i, nelt, which;
49080   bool two_args;
49081 
49082   d.target = target;
49083   d.op0 = op0;
49084   d.op1 = op1;
49085 
49086   d.vmode = vmode;
49087   gcc_assert (VECTOR_MODE_P (d.vmode));
49088   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49089   d.testing_p = !target;
49090 
49091   gcc_assert (sel.length () == nelt);
49092   gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
49093 
49094   /* Given sufficient ISA support we can just return true here
49095      for selected vector modes.  */
49096   switch (d.vmode)
49097     {
49098     case E_V16SFmode:
49099     case E_V16SImode:
49100     case E_V8DImode:
49101     case E_V8DFmode:
49102       if (!TARGET_AVX512F)
49103 	return false;
49104       /* All implementable with a single vperm[it]2 insn.  */
49105       if (d.testing_p)
49106 	return true;
49107       break;
49108     case E_V32HImode:
49109       if (!TARGET_AVX512BW)
49110 	return false;
49111       if (d.testing_p)
49112 	/* All implementable with a single vperm[it]2 insn.  */
49113 	return true;
49114       break;
49115     case E_V64QImode:
49116       if (!TARGET_AVX512BW)
49117 	return false;
49118       if (d.testing_p)
49119 	/* Implementable with 2 vperm[it]2, 2 vpshufb and 1 or insn.  */
49120 	return true;
49121       break;
49122     case E_V8SImode:
49123     case E_V8SFmode:
49124     case E_V4DFmode:
49125     case E_V4DImode:
49126       if (!TARGET_AVX)
49127 	return false;
49128       if (d.testing_p && TARGET_AVX512VL)
49129 	/* All implementable with a single vperm[it]2 insn.  */
49130 	return true;
49131       break;
49132     case E_V16HImode:
49133       if (!TARGET_SSE2)
49134 	return false;
49135       if (d.testing_p && TARGET_AVX2)
49136 	/* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns.  */
49137 	return true;
49138       break;
49139     case E_V32QImode:
49140       if (!TARGET_SSE2)
49141 	return false;
49142       if (d.testing_p && TARGET_AVX2)
49143 	/* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns.  */
49144 	return true;
49145       break;
49146     case E_V8HImode:
49147     case E_V16QImode:
49148       if (!TARGET_SSE2)
49149 	return false;
49150       /* Fall through.  */
49151     case E_V4SImode:
49152     case E_V4SFmode:
49153       if (!TARGET_SSE)
49154 	return false;
49155       /* All implementable with a single vpperm insn.  */
49156       if (d.testing_p && TARGET_XOP)
49157 	return true;
49158       /* All implementable with 2 pshufb + 1 ior.  */
49159       if (d.testing_p && TARGET_SSSE3)
49160 	return true;
49161       break;
49162     case E_V2DImode:
49163     case E_V2DFmode:
49164       if (!TARGET_SSE)
49165 	return false;
49166       /* All implementable with shufpd or unpck[lh]pd.  */
49167       if (d.testing_p)
49168 	return true;
49169       break;
49170     default:
49171       return false;
49172     }
49173 
49174   for (i = which = 0; i < nelt; ++i)
49175     {
49176       unsigned char e = sel[i];
49177       gcc_assert (e < 2 * nelt);
49178       d.perm[i] = e;
49179       perm[i] = e;
49180       which |= (e < nelt ? 1 : 2);
49181     }
49182 
49183   if (d.testing_p)
49184     {
49185       /* For all elements from the second vector, fold them to the first.  */
49186       if (which == 2)
49187 	for (i = 0; i < nelt; ++i)
49188 	  d.perm[i] -= nelt;
49189 
49190       /* Check whether the mask can be applied to the vector type.  */
49191       d.one_operand_p = (which != 3);
49192 
49193       /* Implementable with shufps or pshufd.  */
49194       if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49195 	return true;
49196 
49197       /* Otherwise we have to go through the motions and see if we can
49198 	 figure out how to generate the requested permutation.  */
49199       d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49200       d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49201       if (!d.one_operand_p)
49202 	d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49203 
49204       start_sequence ();
49205       bool ret = ix86_expand_vec_perm_const_1 (&d);
49206       end_sequence ();
49207 
49208       return ret;
49209     }
49210 
49211   two_args = canonicalize_perm (&d);
49212 
49213   if (ix86_expand_vec_perm_const_1 (&d))
49214     return true;
49215 
49216   /* If the selector says both arguments are needed, but the operands are the
49217      same, the above tried to expand with one_operand_p and flattened selector.
49218      If that didn't work, retry without one_operand_p; we succeeded with that
49219      during testing.  */
49220   if (two_args && d.one_operand_p)
49221     {
49222       d.one_operand_p = false;
49223       memcpy (d.perm, perm, sizeof (perm));
49224       return ix86_expand_vec_perm_const_1 (&d);
49225     }
49226 
49227   return false;
49228 }
49229 
49230 void
49231 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49232 {
49233   struct expand_vec_perm_d d;
49234   unsigned i, nelt;
49235 
49236   d.target = targ;
49237   d.op0 = op0;
49238   d.op1 = op1;
49239   d.vmode = GET_MODE (targ);
49240   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49241   d.one_operand_p = false;
49242   d.testing_p = false;
49243 
49244   for (i = 0; i < nelt; ++i)
49245     d.perm[i] = i * 2 + odd;
49246 
49247   /* We'll either be able to implement the permutation directly...  */
49248   if (expand_vec_perm_1 (&d))
49249     return;
49250 
49251   /* ... or we use the special-case patterns.  */
49252   expand_vec_perm_even_odd_1 (&d, odd);
49253 }
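/* Illustrative example: for a V4SImode target, nelt == 4, so the
   selector built above is { 0, 2, 4, 6 } when ODD == 0 (the even
   elements of the op0/op1 concatenation) and { 1, 3, 5, 7 } when
   ODD == 1; indices of nelt or more select from op1.  */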
49254 
49255 static void
49256 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49257 {
49258   struct expand_vec_perm_d d;
49259   unsigned i, nelt, base;
49260   bool ok;
49261 
49262   d.target = targ;
49263   d.op0 = op0;
49264   d.op1 = op1;
49265   d.vmode = GET_MODE (targ);
49266   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49267   d.one_operand_p = false;
49268   d.testing_p = false;
49269 
49270   base = high_p ? nelt / 2 : 0;
49271   for (i = 0; i < nelt / 2; ++i)
49272     {
49273       d.perm[i * 2] = i + base;
49274       d.perm[i * 2 + 1] = i + base + nelt;
49275     }
49276 
49277   /* Note that for AVX this isn't one instruction.  */
49278   ok = ix86_expand_vec_perm_const_1 (&d);
49279   gcc_assert (ok);
49280 }
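/* Illustrative example: for V4SImode with HIGH_P false the selector is
   { 0, 4, 1, 5 }, i.e. the low halves of op0 and op1 interleaved;
   with HIGH_P true the base becomes nelt / 2 == 2 and the selector is
   { 2, 6, 3, 7 }, the interleaved high halves.  */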
49281 
49282 
49283 /* Expand a vector operation CODE for a V*QImode in terms of the
49284    same operation on V*HImode.  */
49285 
49286 void
49287 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49288 {
49289   machine_mode qimode = GET_MODE (dest);
49290   machine_mode himode;
49291   rtx (*gen_il) (rtx, rtx, rtx);
49292   rtx (*gen_ih) (rtx, rtx, rtx);
49293   rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49294   struct expand_vec_perm_d d;
49295   bool ok, full_interleave;
49296   bool uns_p = false;
49297   int i;
49298 
49299   switch (qimode)
49300     {
49301     case E_V16QImode:
49302       himode = V8HImode;
49303       gen_il = gen_vec_interleave_lowv16qi;
49304       gen_ih = gen_vec_interleave_highv16qi;
49305       break;
49306     case E_V32QImode:
49307       himode = V16HImode;
49308       gen_il = gen_avx2_interleave_lowv32qi;
49309       gen_ih = gen_avx2_interleave_highv32qi;
49310       break;
49311     case E_V64QImode:
49312       himode = V32HImode;
49313       gen_il = gen_avx512bw_interleave_lowv64qi;
49314       gen_ih = gen_avx512bw_interleave_highv64qi;
49315       break;
49316     default:
49317       gcc_unreachable ();
49318     }
49319 
49320   op2_l = op2_h = op2;
49321   switch (code)
49322     {
49323     case MULT:
49324       /* Unpack data such that we've got a source byte in each low byte of
49325 	 each word.  We don't care what goes into the high byte of each word.
49326 	 Rather than trying to get zero in there, it is most convenient to
49327 	 let it be a copy of the low byte.  */
49328       op2_l = gen_reg_rtx (qimode);
49329       op2_h = gen_reg_rtx (qimode);
49330       emit_insn (gen_il (op2_l, op2, op2));
49331       emit_insn (gen_ih (op2_h, op2, op2));
49332 
49333       op1_l = gen_reg_rtx (qimode);
49334       op1_h = gen_reg_rtx (qimode);
49335       emit_insn (gen_il (op1_l, op1, op1));
49336       emit_insn (gen_ih (op1_h, op1, op1));
49337       full_interleave = qimode == V16QImode;
49338       break;
49339 
49340     case ASHIFT:
49341     case LSHIFTRT:
49342       uns_p = true;
49343       /* FALLTHRU */
49344     case ASHIFTRT:
49345       op1_l = gen_reg_rtx (himode);
49346       op1_h = gen_reg_rtx (himode);
49347       ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
49348       ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
49349       full_interleave = true;
49350       break;
49351     default:
49352       gcc_unreachable ();
49353     }
49354 
49355   /* Perform the operation.  */
49356   res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
49357 			       1, OPTAB_DIRECT);
49358   res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
49359 			       1, OPTAB_DIRECT);
49360   gcc_assert (res_l && res_h);
49361 
49362   /* Merge the data back into the right place.  */
49363   d.target = dest;
49364   d.op0 = gen_lowpart (qimode, res_l);
49365   d.op1 = gen_lowpart (qimode, res_h);
49366   d.vmode = qimode;
49367   d.nelt = GET_MODE_NUNITS (qimode);
49368   d.one_operand_p = false;
49369   d.testing_p = false;
49370 
49371   if (full_interleave)
49372     {
49373       /* For SSE2, we used a full interleave, so the desired
49374 	 results are in the even elements.  */
49375       for (i = 0; i < d.nelt; ++i)
49376 	d.perm[i] = i * 2;
49377     }
49378   else
49379     {
49380       /* For AVX, the interleave used above was not cross-lane.  So the
49381 	 extraction is evens but with the second and third quarter swapped.
49382 	 Happily, that is even one insn shorter than even extraction.
49383 	 For AVX512BW we have 4 lanes.  We extract evens from within a lane,
49384 	 always first from the first and then from the second source operand,
49385	 the index bits above the low 4 bits remain the same.
49386 	 Thus, for d.nelt == 32 we want permutation
49387 	 0,2,4,..14, 32,34,36,..46, 16,18,20,..30, 48,50,52,..62
49388 	 and for d.nelt == 64 we want permutation
49389 	 0,2,4,..14, 64,66,68,..78, 16,18,20,..30, 80,82,84,..94,
49390 	 32,34,36,..46, 96,98,100,..110, 48,50,52,..62, 112,114,116,..126.  */
49391       for (i = 0; i < d.nelt; ++i)
49392 	d.perm[i] = ((i * 2) & 14) + ((i & 8) ? d.nelt : 0) + (i & ~15);
49393     }
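  /* Spot-check of the formula above (illustrative): for d.nelt == 32,
     i == 8 gives ((16) & 14) + 32 + 0 == 32 and i == 16 gives
     0 + 0 + 16 == 16, matching the 0,2,..14, 32,34,..46, 16,18,..30,
     48,.. pattern quoted in the comment.  */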
49394 
49395   ok = ix86_expand_vec_perm_const_1 (&d);
49396   gcc_assert (ok);
49397 
49398   set_unique_reg_note (get_last_insn (), REG_EQUAL,
49399 		       gen_rtx_fmt_ee (code, qimode, op1, op2));
49400 }
49401 
49402 /* Helper function of ix86_expand_mul_widen_evenodd.  Return true
49403    if op is CONST_VECTOR with all odd elements equal to their
49404    preceding element.  */
49405 
49406 static bool
49407 const_vector_equal_evenodd_p (rtx op)
49408 {
49409   machine_mode mode = GET_MODE (op);
49410   int i, nunits = GET_MODE_NUNITS (mode);
49411   if (GET_CODE (op) != CONST_VECTOR
49412       || nunits != CONST_VECTOR_NUNITS (op))
49413     return false;
49414   for (i = 0; i < nunits; i += 2)
49415     if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
49416       return false;
49417   return true;
49418 }
49419 
49420 void
49421 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
49422 			       bool uns_p, bool odd_p)
49423 {
49424   machine_mode mode = GET_MODE (op1);
49425   machine_mode wmode = GET_MODE (dest);
49426   rtx x;
49427   rtx orig_op1 = op1, orig_op2 = op2;
49428 
49429   if (!nonimmediate_operand (op1, mode))
49430     op1 = force_reg (mode, op1);
49431   if (!nonimmediate_operand (op2, mode))
49432     op2 = force_reg (mode, op2);
49433 
49434   /* We only play even/odd games with vectors of SImode.  */
49435   gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
49436 
49437   /* If we're looking for the odd results, shift those members down to
49438      the even slots.  For some CPUs this is faster than a PSHUFD.  */
49439   if (odd_p)
49440     {
49441       /* For XOP use vpmacsdqh, but only for smult, as it is only
49442 	 signed.  */
49443       if (TARGET_XOP && mode == V4SImode && !uns_p)
49444 	{
49445 	  x = force_reg (wmode, CONST0_RTX (wmode));
49446 	  emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
49447 	  return;
49448 	}
49449 
49450       x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
49451       if (!const_vector_equal_evenodd_p (orig_op1))
49452 	op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
49453 			    x, NULL, 1, OPTAB_DIRECT);
49454       if (!const_vector_equal_evenodd_p (orig_op2))
49455 	op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
49456 			    x, NULL, 1, OPTAB_DIRECT);
49457       op1 = gen_lowpart (mode, op1);
49458       op2 = gen_lowpart (mode, op2);
49459     }
49460 
49461   if (mode == V16SImode)
49462     {
49463       if (uns_p)
49464 	x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
49465       else
49466 	x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
49467     }
49468   else if (mode == V8SImode)
49469     {
49470       if (uns_p)
49471 	x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
49472       else
49473 	x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
49474     }
49475   else if (uns_p)
49476     x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
49477   else if (TARGET_SSE4_1)
49478     x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
49479   else
49480     {
49481       rtx s1, s2, t0, t1, t2;
49482 
49483       /* The easiest way to implement this without PMULDQ is to go through
49484 	 the motions as if we are performing a full 64-bit multiply.  With
49485 	 the exception that we need to do less shuffling of the elements.  */
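      /* Sketch of the identity used (illustrative): viewing each 32-bit
	 element A, B as signed and working modulo 2^64,
	   A * B = lo(A)*lo(B) + ((sign(A)*lo(B) + sign(B)*lo(A)) << 32),
	 where sign(X) is 0 or -1 (the comparison results S1/S2 below),
	 so three unsigned even-multiplies plus an add and a shift
	 reconstruct the signed widening product.  */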
49486 
49487       /* Compute the sign-extension, aka highparts, of the two operands.  */
49488       s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49489 				op1, pc_rtx, pc_rtx);
49490       s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49491 				op2, pc_rtx, pc_rtx);
49492 
49493       /* Multiply LO(A) * HI(B), and vice-versa.  */
49494       t1 = gen_reg_rtx (wmode);
49495       t2 = gen_reg_rtx (wmode);
49496       emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
49497       emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
49498 
49499       /* Multiply LO(A) * LO(B).  */
49500       t0 = gen_reg_rtx (wmode);
49501       emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
49502 
49503       /* Combine and shift the highparts into place.  */
49504       t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
49505       t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
49506 			 1, OPTAB_DIRECT);
49507 
49508       /* Combine high and low parts.  */
49509       force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
49510       return;
49511     }
49512   emit_insn (x);
49513 }
49514 
49515 void
49516 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
49517 			    bool uns_p, bool high_p)
49518 {
49519   machine_mode wmode = GET_MODE (dest);
49520   machine_mode mode = GET_MODE (op1);
49521   rtx t1, t2, t3, t4, mask;
49522 
49523   switch (mode)
49524     {
49525     case E_V4SImode:
49526       t1 = gen_reg_rtx (mode);
49527       t2 = gen_reg_rtx (mode);
49528       if (TARGET_XOP && !uns_p)
49529 	{
49530 	  /* With XOP, we have pmacsdqh, aka mul_widen_odd.  In this case,
49531 	     shuffle the elements once so that all elements are in the right
49532 	     place for immediate use: { A C B D }.  */
49533 	  emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
49534 					const1_rtx, GEN_INT (3)));
49535 	  emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
49536 					const1_rtx, GEN_INT (3)));
49537 	}
49538       else
49539 	{
49540 	  /* Put the elements into place for the multiply.  */
49541 	  ix86_expand_vec_interleave (t1, op1, op1, high_p);
49542 	  ix86_expand_vec_interleave (t2, op2, op2, high_p);
49543 	  high_p = false;
49544 	}
49545       ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
49546       break;
49547 
49548     case E_V8SImode:
49549       /* Shuffle the elements between the lanes.  After this we
49550 	 have { A B E F | C D G H } for each operand.  */
49551       t1 = gen_reg_rtx (V4DImode);
49552       t2 = gen_reg_rtx (V4DImode);
49553       emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
49554 				      const0_rtx, const2_rtx,
49555 				      const1_rtx, GEN_INT (3)));
49556       emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
49557 				      const0_rtx, const2_rtx,
49558 				      const1_rtx, GEN_INT (3)));
49559 
49560       /* Shuffle the elements within the lanes.  After this we
49561 	 have { A A B B | C C D D } or { E E F F | G G H H }.  */
49562       t3 = gen_reg_rtx (V8SImode);
49563       t4 = gen_reg_rtx (V8SImode);
49564       mask = GEN_INT (high_p
49565 		      ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
49566 		      : 0 + (0 << 2) + (1 << 4) + (1 << 6));
49567       emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
49568       emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
49569 
49570       ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
49571       break;
49572 
49573     case E_V8HImode:
49574     case E_V16HImode:
49575       t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
49576 			 uns_p, OPTAB_DIRECT);
49577       t2 = expand_binop (mode,
49578 			 uns_p ? umul_highpart_optab : smul_highpart_optab,
49579 			 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
49580       gcc_assert (t1 && t2);
49581 
49582       t3 = gen_reg_rtx (mode);
49583       ix86_expand_vec_interleave (t3, t1, t2, high_p);
49584       emit_move_insn (dest, gen_lowpart (wmode, t3));
49585       break;
49586 
49587     case E_V16QImode:
49588     case E_V32QImode:
49589     case E_V32HImode:
49590     case E_V16SImode:
49591     case E_V64QImode:
49592       t1 = gen_reg_rtx (wmode);
49593       t2 = gen_reg_rtx (wmode);
49594       ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
49595       ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
49596 
49597       emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2)));
49598       break;
49599 
49600     default:
49601       gcc_unreachable ();
49602     }
49603 }
49604 
49605 void
49606 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
49607 {
49608   rtx res_1, res_2, res_3, res_4;
49609 
49610   res_1 = gen_reg_rtx (V4SImode);
49611   res_2 = gen_reg_rtx (V4SImode);
49612   res_3 = gen_reg_rtx (V2DImode);
49613   res_4 = gen_reg_rtx (V2DImode);
49614   ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
49615   ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
49616 
49617   /* Move the results in element 2 down to element 1; we don't care
49618      what goes in elements 2 and 3.  Then we can merge the parts
49619      back together with an interleave.
49620 
49621      Note that two other sequences were tried:
49622      (1) Use interleaves at the start instead of psrldq, which allows
49623      us to use a single shufps to merge things back at the end.
49624      (2) Use shufps here to combine the two vectors, then pshufd to
49625      put the elements in the correct order.
49626      In both cases the cost of the reformatting stall was too high
49627      and the overall sequence slower.  */
49628 
49629   emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
49630 				const0_rtx, const2_rtx,
49631 				const0_rtx, const0_rtx));
49632   emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
49633 				const0_rtx, const2_rtx,
49634 				const0_rtx, const0_rtx));
49635   res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
49636 
49637   set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
49638 }
49639 
49640 void
49641 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
49642 {
49643   machine_mode mode = GET_MODE (op0);
49644   rtx t1, t2, t3, t4, t5, t6;
49645 
49646   if (TARGET_AVX512DQ && mode == V8DImode)
49647     emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
49648   else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
49649     emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
49650   else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
49651     emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
49652   else if (TARGET_XOP && mode == V2DImode)
49653     {
49654       /* op1: A,B,C,D, op2: E,F,G,H */
49655       op1 = gen_lowpart (V4SImode, op1);
49656       op2 = gen_lowpart (V4SImode, op2);
49657 
49658       t1 = gen_reg_rtx (V4SImode);
49659       t2 = gen_reg_rtx (V4SImode);
49660       t3 = gen_reg_rtx (V2DImode);
49661       t4 = gen_reg_rtx (V2DImode);
49662 
49663       /* t1: B,A,D,C */
49664       emit_insn (gen_sse2_pshufd_1 (t1, op1,
49665 				    GEN_INT (1),
49666 				    GEN_INT (0),
49667 				    GEN_INT (3),
49668 				    GEN_INT (2)));
49669 
49670       /* t2: (B*E),(A*F),(D*G),(C*H) */
49671       emit_insn (gen_mulv4si3 (t2, t1, op2));
49672 
49673       /* t3: (B*E)+(A*F), (D*G)+(C*H) */
49674       emit_insn (gen_xop_phadddq (t3, t2));
49675 
49676       /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
49677       emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
49678 
49679       /* Multiply the low parts and add everything together.  */
49680       t5 = gen_reg_rtx (V2DImode);
49681       emit_insn (gen_vec_widen_umult_even_v4si (t5,
49682 					gen_lowpart (V4SImode, op1),
49683 					gen_lowpart (V4SImode, op2)));
49684       op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
49685 
49686     }
49687   else
49688     {
49689       machine_mode nmode;
49690       rtx (*umul) (rtx, rtx, rtx);
49691 
49692       if (mode == V2DImode)
49693 	{
49694 	  umul = gen_vec_widen_umult_even_v4si;
49695 	  nmode = V4SImode;
49696 	}
49697       else if (mode == V4DImode)
49698 	{
49699 	  umul = gen_vec_widen_umult_even_v8si;
49700 	  nmode = V8SImode;
49701 	}
49702       else if (mode == V8DImode)
49703 	{
49704 	  umul = gen_vec_widen_umult_even_v16si;
49705 	  nmode = V16SImode;
49706 	}
49707       else
49708 	gcc_unreachable ();
49709 
49710 
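      /* Sketch of the decomposition used below (illustrative): writing
	 each 64-bit element as hi * 2^32 + lo, the product modulo 2^64 is
	   lo(A)*lo(B) + ((hi(A)*lo(B) + hi(B)*lo(A)) << 32),
	 since the hi(A)*hi(B) term falls entirely outside the low 64 bits;
	 the three partial products are formed with the even-multiply
	 pattern UMUL chosen above.  */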
49711       /* Multiply low parts.  */
49712       t1 = gen_reg_rtx (mode);
49713       emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
49714 
49715       /* Shift input vectors right 32 bits so we can multiply high parts.  */
49716       t6 = GEN_INT (32);
49717       t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
49718       t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
49719 
49720       /* Multiply high parts by low parts.  */
49721       t4 = gen_reg_rtx (mode);
49722       t5 = gen_reg_rtx (mode);
49723       emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
49724       emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
49725 
49726       /* Combine and shift the highparts back.  */
49727       t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
49728       t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
49729 
49730       /* Combine high and low parts.  */
49731       force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
49732     }
49733 
49734   set_unique_reg_note (get_last_insn (), REG_EQUAL,
49735 		       gen_rtx_MULT (mode, op1, op2));
49736 }
49737 
49738 /* Return 1 if control transfer instruction INSN
49739    should be encoded with the bnd prefix.
49740    If INSN is NULL then return 1 when control
49741    transfer instructions should be prefixed with
49742    bnd by default for the current function.  */
49743 
49744 bool
49745 ix86_bnd_prefixed_insn_p (rtx insn)
49746 {
49747   /* For call insns check special flag.  */
49748   if (insn && CALL_P (insn))
49749     {
49750       rtx call = get_call_rtx_from (insn);
49751       if (call)
49752 	return CALL_EXPR_WITH_BOUNDS_P (call);
49753     }
49754 
49755   /* All other insns are prefixed only if function is instrumented.  */
49756   return chkp_function_instrumented_p (current_function_decl);
49757 }
49758 
49759 /* Return 1 if control transfer instruction INSN
49760    should be encoded with the notrack prefix.  */
49761 
49762 static bool
49763 ix86_notrack_prefixed_insn_p (rtx insn)
49764 {
49765   if (!insn || !((flag_cf_protection & CF_BRANCH)))
49766     return false;
49767 
49768   if (CALL_P (insn))
49769     {
49770       rtx call = get_call_rtx_from (insn);
49771       gcc_assert (call != NULL_RTX);
49772       rtx addr = XEXP (call, 0);
49773 
49774       /* Do not emit 'notrack' if it's not an indirect call.  */
49775       if (MEM_P (addr)
49776 	  && GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
49777 	return false;
49778       else
49779 	return find_reg_note (insn, REG_CALL_NOCF_CHECK, 0);
49780     }
49781 
49782   if (JUMP_P (insn) && !flag_cet_switch)
49783     {
49784       rtx target = JUMP_LABEL (insn);
49785       if (target == NULL_RTX || ANY_RETURN_P (target))
49786 	return false;
49787 
49788       /* Check whether the jump is to a switch table.  */
49789       rtx_insn *label = as_a<rtx_insn *> (target);
49790       rtx_insn *table = next_insn (label);
49791       if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
49792 	return false;
49793       else
49794 	return true;
49795     }
49796   return false;
49797 }
49798 
49799 /* Calculate integer abs() using only SSE2 instructions.  */
49800 
49801 void
49802 ix86_expand_sse2_abs (rtx target, rtx input)
49803 {
49804   machine_mode mode = GET_MODE (target);
49805   rtx tmp0, tmp1, x;
49806 
49807   switch (mode)
49808     {
49809       /* For 32-bit signed integer X, the best way to calculate the absolute
49810 	 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)).  */
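      /* Worked example (illustrative): for W == 32 and X == -5,
	 X >> 31 == -1, -1 ^ -5 == 4 and 4 - (-1) == 5; for X >= 0 the
	 shift yields 0 and the expression reduces to X itself.  */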
49811       case E_V4SImode:
49812 	tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
49813 				    GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1),
49814 				    NULL, 0, OPTAB_DIRECT);
49815 	tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
49816 				    NULL, 0, OPTAB_DIRECT);
49817 	x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
49818 				 target, 0, OPTAB_DIRECT);
49819 	break;
49820 
49821       /* For 16-bit signed integer X, the best way to calculate the absolute
49822 	 value of X is max (X, -X), as SSE2 provides the PMAXSW insn.  */
49823       case E_V8HImode:
49824 	tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
49825 
49826 	x = expand_simple_binop (mode, SMAX, tmp0, input,
49827 				 target, 0, OPTAB_DIRECT);
49828 	break;
49829 
49830       /* For 8-bit signed integer X, the best way to calculate the absolute
49831 	 value of X is min ((unsigned char) X, (unsigned char) (-X)),
49832 	 as SSE2 provides the PMINUB insn.  */
49833       case E_V16QImode:
49834 	tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
49835 
49836 	x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
49837 				 target, 0, OPTAB_DIRECT);
49838 	break;
49839 
49840       default:
49841 	gcc_unreachable ();
49842     }
49843 
49844   if (x != target)
49845     emit_move_insn (target, x);
49846 }
49847 
49848 /* Expand an extract from a vector register through pextr insn.
49849    Return true if successful.  */
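/* Usage sketch (illustrative): extracting SIZE == 16 bits at POS == 48
   from a V8HImode source selects element POS / SIZE == 3, i.e. the
   pattern built below is (vec_select:HI (reg:V8HI ...) (parallel [3])),
   which matches a single pextrw; a POS that is not a multiple of SIZE
   is rejected.  */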
49850 
49851 bool
49852 ix86_expand_pextr (rtx *operands)
49853 {
49854   rtx dst = operands[0];
49855   rtx src = operands[1];
49856 
49857   unsigned int size = INTVAL (operands[2]);
49858   unsigned int pos = INTVAL (operands[3]);
49859 
49860   if (SUBREG_P (dst))
49861     {
49862       /* Reject non-lowpart subregs.  */
49863       if (SUBREG_BYTE (dst) > 0)
49864 	return false;
49865       dst = SUBREG_REG (dst);
49866     }
49867 
49868   if (SUBREG_P (src))
49869     {
49870       pos += SUBREG_BYTE (src) * BITS_PER_UNIT;
49871       src = SUBREG_REG (src);
49872     }
49873 
49874   switch (GET_MODE (src))
49875     {
49876     case E_V16QImode:
49877     case E_V8HImode:
49878     case E_V4SImode:
49879     case E_V2DImode:
49880     case E_V1TImode:
49881     case E_TImode:
49882       {
49883 	machine_mode srcmode, dstmode;
49884 	rtx d, pat;
49885 
49886 	if (!int_mode_for_size (size, 0).exists (&dstmode))
49887 	  return false;
49888 
49889 	switch (dstmode)
49890 	  {
49891 	  case E_QImode:
49892 	    if (!TARGET_SSE4_1)
49893 	      return false;
49894 	    srcmode = V16QImode;
49895 	    break;
49896 
49897 	  case E_HImode:
49898 	    if (!TARGET_SSE2)
49899 	      return false;
49900 	    srcmode = V8HImode;
49901 	    break;
49902 
49903 	  case E_SImode:
49904 	    if (!TARGET_SSE4_1)
49905 	      return false;
49906 	    srcmode = V4SImode;
49907 	    break;
49908 
49909 	  case E_DImode:
49910 	    gcc_assert (TARGET_64BIT);
49911 	    if (!TARGET_SSE4_1)
49912 	      return false;
49913 	    srcmode = V2DImode;
49914 	    break;
49915 
49916 	  default:
49917 	    return false;
49918 	  }
49919 
49920 	/* Reject extractions from misaligned positions.  */
49921 	if (pos & (size-1))
49922 	  return false;
49923 
49924 	if (GET_MODE (dst) == dstmode)
49925 	  d = dst;
49926 	else
49927 	  d = gen_reg_rtx (dstmode);
49928 
49929 	/* Construct insn pattern.  */
49930 	pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (pos / size)));
49931 	pat = gen_rtx_VEC_SELECT (dstmode, gen_lowpart (srcmode, src), pat);
49932 
49933 	/* Let the rtl optimizers know about the zero extension performed.  */
49934 	if (dstmode == QImode || dstmode == HImode)
49935 	  {
49936 	    pat = gen_rtx_ZERO_EXTEND (SImode, pat);
49937 	    d = gen_lowpart (SImode, d);
49938 	  }
49939 
49940 	emit_insn (gen_rtx_SET (d, pat));
49941 
49942 	if (d != dst)
49943 	  emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
49944 	return true;
49945       }
49946 
49947     default:
49948       return false;
49949     }
49950 }
49951 
49952 /* Expand an insert into a vector register through pinsr insn.
49953    Return true if successful.  */
49954 
49955 bool
49956 ix86_expand_pinsr (rtx *operands)
49957 {
49958   rtx dst = operands[0];
49959   rtx src = operands[3];
49960 
49961   unsigned int size = INTVAL (operands[1]);
49962   unsigned int pos = INTVAL (operands[2]);
49963 
49964   if (SUBREG_P (dst))
49965     {
49966       pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
49967       dst = SUBREG_REG (dst);
49968     }
49969 
49970   switch (GET_MODE (dst))
49971     {
49972     case E_V16QImode:
49973     case E_V8HImode:
49974     case E_V4SImode:
49975     case E_V2DImode:
49976     case E_V1TImode:
49977     case E_TImode:
49978       {
49979 	machine_mode srcmode, dstmode;
49980 	rtx (*pinsr)(rtx, rtx, rtx, rtx);
49981 	rtx d;
49982 
49983 	if (!int_mode_for_size (size, 0).exists (&srcmode))
49984 	  return false;
49985 
49986 	switch (srcmode)
49987 	  {
49988 	  case E_QImode:
49989 	    if (!TARGET_SSE4_1)
49990 	      return false;
49991 	    dstmode = V16QImode;
49992 	    pinsr = gen_sse4_1_pinsrb;
49993 	    break;
49994 
49995 	  case E_HImode:
49996 	    if (!TARGET_SSE2)
49997 	      return false;
49998 	    dstmode = V8HImode;
49999 	    pinsr = gen_sse2_pinsrw;
50000 	    break;
50001 
50002 	  case E_SImode:
50003 	    if (!TARGET_SSE4_1)
50004 	      return false;
50005 	    dstmode = V4SImode;
50006 	    pinsr = gen_sse4_1_pinsrd;
50007 	    break;
50008 
50009 	  case E_DImode:
50010 	    gcc_assert (TARGET_64BIT);
50011 	    if (!TARGET_SSE4_1)
50012 	      return false;
50013 	    dstmode = V2DImode;
50014 	    pinsr = gen_sse4_1_pinsrq;
50015 	    break;
50016 
50017 	  default:
50018 	    return false;
50019 	  }
50020 
50021 	/* Reject insertions to misaligned positions.  */
50022 	if (pos & (size-1))
50023 	  return false;
50024 
50025 	if (SUBREG_P (src))
50026 	  {
50027 	    unsigned int srcpos = SUBREG_BYTE (src);
50028 
50029 	    if (srcpos > 0)
50030 	      {
50031 		rtx extr_ops[4];
50032 
50033 		extr_ops[0] = gen_reg_rtx (srcmode);
50034 		extr_ops[1] = gen_lowpart (srcmode, SUBREG_REG (src));
50035 		extr_ops[2] = GEN_INT (size);
50036 		extr_ops[3] = GEN_INT (srcpos * BITS_PER_UNIT);
50037 
50038 		if (!ix86_expand_pextr (extr_ops))
50039 		  return false;
50040 
50041 		src = extr_ops[0];
50042 	      }
50043 	    else
50044 	      src = gen_lowpart (srcmode, SUBREG_REG (src));
50045 	  }
50046 
50047 	if (GET_MODE (dst) == dstmode)
50048 	  d = dst;
50049 	else
50050 	  d = gen_reg_rtx (dstmode);
50051 
50052 	emit_insn (pinsr (d, gen_lowpart (dstmode, dst),
50053 			  gen_lowpart (srcmode, src),
50054 			  GEN_INT (1 << (pos / size))));
50055 	if (d != dst)
50056 	  emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
50057 	return true;
50058       }
50059 
50060     default:
50061       return false;
50062     }
50063 }
50064 
50065 /* This function returns the calling-ABI-specific va_list type node,
50066    i.e. the va_list type matching the ABI of FNDECL.  */
50067 
50068 static tree
50069 ix86_fn_abi_va_list (tree fndecl)
50070 {
50071   if (!TARGET_64BIT)
50072     return va_list_type_node;
50073   gcc_assert (fndecl != NULL_TREE);
50074 
50075   if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
50076     return ms_va_list_type_node;
50077   else
50078     return sysv_va_list_type_node;
50079 }
50080 
50081 /* Returns the canonical va_list type specified by TYPE. If there
50082    is no valid TYPE provided, it return NULL_TREE.  */
50083 
50084 static tree
50085 ix86_canonical_va_list_type (tree type)
50086 {
50087   if (TARGET_64BIT)
50088     {
50089       if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
50090 	return ms_va_list_type_node;
50091 
50092       if ((TREE_CODE (type) == ARRAY_TYPE
50093 	   && integer_zerop (array_type_nelts (type)))
50094 	  || POINTER_TYPE_P (type))
50095 	{
50096 	  tree elem_type = TREE_TYPE (type);
50097 	  if (TREE_CODE (elem_type) == RECORD_TYPE
50098 	      && lookup_attribute ("sysv_abi va_list",
50099 				   TYPE_ATTRIBUTES (elem_type)))
50100 	    return sysv_va_list_type_node;
50101 	}
50102 
50103       return NULL_TREE;
50104     }
50105 
50106   return std_canonical_va_list_type (type);
50107 }
50108 
50109 /* Iterate through the target-specific builtin types for va_list.
50110    IDX denotes the iterator, *PTREE is set to the result type of
50111    the va_list builtin, and *PNAME to its internal type.
50112    Returns zero if there is no element for this index, otherwise
50113    IDX should be increased upon the next call.
50114    Note, do not iterate a base builtin's name like __builtin_va_list.
50115    Used from c_common_nodes_and_builtins.  */
50116 
50117 static int
50118 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
50119 {
50120   if (TARGET_64BIT)
50121     {
50122       switch (idx)
50123 	{
50124 	default:
50125 	  break;
50126 
50127 	case 0:
50128 	  *ptree = ms_va_list_type_node;
50129 	  *pname = "__builtin_ms_va_list";
50130 	  return 1;
50131 
50132 	case 1:
50133 	  *ptree = sysv_va_list_type_node;
50134 	  *pname = "__builtin_sysv_va_list";
50135 	  return 1;
50136 	}
50137     }
50138 
50139   return 0;
50140 }
50141 
50142 #undef TARGET_SCHED_DISPATCH
50143 #define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
50144 #undef TARGET_SCHED_DISPATCH_DO
50145 #define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
50146 #undef TARGET_SCHED_REASSOCIATION_WIDTH
50147 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50148 #undef TARGET_SCHED_REORDER
50149 #define TARGET_SCHED_REORDER ix86_atom_sched_reorder
50150 #undef TARGET_SCHED_ADJUST_PRIORITY
50151 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50152 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50153 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50154   ix86_dependencies_evaluation_hook
50155 
50156 
50157 /* Implementation of reassociation_width target hook used by
50158    reassoc phase to identify parallelism level in reassociated
50159    tree.  The statement's tree_code is passed in OP.  The arguments'
50160    type is passed in MODE.  */
50161 
50162 static int
50163 ix86_reassociation_width (unsigned int op, machine_mode mode)
50164 {
50165   int width = 1;
50166   /* Vector part.  */
50167   if (VECTOR_MODE_P (mode))
50168     {
50169       int div = 1;
50170       if (INTEGRAL_MODE_P (mode))
50171 	width = ix86_cost->reassoc_vec_int;
50172       else if (FLOAT_MODE_P (mode))
50173 	width = ix86_cost->reassoc_vec_fp;
50174 
50175       if (width == 1)
50176 	return 1;
50177 
50178       /* Integer vector instructions execute in the FP unit
50179 	 and can execute 3 additions and one multiplication per cycle.  */
50180       if (ix86_tune == PROCESSOR_ZNVER1 && INTEGRAL_MODE_P (mode)
50181 	  && op != PLUS && op != MINUS)
50182 	return 1;
50183 
50184       /* Account for targets that split wide vectors into multiple parts.  */
50185       if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (mode) > 128)
50186 	div = GET_MODE_BITSIZE (mode) / 128;
50187       else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
50188 	div = GET_MODE_BITSIZE (mode) / 64;
50189       width = (width + div - 1) / div;
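      /* Example with hypothetical numbers: if the cost table gave
	 reassoc_vec_fp == 4 and MODE is 256 bits wide on a
	 TARGET_AVX128_OPTIMAL CPU, then div == 2 and the resulting
	 width is (4 + 2 - 1) / 2 == 2, reflecting that each 256-bit
	 operation is split into two 128-bit halves.  */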
50190     }
50191   /* Scalar part.  */
50192   else if (INTEGRAL_MODE_P (mode))
50193     width = ix86_cost->reassoc_int;
50194   else if (FLOAT_MODE_P (mode))
50195     width = ix86_cost->reassoc_fp;
50196 
50197   /* Avoid using too many registers in 32bit mode.  */
50198   if (!TARGET_64BIT && width > 2)
50199     width = 2;
50200   return width;
50201 }
50202 
50203 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
50204    place emms and femms instructions.  */
50205 
50206 static machine_mode
50207 ix86_preferred_simd_mode (scalar_mode mode)
50208 {
50209   if (!TARGET_SSE)
50210     return word_mode;
50211 
50212   switch (mode)
50213     {
50214     case E_QImode:
50215       if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
50216 	return V64QImode;
50217       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50218 	return V32QImode;
50219       else
50220 	return V16QImode;
50221 
50222     case E_HImode:
50223       if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
50224 	return V32HImode;
50225       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50226 	return V16HImode;
50227       else
50228 	return V8HImode;
50229 
50230     case E_SImode:
50231       if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
50232 	return V16SImode;
50233       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50234 	return V8SImode;
50235       else
50236 	return V4SImode;
50237 
50238     case E_DImode:
50239       if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
50240 	return V8DImode;
50241       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50242 	return V4DImode;
50243       else
50244 	return V2DImode;
50245 
50246     case E_SFmode:
50247       if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
50248 	return V16SFmode;
50249       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50250 	return V8SFmode;
50251       else
50252 	return V4SFmode;
50253 
50254     case E_DFmode:
50255       if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
50256 	return V8DFmode;
50257       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50258 	return V4DFmode;
50259       else if (TARGET_SSE2)
50260 	return V2DFmode;
50261       /* FALLTHRU */
50262 
50263     default:
50264       return word_mode;
50265     }
50266 }
50267 
50268 /* All CPUs prefer to avoid cross-lane operations, so perform reductions
50269    of upper against lower halves down to SSE register size.  */
50270 
50271 static machine_mode
50272 ix86_split_reduction (machine_mode mode)
50273 {
50274   /* Reduce lowpart against highpart until we reach SSE reg width to
50275      avoid cross-lane operations.  */
50276   switch (mode)
50277     {
50278     case E_V8DImode:
50279     case E_V4DImode:
50280       return V2DImode;
50281     case E_V16SImode:
50282     case E_V8SImode:
50283       return V4SImode;
50284     case E_V32HImode:
50285     case E_V16HImode:
50286       return V8HImode;
50287     case E_V64QImode:
50288     case E_V32QImode:
50289       return V16QImode;
50290     case E_V16SFmode:
50291     case E_V8SFmode:
50292       return V4SFmode;
50293     case E_V8DFmode:
50294     case E_V4DFmode:
50295       return V2DFmode;
50296     default:
50297       return mode;
50298     }
50299 }
50300 
50301 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
50302    vectors.  If AVX512F is enabled then try vectorizing with 512bit,
50303    256bit and 128bit vectors.  */
50304 
50305 static void
50306 ix86_autovectorize_vector_sizes (vector_sizes *sizes)
50307 {
50308   if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
50309     {
50310       sizes->safe_push (64);
50311       sizes->safe_push (32);
50312       sizes->safe_push (16);
50313     }
50314   else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50315     {
50316       sizes->safe_push (32);
50317       sizes->safe_push (16);
50318     }
50319 }
50320 
50321 /* Implementation of targetm.vectorize.get_mask_mode.  */
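/* Example (illustrative): with TARGET_AVX512F, VECTOR_SIZE == 64 and
   NUNITS == 16 (e.g. V16SF), elem_size is 4 and the scalar-mask path
   below returns the smallest integer mode holding 16 bits, i.e. HImode.
   For a 16-byte V4SF vector without TARGET_AVX512VL, the fall-through
   path instead yields the V4SImode vector mask.  */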
50322 
50323 static opt_machine_mode
50324 ix86_get_mask_mode (poly_uint64 nunits, poly_uint64 vector_size)
50325 {
50326   unsigned elem_size = vector_size / nunits;
50327 
50328   /* Scalar mask case.  */
50329   if ((TARGET_AVX512F && vector_size == 64)
50330       || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
50331     {
50332       if (elem_size == 4 || elem_size == 8 || TARGET_AVX512BW)
50333 	return smallest_int_mode_for_size (nunits);
50334     }
50335 
50336   scalar_int_mode elem_mode
50337     = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT);
50338 
50339   gcc_assert (elem_size * nunits == vector_size);
50340 
50341   return mode_for_vector (elem_mode, nunits);
50342 }
50343 
50344 
50345 
50346 /* Return the class of registers which could be used for a pseudo of MODE
50347    and of class RCLASS for spilling instead of memory.  Return NO_REGS
50348    if it is not possible or not profitable.  */
50349 
50350 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.  */
50351 
50352 static reg_class_t
50353 ix86_spill_class (reg_class_t rclass, machine_mode mode)
50354 {
50355   if (0 && TARGET_GENERAL_REGS_SSE_SPILL
50356       && TARGET_SSE2
50357       && TARGET_INTER_UNIT_MOVES_TO_VEC
50358       && TARGET_INTER_UNIT_MOVES_FROM_VEC
50359       && (mode == SImode || (TARGET_64BIT && mode == DImode))
50360       && INTEGER_CLASS_P (rclass))
50361     return ALL_SSE_REGS;
50362   return NO_REGS;
50363 }
50364 
50365 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST.  Like the default implementation,
50366    but returns a lower bound.  */
50367 
50368 static unsigned int
50369 ix86_max_noce_ifcvt_seq_cost (edge e)
50370 {
50371   bool predictable_p = predictable_edge_p (e);
50372 
50373   enum compiler_param param
50374     = (predictable_p
50375        ? PARAM_MAX_RTL_IF_CONVERSION_PREDICTABLE_COST
50376        : PARAM_MAX_RTL_IF_CONVERSION_UNPREDICTABLE_COST);
50377 
50378   /* If we have a parameter set, use that, otherwise take a guess using
50379      BRANCH_COST.  */
50380   if (global_options_set.x_param_values[param])
50381     return PARAM_VALUE (param);
50382   else
50383     return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2);
50384 }
50385 
50386 /* Return true if SEQ is a good candidate as a replacement for the
50387    if-convertible sequence described in IF_INFO.  */
50388 
50389 static bool
50390 ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
50391 {
50392   if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
50393     {
50394       int cmov_cnt = 0;
50395       /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
50396 	 Maybe we should allow even more conditional moves as long as they
50397 	 are used far enough not to stall the CPU, or also consider
50398 	 IF_INFO->TEST_BB succ edge probabilities.  */
50399       for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
50400 	{
50401 	  rtx set = single_set (insn);
50402 	  if (!set)
50403 	    continue;
50404 	  if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
50405 	    continue;
50406 	  rtx src = SET_SRC (set);
50407 	  machine_mode mode = GET_MODE (src);
50408 	  if (GET_MODE_CLASS (mode) != MODE_INT
50409 	      && GET_MODE_CLASS (mode) != MODE_FLOAT)
50410 	    continue;
50411 	  if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
50412 	      || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
50413 	    continue;
50414 	  /* insn is CMOV or FCMOV.  */
50415 	  if (++cmov_cnt > 1)
50416 	    return false;
50417 	}
50418     }
50419   return default_noce_conversion_profitable_p (seq, if_info);
50420 }
50421 
50422 /* Implement targetm.vectorize.init_cost.  */
50423 
50424 static void *
50425 ix86_init_cost (struct loop *)
50426 {
50427   unsigned *cost = XNEWVEC (unsigned, 3);
50428   cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
50429   return cost;
50430 }
50431 
50432 /* Implement targetm.vectorize.add_stmt_cost.  */
50433 
50434 static unsigned
50435 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
50436 		    struct _stmt_vec_info *stmt_info, int misalign,
50437 		    enum vect_cost_model_location where)
50438 {
50439   unsigned *cost = (unsigned *) data;
50440   unsigned retval = 0;
50441 
50442   tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
50443   int stmt_cost = - 1;
50444 
50445   if ((kind == vector_stmt || kind == scalar_stmt)
50446       && stmt_info
50447       && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
50448     {
50449       tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
50450       bool fp = false;
50451       machine_mode mode = TImode;
50452 
50453       if (vectype != NULL)
50454 	{
50455 	  fp = FLOAT_TYPE_P (vectype);
50456 	  mode = TYPE_MODE (vectype);
50457 	}
50458       /*machine_mode inner_mode = mode;
50459       if (VECTOR_MODE_P (mode))
50460 	inner_mode = GET_MODE_INNER (mode);*/
50461 
50462       switch (subcode)
50463 	{
50464 	case PLUS_EXPR:
50465 	case POINTER_PLUS_EXPR:
50466 	case MINUS_EXPR:
50467 	  if (kind == scalar_stmt)
50468 	    {
50469 	      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
50470 		stmt_cost = ix86_cost->addss;
50471 	      else if (X87_FLOAT_MODE_P (mode))
50472 		stmt_cost = ix86_cost->fadd;
50473 	      else
50474 	        stmt_cost = ix86_cost->add;
50475 	    }
50476 	  else
50477 	    stmt_cost = ix86_vec_cost (mode,
50478 				       fp ? ix86_cost->addss
50479 				       : ix86_cost->sse_op,
50480 				       true);
50481 	  break;
50482 
50483 	case MULT_EXPR:
50484 	case WIDEN_MULT_EXPR:
50485 	case MULT_HIGHPART_EXPR:
50486 	  stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
50487 	  break;
50488 	case FMA_EXPR:
50489           stmt_cost = ix86_vec_cost (mode,
50490 				     mode == SFmode ? ix86_cost->fmass
50491 				     : ix86_cost->fmasd,
50492 				     true);
50493 	  break;
50494 	case NEGATE_EXPR:
50495 	  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
50496 	    stmt_cost = ix86_cost->sse_op;
50497 	  else if (X87_FLOAT_MODE_P (mode))
50498 	    stmt_cost = ix86_cost->fchs;
50499 	  else if (VECTOR_MODE_P (mode))
50500 	    stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op, true);
50501 	  else
50502 	    stmt_cost = ix86_cost->add;
50503 	  break;
50504 	case TRUNC_DIV_EXPR:
50505 	case CEIL_DIV_EXPR:
50506 	case FLOOR_DIV_EXPR:
50507 	case ROUND_DIV_EXPR:
50508 	case TRUNC_MOD_EXPR:
50509 	case CEIL_MOD_EXPR:
50510 	case FLOOR_MOD_EXPR:
50511 	case RDIV_EXPR:
50512 	case ROUND_MOD_EXPR:
50513 	case EXACT_DIV_EXPR:
50514 	  stmt_cost = ix86_division_cost (ix86_cost, mode);
50515 	  break;
50516 
50517 	case RSHIFT_EXPR:
50518 	case LSHIFT_EXPR:
50519 	case LROTATE_EXPR:
50520 	case RROTATE_EXPR:
50521 	  {
50522 	    tree op2 = gimple_assign_rhs2 (stmt_info->stmt);
50523 	    stmt_cost = ix86_shift_rotate_cost
50524 			   (ix86_cost, mode,
50525 		            TREE_CODE (op2) == INTEGER_CST,
50526 			    cst_and_fits_in_hwi (op2) ? int_cst_value (op2) : -1,
50527 		            true, false, false, NULL, NULL);
50528 	  }
50529 	  break;
50530 	case NOP_EXPR:
50531 	  /* Only sign-conversions are free.  */
50532 	  if (tree_nop_conversion_p
50533 	        (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
50534 		 TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
50535 	    stmt_cost = 0;
50536 	  break;
50537 
50538 	case BIT_IOR_EXPR:
50539 	case ABS_EXPR:
50540 	case MIN_EXPR:
50541 	case MAX_EXPR:
50542 	case BIT_XOR_EXPR:
50543 	case BIT_AND_EXPR:
50544 	case BIT_NOT_EXPR:
50545 	  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
50546 	    stmt_cost = ix86_cost->sse_op;
50547 	  else if (VECTOR_MODE_P (mode))
50548 	    stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op, true);
50549 	  else
50550 	    stmt_cost = ix86_cost->add;
50551 	  break;
50552 	default:
50553 	  break;
50554 	}
50555     }
50556   /* If we do elementwise loads into a vector then we are bound by
50557      latency and execution resources for the many scalar loads
50558      (AGU and load ports).  Try to account for this by scaling the
50559      construction cost by the number of elements involved.  */
50560   if (kind == vec_construct
50561       && stmt_info
50562       && STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
50563       && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
50564       && TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info))) != INTEGER_CST)
50565     {
50566       stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
50567       stmt_cost *= TYPE_VECTOR_SUBPARTS (vectype);
50568     }
50569   if (stmt_cost == -1)
50570     stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
50571 
50572   /* Penalize DFmode vector operations for Bonnell.  */
50573   if (TARGET_BONNELL && kind == vector_stmt
50574       && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
50575     stmt_cost *= 5;  /* FIXME: The value here is arbitrary.  */
50576 
50577   /* Statements in an inner loop relative to the loop being
50578      vectorized are weighted more heavily.  The value here is
50579      arbitrary and could potentially be improved with analysis.  */
50580   if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
50581     count *= 50;  /* FIXME.  */
50582 
50583   retval = (unsigned) (count * stmt_cost);
50584 
50585   /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
50586      for Silvermont, as it has an out-of-order integer pipeline and can
50587      execute 2 scalar instructions per tick, but has an in-order SIMD pipeline.  */
50588   if ((TARGET_SILVERMONT || TARGET_INTEL)
50589       && stmt_info && stmt_info->stmt)
50590     {
50591       tree lhs_op = gimple_get_lhs (stmt_info->stmt);
50592       if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
50593 	retval = (retval * 17) / 10;
50594     }
50595 
50596   cost[where] += retval;
50597 
50598   return retval;
50599 }
50600 
50601 /* Implement targetm.vectorize.finish_cost.  */
50602 
50603 static void
50604 ix86_finish_cost (void *data, unsigned *prologue_cost,
50605 		  unsigned *body_cost, unsigned *epilogue_cost)
50606 {
50607   unsigned *cost = (unsigned *) data;
50608   *prologue_cost = cost[vect_prologue];
50609   *body_cost     = cost[vect_body];
50610   *epilogue_cost = cost[vect_epilogue];
50611 }
50612 
50613 /* Implement targetm.vectorize.destroy_cost_data.  */
50614 
50615 static void
50616 ix86_destroy_cost_data (void *data)
50617 {
50618   free (data);
50619 }
50620 
50621 /* Validate target specific memory model bits in VAL. */
50622 
50623 static unsigned HOST_WIDE_INT
50624 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
50625 {
50626   enum memmodel model = memmodel_from_int (val);
50627   bool strong;
50628 
50629   if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
50630 				      |MEMMODEL_MASK)
50631       || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
50632     {
50633       warning (OPT_Winvalid_memory_model,
50634 	       "unknown architecture specific memory model");
50635       return MEMMODEL_SEQ_CST;
50636     }
50637   strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
50638   if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
50639     {
50640       warning (OPT_Winvalid_memory_model,
50641               "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
50642       return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
50643     }
50644   if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
50645     {
50646       warning (OPT_Winvalid_memory_model,
50647               "HLE_RELEASE not used with RELEASE or stronger memory model");
50648       return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
50649     }
50650   return val;
50651 }
50652 
50653 /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
50654    CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
50655    CLONEI->simdlen.  Return 0 if SIMD clones shouldn't be emitted,
50656    or number of vecsize_mangle variants that should be emitted.  */
50657 
50658 static int
50659 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
50660 					     struct cgraph_simd_clone *clonei,
50661 					     tree base_type, int num)
50662 {
50663   int ret = 1;
50664 
50665   if (clonei->simdlen
50666       && (clonei->simdlen < 2
50667 	  || clonei->simdlen > 1024
50668 	  || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
50669     {
50670       warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
50671 		  "unsupported simdlen %d", clonei->simdlen);
50672       return 0;
50673     }
50674 
50675   tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
50676   if (TREE_CODE (ret_type) != VOID_TYPE)
50677     switch (TYPE_MODE (ret_type))
50678       {
50679       case E_QImode:
50680       case E_HImode:
50681       case E_SImode:
50682       case E_DImode:
50683       case E_SFmode:
50684       case E_DFmode:
50685       /* case E_SCmode: */
50686       /* case E_DCmode: */
50687 	break;
50688       default:
50689 	warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
50690 		    "unsupported return type %qT for simd", ret_type);
50691 	return 0;
50692       }
50693 
50694   tree t;
50695   int i;
50696 
50697   for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
50698     /* FIXME: Shouldn't we allow such arguments if they are uniform?  */
50699     switch (TYPE_MODE (TREE_TYPE (t)))
50700       {
50701       case E_QImode:
50702       case E_HImode:
50703       case E_SImode:
50704       case E_DImode:
50705       case E_SFmode:
50706       case E_DFmode:
50707       /* case E_SCmode: */
50708       /* case E_DCmode: */
50709 	break;
50710       default:
50711 	warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
50712 		    "unsupported argument type %qT for simd", TREE_TYPE (t));
50713 	return 0;
50714       }
50715 
50716   if (!TREE_PUBLIC (node->decl))
50717     {
50718       /* If the function isn't exported, we can pick up just one ISA
50719 	 for the clones.  */
50720       if (TARGET_AVX512F)
50721 	clonei->vecsize_mangle = 'e';
50722       else if (TARGET_AVX2)
50723 	clonei->vecsize_mangle = 'd';
50724       else if (TARGET_AVX)
50725 	clonei->vecsize_mangle = 'c';
50726       else
50727 	clonei->vecsize_mangle = 'b';
50728       ret = 1;
50729     }
50730   else
50731     {
50732       clonei->vecsize_mangle = "bcde"[num];
50733       ret = 4;
50734     }
50735   clonei->mask_mode = VOIDmode;
50736   switch (clonei->vecsize_mangle)
50737     {
50738     case 'b':
50739       clonei->vecsize_int = 128;
50740       clonei->vecsize_float = 128;
50741       break;
50742     case 'c':
50743       clonei->vecsize_int = 128;
50744       clonei->vecsize_float = 256;
50745       break;
50746     case 'd':
50747       clonei->vecsize_int = 256;
50748       clonei->vecsize_float = 256;
50749       break;
50750     case 'e':
50751       clonei->vecsize_int = 512;
50752       clonei->vecsize_float = 512;
50753       if (TYPE_MODE (base_type) == QImode)
50754 	clonei->mask_mode = DImode;
50755       else
50756 	clonei->mask_mode = SImode;
50757       break;
50758     }
50759   if (clonei->simdlen == 0)
50760     {
50761       if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
50762 	clonei->simdlen = clonei->vecsize_int;
50763       else
50764 	clonei->simdlen = clonei->vecsize_float;
50765       clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
50766     }
50767   else if (clonei->simdlen > 16)
50768     {
50769 	  /* For compatibility with ICC, use the same upper bounds
50770 	 for simdlen.  In particular, for CTYPE below, use the return type,
50771 	 unless the function returns void, in which case use the
50772 	 characteristic type.  If it is possible for the given SIMDLEN to
50773 	 pass a CTYPE value in registers (8 [XYZ]MM* regs for 32-bit code,
50774 	 16 [XYZ]MM* regs for 64-bit code), accept that SIMDLEN, otherwise
50775 	 warn and don't emit the corresponding clone.  */
50776       tree ctype = ret_type;
50777       if (TREE_CODE (ret_type) == VOID_TYPE)
50778 	ctype = base_type;
50779       int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
50780       if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
50781 	cnt /= clonei->vecsize_int;
50782       else
50783 	cnt /= clonei->vecsize_float;
50784       if (cnt > (TARGET_64BIT ? 16 : 8))
50785 	{
50786 	  warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
50787 		      "unsupported simdlen %d", clonei->simdlen);
50788 	  return 0;
50789 	}
50790     }
50791   return ret;
50792 }
50793 
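/* As a worked example of the computation above: for a non-exported
   "#pragma omp declare simd" function with a double characteristic type
   compiled with -mavx2, vecsize_mangle is 'd', vecsize_float is 256, and
   an unspecified simdlen becomes 256 / 64 = 4 lanes; for an exported
   function the four variants 'b'..'e' are requested (ret == 4).  */
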
50794 /* Add target attribute to SIMD clone NODE if needed.  */
50795 
50796 static void
50797 ix86_simd_clone_adjust (struct cgraph_node *node)
50798 {
50799   const char *str = NULL;
50800   gcc_assert (node->decl == cfun->decl);
50801   switch (node->simdclone->vecsize_mangle)
50802     {
50803     case 'b':
50804       if (!TARGET_SSE2)
50805 	str = "sse2";
50806       break;
50807     case 'c':
50808       if (!TARGET_AVX)
50809 	str = "avx";
50810       break;
50811     case 'd':
50812       if (!TARGET_AVX2)
50813 	str = "avx2";
50814       break;
50815     case 'e':
50816       if (!TARGET_AVX512F)
50817 	str = "avx512f";
50818       break;
50819     default:
50820       gcc_unreachable ();
50821     }
50822   if (str == NULL)
50823     return;
50824   push_cfun (NULL);
50825   tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
50826   bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
50827   gcc_assert (ok);
50828   pop_cfun ();
50829   ix86_reset_previous_fndecl ();
50830   ix86_set_current_function (node->decl);
50831 }
50832 
50833 /* If SIMD clone NODE can't be used in a vectorized loop
50834    in current function, return -1, otherwise return a badness of using it
50835    (0 if it is most desirable from vecsize_mangle point of view, 1
50836    slightly less desirable, etc.).  */
50837 
50838 static int
50839 ix86_simd_clone_usable (struct cgraph_node *node)
50840 {
50841   switch (node->simdclone->vecsize_mangle)
50842     {
50843     case 'b':
50844       if (!TARGET_SSE2)
50845 	return -1;
50846       if (!TARGET_AVX)
50847 	return 0;
50848       return TARGET_AVX2 ? 2 : 1;
50849     case 'c':
50850       if (!TARGET_AVX)
50851 	return -1;
50852       return TARGET_AVX2 ? 1 : 0;
50853     case 'd':
50854       if (!TARGET_AVX2)
50855 	return -1;
50856       return 0;
50857     case 'e':
50858       if (!TARGET_AVX512F)
50859 	return -1;
50860       return 0;
50861     default:
50862       gcc_unreachable ();
50863     }
50864 }
50865 
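/* Illustrative badness values from the ranking above: when the caller is
   compiled with -mavx2 (and no AVX-512), the 'd' (AVX2) clone gets
   badness 0, 'c' (AVX) gets 1, 'b' (SSE2) gets 2, and the 'e' (AVX-512)
   clone is rejected with -1.  */
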
50866 /* This function adjusts the unroll factor based on
50867    the hardware capabilities.  For example, bdver3 has
50868    a loop buffer which makes unrolling of smaller
50869    loops less important.  This function decides the
50870    unroll factor using the number of memory references
50871    in the loop body (with a budget of 32) as a heuristic.  */
50872 
50873 static unsigned
50874 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
50875 {
50876   basic_block *bbs;
50877   rtx_insn *insn;
50878   unsigned i;
50879   unsigned mem_count = 0;
50880 
50881   if (!TARGET_ADJUST_UNROLL)
50882      return nunroll;
50883 
50884   /* Count the number of memory references within the loop body.
50885      This value determines the unrolling factor for bdver3 and bdver4
50886      architectures. */
50887   subrtx_iterator::array_type array;
50888   bbs = get_loop_body (loop);
50889   for (i = 0; i < loop->num_nodes; i++)
50890     FOR_BB_INSNS (bbs[i], insn)
50891       if (NONDEBUG_INSN_P (insn))
50892 	FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
50893 	  if (const_rtx x = *iter)
50894 	    if (MEM_P (x))
50895 	      {
50896 		machine_mode mode = GET_MODE (x);
50897 		unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
50898 		if (n_words > 4)
50899 		  mem_count += 2;
50900 		else
50901 		  mem_count += 1;
50902 	      }
50903   free (bbs);
50904 
50905   if (mem_count && mem_count <= 32)
50906     return MIN (nunroll, 32 / mem_count);
50907 
50908   return nunroll;
50909 }
50910 
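/* For instance, a loop body with five word-sized memory references gets
   mem_count == 5, so the unroll factor is capped at 32 / 5 == 6; a
   reference wider than four words counts twice, and loops with no memory
   references (or more than 32) keep the caller-supplied factor.  */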
50911 
50912 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P.  */
50913 
50914 static bool
50915 ix86_float_exceptions_rounding_supported_p (void)
50916 {
50917   /* For x87 floating point with standard excess precision handling,
50918      there is no adddf3 pattern (since x87 floating point only has
50919      XFmode operations) so the default hook implementation gets this
50920      wrong.  */
50921   return TARGET_80387 || TARGET_SSE_MATH;
50922 }
50923 
50924 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.  */
50925 
50926 static void
50927 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
50928 {
50929   if (!TARGET_80387 && !TARGET_SSE_MATH)
50930     return;
50931   tree exceptions_var = create_tmp_var_raw (integer_type_node);
50932   if (TARGET_80387)
50933     {
50934       tree fenv_index_type = build_index_type (size_int (6));
50935       tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
50936       tree fenv_var = create_tmp_var_raw (fenv_type);
50937       TREE_ADDRESSABLE (fenv_var) = 1;
50938       tree fenv_ptr = build_pointer_type (fenv_type);
50939       tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
50940       fenv_addr = fold_convert (ptr_type_node, fenv_addr);
50941       tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
50942       tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
50943       tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
50944       tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
50945       tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
50946       tree hold_fnclex = build_call_expr (fnclex, 0);
50947       fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
50948 			 NULL_TREE, NULL_TREE);
50949       *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
50950 		      hold_fnclex);
50951       *clear = build_call_expr (fnclex, 0);
50952       tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
50953       tree fnstsw_call = build_call_expr (fnstsw, 0);
50954       tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
50955 			    sw_var, fnstsw_call);
50956       tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
50957       tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
50958 				exceptions_var, exceptions_x87);
50959       *update = build2 (COMPOUND_EXPR, integer_type_node,
50960 			sw_mod, update_mod);
50961       tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
50962       *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
50963     }
50964   if (TARGET_SSE_MATH)
50965     {
50966       tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
50967       tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
50968       tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
50969       tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
50970       tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
50971       tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
50972 				      mxcsr_orig_var, stmxcsr_hold_call);
50973       tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
50974 				  mxcsr_orig_var,
50975 				  build_int_cst (unsigned_type_node, 0x1f80));
50976       hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
50977 			     build_int_cst (unsigned_type_node, 0xffffffc0));
50978       tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
50979 				     mxcsr_mod_var, hold_mod_val);
50980       tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
50981       tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
50982 			      hold_assign_orig, hold_assign_mod);
50983       hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
50984 			 ldmxcsr_hold_call);
50985       if (*hold)
50986 	*hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
50987       else
50988 	*hold = hold_all;
50989       tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
50990       if (*clear)
50991 	*clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
50992 			 ldmxcsr_clear_call);
50993       else
50994 	*clear = ldmxcsr_clear_call;
50995       tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
50996       tree exceptions_sse = fold_convert (integer_type_node,
50997 					  stxmcsr_update_call);
50998       if (*update)
50999 	{
51000 	  tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
51001 					exceptions_var, exceptions_sse);
51002 	  tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
51003 					   exceptions_var, exceptions_mod);
51004 	  *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
51005 			    exceptions_assign);
51006 	}
51007       else
51008 	*update = build2 (MODIFY_EXPR, integer_type_node,
51009 			  exceptions_var, exceptions_sse);
51010       tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
51011       *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51012 			ldmxcsr_update_call);
51013     }
51014   tree atomic_feraiseexcept
51015     = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
51016   tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
51017 						    1, exceptions_var);
51018   *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51019 		    atomic_feraiseexcept_call);
51020 }
51021 
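/* Roughly, for a C11 atomic compound assignment such as

     _Atomic double d;
     d += x;

   the *hold sequence built above saves the x87 environment (fnstenv) and
   loads an MXCSR with exceptions masked and status flags cleared, the
   *clear sequence discards exceptions raised by a failed compare-exchange
   iteration (fnclex / ldmxcsr), and the *update sequence collects the
   finally raised flags (fnstsw / stmxcsr), restores the saved
   environment, and re-raises the flags via __atomic_feraiseexcept.  */
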
51022 /* Return the mode to be used for bounds, or VOIDmode
51023    if bounds are not supported.  */
51024 
51025 static machine_mode
51026 ix86_mpx_bound_mode ()
51027 {
51028   /* Do not support pointer checker if MPX
51029      is not enabled.  */
51030   if (!TARGET_MPX)
51031     {
51032       if (flag_check_pointer_bounds)
51033 	warning (0, "Pointer Checker requires MPX support on this target."
51034 		 " Use the -mmpx option to enable MPX.");
51035       return VOIDmode;
51036     }
51037 
51038   return BNDmode;
51039 }
51040 
51041 /*  Return constant used to statically initialize constant bounds.
51042 
51043     This function is used to create special bound values.  For now
51044     only INIT bounds and NONE bounds are expected.  More special
51045     values may be added later.  */
51046 
51047 static tree
51048 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
51049 {
51050   tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
51051     : build_zero_cst (pointer_sized_int_node);
51052   tree high = ub ? build_zero_cst (pointer_sized_int_node)
51053     : build_minus_one_cst (pointer_sized_int_node);
51054 
51055   /* This function is supposed to be used to create INIT and
51056      NONE bounds only.  */
51057   gcc_assert ((lb == 0 && ub == -1)
51058 	      || (lb == -1 && ub == 0));
51059 
51060   return build_complex (NULL, low, high);
51061 }
51062 
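/* Concretely, INIT bounds (lb == 0, ub == -1) become the constant pair
   {0, 0} and NONE bounds (lb == -1, ub == 0) become {-1, -1}, matching
   the complemented upper-bound representation that ix86_initialize_bounds
   below also uses.  */
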
51063 /* Generate a list of statements STMTS to initialize pointer bounds
51064    variable VAR with bounds LB and UB.  Return the number of generated
51065    statements.  */
51066 
51067 static int
51068 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
51069 {
51070   tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
51071   tree lhs, modify, var_p;
51072 
51073   ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
51074   var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
51075 
51076   lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
51077   modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
51078   append_to_statement_list (modify, stmts);
51079 
51080   lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
51081 		build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
51082 			TYPE_SIZE_UNIT (pointer_sized_int_node)));
51083   modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
51084   append_to_statement_list (modify, stmts);
51085 
51086   return 2;
51087 }
51088 
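/* In C terms the two statements appended above amount to (p being an
   illustrative name only):

     uintptr_t *p = (uintptr_t *) &var;
     p[0] = lb;
     p[1] = ~ub;

   i.e. the lower bound and the one's complement of the upper bound are
   stored in the two pointer-sized halves of the bounds variable.  */
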
51089 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
51090 /* For i386, a common symbol is local only for non-PIE binaries.  For
51091    x86-64, a common symbol is local only for non-PIE binaries or when
51092    the linker supports copy relocs in PIE binaries.  */
51093 
51094 static bool
51095 ix86_binds_local_p (const_tree exp)
51096 {
51097   return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
51098 				  (!flag_pic
51099 				   || (TARGET_64BIT
51100 				       && HAVE_LD_PIE_COPYRELOC != 0)));
51101 }
51102 #endif
51103 
51104 /* If MEM is in the form of [base+offset], extract the two parts
51105    of the address into BASE and OFFSET; otherwise return false.  */
51106 
51107 static bool
51108 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
51109 {
51110   rtx addr;
51111 
51112   gcc_assert (MEM_P (mem));
51113 
51114   addr = XEXP (mem, 0);
51115 
51116   if (GET_CODE (addr) == CONST)
51117     addr = XEXP (addr, 0);
51118 
51119   if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
51120     {
51121       *base = addr;
51122       *offset = const0_rtx;
51123       return true;
51124     }
51125 
51126   if (GET_CODE (addr) == PLUS
51127       && (REG_P (XEXP (addr, 0))
51128 	  || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
51129       && CONST_INT_P (XEXP (addr, 1)))
51130     {
51131       *base = XEXP (addr, 0);
51132       *offset = XEXP (addr, 1);
51133       return true;
51134     }
51135 
51136   return false;
51137 }
51138 
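/* For example, the address forms accepted above are

     (reg R)                      -> *base = R, *offset = 0
     (symbol_ref S)               -> *base = S, *offset = 0
     (plus (reg R) (const_int C)) -> *base = R, *offset = C

   (possibly wrapped in a CONST); anything else, such as a scaled index or
   a two-register address, makes the function return false.  */
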
51139 /* Given OPERANDS of consecutive load/store instructions, check if we
51140    can merge them into a move-multiple.  LOAD is true if they are load
51141    instructions.  MODE is the mode of the memory operands.  */
51142 
51143 bool
51144 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
51145 				    machine_mode mode)
51146 {
51147   HOST_WIDE_INT offval_1, offval_2, msize;
51148   rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
51149 
51150   if (load)
51151     {
51152       mem_1 = operands[1];
51153       mem_2 = operands[3];
51154       reg_1 = operands[0];
51155       reg_2 = operands[2];
51156     }
51157   else
51158     {
51159       mem_1 = operands[0];
51160       mem_2 = operands[2];
51161       reg_1 = operands[1];
51162       reg_2 = operands[3];
51163     }
51164 
51165   gcc_assert (REG_P (reg_1) && REG_P (reg_2));
51166 
51167   if (REGNO (reg_1) != REGNO (reg_2))
51168     return false;
51169 
51170   /* Check if the addresses are in the form of [base+offset].  */
51171   if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
51172     return false;
51173   if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
51174     return false;
51175 
51176   /* Check if the bases are the same.  */
51177   if (!rtx_equal_p (base_1, base_2))
51178     return false;
51179 
51180   offval_1 = INTVAL (offset_1);
51181   offval_2 = INTVAL (offset_2);
51182   msize = GET_MODE_SIZE (mode);
51183   /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address.  */
51184   if (offval_1 + msize != offval_2)
51185     return false;
51186 
51187   return true;
51188 }
51189 
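/* Illustratively, a pair of DImode accesses at [base] and [base + 8]
   using the same register operand passes the checks above (same base,
   same REGNO, and offval_1 + msize == offval_2), whereas operands with
   different bases or non-adjacent offsets are rejected.  */
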
51190 /* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */
51191 
51192 static bool
51193 ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
51194 			optimization_type opt_type)
51195 {
51196   switch (op)
51197     {
51198     case asin_optab:
51199     case acos_optab:
51200     case log1p_optab:
51201     case exp_optab:
51202     case exp10_optab:
51203     case exp2_optab:
51204     case expm1_optab:
51205     case ldexp_optab:
51206     case scalb_optab:
51207     case round_optab:
51208       return opt_type == OPTIMIZE_FOR_SPEED;
51209 
51210     case rint_optab:
51211       if (SSE_FLOAT_MODE_P (mode1)
51212 	  && TARGET_SSE_MATH
51213 	  && !flag_trapping_math
51214 	  && !TARGET_SSE4_1)
51215 	return opt_type == OPTIMIZE_FOR_SPEED;
51216       return true;
51217 
51218     case floor_optab:
51219     case ceil_optab:
51220     case btrunc_optab:
51221       if (SSE_FLOAT_MODE_P (mode1)
51222 	  && TARGET_SSE_MATH
51223 	  && !flag_trapping_math
51224 	  && TARGET_SSE4_1)
51225 	return true;
51226       return opt_type == OPTIMIZE_FOR_SPEED;
51227 
51228     case rsqrt_optab:
51229       return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
51230 
51231     default:
51232       return true;
51233     }
51234 }
51235 
51236 /* Address space support.
51237 
51238    This is not "far pointers" in the 16-bit sense, but an easy way
51239    to use %fs and %gs segment prefixes.  Therefore:
51240 
51241     (a) All address spaces have the same modes,
51242     (b) All address spaces have the same address forms,
51243     (c) While %fs and %gs are technically subsets of the generic
51244         address space, they are probably not subsets of each other.
51245     (d) Since we have no access to the segment base register values
51246         without resorting to a system call, we cannot convert a
51247         non-default address space to a default address space.
51248         Therefore we do not claim %fs or %gs are subsets of generic.
51249 
51250    Therefore we can (mostly) use the default hooks.  */
51251 
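/* For instance, with the %gs space described above (using GCC's __seg_gs
   keyword), dereferencing a null pointer is legitimate:

     int __seg_gs *p = 0;
     int v = *p;

   loads from offset 0 relative to the %gs segment base, which is why
   address zero is reported as valid for the non-generic spaces below.  */
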
51252 /* All use of segmentation is assumed to make address 0 valid.  */
51253 
51254 static bool
51255 ix86_addr_space_zero_address_valid (addr_space_t as)
51256 {
51257   return as != ADDR_SPACE_GENERIC;
51258 }
51259 
51260 static void
51261 ix86_init_libfuncs (void)
51262 {
51263   if (TARGET_64BIT)
51264     {
51265       set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
51266       set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
51267     }
51268   else
51269     {
51270       set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
51271       set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
51272     }
51273 
51274 #if TARGET_MACHO
51275   darwin_rename_builtins ();
51276 #endif
51277 }
51278 
51279 /* Generate call to __divmoddi4.  */
51280 
51281 static void
51282 ix86_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
51283 			    rtx op0, rtx op1,
51284 			    rtx *quot_p, rtx *rem_p)
51285 {
51286   rtx rem = assign_386_stack_local (mode, SLOT_TEMP);
51287 
51288   rtx quot = emit_library_call_value (libfunc, NULL_RTX, LCT_NORMAL,
51289 				      mode,
51290 				      op0, GET_MODE (op0),
51291 				      op1, GET_MODE (op1),
51292 				      XEXP (rem, 0), Pmode);
51293   *quot_p = quot;
51294   *rem_p = rem;
51295 }
51296 
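/* The call emitted above matches the libgcc prototype

     long long __divmoddi4 (long long a, long long b, long long *rem);

   (and __divmodti4 with __int128 operands for 64-bit targets): the
   quotient is the return value and the remainder is written through the
   address of the stack slot passed as the third argument.  */
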
51297 /* Set the value of FLT_EVAL_METHOD in float.h.  When using only the
51298    FPU, assume that the fpcw is set to extended precision; when using
51299    only SSE, rounding is correct; when using both SSE and the FPU,
51300    the rounding precision is indeterminate, since either may be chosen
51301    apparently at random.  */
51302 
51303 static enum flt_eval_method
51304 ix86_excess_precision (enum excess_precision_type type)
51305 {
51306   switch (type)
51307     {
51308       case EXCESS_PRECISION_TYPE_FAST:
51309 	/* The fastest type to promote to will always be the native type,
51310 	   whether that occurs with implicit excess precision or
51311 	   otherwise.  */
51312 	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
51313       case EXCESS_PRECISION_TYPE_STANDARD:
51314       case EXCESS_PRECISION_TYPE_IMPLICIT:
51315 	/* Otherwise, the excess precision we want when we are
51316 	   in a standards compliant mode, and the implicit precision we
51317 	   provide would be identical were it not for the unpredictable
51318 	   cases.  */
51319 	if (!TARGET_80387)
51320 	  return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
51321 	else if (!TARGET_MIX_SSE_I387)
51322 	  {
51323 	    if (!TARGET_SSE_MATH)
51324 	      return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
51325 	    else if (TARGET_SSE2)
51326 	      return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
51327 	  }
51328 
51329 	/* If we are in standards compliant mode, but we know we will
51330 	   calculate in unpredictable precision, return
51331 	   FLT_EVAL_METHOD_FLOAT.  There is no reason to introduce explicit
51332 	   excess precision if the target can't guarantee it will honor
51333 	   it.  */
51334 	return (type == EXCESS_PRECISION_TYPE_STANDARD
51335 		? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
51336 		: FLT_EVAL_METHOD_UNPREDICTABLE);
51337       default:
51338 	gcc_unreachable ();
51339     }
51340 
51341   return FLT_EVAL_METHOD_UNPREDICTABLE;
51342 }
51343 
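/* Illustrative results of the logic above: with -mfpmath=387 the standard
   and implicit cases promote to long double (FLT_EVAL_METHOD 2 in
   <float.h>); with -mfpmath=sse -msse2 operations are evaluated in the
   operands' own type (FLT_EVAL_METHOD 0); with -mfpmath=both the standard
   case still requests float promotion while the implicit case is reported
   as unpredictable.  */
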
51344 /* Implement PUSH_ROUNDING.  On 386, we have a pushw instruction that
51345    decrements by exactly 2 no matter what the position was; there is no pushb.
51346 
51347    But as the CIE data alignment factor on this arch is -4 for 32-bit targets
51348    and -8 for 64-bit targets, we need to make sure all stack pointer adjustments
51349    are multiples of 4 for 32-bit targets and 8 for 64-bit targets.  */
51350 
51351 poly_int64
51352 ix86_push_rounding (poly_int64 bytes)
51353 {
51354   return ROUND_UP (bytes, UNITS_PER_WORD);
51355 }
51356 
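/* For instance, a 2-byte (HImode) push is rounded up to a full word:
   ROUND_UP (2, 4) == 4 for 32-bit targets and ROUND_UP (2, 8) == 8 for
   64-bit targets, so every stack pointer adjustment stays a multiple of
   the CIE data alignment factor.  */
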
51357 /* Target-specific selftests.  */
51358 
51359 #if CHECKING_P
51360 
51361 namespace selftest {
51362 
51363 /* Verify that hard regs are dumped as expected (in compact mode).  */
51364 
51365 static void
51366 ix86_test_dumping_hard_regs ()
51367 {
51368   ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
51369   ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
51370 }
51371 
51372 /* Test dumping an insn with repeated references to the same SCRATCH,
51373    to verify the rtx_reuse code.  */
51374 
51375 static void
51376 ix86_test_dumping_memory_blockage ()
51377 {
51378   set_new_first_and_last_insn (NULL, NULL);
51379 
51380   rtx pat = gen_memory_blockage ();
51381   rtx_reuse_manager r;
51382   r.preprocess (pat);
51383 
51384   /* Verify that the repeated references to the SCRATCH are dumped
51385      using reuse IDs.  The first should be prefixed with a reuse ID,
51386      and the second should be dumped as a "reuse_rtx" of that ID.
51387      The expected string assumes Pmode == DImode.  */
51388   if (Pmode == DImode)
51389     ASSERT_RTL_DUMP_EQ_WITH_REUSE
51390       ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0  A8])\n"
51391        "        (unspec:BLK [\n"
51392        "                (mem/v:BLK (reuse_rtx 0) [0  A8])\n"
51393        "            ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
51394 }
51395 
51396 /* Verify loading an RTL dump; specifically a dump of copying
51397    a param on x86_64 from a hard reg into the frame.
51398    This test is target-specific since the dump contains target-specific
51399    hard reg names.  */
51400 
51401 static void
51402 ix86_test_loading_dump_fragment_1 ()
51403 {
51404   rtl_dump_test t (SELFTEST_LOCATION,
51405 		   locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));
51406 
51407   rtx_insn *insn = get_insn_by_uid (1);
51408 
51409   /* The block structure and indentation here is purely for
51410      readability; it mirrors the structure of the rtx.  */
51411   tree mem_expr;
51412   {
51413     rtx pat = PATTERN (insn);
51414     ASSERT_EQ (SET, GET_CODE (pat));
51415     {
51416       rtx dest = SET_DEST (pat);
51417       ASSERT_EQ (MEM, GET_CODE (dest));
51418       /* Verify the "/c" was parsed.  */
51419       ASSERT_TRUE (RTX_FLAG (dest, call));
51420       ASSERT_EQ (SImode, GET_MODE (dest));
51421       {
51422 	rtx addr = XEXP (dest, 0);
51423 	ASSERT_EQ (PLUS, GET_CODE (addr));
51424 	ASSERT_EQ (DImode, GET_MODE (addr));
51425 	{
51426 	  rtx lhs = XEXP (addr, 0);
51427 	  /* Verify that the "frame" REG was consolidated.  */
51428 	  ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
51429 	}
51430 	{
51431 	  rtx rhs = XEXP (addr, 1);
51432 	  ASSERT_EQ (CONST_INT, GET_CODE (rhs));
51433 	  ASSERT_EQ (-4, INTVAL (rhs));
51434 	}
51435       }
51436       /* Verify the "[1 i+0 S4 A32]" was parsed.  */
51437       ASSERT_EQ (1, MEM_ALIAS_SET (dest));
51438       /* "i" should have been handled by synthesizing a global int
51439 	 variable named "i".  */
51440       mem_expr = MEM_EXPR (dest);
51441       ASSERT_NE (mem_expr, NULL);
51442       ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
51443       ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
51444       ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
51445       ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
51446       /* "+0".  */
51447       ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
51448       ASSERT_EQ (0, MEM_OFFSET (dest));
51449       /* "S4".  */
51450       ASSERT_EQ (4, MEM_SIZE (dest));
51451       /* "A32".  */
51452       ASSERT_EQ (32, MEM_ALIGN (dest));
51453     }
51454     {
51455       rtx src = SET_SRC (pat);
51456       ASSERT_EQ (REG, GET_CODE (src));
51457       ASSERT_EQ (SImode, GET_MODE (src));
51458       ASSERT_EQ (5, REGNO (src));
51459       tree reg_expr = REG_EXPR (src);
51460       /* "i" here should point to the same var as for the MEM_EXPR.  */
51461       ASSERT_EQ (reg_expr, mem_expr);
51462     }
51463   }
51464 }
51465 
51466 /* Verify that the RTL loader copes with a call_insn dump.
51467    This test is target-specific since the dump contains a target-specific
51468    hard reg name.  */
51469 
51470 static void
51471 ix86_test_loading_call_insn ()
51472 {
51473   /* The test dump includes register "xmm0", which requires TARGET_SSE
51474      to exist.  */
51475   if (!TARGET_SSE)
51476     return;
51477 
51478   rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));
51479 
51480   rtx_insn *insn = get_insns ();
51481   ASSERT_EQ (CALL_INSN, GET_CODE (insn));
51482 
51483   /* "/j".  */
51484   ASSERT_TRUE (RTX_FLAG (insn, jump));
51485 
51486   rtx pat = PATTERN (insn);
51487   ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));
51488 
51489   /* Verify REG_NOTES.  */
51490   {
51491     /* "(expr_list:REG_CALL_DECL".   */
51492     ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
51493     rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
51494     ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));
51495 
51496     /* "(expr_list:REG_EH_REGION (const_int 0 [0])".  */
51497     rtx_expr_list *note1 = note0->next ();
51498     ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));
51499 
51500     ASSERT_EQ (NULL, note1->next ());
51501   }
51502 
51503   /* Verify CALL_INSN_FUNCTION_USAGE.  */
51504   {
51505     /* "(expr_list:DF (use (reg:DF 21 xmm0))".  */
51506     rtx_expr_list *usage
51507       = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
51508     ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
51509     ASSERT_EQ (DFmode, GET_MODE (usage));
51510     ASSERT_EQ (USE, GET_CODE (usage->element ()));
51511     ASSERT_EQ (NULL, usage->next ());
51512   }
51513 }
51514 
51515 /* Verify that the RTL loader copes with a dump from print_rtx_function.
51516    This test is target-specific since the dump contains target-specific
51517    hard reg names.  */
51518 
51519 static void
51520 ix86_test_loading_full_dump ()
51521 {
51522   rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));
51523 
51524   ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
51525 
51526   rtx_insn *insn_1 = get_insn_by_uid (1);
51527   ASSERT_EQ (NOTE, GET_CODE (insn_1));
51528 
51529   rtx_insn *insn_7 = get_insn_by_uid (7);
51530   ASSERT_EQ (INSN, GET_CODE (insn_7));
51531   ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));
51532 
51533   rtx_insn *insn_15 = get_insn_by_uid (15);
51534   ASSERT_EQ (INSN, GET_CODE (insn_15));
51535   ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
51536 
51537   /* Verify crtl->return_rtx.  */
51538   ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
51539   ASSERT_EQ (0, REGNO (crtl->return_rtx));
51540   ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
51541 }
51542 
51543 /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
51544    In particular, verify that it correctly loads the 2nd operand.
51545    This test is target-specific since these are machine-specific
51546    operands (and enums).  */
51547 
51548 static void
51549 ix86_test_loading_unspec ()
51550 {
51551   rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));
51552 
51553   ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
51554 
51555   ASSERT_TRUE (cfun);
51556 
51557   /* Test of an UNSPEC.  */
51558   rtx_insn *insn = get_insns ();
51559   ASSERT_EQ (INSN, GET_CODE (insn));
51560   rtx set = single_set (insn);
51561   ASSERT_NE (NULL, set);
51562   rtx dst = SET_DEST (set);
51563   ASSERT_EQ (MEM, GET_CODE (dst));
51564   rtx src = SET_SRC (set);
51565   ASSERT_EQ (UNSPEC, GET_CODE (src));
51566   ASSERT_EQ (BLKmode, GET_MODE (src));
51567   ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));
51568 
51569   rtx v0 = XVECEXP (src, 0, 0);
51570 
51571   /* Verify that the two uses of the first SCRATCH have pointer
51572      equality.  */
51573   rtx scratch_a = XEXP (dst, 0);
51574   ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));
51575 
51576   rtx scratch_b = XEXP (v0, 0);
51577   ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));
51578 
51579   ASSERT_EQ (scratch_a, scratch_b);
51580 
51581   /* Verify that the two mems are thus treated as equal.  */
51582   ASSERT_TRUE (rtx_equal_p (dst, v0));
51583 
51584   /* Verify that the insn is recognized.  */
51585   ASSERT_NE (-1, recog_memoized (insn));
51586 
51587   /* Test of an UNSPEC_VOLATILE, which has its own enum values.  */
51588   insn = NEXT_INSN (insn);
51589   ASSERT_EQ (INSN, GET_CODE (insn));
51590 
51591   set = single_set (insn);
51592   ASSERT_NE (NULL, set);
51593 
51594   src = SET_SRC (set);
51595   ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
51596   ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
51597 }
51598 
51599 /* Run all target-specific selftests.  */
51600 
51601 static void
51602 ix86_run_selftests (void)
51603 {
51604   ix86_test_dumping_hard_regs ();
51605   ix86_test_dumping_memory_blockage ();
51606 
51607   /* Various tests of loading RTL dumps, here because they contain
51608      ix86-isms (e.g. names of hard regs).  */
51609   ix86_test_loading_dump_fragment_1 ();
51610   ix86_test_loading_call_insn ();
51611   ix86_test_loading_full_dump ();
51612   ix86_test_loading_unspec ();
51613 }
51614 
51615 } // namespace selftest
51616 
51617 #endif /* CHECKING_P */
51618 
51619 /* Initialize the GCC target structure.  */
51620 #undef TARGET_RETURN_IN_MEMORY
51621 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
51622 
51623 #undef TARGET_LEGITIMIZE_ADDRESS
51624 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
51625 
51626 #undef TARGET_ATTRIBUTE_TABLE
51627 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
51628 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
51629 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
51630 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51631 #  undef TARGET_MERGE_DECL_ATTRIBUTES
51632 #  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
51633 #endif
51634 
51635 #undef TARGET_COMP_TYPE_ATTRIBUTES
51636 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
51637 
51638 #undef TARGET_INIT_BUILTINS
51639 #define TARGET_INIT_BUILTINS ix86_init_builtins
51640 #undef TARGET_BUILTIN_DECL
51641 #define TARGET_BUILTIN_DECL ix86_builtin_decl
51642 #undef TARGET_EXPAND_BUILTIN
51643 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
51644 
51645 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
51646 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
51647   ix86_builtin_vectorized_function
51648 
51649 #undef TARGET_VECTORIZE_BUILTIN_GATHER
51650 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
51651 
51652 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
51653 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
51654 
51655 #undef TARGET_BUILTIN_RECIPROCAL
51656 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
51657 
51658 #undef TARGET_ASM_FUNCTION_EPILOGUE
51659 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
51660 
51661 #undef TARGET_ENCODE_SECTION_INFO
51662 #ifndef SUBTARGET_ENCODE_SECTION_INFO
51663 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
51664 #else
51665 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
51666 #endif
51667 
51668 #undef TARGET_ASM_OPEN_PAREN
51669 #define TARGET_ASM_OPEN_PAREN ""
51670 #undef TARGET_ASM_CLOSE_PAREN
51671 #define TARGET_ASM_CLOSE_PAREN ""
51672 
51673 #undef TARGET_ASM_BYTE_OP
51674 #define TARGET_ASM_BYTE_OP ASM_BYTE
51675 
51676 #undef TARGET_ASM_ALIGNED_HI_OP
51677 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
51678 #undef TARGET_ASM_ALIGNED_SI_OP
51679 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
51680 #ifdef ASM_QUAD
51681 #undef TARGET_ASM_ALIGNED_DI_OP
51682 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
51683 #endif
51684 
51685 #undef TARGET_PROFILE_BEFORE_PROLOGUE
51686 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
51687 
51688 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
51689 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
51690 
51691 #undef TARGET_ASM_UNALIGNED_HI_OP
51692 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
51693 #undef TARGET_ASM_UNALIGNED_SI_OP
51694 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
51695 #undef TARGET_ASM_UNALIGNED_DI_OP
51696 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
51697 
51698 #undef TARGET_PRINT_OPERAND
51699 #define TARGET_PRINT_OPERAND ix86_print_operand
51700 #undef TARGET_PRINT_OPERAND_ADDRESS
51701 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
51702 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
51703 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
51704 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
51705 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
51706 
51707 #undef TARGET_SCHED_INIT_GLOBAL
51708 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
51709 #undef TARGET_SCHED_ADJUST_COST
51710 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
51711 #undef TARGET_SCHED_ISSUE_RATE
51712 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
51713 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
51714 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
51715   ia32_multipass_dfa_lookahead
51716 #undef TARGET_SCHED_MACRO_FUSION_P
51717 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
51718 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
51719 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
51720 
51721 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
51722 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
51723 
51724 #undef TARGET_MEMMODEL_CHECK
51725 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
51726 
51727 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
51728 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
51729 
51730 #ifdef HAVE_AS_TLS
51731 #undef TARGET_HAVE_TLS
51732 #define TARGET_HAVE_TLS true
51733 #endif
51734 #undef TARGET_CANNOT_FORCE_CONST_MEM
51735 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
51736 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
51737 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
51738 
51739 #undef TARGET_DELEGITIMIZE_ADDRESS
51740 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
51741 
51742 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
51743 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
51744 
51745 #undef TARGET_MS_BITFIELD_LAYOUT_P
51746 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
51747 
51748 #if TARGET_MACHO
51749 #undef TARGET_BINDS_LOCAL_P
51750 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
51751 #else
51752 #undef TARGET_BINDS_LOCAL_P
51753 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
51754 #endif
51755 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51756 #undef TARGET_BINDS_LOCAL_P
51757 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
51758 #endif
51759 
51760 #undef TARGET_ASM_OUTPUT_MI_THUNK
51761 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
51762 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
51763 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
51764 
51765 #undef TARGET_ASM_FILE_START
51766 #define TARGET_ASM_FILE_START x86_file_start
51767 
51768 #undef TARGET_OPTION_OVERRIDE
51769 #define TARGET_OPTION_OVERRIDE ix86_option_override
51770 
51771 #undef TARGET_REGISTER_MOVE_COST
51772 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
51773 #undef TARGET_MEMORY_MOVE_COST
51774 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
51775 #undef TARGET_RTX_COSTS
51776 #define TARGET_RTX_COSTS ix86_rtx_costs
51777 #undef TARGET_ADDRESS_COST
51778 #define TARGET_ADDRESS_COST ix86_address_cost
51779 
51780 #undef TARGET_FLAGS_REGNUM
51781 #define TARGET_FLAGS_REGNUM FLAGS_REG
51782 #undef TARGET_FIXED_CONDITION_CODE_REGS
51783 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
51784 #undef TARGET_CC_MODES_COMPATIBLE
51785 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
51786 
51787 #undef TARGET_MACHINE_DEPENDENT_REORG
51788 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
51789 
51790 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
51791 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
51792 
51793 #undef TARGET_BUILD_BUILTIN_VA_LIST
51794 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
51795 
51796 #undef TARGET_FOLD_BUILTIN
51797 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
51798 
51799 #undef TARGET_GIMPLE_FOLD_BUILTIN
51800 #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
51801 
51802 #undef TARGET_COMPARE_VERSION_PRIORITY
51803 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
51804 
51805 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
51806 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
51807   ix86_generate_version_dispatcher_body
51808 
51809 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
51810 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
51811   ix86_get_function_versions_dispatcher
51812 
51813 #undef TARGET_ENUM_VA_LIST_P
51814 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
51815 
51816 #undef TARGET_FN_ABI_VA_LIST
51817 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
51818 
51819 #undef TARGET_CANONICAL_VA_LIST_TYPE
51820 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
51821 
51822 #undef TARGET_EXPAND_BUILTIN_VA_START
51823 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
51824 
51825 #undef TARGET_MD_ASM_ADJUST
51826 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
51827 
51828 #undef TARGET_C_EXCESS_PRECISION
51829 #define TARGET_C_EXCESS_PRECISION ix86_excess_precision
51830 #undef TARGET_PROMOTE_PROTOTYPES
51831 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
51832 #undef TARGET_SETUP_INCOMING_VARARGS
51833 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
51834 #undef TARGET_MUST_PASS_IN_STACK
51835 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
51836 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
51837 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
51838 #undef TARGET_FUNCTION_ARG_ADVANCE
51839 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
51840 #undef TARGET_FUNCTION_ARG
51841 #define TARGET_FUNCTION_ARG ix86_function_arg
51842 #undef TARGET_INIT_PIC_REG
51843 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
51844 #undef TARGET_USE_PSEUDO_PIC_REG
51845 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
51846 #undef TARGET_FUNCTION_ARG_BOUNDARY
51847 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
51848 #undef TARGET_PASS_BY_REFERENCE
51849 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
51850 #undef TARGET_INTERNAL_ARG_POINTER
51851 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
51852 #undef TARGET_UPDATE_STACK_BOUNDARY
51853 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
51854 #undef TARGET_GET_DRAP_RTX
51855 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
51856 #undef TARGET_STRICT_ARGUMENT_NAMING
51857 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
51858 #undef TARGET_STATIC_CHAIN
51859 #define TARGET_STATIC_CHAIN ix86_static_chain
51860 #undef TARGET_TRAMPOLINE_INIT
51861 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
51862 #undef TARGET_RETURN_POPS_ARGS
51863 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
51864 
51865 #undef TARGET_WARN_FUNC_RETURN
51866 #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
51867 
51868 #undef TARGET_LEGITIMATE_COMBINED_INSN
51869 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
51870 
51871 #undef TARGET_ASAN_SHADOW_OFFSET
51872 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
51873 
51874 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
51875 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
51876 
51877 #undef TARGET_SCALAR_MODE_SUPPORTED_P
51878 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
51879 
51880 #undef TARGET_VECTOR_MODE_SUPPORTED_P
51881 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
51882 
51883 #undef TARGET_C_MODE_FOR_SUFFIX
51884 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
51885 
51886 #ifdef HAVE_AS_TLS
51887 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
51888 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
51889 #endif
51890 
51891 #ifdef SUBTARGET_INSERT_ATTRIBUTES
51892 #undef TARGET_INSERT_ATTRIBUTES
51893 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
51894 #endif
51895 
51896 #undef TARGET_MANGLE_TYPE
51897 #define TARGET_MANGLE_TYPE ix86_mangle_type
51898 
51899 #undef TARGET_STACK_PROTECT_GUARD
51900 #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
51901 
51902 #if !TARGET_MACHO
51903 #undef TARGET_STACK_PROTECT_FAIL
51904 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
51905 #endif
51906 
51907 #undef TARGET_FUNCTION_VALUE
51908 #define TARGET_FUNCTION_VALUE ix86_function_value
51909 
51910 #undef TARGET_FUNCTION_VALUE_REGNO_P
51911 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
51912 
51913 #undef TARGET_PROMOTE_FUNCTION_MODE
51914 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
51915 
51916 #undef  TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
51917 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
51918 
51919 #undef TARGET_MEMBER_TYPE_FORCES_BLK
51920 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
51921 
51922 #undef TARGET_INSTANTIATE_DECLS
51923 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
51924 
51925 #undef TARGET_SECONDARY_RELOAD
51926 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
51927 #undef TARGET_SECONDARY_MEMORY_NEEDED
51928 #define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
51929 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
51930 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
51931 
51932 #undef TARGET_CLASS_MAX_NREGS
51933 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
51934 
51935 #undef TARGET_PREFERRED_RELOAD_CLASS
51936 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
51937 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
51938 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
51939 #undef TARGET_CLASS_LIKELY_SPILLED_P
51940 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
51941 
51942 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
51943 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
51944   ix86_builtin_vectorization_cost
51945 #undef TARGET_VECTORIZE_VEC_PERM_CONST
51946 #define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
51947 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
51948 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
51949   ix86_preferred_simd_mode
51950 #undef TARGET_VECTORIZE_SPLIT_REDUCTION
51951 #define TARGET_VECTORIZE_SPLIT_REDUCTION \
51952   ix86_split_reduction
51953 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
51954 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
51955   ix86_autovectorize_vector_sizes
51956 #undef TARGET_VECTORIZE_GET_MASK_MODE
51957 #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
51958 #undef TARGET_VECTORIZE_INIT_COST
51959 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
51960 #undef TARGET_VECTORIZE_ADD_STMT_COST
51961 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
51962 #undef TARGET_VECTORIZE_FINISH_COST
51963 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
51964 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
51965 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
51966 
51967 #undef TARGET_SET_CURRENT_FUNCTION
51968 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
51969 
51970 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
51971 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
51972 
51973 #undef TARGET_OPTION_SAVE
51974 #define TARGET_OPTION_SAVE ix86_function_specific_save
51975 
51976 #undef TARGET_OPTION_RESTORE
51977 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
51978 
51979 #undef TARGET_OPTION_POST_STREAM_IN
51980 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
51981 
51982 #undef TARGET_OPTION_PRINT
51983 #define TARGET_OPTION_PRINT ix86_function_specific_print
51984 
51985 #undef TARGET_OPTION_FUNCTION_VERSIONS
51986 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
51987 
51988 #undef TARGET_CAN_INLINE_P
51989 #define TARGET_CAN_INLINE_P ix86_can_inline_p
51990 
51991 #undef TARGET_LEGITIMATE_ADDRESS_P
51992 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
51993 
51994 #undef TARGET_REGISTER_PRIORITY
51995 #define TARGET_REGISTER_PRIORITY ix86_register_priority
51996 
51997 #undef TARGET_REGISTER_USAGE_LEVELING_P
51998 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
51999 
52000 #undef TARGET_LEGITIMATE_CONSTANT_P
52001 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
52002 
52003 #undef TARGET_COMPUTE_FRAME_LAYOUT
52004 #define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
52005 
52006 #undef TARGET_FRAME_POINTER_REQUIRED
52007 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
52008 
52009 #undef TARGET_CAN_ELIMINATE
52010 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
52011 
52012 #undef TARGET_EXTRA_LIVE_ON_ENTRY
52013 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
52014 
52015 #undef TARGET_ASM_CODE_END
52016 #define TARGET_ASM_CODE_END ix86_code_end
52017 
52018 #undef TARGET_CONDITIONAL_REGISTER_USAGE
52019 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
52020 
52021 #undef TARGET_CANONICALIZE_COMPARISON
52022 #define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
52023 
52024 #undef TARGET_LOOP_UNROLL_ADJUST
52025 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
52026 
52027 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.  */
52028 #undef TARGET_SPILL_CLASS
52029 #define TARGET_SPILL_CLASS ix86_spill_class
52030 
52031 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
52032 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
52033   ix86_simd_clone_compute_vecsize_and_simdlen
52034 
52035 #undef TARGET_SIMD_CLONE_ADJUST
52036 #define TARGET_SIMD_CLONE_ADJUST \
52037   ix86_simd_clone_adjust
52038 
52039 #undef TARGET_SIMD_CLONE_USABLE
52040 #define TARGET_SIMD_CLONE_USABLE \
52041   ix86_simd_clone_usable
52042 
52043 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
52044 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
52045   ix86_float_exceptions_rounding_supported_p
52046 
52047 #undef TARGET_MODE_EMIT
52048 #define TARGET_MODE_EMIT ix86_emit_mode_set
52049 
52050 #undef TARGET_MODE_NEEDED
52051 #define TARGET_MODE_NEEDED ix86_mode_needed
52052 
52053 #undef TARGET_MODE_AFTER
52054 #define TARGET_MODE_AFTER ix86_mode_after
52055 
52056 #undef TARGET_MODE_ENTRY
52057 #define TARGET_MODE_ENTRY ix86_mode_entry
52058 
52059 #undef TARGET_MODE_EXIT
52060 #define TARGET_MODE_EXIT ix86_mode_exit
52061 
52062 #undef TARGET_MODE_PRIORITY
52063 #define TARGET_MODE_PRIORITY ix86_mode_priority
52064 
52065 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
52066 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
52067 
52068 #undef TARGET_LOAD_BOUNDS_FOR_ARG
52069 #define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
52070 
52071 #undef TARGET_STORE_BOUNDS_FOR_ARG
52072 #define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
52073 
52074 #undef TARGET_LOAD_RETURNED_BOUNDS
52075 #define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
52076 
52077 #undef TARGET_STORE_RETURNED_BOUNDS
52078 #define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
52079 
52080 #undef TARGET_CHKP_BOUND_MODE
52081 #define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
52082 
52083 #undef TARGET_BUILTIN_CHKP_FUNCTION
52084 #define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
52085 
52086 #undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
52087 #define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
52088 
52089 #undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
52090 #define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
52091 
52092 #undef TARGET_CHKP_INITIALIZE_BOUNDS
52093 #define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
52094 
52095 #undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
52096 #define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
52097 
52098 #undef TARGET_OFFLOAD_OPTIONS
52099 #define TARGET_OFFLOAD_OPTIONS \
52100   ix86_offload_options
52101 
52102 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
52103 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
52104 
52105 #undef TARGET_OPTAB_SUPPORTED_P
52106 #define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
52107 
52108 #undef TARGET_HARD_REGNO_SCRATCH_OK
52109 #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
52110 
52111 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
52112 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
52113 
52114 #undef TARGET_ADDITIONAL_ALLOCNO_CLASS_P
52115 #define TARGET_ADDITIONAL_ALLOCNO_CLASS_P ix86_additional_allocno_class_p
52116 
52117 #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
52118 #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
52119 
52120 #undef TARGET_INIT_LIBFUNCS
52121 #define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
52122 
52123 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
52124 #define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
52125 
52126 #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
52127 #define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
52128 
52129 #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
52130 #define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
52131 
52132 #undef TARGET_HARD_REGNO_NREGS
52133 #define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
52134 #undef TARGET_HARD_REGNO_MODE_OK
52135 #define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
52136 
52137 #undef TARGET_MODES_TIEABLE_P
52138 #define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
52139 
52140 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
52141 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
52142   ix86_hard_regno_call_part_clobbered
52143 
52144 #undef TARGET_CAN_CHANGE_MODE_CLASS
52145 #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
52146 
52147 #undef TARGET_STATIC_RTX_ALIGNMENT
52148 #define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
52149 #undef TARGET_CONSTANT_ALIGNMENT
52150 #define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment
52151 
52152 #undef TARGET_EMPTY_RECORD_P
52153 #define TARGET_EMPTY_RECORD_P ix86_is_empty_record
52154 
52155 #undef TARGET_WARN_PARAMETER_PASSING_ABI
52156 #define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi
52157 
52158 #if CHECKING_P
52159 #undef TARGET_RUN_TARGET_SELFTESTS
52160 #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
52161 #endif /* #if CHECKING_P */
52162 
52163 struct gcc_target targetm = TARGET_INITIALIZER;
52164 
52165 #include "gt-i386.h"
52166